コード例 #1
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_outputs_multiple_combinations(self, tmpdir):
        """Traverse from 'AAA' and expect six FASTA records.

        The expected records are the three input sequences plus three
        recombinations of them, each carrying an id of the form ``g0_p<i>``.
        """
        # given
        input_sequences = ['CAACC', 'AAACA', 'AAACT']
        recombined_sequences = ['CAACT', 'CAACA', 'AAACC']
        n_paths = 6
        graph_builder = builder.Mccortex().with_kmer_size(3)
        for sequence in input_sequences:
            graph_builder.with_dna_sequence(sequence)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(to_json=False,
                                              graph=output_graph,
                                              contig='AAA')
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        allowed_ids = ['g0_p{}'.format(path_index) for path_index in range(n_paths)]
        # Inputs are checked first, then the recombined sequences.
        for sequence in input_sequences + recombined_sequences:
            expect.has_record(sequence).has_id_in(*allowed_ids)
        expect.has_n_records(n_paths)
コード例 #2
0
ファイル: test_engine.py プロジェクト: winni2k/cortexpy
    def test_with_bubble_and_two_colors_returns_all_kmers(
            self, tmpdir, ra_constructor):
        """Traverse a two-color bubble graph from 'ACA' in both orientations.

        The graph is built from 'AAACAAG' and 'AAATAAG' plus a second sample
        ('sample_1') containing 'AAATAAG'. Traversal is restricted to color 0,
        and the resulting k-mer graph is checked for node coverages and edges.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAACAAG')
                        .with_dna_sequence('AAATAAG')
                        .with_dna_sequence('AAATAAG', name='sample_1')
                        .build(tmpdir))

        # Open the graph inside a context manager so the file handle is closed
        # even if an assertion fails. Everything that may still read from the
        # handle stays inside the block.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            expect = KmerGraphExpectation(traverser.traverse_from('ACA').graph,
                                          sort_edges=True)

            # then
            expect.has_node('AAA').has_coverages(2, 1)
            expect.has_node('AAC').has_coverages(1, 0)
            expect.has_node('ACA').has_coverages(1, 0)
            expect.has_node('CAA').has_coverages(1, 0)
            expect.has_node('AAG').has_coverages(2, 1)
            expect.has_node('AAT').has_coverages(1, 1)
            expect.has_node('ATA').has_coverages(1, 1)
            expect.has_node('TAA').has_coverages(1, 1)

            expect.has_edges('AAA AAC 0', 'AAC ACA 0', 'ACA CAA 0', 'CAA AAG 0',
                             'AAA AAT 0', 'AAT ATA 0', 'ATA TAA 0', 'TAA AAG 0',
                             'AAA AAT 1', 'AAT ATA 1', 'ATA TAA 1', 'AAG TAA 1')
コード例 #3
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_outputs_contigs_for_bubble(self, tmpdir):
        """Traverse a two-sequence graph from 'AAA'; both inputs must come back as FASTA."""
        # given
        input_sequences = ['AAACCC', 'AAAGCCC']
        graph_builder = builder.Mccortex().with_kmer_size(3)
        for sequence in input_sequences:
            graph_builder.with_dna_sequence(sequence)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(graph=output_graph, contig='AAA')
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        for sequence in input_sequences:
            expect.has_record(sequence)
        expect.has_n_records(2)
コード例 #4
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_dal19_data(self, tmpdir):
        """Traverse a k=47 graph built from three overlapping sequences.

        Two FASTA records are expected: the first input extended by the tails
        of the second and third inputs, and the third input on its own.
        """
        # given
        kmer_size = 47
        first_seq = (
            'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT'
        )
        second_seq = 'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA'
        third_seq = (
            'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG'
        )
        # First input followed by the second input from index 47 onward and
        # the third input from index 48 onward.
        joined_contig = first_seq + second_seq[47:] + third_seq[48:]
        expected_records = [joined_contig, third_seq]

        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in (first_seq, second_seq, third_seq):
            graph_builder.with_dna_sequence(sequence)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(
            to_json=False,
            graph=output_graph,
            contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT',
        )
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        for expected in expected_records:
            expect.has_record(expected)
        expect.has_n_records(2)
コード例 #5
0
ファイル: test_term.py プロジェクト: winni2k/cortexpy
    def test_prints_single_kmer(self, tmpdir):
        """Viewing contig 'CAA' in a graph built from 'ACCAA' prints exactly one k-mer line."""
        # given
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence('ACCAA')
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        expected_kmers = ['CAA: CAA 1 1 .c...... ........']

        # when
        completed = runner.Cortexpy().view_contig(graph=output_graph, contig='CAA')
        stdout = completed.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
コード例 #6
0
    def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(
            self, tmpdir):
        """Retrieve the k-mer graph for contig 'GTTT' from a graph of 'AAAC' and 'AAAT'.

        The retrieved graph must contain exactly the nodes 'TTT', 'GTT' and
        'ATT' and the three (source, target, color) edges asserted below.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAAC')
                        .with_dna_sequence('AAAT')
                        .build(tmpdir))

        # Open the graph inside a context manager so the file handle is closed
        # even if an assertion fails. The assertions stay inside the block in
        # case the retrieved graph still reads from the handle.
        with open(output_graph, 'rb') as graph_handle:
            retriever = ContigRetriever(graph_handle)

            # when
            kmer_graph = retriever.get_kmer_graph('GTTT')

            # then
            assert set(kmer_graph.nodes) == {'TTT', 'GTT', 'ATT'}
            assert set(kmer_graph.edges) == {('GTT', 'TTT', 0), ('GTT', 'TTT', 1),
                                             ('ATT', 'TTT', 0)}
コード例 #7
0
ファイル: test_term.py プロジェクト: winni2k/cortexpy
    def test_prints_one_missing_kmer(self, tmpdir):
        """Viewing contig 'GGG' in a graph built from 'AAAA' prints one k-mer line."""
        # given
        kmer_size = 3
        record = 'GGG'
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence('AAAA')
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        expected_kmers = ['CCC: GGG 0 1 ........ ........']

        # when
        completed = runner.Cortexpy().view_contig(graph=output_graph, contig=record)
        stdout = completed.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
コード例 #8
0
ファイル: test_engine.py プロジェクト: winni2k/cortexpy
    def test_with_two_subgraphs_returns_all_kmers(self, tmpdir,
                                                  ra_constructor):
        """Traverse from both 'AAA' and 'GGG' in a graph built from 'AAAT' and 'GGGC'.

        Starting a traversal from each k-mer in the iterable must produce a
        graph containing the edges 'AAA AAT 0' and 'CCC GCC 0'.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAAT')
                        .with_dna_sequence('GGGC')
                        .build(tmpdir))

        # Open the graph inside a context manager so the file handle is closed
        # even if an assertion fails. Everything that may still read from the
        # handle stays inside the block.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            expect = KmerGraphExpectation(
                traverser.traverse_from_each_kmer_in_iterable(['AAA',
                                                               'GGG']).graph)

            # then
            expect.has_edges('AAA AAT 0', 'CCC GCC 0')
コード例 #9
0
    def test_outputs_json(self, tmpdir):
        """view_contig with to_json=True must exit 0 and emit JSON for a directed graph."""
        # given
        record = 'ACCTT'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(record)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(to_json=True,
                                                 graph=output_graph,
                                                 contig=record)
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        parsed_output = json.loads(stdout)
        expect = JsonGraph(parsed_output)
        expect.is_directed()
コード例 #10
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_raises_on_max_nodes_exceeded(self, tmpdir):
        """subgraph with max_nodes=1 must fail and report the limit on stderr."""
        # given
        query = 'CAA'
        input_sequences = ['CAACC']
        graph_builder = builder.Mccortex().with_kmer_size(3)
        for sequence in input_sequences:
            graph_builder.with_dna_sequence(sequence)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(spawn_process=True)
        completed_process = cortexpy.subgraph(
            graphs=[output_graph],
            contig=query,
            max_nodes=1,
        )

        # then
        assert completed_process.returncode != 0
        assert 'Max nodes (1) exceeded: 3 nodes found' in completed_process.stderr
コード例 #11
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_does_not_raise_without_max_nodes(self, tmpdir):
        """subgraph without a max_nodes argument must exit with return code 0."""
        # given
        query = 'CAACC'
        input_sequences = [query]
        graph_builder = builder.Mccortex().with_kmer_size(3)
        for sequence in input_sequences:
            graph_builder.with_dna_sequence(sequence)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(True)
        # The output is written under tmpdir; the test never reads it back.
        completed_process = cortexpy.subgraph(
            graphs=[output_graph],
            contig=query,
            out=tmpdir / 'discarded.pickle',
        )

        # then
        assert completed_process.returncode == 0
コード例 #12
0
ファイル: test_term.py プロジェクト: winni2k/cortexpy
    def test_prints_three_kmers_including_one_revcomp(self, tmpdir):
        """view_graph on a graph built from 'ACCTT' prints the three expected k-mer lines."""
        # given
        record = 'ACCTT'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(record)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        expected_kmers = ['AAG 1 ......G.', 'ACC 1 .......T', 'AGG 1 a......T']

        # when
        completed = runner.Cortexpy().view_graph(output_graph)
        stdout = completed.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
コード例 #13
0
ファイル: test_term.py プロジェクト: winni2k/cortexpy
    def test_prints_three_kmers(self, tmpdir):
        """view_contig for 'ACCAA' on its own graph prints one line per k-mer, in contig order."""
        # given
        record = 'ACCAA'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(record)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        expected_kmers = [
            'ACC: ACC 1 1 ....A... ....A...',
            'CCA: CCA 1 1 a...A... a...A...',
            'CAA: CAA 1 1 .c...... .c......',
        ]

        # when
        completed = runner.Cortexpy().view_contig(graph=output_graph, contig=record)
        stdout = completed.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
コード例 #14
0
ファイル: test_term.py プロジェクト: winni2k/cortexpy
    def test_prints_one_missing_and_one_revcomp_kmer(self, tmpdir):
        """view_contig for 'ACTT' on a graph built from 'ACCTT' prints the two expected k-mer lines."""
        # given
        dna_sequence = 'ACCTT'
        search_record = 'ACTT'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(dna_sequence)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        first_expected = 'ACT: ACT 0 1 ........ .......T'
        second_expected = 'AAG: CTT 1 1 .C...... A.......'

        # when
        completed = runner.Cortexpy().view_contig(graph=output_graph,
                                                  contig=search_record)
        expect = ViewExpectation(completed.stdout)

        # then
        # Each expectation call is made on the previous call's return value,
        # mirroring a fluent chain.
        chained = expect.has_kmer(first_expected)
        chained = chained.has_kmer(second_expected)
        chained.has_n_kmers(2)
コード例 #15
0
    def test_collapse_kmer_unitigs_option_with_missing_kmers(self, tmpdir):
        """Check the JSON view of a query that extends past the graph's sequences.

        The query is the first record with an extra 'G' appended. The JSON
        output must contain four nodes and six edges with the coverages
        asserted below.
        """
        # given
        record1 = 'AAACCCGAA'
        record2 = 'ACCG'
        query_record = record1 + 'G'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(record1)
        graph_builder = graph_builder.with_dna_sequence(record2)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)
        # Return value is unused; the call is kept as in the original test.
        runner.Mccortex(kmer_size).view(output_graph)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(contig=query_record,
                                                 to_json=True,
                                                 graph=output_graph)

        # then
        expect_zero_return_code(completed_process)

        stdout = completed_process.stdout
        expect = JsonGraph(json.loads(stdout))

        expected_node_coverages = [
            ('AAACC', ([1, 1], [1, 1], [2, 1])),
            ('C', ([1, 1],)),
            ('GAA', ([2, 1], [1, 1], [1, 1])),
            ('G', ([0, 1],)),
        ]
        expect.has_n_nodes(4)
        for node_repr, coverages in expected_node_coverages:
            expect.has_node_repr(node_repr).has_coverages_by_kmer(*coverages)

        # These two edges are expected in both colors.
        edges_in_both_colors = [('AAACC', 'C'), ('C', 'GAA')]
        for color in (0, 1):
            for source, target in edges_in_both_colors:
                expect.has_repr_edge(source, target, color)
        expect.has_repr_edge('GAA', 'G', 1)
        expect.has_repr_edge('AAACC', 'GAA', 0)
        expect.has_n_edges(6)
コード例 #16
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_outputs_fasta_of_kmers(self, tmpdir):
        """view_graph with kmers=True emits the three expected k-mers of 'ATTCC' as FASTA."""
        # given
        record = 'ATTCC'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        graph_builder = graph_builder.with_dna_sequence(record)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_graph(kmers=True, graph=output_graph)
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        for expected_kmer in ('AAT', 'GAA', 'GGA'):
            expect.has_record(expected_kmer)
        expect.has_n_records(3)
コード例 #17
0
    def test_collapse_kmer_unitigs_option(self, tmpdir):
        """Check the JSON view of the first record in a graph built from three records.

        The JSON output must report colors [0, 1], three nodes and five edges
        with the coverages asserted below.
        """
        # given
        record1 = 'AAACCCGAA'
        record2 = 'ACCG'
        record3 = 'TTCGGGTTT'
        kmer_size = 3
        graph_builder = builder.Mccortex()
        for record in (record1, record2, record3):
            graph_builder = graph_builder.with_dna_sequence(record)
        graph_builder = graph_builder.with_kmer_size(kmer_size)
        output_graph = graph_builder.build(tmpdir)
        # Return value is unused; the call is kept as in the original test.
        runner.Mccortex(kmer_size).view(output_graph)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(contig=record1,
                                                 to_json=True,
                                                 graph=output_graph)

        # then
        expect_zero_return_code(completed_process)

        stdout = completed_process.stdout
        expect = JsonGraph(json.loads(stdout))

        expect.has_colors([0, 1])
        expect.has_n_nodes(3)
        expected_node_coverages = [
            ('AAACC', ([2, 1], [2, 1], [3, 1])),
            ('C', ([2, 1],)),
            ('GAA', ([3, 1], [2, 1], [2, 1])),
        ]
        for node_repr, coverages in expected_node_coverages:
            expect.has_node_repr(node_repr).has_coverages_by_kmer(*coverages)

        expected_edges = [
            ('AAACC', 'C', 0),
            ('AAACC', 'C', 1),
            ('C', 'GAA', 0),
            ('C', 'GAA', 1),
            ('AAACC', 'GAA', 0),
        ]
        for source, target, color in expected_edges:
            expect.has_repr_edge(source, target, color)
        expect.has_n_edges(5)
コード例 #18
0
ファイル: test_fasta.py プロジェクト: winni2k/cortexpy
    def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
        """traverse must fail with exit code 64 when max_paths is 1, and succeed otherwise.

        NOTE(review): ``max_paths`` is supplied from outside this snippet,
        presumably by a parametrize decorator or fixture; confirm.
        """
        # given
        MAX_PATH_EXIT_CODE = 64
        input_sequences = ['CAACC', 'CAACT']
        graph_builder = builder.Mccortex().with_kmer_size(3)
        for sequence in input_sequences:
            graph_builder.with_dna_sequence(sequence)

        # when
        cortexpy = runner.Cortexpy(spawn_process=True)
        completed_process = cortexpy.traverse(graph=graph_builder.build(tmpdir),
                                              max_paths=max_paths)

        # then
        if max_paths != 1:
            assert 0 == completed_process.returncode
            expect = expectation.Fasta(completed_process.stdout)
            expect.has_record('CAACC')
            expect.has_record('CAACT')
            expect.has_n_records(2)
        else:
            assert MAX_PATH_EXIT_CODE == completed_process.returncode
            assert f'Max paths ({max_paths}) exceeded' in completed_process.stderr