Beispiel #1
0
    def test_outputs_multiple_combinations(self, tmpdir):
        """Traverse from 'AAA' and expect six FASTA records.

        A k=3 graph is built from three sequences. The traversal output must
        contain those three sequences plus three further ones, each with an
        id drawn from ``g0_p0`` .. ``g0_p5``.
        """
        # given
        kmer_size = 3
        n_paths = 6
        records = ['CAACC', 'AAACA', 'AAACT']
        additional_records = ['CAACT', 'CAACA', 'AAACC']
        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)
        output_graph = maker.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(to_json=False,
                                              graph=output_graph,
                                              contig='AAA')
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        ids = ['g0_p{}'.format(path_idx) for path_idx in range(n_paths)]
        # The input sequences are checked first, then the additional ones.
        for sequence in records + additional_records:
            expect.has_record(sequence).has_id_in(*ids)
        expect.has_n_records(n_paths)
Beispiel #2
0
    def test_with_bubble_and_two_colors_returns_all_kmers(
            self, tmpdir, ra_constructor):
        """Traverse a two-sample bubble graph from 'ACA' in both orientations.

        The graph is built with k=3 from 'AAACAAG' and 'AAATAAG', plus a
        second 'AAATAAG' registered under the name 'sample_1'. The traversal
        follows color 0 only; the resulting graph is checked for the node
        coverages and edges listed below.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAACAAG')
                        .with_dna_sequence('AAATAAG')
                        .with_dna_sequence('AAATAAG', name='sample_1')
                        .build(tmpdir))

        # Open the graph file in a context manager so the handle is closed
        # even when an assertion fails. Every step that might read from the
        # file stays inside the block.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            expect = KmerGraphExpectation(
                traverser.traverse_from('ACA').graph,
                sort_edges=True)

            # then
            expect.has_node('AAA').has_coverages(2, 1)
            expect.has_node('AAC').has_coverages(1, 0)
            expect.has_node('ACA').has_coverages(1, 0)
            expect.has_node('CAA').has_coverages(1, 0)
            expect.has_node('AAG').has_coverages(2, 1)
            expect.has_node('AAT').has_coverages(1, 1)
            expect.has_node('ATA').has_coverages(1, 1)
            expect.has_node('TAA').has_coverages(1, 1)

            expect.has_edges(
                'AAA AAC 0', 'AAC ACA 0', 'ACA CAA 0', 'CAA AAG 0',
                'AAA AAT 0', 'AAT ATA 0', 'ATA TAA 0', 'TAA AAG 0',
                'AAA AAT 1', 'AAT ATA 1', 'ATA TAA 1', 'AAG TAA 1')
Beispiel #3
0
    def test_outputs_contigs_for_bubble(self, tmpdir):
        """Traverse from 'AAA' and expect both input sequences as FASTA records."""
        # given
        kmer_size = 3
        records = ['AAACCC', 'AAAGCCC']
        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)
        output_graph = maker.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(graph=output_graph, contig='AAA')
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        for sequence in records:
            expect.has_record(sequence)
        # Exactly the two input sequences are expected in the output.
        expect.has_n_records(len(records))
Beispiel #4
0
    def test_dal19_data(self, tmpdir):
        """Traverse a k=47 graph of three overlapping sequences.

        Two FASTA records are expected: the first sequence extended by the
        tails of the second and third sequences, and the third sequence
        itself.
        """
        # given
        kmer_size = 47
        first = 'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT'
        second = 'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA'
        third = 'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG'
        records = [first, second, third]
        # The merged record drops the first 47 characters of the second
        # sequence and the first 48 characters of the third before joining.
        merged = first + second[47:] + third[48:]
        expected_records = [merged, third]

        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)
        output_graph = maker.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.traverse(
            to_json=False,
            graph=output_graph,
            contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT')
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expect = expectation.Fasta(stdout)
        for sequence in expected_records:
            expect.has_record(sequence)
        expect.has_n_records(len(expected_records))
Beispiel #5
0
    def test_prints_single_kmer(self, tmpdir):
        """view_contig for 'CAA' on a graph of 'ACCAA' prints one kmer line."""
        # given
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence('ACCAA')
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)
        expected_kmers = ['CAA: CAA 1 1 .c...... ........']

        # when
        completed_process = runner.Cortexpy().view_contig(graph=output_graph,
                                                          contig='CAA')
        stdout = completed_process.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
Beispiel #6
0
    def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(
            self, tmpdir):
        """Retrieve the kmer graph for 'GTTT' from a graph of 'AAAC' and 'AAAT'.

        The retrieved graph is expected to hold the nodes 'TTT', 'GTT' and
        'ATT', connected by the three (source, target, color) edges asserted
        below.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAAC')
                        .with_dna_sequence('AAAT')
                        .build(tmpdir))

        # Open the graph file in a context manager so the handle is closed
        # even when an assertion fails. The retrieval and the checks stay
        # inside the block in case the retriever reads lazily.
        with open(output_graph, 'rb') as graph_handle:
            retriever = ContigRetriever(graph_handle)

            # when
            kmer_graph = retriever.get_kmer_graph('GTTT')

            # then
            assert set(kmer_graph.nodes) == {'TTT', 'GTT', 'ATT'}
            assert set(kmer_graph.edges) == {('GTT', 'TTT', 0),
                                             ('GTT', 'TTT', 1),
                                             ('ATT', 'TTT', 0)}
Beispiel #7
0
    def test_prints_one_missing_kmer(self, tmpdir):
        """view_contig for 'GGG' on a graph of 'AAAA' prints one kmer line."""
        # given
        kmer_size = 3
        record = 'GGG'
        maker = builder.Mccortex().with_dna_sequence('AAAA')
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)
        expected_kmers = ['CCC: GGG 0 1 ........ ........']

        # when
        completed_process = runner.Cortexpy().view_contig(graph=output_graph,
                                                          contig=record)
        stdout = completed_process.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
Beispiel #8
0
    def test_with_two_subgraphs_returns_all_kmers(self, tmpdir,
                                                  ra_constructor):
        """Traverse from 'AAA' and 'GGG' over a graph of 'AAAT' and 'GGGC'.

        The traversal follows color 0 in both orientations, starting from
        each kmer in the iterable. The resulting graph is expected to contain
        the edges 'AAA AAT 0' and 'CCC GCC 0'.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAAT')
                        .with_dna_sequence('GGGC')
                        .build(tmpdir))

        # Open the graph file in a context manager so the handle is closed
        # even when an assertion fails. The traversal and the checks stay
        # inside the block in case the graph is read lazily.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            traversal = traverser.traverse_from_each_kmer_in_iterable(
                ['AAA', 'GGG'])
            expect = KmerGraphExpectation(traversal.graph)

            # then
            expect.has_edges('AAA AAT 0', 'CCC GCC 0')
Beispiel #9
0
    def test_outputs_json(self, tmpdir):
        """view_contig with to_json=True emits JSON that passes is_directed()."""
        # given
        record = 'ACCTT'
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence(record)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(to_json=True,
                                                 graph=output_graph,
                                                 contig=record)
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        graph_json = json.loads(stdout)
        expect = JsonGraph(graph_json)
        expect.is_directed()
Beispiel #10
0
    def test_raises_on_max_nodes_exceeded(self, tmpdir):
        """subgraph with max_nodes=1 must fail and report the node count.

        A non-zero return code is expected, with the "Max nodes" message
        present on stderr.
        """
        # given
        kmer_size = 3
        query = 'CAA'
        records = ['CAACC']
        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)
        output_graph = maker.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(spawn_process=True)
        completed_process = cortexpy.subgraph(graphs=[output_graph],
                                              contig=query,
                                              max_nodes=1)

        # then
        expected_message = 'Max nodes (1) exceeded: 3 nodes found'
        assert 0 != completed_process.returncode
        assert expected_message in completed_process.stderr
Beispiel #11
0
    def test_does_not_raise_without_max_nodes(self, tmpdir):
        """subgraph called without max_nodes must exit with return code 0."""
        # given
        kmer_size = 3
        query = 'CAACC'
        records = [query]
        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)
        output_graph = maker.build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(True)
        completed_process = cortexpy.subgraph(
            graphs=[output_graph],
            contig=query,
            out=tmpdir / 'discarded.pickle',
        )

        # then
        assert 0 == completed_process.returncode
Beispiel #12
0
    def test_prints_three_kmers_including_one_revcomp(self, tmpdir):
        """view_graph on a graph of 'ACCTT' prints the three kmer lines below."""
        # given
        record = 'ACCTT'
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence(record)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmers = ['AAG 1 ......G.', 'ACC 1 .......T', 'AGG 1 a......T']

        # when
        completed_process = runner.Cortexpy().view_graph(output_graph)
        stdout = completed_process.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
Beispiel #13
0
    def test_prints_three_kmers(self, tmpdir):
        """view_contig for 'ACCAA' on its own graph prints three kmer lines."""
        # given
        record = 'ACCAA'
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence(record)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmers = [
            'ACC: ACC 1 1 ....A... ....A...',
            'CCA: CCA 1 1 a...A... a...A...',
            'CAA: CAA 1 1 .c...... .c......',
        ]

        # when
        completed_process = runner.Cortexpy().view_contig(graph=output_graph,
                                                          contig=record)
        stdout = completed_process.stdout

        # then
        printed_kmers = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == printed_kmers
Beispiel #14
0
    def test_prints_one_missing_and_one_revcomp_kmer(self, tmpdir):
        """view_contig for 'ACTT' on a graph of 'ACCTT' prints two kmer lines."""
        # given
        dna_sequence = 'ACCTT'
        search_record = 'ACTT'
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence(dna_sequence)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)

        first_kmer = 'ACT: ACT 0 1 ........ .......T'
        second_kmer = 'AAG: CTT 1 1 .C...... A.......'

        # when
        completed_process = runner.Cortexpy().view_contig(
            graph=output_graph, contig=search_record)
        expect = ViewExpectation(completed_process.stdout)

        # then
        expect.has_kmer(first_kmer).has_kmer(second_kmer).has_n_kmers(2)
Beispiel #15
0
    def test_collapse_kmer_unitigs_option_with_missing_kmers(self, tmpdir):
        """view_contig JSON for a query that extends record1 by one 'G'.

        The graph is built from 'AAACCCGAA' and 'ACCG' with k=3. The JSON
        output is expected to contain four nodes and six edges with the
        representations, per-kmer coverages and colors asserted below.
        """
        # given
        record1 = 'AAACCCGAA'
        record2 = 'ACCG'
        query_record = record1 + 'G'
        kmer_size = 3
        maker = builder.Mccortex()
        maker = maker.with_dna_sequence(record1).with_dna_sequence(record2)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)
        runner.Mccortex(kmer_size).view(output_graph)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(contig=query_record,
                                                 to_json=True,
                                                 graph=output_graph)

        # then
        expect_zero_return_code(completed_process)

        graph_json = json.loads(completed_process.stdout)
        expect = JsonGraph(graph_json)

        expect.has_n_nodes(4)
        expect.has_node_repr('AAACC').has_coverages_by_kmer(
            [1, 1], [1, 1], [2, 1])
        expect.has_node_repr('C').has_coverages_by_kmer([1, 1])
        expect.has_node_repr('GAA').has_coverages_by_kmer(
            [2, 1], [1, 1], [1, 1])
        expect.has_node_repr('G').has_coverages_by_kmer([0, 1])

        # These two edges are expected in both colors.
        for color in (0, 1):
            for source, target in (('AAACC', 'C'), ('C', 'GAA')):
                expect.has_repr_edge(source, target, color)
        expect.has_repr_edge('GAA', 'G', 1)
        expect.has_repr_edge('AAACC', 'GAA', 0)
        expect.has_n_edges(6)
Beispiel #16
0
    def test_outputs_fasta_of_kmers(self, tmpdir):
        """view_graph with kmers=True emits three kmers as FASTA records."""
        # given
        record = 'ATTCC'
        kmer_size = 3
        maker = builder.Mccortex().with_dna_sequence(record)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_graph(kmers=True, graph=output_graph)
        stdout = completed_process.stdout

        # then
        assert completed_process.returncode == 0, completed_process
        expected_kmers = ('AAT', 'GAA', 'GGA')
        expect = expectation.Fasta(stdout)
        for kmer in expected_kmers:
            expect.has_record(kmer)
        expect.has_n_records(3)
Beispiel #17
0
    def test_collapse_kmer_unitigs_option(self, tmpdir):
        """view_contig JSON for record1 on a graph of three sequences.

        The graph is built from 'AAACCCGAA', 'ACCG' and 'TTCGGGTTT' with k=3.
        The JSON output is expected to report colors [0, 1], three nodes and
        five edges, as asserted below.
        """
        # given
        record1 = 'AAACCCGAA'
        record2 = 'ACCG'
        record3 = 'TTCGGGTTT'
        kmer_size = 3
        maker = builder.Mccortex()
        for sequence in (record1, record2, record3):
            maker = maker.with_dna_sequence(sequence)
        output_graph = maker.with_kmer_size(kmer_size).build(tmpdir)
        runner.Mccortex(kmer_size).view(output_graph)

        # when
        cortexpy = runner.Cortexpy(SPAWN_PROCESS)
        completed_process = cortexpy.view_contig(contig=record1,
                                                 to_json=True,
                                                 graph=output_graph)

        # then
        expect_zero_return_code(completed_process)

        graph_json = json.loads(completed_process.stdout)
        expect = JsonGraph(graph_json)

        expect.has_colors([0, 1])
        expect.has_n_nodes(3)
        expect.has_node_repr('AAACC').has_coverages_by_kmer(
            [2, 1], [2, 1], [3, 1])
        expect.has_node_repr('C').has_coverages_by_kmer([2, 1])
        expect.has_node_repr('GAA').has_coverages_by_kmer(
            [3, 1], [2, 1], [2, 1])

        expected_edges = [
            ('AAACC', 'C', 0),
            ('AAACC', 'C', 1),
            ('C', 'GAA', 0),
            ('C', 'GAA', 1),
            ('AAACC', 'GAA', 0),
        ]
        for source, target, color in expected_edges:
            expect.has_repr_edge(source, target, color)
        expect.has_n_edges(5)
Beispiel #18
0
    def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
        """traverse with a max_paths limit on a graph of two sequences.

        With ``max_paths == 1`` the process must exit with code 64 and report
        the exceeded limit on stderr. For any other value it must exit with
        code 0 and emit both input sequences as FASTA records.
        """
        # given
        max_path_exit_code = 64
        kmer_size = 3
        records = ['CAACC', 'CAACT']
        maker = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in records:
            maker.with_dna_sequence(sequence)

        # when
        cortexpy = runner.Cortexpy(spawn_process=True)
        completed_process = cortexpy.traverse(graph=maker.build(tmpdir),
                                              max_paths=max_paths)

        # then
        if max_paths != 1:
            assert 0 == completed_process.returncode
            expect = expectation.Fasta(completed_process.stdout)
            expect.has_record('CAACC')
            expect.has_record('CAACT')
            expect.has_n_records(2)
            return

        assert max_path_exit_code == completed_process.returncode
        assert f'Max paths ({max_paths}) exceeded' in completed_process.stderr