Beispiel #1
0
    def test_regular_ra_parser(self, num_kmers):
        """Check the number of handle reads made by ``parser.RandomAccess``.

        A single-color graph of ``num_kmers`` distinct random 11-mers is
        built.  Every key yielded by the parser is looked up once, then
        ``read`` calls on the handle are counted while each k-mer is fetched
        again in sorted order.  The expected total is two reads per k-mer.
        """
        # given
        kmer_size = 11

        def draw_kmer():
            # Random ACGT string of length kmer_size, passed through lexlo().
            return lexlo(''.join(
                [random.choice('ACGT') for _ in range(kmer_size)]))

        graph_builder = (
            builder.Graph()
            .with_kmer_size(kmer_size)
            .with_num_colors(1)
        )
        unique_kmers = set()
        for _ in range(num_kmers):
            candidate = draw_kmer()
            # Redraw until the k-mer has not been used before.
            while candidate in unique_kmers:
                candidate = draw_kmer()
            unique_kmers.add(candidate)
            graph_builder.with_kmer(candidate)
        handle = graph_builder.build()

        ra = parser.RandomAccess(handle, kmer_cache_size=None)
        # Look up every key once before the read spy is installed.
        for key in list(ra):
            ra[key]

        with mock.patch.object(handle, 'read', wraps=handle.read) as read_spy:
            # when
            for kmer_string in sorted(unique_kmers):
                ra[kmer_string]

            # then
            assert 2 * num_kmers == read_spy.call_count
Beispiel #2
0
    def test_record_retrieval(self, data, kmer_size, num_colors, n_kmers):
        """Check that each record written to a graph is returned by lookup.

        Draws ``n_kmers`` records with distinct k-mer strings, builds a graph
        from them, and compares the k-mer string, coverage and edges of every
        record retrieved through ``self.RAClass`` against the drawn record.
        """
        # given
        assume(n_kmers <= 4**(kmer_size - 1))
        graph_builder = builder.Graph().with_kmer_size(kmer_size)
        graph_builder = graph_builder.with_num_colors(num_colors)

        drawn_records = []
        used_kmer_strings = set()
        for _ in range(n_kmers):
            record = data.draw(kmer_records(kmer_size, num_colors))
            # Redraw until the k-mer string is new.
            while record.kmer in used_kmer_strings:
                record = data.draw(kmer_records(kmer_size, num_colors))
            used_kmer_strings.add(record.kmer)
            graph_builder.with_kmer_record(record)
            drawn_records.append(record)

        cg = self.RAClass(graph_builder.build())

        # when
        for wanted in drawn_records:
            found = cg[wanted.kmer]

            # then
            assert wanted.kmer == found.kmer
            assert np.all(wanted.coverage == found.coverage)
            # Edges are compared pairwise, position by position.
            for wanted_edges, found_edges in zip(wanted.edges, found.edges):
                assert wanted_edges == found_edges
Beispiel #3
0
    def test_two_linked_kmers_are_jsonifiable(self):
        """Check that a two-k-mer, two-color graph serializes to valid JSON.

        The JSON must list colors ``[0, 1]`` and the two sample names, and
        describe a graph with two nodes and one edge.
        """
        # given
        sample_names = ['samp1', 'samp2']
        graph_builder = (
            builder.Graph()
            .with_kmer_size(3)
            .with_num_colors(2)
            .with_color_names(*sample_names)
            .with_kmer('AAA 1 1 .....C.. ........')
            .with_kmer('AAC 1 0 a....... ........')
        )

        loaded = load_cortex_graph(graph_builder.build())
        interactor = Interactor(loaded)
        interactor = interactor.make_graph_nodes_consistent(
            seed_kmer_strings=['GTT'])
        consistent_graph = interactor.graph
        serializer = cortexpy.graph.serializer.serializer.Serializer(
            consistent_graph)
        kmer_json = serializer.to_json()

        # when
        expect = expectation.JsonGraph.from_string(kmer_json)

        # then
        parsed = json.loads(kmer_json)  # does not raise
        graph_section = parsed['graph']
        assert graph_section['colors'] == [0, 1]
        assert parsed['graph']['sample_names'] == sample_names

        expect.has_n_nodes(2)
        expect.has_n_edges(1)
Beispiel #4
0
    def test_index(self, data, kmer_size, n_kmers):
        """Check that ``KmerUintSequence`` indexes k-mers in sorted order.

        Draws ``n_kmers`` single-color records with distinct k-mer strings,
        sorts them, and expects ``index_kmer_string`` to return each record's
        position in that sorted list.  Only odd ``kmer_size`` values are used.
        """
        # given
        assume(kmer_size % 2 == 1)
        num_colors = 1
        graph_builder = builder.Graph().with_kmer_size(kmer_size)
        graph_builder = graph_builder.with_num_colors(num_colors)

        sorted_records = []
        used_kmer_strings = set()
        for _ in range(n_kmers):
            record = data.draw(kmer_records(kmer_size, num_colors))
            # Redraw until the k-mer string is new.
            while record.kmer in used_kmer_strings:
                record = data.draw(kmer_records(kmer_size, num_colors))
            used_kmer_strings.add(record.kmer)
            graph_builder.with_kmer_record(record)
            sorted_records.append(record)
        sorted_records.sort()

        graph_stream = graph_builder.build()
        header_stream = graph_builder.header.build()
        header = Header.from_stream(header_stream)
        # The body offset is taken as the length of the serialized header.
        body_start = len(header_stream.getvalue())

        # when
        sequence = KmerUintSequence(graph_handle=graph_stream,
                                    body_start=body_start,
                                    header=header,
                                    n_records=len(sorted_records))

        # then
        for position, record in enumerate(sorted_records):
            assert position == sequence.index_kmer_string(record.kmer)
Beispiel #5
0
    def test_two_linked_kmers_are_jsonifiable(self):
        """Check JSON serialization of a graph retrieved for contig 'GTTT'.

        The JSON must list colors ``[0, 1, 2]``, the two sample names followed
        by ``'retrieved_contig'``, and describe two nodes and two edges.
        """
        # given
        sample_names = ['samp1', 'samp2']
        graph_builder = (
            builder.Graph()
            .with_kmer_size(3)
            .with_num_colors(2)
            .with_color_names(*sample_names)
            .with_kmer('AAA 1 1 .....C.. ........')
            .with_kmer('AAC 1 0 a....... ........')
        )

        contig_retriever = ContigRetriever(graph_builder.build())
        retrieved_graph = contig_retriever.get_kmer_graph('GTTT')

        # when
        serializer = cortexpy.graph.serializer.serializer.Serializer(
            retrieved_graph)
        kmer_json = serializer.to_json()
        expect = expectation.JsonGraph.from_string(kmer_json)

        # then
        parsed = json.loads(kmer_json)  # does not raise
        assert parsed['graph']['colors'] == [0, 1, 2]
        expected_names = sample_names + ['retrieved_contig']
        assert parsed['graph']['sample_names'] == expected_names

        expect.has_n_nodes(2)
        expect.has_n_edges(2)
Beispiel #6
0
    def test_works_with_no_kmers(self):
        """Check that iterating a graph built without k-mers yields nothing."""
        # given
        graph_builder = builder.Graph().with_kmer_size(3)
        graph_builder = graph_builder.with_num_colors(1)

        empty_graph = self.RAClass(graph_builder.build())

        # when
        found = list(empty_graph)

        # then
        assert found == []
Beispiel #7
0
    def test_raises_on_even_kmer_size(self):
        """Check that a graph with k-mer size 2 is rejected with ValueError."""
        # given
        even_sized_builder = builder.Graph()
        even_sized_builder.with_kmer_size(2)

        # when / then: building and wrapping the graph must raise
        with pytest.raises(ValueError):
            graph_stream = even_sized_builder.build()
            self.RAClass(graph_stream)
Beispiel #8
0
    def test_raises_on_missing_kmer(self):
        """Check that looking up an absent k-mer raises KeyError."""
        # given
        empty_builder = builder.Graph()
        empty_builder.with_kmer_size(3)
        graph_stream = empty_builder.build()

        empty_graph = self.RAClass(graph_stream)

        # when / then
        with pytest.raises(KeyError):
            empty_graph['AAA']
    def test_two_nodes_linking_to_self(self):
        """Check the k-mer graph retrieved for 'TTAA' from a k-mer-free graph.

        Exactly one edge is expected: 'TTA' -> 'TAA' with third argument 1.
        """
        # given
        graph_stream = builder.Graph().with_kmer_size(3).build()
        contig_retriever = ContigRetriever(graph_stream)

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('TTAA')
        expect = KmerGraphExpectation(retrieved_graph)

        # then
        expect.has_edge('TTA', 'TAA', 1)
        expect.has_n_edges(1)
    def test_with_one_kmer_returns_one_kmer(self):
        """Check that querying 'AAA' on a one-k-mer graph gives one node.

        The resulting k-mer graph must contain only 'AAA' and no edges.
        """
        # given
        single_kmer_builder = builder.Graph().with_kmer_size(3)
        single_kmer_builder.with_kmer('AAA', 1, '........')
        graph_stream = single_kmer_builder.build()
        contig_retriever = ContigRetriever(graph_stream)

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAA')

        # then
        edge_count = len(retrieved_graph.edges)
        assert edge_count == 0
        node_names = list(retrieved_graph)
        assert node_names == ['AAA']
    def test_with_two_linked_kmers_returns_two_kmers(self):
        """Check that querying 'AAA' also returns the linked k-mer 'AAC'.

        The k-mer graph must hold nodes 'AAA' and 'AAC' joined by a single
        edge with third argument 0.
        """
        # given
        linked_builder = builder.Graph().with_kmer_size(3)
        for kmer_string, edge_string in (('AAA', '.....C..'),
                                         ('AAC', 'a.......')):
            linked_builder.with_kmer(kmer_string, 1, edge_string)
        contig_retriever = ContigRetriever(linked_builder.build())

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAA')
        expect = KmerGraphExpectation(retrieved_graph)

        # then
        expect.has_nodes('AAA', 'AAC').has_n_edges(1)
        expect.has_edge('AAA', 'AAC', 0)
    def test_with_no_kmer_returns_missing_kmer(self):
        """Check that querying 'AAA' on a k-mer-free graph still gives a node.

        The k-mer graph must contain the single node 'AAA' with coverages
        ``(0, 1)`` and no edges.
        """
        # given
        graph_stream = builder.Graph().with_kmer_size(3).build()
        contig_retriever = ContigRetriever(graph_stream)

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAA')
        expect = KmerGraphExpectation(retrieved_graph)

        # then
        (expect
         .has_n_nodes(1)
         .has_n_edges(0)
         .has_node('AAA')
         .has_coverages(0, 1))
    def test_with_two_linked_kmers_returns_two_kmers(self):
        """Check the k-mer graph retrieved for contig 'GTTT'.

        With 'AAA' and 'AAC' in the graph, the result must consist of nodes
        'GTT' and 'TTT' joined by edges keyed 0 and 1.
        """
        # given
        linked_builder = builder.Graph().with_kmer_size(3)
        for kmer_string, edge_string in (('AAA', '.....C..'),
                                         ('AAC', 'a.......')):
            linked_builder.with_kmer(kmer_string, 1, edge_string)
        contig_retriever = ContigRetriever(linked_builder.build())

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('GTTT')

        # then
        found_nodes = set(retrieved_graph.nodes)
        assert found_nodes == {'GTT', 'TTT'}
        found_edges = set(retrieved_graph.edges)
        assert found_edges == {('GTT', 'TTT', 0), ('GTT', 'TTT', 1)}
    def test_with_one_kmer_asking_for_longer_contig_returns_one_kmer_with_coverage_2(
            self):
        """Check the k-mer graph retrieved for 'AAAA' from a one-k-mer graph.

        The result must contain the single node 'AAA', one edge, and a
        coverage of ``[1, 2]`` on that node's k-mer.
        """
        # given
        single_kmer_builder = builder.Graph().with_kmer_size(3)
        single_kmer_builder.with_kmer('AAA', 1, '........')
        graph_stream = single_kmer_builder.build()
        contig_retriever = ContigRetriever(graph_stream)

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAAA')

        # then
        edge_count = len(retrieved_graph.edges)
        assert 1 == edge_count
        assert list(retrieved_graph) == ['AAA']
        aaa_kmer = retrieved_graph.nodes['AAA']['kmer']
        assert [1, 2] == list(aaa_kmer.coverage)
Beispiel #15
0
    def test_gets_aaa_for_ttt_query(self, RAClass):
        """Check that both 'AAA' and 'TTT' queries return the 'AAA' record."""
        # given
        graph_builder = (
            builder.Graph()
            .with_kmer_size(3)
            .with_num_colors(1)
        )

        stored_record = KmerRecord('AAA', [1], [as_edge_set('........')])
        graph_builder.with_kmer_record(stored_record)

        cg = RAClass(graph_builder.build())

        # when / then
        for query in ('AAA', 'TTT'):
            assert stored_record.kmer == cg.get_kmer_for_string(query).kmer
Beispiel #16
0
    def test_gets_aaa(self):
        """Check that ``values()`` yields the single 'AAA' record unchanged.

        K-mer string, coverage and edges of every yielded record are compared
        against the record that was written to the graph.
        """
        # given
        graph_builder = builder.Graph().with_kmer_size(3)
        graph_builder = graph_builder.with_num_colors(1)

        stored_record = KmerRecord('AAA', (1, ), [as_edge_set('........')])
        graph_builder.with_kmer_record(stored_record)

        cg = self.RAClass(graph_builder.build())

        # when / then
        for found in cg.values():
            assert stored_record.kmer == found.kmer
            assert np.all(stored_record.coverage == found.coverage)
            assert stored_record.edges == found.edges
    def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(self):
        """Check the k-mer graph retrieved for 'AAAC' from a two-color graph.

        All three stored k-mers must appear as nodes; 'AAA' -> 'AAC' must
        exist with keys 0 and 2, and 'AAA' -> 'AAT' with key 1.
        """
        # given
        two_color_builder = builder.Graph().with_kmer_size(3)
        two_color_builder = two_color_builder.with_num_colors(2)
        stored_kmers = (
            ('AAA', [1, 1], ['.....C..', '.......T']),
            ('AAC', [1, 0], ['a.......', '........']),
            ('AAT', [0, 1], ['........', 'a.......']),
        )
        for kmer_string, coverage, edge_strings in stored_kmers:
            two_color_builder.with_kmer(kmer_string, coverage, edge_strings)
        contig_retriever = ContigRetriever(two_color_builder.build())

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAAC')

        # then
        assert set(retrieved_graph) == {'AAA', 'AAC', 'AAT'}
        wanted_edges = {
            ('AAA', 'AAC', 0),
            ('AAA', 'AAT', 1),
            ('AAA', 'AAC', 2),
        }
        assert set(retrieved_graph.edges) == wanted_edges
Beispiel #18
0
    def test_parses_records(self, data, kmer_size, num_colors, n_kmers,
                            test_serializer):
        """Check that records written to a graph are parsed back unchanged.

        Draws ``n_kmers`` records with distinct k-mer strings and builds a
        graph from them.  When ``test_serializer`` is true, each parsed
        record is additionally dumped and compared with its raw data, and the
        whole graph is re-serialized with shuffled keys and re-parsed before
        the final comparison.
        """
        # given
        assume(n_kmers <= (4**kmer_size) / 4)

        graph_builder = builder.Graph().with_kmer_size(kmer_size)
        graph_builder = graph_builder.with_num_colors(num_colors)

        drawn_records = []
        used_kmer_strings = set()
        for _ in range(n_kmers):
            record = data.draw(kmer_records(kmer_size, num_colors))
            # Redraw until the k-mer string is new.
            while record.kmer in used_kmer_strings:
                record = data.draw(kmer_records(kmer_size, num_colors))
            used_kmer_strings.add(record.kmer)
            graph_builder.with_kmer_record(record)
            drawn_records.append(record)
        ra_parser = self.RAClass(graph_builder.build())

        if test_serializer:
            # Dumping a parsed record must reproduce its raw data.
            for parsed_record in ra_parser.values():
                record_buffer = io.BytesIO()
                parsed_record.dump(record_buffer)
                raw_bytes = record_buffer.getvalue()
                assert parsed_record._kmer_data._data == raw_bytes

            sample_names = ra_parser.sample_names
            graph_buffer = io.BytesIO()
            shuffled_keys = list(ra_parser.keys())
            random.shuffle(shuffled_keys)
            serializer = kmer_serializer.Kmers(
                keys=shuffled_keys,
                val_callable=lambda k: ra_parser[k],
                kmer_size=kmer_size,
                num_colors=num_colors,
                sample_names=sample_names)
            serializer.dump(graph_buffer)
            graph_buffer.seek(0)
            # From here on the comparison runs against the re-parsed graph.
            ra_parser = self.RAClass(graph_buffer)

        # when
        for wanted in drawn_records:
            found = ra_parser[wanted.kmer]

            # then
            assert wanted.kmer == found.kmer
            assert np.all(wanted.coverage == found.coverage)
            assert wanted.edges == found.edges
    def test_two_nodes_linking_to_self(self):
        """Check ``get_kmers('TTAA')`` on a graph that holds no k-mers.

        Two entries are expected, both carrying the very same 'TAA' k-mer
        object, paired with the strings 'TTA' and 'TAA'.  ``edges[1]`` of
        that k-mer must contain only the 't' edge.
        """
        # given
        graph_stream = builder.Graph().with_kmer_size(3).build()

        # when
        kmer_list = ContigRetriever(graph_stream).get_kmers('TTAA')

        # then
        assert len(kmer_list) == 2
        first_entry = kmer_list[0]
        shared_kmer = first_entry[0]
        assert first_entry[0].kmer == 'TAA'
        assert first_entry[1] == 'TTA'
        second_entry = kmer_list[1]
        assert second_entry[0].kmer == 'TAA'
        assert second_entry[1] == 'TAA'
        assert second_entry[0] is shared_kmer

        assert shared_kmer.edges[1].is_edge('t')
        for absent_letter in 'acgACGT':
            assert not shared_kmer.edges[1].is_edge(absent_letter)
Beispiel #20
0
    def test_slurping_ra_parser(self, num_kmers):
        """Check the handle usage of ``parser.SlurpedRandomAccess``.

        A single-color graph of ``num_kmers`` distinct random 11-mers is
        built.  Loading it with ``from_handle`` and looking up every k-mer
        must never call ``seek``, and must call ``read`` once per k-mer plus
        10 header reads and 1 end-of-file read, with no further reads during
        the lookups.
        """
        # given
        kmer_size = 11

        def draw_kmer():
            # Random ACGT string of length kmer_size, passed through lexlo().
            return lexlo(''.join(
                [random.choice('ACGT') for _ in range(kmer_size)]))

        graph_builder = (
            builder.Graph()
            .with_kmer_size(kmer_size)
            .with_num_colors(1)
        )
        unique_kmers = set()
        for _ in range(num_kmers):
            candidate = draw_kmer()
            # Redraw until the k-mer has not been used before.
            while candidate in unique_kmers:
                candidate = draw_kmer()
            unique_kmers.add(candidate)
            graph_builder.with_kmer(candidate)
        handle = graph_builder.build()

        handle.seek(0)
        with mock.patch.object(handle, 'seek', wraps=handle.seek) as seek_spy:
            # when
            ra = parser.SlurpedRandomAccess.from_handle(handle)

            # then
            assert 0 == seek_spy.call_count

            for kmer_string in sorted(unique_kmers):
                ra[kmer_string]
            assert 0 == seek_spy.call_count

        handle.seek(0)
        num_header_reads = 10
        num_eof_reads = 1
        with mock.patch.object(handle, 'read', wraps=handle.read) as read_spy:
            # when
            ra = parser.SlurpedRandomAccess.from_handle(handle)

            # then
            assert (num_kmers + num_eof_reads + num_header_reads
                    == read_spy.call_count)

            for kmer_string in sorted(unique_kmers):
                ra[kmer_string]
            # Lookups after loading must not trigger any additional reads.
            assert (num_kmers + num_eof_reads + num_header_reads
                    == read_spy.call_count)
    def test_two_node_path_and_three_node_cycle(self):
        """Check the k-mer graph retrieved for contig 'AAACGAC'.

        The four edges along AAA -> AAC -> ACG -> CGA -> GAC must exist for
        colors 0 and 1, the edge GAC -> ACG for color 0, and the graph must
        have nine edges in total.
        """
        # given
        graph_builder = (
            builder.Graph()
            .with_kmer_size(3)
            .with_kmer('AAA', 1, '.....C..')
            .with_kmer('AAC', 1, 'a.....G.')
            .with_kmer('ACG', 1, 'a.g.A...')
            .with_kmer('CGA', 1, 'a....C..')
            .with_kmer('GAC', 1, '.c....G.')
        )

        contig_retriever = ContigRetriever(graph_builder.build())

        # when
        retrieved_graph = contig_retriever.get_kmer_graph('AAACGAC')
        expect = KmerGraphExpectation(retrieved_graph)

        # then
        path_edges = (
            ('AAA', 'AAC'),
            ('AAC', 'ACG'),
            ('ACG', 'CGA'),
            ('CGA', 'GAC'),
        )
        for color in (0, 1):
            for source, target in path_edges:
                expect.has_edge(source, target, color)
        expect.has_edge('GAC', 'ACG', 0)
        expect.has_n_edges(9)
Beispiel #22
0
    def test_two_linked_kmers_pickle_ok(self):
        """Check that a retrieved k-mer graph survives a pickle round trip.

        The graph retrieved for contig 'GTTT' is pickled into an in-memory
        buffer and loaded back; the copy must have the same number of nodes
        and equal data for every node.

        The round trip uses the standard ``pickle`` module directly because
        ``nx.write_gpickle`` / ``nx.read_gpickle`` were removed in
        NetworkX 3.0.
        """
        # Imported locally so this test does not rely on the file's imports.
        import pickle

        # given
        color_names = 'samp1', 'samp2'
        graph_builder = builder.Graph() \
            .with_kmer_size(3) \
            .with_num_colors(2) \
            .with_color_names(*color_names) \
            .with_kmer('AAA', [1, 1], ['.....C..', '.......T']) \
            .with_kmer('AAC', [1, 0], ['a.......', '........'])
        retriever = ContigRetriever(graph_builder.build())
        kmer_graph = retriever.get_kmer_graph('GTTT')

        # when
        buffer = io.BytesIO()
        # Highest protocol, as the removed write_gpickle() used by default.
        pickle.dump(kmer_graph, buffer, pickle.HIGHEST_PROTOCOL)
        buffer.seek(0)
        # Safe here: the buffer only holds data pickled a few lines above.
        unpickled_kmer_graph = pickle.load(buffer)

        # then
        assert len(unpickled_kmer_graph) == len(kmer_graph)
        unpickle_node_data = unpickled_kmer_graph.nodes(data=True)
        for node, data in kmer_graph.nodes(data=True):
            assert unpickle_node_data[node] == data
Beispiel #23
0
 def __attrs_post_init__(self):
     """Create one graph builder per entry of ``self.n_colors_per_graph``.

     Each builder is given ``self.kmer_size`` and the color count of its
     entry; the resulting list is stored in ``self.graph_builders``.
     """
     graph_builders = []
     for color_count in self.n_colors_per_graph:
         sized_builder = builder.Graph().with_kmer_size(self.kmer_size)
         graph_builders.append(sized_builder.with_num_colors(color_count))
     self.graph_builders = graph_builders