def test_regular_ra_parser(self, num_kmers):
    """Count handle reads made by ``RandomAccess`` k-mer lookups.

    Builds a graph of ``num_kmers`` distinct random 11-mers, touches every
    key once, then expects exactly two ``read`` calls on the handle for each
    subsequent lookup (``kmer_cache_size=None``).
    """
    # given
    kmer_size = 11
    graph_builder = builder.Graph().with_kmer_size(kmer_size).with_num_colors(1)
    unique_kmers = set()
    # Keep drawing random k-mers, skipping collisions, until enough are stored.
    while len(unique_kmers) < num_kmers:
        bases = [random.choice('ACGT') for _ in range(kmer_size)]
        candidate = lexlo(''.join(bases))
        if candidate in unique_kmers:
            continue
        unique_kmers.add(candidate)
        graph_builder.with_kmer(candidate)
    handle = graph_builder.build()

    random_access = parser.RandomAccess(handle, kmer_cache_size=None)
    # Materialise the keys first, then access each one before reads are counted.
    all_keys = list(random_access)
    for key in all_keys:
        random_access[key]

    expected_reads = 2 * num_kmers
    with mock.patch.object(handle, 'read', wraps=handle.read) as mocked_read:
        # when
        for kmer_string in sorted(unique_kmers):
            random_access[kmer_string]

        # then
        assert expected_reads == mocked_read.call_count
def test_record_retrieval(self, data, kmer_size, num_colors, n_kmers):
    """Each record written to the graph is returned when indexed by its k-mer."""
    # given
    assume(n_kmers <= 4**(kmer_size - 1))
    graph_builder = builder.Graph().with_kmer_size(kmer_size)
    graph_builder = graph_builder.with_num_colors(num_colors)
    written_records = []
    used_kmer_strings = set()
    # Redraw on collision so that every record has a distinct k-mer string.
    while len(written_records) < n_kmers:
        record = data.draw(kmer_records(kmer_size, num_colors))
        if record.kmer in used_kmer_strings:
            continue
        used_kmer_strings.add(record.kmer)
        graph_builder.with_kmer_record(record)
        written_records.append(record)
    random_access = self.RAClass(graph_builder.build())

    # when
    for written in written_records:
        fetched = random_access[written.kmer]

        # then
        assert written.kmer == fetched.kmer
        assert np.all(written.coverage == fetched.coverage)
        for expected_edges, actual_edges in zip(written.edges, fetched.edges):
            assert expected_edges == actual_edges
def test_two_linked_kmers_are_jsonifiable(self):
    """A two-k-mer, two-color graph serializes to JSON with 2 nodes and 1 edge."""
    # given
    colors = (0, 1)
    color_names = ['samp1', 'samp2']
    graph_builder = (
        builder.Graph()
        .with_kmer_size(3)
        .with_num_colors(2)
        .with_color_names(*color_names)
        .with_kmer('AAA 1 1 .....C.. ........')
        .with_kmer('AAC 1 0 a....... ........')
    )
    loaded_graph = load_cortex_graph(graph_builder.build())
    interactor = Interactor(loaded_graph)
    consistent_graph = interactor.make_graph_nodes_consistent(
        seed_kmer_strings=['GTT']).graph
    serializer = cortexpy.graph.serializer.serializer.Serializer(
        consistent_graph)
    kmer_json = serializer.to_json()

    # when
    expect = expectation.JsonGraph.from_string(kmer_json)

    # then
    kmer_data = json.loads(kmer_json)  # does not raise
    graph_section = kmer_data['graph']
    assert graph_section['colors'] == list(colors)
    assert graph_section['sample_names'] == color_names
    expect.has_n_nodes(2)
    expect.has_n_edges(1)
def test_index(self, data, kmer_size, n_kmers):
    """``index_kmer_string`` returns each k-mer's position in sorted record order."""
    # given
    assume(kmer_size % 2 == 1)
    num_colors = 1
    graph_builder = builder.Graph().with_kmer_size(kmer_size)
    graph_builder = graph_builder.with_num_colors(num_colors)
    expected_kmers = []
    used_kmer_strings = set()
    # Redraw on collision so that every record has a distinct k-mer string.
    while len(expected_kmers) < n_kmers:
        record = data.draw(kmer_records(kmer_size, num_colors))
        if record.kmer in used_kmer_strings:
            continue
        used_kmer_strings.add(record.kmer)
        graph_builder.with_kmer_record(record)
        expected_kmers.append(record)
    expected_kmers.sort()
    graph_stream = graph_builder.build()
    header_stream = graph_builder.header.build()
    header = Header.from_stream(header_stream)
    # The record body begins right after the serialized header bytes.
    body_start = len(header_stream.getvalue())

    # when
    sequence = KmerUintSequence(
        graph_handle=graph_stream,
        body_start=body_start,
        header=header,
        n_records=len(expected_kmers),
    )

    # then
    for position, record in enumerate(expected_kmers):
        assert position == sequence.index_kmer_string(record.kmer)
def test_two_linked_kmers_are_jsonifiable(self):
    """A retrieved 'GTTT' contig graph serializes to JSON with an extra color.

    The JSON is expected to list colors 0-2 and to append the
    'retrieved_contig' sample name to the two graph sample names.
    """
    # given
    color_names = ['samp1', 'samp2']
    graph_builder = (
        builder.Graph()
        .with_kmer_size(3)
        .with_num_colors(2)
        .with_color_names(*color_names)
        .with_kmer('AAA 1 1 .....C.. ........')
        .with_kmer('AAC 1 0 a....... ........')
    )
    contig_retriever = ContigRetriever(graph_builder.build())
    contig_graph = contig_retriever.get_kmer_graph('GTTT')

    # when
    serializer = cortexpy.graph.serializer.serializer.Serializer(contig_graph)
    kmer_json = serializer.to_json()
    expect = expectation.JsonGraph.from_string(kmer_json)

    # then
    kmer_data = json.loads(kmer_json)  # does not raise
    graph_section = kmer_data['graph']
    assert graph_section['colors'] == [0, 1, 2]
    assert graph_section['sample_names'] == color_names + ['retrieved_contig']
    expect.has_n_nodes(2)
    expect.has_n_edges(2)
def test_works_with_no_kmers(self):
    """Iterating a parser built from a graph without k-mers yields nothing."""
    # given
    empty_graph = builder.Graph().with_kmer_size(3).with_num_colors(1).build()
    random_access = self.RAClass(empty_graph)

    # when/then
    keys = list(random_access)
    assert keys == []
def test_raises_on_even_kmer_size(self):
    """Building and parsing a graph with k-mer size 2 raises ``ValueError``."""
    # given
    even_sized_builder = builder.Graph()
    even_sized_builder.with_kmer_size(2)

    # when
    # Both the build and the parser construction are inside the raises block.
    with pytest.raises(ValueError):
        self.RAClass(even_sized_builder.build())
def test_raises_on_missing_kmer(self):
    """Indexing a k-mer that was never added to the graph raises ``KeyError``."""
    # given
    empty_builder = builder.Graph()
    empty_builder.with_kmer_size(3)
    random_access = self.RAClass(empty_builder.build())

    # when
    with pytest.raises(KeyError):
        random_access['AAA']
def test_two_nodes_linking_to_self(self):
    """Retrieving 'TTAA' from an empty graph gives one TTA->TAA edge keyed 1."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)

    # when
    contig_retriever = ContigRetriever(graph_builder.build())
    kmer_graph = contig_retriever.get_kmer_graph('TTAA')
    expect = KmerGraphExpectation(kmer_graph)

    # then
    expect.has_edge('TTA', 'TAA', 1)
    expect.has_n_edges(1)
def test_with_one_kmer_returns_one_kmer(self):
    """Querying 'AAA' on a graph holding only 'AAA' yields that node and no edges."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '........')
    graph_stream = graph_builder.build()
    contig_retriever = ContigRetriever(graph_stream)

    # when
    result_graph = contig_retriever.get_kmer_graph('AAA')

    # then
    edge_count = len(result_graph.edges)
    assert edge_count == 0
    node_names = list(result_graph)
    assert node_names == ['AAA']
def test_with_two_linked_kmers_returns_two_kmers(self):
    """Querying 'AAA' returns both linked k-mers joined by one edge keyed 0."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '.....C..')
    graph_builder.with_kmer('AAC', 1, 'a.......')
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    result_graph = contig_retriever.get_kmer_graph('AAA')
    expect = KmerGraphExpectation(result_graph)

    # then
    expect.has_nodes('AAA', 'AAC').has_n_edges(1)
    expect.has_edge('AAA', 'AAC', 0)
def test_with_no_kmer_returns_missing_kmer(self):
    """Querying 'AAA' on an empty graph yields a lone 'AAA' node with coverages 0, 1."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    result_graph = contig_retriever.get_kmer_graph('AAA')
    expect = KmerGraphExpectation(result_graph)

    # then
    (expect
     .has_n_nodes(1)
     .has_n_edges(0)
     .has_node('AAA')
     .has_coverages(0, 1))
def test_with_two_linked_kmers_returns_two_kmers(self):
    """Querying 'GTTT' yields nodes GTT and TTT joined by edges keyed 0 and 1."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '.....C..')
    graph_builder.with_kmer('AAC', 1, 'a.......')
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    result_graph = contig_retriever.get_kmer_graph('GTTT')

    # then
    expected_nodes = {'GTT', 'TTT'}
    expected_edges = {('GTT', 'TTT', 0), ('GTT', 'TTT', 1)}
    assert set(result_graph.nodes) == expected_nodes
    assert set(result_graph.edges) == expected_edges
def test_with_one_kmer_asking_for_longer_contig_returns_one_kmer_with_coverage_2(
        self):
    """Querying 'AAAA' yields one 'AAA' node, one edge, and coverage [1, 2]."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '........')
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    result_graph = contig_retriever.get_kmer_graph('AAAA')

    # then
    edge_count = len(result_graph.edges)
    assert 1 == edge_count
    assert list(result_graph) == ['AAA']
    aaa_record = result_graph.nodes['AAA']['kmer']
    assert [1, 2] == list(aaa_record.coverage)
def test_gets_aaa_for_ttt_query(self, RAClass):
    """``get_kmer_for_string`` returns the 'AAA' record for both 'AAA' and 'TTT'."""
    # given
    graph_builder = builder.Graph()
    graph_builder.with_kmer_size(3)
    graph_builder.with_num_colors(1)
    stored_record = KmerRecord('AAA', [1], [as_edge_set('........')])
    graph_builder.with_kmer_record(stored_record)
    random_access = RAClass(graph_builder.build())

    # when
    for query in ('AAA', 'TTT'):
        assert stored_record.kmer == random_access.get_kmer_for_string(query).kmer
def test_gets_aaa(self):
    """Every record yielded by ``values()`` matches the single stored 'AAA' record."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3).with_num_colors(1)
    stored_record = KmerRecord('AAA', (1, ), [as_edge_set('........')])
    graph_builder.with_kmer_record(stored_record)
    random_access = self.RAClass(graph_builder.build())

    # when
    for parsed_record in random_access.values():
        assert stored_record.kmer == parsed_record.kmer
        assert np.all(stored_record.coverage == parsed_record.coverage)
        assert stored_record.edges == parsed_record.edges
def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(self):
    """Querying 'AAAC' on a two-color graph returns all three k-mers.

    Expected edges are AAA->AAC keyed 0 and 2, and AAA->AAT keyed 1.
    """
    # given
    graph_builder = builder.Graph().with_kmer_size(3).with_num_colors(2)
    graph_builder.with_kmer('AAA', [1, 1], ['.....C..', '.......T'])
    graph_builder.with_kmer('AAC', [1, 0], ['a.......', '........'])
    graph_builder.with_kmer('AAT', [0, 1], ['........', 'a.......'])
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    result_graph = contig_retriever.get_kmer_graph('AAAC')

    # then
    expected_nodes = {'AAA', 'AAC', 'AAT'}
    expected_edges = {
        ('AAA', 'AAC', 0),
        ('AAA', 'AAT', 1),
        ('AAA', 'AAC', 2),
    }
    assert set(result_graph) == expected_nodes
    assert set(result_graph.edges) == expected_edges
def test_parses_records(self, data, kmer_size, num_colors, n_kmers,
                        test_serializer):
    """Records written to a graph are parsed back unchanged.

    When ``test_serializer`` is truthy, each parsed record is additionally
    dumped and compared with its raw data, and the whole parser content is
    re-serialized in shuffled key order and re-parsed before the comparison.
    """
    # given
    assume(n_kmers <= (4**kmer_size) / 4)
    graph_builder = builder.Graph().with_kmer_size(kmer_size)
    graph_builder = graph_builder.with_num_colors(num_colors)
    expected_kmers = []
    used_kmer_strings = set()
    # Redraw on collision so that every record has a distinct k-mer string.
    while len(expected_kmers) < n_kmers:
        record = data.draw(kmer_records(kmer_size, num_colors))
        if record.kmer in used_kmer_strings:
            continue
        used_kmer_strings.add(record.kmer)
        graph_builder.with_kmer_record(record)
        expected_kmers.append(record)
    ra_parser = self.RAClass(graph_builder.build())

    if test_serializer:
        # Dumping a single record must reproduce its underlying raw data.
        for parsed_record in ra_parser.values():
            record_buffer = io.BytesIO()
            parsed_record.dump(record_buffer)
            assert parsed_record._kmer_data._data == record_buffer.getvalue()

        # Round-trip the whole parser through the serializer, keys shuffled.
        shuffled_keys = list(ra_parser.keys())
        random.shuffle(shuffled_keys)
        graph_buffer = io.BytesIO()
        kmers_to_dump = kmer_serializer.Kmers(
            keys=shuffled_keys,
            val_callable=lambda k: ra_parser[k],
            kmer_size=kmer_size,
            num_colors=num_colors,
            sample_names=ra_parser.sample_names,
        )
        kmers_to_dump.dump(graph_buffer)
        graph_buffer.seek(0)
        ra_parser = self.RAClass(graph_buffer)

    # when
    for expected_kmer in expected_kmers:
        parsed = ra_parser[expected_kmer.kmer]

        # then
        assert expected_kmer.kmer == parsed.kmer
        assert np.all(expected_kmer.coverage == parsed.coverage)
        assert expected_kmer.edges == parsed.edges
def test_two_nodes_linking_to_self(self):
    """``get_kmers('TTAA')`` returns the same 'TAA' record for both positions.

    The two entries pair that record with the strings 'TTA' and 'TAA', and
    the record's second edge set holds only the 't' edge.
    """
    # given
    graph_builder = builder.Graph().with_kmer_size(3)

    # when
    contig_retriever = ContigRetriever(graph_builder.build())
    kmer_list = contig_retriever.get_kmers('TTAA')

    # then
    assert len(kmer_list) == 2
    first_entry = kmer_list[0]
    second_entry = kmer_list[1]
    shared_kmer = first_entry[0]
    assert first_entry[0].kmer == 'TAA'
    assert first_entry[1] == 'TTA'
    assert second_entry[0].kmer == 'TAA'
    assert second_entry[1] == 'TAA'
    assert second_entry[0] is shared_kmer
    assert shared_kmer.edges[1].is_edge('t')
    for other_letter in 'acgACGT':
        assert not shared_kmer.edges[1].is_edge(other_letter)
def test_slurping_ra_parser(self, num_kmers):
    """Count seeks and reads made by ``SlurpedRandomAccess`` on its handle.

    Expects no ``seek`` calls during loading or lookups, and a ``read`` count
    of ``num_kmers`` plus 10 header reads plus 1 EOF read that does not grow
    when k-mers are looked up afterwards.
    """
    # given
    kmer_size = 11
    graph_builder = builder.Graph().with_kmer_size(kmer_size).with_num_colors(1)
    unique_kmers = set()
    # Keep drawing random k-mers, skipping collisions, until enough are stored.
    while len(unique_kmers) < num_kmers:
        bases = [random.choice('ACGT') for _ in range(kmer_size)]
        candidate = lexlo(''.join(bases))
        if candidate in unique_kmers:
            continue
        unique_kmers.add(candidate)
        graph_builder.with_kmer(candidate)
    handle = graph_builder.build()
    lookup_order = sorted(unique_kmers)

    handle.seek(0)
    with mock.patch.object(handle, 'seek', wraps=handle.seek) as mocked_seek:
        # when
        slurped = parser.SlurpedRandomAccess.from_handle(handle)

        # then
        assert 0 == mocked_seek.call_count
        for kmer_string in lookup_order:
            slurped[kmer_string]
        assert 0 == mocked_seek.call_count

    handle.seek(0)
    num_header_reads = 10
    num_eof_reads = 1
    expected_reads = num_kmers + num_eof_reads + num_header_reads
    with mock.patch.object(handle, 'read', wraps=handle.read) as mocked_read:
        # when
        slurped = parser.SlurpedRandomAccess.from_handle(handle)

        # then
        assert expected_reads == mocked_read.call_count
        for kmer_string in lookup_order:
            slurped[kmer_string]
        assert expected_reads == mocked_read.call_count
def test_two_node_path_and_three_node_cycle(self):
    """Querying 'AAACGAC' yields the four path edges in both colors plus GAC->ACG.

    Four edges for each of colors 0 and 1, plus the color-0 GAC->ACG edge,
    give the nine edges expected in total.
    """
    # given
    colors = [0, 1]
    graph_builder = (
        builder.Graph()
        .with_kmer_size(3)
        .with_kmer('AAA', 1, '.....C..')
        .with_kmer('AAC', 1, 'a.....G.')
        .with_kmer('ACG', 1, 'a.g.A...')
        .with_kmer('CGA', 1, 'a....C..')
        .with_kmer('GAC', 1, '.c....G.')
    )
    contig_retriever = ContigRetriever(graph_builder.build())

    # when
    expect = KmerGraphExpectation(contig_retriever.get_kmer_graph('AAACGAC'))

    # then
    edges_in_every_color = (
        ('AAA', 'AAC'),
        ('AAC', 'ACG'),
        ('ACG', 'CGA'),
        ('CGA', 'GAC'),
    )
    for color in colors:
        for source, target in edges_in_every_color:
            expect.has_edge(source, target, color)
    expect.has_edge('GAC', 'ACG', 0)
    expect.has_n_edges(9)
def test_two_linked_kmers_pickle_ok(self):
    """A retrieved k-mer graph survives a pickle round trip.

    The graph obtained for the 'GTTT' contig is pickled into an in-memory
    buffer and loaded back; the copy must have the same number of nodes and
    equal per-node data.
    """
    # ``nx.write_gpickle`` / ``nx.read_gpickle`` were deprecated in NetworkX
    # 2.6 and removed in 3.0; they were thin wrappers around ``pickle``, so
    # the standard library is used directly to work on every version.
    import pickle

    # given
    color_names = 'samp1', 'samp2'
    graph_builder = builder.Graph() \
        .with_kmer_size(3) \
        .with_num_colors(2) \
        .with_color_names(*color_names) \
        .with_kmer('AAA', [1, 1], ['.....C..', '.......T']) \
        .with_kmer('AAC', [1, 0], ['a.......', '........'])
    retriever = ContigRetriever(graph_builder.build())
    kmer_graph = retriever.get_kmer_graph('GTTT')

    # when
    buffer = io.BytesIO()
    # HIGHEST_PROTOCOL matches the default protocol of the removed helper.
    pickle.dump(kmer_graph, buffer, pickle.HIGHEST_PROTOCOL)
    buffer.seek(0)
    unpickled_kmer_graph = pickle.load(buffer)

    # then
    assert len(unpickled_kmer_graph) == len(kmer_graph)
    unpickle_node_data = unpickled_kmer_graph.nodes(data=True)
    for node, data in kmer_graph.nodes(data=True):
        assert unpickle_node_data[node] == data
def __attrs_post_init__(self):
    """Create one graph builder per entry of ``self.n_colors_per_graph``.

    Each builder is configured with ``self.kmer_size`` and the entry's color
    count; the resulting list is stored on ``self.graph_builders``.
    """
    graph_builders = []
    for n_graph_colors in self.n_colors_per_graph:
        sized_builder = builder.Graph().with_kmer_size(self.kmer_size)
        graph_builders.append(sized_builder.with_num_colors(n_graph_colors))
    self.graph_builders = graph_builders