def test_two_linked_kmers_are_jsonifiable(self):
    """Serialize a retrieved kmer graph to JSON and check its metadata.

    The JSON must parse, list colors 0-2, append ``'retrieved_contig'`` to
    the sample names, and describe two nodes joined by two edges.
    """
    # given
    color_names = ['samp1', 'samp2']
    graph_builder = (
        builder.Graph()
        .with_kmer_size(3)
        .with_num_colors(2)
        .with_color_names(*color_names)
        .with_kmer('AAA 1 1 .....C.. ........')
        .with_kmer('AAC 1 0 a....... ........')
    )
    retriever = ContigRetriever(graph_builder.build())
    graph = retriever.get_kmer_graph('GTTT')

    # when
    serializer = cortexpy.graph.serializer.serializer.Serializer(graph)
    kmer_json = serializer.to_json()
    expect = expectation.JsonGraph.from_string(kmer_json)

    # then
    kmer_data = json.loads(kmer_json)  # does not raise
    graph_data = kmer_data['graph']
    assert graph_data['colors'] == [0, 1, 2]
    assert graph_data['sample_names'] == color_names + ['retrieved_contig']
    expect.has_n_nodes(2)
    expect.has_n_edges(2)
def test_with_one_kmer_returns_one_kmer(self):
    """Retrieving 'AAA' from a one-kmer graph yields that single node and no edges."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '........')
    cortex_graph = graph_builder.build()
    retriever = ContigRetriever(cortex_graph)

    # when
    retrieved = retriever.get_kmer_graph('AAA')

    # then
    edge_count = len(retrieved.edges)
    assert edge_count == 0
    assert list(retrieved) == ['AAA']
def test_with_two_linked_kmers_returns_two_kmers(self):
    """Retrieving 'AAA' from a graph linking AAA to AAC yields both nodes and one edge."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    for kmer_string, edge_set in (('AAA', '.....C..'), ('AAC', 'a.......')):
        graph_builder.with_kmer(kmer_string, 1, edge_set)
    retriever = ContigRetriever(graph_builder.build())

    # when
    kmer_graph = retriever.get_kmer_graph('AAA')
    expect = KmerGraphExpectation(kmer_graph)

    # then
    expect.has_nodes('AAA', 'AAC').has_n_edges(1)
    expect.has_edge('AAA', 'AAC', 0)
def test_with_no_kmer_returns_missing_kmer(self):
    """Retrieving 'AAA' from a graph with no kmers yields one edgeless node with coverages (0, 1)."""
    # given
    empty_graph = builder.Graph().with_kmer_size(3).build()
    retriever = ContigRetriever(empty_graph)

    # when
    kmer_graph = retriever.get_kmer_graph('AAA')
    expect = KmerGraphExpectation(kmer_graph)

    # then
    (
        expect.has_n_nodes(1)
        .has_n_edges(0)
        .has_node('AAA')
        .has_coverages(0, 1)
    )
def test_with_two_linked_kmers_returns_two_kmers(self):
    """Retrieving 'GTTT' yields nodes GTT and TTT joined by edges with keys 0 and 1."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    for kmer_string, edge_set in (('AAA', '.....C..'), ('AAC', 'a.......')):
        graph_builder.with_kmer(kmer_string, 1, edge_set)
    retriever = ContigRetriever(graph_builder.build())

    # when
    retrieved = retriever.get_kmer_graph('GTTT')

    # then
    expected_nodes = {'GTT', 'TTT'}
    expected_edges = {('GTT', 'TTT', 0), ('GTT', 'TTT', 1)}
    assert set(retrieved.nodes) == expected_nodes
    assert set(retrieved.edges) == expected_edges
def run(self):
    """Execute the configured command and return the resulting graph.

    When ``self.retrieve`` is set, a ``ContigRetriever`` is stored on
    ``self.retriever`` and the kmer graph for ``self.contig_to_retrieve`` is
    returned. Otherwise, when ``self.traverse`` is set, the built graph is
    traversed from ``self.traversal_start_kmer`` and the graph produced by
    ``Interactor.make_graph_nodes_consistent`` is returned.

    Raises:
        Exception: if neither retrieval nor traversal was requested.
    """
    # Retrieval takes precedence over traversal when both flags are set.
    if self.retrieve:
        self.retriever = ContigRetriever(self.graph_builder.build())
        return self.retriever.get_kmer_graph(self.contig_to_retrieve)

    if not self.traverse:
        raise Exception("Need to load a command")

    random_access = RandomAccess(self.graph_builder.build())
    engine = Engine(random_access, traversal_colors=self.traversal_colors)
    traversed_graph = engine.traverse_from(self.traversal_start_kmer).graph
    interactor = Interactor(traversed_graph)
    consistent = interactor.make_graph_nodes_consistent(
        [self.traversal_start_kmer])
    return consistent.graph
def test_with_one_kmer_asking_for_longer_contig_returns_one_kmer_with_coverage_2(
        self):
    """Retrieving 'AAAA' from a one-kmer graph yields node AAA with one edge and coverage [1, 2]."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)
    graph_builder.with_kmer('AAA', 1, '........')
    retriever = ContigRetriever(graph_builder.build())

    # when
    retrieved = retriever.get_kmer_graph('AAAA')

    # then
    assert len(retrieved.edges) == 1
    assert list(retrieved) == ['AAA']
    coverage = list(retrieved.nodes['AAA']['kmer'].coverage)
    assert coverage == [1, 2]
def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(self):
    """Retrieving 'AAAC' from a two-color graph yields AAA, AAC and AAT with edge keys 0, 1 and 2."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3).with_num_colors(2)
    kmer_specs = (
        ('AAA', [1, 1], ['.....C..', '.......T']),
        ('AAC', [1, 0], ['a.......', '........']),
        ('AAT', [0, 1], ['........', 'a.......']),
    )
    for kmer_string, coverage, edge_sets in kmer_specs:
        graph_builder.with_kmer(kmer_string, coverage, edge_sets)
    retriever = ContigRetriever(graph_builder.build())

    # when
    retrieved = retriever.get_kmer_graph('AAAC')

    # then
    expected_edges = {
        ('AAA', 'AAC', 0),
        ('AAA', 'AAT', 1),
        ('AAA', 'AAC', 2),
    }
    assert set(retrieved) == {'AAA', 'AAC', 'AAT'}
    assert set(retrieved.edges) == expected_edges
def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(
        self, tmpdir):
    """Retrieve 'GTTT' from a graph file built from 'AAAC' and 'AAAT'.

    The graph is written under ``tmpdir`` by ``builder.Mccortex`` and read
    back through a binary file handle. The retrieved graph must contain
    nodes TTT, GTT and ATT and the three expected keyed edges.
    """
    # given
    kmer_size = 3
    output_graph = (
        builder.Mccortex(kmer_size)
        .with_dna_sequence('AAAC')
        .with_dna_sequence('AAAT')
        .build(tmpdir)
    )

    # The context manager closes the graph file even when an assertion
    # fails; the original left the handle open for the garbage collector.
    # Everything that may read from the handle stays inside the block.
    with open(output_graph, 'rb') as graph_handle:
        retriever = ContigRetriever(graph_handle)

        # when
        kmer_graph = retriever.get_kmer_graph('GTTT')

        # then
        assert set(kmer_graph.nodes) == {'TTT', 'GTT', 'ATT'}
        assert set(kmer_graph.edges) == {
            ('GTT', 'TTT', 0),
            ('GTT', 'TTT', 1),
            ('ATT', 'TTT', 0),
        }
def view_contig(argv):
    """Run the ``cortexpy view contig`` sub-command.

    Args:
        argv: command-line arguments for this sub-command: the cortex graph
            path, the contig to explore, and optionally ``--to-json``.

    With ``--to-json`` the kmer graph for the contig is printed as JSON;
    otherwise the contig is passed to ``print_contig``. Invalid arguments
    make ``argparse`` exit the process (``SystemExit``).
    """
    import argparse

    parser = argparse.ArgumentParser(prog='cortexpy view contig')
    parser.add_argument('graph', help="cortex graph")
    parser.add_argument('contig', help='contig to explore inside graph')
    parser.add_argument('--to-json', action='store_true')
    args = parser.parse_args(argv)

    # Imported after argument parsing, so usage errors and --help do not
    # need these modules to be loaded.
    from cortexpy.graph.contig_retriever import ContigRetriever
    from cortexpy.graph.serializer.serializer import Serializer

    # The context manager closes the graph file on every exit path; the
    # original never closed the handle returned by open().
    with open(args.graph, 'rb') as graph_handle:
        contig_retriever = ContigRetriever(graph_handle)
        if args.to_json:
            serializer = Serializer(
                contig_retriever.get_kmer_graph(args.contig))
            print(serializer.to_json())
        else:
            print_contig(contig_retriever, args.contig)
def test_two_nodes_linking_to_self(self):
    """Retrieving 'TTAA' from a graph with no kmers yields exactly one edge, TTA -> TAA with key 1."""
    # given
    graph_builder = builder.Graph().with_kmer_size(3)

    # when
    retriever = ContigRetriever(graph_builder.build())
    kmer_graph = retriever.get_kmer_graph('TTAA')
    expect = KmerGraphExpectation(kmer_graph)

    # then
    expect.has_edge('TTA', 'TAA', 1)
    expect.has_n_edges(1)
def test_two_node_path_and_three_node_cycle(self):
    """Retrieve 'AAACGAC' from a graph holding a path that leads into a cycle.

    Each of the four consecutive path edges must exist for colors 0 and 1,
    the closing edge GAC -> ACG must exist for color 0, and the graph must
    have nine edges in total.
    """
    # given
    colors = [0, 1]
    graph_builder = builder.Graph().with_kmer_size(3)
    kmer_specs = (
        ('AAA', '.....C..'),
        ('AAC', 'a.....G.'),
        ('ACG', 'a.g.A...'),
        ('CGA', 'a....C..'),
        ('GAC', '.c....G.'),
    )
    for kmer_string, edge_set in kmer_specs:
        graph_builder = graph_builder.with_kmer(kmer_string, 1, edge_set)
    retriever = ContigRetriever(graph_builder.build())

    # when
    expect = KmerGraphExpectation(retriever.get_kmer_graph('AAACGAC'))

    # then
    path_edges = (
        ('AAA', 'AAC'),
        ('AAC', 'ACG'),
        ('ACG', 'CGA'),
        ('CGA', 'GAC'),
    )
    for color in colors:
        for source, target in path_edges:
            expect.has_edge(source, target, color)
    expect.has_edge('GAC', 'ACG', 0)
    expect.has_n_edges(9)
def test_two_linked_kmers_pickle_ok(self):
    """Round-trip a retrieved kmer graph through pickle and compare node data.

    The unpickled graph must have the same number of nodes as the original,
    and every node's data must compare equal to the original's.
    """
    # NetworkX 3.0 removed nx.write_gpickle / nx.read_gpickle. Using the
    # stdlib pickle module directly works on both old and new NetworkX.
    import pickle

    # given
    color_names = 'samp1', 'samp2'
    graph_builder = (
        builder.Graph()
        .with_kmer_size(3)
        .with_num_colors(2)
        .with_color_names(*color_names)
        .with_kmer('AAA', [1, 1], ['.....C..', '.......T'])
        .with_kmer('AAC', [1, 0], ['a.......', '........'])
    )
    retriever = ContigRetriever(graph_builder.build())
    kmer_graph = retriever.get_kmer_graph('GTTT')

    # when
    buffer = io.BytesIO()
    # HIGHEST_PROTOCOL mirrors the default the removed gpickle helper used.
    pickle.dump(kmer_graph, buffer, pickle.HIGHEST_PROTOCOL)
    buffer.seek(0)
    unpickled_kmer_graph = pickle.load(buffer)

    # then
    assert len(unpickled_kmer_graph) == len(kmer_graph)
    unpickle_node_data = unpickled_kmer_graph.nodes(data=True)
    for node, data in kmer_graph.nodes(data=True):
        assert unpickle_node_data[node] == data
def test_two_nodes_linking_to_self(self):
    """Check ``get_kmers('TTAA')`` on a graph with no kmers.

    Both returned entries must share one kmer object named 'TAA', paired
    with the strings 'TTA' and 'TAA'. Its edge set at index 1 must contain
    only the 't' edge.
    """
    # given
    graph_builder = builder.Graph().with_kmer_size(3)

    # when
    retriever = ContigRetriever(graph_builder.build())
    kmer_list = retriever.get_kmers('TTAA')

    # then
    assert len(kmer_list) == 2
    first_entry = kmer_list[0]
    second_entry = kmer_list[1]
    kmer = first_entry[0]
    assert first_entry[0].kmer == 'TAA'
    assert first_entry[1] == 'TTA'
    assert second_entry[0].kmer == 'TAA'
    assert second_entry[1] == 'TAA'
    assert second_entry[0] is kmer

    assert kmer.edges[1].is_edge('t')
    for absent_letter in 'acgACGT':
        assert not kmer.edges[1].is_edge(absent_letter)
class SerializerTestDriver(object):
    """Fluent test driver that builds a graph, then retrieves a contig or traverses it.

    NOTE(review): the fields are declared with ``attr.ib``, which presumably
    relies on an ``@attr.s`` class decorator that is not visible here —
    confirm it is present above this class.
    """
    graph_builder = attr.ib(attr.Factory(builder.Graph))
    contig_to_retrieve = attr.ib(None)
    retriever = attr.ib(None)
    traverse = attr.ib(False)
    retrieve = attr.ib(False)
    traversal_start_kmer = attr.ib(None)
    traversal_colors = attr.ib((0, ))

    def with_kmer_size(self, n):
        """Forward the kmer size to the graph builder; return ``self`` for chaining."""
        self.graph_builder.with_kmer_size(n)
        return self

    def with_kmer(self, *args):
        """Forward a kmer description to the graph builder; return ``self`` for chaining."""
        self.graph_builder.with_kmer(*args)
        return self

    def traverse_with_start_kmer_and_colors(self, start_kmer, *colors):
        """Select traversal from ``start_kmer`` over ``colors``; return ``self`` for chaining."""
        self.traverse = True
        self.traversal_start_kmer = start_kmer
        self.traversal_colors = colors
        return self

    def retrieve_contig(self, contig):
        """Select retrieval of ``contig``; return ``self`` for chaining."""
        self.retrieve = True
        self.contig_to_retrieve = contig
        return self

    def run(self):
        """Execute the selected command and return the resulting graph.

        Raises:
            Exception: if neither retrieval nor traversal was selected.
        """
        # Retrieval takes precedence over traversal when both flags are set.
        if self.retrieve:
            self.retriever = ContigRetriever(self.graph_builder.build())
            return self.retriever.get_kmer_graph(self.contig_to_retrieve)

        if not self.traverse:
            raise Exception("Need to load a command")

        random_access = RandomAccess(self.graph_builder.build())
        engine = Engine(random_access,
                        traversal_colors=self.traversal_colors)
        traversed_graph = engine.traverse_from(self.traversal_start_kmer).graph
        interactor = Interactor(traversed_graph)
        consistent = interactor.make_graph_nodes_consistent(
            [self.traversal_start_kmer])
        return consistent.graph