def test_with_bubble_and_two_colors_returns_all_kmers(self, tmpdir, ra_constructor):
    """Check a both-orientation traversal from 'ACA' over a bubble graph.

    The graph is built with k=3 from the sequences 'AAACAAG' and 'AAATAAG'
    plus a further 'AAATAAG' sequence named 'sample_1'. Only color 0 is
    traversed; the expected nodes carry two coverage values each and the
    expected edges carry colors 0 and 1.
    """
    # given
    kmer_size = 3
    output_graph = (builder.Mccortex(kmer_size)
                    .with_dna_sequence('AAACAAG')
                    .with_dna_sequence('AAATAAG')
                    .with_dna_sequence('AAATAAG', name='sample_1')
                    .build(tmpdir))

    # Keep the graph file open for the traversal and every assertion, and
    # close it deterministically even when an assertion fails.
    with open(output_graph, 'rb') as graph_handle:
        traverser = Engine(ra_constructor(graph_handle),
                           traversal_colors=(0, ),
                           orientation=EngineTraversalOrientation.both)

        # when
        expect = KmerGraphExpectation(traverser.traverse_from('ACA').graph,
                                      sort_edges=True)

        # then
        expect.has_node('AAA').has_coverages(2, 1)
        expect.has_node('AAC').has_coverages(1, 0)
        expect.has_node('ACA').has_coverages(1, 0)
        expect.has_node('CAA').has_coverages(1, 0)
        expect.has_node('AAG').has_coverages(2, 1)
        expect.has_node('AAT').has_coverages(1, 1)
        expect.has_node('ATA').has_coverages(1, 1)
        expect.has_node('TAA').has_coverages(1, 1)
        expect.has_edges('AAA AAC 0', 'AAC ACA 0', 'ACA CAA 0', 'CAA AAG 0',
                         'AAA AAT 0', 'AAT ATA 0', 'ATA TAA 0', 'TAA AAG 0',
                         'AAA AAT 1', 'AAT ATA 1', 'ATA TAA 1', 'AAG TAA 1')
class EngineTestDriver(object):
    """Fluent test driver that builds a graph, runs an ``Engine`` and wraps the result.

    The ``with_*`` methods record configuration and return ``self`` so calls
    can be chained; ``run`` performs the traversal and returns a
    ``KmerGraphExpectation`` over the traversed graph.
    """

    # Builder that produces the graph handed to ``ra_constructor`` in ``run``.
    graph_builder = attr.ib(attr.Factory(builder.Graph))
    # Single start k-mer (used with ``Engine.traverse_from``).
    start_kmer_string = attr.ib(None)
    # Start string (used with ``Engine.traverse_from_each_kmer_in``).
    start_string = attr.ib(None)
    # Forwarded to ``Engine`` as ``max_nodes``.
    max_nodes = attr.ib(1000)
    # Forwarded to ``Engine`` as ``orientation``.
    traversal_orientation = attr.ib(EngineTraversalOrientation.original)
    # Engine created by ``run``; ``None`` until then.
    traverser = attr.ib(None)
    # Forwarded to ``Engine`` as ``traversal_colors``.
    traversal_colors = attr.ib((0, ))
    # Callable that turns the built graph into a random-access parser.
    ra_constructor = attr.ib(RandomAccess)

    def with_kmer(self, *args):
        """Forward ``args`` to the graph builder's ``with_kmer``; return ``self``."""
        self.graph_builder.with_kmer(*args)
        return self

    def with_kmer_size(self, n):
        """Forward ``n`` to the graph builder's ``with_kmer_size``; return ``self``."""
        self.graph_builder.with_kmer_size(n)
        return self

    def with_num_colors(self, n):
        """Forward ``n`` to the graph builder's ``with_num_colors``; return ``self``."""
        self.graph_builder.with_num_colors(n)
        return self

    def with_start_kmer_string(self, start_kmer_string):
        """Set the single k-mer to start traversal from; return ``self``."""
        self.start_kmer_string = start_kmer_string
        return self

    def with_start_string(self, start_string):
        """Set the string whose k-mers seed the traversal; return ``self``."""
        self.start_string = start_string
        return self

    def with_max_nodes(self, max_nodes):
        """Set the ``max_nodes`` value passed to the engine; return ``self``."""
        self.max_nodes = max_nodes
        return self

    def with_traversal_orientation(self, orientation):
        """Set the orientation by ``EngineTraversalOrientation`` member name; return ``self``."""
        self.traversal_orientation = EngineTraversalOrientation[orientation]
        return self

    def with_traversal_colors(self, *colors):
        """Set the colors to traverse (stored as a tuple); return ``self``."""
        self.traversal_colors = colors
        return self

    def with_ra_constructor(self, constructor):
        """Set the callable used to wrap the built graph; return ``self``."""
        self.ra_constructor = constructor
        return self

    def run(self):
        """Build the graph, traverse it and return a ``KmerGraphExpectation``.

        Exactly one of ``start_string`` and ``start_kmer_string`` must be set.

        Raises:
            AssertionError: if neither or both start points are configured.
        """
        # Validate the configuration first so a misconfigured driver fails
        # before any graph is built.
        assert (self.start_string is None) != (self.start_kmer_string is None)

        random_access_parser = self.ra_constructor(self.graph_builder.build())
        self.traverser = Engine(random_access_parser,
                                traversal_colors=self.traversal_colors,
                                max_nodes=self.max_nodes,
                                orientation=self.traversal_orientation)
        # Branch on ``is not None`` (not truthiness) so the branch agrees with
        # the assertion above even for an empty start string.
        if self.start_string is not None:
            self.traverser.traverse_from_each_kmer_in(self.start_string)
        else:
            self.traverser.traverse_from(self.start_kmer_string)
        return expectation.graph.KmerGraphExpectation(self.traverser.graph)
def run(self):
    """Build the graph, traverse it and return a ``KmerGraphExpectation``.

    Exactly one of ``self.start_string`` and ``self.start_kmer_string`` must
    be set. The engine is stored on ``self.traverser``.

    Raises:
        AssertionError: if neither or both start points are configured.
    """
    # Validate the configuration first so a misconfigured driver fails before
    # any graph is built.
    assert (self.start_string is None) != (self.start_kmer_string is None)

    random_access_parser = self.ra_constructor(self.graph_builder.build())
    self.traverser = Engine(random_access_parser,
                            traversal_colors=self.traversal_colors,
                            max_nodes=self.max_nodes,
                            orientation=self.traversal_orientation)
    # Branch on ``is not None`` (not truthiness) so the branch agrees with the
    # assertion above even for an empty start string.
    if self.start_string is not None:
        self.traverser.traverse_from_each_kmer_in(self.start_string)
    else:
        self.traverser.traverse_from(self.start_kmer_string)
    return expectation.graph.KmerGraphExpectation(self.traverser.graph)
def assemble(argv):
    """Run the ``cortexpy assemble`` command.

    Parses ``argv``, traverses the cortex graph from every k-mer of the start
    sequences FASTA in both orientations, makes the node orientations
    consistent with the seed k-mers and writes all simple paths as FASTA to
    ``args.out`` ('-' means stdout).

    Args:
        argv: list of command-line arguments (without the program name).
    """
    # Imports are kept function-local, as in the rest of this command module.
    import argparse
    from cortexpy.command.shared import get_shared_argparse
    shared_parser = get_shared_argparse()
    parser = argparse.ArgumentParser(prog='cortexpy assemble', parents=[shared_parser],
                                     description="""
    Assemble all possible transcripts in <graph> from all k-mers in <start-sequences> and print the
    resulting transcripts as a FASTA to stdout. All specified colors are traversed and collapsed
    before output.
    """)
    parser.add_argument('graph', help='cortex graph')
    parser.add_argument('start_sequences_fasta', help='FASTA file with sequences to start from')
    parser.add_argument('--color', type=int, help='Restrict view to single color')
    parser.add_argument('--max-nodes', type=int, default=1000,
                        help='Maximum number of nodes to traverse [default: %(default)s]')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    # Called for its logging-configuration side effect; the returned logger is
    # not used in this command.
    configure_logging_from_args_and_get_logger(args, 'cortexpy.assemble')

    import sys
    from contextlib import ExitStack
    from Bio import SeqIO
    from cortexpy.utils import kmerize_fasta
    from cortexpy.graph.interactor import Interactor
    from cortexpy.graph.parser.random_access import RandomAccess
    from cortexpy.constants import EngineTraversalOrientation
    from cortexpy.graph.traversal.engine import Engine

    # ExitStack closes every file we open ourselves (but never sys.stdout),
    # including when traversal or writing raises.
    with ExitStack() as stack:
        if args.out == '-':
            output = sys.stdout
        else:
            output = stack.enter_context(open(args.out, 'wt'))

        random_access = RandomAccess(stack.enter_context(open(args.graph, 'rb')))
        if args.color is None:
            colors = list(range(random_access.num_colors))
        else:
            colors = [args.color]
        traverser = Engine(
            random_access,
            traversal_colors=colors,
            orientation=EngineTraversalOrientation.both,
            max_nodes=args.max_nodes,
        )
        traverser.traverse_from_each_kmer_in_fasta(args.start_sequences_fasta)

        kmers = kmerize_fasta(args.start_sequences_fasta, traverser.ra_parser.kmer_size)
        interactor = Interactor.from_graph(traverser.graph).make_graph_nodes_consistent(
            seed_kmer_strings=kmers)

        # The paths are consumed by SeqIO.write, so writing must happen while
        # the graph and output handles are still open.
        seq_record_generator = interactor.all_simple_paths()
        SeqIO.write(seq_record_generator, output, 'fasta')
def run(self):
    """Execute the command this driver was configured with and return its graph.

    With ``self.retrieve`` set, a ``ContigRetriever`` is created over the built
    graph, stored on ``self.retriever`` and asked for the k-mer graph of
    ``self.contig_to_retrieve``. Otherwise, with ``self.traverse`` set, the
    built graph is traversed from ``self.traversal_start_kmer`` and the
    resulting graph is made node-consistent with that k-mer.

    Raises:
        Exception: if neither ``retrieve`` nor ``traverse`` is set.
    """
    if self.retrieve:
        self.retriever = ContigRetriever(self.graph_builder.build())
        kmer_graph = self.retriever.get_kmer_graph(self.contig_to_retrieve)
        return kmer_graph

    if not self.traverse:
        raise Exception("Need to load a command")

    engine = Engine(RandomAccess(self.graph_builder.build()),
                    traversal_colors=self.traversal_colors)
    traversed_graph = engine.traverse_from(self.traversal_start_kmer).graph
    interactor = Interactor(traversed_graph)
    consistent = interactor.make_graph_nodes_consistent([self.traversal_start_kmer])
    return consistent.graph
def test_with_two_subgraphs_returns_all_kmers(self, tmpdir, ra_constructor):
    """Check traversal seeded from two k-mers over a graph built from two sequences.

    The graph is built with k=3 from 'AAAT' and 'GGGC'; traversal in both
    orientations is started from each of 'AAA' and 'GGG', and one edge is
    expected for each start.
    """
    # given
    kmer_size = 3
    output_graph = (builder.Mccortex(kmer_size)
                    .with_dna_sequence('AAAT')
                    .with_dna_sequence('GGGC')
                    .build(tmpdir))

    # Keep the graph file open for the traversal and the assertion, and close
    # it deterministically even when the assertion fails.
    with open(output_graph, 'rb') as graph_handle:
        traverser = Engine(ra_constructor(graph_handle),
                           traversal_colors=(0, ),
                           orientation=EngineTraversalOrientation.both)

        # when
        expect = KmerGraphExpectation(
            traverser.traverse_from_each_kmer_in_iterable(['AAA', 'GGG']).graph)

        # then
        expect.has_edges('AAA AAT 0', 'CCC GCC 0')
def subgraph(argv):
    """Run the ``cortexpy subgraph`` command.

    Parses ``argv``, opens one or more cortex graphs (joined through a
    ``RandomAccessCollection`` when more than one is given), traverses from
    every k-mer of the initial contig (or of a FASTA file when
    ``--initial-fasta`` is set) and dumps the traversed graph in cortex format
    to ``args.out`` ('-' means stdout).

    Args:
        argv: list of command-line arguments (without the program name).
    """
    # Imports are kept function-local, as in the rest of this command module.
    import argparse
    from .shared import get_shared_argparse
    import cortexpy.constants

    shared_parser = get_shared_argparse()
    parser = argparse.ArgumentParser(
        'cortexpy subgraph', parents=[shared_parser], description="""
    Find all subgraphs from every k-mer in an initial contig.

    Input and output are cortex graphs.
    """
    )
    parser.add_argument('initial_contig', help="Initial contig from which to start traversal")
    parser.add_argument('--graphs', nargs='+', required=True,
                        help="Input cortexpy graphs."
                             " Multiple graphs can be specified and are joined on-the-fly.")
    # argparse checks ``choices`` AFTER applying ``type``, so the option is
    # parsed as a plain member name here and converted to the enum once
    # parsing is done. Converting through ``type=`` would compare an enum
    # member against the list of names.
    parser.add_argument('--orientation',
                        choices=[o.name for o in cortexpy.constants.EngineTraversalOrientation],
                        default=cortexpy.constants.EngineTraversalOrientation.both.name,
                        help='Traversal orientation')
    parser.add_argument('-c', '--colors', nargs='+', type=int,
                        help="""Colors to traverse. May take multiple color numbers separated by
                        a space. The traverser will follow all colors
                        specified. Will follow all colors if not specified.
                        """, default=None)
    parser.add_argument('--initial-fasta', action='store_true',
                        help='Treat initial_contig as a file in FASTA format')
    parser.add_argument('--max-nodes', type=int, default=None,
                        help='Maximum number of nodes to traverse (int).'
                             ' Die without output if max nodes is exceeded')
    parser.add_argument('--logging-interval', type=int, default=90,
                        help='Logging interval. [default: %(default)s]')
    parser.add_argument('--cache-size', type=int, default=0, help='Number of kmers to cache')
    # NOTE(review): --binary-search-cache-size is parsed but not forwarded to
    # any parser below - confirm whether it should be.
    parser.add_argument('--binary-search-cache-size', type=int, default=0,
                        help='Number of kmers to cache for binary search')
    parser.add_argument('--slurp', action='store_true',
                        help='Slurp all cortex graphs before traversal')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    logger = configure_logging_from_args_and_get_logger(args, 'cortexpy.traverse')

    import sys
    from cortexpy.graph.serializer.kmer import dump_colored_de_bruijn_graph_to_cortex
    from cortexpy.graph.parser.random_access_collection import RandomAccessCollection
    from cortexpy.constants import EngineTraversalOrientation
    from cortexpy.graph.traversal.engine import Engine
    from contextlib import ExitStack

    # ExitStack closes every file opened below (but never stdout), including
    # when traversal or serialization raises.
    with ExitStack() as stack:
        if args.out == '-':
            output = sys.stdout.buffer
        else:
            output = stack.enter_context(open(args.out, 'wb'))

        if args.slurp:
            from cortexpy.graph.parser.random_access import SlurpedRandomAccess
            RAClass = SlurpedRandomAccess.from_handle
            logger.info("Slurping cortex graphs")
        else:
            from cortexpy.graph.parser.random_access import RandomAccess as RAClass

        if len(args.graphs) == 1:
            ra_parser = RAClass(
                stack.enter_context(open(args.graphs[0], 'rb')),
                kmer_cache_size=args.cache_size
            )
        else:
            ra_parser = RandomAccessCollection(
                [RAClass(stack.enter_context(open(graph_path, 'rb')),
                         kmer_cache_size=args.cache_size)
                 for graph_path in args.graphs])

        engine = Engine(
            ra_parser,
            orientation=EngineTraversalOrientation[args.orientation],
            max_nodes=args.max_nodes,
            logging_interval=args.logging_interval
        )

        if args.colors is not None:
            engine.traversal_colors = args.colors
        else:
            # No colors requested: follow every color present in the graph(s).
            engine.traversal_colors = tuple(range(engine.ra_parser.num_colors))
        logger.info('Traversing colors: ' + ','.join(str(c) for c in engine.traversal_colors))

        if args.initial_fasta:
            engine.traverse_from_each_kmer_in_fasta(args.initial_contig)
        else:
            engine.traverse_from_each_kmer_in(args.initial_contig)

        dump_colored_de_bruijn_graph_to_cortex(engine.graph, output)