예제 #1
0
    def test_with_bubble_and_two_colors_returns_all_kmers(
            self, tmpdir, ra_constructor):
        """Traverse a bubble graph from 'ACA' in both orientations.

        The graph is built from three sequences (the last one named
        'sample_1'); the traversal is restricted to ``traversal_colors=(0, )``.
        The test then checks the expected nodes, their coverages and the
        expected edges of the traversed graph.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAACAAG')
                        .with_dna_sequence('AAATAAG')
                        .with_dna_sequence('AAATAAG', name='sample_1')
                        .build(tmpdir))

        # The graph file is opened in a ``with`` block so the handle is closed
        # deterministically instead of being leaked.  Everything that might
        # still read from the parser stays inside the block.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            expect = KmerGraphExpectation(
                traverser.traverse_from('ACA').graph,
                sort_edges=True)

            # then
            expect.has_node('AAA').has_coverages(2, 1)
            expect.has_node('AAC').has_coverages(1, 0)
            expect.has_node('ACA').has_coverages(1, 0)
            expect.has_node('CAA').has_coverages(1, 0)
            expect.has_node('AAG').has_coverages(2, 1)
            expect.has_node('AAT').has_coverages(1, 1)
            expect.has_node('ATA').has_coverages(1, 1)
            expect.has_node('TAA').has_coverages(1, 1)

            expect.has_edges(
                'AAA AAC 0', 'AAC ACA 0', 'ACA CAA 0', 'CAA AAG 0',
                'AAA AAT 0', 'AAT ATA 0', 'ATA TAA 0', 'TAA AAG 0',
                'AAA AAT 1', 'AAT ATA 1', 'ATA TAA 1', 'AAG TAA 1')
예제 #2
0
@attr.s
class EngineTestDriver(object):
    """Fluent test helper that builds a graph, runs an ``Engine`` on it and
    returns a ``KmerGraphExpectation`` over the traversed graph.

    Every ``with_*`` method records one setting and returns ``self`` so calls
    can be chained; ``run`` performs the traversal.

    The ``@attr.s`` decorator is required: without it the ``attr.ib``
    declarations below are never turned into per-instance attributes.
    """
    # Builder for the graph under test; a fresh one is created per instance.
    graph_builder = attr.ib(attr.Factory(builder.Graph))
    # Exactly one of the two start values must be set before ``run``.
    start_kmer_string = attr.ib(None)
    start_string = attr.ib(None)
    max_nodes = attr.ib(1000)
    traversal_orientation = attr.ib(EngineTraversalOrientation.original)
    # Populated by ``run`` with the ``Engine`` that was used.
    traverser = attr.ib(None)
    traversal_colors = attr.ib((0, ))
    # Callable that wraps the built graph in a random-access parser.
    ra_constructor = attr.ib(RandomAccess)

    def with_kmer(self, *args):
        """Forward ``args`` to the graph builder's ``with_kmer``."""
        self.graph_builder.with_kmer(*args)
        return self

    def with_kmer_size(self, n):
        """Forward ``n`` to the graph builder's ``with_kmer_size``."""
        self.graph_builder.with_kmer_size(n)
        return self

    def with_num_colors(self, n):
        """Forward ``n`` to the graph builder's ``with_num_colors``."""
        self.graph_builder.with_num_colors(n)
        return self

    def with_start_kmer_string(self, start_kmer_string):
        """Record the single k-mer string to start the traversal from."""
        self.start_kmer_string = start_kmer_string
        return self

    def with_start_string(self, start_string):
        """Record the string whose k-mers are all used as start points."""
        self.start_string = start_string
        return self

    def with_max_nodes(self, max_nodes):
        """Record the ``max_nodes`` value passed to the ``Engine``."""
        self.max_nodes = max_nodes
        return self

    def with_traversal_orientation(self, orientation):
        """Record the orientation, looked up by member name.

        Raises:
            KeyError: if ``orientation`` is not a member name of
                ``EngineTraversalOrientation``.
        """
        self.traversal_orientation = EngineTraversalOrientation[orientation]
        return self

    def with_traversal_colors(self, *colors):
        """Record the colors to traverse (stored as a tuple)."""
        self.traversal_colors = colors
        return self

    def with_ra_constructor(self, constructor):
        """Record the callable used to wrap the built graph."""
        self.ra_constructor = constructor
        return self

    def run(self):
        """Build the graph, traverse it and wrap the result.

        Returns:
            A ``KmerGraphExpectation`` over the traverser's graph.

        Raises:
            AssertionError: unless exactly one of ``start_string`` and
                ``start_kmer_string`` is set (i.e. is not ``None``).
        """
        # Validate first so a misconfigured driver fails before any graph is
        # built or engine is created.
        assert (self.start_string is None) != (self.start_kmer_string is None)

        random_access_parser = self.ra_constructor(self.graph_builder.build())
        self.traverser = Engine(random_access_parser,
                                traversal_colors=self.traversal_colors,
                                max_nodes=self.max_nodes,
                                orientation=self.traversal_orientation)
        # Branch on ``is not None`` (matching the assertion above) so an empty
        # start string is not silently routed to ``traverse_from(None)``.
        if self.start_string is not None:
            self.traverser.traverse_from_each_kmer_in(self.start_string)
        else:
            self.traverser.traverse_from(self.start_kmer_string)
        return expectation.graph.KmerGraphExpectation(self.traverser.graph)
예제 #3
0
 def run(self):
     """Build the graph, traverse it with an ``Engine`` and wrap the result.

     The engine is stored on ``self.traverser``.  Traversal starts either
     from every k-mer in ``self.start_string`` or from the single k-mer
     ``self.start_kmer_string``.

     Returns:
         A ``KmerGraphExpectation`` over the traverser's graph.

     Raises:
         AssertionError: unless exactly one of ``start_string`` and
             ``start_kmer_string`` is set (i.e. is not ``None``).
     """
     # Validate first so a misconfigured driver fails before any graph is
     # built or engine is created.
     assert (self.start_string is None) != (self.start_kmer_string is None)

     random_access_parser = self.ra_constructor(self.graph_builder.build())
     self.traverser = Engine(random_access_parser,
                             traversal_colors=self.traversal_colors,
                             max_nodes=self.max_nodes,
                             orientation=self.traversal_orientation)
     # Branch on ``is not None`` (matching the assertion above) so an empty
     # start string is not silently routed to ``traverse_from(None)``.
     if self.start_string is not None:
         self.traverser.traverse_from_each_kmer_in(self.start_string)
     else:
         self.traverser.traverse_from(self.start_kmer_string)
     return expectation.graph.KmerGraphExpectation(self.traverser.graph)
예제 #4
0
def assemble(argv):
    """Command-line entry point for ``cortexpy assemble``.

    Parses ``argv``, traverses the cortex graph (in both orientations) from
    every k-mer of the start-sequences FASTA, makes the traversed graph's
    nodes consistent with those seed k-mers and writes all simple paths as
    FASTA records to the output.

    Args:
        argv: Argument strings to parse (passed to
            ``ArgumentParser.parse_args``).

    Notes:
        ``args.out`` is not declared in this function; presumably it comes
        from the shared parent parser.  ``'-'`` selects ``sys.stdout``.
    """
    import argparse
    from cortexpy.command.shared import get_shared_argparse
    shared_parser = get_shared_argparse()

    parser = argparse.ArgumentParser(prog='cortexpy assemble', parents=[shared_parser], description="""
    Assemble all possible transcripts in <graph> from all k-mers in <start-sequences> and print the
    resulting transcripts as a FASTA to stdout. All specified colors are traversed and collapsed
    before output.
    """)
    parser.add_argument('graph', help='cortex graph')
    parser.add_argument('start_sequences_fasta', help='FASTA file with sequences to start from')
    parser.add_argument('--color', type=int, help='Restrict view to single color')
    parser.add_argument('--max-nodes', type=int, default=1000,
                        help='Maximum number of nodes to traverse [default: %(default)s]')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    # The returned logger is not needed here; the call is kept because, going
    # by its name, it also configures logging from the parsed arguments.
    configure_logging_from_args_and_get_logger(args, 'cortexpy.assemble')

    import sys
    from contextlib import ExitStack
    from Bio import SeqIO
    from cortexpy.utils import kmerize_fasta
    from cortexpy.graph.interactor import Interactor
    from cortexpy.graph.parser.random_access import RandomAccess
    from cortexpy.constants import EngineTraversalOrientation
    from cortexpy.graph.traversal.engine import Engine

    # ExitStack closes every file opened below on exit, including on error.
    # sys.stdout is deliberately not registered so it is never closed.
    with ExitStack() as stack:
        if args.out == '-':
            output = sys.stdout
        else:
            output = stack.enter_context(open(args.out, 'wt'))

        graph_handle = stack.enter_context(open(args.graph, 'rb'))
        random_access = RandomAccess(graph_handle)
        if args.color is None:
            colors = list(range(random_access.num_colors))
        else:
            colors = [args.color]
        traverser = Engine(
            random_access,
            traversal_colors=colors,
            orientation=EngineTraversalOrientation.both,
            max_nodes=args.max_nodes,
        )
        traverser.traverse_from_each_kmer_in_fasta(args.start_sequences_fasta)
        kmers = kmerize_fasta(args.start_sequences_fasta, traverser.ra_parser.kmer_size)
        interactor = Interactor.from_graph(traverser.graph).make_graph_nodes_consistent(
            seed_kmer_strings=kmers)

        seq_record_generator = interactor.all_simple_paths()

        # Written inside the ``with`` block: the records are produced while
        # writing, so both files must still be open at this point.
        SeqIO.write(seq_record_generator, output, 'fasta')
예제 #5
0
 def run(self):
     """Execute whichever command was loaded on this driver.

     In retrieve mode a ``ContigRetriever`` is created from the built graph,
     kept on ``self.retriever``, and the k-mer graph for
     ``self.contig_to_retrieve`` is returned.  In traverse mode the built
     graph is traversed from ``self.traversal_start_kmer`` and the graph is
     returned after its nodes were made consistent with that start k-mer.

     Raises:
         Exception: if neither ``retrieve`` nor ``traverse`` is set.
     """
     if self.retrieve:
         self.retriever = ContigRetriever(self.graph_builder.build())
         kmer_graph = self.retriever.get_kmer_graph(self.contig_to_retrieve)
         return kmer_graph

     if not self.traverse:
         raise Exception("Need to load a command")

     parser = RandomAccess(self.graph_builder.build())
     engine = Engine(parser, traversal_colors=self.traversal_colors)
     traversed = engine.traverse_from(self.traversal_start_kmer).graph
     interactor = Interactor(traversed)
     seeds = [self.traversal_start_kmer]
     return interactor.make_graph_nodes_consistent(seeds).graph
예제 #6
0
    def test_with_two_subgraphs_returns_all_kmers(self, tmpdir,
                                                  ra_constructor):
        """Traverse from two start k-mers built from separate sequences.

        The graph is built from 'AAAT' and 'GGGC'; traversal starts from
        'AAA' and 'GGG' in both orientations, and the test checks that the
        expected edge from each sequence is present.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex(kmer_size)
                        .with_dna_sequence('AAAT')
                        .with_dna_sequence('GGGC')
                        .build(tmpdir))

        # The graph file is opened in a ``with`` block so the handle is closed
        # deterministically instead of being leaked.  Everything that might
        # still read from the parser stays inside the block.
        with open(output_graph, 'rb') as graph_handle:
            traverser = Engine(ra_constructor(graph_handle),
                               traversal_colors=(0, ),
                               orientation=EngineTraversalOrientation.both)

            # when
            start_kmers = ['AAA', 'GGG']
            expect = KmerGraphExpectation(
                traverser.traverse_from_each_kmer_in_iterable(
                    start_kmers).graph)

            # then
            expect.has_edges('AAA AAT 0', 'CCC GCC 0')
예제 #7
0
def subgraph(argv):
    """Command-line entry point for ``cortexpy subgraph``.

    Parses ``argv``, opens one or more cortex graphs, traverses them from
    every k-mer of the initial contig (or of a FASTA file when
    ``--initial-fasta`` is given) and dumps the traversed graph in cortex
    format to the output.

    Args:
        argv: Argument strings to parse (passed to
            ``ArgumentParser.parse_args``).

    Notes:
        ``args.out`` is not declared in this function; presumably it comes
        from the shared parent parser.  ``'-'`` selects
        ``sys.stdout.buffer``.
    """
    import argparse
    from .shared import get_shared_argparse
    import cortexpy.constants
    shared_parser = get_shared_argparse()
    parser = argparse.ArgumentParser(
        'cortexpy subgraph', parents=[shared_parser],
        description="""
        Find all subgraphs from every k-mer in an initial contig.

        Input and output are cortex graphs.
        """
    )
    parser.add_argument('initial_contig', help="Initial contig from which to start traversal")
    parser.add_argument('--graphs', nargs='+',
                        required=True,
                        help="Input cortexpy graphs."
                             "  Multiple graphs can be specified and are joined on-the-fly.")
    # argparse checks ``choices`` AFTER applying ``type``.  Converting to the
    # enum while listing member *names* as choices therefore rejects values
    # given explicitly on the command line.  The option is parsed as a plain
    # member name and the enum member is looked up by name further down.
    orientation_names = [o.name for o in cortexpy.constants.EngineTraversalOrientation]
    parser.add_argument('--orientation',
                        choices=orientation_names,
                        default=cortexpy.constants.EngineTraversalOrientation.both.name,
                        help='Traversal orientation')
    parser.add_argument('-c', '--colors',
                        nargs='+',
                        type=int,
                        help="""Colors to traverse.  May take multiple color numbers separated by
                        a space.  The traverser will follow all colors
                        specified.  Will follow all colors if not specified.
                        """, default=None)
    parser.add_argument('--initial-fasta', action='store_true',
                        help='Treat initial_contig as a file in FASTA format')
    parser.add_argument('--max-nodes', type=int, default=None,
                        help='Maximum number of nodes to traverse (int).'
                             '  Die without output if max nodes is exceeded')
    parser.add_argument('--logging-interval', type=int, default=90,
                        help='Logging interval.  [default: %(default)s]')
    parser.add_argument('--cache-size', type=int, default=0, help='Number of kmers to cache')
    # NOTE(review): this option is parsed but never read in this function.
    parser.add_argument('--binary-search-cache-size', type=int, default=0,
                        help='Number of kmers to cache for binary search')
    parser.add_argument('--slurp', action='store_true',
                        help='Slurp all cortex graphs before traversal')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    logger = configure_logging_from_args_and_get_logger(args, 'cortexpy.traverse')

    import sys
    from cortexpy.graph.serializer.kmer import dump_colored_de_bruijn_graph_to_cortex
    from cortexpy.graph.parser.random_access_collection import RandomAccessCollection
    from cortexpy.constants import EngineTraversalOrientation
    from cortexpy.graph.traversal.engine import Engine
    from contextlib import ExitStack
    # ExitStack closes every file opened below on exit, including on error.
    # sys.stdout.buffer is deliberately not registered so it is never closed.
    with ExitStack() as stack:
        if args.out == '-':
            output = sys.stdout.buffer
        else:
            output = stack.enter_context(open(args.out, 'wb'))

        if args.slurp:
            from cortexpy.graph.parser.random_access import SlurpedRandomAccess
            RAClass = SlurpedRandomAccess.from_handle
            logger.info("Slurping cortex graphs")
        else:
            from cortexpy.graph.parser.random_access import RandomAccess as RAClass

        if len(args.graphs) == 1:
            ra_parser = RAClass(
                stack.enter_context(open(args.graphs[0], 'rb')),
                kmer_cache_size=args.cache_size
            )
        else:
            ra_parser = RandomAccessCollection(
                [RAClass(stack.enter_context(open(graph_path, 'rb')),
                         kmer_cache_size=args.cache_size)
                 for graph_path in args.graphs])
        engine = Engine(
            ra_parser,
            # ``args.orientation`` is a member name (see the option above).
            orientation=EngineTraversalOrientation[args.orientation],
            max_nodes=args.max_nodes,
            logging_interval=args.logging_interval
        )

        if args.colors is not None:
            engine.traversal_colors = args.colors
        else:
            # Default to every color available in the parser.
            engine.traversal_colors = tuple(range(engine.ra_parser.num_colors))
        logger.info('Traversing colors: ' + ','.join(str(c) for c in engine.traversal_colors))

        if args.initial_fasta:
            engine.traverse_from_each_kmer_in_fasta(args.initial_contig)
        else:
            engine.traverse_from_each_kmer_in(args.initial_contig)

        dump_colored_de_bruijn_graph_to_cortex(engine.graph, output)