Beispiel #1
0
 def _traverse_a_branch_from_queue(self):
     """Process the next queued branch setup.

     Pops the left-most setup from ``self.branch_queue``, traverses a
     branch with the traverser registered for the setup's traversal color,
     composes the resulting branch graph into ``self.graph``, and then hands
     the branch to the connect/link helpers.
     """
     branch_setup = self.branch_queue.popleft()
     traverser_for_color = self.branch_traverser[branch_setup.traversal_color]
     traversed_branch = traverser_for_color.traverse_from(
         branch_setup.start_string,
         orientation=branch_setup.orientation,
         parent_graph=self.graph,
     )

     # Compose the freshly traversed branch into the accumulated graph
     # before wiring it up to its parent.
     graph_interactor = Interactor.from_graph(self.graph)
     graph_interactor.compose_in_graph(traversed_branch.graph)

     self._connect_branch_to_parent_graph(traversed_branch, branch_setup)
     self._link_branch_and_queue_neighbor_traversals(traversed_branch)
Beispiel #2
0
 def _traverse_from_each_kmer_in(self, kmer_generator):
     """Traverse from every k-mer yielded by ``kmer_generator``.

     Each traversal result is composed into ``self.graph``. A ``KeyError``
     raised while traversing or composing is ignored and the next k-mer is
     tried (NOTE(review): presumably the start k-mer is missing from the
     underlying graph -- confirm).

     :param kmer_generator: iterable of start k-mers.
     :return: ``self``, so calls can be chained.
     :raises Exception: when ``self.max_nodes`` is truthy and the graph
         holds more than ``self.max_nodes`` nodes after handling a k-mer.
     """
     limit_message = ("Terminating contig traversal after kmer {}"
                      " because max node limit is reached")
     for start_kmer in kmer_generator:
         try:
             graph_interactor = Interactor.from_graph(self.graph)
             traversal = self._traverse_from(start_kmer)
             graph_interactor.compose_in_graph(traversal.graph)
             self.log_graph_size()
         except KeyError:
             pass

         # A falsy max_nodes (None or 0) disables the size limit.
         if not self.max_nodes:
             continue
         if len(self.graph) > self.max_nodes:
             raise Exception(limit_message.format(start_kmer))
     return self
Beispiel #3
0
 def build(self):
     """Build the graph and wrap it in a ``UnitigFinder``.

     The graph produced by ``self.builder`` is made consistent with
     ``self.seed_kmers``. When no seeds were configured, the first node
     obtained by iterating the graph is stored as the only seed.

     :return: the object returned by ``UnitigFinder.from_graph`` for the
         consistent graph, the builder's colors and ``self.test_coverage``.
     """
     built_graph = self.builder.build()

     if self.seed_kmers is None:
         # No explicit seeds: fall back to the first node of the graph.
         first_node = next(iter(built_graph))
         self.seed_kmers = [first_node]

     consistent = Interactor.from_graph(built_graph) \
         .make_graph_nodes_consistent(self.seed_kmers)
     return UnitigFinder.from_graph(
         consistent.graph,
         colors=list(self.builder.colors),
         test_coverage=self.test_coverage,
     )
Beispiel #4
0
def assemble(argv):
    """Run the ``cortexpy assemble`` sub-command.

    Traverses the cortex graph starting from every k-mer of the start
    sequences FASTA, makes the traversed graph consistent with those
    k-mers, and writes all simple paths as FASTA records.

    Output goes to ``args.out``; the value ``'-'`` selects stdout.
    (``args.out`` is not declared here -- presumably it comes from the
    shared parser; confirm against ``get_shared_argparse``.)

    :param argv: list of command-line arguments for this sub-command.
    """
    import argparse
    from cortexpy.command.shared import get_shared_argparse
    shared_parser = get_shared_argparse()

    parser = argparse.ArgumentParser(prog='cortexpy assemble', parents=[shared_parser], description="""
    Assemble all possible transcripts in <graph> from all k-mers in <start-sequences> and print the
    resulting transcripts as a FASTA to stdout. All specified colors are traversed and collapsed
    before output.
    """)
    parser.add_argument('graph', help='cortex graph')
    parser.add_argument('start_sequences_fasta', help='FASTA file with sequences to start from')
    parser.add_argument('--color', type=int, help='Restrict view to single color')
    parser.add_argument('--max-nodes', type=int, default=1000,
                        help='Maximum number of nodes to traverse [default: %(default)s]')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    # The returned logger is not used in this command; the call is kept for
    # whatever logging setup it performs.
    configure_logging_from_args_and_get_logger(args, 'cortexpy.assemble')

    import contextlib
    import sys
    from Bio import SeqIO
    from cortexpy.utils import kmerize_fasta
    from cortexpy.graph.interactor import Interactor
    from cortexpy.graph.parser.random_access import RandomAccess
    from cortexpy.constants import EngineTraversalOrientation
    from cortexpy.graph.traversal.engine import Engine

    # Every file opened here is registered on the stack so it is closed (and
    # the output flushed) on any exit path. stdout is never registered and
    # therefore never closed.
    with contextlib.ExitStack() as open_files:
        if args.out == '-':
            output = sys.stdout
        else:
            output = open_files.enter_context(open(args.out, 'wt'))

        # The graph handle must stay open for the whole traversal because
        # the parser is handed the open stream.
        graph_handle = open_files.enter_context(open(args.graph, 'rb'))
        random_access = RandomAccess(graph_handle)
        if args.color is None:
            colors = list(range(random_access.num_colors))
        else:
            colors = [args.color]
        traverser = Engine(
            random_access,
            traversal_colors=colors,
            orientation=EngineTraversalOrientation.both,
            max_nodes=args.max_nodes,
        )
        traverser.traverse_from_each_kmer_in_fasta(args.start_sequences_fasta)
        kmers = kmerize_fasta(args.start_sequences_fasta, traverser.ra_parser.kmer_size)
        interactor = Interactor.from_graph(traverser.graph).make_graph_nodes_consistent(
            seed_kmer_strings=kmers)

        seq_record_generator = interactor.all_simple_paths()

        # Records are consumed inside the ``with`` block, while the handles
        # are still open.
        SeqIO.write(seq_record_generator, output, 'fasta')
Beispiel #5
0
def traverse(argv):
    """Run the ``cortexpy traverse`` sub-command.

    Loads a cortex graph (from a file, or from stdin when the graph argument
    is ``'-'``), makes it consistent (with the k-mers of ``--seed-strings``
    when given), and writes every simple path as a FASTA record to
    ``args.out`` (``'-'`` selects stdout). With ``--to-json`` the graph is
    printed as JSON to stdout instead and no paths are written.

    (``args.out`` is not declared here -- presumably it comes from the
    shared parser; confirm against ``get_shared_argparse``.)

    :param argv: list of command-line arguments for this sub-command.
    :return: ``None`` on success; ``1`` when ``--extra-start-kmer`` is not
        found in the loaded graph; ``EXIT_CODES['MAX_PATH_EXCEEDED']`` when
        more than ``--max-paths`` paths are encountered.
    """
    import argparse
    from cortexpy.command.shared import get_shared_argparse
    shared_parser = get_shared_argparse()

    parser = argparse.ArgumentParser(prog='cortexpy traverse',
                                     parents=[shared_parser],
                                     description="""
        Traverse all simple paths between all sources and targets of an input graph.

        Input is a cortex graph. Output is a FASTA.

        This tool also allows the creation of a JSON representation of a CORTEX graph that is consistent 
        with seed strings by using the --to-json and --seed-strings arguments.

        If a links file is supplied, then branches consistent with the links will be preferred in
        the traversal. 
        """)
    parser.add_argument('graph',
                        help="cortex graph. Slurp graph from stdin is '-'.")
    parser.add_argument('--to-json', action='store_true')
    parser.add_argument(
        '--seed-strings',
        nargs='*',
        default=[],
        help="Strings with seed kmers from which to start contig traversal. "
        "Multiple strings can be specified.")
    parser.add_argument('--color',
                        type=int,
                        help='Restrict view to single color')
    parser.add_argument('--max-paths',
                        type=int,
                        default=0,
                        help='Return exit status 64 if more than this '
                        'number of paths are encountered. '
                        '0 turns off this check.')
    parser.add_argument(
        '--graph-index',
        type=int,
        default=0,
        help='Graph index to be added to description of all output paths')
    parser.add_argument(
        '--extra-start-kmer',
        help='Disconnect this k-mer from incoming k-mers before '
        'candidate transcript creation. '
        'This argument may fail if not used together with --seed-strings.')
    parser.add_argument('--links-file',
                        help='gzipped Mccortex-style links file for graph')
    args = parser.parse_args(argv)

    from cortexpy.logging_config import configure_logging_from_args_and_get_logger
    logger = configure_logging_from_args_and_get_logger(
        args, 'cortexpy.traverse')

    import contextlib
    import sys
    import gzip
    from cortexpy.graph.interactor import Interactor
    from cortexpy.graph.serializer.serializer import Serializer
    from cortexpy.graph.parser.streaming import load_cortex_graph
    from cortexpy.links import Links
    from . import get_exit_code_yaml_path
    import yaml

    # Close the exit-code file as soon as it has been parsed.
    with open(get_exit_code_yaml_path(), 'rt') as exit_code_file:
        EXIT_CODES = yaml.load(exit_code_file, Loader=yaml.FullLoader)

    # Every file opened below is registered on the stack so that it is
    # closed (and the output flushed) on each return path. stdin and stdout
    # are never registered and therefore never closed.
    with contextlib.ExitStack() as open_files:
        if args.out == '-':
            output = sys.stdout
        else:
            output = open_files.enter_context(open(args.out, 'wt'))

        logger.info('Loading graph: %s', args.graph)
        if args.graph == '-':
            graph = load_cortex_graph(sys.stdin.buffer)
        else:
            graph_file = open_files.enter_context(open(args.graph, 'rb'))
            graph = load_cortex_graph(graph_file)
        logger.info('Loaded %s kmers', len(graph))

        consistent_graph = None
        if args.seed_strings:
            seed_kmer_strings = strings_to_kmer_strings(args.seed_strings,
                                                        graph.graph['kmer_size'])
            logger.info(
                'Making graph consistent with %s kmers from --seed-strings',
                len(seed_kmer_strings))
            consistent_graph = Interactor(graph) \
                .make_graph_nodes_consistent(seed_kmer_strings) \
                .graph

        # ``is None`` checks are used below instead of truthiness: the
        # question is whether a consistent graph was computed, not whether
        # that graph happens to be empty.
        if args.to_json:
            logger.info('Writing JSON representation of graph to STDOUT')
            if consistent_graph is not None:
                graph = consistent_graph
            print(Serializer(graph).to_json())
            return

        if consistent_graph is None:
            logger.info('Making graph consistent')
            consistent_graph = Interactor.from_graph(graph) \
                .make_graph_nodes_consistent() \
                .graph

        if args.extra_start_kmer and args.extra_start_kmer not in graph:
            logger.error('Could not find extra start kmer (%s) in graph',
                         args.extra_start_kmer)
            return 1

        links = None
        if args.links_file is not None:
            logger.info('Loading links file %s', args.links_file)
            links_file = open_files.enter_context(
                gzip.open(args.links_file, 'rb'))
            links = Links.from_binary_stream(links_file)

        seq_record_generator = Interactor(consistent_graph) \
            .all_simple_paths(args.extra_start_kmer, links=links)
        seq_record_generator = annotated_seq_records(seq_record_generator,
                                                     graph_idx=args.graph_index)
        if args.max_paths > 0:
            logger.info('Exiting after element %s', args.max_paths)
            seq_record_generator = raise_after_nth_element(seq_record_generator,
                                                           args.max_paths)
        logger.info('Writing seq records to %s', args.out)
        try:
            # The generator is consumed here, while all handles are open.
            for record in seq_record_generator:
                output.write(record.format('fasta'))
        except IndexError:
            # An IndexError during iteration is treated as the max-paths
            # limit having been exceeded.
            logger.error('Max paths (%s) exceeded', args.max_paths)
            return EXIT_CODES['MAX_PATH_EXCEEDED']
Beispiel #6
0
 def build(self):
     """Return ``self.graph``, made consistent with the seeds when any are set.

     When ``self.consistent_seeds`` is truthy, ``self.graph`` is replaced by
     the graph obtained from ``make_graph_nodes_consistent`` before it is
     returned; otherwise the graph is returned untouched.
     """
     seeds = self.consistent_seeds
     if not seeds:
         return self.graph

     interactor = Interactor.from_graph(self.graph)
     self.graph = interactor.make_graph_nodes_consistent(seeds).graph
     return self.graph