Exemple #1
0
def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container, rearrange):
    """
    Combines the scaffold sets of two iterations into a single set.

    Both sets are first synchronized against ``perm_container``. If
    ``rearrange`` is true, the bigger set is rebuilt from the adjacencies
    produced by RearrangementProjector (which tries to keep some small-scale
    rearrangements from the weaker set); otherwise the synchronized bigger
    set is used as is. The result of merging it with the smaller set is then
    passed through _merge_consecutive_contigs and returned.

    In debugging mode the merged links and scaffold permutations are also
    written into the debug directory.
    """
    logger.info("Merging two iterations")

    #bring both scaffold sets onto the same permutations
    synced_big = _update_scaffolds(big_scaffolds, perm_container)
    synced_small = _update_scaffolds(small_scaffolds, perm_container)

    #without rearrangements the bigger set is merged unchanged
    base_scaffolds = synced_big
    if rearrange:
        adjacencies = RearrangementProjector(synced_big, synced_small,
                                             True).project()
        base_scaffolds = build_scaffolds(adjacencies, perm_container,
                                         False, False)

    result = _merge_consecutive_contigs(
        _merge_scaffolds(base_scaffolds, synced_small))

    if debugger.debugging:
        output_links(result,
                     os.path.join(debugger.debug_dir, "merged.links"))
        output_scaffolds_premutations(
            result, os.path.join(debugger.debug_dir, "merged_scaffolds.txt"))

    return result
Exemple #2
0
    def _construct_ancestor(self):
        """
        Runs the iterative scaffolding for the ancestor and writes the output.

        For every stage in ``self.run_stages`` the stage permutations are
        (unless ``self.is_solid_scaffolds`` is set) broken by
        ChimeraDetector4Ancestor, adjacencies are inferred on an ancestral
        breakpoint graph and scaffolds are built from them. Scaffolds of each
        stage after the first are merged into the ones accumulated so far.
        The final scaffolds are named using the permutations of the last
        stage and ``self.naming_ref``, then passed to OutputGenerator, which
        writes into ``self.outDir`` with ``write_fasta=False``.
        """
        ###Enable ChimeraDetector4Ancestor
        if not self.is_solid_scaffolds:
            graphs_by_stage = {
                stage: BreakpointGraph(self.stage_perms[stage],
                                       ancestor=self.ancestor,
                                       ancestral=True)
                for stage in self.run_stages
            }
            chim_detect = ChimeraDetector4Ancestor(graphs_by_stage,
                                                   self.run_stages,
                                                   self.ancestor_seqs)

        seen_stages = []
        scaffolds = None

        ###Iterative scaffolding
        last_stage = self.run_stages[-1]
        for stage in self.run_stages:
            logger.info("Stage \"{0}\"".format(stage.name))
            seen_stages.append(stage)

            #contigs are only broken when scaffolds are not declared solid
            stage_input = (self.stage_perms[stage]
                           if self.is_solid_scaffolds
                           else chim_detect.break_contigs(
                               self.stage_perms[stage], [stage]))
            bp_graph = BreakpointGraph(stage_input,
                                       ancestral=True,
                                       ancestor=self.ancestor)
            adjacencies = AdjacencyInferer(bp_graph,
                                           self.phylogeny,
                                           ancestral=True).infer_adjacencies()
            stage_scaffolds = scfldr.build_scaffolds(adjacencies,
                                                     stage_input,
                                                     ancestral=True)

            #nothing to merge with on the first stage
            if scaffolds is None:
                scaffolds = stage_scaffolds
                continue

            #for merging, contigs are broken with respect to all stages so far
            merge_input = (self.stage_perms[stage]
                           if self.is_solid_scaffolds
                           else chim_detect.break_contigs(
                               self.stage_perms[stage], seen_stages))
            scaffolds = merge.merge_scaffolds(scaffolds,
                                              stage_scaffolds,
                                              merge_input,
                                              stage.rearrange,
                                              ancestral=True)

        scfldr.assign_scaffold_names(scaffolds, self.stage_perms[last_stage],
                                     self.naming_ref)

        ###Output generating of ancestor scaffolds
        logger.info("Done scaffolding for ''{0}''".format(self.ancestor))
        writer = OutputGenerator(self.ancestor_seqs, scaffolds)
        writer.make_output(self.outDir, self.ancestor, write_fasta=False)
Exemple #3
0
def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container, rearrange,
                    ancestral=False):
    """
    Combines the scaffold sets of two iterations into a single set.

    Both sets are first synchronized against ``perm_container``. If
    ``rearrange`` is true, the bigger set is rebuilt from the adjacencies
    produced by RearrangementProjector (which tries to keep some small-scale
    rearrangements from the weaker set); otherwise the synchronized bigger
    set is used as is. The ``ancestral`` flag is forwarded unchanged to
    _update_scaffolds and build_scaffolds.

    Returns the merged scaffolds after _merge_consecutive_contigs. In
    debugging mode the merged links and scaffold permutations are also
    written into the debug directory.
    """
    logger.info("Merging two iterations")

    #bring both scaffold sets onto the same permutations
    synced_big = _update_scaffolds(big_scaffolds, perm_container,
                                   ancestral=ancestral)
    synced_small = _update_scaffolds(small_scaffolds, perm_container,
                                     ancestral=ancestral)

    #without rearrangements the bigger set is merged unchanged
    base_scaffolds = synced_big
    if rearrange:
        adjacencies = RearrangementProjector(synced_big, synced_small,
                                             True).project()
        base_scaffolds = build_scaffolds(adjacencies, perm_container,
                                         False, False, ancestral=ancestral)

    result = _merge_consecutive_contigs(
        _merge_scaffolds(base_scaffolds, synced_small))

    if debugger.debugging:
        output_links(result,
                     os.path.join(debugger.debug_dir, "merged.links"))
        output_scaffolds_premutations(
            result, os.path.join(debugger.debug_dir, "merged_scaffolds.txt"))

    return result
Exemple #4
0
def ancestor_construct(scaffolds, ancestor, target, phylogeny,
                       naming_ref, ancestor_sequences, out_dir, stage_perms=None,
                       run_stages=None, targetDone=False, solid_scaffolds=False):
    """
    Builds scaffolds for the ancestral genome and writes them to ``out_dir``.

    Every run stage except the last one is processed in order: the stage
    permutations are (unless ``solid_scaffolds`` is set) broken by
    ChimeraDetector4Ancestor, adjacencies are inferred on an ancestral
    breakpoint graph, scaffolds are built from them and merged into the
    scaffolds accumulated over the previous stages. The result is named
    using the permutations of the last processed stage and ``naming_ref``
    and then handed to OutputGenerator together with ``ancestor_sequences``.

    ``stage_perms`` (indexed by stage) and ``run_stages`` (a sequence of
    stages) are required even though they default to ``None``; a TypeError
    is raised when either of them is missing. The ``scaffolds``, ``target``
    and ``targetDone`` arguments are accepted but never read here.
    """
    #fail with a readable message instead of "NoneType is not subscriptable"
    if stage_perms is None or run_stages is None:
        raise TypeError("ancestor_construct() requires both 'stage_perms' "
                        "and 'run_stages'")

    #the last run stage is left out of the ancestor reconstruction
    #NOTE(review): this leaves no stage at all when only one is given, and
    #the run_stages[-1] lookup below then fails -- confirm callers always
    #pass at least two stages
    run_stages = run_stages[:-1]

    ###Enable ChimeraDetector4Ancestor
    chim_detect = None
    if not solid_scaffolds:
        raw_bp_graphs = {}
        for stage in run_stages:
            raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage],
                                                   ancestor=ancestor,
                                                   ancestral=True)
        chim_detect = ChimeraDetector4Ancestor(raw_bp_graphs, run_stages,
                                               ancestor_sequences)

    prev_stages = []
    #always start from scratch: scaffolds passed in by the caller are ignored
    scaffolds = None

    ###apply for all stages
    last_stage = run_stages[-1]
    for stage in run_stages:
        logger.info("Stage \"{0}\"".format(stage.name))
        prev_stages.append(stage)

        if not solid_scaffolds:
            broken_perms = chim_detect.break_contigs(stage_perms[stage],
                                                     [stage])
        else:
            broken_perms = stage_perms[stage]
        breakpoint_graph = BreakpointGraph(broken_perms, ancestral=True,
                                           ancestor=ancestor, name=stage.name)
        adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny,
                                       ancestral=True)
        adjacencies = adj_inferer.infer_adjacencies(
            debug=True, filename="%s.adj" % stage.name)
        cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms,
                                               ancestral=True)
        oDebugger(cur_scaffolds)

        if scaffolds is not None:
            #for merging, contigs are broken with respect to all stages so far
            if not solid_scaffolds:
                merging_perms = chim_detect.break_contigs(stage_perms[stage],
                                                          prev_stages)
            else:
                merging_perms = stage_perms[stage]
            scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                              merging_perms, stage.rearrange,
                                              ancestral=True)
        else:
            scaffolds = cur_scaffolds

    scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage], naming_ref)

    ###output generating of ancestor scaffolds
    logger.info("Done scaffolding for ''{0}''".format(ancestor))
    out_gen = OutputGenerator(ancestor_sequences, scaffolds)
    out_gen.make_output(out_dir, ancestor)
Exemple #5
0
def _run_ragout(args):
    """
    Top-level logic of the program.

    In order: prepares the output and debug directories and the log file,
    parses the recipe and runs the selected synteny backend to obtain
    permutation files, sets up the phylogeny, builds one PermutationContainer
    and raw BreakpointGraph per run stage, then iterates over the stages
    inferring adjacencies, building scaffolds and merging them with the
    scaffolds of the previous stages. The final scaffolds are named, their
    gaps updated, optionally refined with the overlap graph (``args.refine``)
    and written by OutputGenerator into ``args.out_dir``.

    Chimera detection is skipped when ``args.solid_scaffolds`` is set.
    """
    out_dir = args.out_dir
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    debug_root = os.path.join(out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    _enable_logging(os.path.join(out_dir, "ragout.log"), args.debug)
    logger.info("Starting Ragout v%s", str(__version__))

    #parsing recipe, preparing synteny backend
    _check_extern_modules(args.synteny_backend)
    available_backends = SyntenyBackend.get_available_backends()
    synteny_backend = available_backends[args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)
    synteny_sizes = _get_synteny_scale(recipe, synteny_backend)

    #Running synteny backend to get synteny blocks
    perm_files = synteny_backend.make_permutations(recipe, synteny_sizes,
                                                   out_dir, args.overwrite,
                                                   args.threads)

    #setting up phylogenetic tree
    phylogeny, naming_ref = _get_phylogeny_and_naming_ref(
        recipe, perm_files[synteny_sizes[ID_SMALLEST]])

    #parsing permutation files, apply filters and build breakpoint graph
    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    run_stages = _make_run_stages(synteny_sizes, args.resolve_repeats)
    for stage in run_stages:
        #each stage gets its own debug subdirectory
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        container = PermutationContainer(perm_files[stage.block_size],
                                         recipe, stage.repeats,
                                         stage.ref_indels, phylogeny)
        stage_perms[stage] = container
        raw_bp_graphs[stage] = BreakpointGraph(container)

    #initializing chimera detector
    target_sequences = read_fasta_dict(synteny_backend.get_target_fasta())
    chim_detect = (None if args.solid_scaffolds
                   else ChimeraDetector(raw_bp_graphs, run_stages,
                                        target_sequences))

    #inferring adjacencies: loop over stages (iterations)
    scaffolds = None
    seen_stages = []
    for stage in run_stages:
        logger.info("Stage \"%s\"", stage.name)
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        seen_stages.append(stage)

        if args.solid_scaffolds:
            perms = stage_perms[stage]
        else:
            perms = chim_detect.break_contigs(stage_perms[stage], [stage])
        bp_graph = BreakpointGraph(perms)

        adjacencies = AdjacencyInferer(bp_graph, phylogeny).infer_adjacencies()
        stage_scaffolds = scfldr.build_scaffolds(adjacencies, perms,
                                                 debug_output=True,
                                                 correct_distances=False)

        if scaffolds is not None:
            #for merging, contigs are broken with respect to all stages so far;
            #these permutations are also the ones given to get_breakpoints
            if not args.solid_scaffolds:
                perms = chim_detect.break_contigs(stage_perms[stage],
                                                  seen_stages)
            stage_scaffolds = merge.merge_scaffolds(scaffolds, stage_scaffolds,
                                                    perms, stage.rearrange)

        merge.get_breakpoints(stage_scaffolds, bp_graph, perms)
        scaffolds = stage_scaffolds

    debugger.set_debug_dir(debug_root)

    #name scaffolds according to one of the references
    naming_perms = stage_perms[run_stages[ID_SMALLEST]]
    scfldr.assign_scaffold_names(scaffolds, naming_perms, naming_ref)
    scfldr.update_gaps(scaffolds)

    #refine with the assembly graph
    if args.refine:
        overlap_file = os.path.join(out_dir, "contigs_overlap.dot")
        overlap.make_overlap_graph(synteny_backend.get_target_fasta(),
                                   overlap_file)
        scaffolds = asref.refine_scaffolds(overlap_file, scaffolds,
                                           target_sequences)
        #keep a copy for debugging before the file is removed
        if args.debug:
            shutil.copy(overlap_file, debugger.debug_dir)
        os.remove(overlap_file)

    OutputGenerator(target_sequences, scaffolds).make_output(out_dir,
                                                             recipe["target"])
    logger.info("Done!")
Exemple #6
0
def run_ragout(args):
    """
    Top-level logic of the program.

    In order: prepares the output and debug directories and the log file,
    parses the recipe, picks the synteny block scale (from the recipe or
    inferred by the backend), runs the backend to obtain permutation files
    and builds one PermutationContainer and raw BreakpointGraph per run
    stage.

    Unless ``args.targetDone`` is set, the target genome is then scaffolded
    stage by stage, named, optionally refined with the overlap graph
    (skipped with ``args.no_refine``) and written into ``args.out_dir``.
    If ``args.ancestor_reconstruct`` is set, ancestor_construct is run
    afterwards.

    Chimera detection is skipped when ``args.solid_scaffolds`` is set.

    Raises RuntimeError if ancestor reconstruction is requested but the
    backend reports no ancestor FASTA.
    """
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)

    debug_root = os.path.join(args.out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    out_log = os.path.join(args.out_dir, "ragout.log")
    enable_logging(out_log, args.debug)
    logger.info("Starting Ragout v{0}".format(__version__))

    check_extern_modules(args.synteny_backend)
    all_backends = SyntenyBackend.get_available_backends()
    backend = all_backends[args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)

    #Setting synteny block sizes
    if "blocks" in recipe:
        scale = recipe["blocks"]
    else:
        scale = backend.infer_block_scale(recipe)
        logger.info("Synteny block scale set to '{0}'".format(scale))
    synteny_blocks = config.vals["blocks"][scale]

    #Running backend to get synteny blocks
    perm_files = backend.make_permutations(recipe, synteny_blocks, args.out_dir,
                                           args.overwrite, args.threads)
    run_stages = make_run_stages(synteny_blocks, args.resolve_repeats)
    phylo_perm_file = perm_files[synteny_blocks[-1]]
    phylogeny, naming_ref = get_phylogeny_and_naming_ref(recipe,
                                                         phylo_perm_file)

    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    for stage in run_stages:
        #each stage gets its own debug subdirectory
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        stage_perms[stage] = PermutationContainer(perm_files[stage.block_size],
                                                  recipe, stage.repeats,
                                                  stage.ref_indels, phylogeny)
        raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage])

    target_sequences = read_fasta_dict(backend.get_target_fasta())

    #stays None when the backend has no ancestor FASTA, so that the ancestor
    #step below can report it instead of failing on an unbound local
    ancestor_sequences = None
    ancestor_fasta = backend.get_ancestor_fasta()
    if ancestor_fasta:
        ancestor_sequences = read_fasta_dict(ancestor_fasta)

    chim_detect = None
    if not args.solid_scaffolds:
        chim_detect = ChimeraDetector(raw_bp_graphs, run_stages, target_sequences)

    #####
    scaffolds = None
    last_stage = run_stages[-1]
    if not args.targetDone:
        prev_stages = []
        for stage in run_stages:
            logger.info("Stage \"{0}\"".format(stage.name))
            debugger.set_debug_dir(os.path.join(debug_root, stage.name))
            prev_stages.append(stage)

            if not args.solid_scaffolds:
                broken_perms = chim_detect.break_contigs(stage_perms[stage], [stage])
            else:
                broken_perms = stage_perms[stage]
            breakpoint_graph = BreakpointGraph(broken_perms)

            adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny)
            adjacencies = adj_inferer.infer_adjacencies()
            cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms)

            if scaffolds is not None:
                #for merging, contigs are broken with respect to all stages
                #processed so far
                if not args.solid_scaffolds:
                    merging_perms = chim_detect.break_contigs(stage_perms[stage],
                                                              prev_stages)
                else:
                    merging_perms = stage_perms[stage]
                scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                                  merging_perms, stage.rearrange)
            else:
                scaffolds = cur_scaffolds
        debugger.set_debug_dir(debug_root)
        ####
        scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage], naming_ref)

        if not args.no_refine:
            out_overlap = os.path.join(args.out_dir, "contigs_overlap.dot")
            overlap.make_overlap_graph(backend.get_target_fasta(), out_overlap)
            scaffolds = asref.refine_scaffolds(out_overlap, scaffolds,
                                               target_sequences)
            #keep a copy for debugging before the file is removed
            if args.debug:
                shutil.copy(out_overlap, debugger.debug_dir)
            os.remove(out_overlap)

        out_gen = OutputGenerator(target_sequences, scaffolds)
        out_gen.make_output(args.out_dir, recipe["target"])

    ###Ancestor reconstruction
    if args.ancestor_reconstruct:
        #checked here rather than up front so that the target output above
        #is still produced before the run is aborted
        if ancestor_sequences is None:
            raise RuntimeError("Ancestor reconstruction was requested, but "
                               "the synteny backend provides no ancestor "
                               "FASTA file")
        ancestor_construct(scaffolds, recipe['ancestor'], recipe['target'],
                           phylogeny, naming_ref,
                           ancestor_sequences, args.out_dir,
                           stage_perms=stage_perms,
                           run_stages=run_stages, targetDone=args.targetDone,
                           solid_scaffolds=args.solid_scaffolds)
    ###
    logger.info("Done!")
Exemple #7
0
def _run_ragout(args):
    """
    Top-level logic of the program.

    In order: prepares the output and debug directories and the log file,
    parses the recipe and runs the selected synteny backend to obtain
    permutation files, sets up the phylogeny, builds one PermutationContainer
    and raw BreakpointGraph per run stage, then iterates over the stages
    inferring adjacencies, building scaffolds and merging them with the
    scaffolds of the previous stages. The final scaffolds are named,
    optionally refined with the overlap graph (``args.refine``) and written
    by OutputGenerator into ``args.out_dir``.

    Chimera detection is skipped when ``args.solid_scaffolds`` is set.
    """
    out_dir = args.out_dir
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    debug_root = os.path.join(out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    _enable_logging(os.path.join(out_dir, "ragout.log"), args.debug)
    logger.info("Starting Ragout v{0}".format(__version__))

    #parsing recipe, preparing synteny backend
    _check_extern_modules(args.synteny_backend)
    available_backends = SyntenyBackend.get_available_backends()
    synteny_backend = available_backends[args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)
    synteny_sizes = _get_synteny_scale(recipe, synteny_backend)

    #Running synteny backend to get synteny blocks
    perm_files = synteny_backend.make_permutations(recipe, synteny_sizes,
                                                   out_dir, args.overwrite,
                                                   args.threads)

    #setting up phylogenetic tree
    phylogeny, naming_ref = _get_phylogeny_and_naming_ref(
        recipe, perm_files[synteny_sizes[ID_SMALLEST]])

    #parsing permutation files, apply filters and build breakpoint graph
    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    run_stages = _make_run_stages(synteny_sizes, args.resolve_repeats)
    for stage in run_stages:
        #each stage gets its own debug subdirectory
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        container = PermutationContainer(perm_files[stage.block_size],
                                         recipe, stage.repeats,
                                         stage.ref_indels, phylogeny)
        stage_perms[stage] = container
        raw_bp_graphs[stage] = BreakpointGraph(container)

    #initializing chimera detector
    target_sequences = read_fasta_dict(synteny_backend.get_target_fasta())
    chim_detect = (None if args.solid_scaffolds
                   else ChimeraDetector(raw_bp_graphs, run_stages,
                                        target_sequences))

    #inferring adjacencies: loop over stages (iterations)
    scaffolds = None
    seen_stages = []
    for stage in run_stages:
        logger.info("Stage \"{0}\"".format(stage.name))
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        seen_stages.append(stage)

        if args.solid_scaffolds:
            perms = stage_perms[stage]
        else:
            perms = chim_detect.break_contigs(stage_perms[stage], [stage])
        bp_graph = BreakpointGraph(perms)

        adjacencies = AdjacencyInferer(bp_graph, phylogeny).infer_adjacencies()
        stage_scaffolds = scfldr.build_scaffolds(adjacencies, perms)

        if scaffolds is not None:
            #for merging, contigs are broken with respect to all stages so far;
            #these permutations are also the ones given to get_breakpoints
            if not args.solid_scaffolds:
                perms = chim_detect.break_contigs(stage_perms[stage],
                                                  seen_stages)
            stage_scaffolds = merge.merge_scaffolds(scaffolds, stage_scaffolds,
                                                    perms, stage.rearrange)

        merge.get_breakpoints(stage_scaffolds, bp_graph, perms)
        scaffolds = stage_scaffolds

    debugger.set_debug_dir(debug_root)

    #name scaffolds according to one of the references
    naming_perms = stage_perms[run_stages[ID_SMALLEST]]
    scfldr.assign_scaffold_names(scaffolds, naming_perms, naming_ref)

    #refine with the assembly graph
    if args.refine:
        overlap_file = os.path.join(out_dir, "contigs_overlap.dot")
        overlap.make_overlap_graph(synteny_backend.get_target_fasta(),
                                   overlap_file)
        scaffolds = asref.refine_scaffolds(overlap_file, scaffolds,
                                           target_sequences)
        #keep a copy for debugging before the file is removed
        if args.debug:
            shutil.copy(overlap_file, debugger.debug_dir)
        os.remove(overlap_file)

    OutputGenerator(target_sequences, scaffolds).make_output(out_dir,
                                                             recipe["target"])
    logger.info("Done!")