def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container, rearrange):
    """
    Merges scaffold sets from different iterations. If rearrangements
    are allowed, tries to keep some small-scale rearrangements from the
    weaker scaffold set.
    """
    logger.info("Merging two iterations")

    #synchronizing scaffolds to the same permutations
    big_updated = _update_scaffolds(big_scaffolds, perm_container)
    small_updated = _update_scaffolds(small_scaffolds, perm_container)

    if rearrange:
        projector = RearrangementProjector(big_updated, small_updated, True)
        new_adj = projector.project()
        big_rearranged = build_scaffolds(new_adj, perm_container, False, False)
    else:
        big_rearranged = big_updated

    merged_scf = _merge_scaffolds(big_rearranged, small_updated)
    merged_scf = _merge_consecutive_contigs(merged_scf)

    if debugger.debugging:
        links_out = os.path.join(debugger.debug_dir, "merged.links")
        output_links(merged_scf, links_out)
        perms_out = os.path.join(debugger.debug_dir, "merged_scaffolds.txt")
        output_scaffolds_premutations(merged_scf, perms_out)

    return merged_scf
def _construct_ancestor(self):
    ###Enable ChimeraDetector4Ancestor
    if not self.is_solid_scaffolds:
        raw_bp_graphs = {}
        for stage in self.run_stages:
            raw_bp_graphs[stage] = BreakpointGraph(self.stage_perms[stage],
                                                   ancestor=self.ancestor,
                                                   ancestral=True)
        chim_detect = ChimeraDetector4Ancestor(raw_bp_graphs, self.run_stages,
                                               self.ancestor_seqs)

    prev_stages = []
    scaffolds = None

    ###Iterative scaffolding
    last_stage = self.run_stages[-1]
    for stage in self.run_stages:
        logger.info("Stage \"{0}\"".format(stage.name))
        #debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        prev_stages.append(stage)

        if not self.is_solid_scaffolds:
            broken_perms = chim_detect.break_contigs(self.stage_perms[stage],
                                                     [stage])
        else:
            broken_perms = self.stage_perms[stage]

        breakpoint_graph = BreakpointGraph(broken_perms, ancestral=True,
                                           ancestor=self.ancestor)
        adj_inferer = AdjacencyInferer(breakpoint_graph, self.phylogeny,
                                       ancestral=True)
        adjacencies = adj_inferer.infer_adjacencies()
        cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms,
                                               ancestral=True)

        if scaffolds is not None:
            if not self.is_solid_scaffolds:
                merging_perms = chim_detect.break_contigs(
                    self.stage_perms[stage], prev_stages)
            else:
                merging_perms = self.stage_perms[stage]
            scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                              merging_perms, stage.rearrange,
                                              ancestral=True)
        else:
            scaffolds = cur_scaffolds

    scfldr.assign_scaffold_names(scaffolds, self.stage_perms[last_stage],
                                 self.naming_ref)

    ###Generate output for the ancestor scaffolds
    logger.info("Done scaffolding for '{0}'".format(self.ancestor))
    out_gen = OutputGenerator(self.ancestor_seqs, scaffolds)
    out_gen.make_output(self.outDir, self.ancestor, write_fasta=False)
def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container, rearrange,
                    ancestral=False):
    """
    Merges scaffold sets from different iterations. If rearrangements
    are allowed, tries to keep some small-scale rearrangements from the
    weaker scaffold set.
    """
    logger.info("Merging two iterations")

    #synchronizing scaffolds to the same permutations
    big_updated = _update_scaffolds(big_scaffolds, perm_container,
                                    ancestral=ancestral)
    small_updated = _update_scaffolds(small_scaffolds, perm_container,
                                      ancestral=ancestral)

    if rearrange:
        projector = RearrangementProjector(big_updated, small_updated, True)
        new_adj = projector.project()
        big_rearranged = build_scaffolds(new_adj, perm_container, False, False,
                                         ancestral=ancestral)
    else:
        big_rearranged = big_updated

    merged_scf = _merge_scaffolds(big_rearranged, small_updated)
    merged_scf = _merge_consecutive_contigs(merged_scf)

    if debugger.debugging:
        links_out = os.path.join(debugger.debug_dir, "merged.links")
        output_links(merged_scf, links_out)
        perms_out = os.path.join(debugger.debug_dir, "merged_scaffolds.txt")
        output_scaffolds_premutations(merged_scf, perms_out)

    return merged_scf
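# The helper below is a minimal illustrative sketch, not part of the module
# above: it captures the merge-or-initialize pattern that the stage loops
# further down repeat, calling merge_scaffolds with ancestral=True only on
# the ancestral code path. The name _merge_stage is hypothetical.
def _merge_stage(prev_scaffolds, cur_scaffolds, merging_perms, stage,
                 ancestral=False):
    #first stage: nothing accumulated yet, keep the current scaffolds
    if prev_scaffolds is None:
        return cur_scaffolds
    #later stages: merge the current scaffolds into the accumulated set
    return merge_scaffolds(prev_scaffolds, cur_scaffolds,
                           merging_perms, stage.rearrange,
                           ancestral=ancestral)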
def ancestor_construct(scaffolds, ancestor, target, phylogeny, naming_ref,
                       ancestor_sequences, out_dir, stage_perms=None,
                       run_stages=None, targetDone=False, solid_scaffolds=False):
    run_stages = run_stages[:-1]

    ###Enable ChimeraDetector4Ancestor
    if not solid_scaffolds:
        raw_bp_graphs = {}
        for stage in run_stages:
            raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage],
                                                   ancestor=ancestor,
                                                   ancestral=True)
        chim_detect = ChimeraDetector4Ancestor(raw_bp_graphs, run_stages,
                                               ancestor_sequences)

    prev_stages = []
    scaffolds = None

    ###Iterative scaffolding over all stages
    last_stage = run_stages[-1]
    for stage in run_stages:
        logger.info("Stage \"{0}\"".format(stage.name))
        #debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        prev_stages.append(stage)

        if not solid_scaffolds:
            broken_perms = chim_detect.break_contigs(stage_perms[stage], [stage])
        else:
            broken_perms = stage_perms[stage]

        breakpoint_graph = BreakpointGraph(broken_perms, ancestral=True,
                                           ancestor=ancestor, name=stage.name)
        adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny,
                                       ancestral=True)
        adjacencies = adj_inferer.infer_adjacencies(debug=True,
                                                    filename="%s.adj" % stage.name)
        cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms,
                                               ancestral=True)
        oDebugger(cur_scaffolds)

        if scaffolds is not None:
            if not solid_scaffolds:
                merging_perms = chim_detect.break_contigs(stage_perms[stage],
                                                          prev_stages)
            else:
                merging_perms = stage_perms[stage]
            scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                              merging_perms, stage.rearrange,
                                              ancestral=True)
        else:
            scaffolds = cur_scaffolds

    scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage], naming_ref)

    ###Generate output for the ancestor scaffolds
    logger.info("Done scaffolding for '{0}'".format(ancestor))
    out_gen = OutputGenerator(ancestor_sequences, scaffolds)
    out_gen.make_output(out_dir, ancestor)
def _run_ragout(args):
    """
    Top-level logic of the program
    """
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)

    debug_root = os.path.join(args.out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    out_log = os.path.join(args.out_dir, "ragout.log")
    _enable_logging(out_log, args.debug)
    logger.info("Starting Ragout v%s", str(__version__))

    #parsing recipe, preparing synteny backend
    _check_extern_modules(args.synteny_backend)
    synteny_backend = SyntenyBackend.get_available_backends() \
                                        [args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)
    synteny_sizes = _get_synteny_scale(recipe, synteny_backend)

    #Running synteny backend to get synteny blocks
    perm_files = synteny_backend.make_permutations(recipe, synteny_sizes,
                                                   args.out_dir, args.overwrite,
                                                   args.threads)

    #setting up phylogenetic tree
    phylo_perm_file = perm_files[synteny_sizes[ID_SMALLEST]]
    phylogeny, naming_ref = _get_phylogeny_and_naming_ref(recipe,
                                                          phylo_perm_file)

    #parsing permutation files, apply filters and build breakpoint graph
    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    run_stages = _make_run_stages(synteny_sizes, args.resolve_repeats)
    for stage in run_stages:
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        stage_perms[stage] = PermutationContainer(perm_files[stage.block_size],
                                                  recipe, stage.repeats,
                                                  stage.ref_indels, phylogeny)
        raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage])

    #initializing chimera detector
    target_sequences = read_fasta_dict(synteny_backend.get_target_fasta())
    chim_detect = None
    if not args.solid_scaffolds:
        chim_detect = ChimeraDetector(raw_bp_graphs, run_stages,
                                      target_sequences)

    #inferring adjacencies: loop over stages (iterations)
    scaffolds = None
    prev_stages = []
    for stage in run_stages:
        logger.info("Stage \"%s\"", stage.name)
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        prev_stages.append(stage)

        broken_perms = stage_perms[stage]
        if not args.solid_scaffolds:
            broken_perms = chim_detect.break_contigs(stage_perms[stage], [stage])
        breakpoint_graph = BreakpointGraph(broken_perms)

        adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny)
        adjacencies = adj_inferer.infer_adjacencies()
        cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms,
                                               debug_output=True,
                                               correct_distances=False)

        if scaffolds is not None:
            if not args.solid_scaffolds:
                broken_perms = chim_detect.break_contigs(stage_perms[stage],
                                                         prev_stages)
            cur_scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                                  broken_perms, stage.rearrange)
        merge.get_breakpoints(cur_scaffolds, breakpoint_graph, broken_perms)
        scaffolds = cur_scaffolds
    debugger.set_debug_dir(debug_root)

    ####
    #name scaffolds according to one of the references
    last_stage = run_stages[ID_SMALLEST]
    scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage], naming_ref)
    scfldr.update_gaps(scaffolds)

    #refine with the assembly graph
    if args.refine:
        out_overlap = os.path.join(args.out_dir, "contigs_overlap.dot")
        overlap.make_overlap_graph(synteny_backend.get_target_fasta(),
                                   out_overlap)
        scaffolds = asref.refine_scaffolds(out_overlap, scaffolds,
                                           target_sequences)
        if args.debug:
            shutil.copy(out_overlap, debugger.debug_dir)
        os.remove(out_overlap)

    out_gen = OutputGenerator(target_sequences, scaffolds)
    out_gen.make_output(args.out_dir, recipe["target"])
    logger.info("Done!")
def run_ragout(args):
    """
    Top-level logic of the program
    """
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)

    debug_root = os.path.join(args.out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    out_log = os.path.join(args.out_dir, "ragout.log")
    enable_logging(out_log, args.debug)
    logger.info("Starting Ragout v{0}".format(__version__))

    check_extern_modules(args.synteny_backend)
    all_backends = SyntenyBackend.get_available_backends()
    backend = all_backends[args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)

    #Setting synteny block sizes
    if "blocks" in recipe:
        scale = recipe["blocks"]
    else:
        scale = backend.infer_block_scale(recipe)
        logger.info("Synteny block scale set to '{0}'".format(scale))
    synteny_blocks = config.vals["blocks"][scale]

    #Running backend to get synteny blocks
    perm_files = backend.make_permutations(recipe, synteny_blocks, args.out_dir,
                                           args.overwrite, args.threads)

    run_stages = make_run_stages(synteny_blocks, args.resolve_repeats)
    phylo_perm_file = perm_files[synteny_blocks[-1]]
    phylogeny, naming_ref = get_phylogeny_and_naming_ref(recipe,
                                                         phylo_perm_file)

    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    for stage in run_stages:
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        stage_perms[stage] = PermutationContainer(perm_files[stage.block_size],
                                                  recipe, stage.repeats,
                                                  stage.ref_indels, phylogeny)
        raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage])

    target_sequences = read_fasta_dict(backend.get_target_fasta())
    if backend.get_ancestor_fasta():
        ancestor_sequences = read_fasta_dict(backend.get_ancestor_fasta())

    if not args.solid_scaffolds:
        chim_detect = ChimeraDetector(raw_bp_graphs, run_stages,
                                      target_sequences)

    #####
    scaffolds = None
    last_stage = run_stages[-1]
    if not args.targetDone:
        prev_stages = []
        for stage in run_stages:
            logger.info("Stage \"{0}\"".format(stage.name))
            debugger.set_debug_dir(os.path.join(debug_root, stage.name))
            prev_stages.append(stage)

            if not args.solid_scaffolds:
                broken_perms = chim_detect.break_contigs(stage_perms[stage],
                                                         [stage])
            else:
                broken_perms = stage_perms[stage]

            breakpoint_graph = BreakpointGraph(broken_perms)
            adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny)
            adjacencies = adj_inferer.infer_adjacencies()
            cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms)

            if scaffolds is not None:
                if not args.solid_scaffolds:
                    merging_perms = chim_detect.break_contigs(stage_perms[stage],
                                                              prev_stages)
                else:
                    merging_perms = stage_perms[stage]
                scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                                  merging_perms, stage.rearrange)
            else:
                scaffolds = cur_scaffolds
        debugger.set_debug_dir(debug_root)

        ####
        scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage],
                                     naming_ref)

        if not args.no_refine:
            out_overlap = os.path.join(args.out_dir, "contigs_overlap.dot")
            overlap.make_overlap_graph(backend.get_target_fasta(), out_overlap)
            scaffolds = asref.refine_scaffolds(out_overlap, scaffolds,
                                               target_sequences)
            if args.debug:
                shutil.copy(out_overlap, debugger.debug_dir)
            os.remove(out_overlap)

        out_gen = OutputGenerator(target_sequences, scaffolds)
        out_gen.make_output(args.out_dir, recipe["target"])

    ###Ancestor reconstruction
    if args.ancestor_reconstruct:
        #last_stage = run_stages[-1]
        ancestor_construct(scaffolds, recipe['ancestor'], recipe['target'],
                           phylogeny, naming_ref, ancestor_sequences,
                           args.out_dir, stage_perms=stage_perms,
                           run_stages=run_stages, targetDone=args.targetDone,
                           solid_scaffolds=args.solid_scaffolds)
    ###
    logger.info("Done!")
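# A minimal command-line wrapper, shown only as a sketch: it enumerates the
# argparse attributes that run_ragout and ancestor_construct actually read
# (out_dir, debug, recipe, synteny_backend, overwrite, threads,
# resolve_repeats, solid_scaffolds, no_refine, targetDone,
# ancestor_reconstruct). Option names and defaults here are assumptions for
# illustration, not Ragout's official CLI.
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Ragout scaffolder driver")
    parser.add_argument("recipe", help="path to the recipe file")
    parser.add_argument("-o", "--outdir", dest="out_dir", default="ragout-out")
    parser.add_argument("-s", "--synteny", dest="synteny_backend",
                        default="sibelia")
    parser.add_argument("-t", "--threads", type=int, default=1)
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--resolve-repeats", dest="resolve_repeats",
                        action="store_true")
    parser.add_argument("--solid-scaffolds", dest="solid_scaffolds",
                        action="store_true")
    parser.add_argument("--no-refine", dest="no_refine", action="store_true")
    parser.add_argument("--target-done", dest="targetDone", action="store_true")
    parser.add_argument("--ancestor-reconstruct", dest="ancestor_reconstruct",
                        action="store_true")
    args = parser.parse_args()
    run_ragout(args)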
def _run_ragout(args):
    """
    Top-level logic of the program
    """
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)

    debug_root = os.path.join(args.out_dir, "debug")
    debugger.set_debugging(args.debug)
    debugger.set_debug_dir(debug_root)
    debugger.clear_debug_dir()

    out_log = os.path.join(args.out_dir, "ragout.log")
    _enable_logging(out_log, args.debug)
    logger.info("Starting Ragout v{0}".format(__version__))

    #parsing recipe, preparing synteny backend
    _check_extern_modules(args.synteny_backend)
    synteny_backend = SyntenyBackend.get_available_backends() \
                                        [args.synteny_backend]
    recipe = parse_ragout_recipe(args.recipe)
    synteny_sizes = _get_synteny_scale(recipe, synteny_backend)

    #Running synteny backend to get synteny blocks
    perm_files = synteny_backend.make_permutations(recipe, synteny_sizes,
                                                   args.out_dir, args.overwrite,
                                                   args.threads)

    #setting up phylogenetic tree
    phylo_perm_file = perm_files[synteny_sizes[ID_SMALLEST]]
    phylogeny, naming_ref = _get_phylogeny_and_naming_ref(recipe,
                                                          phylo_perm_file)

    #parsing permutation files, apply filters and build breakpoint graph
    logger.info("Processing permutation files")
    raw_bp_graphs = {}
    stage_perms = {}
    run_stages = _make_run_stages(synteny_sizes, args.resolve_repeats)
    for stage in run_stages:
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        stage_perms[stage] = PermutationContainer(perm_files[stage.block_size],
                                                  recipe, stage.repeats,
                                                  stage.ref_indels, phylogeny)
        raw_bp_graphs[stage] = BreakpointGraph(stage_perms[stage])

    #initializing chimera detector
    target_sequences = read_fasta_dict(synteny_backend.get_target_fasta())
    chim_detect = None
    if not args.solid_scaffolds:
        chim_detect = ChimeraDetector(raw_bp_graphs, run_stages,
                                      target_sequences)

    #inferring adjacencies: loop over stages (iterations)
    scaffolds = None
    prev_stages = []
    for stage in run_stages:
        logger.info("Stage \"{0}\"".format(stage.name))
        debugger.set_debug_dir(os.path.join(debug_root, stage.name))
        prev_stages.append(stage)

        broken_perms = stage_perms[stage]
        if not args.solid_scaffolds:
            broken_perms = chim_detect.break_contigs(stage_perms[stage], [stage])
        breakpoint_graph = BreakpointGraph(broken_perms)

        adj_inferer = AdjacencyInferer(breakpoint_graph, phylogeny)
        adjacencies = adj_inferer.infer_adjacencies()
        cur_scaffolds = scfldr.build_scaffolds(adjacencies, broken_perms)

        if scaffolds is not None:
            if not args.solid_scaffolds:
                broken_perms = chim_detect.break_contigs(stage_perms[stage],
                                                         prev_stages)
            cur_scaffolds = merge.merge_scaffolds(scaffolds, cur_scaffolds,
                                                  broken_perms, stage.rearrange)
        merge.get_breakpoints(cur_scaffolds, breakpoint_graph, broken_perms)
        scaffolds = cur_scaffolds
    debugger.set_debug_dir(debug_root)

    ####
    #name scaffolds according to one of the references
    last_stage = run_stages[ID_SMALLEST]
    scfldr.assign_scaffold_names(scaffolds, stage_perms[last_stage], naming_ref)

    #refine with the assembly graph
    if args.refine:
        out_overlap = os.path.join(args.out_dir, "contigs_overlap.dot")
        overlap.make_overlap_graph(synteny_backend.get_target_fasta(),
                                   out_overlap)
        scaffolds = asref.refine_scaffolds(out_overlap, scaffolds,
                                           target_sequences)
        if args.debug:
            shutil.copy(out_overlap, debugger.debug_dir)
        os.remove(out_overlap)

    out_gen = OutputGenerator(target_sequences, scaffolds)
    out_gen.make_output(args.out_dir, recipe["target"])
    logger.info("Done!")