def generate_config(self, cfg):
    """Prepare the config directory for this K iteration and fill ``config.info``.

    Works under ``cfg.output_dir/K{self.K}``. When the stage reported by
    ``self.get_stage(self.short_name)`` equals ``options_storage.BASE_STAGE``
    the directory is created if missing and the ``debruijn`` templates are
    copied from ``self.tmp_configs_dir`` into its ``configs`` subdirectory.
    Optional ``read_buffer_size`` / ``scaffolding_mode`` values found in
    ``cfg.__dict__`` are substituted into their own config files before the
    RNA, construction and main configs are prepared.

    :param cfg: settings object; ``output_dir`` is always read.
    """
    k_dir = os.path.join(cfg.output_dir, "K%d" % self.K)
    saves_dir = os.path.join(k_dir, "saves")
    configs_dst = os.path.join(k_dir, "configs")

    if self.get_stage(self.short_name) == options_storage.BASE_STAGE:
        if not os.path.isdir(k_dir):
            os.makedirs(k_dir)
        # Reset the created-path cache before copying, see
        # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "debruijn"), configs_dst,
                           preserve_times=False)

    # Contigs of the previous K (if there was one) are offered to this iteration.
    additional_contigs_fname = None
    if self.prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % self.prev_K,
                                                "simplified_contigs.fasta")

    # FIXME why here???
    optional_settings = (("read_buffer_size", "construction.info"),
                         ("scaffolding_mode", "pe_params.info"))
    for option, info_name in optional_settings:
        if option in cfg.__dict__:
            process_cfg.substitute_params(os.path.join(configs_dst, info_name),
                                          {option: getattr(cfg, option)}, self.log)

    prepare_config_rnaspades(os.path.join(configs_dst, "rna_mode.info"), self.log)
    prepare_config_construction(os.path.join(configs_dst, "construction.info"), self.log)
    prepare_config_spades(os.path.join(configs_dst, "config.info"), cfg, self.log,
                          additional_contigs_fname, self.K, self.get_stage(self.short_name),
                          saves_dir, self.last_one, self.bin_home)
def PrepareConfigs(params, log):
    """Copy the configs, then substitute the parameter values into the config file.

    If ``params.config_file`` does not exist after ``CopyConfigs`` ran, a
    message is logged and the process exits with status 1.

    :param params: settings object; ``config_file`` is read here.
    :param log: logger passed to the helpers.
    """
    CopyConfigs(params, log)
    substitutions = CreateParamDict(params)
    config_path = params.config_file
    if os.path.exists(config_path):
        process_cfg.substitute_params(config_path, substitutions, log)
        return
    log.info("ERROR: config file was not found")
    sys.exit(1)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    """Generate configs under ``cfg.output_dir/SCC/K{K}`` and run ``scaffold_correction``.

    Any existing ``SCC/K{K}`` directory is removed and recreated, the
    ``debruijn`` config templates are copied from ``configs_dir``, the path
    ``latest/scaffolds.fasta`` is substituted into ``moleculo_mode.info`` and
    the binary from ``execution_home`` is started via ``support.sys_call``.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``scaffold_correction`` binary.
    :param cfg: settings object; ``output_dir`` is read here and
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    :param log: logger passed to every helper.
    :param latest: directory expected to contain ``scaffolds.fasta``.
    :param K: k-mer size used for the directory name and the config.
    """
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # copy_tree caches the directories it has created; after rmtree the cache
    # is stale and a repeated copy into the same path fails, see
    # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"),
                                  {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)

    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    """Generate configs under ``cfg.output_dir/SCC`` and run ``scaffold_correction``.

    The ``SCC`` directory is removed and recreated, the ``debruijn`` config
    templates are copied from ``configs_dir`` and every ``*.info.template``
    file is either renamed to its ``*.info`` name or deleted when that file
    already exists. The scaffolds are looked up in ``cfg.output_dir/K{K}``.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``scaffold_correction`` binary.
    :param cfg: settings object; ``output_dir`` is read here and
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    :param log: logger passed to every helper.
    :param K: k-mer size of the iteration whose scaffolds are corrected.
    """
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # copy_tree caches the directories it has created; after rmtree the cache
    # is stale and a repeated copy into the same path fails, see
    # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if not cfg_file.endswith('.info.template'):
                continue
            final_name = cfg_file.split('.template')[0]
            if os.path.isfile(final_name):
                # a concrete config already exists, the template is redundant
                os.remove(cfg_file)
            else:
                os.rename(cfg_file, final_name)

    log.info("\n== Running scaffold correction \n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)

    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
def prepare_config_rnaspades(filename, log):
    """Substitute the strand-specificity settings into ``filename``.

    Does nothing unless ``options_storage.rna`` is truthy. ``ss_enabled`` is
    true when ``options_storage.strand_specific`` is not ``None``;
    ``antisense`` is the truth value of ``strand_specific`` itself.

    :param filename: path of the config file to update.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    if options_storage.rna:
        strand_specific = options_storage.strand_specific
        params = {
            "ss_enabled": bool_to_str(strand_specific is not None),
            "antisense": bool_to_str(strand_specific),
        }
        process_cfg.substitute_params(filename, params, log)
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one):
    """Collect the assembler settings for one K iteration and write them to ``filename``.

    Values come from ``cfg`` and the arguments and are applied with
    ``process_cfg.substitute_params``. ``diploid_mode``, ``resolving_mode``,
    ``careful`` and ``pacbio_mode`` are only added to the substitution dict
    when present in ``cfg.__dict__``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param additional_contigs_fname: contigs of the previous iteration, or a falsy value.
    :param K: k-mer size.
    :param stage: value written as ``entry_point``.
    :param saves_dir: value written as ``load_from``.
    :param last_one: truthy for the final iteration.
    """
    cfg_fields = cfg.__dict__
    process_spaces = process_cfg.process_spaces

    params = {"K": str(K), "run_mode": "false"}
    if "diploid_mode" in cfg_fields:
        params["diploid_mode"] = bool_to_str(cfg.diploid_mode)
    params["dataset"] = process_spaces(cfg.dataset)
    params["output_base"] = process_spaces(cfg.output_dir)
    params["tmp_dir"] = process_spaces(cfg.tmp_dir)

    has_additional_contigs = bool(additional_contigs_fname)
    if has_additional_contigs:
        params["additional_contigs"] = process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_additional_contigs)

    params.update({
        "main_iteration": bool_to_str(last_one),
        "entry_point": stage,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        # "topology_simplif_enabled": bool_to_str(last_one),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    })

    if "resolving_mode" in cfg_fields:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in cfg_fields:
        params["mismatch_careful"] = bool_to_str(cfg.careful)
    if "pacbio_mode" in cfg_fields:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_spaces(cfg.pacbio_reads)

    process_cfg.substitute_params(filename, params, log)
def prepare_config_spades(filename, cfg, log, use_additional_contigs, K, last_one):
    """Collect the assembler settings for one K iteration and write them to ``filename``.

    The entry point is always ``"construction"``. ``resolving_mode`` and
    ``careful`` are only added when present in ``cfg.__dict__``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param use_additional_contigs: written as the ``use_additional_contigs`` flag.
    :param K: k-mer size.
    :param last_one: truthy for the final iteration.
    """
    process_spaces = process_cfg.process_spaces
    params = {
        "K": str(K),
        "run_mode": "false",
        "dataset": process_spaces(cfg.dataset),
        "output_base": process_spaces(cfg.output_dir),
        "additional_contigs": process_spaces(cfg.additional_contigs),
        "entry_point": "construction",
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one),
        "paired_mode": bool_to_str(last_one and cfg.paired_mode),
        "topology_simplif_enabled": bool_to_str(last_one),
        "use_additional_contigs": bool_to_str(use_additional_contigs),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    }

    cfg_fields = cfg.__dict__
    if "resolving_mode" in cfg_fields:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in cfg_fields:
        params["mismatch_careful"] = bool_to_str(cfg.careful)

    process_cfg.substitute_params(filename, params, log)
def generate_config(self, cfg):
    """Create a fresh scaffold-correction config directory and fill it in.

    The scaffolds are taken from the directory of the last value in
    ``cfg.iterative_K``; the configs are written for
    ``options_storage.SCC_K`` under ``cfg.output_dir/SCC/K{SCC_K}``. An
    existing directory of that name is removed first.

    :param cfg: settings object; ``iterative_K`` and ``output_dir`` are read,
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    """
    last_k = cfg.iterative_K[-1]
    latest = os.path.join(cfg.output_dir, "K%d" % last_k)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")

    K = options_storage.SCC_K
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, "saves")
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.isdir(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # copy_tree caches the directories it has created; after rmtree the cache
    # is stale and a repeated copy into the same path fails, see
    # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "debruijn"), dst_configs,
                       preserve_times=False)

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, self.log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"),
                                  {"scaffolds_file": scaffolds_file}, self.log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, self.log, saves_dir, K)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    """Generate configs under ``cfg.output_dir/SCC/K{K}`` and run ``spades-truseq-scfcorrection``.

    Any existing ``SCC/K{K}`` directory is removed and recreated, the
    ``debruijn`` config templates are copied from ``configs_dir``, the path
    ``latest/scaffolds.fasta`` is substituted into ``moleculo_mode.info`` and
    the binary from ``execution_home`` is started via ``support.sys_call``.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``spades-truseq-scfcorrection`` binary.
    :param cfg: settings object; ``output_dir`` is read here and
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    :param log: logger passed to every helper.
    :param latest: directory expected to contain ``scaffolds.fasta``.
    :param K: k-mer size used for the directory name and the config.
    """
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # copy_tree caches the directories it has created; after rmtree the cache
    # is stale and a repeated copy into the same path fails, see
    # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"),
                                  {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)

    command = [os.path.join(execution_home, "spades-truseq-scfcorrection"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K iteration and run the ``spades`` binary.

    In continue mode an iteration whose ``final_contigs.fasta`` already
    exists is skipped unless ``options_storage.restart_from`` names this K.
    A ``:stage`` suffix in ``restart_from`` is honoured only when it is
    attached to this K (``kNN:stage``); a suffix given for a different K no
    longer leaks into this iteration. When starting from ``BASE_STAGE`` the
    K directory is rebuilt from the ``debruijn`` templates.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``spades`` binary.
    :param cfg: settings object; ``output_dir`` is read here and
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    :param log: logger passed to the helpers.
    :param K: k-mer size of this iteration.
    :param prev_K: k-mer size of the previous iteration, or a falsy value.
    :param last_one: truthy for the final iteration.
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        restarts_this_k = bool(restart_from) and (restart_from == ("k%d" % K)
                                                  or restart_from.startswith("k%d:" % K))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarts_this_k:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # Take the stage from "kNN:stage" only when NN is this K; otherwise
        # the iteration would look for saves that belong to another K.
        if restart_from and restart_from.startswith("k%d:" % K):
            stage = restart_from[restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
        # removing template configs
        for root, dirs, files in os.walk(dst_configs):
            for cfg_file in files:
                cfg_file = os.path.join(root, cfg_file)
                if not cfg_file.endswith('.info.template'):
                    continue
                final_name = cfg_file.split('.template')[0]
                if os.path.isfile(final_name):
                    # a concrete config already exists, the template is redundant
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, final_name)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!"
                            % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]

    ## this code makes sense for src/debruijn/simplification.cpp: corrected_and_save_reads() function which is not used now
    # bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    # if os.path.isdir(bin_reads_dir):
    #     if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
    #         for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
    #             cor_index = cor_filename.rfind("_cor")
    #             new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:]
    #             shutil.move(cor_filename, new_bin_filename)

    support.sys_call(command, log)
def prepare_config_bgcspades(filename, cfg, log):
    """Substitute the HMM-related settings into ``filename``.

    Does nothing when ``set_of_hmms`` is absent from ``cfg.__dict__``. The
    three extra values are added only when ``options_storage.args.bio`` is
    truthy.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    if "set_of_hmms" not in cfg.__dict__:
        return

    params = {"set_of_hmms": cfg.set_of_hmms}
    if options_storage.args.bio:
        params.update({
            "component_size_part": 1,
            "set_copynumber": bool_to_str(True),
            "start_only_from_tips": bool_to_str(True),
        })
    process_cfg.substitute_params(filename, params, log)
def prepare_config_mulksg(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one,
                          execution_home):
    """Collect the assembler settings for one K value and write them to ``filename``.

    The thread count is capped at ``multiprocessing.cpu_count()``. For every
    K other than the last entry of ``cfg.iterative_K`` the memory limit is
    split between the remaining K values using floor division, so an integer
    limit stays an integer on both Python 2 and Python 3. The divisor is
    never allowed to drop below 1.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param additional_contigs_fname: extra contigs file, or a falsy value.
    :param K: k-mer size.
    :param stage: value written as ``entry_point``.
    :param saves_dir: value written as ``load_from``.
    :param last_one: truthy for the final iteration.
    :param execution_home: not used by this function.
    """
    subst_dict = dict()
    subst_dict["K"] = str(K)
    subst_dict["dataset"] = process_cfg.process_spaces(cfg.dataset)
    subst_dict["output_base"] = process_cfg.process_spaces(cfg.output_dir)
    subst_dict["tmp_dir"] = process_cfg.process_spaces(cfg.tmp_dir)
    if additional_contigs_fname:
        subst_dict["additional_contigs"] = process_cfg.process_spaces(additional_contigs_fname)
        subst_dict["use_additional_contigs"] = bool_to_str(True)
    else:
        subst_dict["use_additional_contigs"] = bool_to_str(False)
    subst_dict["main_iteration"] = bool_to_str(last_one)
    subst_dict["entry_point"] = stage
    subst_dict["load_from"] = saves_dir
    subst_dict["developer_mode"] = bool_to_str(cfg.developer_mode)
    subst_dict["gap_closer_enable"] = bool_to_str(last_one or K >= 55)
    subst_dict["rr_enable"] = bool_to_str(last_one and cfg.rr_enable)
    # subst_dict["topology_simplif_enabled"] = bool_to_str(last_one)

    # TODO: If on multi node, max_threads and max_memory need to change!!!
    # The thread cap is the same for every K; only the memory limit differs.
    subst_dict["max_threads"] = min(multiprocessing.cpu_count(), cfg.max_threads)
    if K == cfg.iterative_K[-1]:
        subst_dict["max_memory"] = cfg.max_memory
    else:
        # Floor division keeps the limit integral; max() guards against a
        # zero divisor when iterative_K holds a single value.
        other_k_count = max(len(cfg.iterative_K) - 1, 1)
        subst_dict["max_memory"] = cfg.max_memory // other_k_count

    subst_dict["save_gp"] = bool_to_str(cfg.save_gp)
    if not last_one:
        subst_dict["correct_mismatches"] = bool_to_str(False)
    if "resolving_mode" in cfg.__dict__:
        subst_dict["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in cfg.__dict__:
        subst_dict["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        subst_dict["pacbio_reads"] = process_cfg.process_spaces(cfg.pacbio_reads)

    if cfg.cov_cutoff == "off":
        subst_dict["use_coverage_threshold"] = bool_to_str(False)
    else:
        subst_dict["use_coverage_threshold"] = bool_to_str(True)
        if cfg.cov_cutoff == "auto":
            subst_dict["coverage_threshold"] = 0.0
        else:
            subst_dict["coverage_threshold"] = cfg.cov_cutoff

    if cfg.lcer_cutoff is not None:
        subst_dict["lcer_enabled"] = bool_to_str(True)
        subst_dict["lcer_coverage_threshold"] = cfg.lcer_cutoff

    if "series_analysis" in cfg.__dict__:
        subst_dict["series_analysis"] = cfg.series_analysis

    process_cfg.substitute_params(filename, subst_dict, log)
def prepare_config(config_fname, ds_args, log):
    """Write the values taken from ``ds_args`` into ``config_fname``.

    ``developer_mode`` is always written as ``"false"``.

    :param config_fname: path of the config file to update.
    :param ds_args: parsed arguments object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    to_flag = process_cfg.bool_to_str
    process_spaces = process_cfg.process_spaces
    substitutions = {
        "tails_lie_on_bulges": to_flag(ds_args.allow_gaps),
        "align_bulge_sides": to_flag(ds_args.weak_align),
        "haplocontigs": process_spaces(ds_args.haplocontigs),
        "output_dir": process_spaces(ds_args.output_dir),
        "developer_mode": "false",  # process_cfg.bool_to_str(False)
        "tmp_dir": process_spaces(ds_args.tmp_dir),
        "max_threads": ds_args.max_threads,
        "max_memory": ds_args.max_memory,
    }
    process_cfg.substitute_params(config_fname, substitutions, log)
def create_mulksg_configs(configs_dir, execution_home, cfg, log, K_values_list, additional_contigs_fname, last_one,
                          ext_python_modules_home):
    """Build a fresh config directory and a ``mulksg-core`` command for every K.

    For each value in ``K_values_list`` the directory ``cfg.output_dir/K{K}``
    is removed if present, recreated and filled from the ``debruijn``
    templates. Nothing is executed here.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``mulksg-core`` binary.
    :param cfg: settings object.
    :param log: logger passed to the helpers.
    :param K_values_list: k-mer sizes to prepare.
    :param additional_contigs_fname: forwarded to ``prepare_config_mulksg``.
    :param last_one: forwarded to ``prepare_config_mulksg``.
    :param ext_python_modules_home: not used by the active code.
    :return: ``(command_list, contig_files)``; one command (a list of
        arguments) and one ``simplified_contigs.fasta`` path per K.
    """
    commands = []
    contig_paths = []
    for K in K_values_list:
        # RL = get_read_length(cfg.output_dir, K, ext_python_modules_home, log)
        # if K >= RL:
        #     support.warning("Value of K (%d) exceeded estimated read length (%d)" %
        #                     (K, RL), log)
        k_dir = os.path.join(cfg.output_dir, "K%d" % K)
        saves_dir = os.path.join(k_dir, 'saves')
        configs_dst = os.path.join(k_dir, "configs")

        if os.path.exists(k_dir):
            shutil.rmtree(k_dir)
        os.makedirs(k_dir)

        # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), configs_dst, preserve_times=False)

        log.info("\n== Running assembler: " + ("K%d" % K) + "\n")

        construction_info = os.path.join(configs_dst, "construction.info")
        if "read_buffer_size" in cfg.__dict__:
            # FIXME why here???
            process_cfg.substitute_params(construction_info,
                                          {"read_buffer_size": cfg.read_buffer_size}, log)
        if "scaffolding_mode" in cfg.__dict__:
            # FIXME why here???
            process_cfg.substitute_params(os.path.join(configs_dst, "pe_params.info"),
                                          {"scaffolding_mode": cfg.scaffolding_mode}, log)

        prepare_config_rnamulksg(os.path.join(configs_dst, "rna_mode.info"), log)
        prepare_config_construction(construction_info, log)
        main_cfg = os.path.join(configs_dst, "config.info")
        prepare_config_mulksg(main_cfg, cfg, log, additional_contigs_fname, K, BASE_STAGE, saves_dir,
                              last_one, execution_home)

        core_command = [os.path.join(execution_home, "mulksg-core"), main_cfg]
        # print("command is: ", command)
        add_configs(core_command, configs_dst)
        commands.append(core_command)
        contig_paths.append(os.path.join(k_dir, "simplified_contigs.fasta"))
    return commands, contig_paths
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K iteration and run ``spades-core``.

    In continue mode an iteration whose ``final_contigs.fasta`` already
    exists is skipped unless ``options_storage.restart_from`` names this K;
    a ``kNN:stage`` value for this K selects the stage to resume from. When
    starting from ``BASE_STAGE`` the K directory is rebuilt from the
    ``debruijn`` templates.

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``spades-core`` binary.
    :param cfg: settings object.
    :param log: logger passed to the helpers.
    :param K: k-mer size of this iteration.
    :param prev_K: k-mer size of the previous iteration, or a falsy value.
    :param last_one: truthy for the final iteration.
    """
    k_dir = os.path.join(cfg.output_dir, "K%d" % K)
    saves_dir = os.path.join(k_dir, 'saves')
    configs_dst = os.path.join(k_dir, "configs")
    stage = BASE_STAGE

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        staged_prefix = "k%d:" % K
        restarts_this_k = bool(restart_from) and (restart_from == ("k%d" % K)
                                                  or restart_from.startswith(staged_prefix))
        if os.path.isfile(os.path.join(k_dir, "final_contigs.fasta")) and not restarts_this_k:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # A "kNN:stage" value for this K carries the stage to resume from.
        if restart_from and restart_from.startswith(staged_prefix):
            stage = restart_from.partition(":")[2]
        support.continue_from_here(log)

    if stage == BASE_STAGE:
        if os.path.exists(k_dir):
            shutil.rmtree(k_dir)
        os.makedirs(k_dir)
        # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), configs_dst, preserve_times=False)
    elif not os.path.isdir(saves_dir):
        support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")

    additional_contigs_fname = None
    if prev_K:
        candidate = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if os.path.isfile(candidate):
            additional_contigs_fname = candidate
        else:
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, candidate), log)

    construction_info = os.path.join(configs_dst, "construction.info")
    if "read_buffer_size" in cfg.__dict__:
        # FIXME why here???
        process_cfg.substitute_params(construction_info, {"read_buffer_size": cfg.read_buffer_size}, log)
    if "scaffolding_mode" in cfg.__dict__:
        # FIXME why here???
        process_cfg.substitute_params(os.path.join(configs_dst, "pe_params.info"),
                                      {"scaffolding_mode": cfg.scaffolding_mode}, log)

    prepare_config_rnaspades(os.path.join(configs_dst, "rna_mode.info"), log)
    prepare_config_construction(construction_info, log)
    main_cfg = os.path.join(configs_dst, "config.info")
    prepare_config_spades(main_cfg, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one,
                          execution_home)

    core_command = [os.path.join(execution_home, "spades-core"), main_cfg]
    add_configs(core_command, configs_dst)
    # print("Calling: " + " ".join(command))
    support.sys_call(core_command, log)
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one,
                          execution_home):
    """Collect the assembler settings for one K iteration and write them to ``filename``.

    ``checkpoints``, ``resolving_mode``, ``pacbio_mode`` and
    ``series_analysis`` are only added when present in ``cfg.__dict__``.
    ``correct_mismatches`` is only written (as false) for non-final
    iterations. The gap closer is enabled on the final iteration or when K
    reaches ``options_storage.GAP_CLOSER_ENABLE_MIN_K``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param additional_contigs_fname: contigs of the previous iteration, or a falsy value.
    :param K: k-mer size.
    :param stage: value written as ``entry_point``.
    :param saves_dir: value written as ``load_from``.
    :param last_one: truthy for the final iteration.
    :param execution_home: not used by this function.
    """
    cfg_fields = cfg.__dict__
    process_spaces = process_cfg.process_spaces

    params = {
        "K": str(K),
        "dataset": process_spaces(cfg.dataset),
        "output_base": process_spaces(cfg.output_dir),
        "tmp_dir": process_spaces(cfg.tmp_dir),
    }

    has_additional_contigs = bool(additional_contigs_fname)
    if has_additional_contigs:
        params["additional_contigs"] = process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_additional_contigs)

    params["main_iteration"] = bool_to_str(last_one)
    params["entry_point"] = stage
    params["load_from"] = saves_dir
    if "checkpoints" in cfg_fields:
        params["checkpoints"] = cfg.checkpoints

    params.update({
        "developer_mode": bool_to_str(cfg.developer_mode),
        "time_tracer_enabled": bool_to_str(cfg.time_tracer),
        "gap_closer_enable": bool_to_str(last_one or K >= options_storage.GAP_CLOSER_ENABLE_MIN_K),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        # "topology_simplif_enabled": bool_to_str(last_one),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "save_gp": bool_to_str(cfg.save_gp),
    })

    if not last_one:
        params["correct_mismatches"] = bool_to_str(False)
    if "resolving_mode" in cfg_fields:
        params["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in cfg_fields:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_spaces(cfg.pacbio_reads)

    # "off" disables the threshold, "auto" writes 0.0, anything else is used as given.
    if cfg.cov_cutoff == "off":
        params["use_coverage_threshold"] = bool_to_str(False)
    else:
        params["use_coverage_threshold"] = bool_to_str(True)
        params["coverage_threshold"] = 0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff

    if cfg.lcer_cutoff is not None:
        params["lcer_enabled"] = bool_to_str(True)
        params["lcer_coverage_threshold"] = cfg.lcer_cutoff

    if "series_analysis" in cfg_fields:
        params["series_analysis"] = cfg.series_analysis

    process_cfg.substitute_params(filename, params, log)
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K iteration and run the ``spades`` binary.

    In continue mode an iteration whose ``final_contigs.fasta`` already
    exists is skipped unless ``options_storage.restart_from`` names this K.
    A ``:stage`` suffix in ``restart_from`` is honoured only when it is
    attached to this K (``kNN:stage``); a suffix given for a different K no
    longer leaks into this iteration. When starting from ``BASE_STAGE`` the
    K directory is rebuilt from the ``debruijn`` templates and every
    ``*.info.template`` file is renamed to ``*.info`` (or deleted when that
    file already exists).

    :param configs_dir: directory holding the ``debruijn`` config templates.
    :param execution_home: directory holding the ``spades`` binary.
    :param cfg: settings object; ``output_dir`` is read here and
        ``read_buffer_size`` is used when present in ``cfg.__dict__``.
    :param log: logger passed to the helpers.
    :param K: k-mer size of this iteration.
    :param prev_K: k-mer size of the previous iteration, or a falsy value.
    :param last_one: truthy for the final iteration.
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        restarts_this_k = bool(restart_from) and (restart_from == ("k%d" % K)
                                                  or restart_from.startswith("k%d:" % K))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarts_this_k:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # Take the stage from "kNN:stage" only when NN is this K; otherwise
        # the iteration would look for saves that belong to another K.
        if restart_from and restart_from.startswith("k%d:" % K):
            stage = restart_from[restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        # copy_tree caches the directories it has created; after rmtree the
        # cache is stale and a repeated copy into the same path fails, see
        # http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
        # removing template configs
        for root, dirs, files in os.walk(dst_configs):
            for cfg_file in files:
                cfg_file = os.path.join(root, cfg_file)
                if not cfg_file.endswith('.info.template'):
                    continue
                final_name = cfg_file.split('.template')[0]
                if os.path.isfile(final_name):
                    # a concrete config already exists, the template is redundant
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, final_name)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!"
                            % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None

    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name,
                                      {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one,
                          execution_home)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]
    support.sys_call(command, log)
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one,
                          execution_home):
    """Collect the assembler settings for one K iteration and write them to ``filename``.

    ``resolving_mode``, ``pacbio_mode``, ``bwa_paired`` and
    ``series_analysis`` are only considered when present in
    ``cfg.__dict__``. When ``bwa_paired`` is present, BWA is enabled and its
    path is set to ``bwa-spades`` inside ``execution_home``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param additional_contigs_fname: contigs of the previous iteration, or a falsy value.
    :param K: k-mer size.
    :param stage: value written as ``entry_point``.
    :param saves_dir: value written as ``load_from``.
    :param last_one: truthy for the final iteration.
    :param execution_home: directory holding the ``bwa-spades`` binary.
    """
    cfg_fields = cfg.__dict__
    process_spaces = process_cfg.process_spaces

    params = {
        "K": str(K),
        "dataset": process_spaces(cfg.dataset),
        "output_base": process_spaces(cfg.output_dir),
        "tmp_dir": process_spaces(cfg.tmp_dir),
    }

    has_additional_contigs = bool(additional_contigs_fname)
    if has_additional_contigs:
        params["additional_contigs"] = process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_additional_contigs)

    params.update({
        "main_iteration": bool_to_str(last_one),
        "entry_point": stage,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one or K >= 55),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        # "topology_simplif_enabled": bool_to_str(last_one),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "save_gp": bool_to_str(cfg.save_gp),
    })

    if not last_one:
        params["correct_mismatches"] = bool_to_str(False)
    if "resolving_mode" in cfg_fields:
        params["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in cfg_fields:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_spaces(cfg.pacbio_reads)

    # "off" disables the threshold, "auto" writes 0.0, anything else is used as given.
    if cfg.cov_cutoff == "off":
        params["use_coverage_threshold"] = bool_to_str(False)
    else:
        params["use_coverage_threshold"] = bool_to_str(True)
        params["coverage_threshold"] = 0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff

    if cfg.lcer_cutoff is not None:
        params["lcer_enabled"] = bool_to_str(True)
        params["lcer_coverage_threshold"] = cfg.lcer_cutoff

    # TODO: make something about spades.py and config param substitution
    if "bwa_paired" in cfg_fields:
        params["bwa_enable"] = bool_to_str(True)
        params["path_to_bwa"] = os.path.join(execution_home, "bwa-spades")
    if "series_analysis" in cfg_fields:
        params["series_analysis"] = cfg.series_analysis

    process_cfg.substitute_params(filename, params, log)
def prepare_config(config_fname, ds_args, log):
    """Write the values taken from ``ds_args`` into ``config_fname``.

    ``tails_lie_on_bulges`` and ``align_bulge_sides`` are the negations of
    ``allow_gaps`` and ``weak_align``; ``output_base`` is written as an
    empty string.

    :param config_fname: path of the config file to update.
    :param ds_args: parsed arguments object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    to_flag = process_cfg.bool_to_str
    process_spaces = process_cfg.process_spaces
    substitutions = {
        "tails_lie_on_bulges": to_flag(not ds_args.allow_gaps),
        "align_bulge_sides": to_flag(not ds_args.weak_align),
        "haplocontigs": process_spaces(ds_args.haplocontigs),
        "output_dir": process_spaces(ds_args.output_dir),
        "developer_mode": to_flag(ds_args.dev_mode),
        "tmp_dir": process_spaces(ds_args.tmp_dir),
        "max_threads": ds_args.max_threads,
        "max_memory": ds_args.max_memory,
        "output_base": "",
        "ha_enabled": to_flag(ds_args.haplotype_assembly),
        "K": str(ds_args.k),
        'saves': ds_args.saves,
        'entry_point': ds_args.start_from,
    }
    process_cfg.substitute_params(config_fname, substitutions, log)
def prepare_config_bh(filename, cfg, log):
    """Write the read-correction settings from ``cfg`` into ``filename``.

    Every ``*_nthreads`` key receives ``cfg.max_threads``. ``input_qvoffset``
    is only added when ``qvoffset`` is present in ``cfg.__dict__``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    params = {
        "dataset": process_cfg.process_spaces(cfg.dataset_yaml_filename),
        "input_working_dir": process_cfg.process_spaces(cfg.tmp_dir),
        "output_dir": process_cfg.process_spaces(cfg.output_dir),
        "general_max_iterations": cfg.max_iterations,
    }
    thread_keys = ("general_max_nthreads", "count_merge_nthreads", "bayes_nthreads",
                   "expand_nthreads", "correct_nthreads")
    for key in thread_keys:
        params[key] = cfg.max_threads
    params["general_hard_memory_limit"] = cfg.max_memory
    if "qvoffset" in cfg.__dict__:
        params["input_qvoffset"] = cfg.qvoffset
    process_cfg.substitute_params(filename, params, log)
def prepare_config_scaffold_correction(filename, cfg, log, saves_dir, K):
    """Write the scaffold-correction settings into ``filename``.

    The output base is the ``SCC`` subdirectory of ``cfg.output_dir``; the
    entry point is ``BASE_STAGE``; additional contigs and the main-iteration
    flag are always written as false.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param saves_dir: value written as ``load_from``.
    :param K: k-mer size.
    """
    process_spaces = process_cfg.process_spaces
    scc_dir = os.path.join(cfg.output_dir, "SCC")
    params = {
        "K": str(K),
        "dataset": process_spaces(cfg.dataset),
        "output_base": process_spaces(scc_dir),
        "tmp_dir": process_spaces(cfg.tmp_dir),
        "use_additional_contigs": bool_to_str(False),
        "main_iteration": bool_to_str(False),
        "entry_point": BASE_STAGE,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
    }
    # todo
    process_cfg.substitute_params(filename, params, log)
def prepare_config_bh(filename, cfg, log):
    """Write the read-correction settings from ``cfg`` into ``filename``.

    Every ``*_nthreads`` key receives ``cfg.max_threads``. ``input_qvoffset``
    is only added when ``qvoffset`` is present in ``cfg.__dict__``.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    """
    process_spaces = process_cfg.process_spaces
    params = {
        "dataset": process_spaces(cfg.dataset_yaml_filename),
        "input_working_dir": process_spaces(cfg.tmp_dir),
        "output_dir": process_spaces(cfg.output_dir),
        "general_max_iterations": cfg.max_iterations,
        "general_max_nthreads": cfg.max_threads,
        "count_merge_nthreads": cfg.max_threads,
        "bayes_nthreads": cfg.max_threads,
        "expand_nthreads": cfg.max_threads,
        "correct_nthreads": cfg.max_threads,
        "general_hard_memory_limit": cfg.max_memory,
    }
    if "qvoffset" in cfg.__dict__:
        params["input_qvoffset"] = cfg.qvoffset
    process_cfg.substitute_params(filename, params, log)
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one):
    """Collect the assembler settings for one K iteration and write them to ``filename``.

    ``diploid_mode``, ``resolving_mode``, ``careful`` and ``pacbio_mode`` are
    only added when present in ``cfg.__dict__``. The gap closer is enabled
    on the final iteration or when K is at least 55.

    :param filename: path of the config file to update.
    :param cfg: settings object.
    :param log: logger passed to ``process_cfg.substitute_params``.
    :param additional_contigs_fname: contigs of the previous iteration, or a falsy value.
    :param K: k-mer size.
    :param stage: value written as ``entry_point``.
    :param saves_dir: value written as ``load_from``.
    :param last_one: truthy for the final iteration.
    """
    cfg_fields = cfg.__dict__
    process_spaces = process_cfg.process_spaces

    params = {"K": str(K), "run_mode": "false"}
    if "diploid_mode" in cfg_fields:
        params["diploid_mode"] = bool_to_str(cfg.diploid_mode)
    params["dataset"] = process_spaces(cfg.dataset)
    params["output_base"] = process_spaces(cfg.output_dir)
    params["tmp_dir"] = process_spaces(cfg.tmp_dir)

    has_additional_contigs = bool(additional_contigs_fname)
    if has_additional_contigs:
        params["additional_contigs"] = process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_additional_contigs)

    params.update({
        "main_iteration": bool_to_str(last_one),
        "entry_point": stage,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one or K >= 55),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        # "topology_simplif_enabled": bool_to_str(last_one),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    })

    if "resolving_mode" in cfg_fields:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in cfg_fields:
        params["mismatch_careful"] = bool_to_str(cfg.careful)
    if "pacbio_mode" in cfg_fields:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_spaces(cfg.pacbio_reads)

    # "off" disables the threshold, "auto" writes 0.0, anything else is used as given.
    if cfg.cov_cutoff == "off":
        params["use_coverage_threshold"] = bool_to_str(False)
    else:
        params["use_coverage_threshold"] = bool_to_str(True)
        params["coverage_threshold"] = 0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff

    process_cfg.substitute_params(filename, params, log)
def ModifyConfigFiles(params, log):
    """Write run settings into the CDR labeler and VJ finder config files.

    Builds one substitution dictionary per config file from ``params`` and
    paths under ``home_directory``, stores the germline config path on
    ``params.germline_config_file``, passes both dictionaries to
    ``ModifyParamsWrtOrganism``, and finally applies them with
    ``process_cfg.substitute_params``.
    """
    cdr_settings = {
        'input_reads': params.input_reads,
        'output_dir': params.output_dir,
        'vj_finder_config': params.vj_finder_config_file,
        'num_threads': params.num_threads,
        'domain_system': params.domain_system,
        'run_hg_constructor': os.path.join(home_directory, './build/release/bin/ig_swgraph_construct'),
    }

    vj_settings = {
        'loci': params.loci,
        'germline_dir': os.path.join(home_directory, "data/germline"),
    }

    # The germline config path is also recorded on params (side effect).
    params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
    vj_settings['germline_filenames_config'] = params.germline_config_file

    # Return value is not used here; the dictionaries are presumably adjusted
    # in place -- TODO confirm against ModifyParamsWrtOrganism.
    ModifyParamsWrtOrganism(params, cdr_settings, vj_settings)

    process_cfg.substitute_params(params.cdr_labeler_config_file, cdr_settings, log)
    process_cfg.substitute_params(params.vj_finder_config_file, vj_settings, log)
def prepare_config_bh(self, filename, cfg, log):
    """Substitute run parameters from ``cfg`` into the config file ``filename``.

    Paths are passed through ``process_cfg.process_spaces``; the iteration
    count comes from ``options_storage.ITERATIONS``; thread and memory limits
    are copied from ``cfg``.  ``qvoffset``, ``count_filter_singletons`` and
    ``read_buffer_size`` are optional and only forwarded when present in
    ``cfg.__dict__``.  ``self`` is not used by this method.
    """
    params = {
        "dataset": process_cfg.process_spaces(cfg.dataset_yaml_filename),
        "input_working_dir": process_cfg.process_spaces(cfg.tmp_dir),
        "output_dir": process_cfg.process_spaces(cfg.output_dir),
        "general_max_iterations": options_storage.ITERATIONS,
    }

    # Every per-stage thread setting receives the same global thread limit.
    thread_keys = (
        "general_max_nthreads",
        "count_merge_nthreads",
        "bayes_nthreads",
        "expand_nthreads",
        "correct_nthreads",
    )
    for key in thread_keys:
        params[key] = cfg.max_threads

    params["general_hard_memory_limit"] = cfg.max_memory

    # Optional settings: forwarded only when set on this cfg instance.
    if "qvoffset" in cfg.__dict__:
        params["input_qvoffset"] = cfg.qvoffset
    if "count_filter_singletons" in cfg.__dict__:
        params["count_filter_singletons"] = cfg.count_filter_singletons
    if "read_buffer_size" in cfg.__dict__:
        # The config key differs from the cfg attribute name.
        params["count_split_buffer"] = cfg.read_buffer_size

    process_cfg.substitute_params(filename, params, log)
def prepare_config_spades(filename, cfg, log, prev_K, K, last_one):
    """Substitute the settings for one K iteration into the config ``filename``.

    Parameters:
        filename: path of the config file handed to
            ``process_cfg.substitute_params``.
        cfg: object supplying dataset, output, thread and memory settings;
            ``resolving_mode`` and ``careful`` are used only when present in
            ``cfg.__dict__``.
        log: logger forwarded to ``process_cfg.substitute_params``.
        prev_K: its truthiness decides ``use_additional_contigs``.
        K: k-mer size, written as a string.
        last_one: truthy for the final iteration; enables gap closing,
            topology simplification, mismatch correction and (together with
            ``cfg.paired_mode``) paired mode.
    """
    params = {
        "K": str(K),
        "run_mode": "false",
        "dataset": cfg.dataset,
        "output_base": cfg.output_dir,
        "additional_contigs": cfg.additional_contigs,
        # This variant always starts from the construction stage.
        "entry_point": "construction",
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one),
        "paired_mode": bool_to_str(last_one and cfg.paired_mode),
        "topology_simplif_enabled": bool_to_str(last_one),
        "use_additional_contigs": bool_to_str(prev_K),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    }

    # Optional settings: forwarded only when set on this cfg instance.
    if "resolving_mode" in cfg.__dict__:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in cfg.__dict__:
        params["mismatch_careful"] = bool_to_str(cfg.careful)

    process_cfg.substitute_params(filename, params, log)
def ModifyConfigFiles(params, log):
    """Write run settings into the main, CDR labeler and VJ finder configs.

    Builds one substitution dictionary per config file, stores the germline
    config path on ``params.germline_config_file``, replaces the CDR labeler
    dictionary with the result of ``ModifyParamsWrtOrganism``, and applies
    all three dictionaries with ``process_cfg.substitute_params``.
    """
    main_settings = {
        'output_dir': params.output_dir,
        'loci': params.loci,
        'number_of_metaroots': params.number_of_metaroots,
        'pool_manager_strategy': params.tree_strategy,
        'germline_dir': os.path.join(home_directory, "data/germline"),
        'cdr_labeler_config_filename': params.cdr_labeler_config_filename,
    }

    cdr_settings = {'vj_finder_config': params.vj_finder_config_filename}

    # The germline config path is also recorded on params (side effect) and
    # shared by the VJ finder and main configs.
    params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
    vjf_settings = {
        'germline_filenames_config': params.germline_config_file,
        'germline_dir': os.path.join(home_directory, "data/germline"),
    }
    main_settings['germline_filenames_config'] = params.germline_config_file

    # The helper's return value becomes the CDR labeler substitution dict.
    cdr_settings = ModifyParamsWrtOrganism(params, cdr_settings)

    process_cfg.substitute_params(params.output_config_file, main_settings, log)
    process_cfg.substitute_params(params.cdr_labeler_config_filename, cdr_settings, log)
    process_cfg.substitute_params(params.vj_finder_config_filename, vjf_settings, log)
def prepare_config_construction(filename, log):
    """Write ``read_cov_threshold`` into ``filename`` when it is configured.

    Does nothing when ``options_storage.read_cov_threshold`` is ``None``;
    otherwise substitutes that value via ``process_cfg.substitute_params``.
    """
    threshold = options_storage.read_cov_threshold
    if threshold is not None:
        process_cfg.substitute_params(filename, {"read_cov_threshold": threshold}, log)