def check_cfg_for_restart_from(cfg):
    """Check that the requested restart checkpoint is consistent with cfg.

    Reads ``options_storage.restart_from`` and calls ``support.error`` when
    the checkpoint ('ec', 'mc', 'as' or 'k<N>') refers to a stage whose key
    is absent from ``cfg``, or to a K value that is not among the k-mer
    sizes in use.

    cfg -- container of configured stages; only key membership is tested.
    """
    checkpoint = options_storage.restart_from

    if checkpoint == 'ec' and "error_correction" not in cfg:
        support.error("failed to restart from read error correction because this stage was not specified!")
    if checkpoint == 'mc' and "mismatch_corrector" not in cfg:
        support.error("failed to restart from mismatch correction because this stage was not specified!")

    restarts_from_k = checkpoint.startswith('k')
    if (checkpoint == 'as' or restarts_from_k) and "assembly" not in cfg:
        support.error("failed to restart from assembling because this stage was not specified!")
    if not restarts_from_k:
        return

    # Without explicitly given k-mer sizes, validate against the built-in defaults.
    candidates = options_storage.k_mers
    if not candidates:
        if options_storage.auto_K_allowed():
            candidates = list(set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 +
                                  options_storage.K_MERS_250))
        else:
            candidates = options_storage.K_MERS_SHORT

    # Both the bare "k<N>" form and the "k<N>:<suffix>" form are accepted.
    recognised = any(checkpoint == ("k%d" % k) or checkpoint.startswith("k%d:" % k)
                     for k in candidates)
    if not recognised:
        # Report only the number: drop the leading 'k' and anything after ':'.
        requested = checkpoint[1:].partition(":")[0]
        support.error("failed to restart from K=%s because this K was not specified!" % requested)
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    """Return the k-mer sizes to use given the estimated read length.

    cur_k_mers -- currently selected k-mer sizes (iterated and passed to max()).
    RL         -- estimated read length, compared against 250, 150 and the k values.
    log        -- object whose info(message) method receives the notices.
    silent     -- when true, no notice is logged.

    When options_storage.auto_K_allowed() is true and RL is at least 250 or
    150, the matching options_storage default list is returned. Otherwise,
    if RL does not exceed the largest current k, only the k values strictly
    below RL are returned. In all other cases cur_k_mers is returned as is.
    """
    if options_storage.auto_K_allowed():
        if RL >= 250:
            if not silent:
                log.info("Default k-mer sizes were set to %s because estimated "
                         "read length (%d) is equal to or greater than 250"
                         % (str(options_storage.K_MERS_250), RL))
            return options_storage.K_MERS_250
        if RL >= 150:
            if not silent:
                # The message is already fully formatted; passing any further
                # positional argument to log.info() would be treated as a
                # %-format argument by a stdlib logger and break the output.
                log.info("Default k-mer sizes were set to %s because estimated "
                         "read length (%d) is equal to or greater than 150"
                         % (str(options_storage.K_MERS_150), RL))
            return options_storage.K_MERS_150
    if RL <= max(cur_k_mers):
        # A k-mer must be shorter than the read, so keep only k < RL.
        new_k_mers = [k for k in cur_k_mers if k < RL]
        if not silent:
            log.info("K-mer sizes were set to %s because estimated "
                     "read length (%d) is less than %d"
                     % (str(new_k_mers), RL, max(cur_k_mers)))
        return new_k_mers
    return cur_k_mers
def generateK(cfg, log, dataset_data, silent=False):
    """Settle the k-mer sizes in cfg.iterative_K and leave them as a sorted list.

    In RNA mode the work is delegated to generateK_for_rna. Otherwise, unless
    IonTorrent mode is on, the read length obtained from
    support.get_primary_max_reads_length drives the choice: the 250/150
    default lists are applied when automatic K selection is allowed, and any
    k that is not strictly below the read length is dropped.

    cfg          -- object whose iterative_K attribute is read and rewritten.
    log          -- logger; info() receives the notices unless silent is true.
    dataset_data -- dataset description forwarded to the helper functions.
    silent       -- suppresses the informational messages.
    """
    mode = options_storage.args
    if mode.rna:
        generateK_for_rna(cfg, dataset_data, log)
    elif not mode.iontorrent:
        read_length = support.get_primary_max_reads_length(
            dataset_data, log, ["merged reads"],
            options_storage.READS_TYPES_USED_IN_CONSTRUCTION)

        if options_storage.auto_K_allowed():
            if read_length >= 250:
                preset = options_storage.K_MERS_250
                if not silent:
                    log.info("Default k-mer sizes were set to %s because estimated "
                             "read length (%d) is equal to or greater than 250"
                             % (str(preset), read_length))
                cfg.iterative_K = preset
            elif read_length >= 150:
                preset = options_storage.K_MERS_150
                if not silent:
                    log.info("Default k-mer sizes were set to %s because estimated "
                             "read length (%d) is equal to or greater than 150"
                             % (str(preset), read_length))
                cfg.iterative_K = preset

        largest_k = max(cfg.iterative_K)
        if read_length <= largest_k:
            # Only k values strictly shorter than the reads are kept.
            trimmed = [k for k in cfg.iterative_K if k < read_length]
            if not silent:
                log.info("K-mer sizes were set to %s because estimated "
                         "read length (%d) is less than %d"
                         % (str(trimmed), read_length, largest_k))
            cfg.iterative_K = trimmed

    # Normalise: a non-list value is wrapped into a list, then the list is sorted.
    k_values = cfg.iterative_K
    if not isinstance(k_values, list):
        k_values = [k_values]
    cfg.iterative_K = sorted(k_values)
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    """Swap in the default k-mer list matching a long estimated read length.

    cur_k_mers -- currently selected k-mer sizes, returned unchanged when no
                  special case applies.
    RL         -- estimated read length.
    log        -- logger handed to support.warning.
    silent     -- when true, the warning is not issued.

    Only acts when options_storage.auto_K_allowed() is true: RL >= 250 yields
    options_storage.K_MERS_250, RL >= 150 yields options_storage.K_MERS_150.
    """
    if not options_storage.auto_K_allowed():
        return cur_k_mers

    if RL >= 250:
        preset = options_storage.K_MERS_250
        if not silent:
            notice = ("Default k-mer sizes were set to %s because estimated "
                      "read length (%d) is equal to or greater than 250" % (str(preset), RL))
            support.warning(notice, log)
        return preset

    if RL >= 150:
        preset = options_storage.K_MERS_150
        if not silent:
            notice = ("Default k-mer sizes were set to %s because estimated "
                      "read length (%d) is equal to or greater than 150" % (str(preset), RL))
            support.warning(notice, log)
        return preset

    return cur_k_mers
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    """Adjust the k-mer sizes to the estimated read length RL.

    cur_k_mers -- currently selected k-mer sizes.
    RL         -- estimated read length.
    log        -- logger; info() is called with a notice unless silent is true.
    silent     -- suppresses the notices.

    Returns options_storage.K_MERS_250 or options_storage.K_MERS_150 when
    automatic K selection is allowed and RL reaches 250 or 150. Otherwise
    returns the k values strictly below RL if RL does not exceed the largest
    current k, and cur_k_mers itself in the remaining case.
    """
    auto_selection = options_storage.auto_K_allowed()

    if auto_selection and RL >= 250:
        chosen = options_storage.K_MERS_250
        if not silent:
            log.info("Default k-mer sizes were set to %s because estimated "
                     "read length (%d) is equal to or greater than 250" % (str(chosen), RL))
        return chosen

    if auto_selection and RL >= 150:
        chosen = options_storage.K_MERS_150
        if not silent:
            log.info("Default k-mer sizes were set to %s because estimated "
                     "read length (%d) is equal to or greater than 150" % (str(chosen), RL))
        return chosen

    longest_k = max(cur_k_mers)
    if RL <= longest_k:
        # Keep only the k values that are strictly shorter than the reads.
        usable = [k for k in cur_k_mers if k < RL]
        if not silent:
            log.info("K-mer sizes were set to %s because estimated "
                     "read length (%d) is less than %d" % (str(usable), RL, longest_k))
        return usable

    return cur_k_mers
def check_cfg_for_restart_from(cfg):
    """Report an error if options_storage.restart_from cannot be honoured.

    The checkpoint may be 'ec', 'mc', 'as' or 'k<N>' (optionally followed by
    ':<suffix>'). support.error is called when the stage it refers to is not
    a key of cfg, or when <N> is not one of the k-mer sizes in use.

    cfg -- container of configured stages; only key membership is tested.
    """
    target = options_storage.restart_from
    stage_was_missing = "because this stage was not specified!"

    if target == 'ec':
        if "error_correction" not in cfg:
            support.error(
                "failed to restart from read error correction because this stage was not specified!"
            )
    if target == 'mc':
        if "mismatch_corrector" not in cfg:
            support.error(
                "failed to restart from mismatch correction because this stage was not specified!"
            )
    if target == 'as' or target.startswith('k'):
        if "assembly" not in cfg:
            support.error(
                "failed to restart from assembling because this stage was not specified!"
            )

    if target.startswith('k'):
        # Explicit k-mer sizes win; otherwise fall back to the built-in defaults.
        allowed_k = options_storage.k_mers
        if not allowed_k:
            if options_storage.auto_K_allowed():
                allowed_k = list(
                    set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 +
                        options_storage.K_MERS_250))
            else:
                allowed_k = options_storage.K_MERS_SHORT

        for k in allowed_k:
            if target == ("k%d" % k) or target.startswith("k%d:" % k):
                break
        else:
            # No k matched: report just the number, without 'k' or a ':' suffix.
            k_str = target[1:].split(":", 1)[0]
            support.error(
                "failed to restart from K=%s because this K was not specified!" % k_str)
def check_cfg_for_partial_run(cfg, type='restart-from'):  # restart-from or stop-after
    """Validate the restart-from / stop-after checkpoint against cfg.

    cfg  -- container of configured stages; only key membership is tested.
    type -- 'restart-from' (checks options_storage.restart_from) or
            'stop-after' (checks options_storage.stop_after). Any other value
            makes the function return immediately without checking anything.

    support.error is called when the checkpoint ('ec', 'mc', 'as' or 'k<N>')
    refers to a stage missing from cfg, or to a K value that is not among
    the k-mer sizes in use.
    """
    if type == 'restart-from':
        check_point, action, verb = options_storage.restart_from, 'restart from', 'was'
    elif type == 'stop-after':
        check_point, action, verb = options_storage.stop_after, 'stop after', 'is'
    else:
        return

    if check_point == 'ec' and "error_correction" not in cfg:
        support.error("failed to " + action + " 'read error correction' ('" + check_point +
                      "') because this stage " + verb + " not specified!")
    if check_point == 'mc' and "mismatch_corrector" not in cfg:
        support.error("failed to " + action + " 'mismatch correction' ('" + check_point +
                      "') because this stage " + verb + " not specified!")

    is_k_point = check_point.startswith('k')
    if (check_point == 'as' or is_k_point) and "assembly" not in cfg:
        support.error("failed to " + action + " 'assembling' ('" + check_point +
                      "') because this stage " + verb + " not specified!")
    if not is_k_point:
        return

    # Explicit k-mer sizes win; otherwise validate against the built-in defaults.
    valid_k = options_storage.k_mers
    if not valid_k:
        if options_storage.auto_K_allowed():
            valid_k = list(set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 +
                               options_storage.K_MERS_250))
        else:
            valid_k = options_storage.K_MERS_SHORT

    # Both the bare "k<N>" form and the "k<N>:<suffix>" form are accepted.
    matched = any(check_point == ("k%d" % k) or check_point.startswith("k%d:" % k)
                  for k in valid_k)
    if not matched:
        k_str = check_point[1:].partition(":")[0]
        support.error("failed to " + action + " K=%s because this K " % k_str + verb + " not specified!")
def print_used_values(cfg, log):
    """Log a human-readable summary of the settings this run will use.

    cfg -- mapping from section name ("common", "dataset", "error_correction",
           "assembly", "mismatch_corrector") to an object holding that
           section's parameters as attributes.
    log -- logger; every line of the summary is sent to log.info().

    The summary covers system information, the pipeline mode, the dataset
    (including the reads listed in the dataset YAML file), error-correction
    and assembly parameters, and the common resource limits.
    """

    def print_value(cfg, section, param, pretty_param="", margin=" "):
        # Log "<margin><label>: <value>"; the label defaults to the parameter
        # name with its first letter capitalised and underscores as spaces.
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        elif "offset" in param:
            # An unset offset parameter is reported as auto-detected.
            line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info(" SPAdes version: " + str(spades_version).strip())
        log.info(" Python version: " + ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info(" OS: " + platform.platform())
        # for more details: '[' + str(platform.uname()) + ']'
    except Exception:
        # Best effort only: the summary must not fail because of system info.
        log.info(" Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if "error_correction" in cfg and "assembly" not in cfg:
        log.info("Mode: ONLY read error correction (without assembling)")
    elif "error_correction" not in cfg and "assembly" in cfg:
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if options_storage.args.iontorrent:
            log.info(" IonTorrent data")
        if options_storage.args.bio:
            log.info(" BiosyntheticSPAdes mode")
        if options_storage.args.meta:
            log.info(" Metagenomic mode")
        elif options_storage.args.large_genome:
            log.info(" Large genome mode")
        elif options_storage.args.truseq_mode:
            log.info(" Illumina TruSeq mode")
        elif options_storage.args.isolate:
            log.info(" Isolate mode")
        elif options_storage.args.rna:
            log.info(" RNA-seq mode")
        elif options_storage.args.single_cell:
            log.info(" Single-cell mode")
        else:
            log.info(" Standard mode")
            log.info(" For multi-cell/isolate data we recommend to use '--isolate' option;"
                     " for single-cell MDA data use '--sc';"
                     " for metagenomic data use '--meta';"
                     " for RNA-Seq use '--rna'.")

        log.info(" Reads:")
        # Close the YAML file deterministically instead of leaking the handle.
        # NOTE(review): a generic YAML load() may construct arbitrary objects;
        # confirm the dataset file is trusted or that this loader is safe.
        with open(cfg["dataset"].yaml_filename) as yaml_file:
            dataset_data = pyyaml.load(yaml_file)
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info(" Corrected reads will be compressed")
        else:
            log.info(" Corrected reads will NOT be compressed")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info(" k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if options_storage.args.plasmid:
            log.info(" Plasmid mode is turned ON")
        if cfg["assembly"].disable_rr:
            log.info(" Repeat resolution is DISABLED")
        else:
            log.info(" Repeat resolution is enabled")
        if options_storage.args.careful:
            log.info(" Mismatch careful mode is turned ON")
        else:
            log.info(" Mismatch careful mode is turned OFF")
        if "mismatch_corrector" in cfg:
            log.info(" MismatchCorrector will be used")
        else:
            log.info(" MismatchCorrector will be SKIPPED")
        if cfg["assembly"].cov_cutoff == "off":
            log.info(" Coverage cutoff is turned OFF")
        elif cfg["assembly"].cov_cutoff == "auto":
            log.info(" Coverage cutoff is turned ON and threshold will be auto-detected")
        else:
            log.info(" Coverage cutoff is turned ON and threshold is %f" % cfg["assembly"].cov_cutoff)

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", " ")
    log.info("")
def print_used_values(cfg, log):
    """Log a human-readable summary of the settings this run will use.

    cfg -- mapping from section name ("common", "dataset", "error_correction",
           "assembly", "mismatch_corrector") to an object holding that
           section's parameters as attributes.
    log -- logger; every line of the summary is sent to log.info().

    The summary covers system information, the pipeline mode, the dataset
    (including the reads listed in the dataset YAML file), error-correction
    and assembly parameters, and the common resource limits.
    """

    def print_value(cfg, section, param, pretty_param="", margin=" "):
        # Log "<margin><label>: <value>"; the label defaults to the parameter
        # name with its first letter capitalised and underscores as spaces.
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        elif "offset" in param:
            # An unset offset parameter is reported as auto-detected.
            line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info(" SPAdes version: " + str(spades_version).strip())
        log.info(" Python version: " + ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info(" OS: " + platform.platform())
        # for more details: '[' + str(platform.uname()) + ']'
    except Exception:
        # Best effort only: the summary must not fail because of system info.
        log.info(" Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if "error_correction" in cfg and "assembly" not in cfg:
        log.info("Mode: ONLY read error correction (without assembling)")
    elif "error_correction" not in cfg and "assembly" in cfg:
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if cfg["dataset"].single_cell:
            log.info(" Single-cell mode")
        else:
            log.info(" Multi-cell mode (you should set '--sc' flag if input data"
                     " was obtained with MDA (single-cell) technology")
        if cfg["dataset"].iontorrent:
            log.info(" IonTorrent data")

        log.info(" Reads:")
        # Close the YAML file deterministically instead of leaking the handle.
        # NOTE(review): a generic YAML load() may construct arbitrary objects;
        # confirm the dataset file is trusted or that this loader is safe.
        with open(cfg["dataset"].yaml_filename, 'r') as yaml_file:
            dataset_data = pyyaml.load(yaml_file)
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info(" Corrected reads will be compressed (with gzip)")
        else:
            log.info(" Corrected reads will NOT be compressed (with gzip)")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info(" k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if cfg["assembly"].careful:
            log.info(" Mismatch careful mode is turned ON")
        else:
            log.info(" Mismatch careful mode is turned OFF")
        if cfg["assembly"].disable_rr:
            log.info(" Repeat resolution is DISABLED")
        else:
            log.info(" Repeat resolution is enabled")
        if "mismatch_corrector" in cfg:
            log.info(" MismatchCorrector will be used")
        else:
            log.info(" MismatchCorrector will be SKIPPED")

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", " ")
    log.info("")
def print_used_values(cfg, log):
    """Log a human-readable summary of the settings this run will use.

    cfg -- mapping from section name ("common", "dataset", "error_correction",
           "assembly", "mismatch_corrector") to an object holding that
           section's parameters as attributes.
    log -- logger; every line of the summary is sent to log.info().

    The summary covers system information, the pipeline mode, the dataset
    (including the reads listed in the dataset YAML file), error-correction
    and assembly parameters (with the coverage cutoff), and the common
    resource limits.
    """

    def print_value(cfg, section, param, pretty_param="", margin=" "):
        # Log "<margin><label>: <value>"; the label defaults to the parameter
        # name with its first letter capitalised and underscores as spaces.
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        elif "offset" in param:
            # An unset offset parameter is reported as auto-detected.
            line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info(" SPAdes version: " + str(spades_version).strip())
        log.info(" Python version: " + ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info(" OS: " + platform.platform())
        # for more details: '[' + str(platform.uname()) + ']'
    except Exception:
        # Best effort only: the summary must not fail because of system info.
        log.info(" Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if "error_correction" in cfg and "assembly" not in cfg:
        log.info("Mode: ONLY read error correction (without assembling)")
    elif "error_correction" not in cfg and "assembly" in cfg:
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if cfg["dataset"].single_cell:
            log.info(" Single-cell mode")
        else:
            log.info(" Multi-cell mode (you should set '--sc' flag if input data"
                     " was obtained with MDA (single-cell) technology")
        if cfg["dataset"].iontorrent:
            log.info(" IonTorrent data")

        log.info(" Reads:")
        # Close the YAML file deterministically instead of leaking the handle.
        # NOTE(review): a generic YAML load() may construct arbitrary objects;
        # confirm the dataset file is trusted or that this loader is safe.
        with open(cfg["dataset"].yaml_filename, 'r') as yaml_file:
            dataset_data = pyyaml.load(yaml_file)
        dataset_data = support.relative2abs_paths(
            dataset_data, os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info(" Corrected reads will be compressed (with gzip)")
        else:
            log.info(" Corrected reads will NOT be compressed (with gzip)")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info(" k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if cfg["assembly"].careful:
            log.info(" Mismatch careful mode is turned ON")
        else:
            log.info(" Mismatch careful mode is turned OFF")
        if cfg["assembly"].disable_rr:
            log.info(" Repeat resolution is DISABLED")
        else:
            log.info(" Repeat resolution is enabled")
        if "mismatch_corrector" in cfg:
            log.info(" MismatchCorrector will be used")
        else:
            log.info(" MismatchCorrector will be SKIPPED")
        if cfg["assembly"].cov_cutoff == 'off':
            log.info(" Coverage cutoff is turned OFF")
        elif cfg["assembly"].cov_cutoff == 'auto':
            log.info(" Coverage cutoff is turned ON and threshold will be auto-detected")
        else:
            log.info(" Coverage cutoff is turned ON and threshold is " +
                     str(cfg["assembly"].cov_cutoff))

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", " ")
    log.info("")