def prepare_configs(src_config_dir, ds_args, log):
    """Copy the dipSPAdes configs into the output dir and return the config path.

    The configs from ``src_config_dir`` are copied (via ``copy_configs``) into
    ``<ds_args.output_dir>/dipspades_configs``.  If ``config.info`` is not
    present there afterwards, ``config.info.template`` is checked for
    existence and renamed to ``config.info``.

    :param src_config_dir: directory holding the source configs
    :param ds_args: object whose ``output_dir`` attribute names the output dir
    :param log: logger; not used by this function
    :return: absolute path of ``config.info``
    """
    target_dir = os.path.join(ds_args.output_dir, "dipspades_configs")
    copy_configs(src_config_dir, target_dir)

    info_path = os.path.join(target_dir, "config.info")
    if os.path.exists(info_path):
        return os.path.abspath(info_path)

    # Only the template was shipped: promote it to the real config file.
    template_path = info_path + ".template"
    support.check_file_existence(template_path)
    os.rename(template_path, info_path)
    return os.path.abspath(info_path)
def parse_arguments(argv, log):
    """Parse the dipSPAdes command line into a ``DS_Args`` object.

    Options are read with ``getopt.gnu_getopt`` using the option tables of
    ``DS_Args_List``.  On a getopt error the message is written to stderr,
    the usage text is printed and the process exits with status 1.

    :param argv: list of command-line arguments to parse
    :param log: logger passed on to the ``support`` helpers
    :return: the populated ``DS_Args`` instance
    """
    try:
        parsed, _positional = getopt.gnu_getopt(
            argv, DS_Args_List.short_options, DS_Args_List.long_options)
    except getopt.GetoptError:
        failure = sys.exc_info()[1]
        sys.stderr.write(str(failure) + "\n")
        sys.stderr.flush()
        options_storage.usage("", dipspades=True)
        sys.exit(1)

    ds_args = DS_Args()
    for flag, value in parsed:
        if flag == '-o':
            ds_args.output_dir = os.path.abspath(value)
        elif flag == '--expect-gaps':
            ds_args.allow_gaps = True
        elif flag == '--expect-rearrangements':
            ds_args.weak_align = True
        elif flag == '--hap':
            checked_path = support.check_file_existence(
                value, 'haplocontigs', log, dipspades=True)
            ds_args.haplocontigs_fnames.append(checked_path)
        elif flag in ('-t', "--threads"):
            ds_args.max_threads = int(value)
        elif flag in ('-m', "--memory"):
            ds_args.max_memory = int(value)
        elif flag == '--tmp-dir':
            ds_args.tmp_dir = os.path.abspath(value)

    ds_args.haplocontigs = os.path.join(ds_args.output_dir, "haplocontigs")

    # Mandatory parameters.
    if not ds_args.output_dir:
        support.error(
            "the output_dir is not set! It is a mandatory parameter (-o output_dir).",
            log, dipspades=True)
    if not ds_args.haplocontigs_fnames:
        support.error(
            "cannot start dipSPAdes without at least one haplocontigs file!",
            log, dipspades=True)

    # The temporary directory defaults to a subdirectory of the output dir.
    if not ds_args.tmp_dir:
        ds_args.tmp_dir = os.path.join(ds_args.output_dir,
                                       options_storage.TMP_DIR)
    return ds_args
def parse_arguments(argv, log):
    """Parse the dipSPAdes command line into a ``DS_Args`` object.

    Options are read with ``getopt.gnu_getopt`` using the option tables of
    ``DS_Args_List``.  On a getopt error the message is written to stderr,
    the usage text is printed and the process exits with status 1.

    :param argv: list of command-line arguments to parse
    :param log: logger passed on to the ``support`` helpers
    :return: the populated ``DS_Args`` instance
    """
    try:
        parsed, _positional = getopt.gnu_getopt(
            argv, DS_Args_List.short_options, DS_Args_List.long_options)
    except getopt.GetoptError:
        failure = sys.exc_info()[1]
        sys.stderr.write(str(failure) + "\n")
        sys.stderr.flush()
        options_storage.usage("", dipspades=True)
        sys.exit(1)

    ds_args = DS_Args()
    for flag, value in parsed:
        if flag == '-o':
            ds_args.output_dir = os.path.abspath(value)
        elif flag == '--expect-gaps':
            ds_args.allow_gaps = True
        elif flag == '--expect-rearrangements':
            ds_args.weak_align = True
        elif flag == '--hap':
            checked_path = support.check_file_existence(
                value, 'haplocontigs', log, dipspades=True)
            ds_args.haplocontigs_fnames.append(checked_path)
        elif flag in ('-t', "--threads"):
            ds_args.max_threads = int(value)
        elif flag in ('-m', "--memory"):
            ds_args.max_memory = int(value)
        elif flag == '--tmp-dir':
            ds_args.tmp_dir = os.path.abspath(value)
        elif flag == '--dsdebug':
            ds_args.dev_mode = True
        elif flag == '--hap-assembly':
            ds_args.haplotype_assembly = True
        elif flag == '--dsK':
            ds_args.k = int(value)

    ds_args.haplocontigs = os.path.join(ds_args.output_dir, "haplocontigs")

    # Mandatory parameters.
    if not ds_args.output_dir:
        support.error(
            "the output_dir is not set! It is a mandatory parameter (-o output_dir).",
            log, dipspades=True)
    if not ds_args.haplocontigs_fnames:
        support.error(
            "cannot start dipSPAdes without at least one haplocontigs file!",
            log, dipspades=True)

    # The temporary directory defaults to a subdirectory of the output dir.
    if not ds_args.tmp_dir:
        ds_args.tmp_dir = os.path.join(ds_args.output_dir,
                                       options_storage.TMP_DIR)
    return ds_args
def fill_cfg(options_to_parse, log):
    """Parse SPAdes command-line options and build the run configuration.

    Parsed values are stored as attributes of ``options_storage``; the
    returned ``cfg`` dict is then filled from those attributes.  Besides
    parsing, this function creates the output directory when it is missing
    and writes the resolved dataset description to
    ``<output_dir>/input_dataset.yaml``.

    :param options_to_parse: list of command-line arguments
    :param log: logger passed on to the ``support`` helpers
    :return: ``(cfg, dataset_data)``, or ``(None, None)`` when
        ``options_storage.continue_mode`` is set after parsing
    :raises ValueError: if getopt accepted an option that no branch handles
    """
    try:
        options, not_options = getopt.gnu_getopt(
            options_to_parse, options_storage.short_options,
            options_storage.long_options)
    except getopt.GetoptError:
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    if len(not_options) > 1:
        for opt, arg in options:
            if opt == "-k" and arg.strip().endswith(','):
                support.error("Do not put spaces after commas in the list of k-mers sizes! Correct example: -k 21,33,55", log)
        support.error("Please specify option (e.g. -1, -2, -s, etc) for the following paths: " + ", ".join(not_options[1:]) + "\n", log)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER for
    # each type of short-reads libs.  A comprehension is required: "[{}] * num"
    # would repeat one and the same dict object.
    dataset_data = [{} for _ in range(
        options_storage.MAX_LIBS_NUMBER
        * len(options_storage.SHORT_READS_TYPES.keys()))]

    # for parsing options from "previous run command"
    options_storage.continue_mode = False
    options_storage.k_mers = None

    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = os.path.abspath(arg)
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = os.path.abspath(arg)
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            if arg == 'auto':
                options_storage.k_mers = arg
            else:
                options_storage.k_mers = list(map(int, arg.split(",")))
                for k in options_storage.k_mers:
                    if k < options_storage.MIN_K or k > options_storage.MAX_K:
                        support.error('wrong k value ' + str(k) + ': all k values should be between %d and %d' %
                                      (options_storage.MIN_K, options_storage.MAX_K), log)
                    if k % 2 == 0:
                        support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--iontorrent":
            options_storage.iontorrent = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--disable-gzip-output:false":
            options_storage.disable_gzip_output = False
        elif opt == "--disable-rr":
            options_storage.disable_rr = True
        elif opt == "--disable-rr:false":
            options_storage.disable_rr = False
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True
        elif opt == "--read-buffer-size":
            options_storage.read_buffer_size = int(arg)
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == "--restart-from":
            if arg not in ['ec', 'as', 'mc'] and not arg.startswith('k'):
                support.error("wrong value for --restart-from option: " + arg + " (only 'ec', 'as', 'k<int>', 'mc' are available)", log)
            options_storage.continue_mode = True
            options_storage.restart_from = arg
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if arg == 'auto':
                options_storage.qvoffset = arg
            elif arg in ['33', '64']:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--debug:false":
            options_storage.developer_mode = False
        # corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--mismatch-correction:false":
            options_storage.mismatch_corrector = False
        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == "--careful:false":
            options_storage.mismatch_corrector = False
            options_storage.careful = False
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        elif opt == "--diploid":
            options_storage.diploid_mode = True
        else:
            # getopt accepted the option but no branch above handles it.
            raise ValueError("unhandled option: " + opt)

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue and for --restart-from!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.restart_from:
        if options_storage.continue_mode:  # saving parameters specified with --restart-from
            if not support.dataset_is_empty(dataset_data):
                support.error("you cannot specify reads with --restart-from option!", log)
            options_storage.save_restart_options(log)
        else:  # overriding previous run parameters
            options_storage.load_restart_options()
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # NOTE(review): the YAML file is user-supplied; if `pyyaml` is
            # PyYAML, a safe loader would be preferable -- confirm.
            with open(options_storage.dataset_yaml_filename, 'r') as yaml_in:
                dataset_data = pyyaml.load(yaml_in)
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
    # Close the file explicitly so the dataset description is fully written
    # before anything else reads it.
    with open(options_storage.dataset_yaml_filename, 'w') as yaml_out:
        pyyaml.dump(dataset_data, yaml_out)

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if not support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_CONSTRUCTION):
        support.error('you should specify at least one unpaired, paired-end, or high-quality mate-pairs library!')

    options_storage.set_default_values()

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = options_storage.output_dir
    cfg["common"].__dict__["tmp_dir"] = options_storage.tmp_dir
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["iontorrent"] = options_storage.iontorrent
    cfg["dataset"].__dict__["yaml_filename"] = options_storage.dataset_yaml_filename
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        cfg["error_correction"].__dict__["iontorrent"] = options_storage.iontorrent

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
        cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
        if options_storage.read_buffer_size:
            cfg["assembly"].__dict__["read_buffer_size"] = options_storage.read_buffer_size

    # corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = None
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir
    return cfg, dataset_data
def check(self):
    '''
    Check correctness of parameters and normalise them in place.

    Problems are reported through `support.error` / `support.warning`.
    Comma-separated string options are converted to lists, the input
    spectrum is loaded, bootstrap data and a custom model function are
    loaded when configured, and finally `put_default_structures()` and
    `final_check()` are called.
    '''
    # `multinom` defaults to True only when a custom model file is given.
    if self.multinom is None:
        self.multinom = self.model_func_file is not None
    if self.pop_labels is not None:
        self.pop_labels = [x.strip() for x in self.pop_labels.split(',')]
    if self.ns is not None:
        self.ns = support.check_comma_sep_list(self.ns)

    # NOTE(review): this runs before the `input_file is None` check below,
    # so a missing input file reaches check_file_existence first -- confirm
    # that the helper tolerates None.
    self.input_file = support.check_file_existence(self.input_file)
    if self.resume_dir is not None:
        self.resume_dir = support.check_dir_existence(self.resume_dir)
    if self.resume_dir is not None and self.output_dir is None:
        # Resuming without an explicit output dir: derive one from resume_dir.
        self.output_dir = support.ensure_dir_existence(
            self.resume_dir + "_resumed", check_emptiness=True)
    elif self.output_dir is None:
        support.error("Parameter `Output directory` is required")
    else:
        self.output_dir = support.ensure_dir_existence(
            self.output_dir, check_emptiness=True)
    if self.input_file is None:
        support.error(
            "Parameter `Input file` is required")
    if self.theta is None:
        support.warning(
            "`Theta0` is not specified. It would be 1.0.")
    if self.gen_time is None:
        support.warning(
            "`Time for one generation` is not specified. Time will be in genetic units.")

    self.input_data, self.ns, self.pop_labels = support.load_spectrum(
        self.input_file, self.ns, self.pop_labels)
    self.ns = np.array(self.ns)
    self.number_of_populations = len(self.ns)

    # Linked or unlinked data
    if not self.linked_snp and self.boot_dir is not None:
        support.warning(
            "SNP's are marked as unlinked, so the directory with bootstrap will be ignored.")
    elif self.linked_snp:
        if self.boot_dir is not None:
            self.boot_dir = support.check_dir_existence(self.boot_dir)
            self.boots = gadma.Inference.load_bootstrap_data_from_dir(
                self.boot_dir, self.ns, self.pop_labels)

    # Custom model
    if self.model_func_file is not None:
        self.model_func_file = support.check_file_existence(self.model_func_file)
        file_with_model_func = imp.load_source('module', self.model_func_file)
        try:
            self.model_func = file_with_model_func.model_func
        except AttributeError:
            # Only a missing attribute means "no such function"; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            support.error(
                "File " + self.model_func_file + ' does not contain function named `model_func`.')
    if self.model_func_file is not None:
        if self.p_ids is not None:
            self.p_ids = support.check_comma_sep_list(self.p_ids, is_int=False)

    # Fractions are given as three comma-separated floats.
    self.fracs = [float(x) for x in self.fracs.split(",")]
    if len(self.fracs) != 3:
        support.error(
            "length of `Fractions` (Parameters of genetic algorithm) must be 3")
    self.frac_of_old_models = self.fracs[0]
    self.frac_of_mutated_models = self.fracs[1]
    self.frac_of_crossed_models = self.fracs[2]

    if self.moments_scenario and self.dadi_pts is not None:
        support.warning(
            "Moments doesn't use --pts argument, so it would be ignored")
    if self.dadi_pts is None:
        # Default grid sizes are derived from the largest sample size.
        max_n = max(self.ns)
        self.dadi_pts = [max_n, max_n + 10, max_n + 20]
    else:
        self.dadi_pts = support.check_comma_sep_list(self.dadi_pts)

    self.put_default_structures()
    self.final_check()
def fill_cfg(options_to_parse, log):
    """Parse SPAdes command-line options and build the run configuration.

    Parsed values are stored as attributes of ``options_storage``; the
    returned ``cfg`` dict is then filled from those attributes.  Besides
    parsing, this function creates the output directory when it is missing
    and writes the resolved dataset description to
    ``<output_dir>/input_dataset.yaml``.

    :param options_to_parse: list of command-line arguments
    :param log: logger passed on to the ``support`` helpers
    :return: ``(cfg, dataset_data)``, or ``(None, None)`` when ``--continue``
        was given
    :raises ValueError: if getopt accepted an option that no branch handles
    """
    try:
        options, not_options = getopt.gnu_getopt(
            options_to_parse, options_storage.short_options,
            options_storage.long_options)
    except getopt.GetoptError:
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER
    # paired-end libs and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for _ in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            # A non-numeric value must reach the error message below instead
            # of escaping as an unhandled ValueError from int().
            try:
                offset = int(arg)
            except ValueError:
                offset = None
            if offset in [33, 64]:
                options_storage.qvoffset = offset
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--rectangles":
            options_storage.rectangles = True
        # corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        else:
            # getopt accepted the option but no branch above handles it.
            raise ValueError("unhandled option: " + opt)

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # NOTE(review): the YAML file is user-supplied; if `pyyaml` is
            # PyYAML, a safe loader would be preferable -- confirm.
            with open(options_storage.dataset_yaml_filename, 'r') as yaml_in:
                dataset_data = pyyaml.load(yaml_in)
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
    # Close the file explicitly so the dataset description is fully written
    # before anything else reads it.
    with open(options_storage.dataset_yaml_filename, 'w') as yaml_out:
        pyyaml.dump(dataset_data, yaml_out)

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!')
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!')

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        # The temporary dir is a 'tmp' subdirectory of --tmp-dir when given,
        # otherwise of the error-correction output dir.
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    # corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir
    return cfg, dataset_data
try: options, datasets = getopt.gnu_getopt(sys.argv[1:], short_options, long_options) except getopt.GetoptError, err: print str(err) print "" usage() sys.exit(1) for opt, arg in options: if opt in ('-o', "--output-dir"): output_dir = arg make_latest_symlink = False elif opt in ('-r', "--reference"): support.check_file_existence(arg, "reference") reference = arg elif opt in ('-t', "--thread-num"): thread_num = int(arg) if thread_num < 1: thread_num = 1 elif opt in ('-b', "--bin-size"): if int(arg) > 0: bin_size = int(arg) elif opt in ('-k', "--kmer-size"): if int(arg) > 0: kmer = int(arg) elif opt in ('-x', "--max-is"): if int(arg) > 0: max_is = int(arg) elif opt in ('-s', "--skip-trimming"):
def fill_cfg(options_to_parse, log):
    """Parse SPAdes command-line options and build the run configuration.

    Parsed values are stored as attributes of ``options_storage``; the
    returned ``cfg`` dict is then filled from those attributes.  Besides
    parsing, this function creates the output directory when it is missing
    and writes the resolved dataset description to
    ``<output_dir>/input_dataset.yaml``.

    :param options_to_parse: list of command-line arguments
    :param log: logger passed on to the ``support`` helpers
    :return: ``(cfg, dataset_data)``, or ``(None, None)`` when
        ``options_storage.continue_mode`` is set after parsing
    :raises ValueError: if getopt accepted an option that no branch handles
    """
    try:
        options, not_options = getopt.gnu_getopt(
            options_to_parse, options_storage.short_options,
            options_storage.long_options)
    except getopt.GetoptError:
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    if len(not_options) > 1:
        for opt, arg in options:
            if opt == "-k" and arg.strip().endswith(','):
                support.error(
                    "Do not put spaces after commas in the list of k-mers sizes! Correct example: -k 21,33,55",
                    log)
        support.error(
            "Please specify option (e.g. -1, -2, -s, etc) for the following paths: "
            + ", ".join(not_options[1:]) + "\n", log)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER for
    # each type of short-reads libs.  A comprehension is required: "[{}]*num"
    # would repeat one and the same dict object.
    dataset_data = [{} for _ in range(
        options_storage.MAX_LIBS_NUMBER
        * len(options_storage.SHORT_READS_TYPES.keys())
        + len(options_storage.LONG_READS_TYPES))]

    # for parsing options from "previous run command"
    options_storage.continue_mode = False
    options_storage.k_mers = None

    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = os.path.abspath(arg)
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = os.path.abspath(arg)
        elif opt == "--configs-dir":
            options_storage.configs_dir = support.check_dir_existence(arg)
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            if arg == 'auto':
                options_storage.k_mers = arg
            else:
                options_storage.k_mers = list(map(int, arg.split(",")))
                for k in options_storage.k_mers:
                    if k < options_storage.MIN_K or k > options_storage.MAX_K:
                        support.error('wrong k value ' + str(k) + ': all k values should be between %d and %d' %
                                      (options_storage.MIN_K, options_storage.MAX_K), log)
                    if k % 2 == 0:
                        support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--iontorrent":
            options_storage.iontorrent = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--disable-gzip-output:false":
            options_storage.disable_gzip_output = False
        elif opt == "--disable-rr":
            options_storage.disable_rr = True
        elif opt == "--disable-rr:false":
            options_storage.disable_rr = False
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True
        elif opt == "--read-buffer-size":
            options_storage.read_buffer_size = int(arg)
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == "--restart-from":
            if arg not in ['ec', 'as', 'mc'] and not arg.startswith('k'):
                # The closing parenthesis was missing from this message.
                support.error("wrong value for --restart-from option: " + arg +
                              " (should be 'ec', 'as', 'k<int>', or 'mc')", log)
            options_storage.continue_mode = True
            options_storage.restart_from = arg
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if arg == 'auto':
                options_storage.qvoffset = arg
            elif arg in ['33', '64']:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value: ' + arg +
                              ' (should be either 33, 64, or \'auto\')', log)
        elif opt == "--cov-cutoff":
            if arg == 'auto' or arg == 'off':
                options_storage.cov_cutoff = arg
            elif support.is_float(arg) and float(arg) > 0.0:
                options_storage.cov_cutoff = float(arg)
            else:
                support.error('wrong value for --cov-cutoff option: ' + arg +
                              ' (should be a positive float number, or \'auto\', or \'off\')', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--debug:false":
            options_storage.developer_mode = False
        # corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--mismatch-correction:false":
            options_storage.mismatch_corrector = False
        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == "--careful:false":
            options_storage.mismatch_corrector = False
            options_storage.careful = False
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        elif opt == "--diploid":
            options_storage.diploid_mode = True
        else:
            # getopt accepted the option but no branch above handles it.
            raise ValueError("unhandled option: " + opt)

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue and for --restart-from!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.restart_from:
        if options_storage.continue_mode:  # saving parameters specified with --restart-from
            if not support.dataset_is_empty(dataset_data):
                support.error("you cannot specify reads with --restart-from option!", log)
            options_storage.save_restart_options(log)
        else:  # overriding previous run parameters
            options_storage.load_restart_options()
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # NOTE(review): the YAML file is user-supplied; if `pyyaml` is
            # PyYAML, a safe loader would be preferable -- confirm.
            with open(options_storage.dataset_yaml_filename, 'r') as yaml_in:
                dataset_data = pyyaml.load(yaml_in)
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
    # Close the file explicitly so the dataset description is fully written
    # before anything else reads it.
    with open(options_storage.dataset_yaml_filename, 'w') as yaml_out:
        pyyaml.dump(dataset_data, yaml_out)

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if not support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_CONSTRUCTION):
        support.error('you should specify at least one unpaired, paired-end, or high-quality mate-pairs library!')

    options_storage.set_default_values()

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = options_storage.output_dir
    cfg["common"].__dict__["tmp_dir"] = options_storage.tmp_dir
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["iontorrent"] = options_storage.iontorrent
    cfg["dataset"].__dict__["yaml_filename"] = options_storage.dataset_yaml_filename
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        cfg["error_correction"].__dict__["iontorrent"] = options_storage.iontorrent

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
        cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
        cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
        if options_storage.read_buffer_size:
            cfg["assembly"].__dict__["read_buffer_size"] = options_storage.read_buffer_size

    # corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = None
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir
    return cfg, dataset_data
log.addHandler(console) check_binaries(bin_home, log) # all parameters are stored here cfg = dict() # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs and MAX_LIBS_NUMBER mate-pair libs dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)] for opt, arg in options: if opt == "-o": options_storage.output_dir = arg elif opt == "--tmp-dir": options_storage.tmp_dir = arg elif opt == "--reference": options_storage.reference = support.check_file_existence(arg, "reference", log) elif opt == "--dataset": options_storage.dataset_yaml_filename = support.check_file_existence(arg, "dataset", log) elif opt in options_storage.reads_options: support.add_to_dataset(opt, arg, dataset_data) elif opt == "-k": options_storage.k_mers = map(int, arg.split(",")) for k in options_storage.k_mers: if k > 127: support.error("wrong k value " + str(k) + ": all k values should be less than 128", log) if k % 2 == 0: support.error("wrong k value " + str(k) + ": all k values should be odd", log) elif opt == "--sc":
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Option values are stored as attributes of the ``options_storage`` module;
    the per-stage configuration objects are then derived from those attributes.

    Args:
        options_to_parse: list of command-line arguments (without the program
            name), parsed with ``getopt.gnu_getopt``.
        log: logger forwarded to the ``support`` helpers for error reporting.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` maps a stage name ("common",
        "dataset", and optionally "error_correction", "assembly",
        "mismatch_corrector") to a config object, and ``dataset_data`` is the
        dataset description. ``(None, None)`` is returned when ``--continue``
        was given.

    Raises:
        ValueError: an option was accepted by getopt but is not handled here.
        SystemExit: on unparsable/empty options (status 1) and on
            ``-h``/``--help``/``--help-hidden`` (status 0).
    """
    try:
        options, _ = getopt.gnu_getopt(options_to_parse, options_storage.short_options,
                                       options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() is used instead of "except ... as exc" so that the same
        # source is valid on both Python 2 and Python 3.
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs
    # and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for _ in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            # list(...) so the values survive the validation loop on Python 3,
            # where map() returns a one-shot iterator
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously',
                              log)
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously',
                              log)
            options_storage.only_assembler = True
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            offset = int(arg)
            if offset in [33, 64]:
                options_storage.qvoffset = offset
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) +
                              ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--rectangles":
            options_storage.rectangles = True
        # corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--careful":
            # careful mode also switches the mismatch corrector on
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        else:
            # getopt accepted the option but no branch above handles it
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        # nothing is configured here when continuing
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # NOTE(review): the YAML file is user-supplied and pyyaml.load() may not be
            # restricted to plain data types, depending on the bundled pyyaml version;
            # consider a safe loader if one is available.
            # try/finally (rather than "with") keeps to the version-agnostic syntax
            # used elsewhere in this function while still closing the file.
            yaml_file = open(options_storage.dataset_yaml_filename, 'r')
            try:
                dataset_data = pyyaml.load(yaml_file)
            finally:
                yaml_file.close()
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' +
                          options_storage.dataset_yaml_filename + '):\n' + str(exc), log)
        # paths are resolved against the directory of the YAML file
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        # paths are resolved against the current working directory
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        # close explicitly so the dataset description is flushed to disk before
        # its path is handed on in cfg["dataset"]
        yaml_file = open(options_storage.dataset_yaml_filename, 'w')
        try:
            pyyaml.dump(dataset_data, yaml_file)
        finally:
            yaml_file.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library '
                      '(only mate-pairs libraries were found)!', log)
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!',
                      log)

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        # temporary files go to <tmp_dir>/tmp, defaulting to the stage's own output dir
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    # corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data