def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Parsed values are stored as attributes of the ``options_storage`` module;
    the per-stage settings are then copied into ``empty_config()`` objects.

    Args:
        options_to_parse: argument list handed to ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` maps section names ("common",
        "dataset", and optionally "error_correction", "assembly",
        "mismatch_corrector") to config objects, and ``dataset_data`` is the
        list of library descriptions. ``(None, None)`` is returned when
        ``--continue`` or ``--restart-from`` was given.

    Raises:
        ValueError: an option accepted by getopt has no handler here, or a
            numeric option value cannot be converted with ``int``.
        SystemExit: status 1 on a getopt error or an empty option list,
            status 0 after printing help.

    Problems with option values are reported through ``support.error``
    (presumably aborts the run -- confirm against ``support``).
    """
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse, options_storage.short_options,
                                                 options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() is the file's idiom for fetching the active exception
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    if len(not_options) > 1:
        # A stray positional path is often the tail of "-k 21, 33": give that hint first.
        for opt, arg in options:
            if opt == "-k" and arg.strip().endswith(','):
                support.error("Do not put spaces after commas in the list of k-mers sizes! "
                              "Correct example: -k 21,33,55", log)
        support.error("Please specify option (e.g. -1, -2, -s, etc) for the following paths: " +
                      ", ".join(not_options[1:]) + "\n", log)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER for each type of short-reads libs
    # ("[{}] * num" doesn't work here: it would repeat one shared dict)
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER *
                                      len(options_storage.SHORT_READS_TYPES.keys()))]

    # for parsing options from "previous run command"
    options_storage.continue_mode = False
    options_storage.k_mers = None

    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = os.path.abspath(arg)
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = os.path.abspath(arg)
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)

        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)

        elif opt == '-k':
            if arg == 'auto':
                options_storage.k_mers = arg
            else:
                options_storage.k_mers = list(map(int, arg.split(",")))
                for k in options_storage.k_mers:
                    if k < options_storage.MIN_K or k > options_storage.MAX_K:
                        support.error('wrong k value ' + str(k) + ': all k values should be between %d and %d' %
                                      (options_storage.MIN_K, options_storage.MAX_K), log)
                    if k % 2 == 0:
                        support.error('wrong k value ' + str(k) + ': all k values should be odd', log)

        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--iontorrent":
            options_storage.iontorrent = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--disable-gzip-output:false":
            options_storage.disable_gzip_output = False
        elif opt == "--disable-rr":
            options_storage.disable_rr = True
        elif opt == "--disable-rr:false":
            options_storage.disable_rr = False

        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_assembler = True

        elif opt == "--read-buffer-size":
            options_storage.read_buffer_size = int(arg)
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg

        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == "--restart-from":
            if arg not in ['ec', 'as', 'mc'] and not arg.startswith('k'):
                support.error("wrong value for --restart-from option: " + arg +
                              " (only 'ec', 'as', 'k<int>', 'mc' are available)", log)
            options_storage.continue_mode = True
            options_storage.restart_from = arg

        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if arg == 'auto':
                options_storage.qvoffset = arg
            elif arg in ['33', '64']:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)

        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--debug:false":
            options_storage.developer_mode = False

        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--mismatch-correction:false":
            options_storage.mismatch_corrector = False

        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == "--careful:false":
            options_storage.mismatch_corrector = False
            options_storage.careful = False

        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)

        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
            #break
        elif opt == "--diploid":
            options_storage.diploid_mode = True
        else:
            # getopt accepted the option but no branch above handles it
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue and for --restart-from!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.restart_from:
        if options_storage.continue_mode:  # saving parameters specified with --restart-from
            if not support.dataset_is_empty(dataset_data):
                support.error("you cannot specify reads with --restart-from option!", log)
            options_storage.save_restart_options(log)
        else:  # overriding previous run parameters
            options_storage.load_restart_options()
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # try/finally (not "with") closes the handle without relying on newer syntax
            yaml_file = open(options_storage.dataset_yaml_filename, 'r')
            try:
                # NOTE(review): load() is applied to a user-supplied file; if pyyaml
                # is PyYAML, consider safe_load() -- confirm before changing.
                dataset_data = pyyaml.load(yaml_file)
            finally:
                yaml_file.close()
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename +
                          '):\n' + str(exc), log)
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        # close explicitly so the dump is flushed to disk before anything reads it back
        yaml_file = open(options_storage.dataset_yaml_filename, 'w')
        try:
            pyyaml.dump(dataset_data, yaml_file)
        finally:
            yaml_file.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if not support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_CONSTRUCTION):
        support.error('you should specify at least one unpaired, paired-end, or high-quality mate-pairs library!',
                      log)

    options_storage.set_default_values()
    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = options_storage.output_dir
    cfg["common"].__dict__["tmp_dir"] = options_storage.tmp_dir
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["iontorrent"] = options_storage.iontorrent
    cfg["dataset"].__dict__["yaml_filename"] = options_storage.dataset_yaml_filename
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        cfg["error_correction"].__dict__["iontorrent"] = options_storage.iontorrent

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
        cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
        if options_storage.read_buffer_size:
            cfg["assembly"].__dict__["read_buffer_size"] = options_storage.read_buffer_size

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # keys containing '-' are not valid attribute names, hence the __dict__ writes
        cfg["mismatch_corrector"].__dict__["skip-masked"] = None
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Parsed values are stored as attributes of the ``options_storage`` module;
    the per-stage settings are then copied into ``empty_config()`` objects.

    Args:
        options_to_parse: argument list handed to ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` maps section names ("common",
        "dataset", and optionally "error_correction", "assembly",
        "mismatch_corrector") to config objects, and ``dataset_data`` is the
        list of library descriptions. ``(None, None)`` is returned when
        ``--continue`` was given.

    Raises:
        ValueError: an option accepted by getopt has no handler here, or a
            numeric option value (-k, -t, -m, -i) cannot be converted with
            ``int``.
        SystemExit: status 1 on a getopt error or an empty option list,
            status 0 after printing help.

    Problems with option values are reported through ``support.error``
    (presumably aborts the run -- confirm against ``support``).
    """
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse, options_storage.short_options,
                                                 options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() is the file's idiom for fetching the active exception
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs
    # and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)

        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)

        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)

        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True

        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_assembler = True

        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg

        elif opt == "--continue":
            options_storage.continue_mode = True

        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            # A non-numeric value must reach the friendly error below instead of
            # escaping as an uncaught ValueError from int().
            try:
                offset = int(arg)
            except ValueError:
                offset = None
            if offset in [33, 64]:
                options_storage.qvoffset = offset
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)

        elif opt == "--debug":
            options_storage.developer_mode = True

        elif opt == "--rectangles":
            options_storage.rectangles = True

        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True

        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True

        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)

        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
            #break
        else:
            # getopt accepted the option but no branch above handles it
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # try/finally (not "with") closes the handle without relying on newer syntax
            yaml_file = open(options_storage.dataset_yaml_filename, 'r')
            try:
                # NOTE(review): load() is applied to a user-supplied file; if pyyaml
                # is PyYAML, consider safe_load() -- confirm before changing.
                dataset_data = pyyaml.load(yaml_file)
            finally:
                yaml_file.close()
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename +
                          '):\n' + str(exc), log)
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        # close explicitly so the dump is flushed to disk before anything reads it back
        yaml_file = open(options_storage.dataset_yaml_filename, 'w')
        try:
            pyyaml.dump(dataset_data, yaml_file)
        finally:
            yaml_file.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library '
                      '(only mate-pairs libraries were found)!', log)
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!', log)

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        # temporary files go to <tmp_dir>/tmp, defaulting to the stage's own output directory
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # keys containing '-' are not valid attribute names, hence the __dict__ writes
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data
import sys  # local import: needed only for the sys.exc_info() idiom below

# Maps a unique key derived from the dataset file name to the list of read
# file paths of one library from that dataset.
datasets_dict = dict()
print("Analyzing datasets")
for dataset in datasets:
    try:
        # try/finally closes the handle even when parsing fails
        yaml_file = open(dataset, 'r')
        try:
            # NOTE(review): load() is applied to an external file; if pyyaml is
            # PyYAML, consider safe_load() -- confirm before changing.
            dataset_data = pyyaml.load(yaml_file)
        finally:
            yaml_file.close()
    except pyyaml.YAMLError:
        _, exc, _ = sys.exc_info()
        # report the file that actually failed to parse
        support.warning('skipping ' + dataset + ': exception caught while parsing YAML file (' +
                        dataset + '):\n' + str(exc))
        continue

    dataset_data = support.correct_dataset(dataset_data)
    for lib_id, library in enumerate(dataset_data):
        print("processing lib#" + str(lib_id) + " of " + dataset)
        basename = os.path.splitext(os.path.basename(dataset))[0]
        cur_key = basename
        i = 1
        # Try "<basename>", "<basename>_1", "<basename>_2", ... until a free key is
        # found; the counter must advance or the loop never ends once "_1" is taken.
        while cur_key in datasets_dict:
            cur_key = basename + "_" + str(i)
            i += 1
        cur_reads = []
        for key, value in library.items():
            # only entries whose key ends with 'reads' are treated as lists of read files
            if key.endswith('reads'):
                for reads_file in value:
                    cur_reads.append(get_full_path(dataset, reads_file))
        datasets_dict[cur_key] = cur_reads
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Parsed values are stored as attributes of the ``options_storage`` module;
    the per-stage settings are then copied into ``empty_config()`` objects.

    Args:
        options_to_parse: argument list handed to ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` maps section names ("common",
        "dataset", and optionally "error_correction", "assembly",
        "mismatch_corrector") to config objects, and ``dataset_data`` is the
        list of library descriptions. ``(None, None)`` is returned when
        ``--continue`` or ``--restart-from`` was given.

    Raises:
        ValueError: an option accepted by getopt has no handler here, or a
            numeric option value cannot be converted with ``int``.
        SystemExit: status 1 on a getopt error or an empty option list,
            status 0 after printing help.

    Problems with option values are reported through ``support.error``
    (presumably aborts the run -- confirm against ``support``).
    """
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse, options_storage.short_options,
                                                 options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() is the file's idiom for fetching the active exception
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    if len(not_options) > 1:
        # A stray positional path is often the tail of "-k 21, 33": give that hint first.
        for opt, arg in options:
            if opt == "-k" and arg.strip().endswith(','):
                support.error("Do not put spaces after commas in the list of k-mers sizes! "
                              "Correct example: -k 21,33,55", log)
        support.error("Please specify option (e.g. -1, -2, -s, etc) for the following paths: " +
                      ", ".join(not_options[1:]) + "\n", log)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER for each type of short-reads libs
    # ("[{}]*num" doesn't work here: it would repeat one shared dict)
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER *
                                      len(options_storage.SHORT_READS_TYPES.keys()) +
                                      len(options_storage.LONG_READS_TYPES))]

    # for parsing options from "previous run command"
    options_storage.continue_mode = False
    options_storage.k_mers = None

    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = os.path.abspath(arg)
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = os.path.abspath(arg)
        elif opt == "--configs-dir":
            options_storage.configs_dir = support.check_dir_existence(arg)
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)

        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)

        elif opt == '-k':
            if arg == 'auto':
                options_storage.k_mers = arg
            else:
                options_storage.k_mers = list(map(int, arg.split(",")))
                for k in options_storage.k_mers:
                    if k < options_storage.MIN_K or k > options_storage.MAX_K:
                        support.error('wrong k value ' + str(k) + ': all k values should be between %d and %d' %
                                      (options_storage.MIN_K, options_storage.MAX_K), log)
                    if k % 2 == 0:
                        support.error('wrong k value ' + str(k) + ': all k values should be odd', log)

        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--iontorrent":
            options_storage.iontorrent = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--disable-gzip-output:false":
            options_storage.disable_gzip_output = False
        elif opt == "--disable-rr":
            options_storage.disable_rr = True
        elif opt == "--disable-rr:false":
            options_storage.disable_rr = False

        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_assembler = True

        elif opt == "--read-buffer-size":
            options_storage.read_buffer_size = int(arg)
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg

        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == "--restart-from":
            if arg not in ['ec', 'as', 'mc'] and not arg.startswith('k'):
                support.error("wrong value for --restart-from option: " + arg +
                              " (should be 'ec', 'as', 'k<int>', or 'mc')", log)
            options_storage.continue_mode = True
            options_storage.restart_from = arg

        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if arg == 'auto':
                options_storage.qvoffset = arg
            elif arg in ['33', '64']:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value: ' + arg +
                              ' (should be either 33, 64, or \'auto\')', log)
        elif opt == "--cov-cutoff":
            if arg == 'auto' or arg == 'off':
                options_storage.cov_cutoff = arg
            elif support.is_float(arg) and float(arg) > 0.0:
                options_storage.cov_cutoff = float(arg)
            else:
                support.error('wrong value for --cov-cutoff option: ' + arg +
                              ' (should be a positive float number, or \'auto\', or \'off\')', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)

        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--debug:false":
            options_storage.developer_mode = False

        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--mismatch-correction:false":
            options_storage.mismatch_corrector = False

        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == "--careful:false":
            options_storage.mismatch_corrector = False
            options_storage.careful = False

        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)

        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
            #break
        elif opt == "--diploid":
            options_storage.diploid_mode = True
        else:
            # getopt accepted the option but no branch above handles it
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue and for --restart-from!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.restart_from:
        if options_storage.continue_mode:  # saving parameters specified with --restart-from
            if not support.dataset_is_empty(dataset_data):
                support.error("you cannot specify reads with --restart-from option!", log)
            options_storage.save_restart_options(log)
        else:  # overriding previous run parameters
            options_storage.load_restart_options()
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # try/finally (not "with") closes the handle without relying on newer syntax
            yaml_file = open(options_storage.dataset_yaml_filename, 'r')
            try:
                # NOTE(review): load() is applied to a user-supplied file; if pyyaml
                # is PyYAML, consider safe_load() -- confirm before changing.
                dataset_data = pyyaml.load(yaml_file)
            finally:
                yaml_file.close()
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename +
                          '):\n' + str(exc), log)
        dataset_data = support.relative2abs_paths(dataset_data,
                                                  os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        # close explicitly so the dump is flushed to disk before anything reads it back
        yaml_file = open(options_storage.dataset_yaml_filename, 'w')
        try:
            pyyaml.dump(dataset_data, yaml_file)
        finally:
            yaml_file.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if not support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_CONSTRUCTION):
        support.error('you should specify at least one unpaired, paired-end, or high-quality mate-pairs library!',
                      log)

    options_storage.set_default_values()
    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = options_storage.output_dir
    cfg["common"].__dict__["tmp_dir"] = options_storage.tmp_dir
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["iontorrent"] = options_storage.iontorrent
    cfg["dataset"].__dict__["yaml_filename"] = options_storage.dataset_yaml_filename
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        cfg["error_correction"].__dict__["iontorrent"] = options_storage.iontorrent

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
        cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
        cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
        if options_storage.read_buffer_size:
            cfg["assembly"].__dict__["read_buffer_size"] = options_storage.read_buffer_size

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # keys containing '-' are not valid attribute names, hence the __dict__ writes
        cfg["mismatch_corrector"].__dict__["skip-masked"] = None
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data
raise ValueError if not options_storage.output_dir: support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log) if not os.path.isdir(options_storage.output_dir): os.makedirs(options_storage.output_dir) if options_storage.dataset_yaml_filename: try: dataset_data = pyyaml.load(file(options_storage.dataset_yaml_filename, "r")) except pyyaml.YAMLError, exc: support.error( "exception caught while parsing YAML file (" + options_storage.dataset_yaml_filename + "):\n" + str(exc) ) else: dataset_data = support.correct_dataset(dataset_data) options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml") pyyaml.dump(dataset_data, file(options_storage.dataset_yaml_filename, "w")) support.check_dataset_reads(dataset_data, log) if support.dataset_has_only_mate_pairs_libraries(dataset_data): support.error( "you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!" ) if options_storage.rectangles and (len(dataset_data) > 1): support.error("rectangle graph algorithm for repeat resolution cannot work with multiple libraries!") ### FILLING cfg cfg["common"] = empty_config() cfg["dataset"] = empty_config() if not options_storage.only_assembler:
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    The parsed values are stored as attributes of the ``options_storage``
    module; the per-stage settings are then copied into ``cfg``.

    Parameters:
        options_to_parse: list of command-line arguments handed to
            ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` maps a section name ("common",
        "dataset", "error_correction", "assembly", "mismatch_corrector") to
        an ``empty_config()`` object and ``dataset_data`` is the dataset
        description that was also dumped to ``input_dataset.yaml`` in the
        output directory.  ``(None, None)`` is returned when ``--continue``
        was given.

    Raises:
        ValueError: getopt accepted an option that has no handler below.
        SystemExit: on a getopt error, an empty option list (status 1) or
            on ``-h``/``--help``/``--help-hidden`` (status 0).

    NOTE(review): ``support.error`` is defined elsewhere; the code below
    relies on it aborting the run (e.g. ``os.path.isdir`` is called on
    ``output_dir`` right after the "not set" error) -- confirm.
    """
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse,
                                                 options_storage.short_options,
                                                 options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() is used instead of "except ... as exc" / "except ..., exc"
        # so the same source parses under both old and new interpreters
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs
    # and MAX_LIBS_NUMBER mate-pair libs. A comprehension is required: "[{}] * n" would
    # put n references to one shared dict into the list.
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously', log)
            options_storage.only_assembler = True
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt in ('-t', "--threads"):
            options_storage.threads = int(arg)
        elif opt in ('-m', "--memory"):
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            offset = int(arg)
            if offset in [33, 64]:
                options_storage.qvoffset = offset
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt in ('-i', "--iterations"):
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--rectangles":
            options_storage.rectangles = True
        # corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--careful":
            # --careful implies mismatch correction
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt in ('-h', "--help"):
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        else:
            # getopt accepted the option (it is listed in short_options/long_options)
            # but no branch above handles it: name it so the mismatch is easy to find
            raise ValueError("unhandled command-line option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        # nothing is configured here in --continue mode; the caller gets no cfg/dataset
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            # try/finally (not "with") closes the file without requiring a newer
            # interpreter than the rest of this function does
            yaml_input = open(options_storage.dataset_yaml_filename, 'r')
            try:
                # NOTE(review): pyyaml.load on a user-supplied file can construct
                # arbitrary objects with an unsafe loader; consider safe_load if the
                # bundled pyyaml provides it.
                dataset_data = pyyaml.load(yaml_input)
            finally:
                yaml_input.close()
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc), log)
        # paths inside the YAML file are resolved against the directory of that file
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        # paths given on the command line are resolved against the current directory
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
    yaml_output = open(options_storage.dataset_yaml_filename, 'w')
    try:
        pyyaml.dump(dataset_data, yaml_output)
    finally:
        # close explicitly so the dump is flushed before anything reads the file
        yaml_output.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!', log)
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!', log)

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        # the temporary files always live in a "tmp" subdirectory of the chosen location
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    # corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # keys with a dash are not valid attribute names, hence the __dict__ writes
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data