os.makedirs(options_storage.output_dir) if options_storage.dataset_yaml_filename: try: dataset_data = pyyaml.load(file(options_storage.dataset_yaml_filename, "r")) except pyyaml.YAMLError, exc: support.error( "exception caught while parsing YAML file (" + options_storage.dataset_yaml_filename + "):\n" + str(exc) ) else: dataset_data = support.correct_dataset(dataset_data) options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml") pyyaml.dump(dataset_data, file(options_storage.dataset_yaml_filename, "w")) support.check_dataset_reads(dataset_data, log) if support.dataset_has_only_mate_pairs_libraries(dataset_data): support.error( "you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!" ) if options_storage.rectangles and (len(dataset_data) > 1): support.error("rectangle graph algorithm for repeat resolution cannot work with multiple libraries!") ### FILLING cfg cfg["common"] = empty_config() cfg["dataset"] = empty_config() if not options_storage.only_assembler: cfg["error_correction"] = empty_config() if not options_storage.only_error_correction: cfg["assembly"] = empty_config() # common
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Options are parsed with ``getopt.gnu_getopt`` and recorded as attributes
    of the ``options_storage`` module. The dataset description is either
    loaded from the YAML file given with ``--dataset`` or assembled from the
    read options, then written to ``input_dataset.yaml`` in the output
    directory. Finally the per-stage settings are collected into
    ``empty_config()`` objects.

    Args:
        options_to_parse: argument list handed to ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` is a dict with the sections
        "common" and "dataset" plus, depending on the options,
        "error_correction", "assembly" and "mismatch_corrector".
        ``(None, None)`` is returned when ``--continue`` was given.

    Raises:
        ValueError: if getopt accepted an option that no branch below
            handles, or if a numeric option value cannot be converted
            by ``int()``.
        SystemExit: via ``sys.exit`` after printing usage (parse error,
            empty option list, ``-h``/``--help``, ``--help-hidden``).
    """
    try:
        options, not_options = getopt.gnu_getopt(
            options_to_parse, options_storage.short_options,
            options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() instead of "except ... as exc" keeps the syntax
        # valid on both old Python 2 and Python 3 interpreters
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)
    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER
    # paired-end libs and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(
                arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(
                arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if int(arg) in [33, 64]:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--rectangles":
            options_storage.rectangles = True
        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--careful":
            # --careful also switches the mismatch corrector on
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset(
                '-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset(
                '-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        else:
            # getopt accepted the option but nothing above handles it;
            # name it so the mismatch is easy to track down
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        # NOTE(review): pyyaml.load on a user-supplied file; if this is the
        # stock PyYAML loader it can construct arbitrary objects -- consider
        # a safe loader if the input may be untrusted.
        # try/finally (rather than "with") closes the handle without
        # requiring a newer interpreter than the rest of the file does.
        yaml_in = open(options_storage.dataset_yaml_filename, 'r')
        try:
            try:
                dataset_data = pyyaml.load(yaml_in)
            except pyyaml.YAMLError:
                _, exc, _ = sys.exc_info()
                support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        finally:
            yaml_in.close()
        dataset_data = support.relative2abs_paths(
            dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    # NOTE(review): the dataset is re-written to the output dir for both
    # sources (block nesting reconstructed from flattened text) -- confirm
    options_storage.dataset_yaml_filename = os.path.join(
        options_storage.output_dir, "input_dataset.yaml")
    yaml_out = open(options_storage.dataset_yaml_filename, 'w')
    try:
        pyyaml.dump(dataset_data, yaml_out)
    finally:
        # close explicitly so the file is flushed before anything reads it
        yaml_out.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!')
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!')

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        # the working files always go into a "tmp" subdirectory
        cfg["error_correction"].tmp_dir = os.path.join(
            os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # hyphenated keys are not valid attribute names, hence __dict__
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data
def fill_cfg(options_to_parse, log):
    """Parse command-line options and build the pipeline configuration.

    Options are parsed with ``getopt.gnu_getopt`` and recorded as attributes
    of the ``options_storage`` module. The dataset description is either
    loaded from the YAML file given with ``--dataset`` or assembled from the
    read options, then written to ``input_dataset.yaml`` in the output
    directory. Finally the per-stage settings are collected into
    ``empty_config()`` objects.

    Args:
        options_to_parse: argument list handed to ``getopt.gnu_getopt``.
        log: logger object forwarded to the ``support`` helpers.

    Returns:
        ``(cfg, dataset_data)`` where ``cfg`` is a dict with the sections
        "common" and "dataset" plus, depending on the options,
        "error_correction", "assembly" and "mismatch_corrector".
        ``(None, None)`` is returned when ``--continue`` was given.

    Raises:
        ValueError: if getopt accepted an option that no branch below
            handles, or if a numeric option value cannot be converted
            by ``int()``.
        SystemExit: via ``sys.exit`` after printing usage (parse error,
            empty option list, ``-h``/``--help``, ``--help-hidden``).
    """
    try:
        options, not_options = getopt.gnu_getopt(
            options_to_parse, options_storage.short_options,
            options_storage.long_options)
    except getopt.GetoptError:
        # sys.exc_info() instead of "except ... as exc" keeps the syntax
        # valid on both old Python 2 and Python 3 interpreters
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)
    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER
    # paired-end libs and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)
        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)
        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)
        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True
        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True
        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg
        elif opt == "--continue":
            options_storage.continue_mode = True
        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if int(arg) in [33, 64]:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)
        elif opt == "--debug":
            options_storage.developer_mode = True
        elif opt == "--rectangles":
            options_storage.rectangles = True
        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True
        elif opt == "--careful":
            # --careful also switches the mismatch corrector on
            options_storage.mismatch_corrector = True
            options_storage.careful = True
        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)
        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
        else:
            # getopt accepted the option but nothing above handles it;
            # name it so the mismatch is easy to track down
            raise ValueError("unhandled option: " + str(opt))

    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        # NOTE(review): pyyaml.load on a user-supplied file; if this is the
        # stock PyYAML loader it can construct arbitrary objects -- consider
        # a safe loader if the input may be untrusted.
        # try/finally (rather than "with") closes the handle without
        # requiring a newer interpreter than the rest of the file does.
        yaml_in = open(options_storage.dataset_yaml_filename, 'r')
        try:
            try:
                dataset_data = pyyaml.load(yaml_in)
            except pyyaml.YAMLError:
                _, exc, _ = sys.exc_info()
                support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        finally:
            yaml_in.close()
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
    # NOTE(review): the dataset is re-written to the output dir for both
    # sources (block nesting reconstructed from flattened text) -- confirm
    options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
    yaml_out = open(options_storage.dataset_yaml_filename, 'w')
    try:
        pyyaml.dump(dataset_data, yaml_out)
    finally:
        # close explicitly so the file is flushed before anything reads it
        yaml_out.close()

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!')
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!')

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        # the working files always go into a "tmp" subdirectory
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        # hyphenated keys are not valid attribute names, hence __dict__
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data