def Run(self):
    self.__CheckInputExistance()
    command_line = "%s %s %s --limit=%d" % (IgRepConConfig().run_report_supernodes,
                                            self.__params.io.compressed_final_clusters_fa,
                                            self.__params.io.final_stripped_clusters_fa,
                                            self.__params.min_cluster_size)
    support.sys_call(command_line, self._log)
def Run(self):
    self.__CheckInputExistance()
    command_line = "%s %s %s %s" % (IgRepConConfig().run_pair_reads_merger,
                                    self.__params.left_reads,
                                    self.__params.right_reads,
                                    self.__params.single_reads)
    support.sys_call(command_line, self._log)
def compress_dataset_files(dataset_data, ext_python_modules_home, max_threads, log):
    log.info("\n== Compressing corrected reads (with gzip)")
    to_compress = []
    for reads_library in dataset_data:
        for key, value in reads_library.items():
            if key.endswith('reads'):
                compressed_reads_filenames = []
                for reads_file in value:
                    if not os.path.isfile(reads_file):
                        support.error('something went wrong and file with corrected reads (' + reads_file + ') is missing!', log)
                    to_compress.append(reads_file)
                    compressed_reads_filenames.append(reads_file + ".gz")
                reads_library[key] = compressed_reads_filenames

    if len(to_compress):
        pigz_path = support.which('pigz')
        if pigz_path:
            for reads_file in to_compress:
                support.sys_call([pigz_path, '-f', '-7', '-p', str(max_threads), reads_file], log)
        else:
            addsitedir(ext_python_modules_home)
            if sys.version.startswith('2.'):
                from joblib2 import Parallel, delayed
            elif sys.version.startswith('3.'):
                from joblib3 import Parallel, delayed
            n_jobs = min(len(to_compress), max_threads)
            outputs = Parallel(n_jobs=n_jobs)(delayed(support.sys_call)(['gzip', '-f', '-7', reads_file])
                                              for reads_file in to_compress)
            for output in outputs:
                if output:
                    log.info(output)
def run_corrector(configs_dir, execution_home, cfg, ext_python_modules_home, log, to_correct, result):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    dir_util.copy_tree(os.path.join(configs_dir, "corrector"), dst_configs, preserve_times=False)
    cfg_file_name = os.path.join(dst_configs, "corrector.info")

    cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")

    prepare_config_corr(cfg_file_name, cfg, ext_python_modules_home)
    binary_name = "corrector"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name), os.path.abspath(to_correct)]

    log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")
    log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")
    support.sys_call(command, log)

    if not os.path.isfile(result):
        support.error("Mismatch correction finished abnormally: " + result + " not found!")
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction\n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"), {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)
    command = [os.path.join(execution_home, "spades-truseq-scfcorrection"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
def run_iteration(configs_dir, execution_home, cfg, log, K, use_additional_contigs, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    dst_configs = os.path.join(data_dir, "configs")
    shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_spades(cfg_file_name, cfg, log, use_additional_contigs, K, last_one)

    prev_K = K

    command = os.path.join(execution_home, "spades") + " " + \
              os.path.abspath(cfg_file_name)

    if os.path.isdir(bin_reads_dir):
        if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
            for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
                cor_index = cor_filename.rfind("_cor")
                new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:]
                shutil.move(cor_filename, new_bin_filename)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    support.sys_call(command, log)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    log.info("\n== Running scaffold correction\n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)
    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
def run_igrec(input_file, output_dir, log=None,
              tau=4, min_fillin=0.6, loci="all", threads=16, additional_args="",
              min_sread_size=5, remove_tmp=True):
    if log is None:
        log = FakeLog()
    args = {"path": path_to_igrec,
            "tau": tau,
            "min_fillin": min_fillin,
            "loci": loci,
            "threads": threads,
            "input_file": input_file,
            "output_dir": output_dir,
            "min_sread_size": min_sread_size,
            "additional_args": additional_args}
    timer = Timer()
    support.sys_call("%(path)s/igrec.py --tau=%(tau)d --min-fillin=%(min_fillin)f -t %(threads)d --loci %(loci)s -s %(input_file)s -o %(output_dir)s --min-sread-size %(min_sread_size)d %(additional_args)s" % args,
                     log=log)
    timer.stamp(output_dir + "/time.txt")
    if remove_tmp:
        rmdir(output_dir + "/vj_finder")
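For reference, a hypothetical invocation of this wrapper could look as follows; the input path and output directory are placeholders, not files shipped with the project, and only parameters shown in the signature above are used:

# Hypothetical example call; paths are placeholders.
run_igrec("reads/input_reads.fa", "igrec_results",
          tau=4, loci="IGH", threads=8)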
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction\n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"), {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)
    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
def run_ig_simulator(output_dir, log=None,
                     chain="HC", num_bases=100, num_mutated=1000, repertoire_size=5000,
                     tcr=False):
    if log is None:
        log = FakeLog()
    assert chain in ["HC", "LC"]
    args = {"path": path_to_ig_simulator if not tcr else path_to_ig_simulator_tcr,
            "output_dir": output_dir,
            "chain": chain,
            "num_bases": num_bases,
            "num_mutated": num_mutated,
            "repertoire_size": repertoire_size}
    timer = Timer()
    cmd = "%(path)s/ig_simulator.py --chain-type %(chain)s --num-bases %(num_bases)d --num-mutated %(num_mutated)d --repertoire-size %(repertoire_size)d -o %(output_dir)s --skip-drawing" % args
    if tcr:
        vgenes = igrec_dir + "/data/germline/human/TCR/TRBV.fa"
        jgenes = igrec_dir + "/data/germline/human/TCR/TRBJ.fa"
        dgenes = igrec_dir + "/data/germline/human/TCR/TRBD.fa"
        cmd += " --vgenes=" + vgenes + " --jgenes=" + jgenes + " --dgenes=" + dgenes
    support.sys_call(cmd, log=log)
    timer.stamp(output_dir + "/time.txt")
def simulate_data(input_file, output_dir, log=None, **kwargs):
    import tempfile
    import shutil

    if log is None:
        log = FakeLog()
    mkdir_p(output_dir)
    temp_dir = tempfile.mkdtemp()
    run_igrec(input_file, temp_dir, remove_tmp=False, tau=1)  # Run IgReC for VJF output
    input_file = temp_dir + "/vj_finder/cleaned_reads.fa"
    simulated_repertoire_to_rcm(input_file, "%s/final_repertoire.rcm" % output_dir)
    simulated_repertoire_to_final_repertoire(input_file, "%s/final_repertoire.fa.gz" % output_dir)
    args = {"path": igrec_dir,
            "repertoire": output_dir + "/final_repertoire.fa.gz",
            "rcm": output_dir + "/final_repertoire.rcm"}
    support.sys_call("%(path)s/py/ig_compress_equal_clusters.py %(repertoire)s %(repertoire)s -r %(rcm)s" % args,
                     log=log)
    # TODO factor this stage
    jit_fx_file(input_file, "%s/input_reads.fa.gz" % output_dir, **kwargs)
    shutil.rmtree(temp_dir)
def run_presto(input_file, output_dir, log=None, remove_tmp=True):
    if log is None:
        log = FakeLog()
    mkdir_p(output_dir)

    # gunzip
    input_file_new = "%s/input_reads.fasta" % output_dir
    fastx2fastx(input_file, input_file_new)

    args = {"input_file": input_file_new,
            "output_dir": output_dir}
    timer = Timer()
    support.sys_call("CollapseSeq.py -s %(input_file)s --outdir %(output_dir)s --outname presto" % args,
                     log=log)
    timer.stamp(output_dir + "/time.txt")

    presto_output = output_dir + "/presto_collapse-unique.fasta"
    repertoire_fa = output_dir + "/final_repertoire.fa"

    with smart_open(presto_output) as fin, smart_open(repertoire_fa, "w") as fout:
        for i, record in enumerate(SeqIO.parse(fin, idFormatByFileName(presto_output))):
            id = record.description
            size = parse_presto_id(id)
            record.id = record.description = "cluster___%d___size___%d" % (i, size)
            SeqIO.write(record, fout, "fasta")

    if remove_tmp:
        os.remove(input_file_new)
        os.remove(presto_output)
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one): data_dir = os.path.join(cfg.output_dir, "K%d" % K) stage = BASE_STAGE saves_dir = os.path.join(data_dir, 'saves') dst_configs = os.path.join(data_dir, "configs") cfg_file_name = os.path.join(dst_configs, "config.info") if options_storage.continue_mode: if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))): log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)") return if options_storage.restart_from and options_storage.restart_from.find(":") != -1: stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:] support.continue_from_here(log) if stage != BASE_STAGE: if not os.path.isdir(saves_dir): support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir)) else: if os.path.exists(data_dir): shutil.rmtree(data_dir) os.makedirs(data_dir) shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs) # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.info.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) log.info("\n== Running assembler: " + ("K%d" % K) + "\n") if prev_K: additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta") if not os.path.isfile(additional_contigs_fname): support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log) additional_contigs_fname = None else: additional_contigs_fname = None if "read_buffer_size" in cfg.__dict__: construction_cfg_file_name = os.path.join(dst_configs, "construction.info") process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log) prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one) command = [os.path.join(execution_home, "spades"), cfg_file_name] ## this code makes sense for src/debruijn/simplification.cpp: corrected_and_save_reads() function which is not used now # bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads") # if os.path.isdir(bin_reads_dir): # if glob.glob(os.path.join(bin_reads_dir, "*_cor*")): # for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")): # cor_index = cor_filename.rfind("_cor") # new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:] # shutil.move(cor_filename, new_bin_filename) support.sys_call(command, log)
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg, not_used_dataset_data, ext_python_modules_home, log): addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): import pyyaml2 as pyyaml elif sys.version.startswith('3.'): import pyyaml3 as pyyaml dst_configs = os.path.join(cfg.output_dir, "configs") if os.path.exists(dst_configs): shutil.rmtree(dst_configs) if cfg.iontorrent: dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False) cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg") else: dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False) cfg_file_name = os.path.join(dst_configs, "config.info") # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_") if cfg.iontorrent: prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home) binary_name = "ionhammer" else: prepare_config_bh(cfg_file_name, cfg, log) binary_name = "hammer" command = [os.path.join(execution_home, binary_name), os.path.abspath(cfg_file_name)] log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n") support.sys_call(command, log) if not os.path.isfile(corrected_dataset_yaml_filename): support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!") corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r')) remove_not_corrected_reads(cfg.output_dir) is_changed = False if cfg.gzip_output: is_changed = True compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log) if not_used_dataset_data: is_changed = True corrected_dataset_data += not_used_dataset_data if is_changed: pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w')) log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n") if os.path.isdir(cfg.tmp_dir): shutil.rmtree(cfg.tmp_dir)
def Run(self, log):
    log.info("Running %s" % self.cmdl)
    try:
        support.sys_call(self.cmdl, log, self.cwd)
    except:
        log.error("Failed to run '%s':\n%s" % (self.cmdl, sys.exc_info()))
        return -1
    log.info("Returned 0")
    return 0
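All of these wrappers funnel shell commands through support.sys_call, passing the command either as a string or as an argument list, plus a logger and sometimes a working directory. The project's real helper is not shown in this listing; purely as a mental model, a minimal sketch of such a helper (names and behavior assumed, not the actual SPAdes/IgReC implementation) might look like this:

import shlex
import subprocess

def sys_call_sketch(command, log, cwd=None):
    # Accept either a preformatted command string or an argument list,
    # mirroring how the callers above pass both forms.
    cmd_list = shlex.split(command) if isinstance(command, str) else command
    proc = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, cwd=cwd)
    output = proc.communicate()[0].decode(errors="replace")
    if output:
        log.info(output)
    if proc.returncode:
        raise RuntimeError("Command %s finished with non-zero exit code %d"
                           % (cmd_list, proc.returncode))
    return output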
def Run(self):
    self.__CheckInputExistance()
    command_line = IgRepConConfig().run_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                   " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file
    support.sys_call(command_line, self._log)
    command_line = "%s %s %s --limit=%d" % (IgRepConConfig().run_report_supernodes,
                                            self.__params.io.compressed_reads,
                                            self.__params.io.supernodes_file,
                                            self.__params.min_cluster_size)
    support.sys_call(command_line, self._log)
def Run(self):
    self.__CheckInputExistance()
    command_line = "%s %s %s -T %s -m %s -r %s -R %s" % (IgRepConConfig().run_compress_equal_clusters,
                                                         self.__params.io.uncompressed_final_clusters_fa,
                                                         self.__params.io.compressed_final_clusters_fa,
                                                         self.__params.io.tmp_compressed_clusters_fa,
                                                         self.__params.io.tmp_compressed_clusters_map,
                                                         self.__params.io.uncompressed_final_rcm,
                                                         self.__params.io.compressed_final_rcm)
    support.sys_call(command_line, self._log)
def main():
    args = parse_args()

    # create logger
    log = logging.getLogger("Mismatch correction " + args.assembly_type)
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter("%(message)s"))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    # moving assembled contigs (scaffolds) to misc dir
    if os.path.isfile(args.corrected):
        shutil.move(args.corrected, args.assembled)

    # TODO: the existence check on the assembled file could be done only here, possibly skipping the stage
    if not os.path.isfile(args.assembled) or os.path.getsize(args.assembled) == 0:
        log.info("\n== Skipping processing of %s (empty file)\n" % args.assembly_type)
    else:
        log.info("\n== Processing of %s\n" % args.assembly_type)

        tmp_dir_for_corrector = os.path.join(args.output_dir, "mismatch_corrector", args.assembly_type)

        # correcting
        result_corrected_filename = os.path.join(tmp_dir_for_corrector, "corrected_contigs.fasta")
        dst_configs = os.path.join(tmp_dir_for_corrector, "configs")
        cfg_file_name = os.path.join(dst_configs, "corrector.info")

        binary_name = "spades-corrector-core"
        command = [os.path.join(args.bin_home, binary_name),
                   os.path.abspath(cfg_file_name), os.path.abspath(args.assembled)]

        log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")
        log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")
        log.info("Run: " + ' '.join(command))
        support.sys_call(command, log)

        if not os.path.isfile(result_corrected_filename):
            log.error("mismatch correction finished abnormally: %s not found!" % result_corrected_filename)
        if os.path.isfile(result_corrected_filename):
            shutil.copyfile(result_corrected_filename, args.corrected)
def move_dataset_files(dataset_data, dst, ext_python_modules_home, max_threads, log, gzip=False): to_compress = [] for reads_library in dataset_data: for key, value in reads_library.items(): if key.endswith('reads'): moved_reads_files = [] for reads_file in value: dst_filename = os.path.join(dst, os.path.basename(reads_file)) # TODO: fix problem with files with the same basenames in Hammer binary! if not os.path.isfile(reads_file): if (not gzip and os.path.isfile(dst_filename)) or ( gzip and os.path.isfile(dst_filename + '.gz')): support.warning( 'file with corrected reads (' + reads_file + ') is the same in several libraries', log) if gzip: dst_filename += '.gz' else: support.error( 'something went wrong and file with corrected reads (' + reads_file + ') is missing!', log) else: shutil.move(reads_file, dst_filename) if gzip: to_compress.append(dst_filename) dst_filename += '.gz' moved_reads_files.append(dst_filename) reads_library[key] = moved_reads_files if len(to_compress): pigz_path = support.which('pigz') if pigz_path: for reads_file in to_compress: support.sys_call([ pigz_path, '-f', '-7', '-p', str(max_threads), reads_file ], log) else: addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): from joblib2 import Parallel, delayed elif sys.version.startswith('3.'): from joblib3 import Parallel, delayed n_jobs = min(len(to_compress), max_threads) outputs = Parallel(n_jobs=n_jobs)( delayed(support.sys_call)(['gzip', '-f', '-7', reads_file]) for reads_file in to_compress) for output in outputs: if output: log.info(output)
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one): data_dir = os.path.join(cfg.output_dir, "K%d" % K) stage = BASE_STAGE saves_dir = os.path.join(data_dir, 'saves') dst_configs = os.path.join(data_dir, "configs") if options_storage.continue_mode: if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))): log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)") return if options_storage.restart_from and options_storage.restart_from.find(":") != -1 \ and options_storage.restart_from.startswith("k%d:" % K): stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:] support.continue_from_here(log) if stage != BASE_STAGE: if not os.path.isdir(saves_dir): support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir)) else: if os.path.exists(data_dir): shutil.rmtree(data_dir) os.makedirs(data_dir) dir_util._path_created = {} # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False) log.info("\n== Running assembler: " + ("K%d" % K) + "\n") if prev_K: additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta") if not os.path.isfile(additional_contigs_fname): support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log) additional_contigs_fname = None else: additional_contigs_fname = None if "read_buffer_size" in cfg.__dict__: #FIXME why here??? process_cfg.substitute_params(os.path.join(dst_configs, "construction.info"), {"read_buffer_size": cfg.read_buffer_size}, log) if "scaffolding_mode" in cfg.__dict__: #FIXME why here??? process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log) prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log) prepare_config_construction(os.path.join(dst_configs, "construction.info"), log) cfg_fn = os.path.join(dst_configs, "config.info") prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home) command = [os.path.join(execution_home, "spades-core"), cfg_fn] add_configs(command, dst_configs) #print("Calling: " + " ".join(command)) support.sys_call(command, log)
def run_mixcr2_alignment_only(input_file, output_dir, log=None, loci="all", enforce_fastq=False, threads=16, remove_tmp=True, species="hsa"): if log is None: log = FakeLog() mkdir_p(output_dir) if enforce_fastq and idFormatByFileName(input_file) == "fasta": input_file_fq = "%s/input_reads.fq" % output_dir fastx2fastx(input_file, input_file_fq) input_file = input_file_tmp = input_file_fq elif idFormatByFileName(input_file) == "fasta": input_file_fasta = "%s/input_reads.fasta" % output_dir fastx2fastx(input_file, input_file_fasta) input_file = input_file_tmp = input_file_fasta else: input_file_tmp = None path = path_to_mixcr2 args = { "path": path, "compress_eq_clusters_cmd": path_to_igrec + "/py/ig_compress_equal_clusters.py", "mixcr_cmd": "java -jar %s/mixcr.jar" % path, "threads": threads, "input_file": input_file, "output_dir": output_dir, "species": species, "loci": loci, "loci_arg": "chains" } # support.sys_call("%(mixcr_cmd)s align -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --%(loci_arg)s %(loci)s --noMerge --species %(species)s %(input_file)s %(output_dir)s/mixcr.vdjca" % args, # log=log) timer = Timer() support.sys_call( "%(mixcr_cmd)s align -p kaligner2 --species %(species)s -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --noMerge --%(loci_arg)s %(loci)s -OreadsLayout=Collinear -OvParameters.geneFeatureToAlign=VTranscript -OallowPartialAlignments=true %(input_file)s %(output_dir)s/mixcr.vdjca" % args, log=log) timer.stamp(output_dir + "/time.txt") if remove_tmp: if input_file_tmp is not None: os.remove(input_file_tmp) os.remove(output_dir + "/align_report.txt") os.remove(output_dir + "/mixcr.vdjca")
def run_bh(result_filename, configs_dir, execution_home, cfg, ext_python_modules_home, log): addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): import pyyaml2 as pyyaml elif sys.version.startswith('3.'): import pyyaml3 as pyyaml dst_configs = os.path.join(cfg.output_dir, "configs") if os.path.exists(dst_configs): shutil.rmtree(dst_configs) shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs) cfg_file_name = os.path.join(dst_configs, "config.info") # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.info.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) prepare_config_bh(cfg_file_name, cfg, log) command = [ os.path.join(execution_home, "hammer"), os.path.abspath(cfg_file_name) ] log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n") support.sys_call(command, log) corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir, "corrected.yaml") if not os.path.isfile(corrected_dataset_yaml_filename): support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!") corrected_dataset_data = pyyaml.load( open(corrected_dataset_yaml_filename, 'r')) if cfg.gzip_output: log.info("\n== Compressing corrected reads (with gzip)") move_dataset_files(corrected_dataset_data, cfg.output_dir, ext_python_modules_home, cfg.max_threads, log, cfg.gzip_output) corrected_dataset_yaml_filename = result_filename pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w')) log.info("\n== Dataset description file created: " + corrected_dataset_yaml_filename + "\n") shutil.rmtree(cfg.tmp_dir)
def Run(self):
    self.__CheckInputExistance()
    self.__params.vj_finder_output = os.path.join(self.__params.output, "vj_finder")
    command_line = IgRepConConfig().run_vj_aligner + " -i " + self.__params.single_reads + \
                   " -o " + self.__params.io.vj_finder_output + \
                   " --db-directory " + IgRepConConfig().path_to_germline + \
                   " -t " + str(self.__params.num_threads) + \
                   " --loci " + self.__params.loci + \
                   " --organism " + self.__params.organism
    if self.__params.no_pseudogenes:
        command_line += " --no-pseudogenes"
    support.sys_call(command_line, self._log)
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one): data_dir = os.path.join(cfg.output_dir, "K%d" % K) stage = BASE_STAGE saves_dir = os.path.join(data_dir, 'saves') dst_configs = os.path.join(data_dir, "configs") cfg_file_name = os.path.join(dst_configs, "config.info") if options_storage.continue_mode: if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))): log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)") return if options_storage.restart_from and options_storage.restart_from.find(":") != -1: stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:] support.continue_from_here(log) if stage != BASE_STAGE: if not os.path.isdir(saves_dir): support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir)) else: if os.path.exists(data_dir): shutil.rmtree(data_dir) os.makedirs(data_dir) dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False) # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.info.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) log.info("\n== Running assembler: " + ("K%d" % K) + "\n") if prev_K: additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta") if not os.path.isfile(additional_contigs_fname): support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log) additional_contigs_fname = None else: additional_contigs_fname = None if "read_buffer_size" in cfg.__dict__: construction_cfg_file_name = os.path.join(dst_configs, "construction.info") process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log) prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home) command = [os.path.join(execution_home, "spades"), cfg_file_name] support.sys_call(command, log)
def Run(self):
    self.__CheckInputExistance()
    command_line = "%s -i %s -c %s -q %s -o %s" % (IgRepConConfig().run_rcm_recoverer,
                                                   self.__params.io.cropped_reads,
                                                   self.__params.io.map_file,
                                                   self.__params.io.dense_sgraph_decomposition,
                                                   self.__params.io.uncompressed_final_rcm)
    support.sys_call(command_line, self._log)
    command_line = IgRepConConfig().run_consensus_constructor + \
                   " -i " + self.__params.io.cropped_reads + \
                   " -R " + self.__params.io.uncompressed_final_rcm + \
                   " -o " + self.__params.io.uncompressed_final_clusters_fa + \
                   " -H " + " -t " + str(self.__params.num_threads)
    support.sys_call(command_line, self._log)
def run_iteration(configs_dir, execution_home, cfg, log, K, use_additional_contigs, last_one): data_dir = os.path.join(cfg.output_dir, "K%d" % K) if options_storage.continue_mode: if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")): log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)") return else: options_storage.continue_mode = False # continue from here if os.path.exists(data_dir): shutil.rmtree(data_dir) os.makedirs(data_dir) bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads") dst_configs = os.path.join(data_dir, "configs") shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs) cfg_file_name = os.path.join(dst_configs, "config.info") # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.info.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) prepare_config_spades(cfg_file_name, cfg, log, use_additional_contigs, K, last_one) command = [ os.path.join(execution_home, "spades"), os.path.abspath(cfg_file_name) ] if os.path.isdir(bin_reads_dir): if glob.glob(os.path.join(bin_reads_dir, "*_cor*")): for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")): cor_index = cor_filename.rfind("_cor") new_bin_filename = cor_filename[:cor_index] + cor_filename[ cor_index + 4:] shutil.move(cor_filename, new_bin_filename) log.info("\n== Running assembler: " + ("K%d" % K) + "\n") support.sys_call(command, log)
def get_igblast_output(args):
    args.input_hash = hash_file(args.input)
    args.igblast_output = args.storage_dir + "/" + args.input_hash + ".blast"
    if args.rerun_igblast or not os.path.exists(args.igblast_output + ".gz"):
        log.info("IgBLAST output will be written to " + args.igblast_output + ".gz")
        fq2fa(args.input, args.tmp_file)
        igblast_time = time.time()
        support.sys_call("bash %(workdir)s/blast.sh %(tmp_file)s %(igblast_output)s 2> /dev/null" % args.__dict__, log)
        igblast_time = time.time() - igblast_time
        os.unlink(args.tmp_file)
        support.sys_call("gzip %s --force" % args.igblast_output, log)
        log.info("IgBLAST time: %fs" % igblast_time)
    blast = ParseIgBlastOutput(args.igblast_output + ".gz", log, smart_open)
    # Normalize blast_blocks
    return [line.hit_table for line in blast.blocks]
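The caching above keys IgBLAST results on a fingerprint of the input file produced by hash_file. That helper is not shown here and its exact behavior is assumed; a hypothetical stand-in illustrating the caching idea could hash the file contents with hashlib:

import hashlib

def hash_file_sketch(filename, chunk_size=1 << 20):
    # Hypothetical content hash, used only to illustrate how cached IgBLAST
    # output could be keyed on the input file's contents.
    digest = hashlib.md5()
    with open(filename, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()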
def main(): log = igrec.CreateLogger() parser, params = ParseCommandLineParams(log) CheckParamsCorrectness(parser, params, log) try: if not os.path.exists(params.output): os.makedirs(params.output) igrec.CreateFileLogger(params, log) igrec.PrintCommandLine(log) final_dir = InitMakeFiles(params, log) # We need freshly compiled version to get actual build info if not params.no_compilation: support.sys_call( "make -C " + os.path.join(os.path.dirname(final_dir), "compilation"), log) from src.build_info.build_info import BuildInfo print "===================Build info===================" BuildInfo().Log(log) print "================================================" support.sys_call("make -C " + final_dir, log) PrintOutputFiles(params, log) log.info("\nThank you for using BarcodedIgReC!") except KeyboardInterrupt: log.info("\nBarcodedIgReC was interrupted!") except Exception: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") SupportInfo(log) sys.exit(exc_value) except BaseException: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") SupportInfo(log) sys.exit(exc_value) log.info("Log was written to " + params.log_filename)
def run_vjfinder(input_file, output_dir, log=None,
                 loci="all", threads=16, additional_args="", remove_tmp=False):
    import os
    import os.path

    if log is None:
        log = FakeLog()
    args = {"path": path_to_igrec,
            "loci": loci,
            "threads": threads,
            "input_file": input_file,
            "output_dir": output_dir,
            "organism": "human",
            "path_to_germline": igrec_dir + "/data/germline",
            "additional_args": additional_args}
    args = dict2class(args)
    command_line = args.path + "/build/release/bin/vj_finder" + \
                   " -i " + os.path.abspath(args.input_file) + \
                   " -o " + os.path.abspath(args.output_dir) + \
                   " --db-directory " + os.path.abspath(args.path_to_germline) + \
                   " -t " + str(args.threads) + \
                   " --loci " + args.loci + \
                   " --organism " + args.organism + " " + args.additional_args
    cwd = os.getcwd()
    os.chdir(igrec_dir)
    timer = Timer()
    support.sys_call(command_line, log=log)
    timer.stamp(output_dir + "/time.txt")
    os.chdir(cwd)
    if remove_tmp:
        if os.path.isdir(output_dir):
            import shutil
            shutil.rmtree(output_dir)
def main(): log = igrec.CreateLogger() parser, params = ParseCommandLineParams(log) CheckParamsCorrectness(parser, params, log) try: if not os.path.exists(params.output): os.makedirs(params.output) igrec.CreateFileLogger(params, log) igrec.PrintCommandLine(log) final_dir = InitMakeFiles(params, log) # We need freshly compiled version to get actual build info if not params.no_compilation: support.sys_call("make -C " + os.path.join(os.path.dirname(final_dir), "compilation"), log) print "===================Build info===================" from py import build_info build_info.Log(log) print "================================================" support.sys_call("make -C " + final_dir, log) PrintOutputFiles(params, log) log.info("\nThank you for using BarcodedIgReC!") except KeyboardInterrupt: log.info("\nBarcodedIgReC was interrupted!") except Exception: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") SupportInfo(log) sys.exit(exc_value) except BaseException: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") SupportInfo(log) sys.exit(exc_value) log.info("Log was written to " + params.log_filename)
def run_bh(result_filename, configs_dir, execution_home, cfg, ext_python_modules_home, log): addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): import pyyaml2 as pyyaml elif sys.version.startswith('3.'): import pyyaml3 as pyyaml dst_configs = os.path.join(cfg.output_dir, "configs") if os.path.exists(dst_configs): shutil.rmtree(dst_configs) shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs) cfg_file_name = os.path.join(dst_configs, "config.info") # removing template configs for root, dirs, files in os.walk(dst_configs): for cfg_file in files: cfg_file = os.path.join(root, cfg_file) if cfg_file.endswith('.info.template'): if os.path.isfile(cfg_file.split('.template')[0]): os.remove(cfg_file) else: os.rename(cfg_file, cfg_file.split('.template')[0]) prepare_config_bh(cfg_file_name, cfg, log) command = [os.path.join(execution_home, "hammer"), os.path.abspath(cfg_file_name)] log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n") support.sys_call(command, log) corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir, "corrected.yaml") if not os.path.isfile(corrected_dataset_yaml_filename): support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!") corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r')) if cfg.gzip_output: log.info("\n== Compressing corrected reads (with gzip)") move_dataset_files(corrected_dataset_data, cfg.output_dir, ext_python_modules_home, cfg.max_threads, log, cfg.gzip_output) corrected_dataset_yaml_filename = result_filename pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w')) log.info("\n== Dataset description file created: " + corrected_dataset_yaml_filename + "\n") shutil.rmtree(cfg.tmp_dir)
def run_igrec_old(input_file, output_dir, log=None,
                  tau=3, threads=16, additional_args="", remove_tmp=True):
    if log is None:
        log = FakeLog()
    output_dir = os.path.abspath(output_dir)
    input_file = os.path.abspath(input_file)
    args = {"path": path_to_igrec_old,
            "tau": tau,
            "threads": threads,
            "input_file": input_file,
            "output_dir": output_dir,
            "additional_args": additional_args}
    timer = Timer()
    cwd = os.getcwd()
    os.chdir(path_to_igrec_old)
    support.sys_call("%(path)s/ig_repertoire_constructor.py --tau=%(tau)d -t %(threads)d -s %(input_file)s -o %(output_dir)s %(additional_args)s" % args,
                     log=log)
    os.chdir(cwd)
    timer.stamp(output_dir + "/time.txt")

    # Rename output
    os.rename(output_dir + "/constructed_repertoire.clusters.fa",
              output_dir + "/final_repertoire.fa")
    os.rename(output_dir + "/constructed_repertoire.rcm",
              output_dir + "/final_repertoire.rcm")

    if remove_tmp:
        rmdir(output_dir + "/configs")
        rmdir(output_dir + "/saves")
        rmdir(output_dir + "/temp_files")
        rmdir(output_dir + "/hamming_graphs_tau_%d" % tau)
def Run(self):
    self.__CheckInputExistance()
    if not self.__params.no_alignment:
        self.__params.vj_finder_output = os.path.join(self.__params.output, "vj_finder")
        command_line = os.path.abspath(IgRepConConfig().run_vj_aligner) + \
                       " -i " + os.path.abspath(self.__params.single_reads) + \
                       " -o " + os.path.abspath(self.__params.io.vj_finder_output) + \
                       " --db-directory " + os.path.abspath(IgRepConConfig().path_to_germline) + \
                       " -t " + str(self.__params.num_threads) + \
                       " --loci " + self.__params.loci + \
                       " --organism " + self.__params.organism
        if self.__params.no_pseudogenes:
            command_line += " --pseudogenes=off"
        else:
            command_line += " --pseudogenes=on"
        cwd = os.getcwd()
        os.chdir(home_directory)
        support.sys_call(command_line, self._log)
        os.chdir(cwd)
    else:
        self._log.info("VJ Finder stage skipped")
        self.__params.io.cropped_reads = self.__params.single_reads
def RunTool(params, log):
    try:
        igs_command_line = ig_simulator_bin + " " + params.output_config_file
        support.sys_call(igs_command_line, log)
        log.info("\nThank you for using " + tool_name + "!\n")
    except KeyboardInterrupt:
        log.info("\n" + tool_name + " was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
def move_dataset_files(dataset_data, dst, ext_python_modules_home, max_threads, log, gzip=False): to_compress = [] for reads_library in dataset_data: for key, value in reads_library.items(): if key.endswith('reads'): moved_reads_files = [] for reads_file in value: dst_filename = os.path.join(dst, os.path.basename(reads_file)) # TODO: fix problem with files with the same basenames in Hammer binary! if not os.path.isfile(reads_file): if (not gzip and os.path.isfile(dst_filename)) or (gzip and os.path.isfile(dst_filename + '.gz')): support.warning('file with corrected reads (' + reads_file + ') is the same in several libraries', log) if gzip: dst_filename += '.gz' else: support.error('something went wrong and file with corrected reads (' + reads_file + ') is missing!', log) else: shutil.move(reads_file, dst_filename) if gzip: to_compress.append(dst_filename) dst_filename += '.gz' moved_reads_files.append(dst_filename) reads_library[key] = moved_reads_files if len(to_compress): pigz_path = support.which('pigz') if pigz_path: for reads_file in to_compress: support.sys_call([pigz_path, '-f', '-7', '-p', str(max_threads), reads_file], log) else: addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): from joblib2 import Parallel, delayed elif sys.version.startswith('3.'): from joblib3 import Parallel, delayed n_jobs = min(len(to_compress), max_threads) outputs = Parallel(n_jobs=n_jobs)(delayed(support.sys_call)(['gzip', '-f', '-7', reads_file]) for reads_file in to_compress) for output in outputs: if output: log.info(output)
def run_bh(configs_dir, execution_home, cfg, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    import pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_bh(cfg_file_name, cfg, log)

    command = os.path.join(execution_home, "hammer") + " " + \
              os.path.abspath(cfg_file_name)

    log.info("\n== Running read error correction tool: " + command + "\n")
    support.sys_call(command, log)

    corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir, "corrected.yaml")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    if cfg.gzip_output:
        log.info("\n== Compressing corrected reads (with gzip)")
        support.move_dataset_files(corrected_dataset_data, cfg.output_dir, log, cfg.gzip_output)
    corrected_dataset_yaml_filename = os.path.join(cfg.output_dir, "corrected.yaml")
    pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file created: " + corrected_dataset_yaml_filename + "\n")
    shutil.rmtree(cfg.tmp_dir)

    return corrected_dataset_yaml_filename
def Run(self):
    self.__CheckInputExistance()
    command_line = IgRepConConfig().run_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                   " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file + " -Toff"
    support.sys_call(command_line, self._log)
    command_line = IgRepConConfig().run_triecmp_to_repertoire + " -i " + self.__params.io.cropped_reads + \
                   " -c " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file + \
                   " -r " + self.__params.io.supernode_repertoire + " -R " + self.__params.io.supernode_rcm
    support.sys_call(command_line, self._log)
    command_line = "%s %s %s --limit=%d" % (IgRepConConfig().run_report_supernodes,
                                            self.__params.io.supernode_repertoire,
                                            self.__params.io.supernodes_file,
                                            self.__params.min_cluster_size)
    support.sys_call(command_line, self._log)
    if not self.__params.equal_compression:
        command_line = IgRepConConfig().run_fake_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                       " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file
        support.sys_call(command_line, self._log)
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg, dataset_data, ext_python_modules_home, only_compressing_is_needed, log): addsitedir(ext_python_modules_home) if sys.version.startswith('2.'): import pyyaml2 as pyyaml elif sys.version.startswith('3.'): import pyyaml3 as pyyaml # not all reads need processing if support.get_lib_ids_by_type(dataset_data, options_storage.LONG_READS_TYPES): not_used_dataset_data = support.get_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES) to_correct_dataset_data = support.rm_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES) to_correct_dataset_yaml_filename = os.path.join(cfg.output_dir, "to_correct.yaml") pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500) cfg.dataset_yaml_filename = to_correct_dataset_yaml_filename else: not_used_dataset_data = None if not only_compressing_is_needed: dst_configs = os.path.join(cfg.output_dir, "configs") if os.path.exists(dst_configs): shutil.rmtree(dst_configs) if cfg.iontorrent: dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False) cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg") else: dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False) cfg_file_name = os.path.join(dst_configs, "config.info") cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_") if cfg.iontorrent: prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home) binary_name = "ionhammer" else: prepare_config_bh(cfg_file_name, cfg, log) binary_name = "hammer" command = [os.path.join(execution_home, binary_name), os.path.abspath(cfg_file_name)] log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n") support.sys_call(command, log) if not os.path.isfile(corrected_dataset_yaml_filename): support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!") else: log.info("\n===== Skipping %s (already processed). \n" % "read error correction tool") support.continue_from_here(log) corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r')) remove_not_corrected_reads(cfg.output_dir) is_changed = False if cfg.gzip_output: is_changed = True compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log) if not_used_dataset_data: is_changed = True corrected_dataset_data += not_used_dataset_data if is_changed: pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500) log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n") if os.path.isdir(cfg.tmp_dir): shutil.rmtree(cfg.tmp_dir)
def main(argv, external_logger = ""): from argparse import ArgumentParser parser = ArgumentParser(description="== DSF: an algorithm for corrupted cliques search ==", epilog=""" In case you have troubles running DSF, you can write to [email protected]. Please provide us with dense_subgraph_finder.log file from the output directory. """, add_help=False) req_args = parser.add_argument_group("Input") input_args = req_args.add_mutually_exclusive_group(required=True) input_args.add_argument("-g", "--graph", type=str, default="", dest="graph", help="Input graph in GRAPH format") input_args.add_argument("--test", action="store_const", const=os.path.join(home_directory, "test_dataset/dsf/test.graph"), dest="graph", help="Running test dataset") out_args = parser.add_argument_group("Output") out_args.add_argument("-o", "--output", type=str, default=os.path.join(home_directory, "dsf_test"), help="Output directory") optional_args = parser.add_argument_group("Optional arguments") optional_args.add_argument("-t", "--threads", type=int, default=16, dest="num_threads", help="Threads number [default: %(default)d]") optional_args.add_argument("-f", '--min-fillin', type=float, default=0.6, dest="min_fillin", help='Minimum fill-in of dense subgraphs [default: %(default)f]') optional_args.add_argument("-n", "--min-snode-size", type=int, default=5, dest="min_snode_size", help="Minimum vertex weight that prevents its gluing with other heavy vertex " "[default: %(default)d]") optional_args.add_argument("-s", "--min-size", type=int, default=5, dest="min_graph_size", help="Minimum size of graph where dense subgraphs will be computed " "[default: %(default)d]") optional_args.add_argument("--create-triv-dec", action="store_const", const=True, dest="create_trivial_decomposition", help='Creating decomposition according to connected components [default: False]') optional_args.add_argument("--save-aux-files", action="store_const", const=True, dest="save_aux_files", help="Saving auxiliary files: subgraphs in GRAPH format and their decompositions " "[default: False]") optional_args.add_argument("--clean-output-dir", default=True, dest="clean_output_dir", action="store_true", help="Clean output directory on start [default]") optional_args.add_argument("--no-clean-output-dir", default=True, dest="clean_output_dir", action="store_false", help="Do not clean output directory on start") optional_args.add_argument("-h", "--help", action="help", help="Help message and exit") parser.set_defaults(config_dir="configs", config_file="config.info") # prepare log log = logging.getLogger('dense_subgraph_finder') log.setLevel(logging.DEBUG) console = logging.StreamHandler(sys.stdout) console.setFormatter(logging.Formatter('%(message)s')) console.setLevel(logging.DEBUG) log.addHandler(console) if external_logger != "": external_log_handler = logging.FileHandler(external_logger, mode = "a") log.addHandler(external_log_handler) args = [arg for arg in argv if ntpath.basename(arg) != 'dense_subgraph_finder.py'] params = parser.parse_args(args) CheckParamsCorrectness(params, log, parser) SetOutputParams(params, params.output) PrepareOutputDir(params, log) # log file params.log_filename = os.path.join(params.output, "dense_subgraph_finder.log") if os.path.exists(params.log_filename): log.info("Removing %s" % params.log_filename) os.remove(params.log_filename) log_handler = logging.FileHandler(params.log_filename, mode='a') log.addHandler(log_handler) # print command line command_line = "Command_line: " if argv[0] != "dense_subgraph_finder.py": 
command_line += "dense_subgraph_finder.py " command_line += " ".join(argv) log.info(command_line + "\n") PrintParams(params, log) log.info("Log will be written to " + params.log_filename + "\n") PrepareConfigs(params, log) # run dense subgraph finder try: dsf_command_line = init.PathToBins.run_dense_sgraph_finder + " " + params.config_file support.sys_call(dsf_command_line, log) Cleanup(params, log) log.info("\nThank you for using Dense Subgraph Finder!\n") except (KeyboardInterrupt): log.info("\nDense subgraph finder was interrupted!") except Exception: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") supportInfo(log) except BaseException: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") supportInfo(log) log.info("Log was written to " + params.log_filename)
def main(argv): from argparse import ArgumentParser parser = ArgumentParser(description="== " + tool_name + ": a tool for diversity analysis of full-length immunosequencing reads ==", epilog="In case you have troubles running " + tool_name + ", you can write to [email protected]." "Please provide us with diversity_analyzer.log file from the output directory.", add_help=False) req_args = parser.add_argument_group("Required params") input_args = req_args.add_mutually_exclusive_group(required=True) input_args.add_argument("-i", "--input", type=str, default="", dest="input_reads", help="Input reads in FASTQ/FATSA format") input_args.add_argument("--test", action="store_const", const=test_reads, dest="input_reads", help="Running test dataset") out_args = parser.add_argument_group("Output") out_args.add_argument("-o", "--output", type=str, dest="output_dir", default="", #os.path.join(home_directory, "cdr_test"), help="Output directory") optional_args = parser.add_argument_group("Optional arguments") optional_args.add_argument("-t", "--threads", type=int, default=16, dest="num_threads", help="Threads number [default: %(default)d]") optional_args.add_argument("-d", '--domain', type=str, default="imgt", dest="domain_system", help='Domain system for CDR search: imgt OR kabat [default: %(default)s]') vj_finder_args= parser.add_argument_group("VJ alignment params") optional_args.add_argument("-l", "--loci", type=str, default="all", dest="loci", help="Loci: IGH, IGK, IGL, IG (all BCRs), TRA, TRB, TRG, TRD, TR (all TCRs) or all. " "[default: %(default)s]") optional_args.add_argument("--org", type=str, default="human", dest="organism", help="Organism: human, mouse, rat, rabbit, rhesus-monkey [default: %(default)s]") optional_args.add_argument('--skip-plots', action='store_const', const=True, dest = "skip_plots", help = "Skip drawing plots") optional_args.add_argument("-h", "--help", action="help", help="Help message and exit") # prepare log log = logging.getLogger('diversity_analyzer') log.setLevel(logging.DEBUG) console = logging.StreamHandler(sys.stdout) console.setFormatter(logging.Formatter('%(message)s')) console.setLevel(logging.DEBUG) log.addHandler(console) params = parser.parse_args() CheckBinariesExistance(params, log) CheckParamsCorrectness(params, log) SetOutputParams(params, log) PrepareOutputDir(params) # log file params.log_filename = os.path.join(params.output_dir, "diversity_analyzer.log") if os.path.exists(params.log_filename): os.remove(params.log_filename) log_handler = logging.FileHandler(params.log_filename, mode='a') log.addHandler(log_handler) # print command line command_line = "Command_line: " command_line += " ".join(argv) log.info(command_line + "\n") PrintParams(params, log) log.info("Log will be written to " + params.log_filename + "\n") PrepareConfigs(params, log) try: cdr_command_line = run_cdr_labeler + " " + params.cdr_labeler_config_file support.sys_call(cdr_command_line, log) if not params.skip_plots: log.info("\n==== Visualization of diversity statistics ====") visualize_vj_stats.main(["", os.path.join(params.output_dir, "cdr_details.txt"), os.path.join(params.output_dir, "shm_details.txt"), params.output_dir, log]) log.info("\n==== Annotation report creation ====") html_report_writer.main(os.path.join(params.output_dir, "cdr_details.txt"), os.path.join(params.output_dir, "shm_details.txt"), os.path.join(params.output_dir, "plots"), os.path.join(params.output_dir, "annotation_report.html"), log) Cleanup(params, log) log.info("\nThank you for using " + tool_name + 
"!\n") except (KeyboardInterrupt): log.info("\n" + tool_name + " was interrupted!") except Exception: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") #supportInfo(log) except BaseException: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) log.info("\nERROR: Exception caught.") #supportInfo(log) log.info("Log was written to " + params.log_filename)
def main(ds_args_list, general_args_list, spades_home, bin_home): log = logging.getLogger('dipspades') log.setLevel(logging.DEBUG) console = logging.StreamHandler(sys.stdout) console.setFormatter(logging.Formatter('%(message)s')) console.setLevel(logging.DEBUG) log.addHandler(console) support.check_binaries(bin_home, log) ds_args = parse_arguments(ds_args_list, log) if not os.path.exists(ds_args.output_dir): os.makedirs(ds_args.output_dir) log_filename = os.path.join(ds_args.output_dir, "dipspades.log") if os.path.exists(log_filename): os.remove(log_filename) log_handler = logging.FileHandler(log_filename, mode='a') log.addHandler(log_handler) params_filename = os.path.join(ds_args.output_dir, "params.txt") params_handler = logging.FileHandler(params_filename, mode='a') log.addHandler(params_handler) log.info("\n") log.info("General command line: " + " ".join(general_args_list) + "\n") log.info("dipSPAdes command line: " + " ".join(ds_args_list) + "\n") print_ds_args(ds_args, log) log.removeHandler(params_handler) log.info("\n======= dipSPAdes started. Log can be found here: " + log_filename + "\n") write_haplocontigs_in_file(ds_args.haplocontigs, ds_args.haplocontigs_fnames) config_fname = prepare_configs(os.path.join(spades_home, "configs", "dipspades"), ds_args, log) ds_args.tmp_dir = support.get_tmp_dir(prefix="dipspades_", base_dir=ds_args.tmp_dir) prepare_config(config_fname, ds_args, log) try: log.info("===== Assembling started.\n") binary_path = os.path.join(bin_home, "dipspades") command = [binary_path, config_fname] support.sys_call(command, log) log.info("\n===== Assembling finished.\n") print_ds_output(ds_args.output_dir, log) if os.path.isdir(ds_args.tmp_dir): shutil.rmtree(ds_args.tmp_dir) log.info("\n======= dipSPAdes finished.\n") log.info("dipSPAdes log can be found here: " + log_filename + "\n") log.info("Thank you for using dipSPAdes!") log.removeHandler(log_handler) except Exception: exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: if exc_type == OSError and exc_value.errno == errno.ENOEXEC: # Exec format error support.error("It looks like you are using SPAdes binaries for another platform.\n" + support.get_spades_binaries_info_message(), dipspades=True) else: log.exception(exc_value) support.error("exception caught: %s" % exc_type, log) except BaseException: # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException exc_type, exc_value, _ = sys.exc_info() if exc_type == SystemExit: sys.exit(exc_value) else: log.exception(exc_value) support.error("exception caught: %s" % exc_type, log, dipspades=True)