def prophyle_decompress(archive, output_dir, klcp):
    """Extract an index archive and reconstruct the index from its core files.

    The archive listing is validated first: the first member name is taken as
    the index directory name, and every entry of ``FILES_TO_ARCHIVE`` must be
    present under it. The archive is then extracted into ``output_dir`` with
    ``tar`` and the ``PROPHYLE index`` command is run on the extracted
    directory.

    Args:
        archive (str): Path to the archive (read with ``tarfile.open``).
        output_dir (str): Directory into which the archive is extracted.
        klcp (bool): If true, the index command is run with ``-k <k>``, where
            ``k`` is read from the index config; otherwise it is run with
            ``-K``.
    """
    pro.test_files(archive)

    _compile_prophyle_bin(parallel=True)

    with tarfile.open(archive) as tar:
        names = tar.getnames()

    # An archive without members cannot contain an index; report it explicitly
    # rather than failing on names[0].
    # NOTE(review): assumes pro.error() terminates execution -- confirm.
    if not names:
        pro.error("Archive '{}' is empty".format(archive))

    index_name = names[0]
    members = set(names)
    for x in FILES_TO_ARCHIVE:
        # Explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would let a broken archive through.
        if os.path.join(index_name, x) not in members:
            pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    pro.message("Decompressing index core files")
    pro.run_safe(["tar", "xvf", archive, "-C", output_dir])
    pro.message("Core files have been decompressed, reconstructing the index")

    # Touch index.fa and index.fa.pac before indexing.
    # NOTE(review): presumably placeholders so the indexing pipeline treats
    # them as present -- confirm.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
def _propagation_postprocessing(index_dir, in_tree_fn, out_tree_fn):
    """Run the post-processing steps that follow k-mer propagation.

    Two commands are handed to ``pro.run_safe`` in order: a ``cat`` of the
    ``*.tsv`` files of the ``propagation`` subdirectory into
    ``index.fa.kmers.tsv``, and ``PROPAGATION_POSTPROCESSING``, which is given
    the propagation directory, ``index.fa``, both trees and the TSV file.
    Afterwards ``index.fa.complete`` is touched and ``_log_file_md5`` is
    called for ``index.fa`` and both tree files.

    Args:
        index_dir (str): Index directory.
        in_tree_fn (str): Input tree in Newick/NHX.
        out_tree_fn (str): Output tree in Newick/NHX.
    """
    pro.message('Propagation post-processing')

    propagation_dir = os.path.join(index_dir, 'propagation')
    index_fa = os.path.join(index_dir, "index.fa")
    tsv_fn = os.path.join(index_dir, "index.fa.kmers.tsv")

    # (command, error message) pairs, executed strictly in this order.
    # NOTE(review): the first command contains a glob and a '>' redirection,
    # so it presumably relies on pro.run_safe running it through a shell --
    # confirm.
    steps = (
        (
            ["cat", os.path.join(propagation_dir, "*.tsv"), '>', tsv_fn],
            "K-mer statistics could not be created.",
        ),
        (
            [PROPAGATION_POSTPROCESSING, propagation_dir, index_fa, in_tree_fn, tsv_fn, out_tree_fn],
            "Main ProPhyle FASTA file could not be generated",
        ),
    )
    for step_cmd, step_err in steps:
        pro.run_safe(step_cmd, err_msg=step_err, thr_exc=True)

    pro.touch(index_fa + ".complete")

    for logged_fn in (index_fa, in_tree_fn, out_tree_fn):
        _log_file_md5(logged_fn)
def _mark_complete(d, i=1, name=None):
    """Touch the mark file that records step ``i`` as finished.

    The path of the mark file is produced by ``__mark_fn(d, i, name)``.

    Args:
        d (str): Directory.
        i (int): Number of the step; must be positive.
        name (str): Name of the mark.
    """
    assert i > 0
    mark_path = __mark_fn(d, i, name)
    pro.touch(mark_path)
def prophyle_decompress(archive, output_dir, klcp):
    """Extract an index archive and reconstruct the index from its core files.

    Steps, in order:

    1. Check the archive with ``pro.test_files`` and require ``output_dir``
       to be an existing directory.
    2. Compile the binaries via ``_compile_prophyle_bin``.
    3. Validate the archive listing: the first member name is taken as the
       index directory name, and every entry of ``FILES_TO_ARCHIVE`` must be
       present under it.
    4. Return early if the mark files ``.complete.1`` .. ``.complete.6`` all
       exist in the target index directory.
    5. Extract the archive with ``tar``, remove ``.complete.4`` and run the
       ``PROPHYLE index`` command on the extracted directory.

    Args:
        archive (str): Path to the archive (read with ``tarfile.open``).
        output_dir (str): Existing directory into which the archive is
            extracted.
        klcp (bool): If true, the index command is run with ``-k <k>``, where
            ``k`` is read from the index config; otherwise it is run with
            ``-K``.
    """
    pro.test_files(archive)

    if not os.path.isdir(output_dir):
        pro.error("Directory '{}' does not exist.".format(output_dir))

    # Compiled once here; this covers both the early-return path and the
    # reconstruction below, so no second call is needed later.
    _compile_prophyle_bin(parallel=True)

    with tarfile.open(archive) as tar:
        names = tar.getnames()

    # An archive without members cannot contain an index; report it explicitly
    # rather than failing on names[0].
    # NOTE(review): assumes pro.error() terminates execution -- confirm.
    if not names:
        pro.error("Archive '{}' is empty".format(archive))

    index_name = names[0]
    members = set(names)
    for x in FILES_TO_ARCHIVE:
        if os.path.join(index_name, x) not in members:
            pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    # The index counts as already present only if every mark file 1..6 exists.
    index_exists = all(
        os.path.isfile(os.path.join(index_dir, ".complete.{}".format(i)))
        for i in range(1, 7)
    )
    if index_exists:
        pro.message("Index already exists")
        return

    pro.message("Decompressing core index files")
    pro.run_safe(["tar", "xvf", archive, "-C", output_dir])

    # NOTE(review): presumably removing mark 4 forces the corresponding step
    # to be redone by the index command below -- confirm.
    pro.rm(os.path.join(index_dir, ".complete.4"))

    pro.message("Reconstructing the index")

    # Touch index.fa and index.fa.pac before indexing.
    # NOTE(review): presumably placeholders so the indexing pipeline treats
    # them as present -- confirm.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")