def prophyle_decompress(archive, output_dir, klcp):
    """Extract a ProPhyle index archive and reconstruct the index from it.

    The archive is validated first: it must be non-empty and must contain
    every file listed in ``FILES_TO_ARCHIVE`` under its first member, which
    is taken to be the index directory. It is then extracted with ``tar``
    into ``output_dir`` and the ``PROPHYLE index`` command is run on the
    extracted directory.

    Validation failures are reported through ``pro.error`` rather than
    ``assert``, so the checks are still performed when Python runs with
    ``-O``.
    NOTE(review): pro.error() presumably terminates the program - confirm.

    Args:
        archive (str): Tar archive with the core index files.
        output_dir (str): Existing directory into which the archive is extracted.
        klcp (bool): If true, ``-k <k>`` is passed to the index command, with
            ``k`` read from the extracted index configuration; otherwise
            ``-K`` is passed.
    """
    pro.test_files(archive)

    if not os.path.isdir(output_dir):
        pro.error("Directory '{}' does not exist.".format(output_dir))

    _compile_prophyle_bin(parallel=True)

    with tarfile.open(archive) as tar:
        names = tar.getnames()

    if not names:
        pro.error("Archive '{}' is empty".format(archive))
        # Defensive: never fall through to names[0] if pro.error() returns.
        return

    # The first archive member is used as the name of the index directory.
    index_name = names[0]
    present = set(names)
    for x in FILES_TO_ARCHIVE:
        if os.path.join(index_name, x) not in present:
            pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    pro.message("Decompressing index core files")

    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    pro.message("Core files have been decompressed, reconstructing the index")

    # Create empty placeholders for files that are not part of the archive.
    # NOTE(review): presumably lets the index command skip the steps that
    # would otherwise produce them - confirm.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
def _propagation_postprocessing(index_dir, in_tree_fn, out_tree_fn):
    """Finish k-mer propagation: gather k-mer statistics and build index.fa.

    Two external commands are run in sequence. The first concatenates every
    ``*.tsv`` file of the ``propagation`` subdirectory into
    ``index.fa.kmers.tsv``; the second invokes ``PROPAGATION_POSTPROCESSING``
    to produce ``index.fa`` and the output tree. Afterwards an
    ``index.fa.complete`` file is touched and the MD5 sums of the FASTA file
    and of both trees are logged.

    Args:
        index_dir (str): Index directory.
        in_tree_fn (str): Input tree in Newick/NHX.
        out_tree_fn (str): Output tree in Newick/NHX.
    """

    pro.message('Propagation post-processing')

    def in_index(name):
        # Path of a file or directory located directly in the index directory.
        return os.path.join(index_dir, name)

    propagation_dir = in_index('propagation')
    index_fa = in_index("index.fa")
    tsv_fn = in_index("index.fa.kmers.tsv")

    # NOTE(review): the glob pattern and the '>' redirection only take effect
    # if pro.run_safe() hands the command to a shell - confirm.
    pro.run_safe(
        ["cat", os.path.join(propagation_dir, "*.tsv"), '>', tsv_fn],
        err_msg="K-mer statistics could not be created.",
        thr_exc=True,
    )

    pro.run_safe(
        [
            PROPAGATION_POSTPROCESSING,
            propagation_dir,
            index_fa,
            in_tree_fn,
            tsv_fn,
            out_tree_fn,
        ],
        err_msg="Main ProPhyle FASTA file could not be generated",
        thr_exc=True,
    )

    pro.touch(index_fa + ".complete")
    for logged_fn in (index_fa, in_tree_fn, out_tree_fn):
        _log_file_md5(logged_fn)
def _mark_complete(d, i=1, name=None):
    """Touch the mark file recording that step number ``i`` has finished.

    The path of the mark file is obtained from ``__mark_fn(d, i, name)``.

    Args:
        d (str): Directory.
        i (int): Number of the step; must be positive.
        name (str): Name of the mark.
    """

    assert i > 0

    mark_path = __mark_fn(d, i, name)
    pro.touch(mark_path)
Example #4
0
def prophyle_decompress(archive, output_dir, klcp):
    """Extract a ProPhyle index archive and reconstruct the index from it.

    The archive must be non-empty and must contain every file listed in
    ``FILES_TO_ARCHIVE`` under its first member, which is taken to be the
    index directory. If all mark files ``.complete.1`` to ``.complete.6``
    already exist in the target index directory, the function reports that
    the index exists and returns without doing any work. Otherwise the
    archive is extracted with ``tar`` and the ``PROPHYLE index`` command is
    run on the extracted directory.

    The ProPhyle binaries are compiled once, and only when the index really
    has to be reconstructed.

    Problems with the inputs are reported through ``pro.error``.
    NOTE(review): pro.error() presumably terminates the program - confirm.

    Args:
        archive (str): Tar archive with the core index files.
        output_dir (str): Existing directory into which the archive is extracted.
        klcp (bool): If true, ``-k <k>`` is passed to the index command, with
            ``k`` read from the extracted index configuration; otherwise
            ``-K`` is passed.
    """
    pro.test_files(archive)

    if not os.path.isdir(output_dir):
        pro.error("Directory '{}' does not exist.".format(output_dir))

    with tarfile.open(archive) as tar:
        names = tar.getnames()

    if not names:
        pro.error("Archive '{}' is empty".format(archive))
        # Defensive: never fall through to names[0] if pro.error() returns.
        return

    # The first archive member is used as the name of the index directory.
    index_name = names[0]
    present = set(names)
    for x in FILES_TO_ARCHIVE:
        if os.path.join(index_name, x) not in present:
            pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    # The index counts as already built when every step mark file is present.
    index_exists = all(
        os.path.isfile(os.path.join(index_dir, ".complete.{}".format(i)))
        for i in range(1, 7)
    )
    if index_exists:
        pro.message("Index already exists")
        return

    _compile_prophyle_bin(parallel=True)

    pro.message("Decompressing core index files")
    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    # Drop the mark of step 4 after extraction.
    # NOTE(review): presumably forces that step to be redone by the index
    # command below - confirm.
    fn = os.path.join(index_dir, ".complete.4")
    pro.rm(fn)

    pro.message("Reconstructing the index")
    # Create empty placeholders for files that are not part of the archive.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")