Example #1
0
def _propagate(index_dir, threads):
    """Propagate k-mers by running the Makefile stored in the index.

    A silent dry run of ``make`` is executed first so that problems with the
    inputs are reported before the actual build is started.

    Args:
        index_dir (str): Index directory.
        threads (int): Number of jobs passed to ``make -j``.
    """
    pro.message('Running k-mer propagation')
    prop_dir = os.path.join(index_dir, 'propagation')
    makefile_fn = os.path.join(prop_dir, 'Makefile')
    pro.test_files(makefile_fn, test_nonzero=True)

    # Step 1: dry run (-n) to check that the inputs of the propagation exist.
    dry_run_cmd = ['make', '-C', prop_dir, '-n', '-s', '>', '/dev/null']
    pro.run_safe(
        dry_run_cmd,
        err_msg="Some FASTA files needed for k-mer propagation are probably missing, see the messages above.",
        thr_exc=False,
        silent=True,
    )

    # Step 2: the propagation itself.
    build_cmd = ['make', '-j', threads, '-C', prop_dir, 'V=1']
    pro.run_safe(
        build_cmd,
        err_msg="K-mer propagation has not been finished because of an error. See messages above.",
        thr_exc=False,
    )
Example #2
0
def prophyle_decompress(archive, output_dir, klcp):
    """Unpack a compressed ProPhyle index and rebuild the files missing from the archive.

    Args:
        archive (str): Archive with the core index files.
        output_dir (str): Directory into which the archive is extracted.
        klcp (bool): Selects the ``prophyle index`` invocation used for the
            reconstruction: ``-k <k from the index config>`` when true,
            ``-K`` otherwise.
    """
    pro.test_files(archive)

    _compile_prophyle_bin(parallel=True)

    with tarfile.open(archive) as tar:
        names = tar.getnames()

    # Validate with explicit checks rather than ``assert`` so that the
    # validation is not skipped when Python runs with -O.
    # NOTE(review): assumes pro.error aborts execution -- confirm.
    if not names:
        pro.error("Archive '{}' is empty".format(archive))

    # The first archive member is taken as the name of the index directory.
    index_name = names[0]
    present = set(names)
    for x in FILES_TO_ARCHIVE:
        if os.path.join(index_name, x) not in present:
            pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    pro.message("Decompressing index core files")

    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    pro.message("Core files have been decompressed, reconstructing the index")

    # Create/refresh these two files before re-running the indexing command.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
Example #3
0
def _pseudo_fai(d):
    """Generate a pseudofai file for a given directory.

    FASTA files below the directory (``*.fa``, ``*.ffn``, ``*.fna``) are
    scanned and the result is written to ``<directory>.pseudofai``.

    Pseudofai format = TSV with two columns: filename, sequence header (text after > in FASTA).

    Args:
        d (str): Directory (without a trailing slash).
    """
    # NOTE(review): dirname() yields the parent of ``d``; the library name may
    # have been intended (basename) -- confirm before changing the messages.
    library = os.path.dirname(d)
    pseudofai_fn = d + ".pseudofai"
    pro.makedirs(d)
    if _is_complete(d, 2) and os.path.isfile(pseudofai_fn):
        pro.message(
            "Skipping generating pseudofai for library '{}' (already exists)".
            format(library))
    else:
        pro.message("Generating pseudofai for library '{}'".format(library))
        assert d[-1] != "/"
        # The -name tests are grouped with \( ... \): without the grouping,
        # find's implicit AND binds -exec only to the last test, so *.fa and
        # *.ffn files would be matched but never passed to grep.
        # The sed expression is written with an escaped backslash so that the
        # literal contains no invalid escape sequence (runtime bytes unchanged).
        cmd = [
            'find', d, "\\(", '-name', "'*.fa'", "-o", "-name", "'*.ffn'",
            "-o", "-name", "'*.fna'", "\\)", "-exec", "grep", "-H", '">"',
            "{}", "\\;", "|", 'sed', '"s/\\:>/\t/"'
        ]

        pro.run_safe(cmd, output_fn=pseudofai_fn)
        _mark_complete(d, 2)
Example #4
0
def _propagation_preprocessing(in_trees, out_tree, no_prefixes, sampling_rate, autocomplete):
    """Build the index tree by merging the input trees with the preprocessing script.

    Args:
        in_trees (list of str): Input NHX trees (possibly with a root specifier).
        out_tree (str): Output NHX tree.
        no_prefixes (bool): Don't prepend prefixes to node names during tree merging (``-P``).
        sampling_rate (float): Sampling rate for subsampling the tree or None for no subsampling (``-s``).
        autocomplete (bool): Pass ``-A`` to the preprocessing script.
    """

    pro.message('Generating index tree')

    # Existence of the input trees has already been checked, so they are not
    # tested again here.
    sampling_opts = [] if sampling_rate is None else ['-s', sampling_rate]

    trailing_flags = []
    if no_prefixes:
        trailing_flags.append('-P')
    if autocomplete:
        trailing_flags.append('-A')

    command = [PROPAGATION_PREPROCESSING] + sampling_opts + in_trees + [out_tree] + trailing_flags
    pro.run_safe(
        command,
        err_msg="The main tree could not be generated.",
        thr_exc=False,
    )
    _log_file_md5(out_tree)
Example #5
0
def _propagation_postprocessing(index_dir, in_tree_fn, out_tree_fn):
    """Post-process the propagation output: collect k-mer statistics and create index.fa.

    The ``*.tsv`` files of the propagation directory are concatenated into
    ``index.fa.kmers.tsv`` and the post-processing script is then run to
    produce ``index.fa`` and the output tree.

    Args:
        index_dir (str): Index directory.
        in_tree_fn (str): Input tree in Newick/NHX.
        out_tree_fn (str): Output tree in Newick/NHX.
    """

    pro.message('Propagation post-processing')

    index_fa = os.path.join(index_dir, "index.fa")
    tsv_fn = os.path.join(index_dir, "index.fa.kmers.tsv")
    propagation_dir = os.path.join(index_dir, 'propagation')

    # The glob and the redirection are passed as tokens; pro.run_safe is
    # expected to execute the command through a shell.
    tsv_glob = os.path.join(propagation_dir, "*.tsv")
    pro.run_safe(
        ["cat", tsv_glob, '>', tsv_fn],
        err_msg="K-mer statistics could not be created.",
        thr_exc=True,
    )

    pro.run_safe(
        [PROPAGATION_POSTPROCESSING, propagation_dir, index_fa, in_tree_fn, tsv_fn, out_tree_fn],
        err_msg="Main ProPhyle FASTA file could not be generated",
        thr_exc=True,
    )

    pro.touch(index_fa + ".complete")
    for logged_fn in (index_fa, in_tree_fn, out_tree_fn):
        _log_file_md5(logged_fn)
Example #6
0
def prophyle_compress(index_dir, archive):
    """Pack the core files of an index into a gzip-compressed tar archive.

    The files listed in ``FILES_TO_ARCHIVE`` are staged in a temporary
    directory; ``index.fa.bwt`` is not copied but produced by the
    ``debwtupdate`` command of the index binary. The staging directory is
    removed when the function finishes, whether it succeeds or fails.

    Args:
        index_dir (str): Index directory.
        archive (str): Output archive (written as ``tar.gz``).
    """
    _compile_prophyle_bin(parallel=True)

    # Name of the top-level directory inside the archive = last component of index_dir.
    arcdir = index_dir.rstrip("/").split("/")[-1]

    # TemporaryDirectory cleans up the staging area on exit; the previous
    # mkdtemp() call left the directory behind after every run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_arc_dir = os.path.join(tmp_dir, arcdir)

        # todo: should create a correct directory

        pro.message("Creating a temporary directory for files to compress")
        pro.makedirs(tmp_arc_dir)

        for x in FILES_TO_ARCHIVE:
            if x == "index.fa.bwt":
                # handled separately below
                continue
            pro.cp_to_dir(os.path.join(index_dir, x), tmp_arc_dir)

        bwt_fn_1 = os.path.join(index_dir, "index.fa.bwt")
        bwt_fn_2 = os.path.join(tmp_arc_dir, "index.fa.bwt")
        cmd = [IND, "debwtupdate", bwt_fn_1, bwt_fn_2]
        pro.run_safe(cmd)

        pro.message("Creating '{}'".format(archive))
        with tarfile.open(archive, "w:gz") as tar:
            tar.add(tmp_arc_dir, arcname=arcdir)

    pro.message("File '{}' has been created".format(archive))
Example #7
0
def prophyle_analyze(index_dir, out_prefix, input_fns, stats, in_format):
    """Run the analysis script on the given input files.

    Args:
        index_dir (str): Index directory.
        out_prefix (str): Output prefix passed to the analysis script.
        input_fns (list of str): Input files; ``-`` entries are not checked for existence.
        stats (str): Value of the ``-s`` option.
        in_format (str): Value of the ``-f`` option, or None to omit the option.
    """
    format_opts = ['-f', in_format] if in_format is not None else []
    cmd_analyze = [ANALYZE, '-s', stats, index_dir, out_prefix] + input_fns + format_opts

    # "-" is skipped: it does not name a file that could be tested.
    files_to_check = [fn for fn in input_fns if fn != "-"]
    pro.test_files(*files_to_check, test_nonzero=True)

    pro.run_safe(cmd_analyze)
Example #8
0
def _remove_tmp_propagation_files(index_dir):
    """Remove temporary propagation files by running ``make clean`` in the propagation directory.

    Args:
        index_dir (str): Index directory.
    """
    pro.message('Removing temporary files')
    clean_cmd = ['make', '-C', os.path.join(index_dir, 'propagation'), 'clean', '>', '/dev/null']
    pro.run_safe(clean_cmd)
Example #9
0
def prophyle_decompress(archive, output_dir, klcp):
    """Unpack a compressed ProPhyle index and rebuild the files missing from the archive.

    Nothing is extracted when the target index already has all of its
    ``.complete.1`` .. ``.complete.6`` marker files.

    Args:
        archive (str): Archive with the core index files.
        output_dir (str): Existing directory into which the archive is extracted.
        klcp (bool): Selects the ``prophyle index`` invocation used for the
            reconstruction: ``-k <k from the index config>`` when true,
            ``-K`` otherwise.
    """
    pro.test_files(archive)

    if not os.path.isdir(output_dir):
        pro.error("Directory '{}' does not exist.".format(output_dir))

    # Compile once, up front. A second identical call used to follow the
    # "index already exists" check; it has been dropped as redundant.
    _compile_prophyle_bin(parallel=True)

    with tarfile.open(archive) as tar:
        names = tar.getnames()
        # The first archive member is taken as the name of the index directory.
        index_name = names[0]
        for x in FILES_TO_ARCHIVE:
            if os.path.join(index_name, x) not in names:
                pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    # The index counts as existing only if every marker file is present.
    index_exists = all(
        os.path.isfile(os.path.join(index_dir, ".complete.{}".format(i)))
        for i in range(1, 7)
    )
    if index_exists:
        pro.message("Index already exists")
        return

    pro.message("Decompressing core index files")
    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    # NOTE(review): presumably removing this marker forces the corresponding
    # construction step to be redone by `prophyle index` -- confirm.
    fn = os.path.join(index_dir, ".complete.4")
    pro.rm(fn)

    pro.message("Reconstructing the index")
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))
    tree_fn = os.path.join(index_dir, "tree.nw")
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], tree_fn, index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", tree_fn, index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
Example #10
0
def parse_rpt(library, library_dir):
    """Run the RPT parser on a library and store its output in ``<library>_taxamap.tsv``.

    The output file name is relative, i.e. it is resolved against the current
    working directory. The redirection is passed as a command token, so
    pro.run_safe is expected to execute the command through a shell.

    Args:
        library (str): Library name from ``LIBRARIES``, or ``"all"`` to process every library.
        library_dir (str): Directory containing the library subdirectories.
    """
    if library == "all":
        for single_library in LIBRARIES:
            parse_rpt(single_library, library_dir)
        return

    assert library in LIBRARIES

    rpt_dir = os.path.join(library_dir, library)
    taxamap_fn = library + '_taxamap.tsv'
    pro.run_safe([RPT_PARSER, rpt_dir, '>', taxamap_fn])
Example #11
0
def fasta_idx(library, library_dir):
    """Index every ``*.fna`` file of a library with ``samtools faidx``.

    The files are located with ``find`` and piped to GNU ``parallel``. The
    pipe is passed as a command token, so pro.run_safe is expected to execute
    the command through a shell.

    Args:
        library (str): Library name from ``LIBRARIES``, or ``"all"`` to process every library.
        library_dir (str): Directory containing the library subdirectories.
    """

    if library == "all":
        for l in LIBRARIES:
            fasta_idx(l, library_dir)
        return
    else:
        assert library in LIBRARIES

    # The pipe is its own token: a missing comma used to fuse it with the
    # next element into the single string '|parallel'.
    cmd = [
        'find',
        os.path.join(library_dir, library), '-name', '*.fna', '|',
        'parallel', '--no-notice', '--verbose', 'samtools', 'faidx', '{}'
    ]
    pro.run_safe(cmd)
Example #12
0
def build_tree(library, library_dir):
    """Run the tree builder for a library.

    The tree is written to ``<library>.nw`` and the log to ``<library>.log``;
    ``<library>_taxamap.tsv`` is passed as the taxonomy map. All three names
    are relative to the current working directory.

    Args:
        library (str): Library name from ``LIBRARIES``, or ``"all"`` to process every library.
        library_dir (str): Directory containing the library subdirectories.
    """
    if library == "all":
        for single_library in LIBRARIES:
            build_tree(single_library, library_dir)
        return

    assert library in LIBRARIES

    # Value of the -u option: plasmids use "Bacteria", every other library
    # uses its own capitalised name.
    if library == 'plasmids':
        root = "Bacteria"
    else:
        root = library.title()

    tree_fn = library + '.nw'
    taxamap_fn = library + '_taxamap.tsv'
    log_fn = library + '.log'
    pro.run_safe([TREE_BUILDER, library, library_dir, tree_fn, taxamap_fn, '-l', log_fn, '-u', root])
Example #13
0
def _pac2bwt(fa_fn):
    """Compute the BWT from the packed sequence with `bwa pac2bwtgen` (2bit => BWT).

    Args:
        fa_fn (str): FASTA file; ``<fa_fn>.pac`` is the input and ``<fa_fn>.bwt`` the output.
    """
    pac_fn = fa_fn + ".pac"
    bwt_fn = fa_fn + ".bwt"

    pro.test_files(BWA, pac_fn)
    pro.run_safe(
        [BWA, 'pac2bwtgen', pac_fn, bwt_fn],
        err_msg="Burrows-Wheeler Transform could not be computed.",
        thr_exc=True,
    )
    _log_file_md5(bwt_fn, remark="without OCC")
Example #14
0
def _kmer_stats(index_dir):
    """Merge the per-node k-mer counts into ``index.fa.kmers.tsv``.

    The ``*.count.tsv`` files of the propagation directory are concatenated,
    lines starting with ``#`` are dropped, and the rest is sorted and
    de-duplicated. The pipeline is passed as command tokens, so pro.run_safe
    is expected to execute it through a shell.

    Args:
        index_dir (str): Index directory.
    """
    propagation_dir = os.path.join(index_dir, 'propagation')
    stats_fn = os.path.join(index_dir, "index.fa.kmers.tsv")

    stages = [
        ["cat", "{}/*.count.tsv".format(propagation_dir)],
        ["grep", "-v", "^#"],
        ["sort"],
        ["uniq"],
    ]
    command = list(stages[0])
    for stage in stages[1:]:
        command.append("|")
        command.extend(stage)
    command += [">", stats_fn]

    pro.run_safe(
        command,
        err_msg="A file with k-mer statistics could not be created.",
        thr_exc=False,
    )
Example #15
0
def _bwt2bwtocc(fa_fn):
    """Add the sampled OCC array to the BWT with `bwa bwtupdate` (BWT => BWT+OCC).

    Args:
        fa_fn (str): FASTA file; ``<fa_fn>.bwt`` is passed to the command.
    """
    bwt_fn = fa_fn + ".bwt"

    pro.test_files(BWA, bwt_fn)
    pro.run_safe(
        [BWA, 'bwtupdate', bwt_fn],
        err_msg="OCC array could not be computed.",
        thr_exc=True,
    )
    _log_file_md5(bwt_fn, remark="with OCC")
Example #16
0
def _bwtocc2sa(fa_fn):
    """Compute the sampled suffix array with `bwa bwt2sa` (BWT+OCC => SSA).

    Args:
        fa_fn (str): FASTA file; ``<fa_fn>.bwt`` is the input and ``<fa_fn>.sa`` the output.
    """
    bwt_fn = fa_fn + ".bwt"
    sa_fn = fa_fn + ".sa"

    pro.test_files(BWA, bwt_fn)
    pro.run_safe(
        [BWA, 'bwt2sa', bwt_fn, sa_fn],
        err_msg="Sampled Suffix Array computation failed.",
        thr_exc=True,
    )
    _log_file_md5(sa_fn)
Example #17
0
def _fa2pac(fa_fn):
    """Pack the FASTA file with `bwa fa2pac` (FA => 2bit).

    Args:
        fa_fn (str): FASTA file; it is passed both as the input and as the
            output prefix, and ``<fa_fn>.pac`` is logged afterwards.
    """
    pro.test_files(BWA, fa_fn)
    fa2pac_cmd = [BWA, 'fa2pac', fa_fn, fa_fn]
    pro.run_safe(
        fa2pac_cmd,
        err_msg="Packaged file could not be created.",
        thr_exc=True,
    )
    _log_file_md5(fa_fn + ".pac")
Example #18
0
def _bwtocc2klcp(fa_fn, k):
    """Create the k-LCP array with the index binary's ``build`` command (BWT => k-LCP).

    Args:
        fa_fn (str): FASTA file; ``<fa_fn>.bwt`` must exist.
        k (int): K-mer size.
    """
    pro.test_files(IND, fa_fn + ".bwt")
    build_cmd = [IND, 'build', '-k', k, fa_fn]
    pro.run_safe(
        build_cmd,
        err_msg="k-Longest Common Prefix array construction failed.",
        thr_exc=True,
    )
    klcp_fn = "{}.{}.klcp".format(fa_fn, k)
    _log_file_md5(klcp_fn)
Example #19
0
def _bwtocc2sa_klcp(fa_fn, k):
    """Create the k-LCP array and the sampled SA in a single ``build -s`` run (BWT => k-LCP + SA).

    Args:
        fa_fn (str): FASTA file; ``<fa_fn>.bwt`` must exist.
        k (int): K-mer size.
    """

    pro.message('Generating k-LCP array and SA in parallel')
    pro.test_files(IND, fa_fn + ".bwt")
    build_cmd = [IND, 'build', '-s', '-k', k, fa_fn]
    pro.run_safe(
        build_cmd,
        err_msg="Parallel construction of k-Longest Common Prefix array and Sampled Suffix Array failed.",
        thr_exc=True,
    )
    sa_fn = fa_fn + ".sa"
    klcp_fn = "{}.{}.klcp".format(fa_fn, k)
    _log_file_md5(sa_fn)
    _log_file_md5(klcp_fn)
Example #20
0
def _propagate(index_dir, threads, nonprop=0):
    """Propagate k-mers by running the Makefile stored in the index.

    A silent dry run of ``make`` is executed first so that problems with the
    inputs are reported before the actual build is started.

    Args:
        index_dir (str): Index directory.
        threads (int): Number of jobs passed to ``make -j`` for the real run.
        nonprop (bool): Switch propagation off (adds ``NONPROP=1`` to both make calls).
    """
    pro.message('Running k-mer propagation')
    prop_dir = os.path.join(index_dir, 'propagation')
    makefile_fn = os.path.join(prop_dir, 'Makefile')
    pro.test_files(makefile_fn, test_nonzero=True)

    # An empty string is kept in the command when propagation stays enabled.
    nonprop_cmd_str = "NONPROP=1" if nonprop else ""

    # Step 1: dry run (-n) to check that the inputs of the propagation exist.
    dry_run_cmd = ['make', '-j', '-C', prop_dir, '-n', '-s', nonprop_cmd_str, '>', '/dev/null']
    pro.run_safe(
        dry_run_cmd,
        err_msg="Some FASTA files needed for k-mer propagation are probably missing, see the messages above.",
        thr_exc=False,
        silent=True,
    )

    # Step 2: the propagation itself.
    # TODO: progress report is switched off; come up with a better way than
    # counting files
    build_cmd = ['make', '-j', threads, '-C', prop_dir, nonprop_cmd_str, 'V=1', 'PRINT_PROGRESS=']
    pro.run_safe(
        build_cmd,
        err_msg="K-mer propagation has not been finished because of an error. See messages above.",
        thr_exc=False,
    )
Example #21
0
def _compile_prophyle_bin(clean=False,
                          parallel=False,
                          silent=True,
                          force=False):
    """Compile ProPhyle binaries if they don't exist yet. Recompile if not up-to-date.

    ``make`` is run in the directory ``C_D`` with its output sent to stderr.
    If it fails with ``RuntimeError`` and both ``IND`` and ``ASM`` already
    exist, only a warning is printed and the old binaries are kept; otherwise
    the failure is reported through ``pro.error``.

    Args:
        clean (bool): Run make clean instead of make.
        parallel (bool): Run make in parallel.
        silent (bool): Run make silently.
        force (bool): Force recompile (make -B).
    """

    # Built outside the ``try`` so that only the make invocation is guarded.
    command = ["make"]
    if parallel:
        command.append('-j')
    if silent:
        command.append('-s')
    if force:
        command.append('-B')
    command += ["-C", C_D]
    if clean:
        command.append('clean')

    try:
        pro.run_safe(command, output_fo=sys.stderr)
    except RuntimeError:
        if not os.path.isfile(IND) or not os.path.isfile(ASM):
            # No usable binaries at all: this is fatal.
            pro.error(
                "Error: ProPhyle executables could not be compiled. Please run the command '{}' manually."
                .format(" ".join(command)))
        else:
            print(
                "Warning: ProPhyle executables could not be recompiled. Going to use the old ones.",
                file=sys.stderr)
Example #22
0
def _create_makefile(index_dir, k, library_dir, mask_repeats=False):
    """Prepare the propagation directory: index config, ``params.mk`` and the Makefile.

    Args:
        index_dir (str): Index directory.
        k (int): K-mer size.
        library_dir (str): Library directory.
        mask_repeats (bool): Mask repeats using DustMasker (writes ``MASKREP=1`` to ``params.mk``).

    TODO:
        * Add checking of params.mk
    """
    pro.message('Creating Makefile for k-mer propagation')
    propagation_dir = os.path.join(index_dir, 'propagation')
    pro.makedirs(propagation_dir)

    makefile = os.path.join(propagation_dir, 'Makefile')
    tree_fn = os.path.join(index_dir, 'tree.preliminary.nw')
    _test_tree(tree_fn)

    # Record the ProPhyle version and k in the index configuration.
    config = collections.OrderedDict([
        ('prophyle-version', version.VERSION),
        ('prophyle-revision', version.REVCOUNT),
        ('prophyle-commit', version.SHORTHASH),
        ('k', k),
    ])
    pro.save_index_config(index_dir, config)

    # Parameters written next to the generated Makefile.
    params_lines = ['PRG_ASM="{}"\n'.format(ASM), "K={}\n".format(k)]
    if mask_repeats:
        params_lines.append("MASKREP=1\n")
    with open(os.path.join(propagation_dir, "params.mk"), "w+") as params_fo:
        params_fo.writelines(params_lines)

    pro.run_safe([NEWICK2MAKEFILE, '-k', k, tree_fn, os.path.abspath(library_dir), './', makefile])
    _log_file_md5(makefile)
Example #23
0
def _merge_kmer_stats(index_dir):
    """Merge the k-mer statistics of the propagation directory into ``index.fa.kmers.tsv``.

    The ``*.tsv`` files are found with ``find``, concatenated in sorted
    file-name order, stripped of lines starting with ``#``, sorted and
    de-duplicated. The pipeline is passed as command tokens, so pro.run_safe
    is expected to execute it through a shell.

    Args:
        index_dir (str): Index directory.
    """
    propagation_dir = os.path.join(index_dir, 'propagation')
    tsv_fn = os.path.join(index_dir, "index.fa.kmers.tsv")

    stages = [
        ["find", propagation_dir, "-name", "'*.tsv'"],
        ["sort"],
        ["xargs", "cat"],
        ["grep", "-v", "^#"],
        ["sort"],
        ["uniq"],
    ]
    command = list(stages[0])
    for stage in stages[1:]:
        command.append("|")
        command.extend(stage)
    command += ['>', tsv_fn]

    pro.run_safe(
        command,
        err_msg="A file with k-mer statistics could not be created.",
        thr_exc=False,
    )
Example #24
0
def download_rpt(library, library_dir):
    """Download the RPT archive of a library from NCBI and unpack it into the library directory.

    The library directory is not created here; it has to exist already. The
    command uses ``cd``, ``&&`` and a pipe as tokens, so pro.run_safe is
    expected to execute it through a shell.

    Args:
        library (str): ``bacteria``, ``viruses``, ``plasmids``, or ``"all"`` to process every library.
        library_dir (str): Directory containing the library subdirectories.

    Raises:
        ValueError: If the library has no known RPT archive.
    """
    if library == "all":
        for single_library in LIBRARIES:
            download_rpt(single_library, library_dir)
        return

    assert library in LIBRARIES

    d = os.path.join(library_dir, library)

    # Remote path of the archive and extra options for tar, per library.
    if library == 'bacteria':
        remote_path = '/genomes/archive/old_refseq/Bacteria/all.rpt.tar.gz'
        tar_extra = []
    elif library == 'viruses':
        remote_path = '/genomes/Viruses/all.rpt.tar.gz'
        tar_extra = []
    elif library == 'plasmids':
        remote_path = '/genomes/archive/old_refseq/Plasmids/plasmids.all.rpt.tar.gz'
        tar_extra = ['--strip', '5']
    else:
        raise ValueError('Unknown library "{}"'.format(library))

    cmd = ['cd', d, '&&', 'curl', FTP_NCBI + remote_path, '|', 'tar', 'xz'] + tar_extra
    pro.run_safe(cmd)
Example #25
0
def create_bwa_index(fa):
    """Build the BWA index files for a FASTA file step by step.

    Runs, in this order: ``fa2pac``, ``pac2bwtgen`` (stdout discarded),
    ``bwtupdate`` and ``bwt2sa``.

    Args:
        fa (str): FASTA file; the outputs are ``<fa>.pac``, ``<fa>.bwt`` and ``<fa>.sa``.
    """
    pac_fn = fa + ".pac"
    bwt_fn = fa + ".bwt"
    sa_fn = fa + ".sa"

    steps = (
        [bwa, 'fa2pac', fa, fa],
        [bwa, 'pac2bwtgen', pac_fn, bwt_fn, ">", "/dev/null"],
        [bwa, 'bwtupdate', bwt_fn],
        [bwa, 'bwt2sa', bwt_fn, sa_fn],
    )
    for step_cmd in steps:
        pro.run_safe(step_cmd)
Example #26
0
def create_klcp(fa, k):
    """Build the k-LCP array for an index with ``prophyle_index build`` (stdout discarded).

    Args:
        fa (str): FASTA file of the index.
        k (int): K-mer size.
    """
    build_cmd = [prophyle_index, 'build', '-k', k, fa, ">", "/dev/null"]
    pro.run_safe(build_cmd)
Example #27
0
def prophyle_classify(
    index_dir, fq_fn, fq_pe_fn, k, out_format, mimic_kraken, measure, annotate, tie_lca, kmer_lca, print_seq, cimpl,
    force_restarted_search, prophyle_conf_string
):
    """Classify reads against a ProPhyle index.

    The index is checked for consistency and a single pipeline is then run:
    optional read merging (paired-end input), the index query, and the
    assignment program.

    Args:
        index_dir (str): Index directory.
        fq_fn (str): Input reads (single-end or first of paired-end).
        fq_pe_fn (str): Input reads (second paired-end, None if single-end)
        k (int): K-mer size (None => detect automatically).
        out_format (str): Output format: sam / kraken.
        mimic_kraken (bool): Mimic Kraken algorithm (compute LCA for each k-mer).
            Overrides measure, tie_lca, kmer_lca and out_format.
        measure (str): Measure used for classification (h1 / h2 / c1 / c2).
        annotate (bool): Annotate assignments (insert annotations from Newick to SAM).
        tie_lca (bool): If multiple equally good assignments found, compute their LCA.
        kmer_lca (bool): Replace k-mer matches by their LCA.
        print_seq (bool): Print sequencing in SAM.
        cimpl (bool): Use the C++ implementation.
        force_restarted_search (bool): Force restarted search.
        prophyle_conf_string (str): ProPhyle configuration string (only passed to
            the non-C++ implementation).
    """

    _compile_prophyle_bin(parallel=True)
    index_fa = os.path.join(index_dir, 'index.fa')
    index_tree = os.path.join(index_dir, 'tree.nw')

    if k is None:
        k = pro.detect_k_from_index(index_dir)
        pro.message("Automatic detection of k-mer length: k={}".format(k))

    _test_tree(index_tree)

    # Input reads: '-' as single-end input is not a file to be tested.
    if fq_pe_fn:
        pro.test_files(fq_fn, fq_pe_fn, allow_pipes=False)
    elif fq_fn != '-':
        pro.test_files(fq_fn, allow_pipes=False)

    pro.test_files(IND)

    # Index files required for querying (.pac and .amb are not checked).
    bwt_fn = index_fa + '.bwt'
    sa_fn = index_fa + '.sa'
    ann_fn = index_fa + '.ann'
    pro.test_files(bwt_fn, sa_fn, ann_fn)

    bwt_s, sa_s = pro.file_sizes(bwt_fn, sa_fn)
    assert abs(bwt_s - 2 * sa_s) < 1000, 'Inconsistent index (SA vs. BWT)'

    # Rolling window is used only if not disabled and the k-LCP file exists.
    klcp_fn = "{}.{}.klcp".format(index_fa, k)
    use_rolling_window = (not force_restarted_search) and os.path.isfile(klcp_fn)
    if force_restarted_search:
        pro.message("Restarted search forced")
    elif use_rolling_window:
        pro.message("k-LCP file found, going to use rolling window")
        pro.test_files(klcp_fn)
        (klcp_s, ) = pro.file_sizes(klcp_fn)
        assert abs(bwt_s - 4 * klcp_s) < 1000, 'Inconsistent index (KLCP vs. BWT)'
    else:
        pro.message("k-LCP file not found, going to use restarted search")

    assign_prog = C_ASSIGN if cimpl else PY_ASSIGN

    if mimic_kraken:
        measure, tie_lca, kmer_lca, out_format = "h1", True, True, "kraken"

    # Assignment stage of the pipeline.
    cmd_assign = [assign_prog]
    if not cimpl and prophyle_conf_string:
        cmd_assign.extend(['-c', prophyle_conf_string])
    cmd_assign.extend(['-m', measure, '-f', out_format])
    for enabled, flag in ((annotate, '-A'), (tie_lca, '-L'), (kmer_lca, '-X')):
        if enabled:
            cmd_assign.append(flag)
    cmd_assign.extend([index_tree, k, '-'])

    # Read stage: paired-end reads go through READ and are piped to the query.
    if fq_pe_fn:
        cmd_read = [READ, fq_fn, fq_pe_fn, '|']
        in_read = '-'
    else:
        cmd_read = []
        # fq_fn can be '-' as well
        in_read = fq_fn

    # Query stage; disabled options stay in the command as empty strings.
    rolling_opt = '-u' if use_rolling_window else ''
    seq_opt = '-b' if print_seq else ''
    cmd_query = [IND, 'query', '-k', k, rolling_opt, seq_opt, index_fa, in_read, '|']

    pro.run_safe(cmd_read + cmd_query + cmd_assign)
Example #28
0
def prophyle_download(library, library_dir, force=False):
    """Download a genomic library (and, where provided, the corresponding tree).

    The download is performed only if ``_missing_library`` reports the library
    as missing or if ``force`` is set; afterwards step 1 is marked as complete.

    Args:
        library (str): Library to download (bacteria / viruses / ...), or ``"all"`` for every library.
        library_dir (str): Directory where download files will be downloaded
            (None => ``~/prophyle``).
        force (bool): Download even if the library is not reported as missing.

    Raises:
        ValueError: If the library is not one of the supported ones.

    TODO:
        * Add support for alternative URLs (http / ftp, backup refseq sites, etc.).
            * http://downloads.hmpdacc.org/data/HMREFG/all_seqs.fa.bz2
            * ftp://public-ftp.hmpdacc.org/HMREFG/all_seqs.fa.bz2
    """

    if library == "all":
        for single_library in LIBRARIES:
            prophyle_download(single_library, library_dir, force)
        return

    assert library in LIBRARIES

    base_dir = os.path.expanduser("~/prophyle") if library_dir is None else library_dir
    d = os.path.join(base_dir, library)
    pro.makedirs(d)

    lib_missing = _missing_library(d)

    zenodo_libraries = ('bacteria', 'viruses', 'plasmids')
    if library not in zenodo_libraries and library != 'hmp':
        raise ValueError('Unknown library "{}"'.format(library))

    if not (lib_missing or force):
        return

    if library == 'hmp':
        # fix when error appears
        cmd = [
            'cd', d, '&&', 'curl', 'http://downloads.hmpdacc.org/data/HMREFG/all_seqs.fa.bz2', '|', 'bzip2', '-d',
            '|', SPLIT_FA,
            os.path.abspath(d)
        ]
    else:
        # Tree and tarball are fetched from the parent of the library directory.
        tree_url = ZENODO_URL + '/files/' + library + '.nw'
        tarball_url = ZENODO_URL + '/files/' + library + '.tar.gz'
        cmd = ['cd', d + "/..", '&&', 'curl', '-O', tree_url, '&&', 'curl', tarball_url, '|', 'tar', 'xz']

    pro.run_safe(cmd)
    _mark_complete(d, 1)
Example #29
0
def query(fa, fq, k, u=False, v=False, t=1):
    """Run ``prophyle_index query`` for the given index and reads.

    Args:
        fa (str): FASTA file of the index.
        fq (str): File with the reads to query.
        k (int): Value of the ``-k`` option (k-mer size).
        u (bool): Add the ``-u`` option.
        v (bool): Add the ``-v`` option.
        t (int): Value of the ``-t`` option.
    """
    # Disabled switches stay in the command as empty strings.
    cmd = [prophyle_index, 'query', "-v" if v else "", "-u" if u else "", '-k', k, '-t', t, fa, fq]
    pro.run_safe(cmd)