Ejemplo n.º 1
0
def _pseudo_fai(d):
    """Generate a pseudofai file for the given directory (directory/*.fa => directory.pseudofai).

    Pseudofai format = TSV with 2 columns: filename, sequence header (text after > in FASTA).

    Args:
        d (str): Directory.
    """
    # NOTE(review): dirname of the directory path is presumably the library
    # name used in the messages below — confirm against the callers' layout.
    l = os.path.dirname(d)
    pseudofai_fn = d + ".pseudofai"
    pro.makedirs(d)
    # Skip the work when step 2 is already marked complete and the output exists.
    if _is_complete(d, 2) and os.path.isfile(pseudofai_fn):
        pro.message(
            "Skipping generating pseudofai for library '{}' (already exists)".
            format(l))
    else:
        pro.message("Generating pseudofai for library '{}'".format(l))
        # A trailing slash would break the file naming conventions used here.
        assert d[-1] != "/"
        # cmd=['grep -r --include=\\*.{fa,ffn,fna}', '">"', d, '| sed "s/:>/\t/"']
        # Find all FASTA-like files (*.fa, *.ffn, *.fna), print "<file>:<header>"
        # for every '>' line, then turn ":>" into a TAB to get the 2-column TSV.
        cmd = [
            'find', d, '-name', "'*.fa'", "-o", "-name", "'*.ffn'", "-o",
            "-name", "'*.fna'", "-exec", "grep", "-H", '">"', "{}", "\\;", "|",
            'sed', '"s/\:>/\t/"'
        ]

        pro.run_safe(cmd, output_fn=pseudofai_fn)
        _mark_complete(d, 2)
Ejemplo n.º 2
0
def _propagation_postprocessing(index_dir, in_tree_fn, out_tree_fn):
    """Merge reduced FASTA files after k-mer propagation and create index.fa.

    Args:
        index_dir (str): Index directory.
        in_tree_fn (str): Input tree in Newick/NHX.
        out_tree_fn (str): Output tree in Newick/NHX.
    """
    pro.message('Propagation post-processing')

    prop_dir = os.path.join(index_dir, 'propagation')
    kmer_tsv_fn = os.path.join(index_dir, "index.fa.kmers.tsv")
    main_fa_fn = os.path.join(index_dir, "index.fa")

    # Concatenate the per-node k-mer statistics into a single TSV file.
    pro.run_safe(
        ["cat", os.path.join(prop_dir, "*.tsv"), '>', kmer_tsv_fn],
        err_msg="K-mer statistics could not be created.",
        thr_exc=True,
    )

    # Merge the reduced FASTA files and produce the final tree.
    pro.run_safe(
        [PROPAGATION_POSTPROCESSING, prop_dir, main_fa_fn, in_tree_fn, kmer_tsv_fn, out_tree_fn],
        err_msg="Main ProPhyle FASTA file could not be generated",
        thr_exc=True,
    )

    pro.touch(main_fa_fn + ".complete")
    for fn in (main_fa_fn, in_tree_fn, out_tree_fn):
        _log_file_md5(fn)
Ejemplo n.º 3
0
def _propagation_preprocessing(in_trees, out_tree, no_prefixes, sampling_rate, autocomplete):
    """Merge input trees into a single tree.

    Args:
        in_trees (list of str): Input NHX trees (possibly with a root specifier).
        out_tree (str): Output NHX tree.
        no_prefixes (bool): Don't prepend prefixes to node names during tree merging.
        sampling_rate (float): Sampling rate for subsampling the tree or None for no subsampling.
        autocomplete (bool): Autocomplete names of internal nodes and fasta paths.
    """

    pro.message('Generating index tree')
    # existence already checked
    # pro.test_files(*in_trees)
    command = [PROPAGATION_PREPROCESSING]
    if sampling_rate is not None:
        command += ['-s', sampling_rate]
    command += in_trees + [out_tree]
    # NOTE(review): the -P/-A flags are appended after the positional
    # arguments — the preprocessing script apparently accepts options anywhere.
    if no_prefixes:
        command += ['-P']
    if autocomplete:
        command += ['-A']
    pro.run_safe(
        command,
        err_msg="The main tree could not be generated.",
        thr_exc=False,
    )
    _log_file_md5(out_tree)
Ejemplo n.º 4
0
    def parse_krakline(self, krakline):
        """Load a krakline to the current object.

        Args:
            krakline (str): Kraken-like line.
        """
        self.krakline = krakline
        fields = krakline.strip().split("\t")
        self.readname, _, readlen_str, self.krakmers = fields[1:5]
        self.readlen = int(readlen_str)
        has_seq_qual = len(fields) == 7
        self.seq = fields[5] if has_seq_qual else None
        self.qual = fields[6] if has_seq_qual else None

        # list of (list of nodes, count)
        self.kmer_blocks = []
        # k-mers not covered by any parsed block are assigned to node '0'.
        remaining_kmers = self.readlen - self.k + 1

        for chunk in self.krakmers.split(" "):
            try:
                ids, count_str = chunk.split(":")
                block_count = int(count_str)
            except ValueError:
                # Malformed block ("id:count" expected) => truncated output.
                pro.message(
                    "Warning: prophex output for read '{}' has been truncated."
                    .format(self.readname))
                continue
            remaining_kmers -= block_count
            self.kmer_blocks.append((ids.split(","), block_count))
        self.kmer_blocks.append((['0'], remaining_kmers))
Ejemplo n.º 5
0
def _propagate(index_dir, threads):
    """Run k-mer propagation.

    Args:
        index_dir (str): Index directory.
        threads (int): Number of threads for Makefile.
    """
    pro.message('Running k-mer propagation')
    prop_dir = os.path.join(index_dir, 'propagation')
    pro.test_files(os.path.join(prop_dir, 'Makefile'), test_nonzero=True)

    # Dry run first: verify that all FASTA inputs referenced by the Makefile exist.
    dry_run_cmd = ['make', '-C', prop_dir, '-n', '-s', '>', '/dev/null']
    pro.run_safe(
        dry_run_cmd,
        err_msg="Some FASTA files needed for k-mer propagation are probably missing, see the messages above.",
        thr_exc=False,
        silent=True,
    )

    # Real run of the propagation Makefile.
    propagate_cmd = ['make', '-j', threads, '-C', prop_dir, 'V=1']
    pro.run_safe(
        propagate_cmd,
        err_msg="K-mer propagation has not been finished because of an error. See messages above.",
        thr_exc=False,
    )
Ejemplo n.º 6
0
def _log_file_md5(fn, remark=None):
    """Write the MD5 checksum and size of a file to the log.

    Args:
        fn (str): File name.
        remark (str): Optional remark appended after the file name.
    """
    checksum = _file_md5(fn)
    size_bytes = pro.file_sizes(fn)[0]
    suffix = " ({})".format(remark) if remark is not None else ""
    msg = "File {}{} has md5 checksum {} and size {} B".format(
        os.path.basename(fn), suffix, checksum, size_bytes)
    pro.message(msg, only_log=True)
Ejemplo n.º 7
0
def _remove_tmp_propagation_files(index_dir):
    """Remove temporary files created during k-mer propagation.

    Args:
        index_dir (str): Index directory.
    """
    pro.message('Removing temporary files')
    propagation_dir = os.path.join(index_dir, 'propagation')

    # Delegate the cleanup to the propagation Makefile's "clean" target.
    command = ['make', '-C', propagation_dir, 'clean', '>', '/dev/null']
    pro.run_safe(command)
Ejemplo n.º 8
0
def _missing_library(d):
    """Check if library has been already downloaded.

    Args:
        d (str): Directory.

    Returns:
        bool: True if the library still needs to be downloaded.
    """
    lib_name = os.path.dirname(d)
    pro.makedirs(d)
    if _is_complete(d, 1):
        pro.message("Skipping downloading library '{}' (already exists)".format(lib_name))
        return False
    pro.message("Downloading library '{}'".format(lib_name))
    return True
Ejemplo n.º 9
0
def _bwtocc2sa_klcp(fa_fn, k):
    """Create the k-LCP array and the sampled SA in parallel (BWT => SA + k-LCP).

    Args:
        fa_fn (str): FASTA file.
        k (int): K-mer size.
    """

    pro.message('Generating k-LCP array and SA in parallel')
    pro.test_files(IND, fa_fn + ".bwt")
    # NOTE(review): '-s' presumably enables the sampled SA construction and
    # '-k' selects the k-LCP width — confirm against the `ind build` help.
    command = [IND, 'build', '-s', '-k', k, fa_fn]
    pro.run_safe(
        command,
        err_msg="Parallel construction of k-Longest Common Prefix array and Sampled Suffix Array failed.",
        thr_exc=True,
    )
    _log_file_md5(fa_fn + ".sa")
    _log_file_md5("{}.{}.klcp".format(fa_fn, k))
Ejemplo n.º 10
0
def _propagate(index_dir, threads, nonprop=0):
    """Run k-mer propagation.

    Args:
        index_dir (str): Index directory.
        threads (int): Number of threads for Makefile.
        nonprop (bool): Switch propagation off.
    """
    pro.message('Running k-mer propagation')
    prop_dir = os.path.join(index_dir, 'propagation')
    pro.test_files(os.path.join(prop_dir, 'Makefile'),
                   test_nonzero=True)

    # Passed as a Make variable; an empty string leaves propagation enabled.
    nonprop_var = "NONPROP=1" if nonprop else ""

    # Dry run first: verify that all FASTA inputs for propagation exist.
    dry_run_cmd = [
        'make', '-j', '-C', prop_dir, '-n', '-s', nonprop_var, '>',
        '/dev/null'
    ]
    pro.run_safe(
        dry_run_cmd,
        err_msg=
        "Some FASTA files needed for k-mer propagation are probably missing, see the messages above.",
        thr_exc=False,
        silent=True,
    )

    # Real run of the propagation Makefile.
    # TODO: progress report is switched off; come up with a better way than
    # counting files
    propagate_cmd = [
        'make', '-j', threads, '-C', prop_dir, nonprop_var, 'V=1',
        'PRINT_PROGRESS='
    ]
    pro.run_safe(
        propagate_cmd,
        err_msg=
        "K-mer propagation has not been finished because of an error. See messages above.",
        thr_exc=False,
    )
Ejemplo n.º 11
0
def prophyle_decompress(archive, output_dir, klcp):
    """Decompress a compressed ProPhyle index and reconstruct the remaining files.

    Args:
        archive (str): Archive (tar) with the compressed index.
        output_dir (str): Existing output directory.
        klcp (bool): Also reconstruct the k-LCP array.
    """
    pro.test_files(archive)

    if not os.path.isdir(output_dir):
        pro.error("Directory '{}' does not exist.".format(output_dir))

    _compile_prophyle_bin(parallel=True)

    # Verify that the archive contains all required index files before extracting.
    with tarfile.open(archive) as tar:
        names = tar.getnames()
        index_name = names[0]
        for x in FILES_TO_ARCHIVE:
            if not os.path.join(index_name, x) in names:
                pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    # If all completion marks 1..6 exist, the index has already been reconstructed.
    index_exists = True
    for i in range(1, 7):
        fn = os.path.join(index_dir, ".complete.{}".format(i))
        if not os.path.isfile(fn):
            index_exists = False
            break
    if index_exists:
        pro.message("Index already exists")
        return

    # BUGFIX: _compile_prophyle_bin(parallel=True) was called a second time
    # here; the call above already compiled the binaries.

    pro.message("Decompressing core index files")
    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    # Remove the completion mark of step 4 so the index command rebuilds it.
    fn = os.path.join(index_dir, ".complete.4")
    pro.rm(fn)

    pro.message("Reconstructing the index")
    # Create empty placeholders for files not stored in the archive.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))
    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [
            PROPHYLE, "index", "-k", config['k'],
            os.path.join(index_dir, "tree.nw"), index_dir
        ]
    else:
        cmd = [
            PROPHYLE, "index", "-K",
            os.path.join(index_dir, "tree.nw"), index_dir
        ]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
Ejemplo n.º 12
0
def main():
    """Entry point: parse arguments, load the configuration, and assign all reads.

    Exits quietly on a broken pipe (e.g., when piping into `head`), logs and
    exits with status 1 on a keyboard interrupt, and always tries to flush
    both standard streams on the way out.
    """
    args = parse_args()

    global CONFIG
    prophyle_conf_string = pro.load_prophyle_conf(CONFIG, args.config)

    try:
        assign_all_reads(
            tree_fn=args.tree_fn,
            inp_fo=args.input_file,
            form=args.format,
            k=args.k,
            measure=args.measure,
            annotate=args.annotate,
            tie_lca=args.tie_lca,
            kmer_lca=args.kmer_lca,
        )

    # Karel: I don't remember why I was considering also IOError here
    # except (BrokenPipeError, IOError):
    except BrokenPipeError:
        # pipe error (e.g., when head is used)
        sys.stderr.close()
        sys.stdout.close()
        exit(0)

    except KeyboardInterrupt:
        pro.message("Error: Keyboard interrupt")
        pro.close_log()
        exit(1)

    finally:
        try:
            sys.stdout.flush()
        except BrokenPipeError:
            pass
        finally:
            try:
                sys.stderr.flush()
            # BUGFIX: narrowed from a bare `except:`, which would also have
            # swallowed KeyboardInterrupt/SystemExit raised during the flush.
            except Exception:
                pass
Ejemplo n.º 13
0
def prophyle_decompress(archive, output_dir, klcp):
    """Decompress a compressed ProPhyle index and reconstruct the index files.

    Args:
        archive (str): Archive (tar) with the compressed index.
        output_dir (str): Output directory.
        klcp (bool): Also reconstruct the k-LCP array.
    """
    pro.test_files(archive)

    _compile_prophyle_bin(parallel=True)

    # Verify that the archive contains all required index files before extracting.
    with tarfile.open(archive) as tar:
        names = tar.getnames()
        index_name = names[0]
        for x in FILES_TO_ARCHIVE:
            # BUGFIX: was an `assert`, which is silently stripped when Python
            # runs with -O; report the missing file explicitly instead.
            if os.path.join(index_name, x) not in names:
                pro.error("File '{}' is missing in the archive".format(x))

    index_dir = os.path.join(output_dir, index_name)

    pro.message("Decompressing index core files")

    cmd = ["tar", "xvf", archive, "-C", output_dir]
    pro.run_safe(cmd)
    pro.message("Core files have been decompressed, reconstructing the index")

    # Create empty placeholders for files not stored in the archive.
    pro.touch(os.path.join(index_dir, "index.fa"))
    pro.touch(os.path.join(index_dir, "index.fa.pac"))

    if klcp:
        config = pro.load_index_config(index_dir)
        cmd = [PROPHYLE, "index", "-k", config['k'], os.path.join(index_dir, "tree.nw"), index_dir]
    else:
        cmd = [PROPHYLE, "index", "-K", os.path.join(index_dir, "tree.nw"), index_dir]

    pro.run_safe(cmd)
    pro.message("Index reconstruction finished")
Ejemplo n.º 14
0
def prophyle_compress(index_dir, archive):
    """Compress a ProPhyle index into an archive.

    Args:
        index_dir (str): Index directory.
        archive (str): Output archive (.tar.gz).
    """
    _compile_prophyle_bin(parallel=True)
    tmp_dir = tempfile.mkdtemp()
    arcdir = index_dir.rstrip("/").split("/")[-1]
    tmp_arc_dir = os.path.join(tmp_dir, arcdir)

    # todo: should create a correct directory

    pro.message("Creating a temporary directory for files to compress")
    pro.makedirs(tmp_arc_dir)

    # Copy everything except the BWT, which gets special treatment below.
    for fn in FILES_TO_ARCHIVE:
        if fn != "index.fa.bwt":
            pro.cp_to_dir(os.path.join(index_dir, fn), tmp_arc_dir)

    # Post-process the BWT with `debwtupdate` while copying it.
    src_bwt = os.path.join(index_dir, "index.fa.bwt")
    dst_bwt = os.path.join(tmp_arc_dir, "index.fa.bwt")
    pro.run_safe([IND, "debwtupdate", src_bwt, dst_bwt])

    pro.message("Creating '{}'".format(archive))
    with tarfile.open(archive, "w:gz") as tar:
        tar.add(tmp_arc_dir, arcname=arcdir)
    pro.message("File '{}' has been created".format(archive))
Ejemplo n.º 15
0
def _create_makefile(index_dir, k, library_dir, mask_repeats=False):
    """Create a Makefile for k-mer propagation.

    Args:
        index_dir (str): Index directory.
        k (int): K-mer size.
        library_dir (str): Library directory.
        mask_repeats (bool): Mask repeats using DustMasker.

    TODO:
        * Add checking of params.mk
    """
    pro.message('Creating Makefile for k-mer propagation')
    prop_dir = os.path.join(index_dir, 'propagation')
    pro.makedirs(prop_dir)

    makefile = os.path.join(prop_dir, 'Makefile')
    tree_fn = os.path.join(index_dir, 'tree.preliminary.nw')
    _test_tree(tree_fn)
    # pro.test_files(NEWICK2MAKEFILE, tree_fn)
    makefile_cmd = [
        NEWICK2MAKEFILE, '-k', k, tree_fn,
        os.path.abspath(library_dir), './', makefile
    ]

    # Record the ProPhyle version information and k in the index configuration.
    config = collections.OrderedDict()
    config['prophyle-version'] = version.VERSION
    config['prophyle-revision'] = version.REVCOUNT
    config['prophyle-commit'] = version.SHORTHASH
    config['k'] = k
    pro.save_index_config(index_dir, config)

    # Parameters consumed by the generated propagation Makefile.
    with open(os.path.join(prop_dir, "params.mk"), "w+") as params_file:
        params_file.write('PRG_ASM="{}"\n'.format(ASM))
        params_file.write("K={}\n".format(k))
        if mask_repeats:
            params_file.write("MASKREP=1\n")

    pro.run_safe(makefile_cmd)
    _log_file_md5(makefile)
Ejemplo n.º 16
0
def prophyle_classify(
    index_dir, fq_fn, fq_pe_fn, k, out_format, mimic_kraken, measure, annotate, tie_lca, kmer_lca, print_seq, cimpl,
    force_restarted_search, prophyle_conf_string
):
    """Run ProPhyle classification.

    Args:
        index_dir (str): Index directory.
        fq_fn (str): Input reads (single-end or first of paired-end).
        fq_pe_fn (str): Input reads (second paired-end, None if single-end)
        k (int): K-mer size (None => detect automatically).
        out_format (str): Output format: sam / kraken.
        mimic_kraken (bool): Mimic Kraken algorithm (compute LCA for each k-mer).
        measure (str): Measure used for classification (h1 / h2 / c1 / c2).
        annotate (bool): Annotate assignments (insert annotations from Newick to SAM).
        tie_lca (bool): If multiple equally good assignments found, compute their LCA.
        kmer_lca (bool): Replace k-mer matches by their LCA.
        print_seq (bool): Print sequencing in SAM.
        cimpl (bool): Use the C++ implementation.
        force_restarted_search (bool): Force restarted search.
        prophyle_conf_string (str): ProPhyle configuration string.
    """

    _compile_prophyle_bin(parallel=True)
    index_fa = os.path.join(index_dir, 'index.fa')
    index_tree = os.path.join(index_dir, 'tree.nw')

    if k is None:
        k = pro.detect_k_from_index(index_dir)
        pro.message("Automatic detection of k-mer length: k={}".format(k))

    _test_tree(index_tree)

    # '-' means stdin, which cannot be tested for existence.
    if fq_pe_fn:
        pro.test_files(fq_fn, fq_pe_fn, allow_pipes=False)
    elif fq_fn != '-':
        pro.test_files(fq_fn, allow_pipes=False)

    pro.test_files(IND)

    pro.test_files(
        index_fa + '.bwt',
        #index_fa + '.pac',
        index_fa + '.sa',
        index_fa + '.ann',
        #index_fa + '.amb',
    )

    # Sanity check: the BWT should be roughly twice the size of the SA.
    (bwt_s, sa_s) = pro.file_sizes(index_fa + '.bwt', index_fa + '.sa')
    assert abs(bwt_s - 2 * sa_s) < 1000, 'Inconsistent index (SA vs. BWT)'
    #assert abs(bwt_s - 2 * pac_s) < 1000, 'Inconsistent index (PAC vs. BWT)'

    # Use the rolling-window search when the k-LCP file is available,
    # unless the restarted search is explicitly forced.
    klcp_fn = "{}.{}.klcp".format(index_fa, k)
    if force_restarted_search:
        pro.message("Restarted search forced")
        use_rolling_window = False
    else:
        use_rolling_window = os.path.isfile(klcp_fn)
        if use_rolling_window:
            pro.message("k-LCP file found, going to use rolling window")
            pro.test_files(klcp_fn)
            (klcp_s, ) = pro.file_sizes(klcp_fn)
            assert abs(bwt_s - 4 * klcp_s) < 1000, 'Inconsistent index (KLCP vs. BWT)'
        else:
            pro.message("k-LCP file not found, going to use restarted search")

    # Choose the C++ or Python assignment implementation.
    if cimpl:
        ASSIGN = C_ASSIGN
    else:
        ASSIGN = PY_ASSIGN

    # Kraken mimicking = h1 measure + LCA for ties and for k-mers + kraken output.
    if mimic_kraken:
        measure = "h1"
        tie_lca = True
        kmer_lca = True
        out_format = "kraken"

    cmd_assign = [ASSIGN]

    # Only the Python implementation accepts the configuration string.
    if not cimpl and prophyle_conf_string:
        cmd_assign += ['-c', prophyle_conf_string]

    cmd_assign += ['-m', measure, '-f', out_format]

    if annotate:
        cmd_assign += ['-A']

    if tie_lca:
        cmd_assign += ['-L']

    if kmer_lca:
        cmd_assign += ['-X']

    cmd_assign += [index_tree, k, '-']

    # Paired-end reads are merged by READ and piped into the query step.
    if fq_pe_fn:
        cmd_read = [READ, fq_fn, fq_pe_fn, '|']
        in_read = '-'
    else:
        cmd_read = []
        # fq_fn can be '-' as well
        in_read = fq_fn

    cmd_query = [
        IND, 'query', '-k', k, '-u' if use_rolling_window else '', '-b' if print_seq else '', index_fa, in_read, '|'
    ]

    # Full pipeline: [read merging |] query | assignment.
    command = cmd_read + cmd_query + cmd_assign
    pro.run_safe(command)
Ejemplo n.º 17
0
def prophyle_index(
    index_dir, threads, k, trees_fn, library_dir, construct_klcp, force, no_prefixes, mask_repeats, keep_tmp_files,
    sampling_rate, autocomplete
):
    """Build a ProPhyle index.

    Args:
        index_dir (str): Index directory.
        threads (int): Number of threads in k-mer propagation.
        k (int): K-mer size.
        trees_fn (list of str): Newick/NHX tree, possibly with a root spec (@root).
        library_dir (str): Library directory.
        construct_klcp (bool): Generate klcp.
        force (bool): Rewrite files if they already exist.
        no_prefixes (bool): Don't prepend prefixes to node names during tree merging.
        mask_repeats (bool): Mask repeats using DustMasker.
        keep_tmp_files (bool): Keep temporary files from k-mer propagation.
        sampling_rate (float): Sampling rate for subsampling the tree or None for no subsampling.
        autocomplete (bool): Autocomplete names of internal nodes and fasta paths.
    """

    assert isinstance(k, int)
    assert isinstance(threads, int)
    assert k > 1
    assert threads > 0
    assert sampling_rate is None or 0.0 <= float(sampling_rate) <= 1.0

    _compile_prophyle_bin(parallel=True)

    index_fa = os.path.join(index_dir, 'index.fa')
    index_tree_1 = os.path.join(index_dir, 'tree.preliminary.nw')
    index_tree_2 = os.path.join(index_dir, 'tree.nw')

    # recompute = recompute everything from now on
    # force==True => start to recompute everything from beginning
    recompute = force

    # make index dir
    pro.makedirs(index_dir)

    #
    # 1) Newick
    #

    #if not _is_complete(index_dir, 1) or not pro.existing_and_newer_list(trees_fn, index_tree_1):
    if not _is_complete(index_dir, 1):
        recompute = True

    if recompute:
        pro.message('[1/6] Copying/merging trees', upper=True)
        for tree_fn in trees_fn:
            # A tree spec may carry a "@root" suffix selecting a subtree root.
            tree_fn, _, root = tree_fn.partition("@")
            tree = pro.load_nhx_tree(tree_fn, validate=False)
            # postpone for autocomplete
            if not autocomplete:
                pro.validate_prophyle_nhx_tree(tree)
            if root != "":
                assert len(tree.search_nodes(name=root)) != 0, "Node '{}' does not exist in '{}'.".format(root, tree_fn)
        if len(trees_fn) != 1:
            pro.message('Merging {} trees'.format(len(trees_fn)))
        _propagation_preprocessing(
            trees_fn, index_tree_1, no_prefixes=no_prefixes, sampling_rate=sampling_rate, autocomplete=autocomplete
        )
        _test_tree(index_tree_1)
        _mark_complete(index_dir, 1)
    else:
        pro.message('[1/6] Tree already exists, skipping its creation', upper=True)

    #
    # 2) Create and run Makefile for propagation, and merge FASTA files
    #

    if not _is_complete(index_dir, 2):
        recompute = True

    if recompute:
        pro.message('[2/6] Running k-mer propagation', upper=True)
        _create_makefile(index_dir, k, library_dir, mask_repeats=mask_repeats)
        _propagate(index_dir, threads=threads)
        _propagation_postprocessing(index_dir, index_tree_1, index_tree_2)
        _test_tree(index_tree_2)
        _kmer_stats(index_dir)
        if not keep_tmp_files:
            _remove_tmp_propagation_files(index_dir)
        else:
            pro.message('Keeping temporary files')
        _mark_complete(index_dir, 2)
    else:
        pro.message('[2/6] K-mers have already been propagated, skipping propagation', upper=True)

    #
    # 3) BWT
    #

    # The BWT is consumed (and its completion implied) by step 4, hence the
    # extra check of mark 4 here.
    if not _is_complete(index_dir, 3) and not _is_complete(index_dir, 4, dont_check_previous=True):
        recompute = True

    if recompute:
        pro.message('[3/6] Constructing BWT', upper=True)
        pro.rm(index_fa + '.bwt', index_fa + '.bwt.complete')
        _fa2pac(index_fa)
        _pac2bwt(index_fa)
        _mark_complete(index_dir, 3)
    else:
        pro.message('[3/6] BWT already exists, skipping its construction', upper=True)

    #
    # 4) OCC
    #

    if not _is_complete(index_dir, 4):
        recompute = True

    if recompute:
        pro.message('[4/6] Constructing OCC', upper=True)
        _bwt2bwtocc(index_fa)
        _mark_complete(index_dir, 4)
    else:
        pro.message('[4/6] OCC already exists, skipping their construction', upper=True)

    #
    # 5) SA + 6) KLCP (compute SA + KLCP in parallel)
    #

    klcp_fn = "{}.{}.klcp".format(index_fa, k)

    if construct_klcp:

        if not _is_complete(index_dir, 5):
            # SA not computed yet => compute it in parallel with KLCP
            recompute = True

        if recompute:
            pro.message('[5/6],[6/6] Constructing SA + KLCP in parallel ', upper=True)
            _bwtocc2sa_klcp(index_fa, k)
            _mark_complete(index_dir, 5)
            _mark_complete(index_dir, 6)
            # Both remaining steps done at once — nothing left to do.
            return

    #
    # 5) SA (compute only SA)
    #

    if not _is_complete(index_dir, 5):
        recompute = True

    if recompute:
        pro.message('[5/6] Constructing SA', upper=True)
        _bwtocc2sa(index_fa)
    else:
        pro.message('[5/6] SA already exists, skipping its construction', upper=True)

    #
    # 6) KLCP (compute only KLCP)
    #

    if construct_klcp:
        if not _is_complete(index_dir, 6):
            recompute = True

        if recompute:
            pro.message('[6/6] Constructing k-LCP', upper=True)
            _bwtocc2klcp(index_fa, k)
            _mark_complete(index_dir, 6)
        else:
            pro.message('[6/6] k-LCP already exists, skipping its construction', upper=True)
Ejemplo n.º 18
0
        histogram, unique_histogram = compute_histogram(tree, asgs, args.stats)

    otu_table = compute_otu_table(histogram, tree)
    with open(args.out_prefix + '.rawhits.tsv', 'w') as f:
        if args.stats.startswith('w'):
            print_histogram(histogram, f, tree)
        elif unique_histogram is not None:
            print_histogram(unique_histogram, f, tree)

    with open(args.out_prefix + '.otu.tsv', 'w') as f:
        print_histogram(otu_table, f, tree)
    with open(args.out_prefix + '.kraken.tsv', 'w') as f:
        tot_count = print_kraken_report(otu_table, histogram, unclassified, tree, f)
    with open(args.out_prefix + '.metaphlan.tsv', 'w') as f:
        print_metaphlan_report(otu_table, tot_count, tree, f)
    with open(args.out_prefix + '.centrifuge.tsv', 'w') as f:
        print_centrifuge_report(otu_table, histogram, unique_histogram, tree, f)


if __name__ == "__main__":
    # Script entry point: exit quietly on a broken pipe, log and fail on Ctrl-C.
    try:
        main()
    except BrokenPipeError:
        # pipe error (e.g., when head is used)
        sys.stderr.close()
        exit(0)
    except KeyboardInterrupt:
        pro.message("Error: Keyboard interrupt")
        pro.close_log()
        exit(1)
Ejemplo n.º 19
0
def main():
    """CLI entry point: parse arguments and dispatch to the chosen subcommand.

    Subcommands: download, index, classify, analyze, compress, decompress,
    compile. An unknown/missing subcommand prints a condensed help text and
    exits with status 2. Broken pipes exit quietly; keyboard interrupts are
    logged and exit with status 1.
    """
    try:
        par = parser()
        args = par.parse_args()
        subcommand = args.subcommand

        global CONFIG
        prophyle_conf_string = pro.load_prophyle_conf(CONFIG, args.config)

        if subcommand == "download":
            pro.open_log(args.log_fn)
            for single_lib in args.library:
                pro.message('Downloading "{}" started'.format(single_lib))
                prophyle_download(
                    library=single_lib,
                    library_dir=args.home_dir,
                    force=args.force,
                )
                pro.message('Downloading "{}" finished'.format(single_lib))
            pro.close_log()

        elif subcommand == "index":
            # Default library dir: the directory of the first input tree.
            if args.library_dir is None:
                library_dir = os.path.dirname(args.tree[0])
            else:
                library_dir = args.library_dir

            if args.log_fn is None:
                args.log_fn = os.path.join(args.index_dir, "log.txt")

            pro.open_log(args.log_fn)
            pro.message('Index construction started')
            prophyle_index(
                index_dir=args.index_dir,
                threads=args.threads,
                k=args.k,
                trees_fn=args.tree,
                library_dir=library_dir,
                force=args.force,
                construct_klcp=args.klcp,
                no_prefixes=args.no_prefixes,
                mask_repeats=args.mask_repeats,
                keep_tmp_files=args.keep_tmp_files,
                sampling_rate=args.sampling_rate,
                autocomplete=args.autocomplete,
            )
            pro.message('Index construction finished')
            pro.close_log()

        elif subcommand == "classify":
            # if args.log_fn is None:
            #    args.log_fn = os.path.join(args.index_dir, "log.txt")

            pro.open_log(args.log_fn)
            pro.message('Classification started')
            prophyle_classify(
                index_dir=args.index_dir,
                fq_fn=args.reads,
                fq_pe_fn=args.reads_pe,
                k=args.k,
                out_format=args.oform,
                mimic_kraken=args.mimic,
                measure=args.measure,
                tie_lca=args.tie_lca,
                kmer_lca=args.kmer_lca,
                annotate=args.annotate,
                print_seq=args.print_seq,
                cimpl=args.cimpl,
                force_restarted_search=args.force_restarted_search,
                prophyle_conf_string=prophyle_conf_string,  # already preprocessed
            )
            pro.message('Classification finished')
            pro.close_log()

        elif subcommand == "analyze":

            prophyle_analyze(
                index_dir=args.index_dir,
                out_prefix=args.out_prefix,
                input_fns=args.input_fns,
                stats=args.stats,
                in_format=args.in_format,
            )

        elif subcommand == "compress":

            # Default archive name: "<index_dir>.tar.gz".
            if args.archive is None:
                archive = args.index_dir.rstrip("/") + ".tar.gz"
            else:
                archive = args.archive

            prophyle_compress(
                index_dir=args.index_dir,
                archive=archive,
            )

        elif subcommand == "decompress":

            prophyle_decompress(
                archive=args.archive,
                output_dir=args.output_dir,
                klcp=args.klcp,
            )

        elif subcommand == "compile":

            prophyle_compile(
                clean=args.clean,
                parallel=args.parallel,
                force=args.force,
            )

        else:
            # No/unknown subcommand: print a condensed version of the argparse
            # help (without the per-option lines) and exit with status 2.
            msg_lns = par.format_help().split("\n")[2:]
            msg_lns = [x for x in msg_lns if x.find("optional arguments") == -1 and x.find("--") == -1]
            msg = "\n".join(msg_lns)
            msg = msg.replace("\n\n", '\n').replace("subcommands:\n", "Command:\n").replace("Usage", "\nUsage")
            msg = msg.replace("\n    compress", "\n\n    compress")
            print(file=sys.stderr)
            print(msg, file=sys.stderr)
            sys.exit(2)

    except BrokenPipeError:
        # pipe error (e.g., when head is used)
        sys.stderr.close()
        sys.stdout.close()
        exit(0)

    except KeyboardInterrupt:
        pro.message("Error: Keyboard interrupt")
        pro.close_log()
        exit(1)

    finally:
        sys.stdout.flush()
        sys.stderr.flush()