Esempio n. 1
0
def get_QL_trash_fpath(fpath, outdir_path, quality, length):
    # Build the path to the "trash" file, i.e. the file that collects reads
    #   rejected by the quality and length filters.
    # The file name encodes the filter options that are in effect.
    #
    # :param fpath: path to input file;
    # :type fpath: str;
    # :param outdir_path: path to output directory;
    # :type outdir_path: str;
    # :param quality: threshold for quality filter;
    # :type quality: float;
    # :param length: threshold for length filter;
    # :type length: int or None, if filter is disabled;
    #
    # Returns the path to the trash file (str).

    # Pieces of the resulting path, in the order they are glued together
    name_parts = [os.path.join(outdir_path, "trash")]

    # The quality threshold goes into the name for non-fasta input only
    if not is_fasta(fpath):
        name_parts.append("-q{}".format(quality))
    # end if

    # The length threshold goes into the name only if the length filter is enabled
    if length is not None:
        name_parts.append("-m{}".format(length))
    # end if

    # Append the extension: group 1 of `ext_pattern` matched against the input path
    name_parts.append(re.search(ext_pattern, fpath).group(1))

    return "".join(name_parts)
Esempio n. 2
0
def get_QL_filter(fpath, quality, length):
    # Build and return the combined quality-and-length filter function.
    # The returned function takes a single annotation line and returns
    #   True if the line passes every enabled check and False otherwise.
    # Item 0 of the line is compared with `quality`,
    #   item 1 of the line is compared with `length`.
    #
    # :param fpath: path to input file;
    # :type fpath: str;
    # :param quality: threshold for quality filter;
    # :type quality: float;
    # :param length: threshold for length filter;
    # :type length: int or None, if filter is disabled;

    checks = []

    # Quality check is enabled for non-fasta input only:
    #   there is a minus instead of quality for fasta files.
    if not is_fasta(fpath):

        def passes_quality(fields):
            # Someone can classify fasta files and then bin fast5 according to that classification.
            # The comparison raises TypeError in this case, and such a line is let through.
            try:
                verdict = fields[0] >= quality
            except TypeError:
                verdict = True
            # end try
            return verdict
        # end def passes_quality

        checks.append(passes_quality)
    # end if

    # Length check is enabled only if a threshold is given
    if length is not None:

        def passes_length(fields):
            return fields[1] >= length
        # end def passes_length

        checks.append(passes_length)
    # end if

    def passes_all(line):
        # "Integral" filter: stop at the first check that rejects the line
        for check in checks:
            if not check(line):
                return False
            # end if
        # end for
        return True
    # end def passes_all

    return passes_all
Esempio n. 3
0
from glob import glob

# Parse command line arguments (everything after the script name).
# `gnu_getopt` is used, so options and positional arguments may be intermixed.
# In the short-option string a letter followed by ':' requires an argument;
#   in the long-option list a name ending with '=' requires an argument.
# Parsed (option, value) pairs are stored in `opts`, remaining arguments in `args`.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "hvr:d:o:s:q:m:i:c:ut:n", [
        "help", "version", "taxannot-resdir=", "indir=", "outdir=",
        "binning-sensitivity=", "min-qual=", "min-seq-len=", "min-pident=",
        "min-coverage=", "untwist-fast5", "threads=", "no-trash"
    ])
except getopt.GetoptError as gerr:
    # Unknown option or missing option argument: report the problem and quit.
    print(str(gerr))
    # NOTE(review): `platf_depend_exit` is defined elsewhere;
    #   presumably it terminates the program with exit status 2 -- confirm.
    platf_depend_exit(2)
# end try

from src.filesystem import is_fasta, is_fastq, is_fast5

def is_fastqa(f):
    # Tell whether `f` is recognized as a FASTA file or as a FASTQ file.
    # Defined with `def` rather than as a lambda bound to a name,
    #   so that the function carries its own name in tracebacks.
    #
    # :param f: value passed on to `is_fasta` and `is_fastq`
    #   (imported from src.filesystem);
    #
    # Returns the result of `is_fasta(f) or is_fastq(f)`.
    return is_fasta(f) or is_fastq(f)
# end def is_fastqa

from datetime import datetime

# Launch time formatted like "YYYY-mm-dd HH.MM.SS"; used in the default output directory name
now = datetime.now().strftime("%Y-%m-%d %H.%M.%S")

# |== Default parameters: ==|
# List with input FASTQ and FASTA files paths
fq_fa_list = []
# List with input FAST5 files paths
fast5_list = []
# Path to directory with classification results
tax_annot_res_dir = "barapost_result"
# Path to input directory
indir_path = None
# Path to output directory: the launch time with the space replaced by an underscore
outdir_path = "binning_result_" + now.replace(" ", "_")
# Binning sensitivity
sens = ("genus", 5)
# Minimum mean read quality to keep
min_qual = 10
# Minimum sequence length to keep
min_qlen = None
Esempio n. 4
0
 def test_is_fasta_txt(self, fpath_txt: str):
     # A path with the `.txt` extension must not be reported as fasta
     recognized = fls.is_fasta(fpath_txt)
     assert recognized == False
Esempio n. 5
0
 def test_is_fasta_fa_bz2(self, fpath_fa_bz2: str):
     # A path with the `.fa.bz2` extension must not be reported as fasta
     recognized = fls.is_fasta(fpath_fa_bz2)
     assert recognized == False
Esempio n. 6
0
 def test_is_fasta_fa_gz(self, fpath_fa_gz: str):
     # A path with the `.fa.gz` extension must be reported as fasta
     recognized = fls.is_fasta(fpath_fa_gz)
     assert recognized == True
Esempio n. 7
0
 def test_is_fasta_fa(self, fpath_fa: str):
     # A path with the `.fa` extension must be reported as fasta
     recognized = fls.is_fasta(fpath_fa)
     assert recognized == True