def get_QL_trash_fpath(fpath, outdir_path, quality, length):
    """Configure the path to the "trash" file of the quality/length filter.

    The file is named ``trash`` and placed in `outdir_path`. The name gets
    a ``-q<quality>`` tag unless the input file is FASTA, then a
    ``-m<length>`` tag if the length filter is enabled, and finally
    group 1 of ``ext_pattern`` matched against `fpath`.

    NOTE(review): `re.search` returns None when ``ext_pattern`` does not
    match `fpath`, in which case this function raises AttributeError.

    :param fpath: path to input file;
    :type fpath: str;
    :param outdir_path: path to output directory;
    :type outdir_path: str;
    :param quality: threshold for quality filter;
    :type quality: float;
    :param length: threshold for length filter;
    :type length: int or None, if filter is disabled;
    :return: path to the trash file;
    :rtype: str;
    """
    base_path = os.path.join(outdir_path, "trash")

    # The quality tag is skipped for fasta input
    quality_tag = "" if is_fasta(fpath) else "-q{}".format(quality)

    # The length tag is present only if the length filter is enabled
    length_tag = "" if length is None else "-m{}".format(length)

    # Extension is taken from the input file name
    extension = re.search(ext_pattern, fpath).group(1)

    return base_path + quality_tag + length_tag + extension
def get_QL_filter(fpath, quality, length):
    """Build the combined quality-and-length filter function.

    The returned callable takes one annotation line and returns True if
    the line passes every enabled filter, False otherwise. The quality
    filter checks ``line[0] >= quality`` and is enabled unless `fpath` is
    a FASTA file; the length filter checks ``line[1] >= length`` and is
    enabled unless `length` is None. With no filter enabled, every line
    passes.

    :param fpath: path to input file;
    :type fpath: str;
    :param quality: threshold for quality filter;
    :type quality: float;
    :param length: threshold for length filter;
    :type length: int or None, if filter is disabled;
    :return: filter function for a single annotation line;
    """
    checks = []

    # Quality filter.
    # There will be minus instead of quality for fasta files
    if not is_fasta(fpath):

        def passes_quality(record):
            # Someone can try to classify fasta files and then bin fast5
            #   according to the classification. In this case TypeError
            #   will be raised. Then the record just passes.
            try:
                return record[0] >= quality
            except TypeError:
                return True
            # end try
        # end def passes_quality

        checks.append(passes_quality)
    # end if

    # Length filter
    if length is not None:

        def passes_length(record):
            return record[1] >= length
        # end def passes_length

        checks.append(passes_length)
    # end if

    def passes_all(line):
        # "Integral" filter: stop at the first failed check
        for check in checks:
            if not check(line):
                return False
            # end if
        # end for
        return True
    # end def passes_all

    return passes_all
from glob import glob

# Parse command line: both short and long option forms are accepted
try:
    opts, args = getopt.gnu_getopt(
        sys.argv[1:],
        "hvr:d:o:s:q:m:i:c:ut:n",
        [
            "help",
            "version",
            "taxannot-resdir=",
            "indir=",
            "outdir=",
            "binning-sensitivity=",
            "min-qual=",
            "min-seq-len=",
            "min-pident=",
            "min-coverage=",
            "untwist-fast5",
            "threads=",
            "no-trash",
        ],
    )
except getopt.GetoptError as opt_err:
    # Show what is wrong with the options and quit
    # (presumably `platf_depend_exit` terminates the program with code 2 -- TODO confirm)
    print(opt_err)
    platf_depend_exit(2)
# end try

from src.filesystem import is_fasta, is_fastq, is_fast5


def is_fastqa(f):
    # True if the path is recognized either as fasta or as fastq
    return is_fasta(f) or is_fastq(f)
# end def is_fastqa


from datetime import datetime

# Timestamp of the launch; used below in the default output directory name
now = datetime.now().strftime("%Y-%m-%d %H.%M.%S")

# |== Default parameters: ==|

# list with input FASTQ and FASTA files paths
fq_fa_list = []
# list with input FAST5 files paths
fast5_list = []
# path to directory with classification results
tax_annot_res_dir = "barapost_result"
# path to input directory
indir_path = None
# path to output directory (space in the timestamp is replaced with underscore)
outdir_path = "binning_result_{}".format(now.replace(' ', '_'))
# binning sensitivity
sens = ("genus", 5)
# minimum mean read quality to keep
min_qual = 10
# minimum sequence length to keep
min_qlen = None
def test_is_fasta_txt(self, fpath_txt: str):
    # Should not recognize file with `.txt` extension.
    # Truthiness is asserted directly instead of comparing to `False` with `==`.
    assert not fls.is_fasta(fpath_txt)
def test_is_fasta_fa_bz2(self, fpath_fa_bz2: str):
    # Should not recognize file with `.fa.bz2` extension.
    # Truthiness is asserted directly instead of comparing to `False` with `==`.
    assert not fls.is_fasta(fpath_fa_bz2)
def test_is_fasta_fa_gz(self, fpath_fa_gz: str):
    # Should recognize file with `.fa.gz` extension.
    # Truthiness is asserted directly instead of comparing to `True` with `==`.
    assert fls.is_fasta(fpath_fa_gz)
def test_is_fasta_fa(self, fpath_fa: str):
    # Should recognize file with `.fa` extension.
    # Truthiness is asserted directly instead of comparing to `True` with `==`.
    assert fls.is_fasta(fpath_fa)