コード例 #1
0
def filterAndTrim(infile, outfile):
    '''
    Filter and trim reads based on user-defined parameters.

    The gzipped input read file(s) are decompressed into a temporary
    directory, ``dada2_filter_and_trim.R`` is run on the decompressed
    read-1 file, and the temporary directory is removed afterwards
    (also when one of the commands fails).

    Parameters
    ----------
    infile : str
        Path to the gzipped read-1 fastq file; must end in
        ".fastq.1.gz". When ``PARAMS["paired"] == 1`` the read-2 file
        name is derived by swapping that suffix for ".fastq.2.gz".
    outfile : str
        Not referenced in this function. The script is invoked with
        ``--filtered-directory=filtered.dir``; presumably it writes its
        output there -- TODO confirm against the R script.

    Raises
    ------
    ValueError
        If any of the ``trim_*`` parameters is empty or None, or if
        ``infile`` does not end in ".fastq.1.gz".
    '''
    # NOTE: the names maxn, maxee, truncQ, truncLen, trimLeft, paired,
    # inf and outtmp are referenced by the %(name)s placeholders in the
    # statements below. They are never passed to P.run explicitly, so
    # P.run presumably interpolates them from this function's local
    # variables -- do not rename them without updating the statements.
    maxn = PARAMS["trim_maxn"]
    maxee = PARAMS["trim_maxee"]
    truncQ = PARAMS["trim_truncq"]
    truncLen = PARAMS["trim_trunclen"]
    trimLeft = PARAMS["trim_trimleft"]

    # make sure parameters are present; an explicit raise (rather than
    # assert) keeps the check alive under ``python -O``
    required_params = (("trim_maxn", maxn),
                       ("trim_maxee", maxee),
                       ("trim_truncq", truncQ),
                       ("trim_trunclen", truncLen),
                       ("trim_trimleft", trimLeft))
    missing_params = [key for key, value in required_params
                      if value is None or value == ""]
    if missing_params:
        raise ValueError(
            "must specify all parameters to filterAndTrim (missing: %s)"
            % ", ".join(missing_params))

    # the decompressed read-1 file is what gets handed to the R script,
    # so fail early and clearly if the input is not named as expected
    read1_suffix = ".fastq.1.gz"
    if not infile.endswith(read1_suffix):
        raise ValueError(
            "filterAndTrim expects an input file ending in %s, got %s"
            % (read1_suffix, infile))

    if PARAMS["paired"] == 1:
        paired = "--paired"
        # unzip one by one; only the suffix is swapped so that a
        # matching substring elsewhere in the path is left alone
        infile_read2 = infile[:-len(read1_suffix)] + ".fastq.2.gz"
        infiles = [infile, infile_read2]
    else:
        paired = ""
        infiles = [infile]

    gz_len = len(".gz")
    tmpdir = P.get_temp_dir()
    try:
        for inf in infiles:
            # use the basename so that inputs given with a directory
            # (or absolute) path still land directly inside tmpdir
            outtmp = os.path.join(tmpdir,
                                  os.path.basename(inf)[:-gz_len])
            statement = '''zcat %(inf)s > %(outtmp)s'''
            P.run(statement)

        # the script is only given the decompressed read-1 file, even
        # when --paired is set
        outtmp = os.path.join(tmpdir,
                              os.path.basename(infile)[:-gz_len])
        statement = '''Rscript %(scriptsdir)s/dada2_filter_and_trim.R
                           --infile=%(outtmp)s
                           %(paired)s
                           --maxN=%(maxn)s
                           --maxEE=%(maxee)s
                           --truncQ=%(truncQ)s
                           --truncLen=%(truncLen)s
                           --trimLeft=%(trimLeft)s
                           --filtered-directory=filtered.dir'''
        P.run(statement)
    finally:
        # always remove the decompressed copies, even if a command failed
        shutil.rmtree(tmpdir)