def processReadsGSE65525(infile, outfile):
    '''Trim the GSE65525 reads with trimmomatic.

    The trimming settings follow Klein et al 2015.

    Arguments
    ---------
    infile : string
        Input file; ``.fastq.gz`` is snipped from its basename to
        obtain the track name.
    outfile : string
        Not referenced in the body. Output is built from the prefix
        ``GSE65525/processed.dir/trimmed-`` and the track name.
    '''
    # NOTE(review): P.run() is called without arguments; presumably it
    # reads `statement` and `job_memory` from this function's local
    # variables, so these names must not be renamed or removed.
    job_memory = "7G"
    threads = 1

    # settings as per Klein et al 2015
    trimmomatic_options = "LEADING:28 SLIDINGWINDOW:4:20 MINLEN:19"

    track = P.snip(os.path.basename(infile), ".fastq.gz")

    # a processing chain consisting of a single trimmomatic step
    m = PipelinePreprocess.MasterProcessor(threads=threads)
    trimmer = PipelinePreprocess.Trimmomatic(
        trimmomatic_options,
        threads=threads)
    m.add(trimmer)

    statement = m.build(
        (infile, ),
        "GSE65525/processed.dir/trimmed-",
        track)

    P.run()
def processReads(infile, outfiles):
    '''Process reads from .fastq and other sequence files.

    A chain of preprocessing tools is assembled in the order given by
    ``PARAMS["preprocessors"]`` and the resulting statement is run.

    Arguments
    ---------
    infile : string
        Input sequence file. Its name must match ``REGEX_TRACK``; the
        first group of the match is used as the track name.
    outfiles
        Not referenced in the body. Output is built from the prefix
        ``processed.dir/trimmed-`` and the track name.

    Raises
    ------
    ValueError
        If `infile` does not match ``REGEX_TRACK``.
    NotImplementedError
        If ``PARAMS["preprocessors"]`` names an unsupported tool.
    '''
    # Each ILLUMINACLIP step is *prepended*, so when both are enabled
    # the final order is: contaminants clip, adapter clip, then the
    # user-supplied trimmomatic options.
    trimmomatic_options = PARAMS["trimmomatic_options"]

    if PARAMS["trimmomatic_adapter"]:
        trimmomatic_options = _illuminaclipOption(
            PARAMS["trimmomatic_adapter"]) + trimmomatic_options

    if PARAMS["auto_remove"]:
        trimmomatic_options = _illuminaclipOption(
            "contaminants.fasta") + trimmomatic_options

    # NOTE(review): P.run() is called without arguments; presumably it
    # reads `statement`, `job_threads` and `job_memory` from this
    # function's local variables, so these names must be kept.
    job_threads = PARAMS["threads"]
    job_memory = "12G"

    # fail with an explicit message instead of an AttributeError on None
    track_match = re.match(REGEX_TRACK, infile)
    if track_match is None:
        raise ValueError(
            "input file '%s' does not match REGEX_TRACK" % infile)
    track = track_match.groups()[0]

    m = PipelinePreprocess.MasterProcessor(
        save=PARAMS["save"],
        summarize=PARAMS["summarize"],
        threads=PARAMS["threads"])

    for tool in P.asList(PARAMS["preprocessors"]):

        if tool == "fastx_trimmer":
            m.add(PipelinePreprocess.FastxTrimmer(
                PARAMS["fastx_trimmer_options"],
                threads=PARAMS["threads"]))
        elif tool == "trimmomatic":
            m.add(PipelinePreprocess.Trimmomatic(
                trimmomatic_options,
                threads=PARAMS["threads"]))
        elif tool == "sickle":
            m.add(PipelinePreprocess.Sickle(
                PARAMS["sickle_options"],
                threads=PARAMS["threads"]))
        elif tool == "trimgalore":
            m.add(PipelinePreprocess.Trimgalore(
                PARAMS["trimgalore_options"],
                threads=PARAMS["threads"]))
        elif tool == "flash":
            m.add(PipelinePreprocess.Flash(
                PARAMS["flash_options"],
                threads=PARAMS["threads"]))
        elif tool == "reversecomplement":
            # the only tool that is constructed without a threads argument
            m.add(PipelinePreprocess.ReverseComplement(
                PARAMS["reversecomplement_options"]))
        elif tool == "pandaseq":
            m.add(PipelinePreprocess.Pandaseq(
                PARAMS["pandaseq_options"],
                threads=PARAMS["threads"]))
        elif tool == "cutadapt":
            cutadapt_options = PARAMS["cutadapt_options"]
            if PARAMS["auto_remove"]:
                # additionally pass contaminants.fasta as an adapter file
                cutadapt_options += " -a file:contaminants.fasta "
            m.add(PipelinePreprocess.Cutadapt(
                cutadapt_options,
                threads=PARAMS["threads"],
                untrimmed=PARAMS['cutadapt_reroute_untrimmed'],
                process_paired=PARAMS["cutadapt_process_paired"]))
        else:
            raise NotImplementedError("tool '%s' not implemented" % tool)

    statement = m.build((infile,), "processed.dir/trimmed-", track)
    P.run()


def _illuminaclipOption(adapter_file):
    '''Return a trimmomatic ILLUMINACLIP step for `adapter_file`.

    The remaining five colon-separated fields are taken from the
    ``trimmomatic_*`` entries in ``PARAMS``. The returned string is
    padded with one space on either side so that it can be concatenated
    directly with other trimmomatic options.
    '''
    return " ILLUMINACLIP:%s:%s:%s:%s:%s:%s " % (
        adapter_file,
        PARAMS["trimmomatic_mismatches"],
        PARAMS["trimmomatic_p_thresh"],
        PARAMS["trimmomatic_c_thresh"],
        PARAMS["trimmomatic_min_adapter_len"],
        PARAMS["trimmomatic_keep_both_reads"])