예제 #1
0
 def makeAdaptorFasta(infile, outfile):
     '''Collect all contaminant adaptor sequences for one sample into a
     single fasta file, ready for adaptor removal.

     The sample track is the first capture group of REGEX_TRACK matched
     against ``infile``; the actual work is delegated to
     PipelinePreprocess.makeAdaptorFasta.
     '''
     sample_track = re.match(REGEX_TRACK, infile).group(1)
     db_handle = connect()
     contaminants_source = PARAMS['contaminants']

     PipelinePreprocess.makeAdaptorFasta(
         infile=infile,
         outfile=outfile,
         track=sample_track,
         dbh=db_handle,
         contaminants_file=contaminants_source)
예제 #2
0
 def makeAdaptorFasta(infile, outfile):
     '''Collect all contaminant adaptor sequences for one sample into a
     single fasta file, ready for adaptor removal.

     The sample track is the first capture group of REGEX_TRACK matched
     against ``infile``; the actual work is delegated to
     PipelinePreprocess.makeAdaptorFasta.
     '''
     sample_track = re.match(REGEX_TRACK, infile).group(1)
     db_handle = connect()
     contaminants_source = PARAMS['contaminants']

     PipelinePreprocess.makeAdaptorFasta(
         infile=infile,
         outfile=outfile,
         track=sample_track,
         dbh=db_handle,
         contaminants_file=contaminants_source)
예제 #3
0
def processReadsGSE65525(infile, outfile):
    '''Process the GSE65525 reads with trimmomatic as per Klein et al 2015.

    A PipelinePreprocess.MasterProcessor holding a single Trimmomatic step
    (one thread) is set up for ``infile``. The statement it builds uses the
    "GSE65525/processed.dir/trimmed-" prefix together with the track taken
    from the input file name, after which P.run() is invoked.

    ``outfile`` is not referenced anywhere in the body.
    '''

    # presumably strips the ".fastq.gz" suffix from the file name to obtain
    # the track -- confirm against P.snip
    track = P.snip(os.path.basename(infile), ".fastq.gz")

    threads = 1
    # NOTE(review): `job_memory` and `statement` are never read explicitly
    # and P.run() is called without arguments, so P.run() presumably picks
    # them up from this function's local scope -- confirm before renaming
    # or removing any local variable here.
    job_memory = "7G"

    # trimming settings as per Klein et al 2015 (this comment previously
    # cited "Allon et al 2015", presumably the same paper -- confirm)
    trimmomatic_options = "LEADING:28 SLIDINGWINDOW:4:20 MINLEN:19"

    m = PipelinePreprocess.MasterProcessor(threads=threads)

    m.add(PipelinePreprocess.Trimmomatic(trimmomatic_options, threads=threads))

    # single input file, hence the one-element tuple
    statement = m.build((infile, ), "GSE65525/processed.dir/trimmed-", track)

    P.run()
예제 #4
0
def processReads(infile, outfile):
    '''Process reads from .fastq or .sra files.

    Tasks specified in PREPROCESSTOOLS are run in order.

    A PipelinePreprocess.Preprocessor builds the statement for the single
    input file ``infile`` and the target ``outfile``; the statement is
    echoed to stdout and P.run() is then invoked.
    '''

    # NOTE(review): `job_threads`, `job_options` and `statement` are never
    # read explicitly and P.run() is called without arguments, so P.run()
    # presumably picks them up from this function's local scope -- confirm
    # before renaming or removing any of them.
    job_threads = PARAMS["general_threads"]
    job_options = "-l mem_free=%s" % PARAMS["general_memory"]

    m = PipelinePreprocess.Preprocessor()
    statement = m.build((infile,), outfile, PREPROCESSTOOLS)

    # The parenthesised form prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare ``print statement``.
    print(statement)
    P.run()
예제 #5
0
    def processReads(infile, outfiles):
        '''Run the configured read preprocessors over one sequence file
        (.fastq and other sequence formats).

        The tools named in PARAMS["preprocessors"] are added, in the order
        given, to a PipelinePreprocess.MasterProcessor. The statement it
        builds uses the "processed.dir/trimmed-" prefix together with the
        track extracted from ``infile`` via REGEX_TRACK, after which
        P.run() is invoked.

        Raises NotImplementedError for a tool name that is not handled
        below.
        '''
        # NOTE(review): P.run() is called without arguments, so it
        # presumably reads `statement`, `job_threads` and `job_memory` from
        # this function's local scope -- those names are kept as they were;
        # confirm against P.run before renaming any of them.
        trimmomatic_options = PARAMS["trimmomatic_options"]

        # Every enabled adapter source puts one ILLUMINACLIP step in front
        # of the options gathered so far: first the user-supplied adapter
        # file, then the automatically generated contaminants file.
        clip_template = " ILLUMINACLIP:%s:%s:%s:%s:%s:%s "
        clip_setting_keys = (
            "trimmomatic_mismatches",
            "trimmomatic_p_thresh",
            "trimmomatic_c_thresh",
            "trimmomatic_min_adapter_len",
            "trimmomatic_keep_both_reads",
        )
        for clip_switch, clip_fixed_fasta in (
                ("trimmomatic_adapter", None),
                ("auto_remove", "contaminants.fasta")):
            clip_fasta = PARAMS[clip_switch]
            if not clip_fasta:
                continue
            if clip_fixed_fasta is not None:
                # auto_remove is only a switch; the fasta name is fixed
                clip_fasta = clip_fixed_fasta
            clip_fields = (clip_fasta,) + tuple(
                PARAMS[key] for key in clip_setting_keys)
            trimmomatic_options = (
                clip_template % clip_fields) + trimmomatic_options

        job_threads = PARAMS["threads"]
        job_memory = "12G"

        track = re.match(REGEX_TRACK, infile).group(1)

        m = PipelinePreprocess.MasterProcessor(
            save=PARAMS["save"],
            summarize=PARAMS["summarize"],
            threads=job_threads)

        # Tools that are constructed uniformly from PARAMS["<tool>_options"]
        # plus the thread count. Class names are resolved lazily so that
        # only the requested tools are looked up.
        simple_tool_classes = {
            "fastx_trimmer": "FastxTrimmer",
            "sickle": "Sickle",
            "trimgalore": "Trimgalore",
            "flash": "Flash",
            "pandaseq": "Pandaseq",
        }

        for tool in P.asList(PARAMS["preprocessors"]):

            if tool in simple_tool_classes:
                processor_class = getattr(
                    PipelinePreprocess, simple_tool_classes[tool])
                m.add(processor_class(
                    PARAMS[tool + "_options"],
                    threads=job_threads))
            elif tool == "trimmomatic":
                # uses the options assembled above, not the raw PARAMS value
                m.add(PipelinePreprocess.Trimmomatic(
                    trimmomatic_options,
                    threads=job_threads))
            elif tool == "reversecomplement":
                # constructed without a thread count
                m.add(PipelinePreprocess.ReverseComplement(
                    PARAMS["reversecomplement_options"]))
            elif tool == "cutadapt":
                cutadapt_options = PARAMS["cutadapt_options"]
                if PARAMS["auto_remove"]:
                    cutadapt_options += " -a file:contaminants.fasta "
                m.add(PipelinePreprocess.Cutadapt(
                    cutadapt_options,
                    threads=job_threads,
                    untrimmed=PARAMS['cutadapt_reroute_untrimmed'],
                    process_paired=PARAMS["cutadapt_process_paired"]))
            else:
                raise NotImplementedError("tool '%s' not implemented" % tool)

        statement = m.build((infile,), "processed.dir/trimmed-", track)
        P.run()