예제 #1
0
파일: WGS.py 프로젝트: bwang2014/NGSTools-1
def processSampleFromLibarary(sampleID, sampleIDList, libraryBamFileList):
    '''Run the enabled QC and BWA mapping steps for one sample.

    sampleID           -- sample name handed to NGSTools.NGSTools
    sampleIDList       -- sequence whose first two items are passed as
                          fq1 and fq2
    libraryBamFileList -- list updated in place: the value returned by
                          ``samtools_sort`` is appended when mapping runs
                          (presumably the sorted BAM path -- confirm
                          against NGSTools)

    The module-level switches QC and BWA select which steps run; every
    tool call receives ``run=_run``.  Nothing is returned.
    '''

    ########################## 0. init #########################
    # Constructor signature as noted by the original author:
    # __init__(self, sampleName, outdir, fq1, fq2='', qualityBase='33',
    #          cfgfile='~/.NGSTools.cfg')
    mySample = NGSTools.NGSTools(sampleID,
                                 args.outDir,
                                 fq1=sampleIDList[0],
                                 fq2=sampleIDList[1],
                                 qualityBase=args.qbase,
                                 cfgfile=os.path.abspath(args.config))

    ########################## 1. QC  #########################
    if QC:
        mySample.cutadapter(run=_run)
        mySample.QC_fastqc(run=_run)

    ########################## 2. Mapping  #######################
    if not BWA:
        # Without mapping there is no result to record.  The previous code
        # fell through to the append below and failed with an unbound
        # local variable.
        return

    # The return value of bwa() is not needed: only the result of the
    # sort step is recorded.
    # Alternative aligner kept for reference:
    #   mySample.bowtie2(mode='--end-to-end', run=_run)
    mySample.bwa(run=_run)
    finalBam = mySample.samtools_sort(run=_run)

    libraryBamFileList.append(finalBam)
예제 #2
0
def processSampleFromLibarary(sampleID, sampleIDList, libraryBamFileList):
    '''Run the enabled QC and bisulfite mapping steps for one sample.

    sampleID           -- sample name handed to NGSTools.NGSTools
    sampleIDList       -- sequence whose first two items are passed as
                          fq1 and fq2
    libraryBamFileList -- list updated in place: the value returned by the
                          selected mapper is appended (presumably a BAM
                          path -- confirm against NGSTools)

    The module-level switch QC enables trimming and FastQC.  Bismark is
    preferred over Bs_seeker2 when both switches are set.  If neither is
    set a warning is printed and the list is left untouched.  Every tool
    call receives ``run=_run``.  Nothing is returned.
    '''

    ########################## 0. init #########################
    # Constructor signature as noted by the original author:
    # __init__(self, sampleName, outdir, fq1, fq2='', quanlityBase='33',
    #          cfgfile='~/.NGSTools.cfg')
    mySample = NGSTools.NGSTools(sampleID,
                                 args.outDir,
                                 fq1=sampleIDList[0],
                                 fq2=sampleIDList[1],
                                 cfgfile=os.path.abspath(args.config))

    ########################## 1. QC  #########################
    if QC:
        mySample.trim_Galore(args.rrbs, run=_run)
        mySample.QC_fastqc(run=_run)

    ########################## 2. Mapping  #######################
    if Bismark:
        finalBam = mySample.Bismark(run=_run)
    elif Bs_seeker2:
        finalBam = mySample.Bs_seeker2(run=_run)
    else:
        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print('WARNING: not choose a mapping tools')
        # No mapper ran, so there is nothing to append.  The previous code
        # continued and failed on the unbound ``finalBam``.
        return

    libraryBamFileList.append(finalBam)
예제 #3
0
def processSample(line, condition, transcripts, countsFiles, finalBam,
                  expressCXB):
    '''Run the enabled RNA-seq pipeline steps for one sample-sheet line.

    ``line`` is a tab-separated record: name, condition, fq1 and an
    optional fourth column fq2.  A three-column line is treated as a
    single-end library and fq2 is set to '-'.

    The module-level switches QC, Mapping, GFold, DESeq, GATK and
    Cufflinks select which steps run (GFold, DESeq and GATK only take
    effect when Mapping is on); every tool call receives ``run=_run``.

    Results are reported by updating the caller's containers in place:

    condition   -- maps a condition name to a comma-joined string of the
                   values returned by ``tophat2`` for its samples
    transcripts -- list; the expected cufflinks ``transcripts.gtf`` path
                   is appended when Cufflinks is enabled
    countsFiles -- maps the value returned by ``HTSeq_count`` to
                   '<condition>|<name>'
    finalBam    -- maps the value returned by ``realn`` to the condition
    expressCXB  -- accepted but not used by this function

    Nothing is returned.
    '''

    cols = line.strip().split('\t')

    # Exactly three columns means single end; otherwise the fourth column
    # is the second FASTQ (a shorter line raises IndexError, as before).
    if len(cols) == 3:
        fq2 = '-'
    else:
        fq2 = cols[3]

    sample = {
        'name': cols[0],
        'condition': cols[1],
        'fq1': cols[2],
        'fq2': fq2,
        'bam': ''
    }

    ########################## 0. init #########################
    # Constructor signature as noted by the original author:
    # __init__(self, sampleName, outdir, fq1, fq2='', quanlityBase='32',
    #          cfgfile='~/.NGSTools.cfg')
    mySample = NGSTools.NGSTools(sample['name'],
                                 args.outDir,
                                 sample['fq1'],
                                 sample['fq2'],
                                 libType=args.libraryType,
                                 cfgfile=os.path.abspath(args.config))

    if QC:
        #################### 1. Quality Control ####################

        ###### 1.1 cut adapter ######
        # Only raw data is adapter-trimmed; other data types go straight
        # to FastQC.
        if args.dataType == 'raw':
            # Alternatives kept for reference:
            #mySample.cutadapter(adapter5='', adapter3='AATGATACGGCGACCACCGAGATCT', run = _run)
            ### Nextera Kit
            #mySample.cutadapter(adapter5='CTGTCTCTTATACAC', adapter3='CTGTCTCTTATACAC',run = _run)
            #mySample.rm_lowQual(run = _run)
            mySample.cutadapter(run=_run)

        ##### 1.2 fastqc #####
        mySample.QC_fastqc(run=_run)

    if Mapping:
        ######################## 2. Mapping ########################

        sample['bam'] = mySample.tophat2(run=_run)

        # Collect the mapping results per condition as one comma-joined
        # string.  ``in`` replaces the deprecated dict.has_key().
        if sample['condition'] in condition:
            condition[sample['condition']] += "," + sample['bam']
        else:
            condition[sample['condition']] = sample['bam']

        if GFold:
            # GFold count
            mySample.gfoldCount(run=_run)

        if DESeq:
            # DESeq2: remember which condition/sample each count file
            # belongs to.
            count = mySample.HTSeq_count(run=_run)
            countsFiles[count] = sample['condition'] + '|' + sample['name']

        if GATK:
            # remove duplicates
            mySample.rmdup(run=_run)

            # picard reorder
            mySample.picard_reorder(run=_run)

            # splitN
            mySample.splitN(run=_run)

            # realign
            realnBam = mySample.realn(run=_run)

            # Recalibration needs known SNP sites, so it stays disabled:
            #recalBam = mySample.recal(run = _run)

            finalBam[realnBam] = sample['condition']

            # samtools call SNP/InDel
            mySample.samtools_call(run=_run)
            mySample.samtools_filter(run=_run)

    ########################  DEGs calling preparation ########################

    if Cufflinks:
        ##### 3. cufflinks #####
        # NOTE(review): when Mapping is off, sample['bam'] is still '' and
        # the generated command has no input file -- confirm whether
        # Cufflinks is ever meant to run without Mapping.
        cuffdir = os.path.join(args.outDir, 'cufflinks')

        # Create the directory without a check-then-create race: a failure
        # is ignored when the directory exists afterwards (e.g. it was
        # created by another sample's run in the meantime).
        try:
            os.makedirs(cuffdir)
        except OSError:
            if not os.path.isdir(cuffdir):
                raise

        sampleOutDir = os.path.join(
            cuffdir, sample['condition'] + '_' + sample['name'])

        # cufflinks #
        command = 'cufflinks --library-type %s -p 4 -g %s -o %s %s' % (
            args.libraryType, cfg.gtf, sampleOutDir, sample['bam'])
        NGSTools.writeCommands(command,
                               cuffdir + '/cufflinks_%s.sh' % sample['name'],
                               _run)

        transcripts.append(os.path.join(sampleOutDir, 'transcripts.gtf'))