def processSample(sampleName, sampleNameDict):
    '''Map all libraries of one sample in parallel, then merge, rmdup and extract methylation.

    sampleName     -- sample label used in the output BAM and script file names.
    sampleNameDict -- mapping of library ID to the per-library data that is
                      handed to processSampleFromLibarary (one child process
                      per key).

    The post-merge steps are switched by the module-level RMDUP and
    Methylation_extractor flags.  Nothing is returned.
    '''
    # Manager-backed list shared with the workers; it is read back below as the
    # collection of BAM files that belong to this sample.
    manager = Manager()
    libraryBamFileList = manager.list()

    # one worker process per library
    workers = [
        Process(name=libraryID,
                target=processSampleFromLibarary,
                args=(libraryID, sampleNameDict[libraryID], libraryBamFileList))
        for libraryID in sampleNameDict
    ]
    for worker in workers:
        worker.start()

    # block until every library has been processed
    for worker in workers:
        worker.join()

    ###################### 2.1 post mapping ####################
    # the merged BAM is written next to the first per-library BAM
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    mergeScript = args.outDir + '/mapping/picard_mergebam_' + sampleName + '.sh'
    mergeCommand = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg)
    NGSTools.writeCommands(mergeCommand, mergeScript, run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        rmdupScript = args.outDir + '/mapping/picard_rmdup_' + sampleName + '.sh'
        rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(rmdupCommand, rmdupScript, run=_run)
        # from here on work with the de-duplicated BAM
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    ##################### 4. DMR calling ##################
    if Methylation_extractor:
        outputPrefix = bamFileDir + '/' + sampleName
        NGSTools.methylation_extractor(finalBamFilePath, outputPrefix, cfg)
def processSample(sampleName, sampleNameDict):
    '''Map all libraries of one sample in parallel, then merge, rmdup and call SNPs.

    sampleName     -- sample label used in output file and script names.
    sampleNameDict -- mapping of library ID to the per-library data that is
                      handed to processSampleFromLibarary (one child process
                      per key).

    The post-merge steps are switched by the module-level RMDUP and MPILEUP
    flags.  Nothing is returned.

    Raises IndexError when no library BAM was reported by the workers.
    '''
    # process communication: Manager-backed list shared with the workers,
    # read back below as the BAM files belonging to this sample
    mng = Manager()
    libraryBamFileList = mng.list()

    # init multi-processing: one worker per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(name=sampleID,
                            target=processSampleFromLibarary,
                            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList))
        Processer.start()
        record.append(Processer)

    # wait for the workers
    for proc in record:
        proc.join()

    if len(libraryBamFileList) == 0:
        # same exception type the bare [0] lookup used to raise, but readable
        raise IndexError("no library BAM file was produced for sample %s" % sampleName)

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # The filter step (2.2) is disabled, so no separate "<sample>_properly.bam"
    # is ever written: the merged BAM is the one later steps must read.
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir+'/picard_mergebam_'+sampleName+'.sh', run=_run)

    ###################### 2.2. filter bam ######################
    # disabled; when re-enabled it should write a filtered BAM and point
    # finalBamFilePath at it, e.g.
    #   samtools view -b -h -f 2 -q 10 <merged> > <filtered>

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir+'/picard_rmdup_'+sampleName+'.sh', run=_run)
        # continue with the de-duplicated BAM derived from the rmdup input
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
        NGSTools.writeCommands(command, SNP_out+'/bcftools_call_'+sampleName+'.sh', run=_run)

        # str.replace() treats "vcf$" literally and never matched, which made
        # the filter write onto its own input; anchor the suffix with a regex.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        if outVcf == rawVcf:
            # name does not end in "vcf": still keep input and output distinct
            outVcf = rawVcf + '.flt.vcf'

        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command, SNP_out+'/bcftools_filter_'+sampleName+'.sh', run=_run)
def processSample(sampleName, sampleNameDict):
    '''Per-sample pipeline: parallel library processing, BAM merge, optional
    duplicate removal and optional methylation extraction.

    sampleName     -- sample label; part of the merged BAM and script names.
    sampleNameDict -- library ID -> per-library data; each entry is passed to
                      processSampleFromLibarary in its own process.

    RMDUP and Methylation_extractor (module level) switch the last two steps.
    The function returns nothing.
    '''
    # shared, Manager-backed list through which the child processes hand
    # their BAM files back to this function
    mng = Manager()
    libraryBamFileList = mng.list()

    # launch one child per library ...
    children = []
    for libraryID in sampleNameDict:
        libraryInfo = sampleNameDict[libraryID]
        child = Process(name=libraryID,
                        target=processSampleFromLibarary,
                        args=(libraryID, libraryInfo, libraryBamFileList))
        child.start()
        children.append(child)

    # ... and wait for all of them
    for child in children:
        child.join()

    ###################### 2.1 post mapping ####################
    firstLibraryBam = libraryBamFileList[0]
    bamFileDir = os.path.dirname(firstLibraryBam)
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    # merge the per-library BAMs into the final BAM
    NGSTools.writeCommands(
        NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg),
        args.outDir+'/mapping/picard_mergebam_'+sampleName+'.sh',
        run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        NGSTools.writeCommands(
            NGSTools.picard_rmdup(finalBamFilePath, True, cfg),
            args.outDir+'/mapping/picard_rmdup_'+sampleName+'.sh',
            run=_run)
        # switch to the de-duplicated BAM for the remaining steps
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    ##################### 4. DMR calling ##################
    if not Methylation_extractor:
        return
    NGSTools.methylation_extractor(finalBamFilePath, bamFileDir+'/'+sampleName, cfg)
def processSample(sampleName, sampleNameDict):
    '''Map all libraries of one sample in parallel, then merge, filter and rmdup.

    sampleName     -- sample label used in output BAM and script names.
    sampleNameDict -- mapping of library ID to the per-library data that is
                      handed to processSampleFromLibarary (one child process
                      per key).

    Duplicate removal is switched by the module-level RMDUP flag.  Nothing is
    returned.

    Raises IndexError when no library BAM was reported by the workers.
    '''
    # process communication: Manager-backed list shared with the workers,
    # read back below as the BAM files belonging to this sample
    mng = Manager()
    libraryBamFileList = mng.list()

    # init multi-processing: one worker per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(name=sampleID,
                            target=processSampleFromLibarary,
                            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList))
        Processer.start()
        record.append(Processer)

    # wait for the workers
    for proc in record:
        proc.join()

    if len(libraryBamFileList) == 0:
        # same exception type the bare [0] lookup used to raise, but readable
        raise IndexError("no library BAM file was produced for sample %s" % sampleName)

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

    # merge the bam from each library to one bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir + '/picard_mergebam_' + sampleName + '.sh', run=_run)

    ###################### 3. filter bam ######################
    # Keep alignments with MAPQ >= 10 (add "-f 2" to also require proper pairs).
    # No "-S": the input is the merged BAM, and legacy samtools reads the
    # input as SAM when that flag is given.
    command = 'samtools view -b -h -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    NGSTools.writeCommands(command, bamFileDir + '/filterBam_' + sampleName + '.sh', run=_run)

    ###################### 4. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir + '/picard_rmdup_' + sampleName + '.sh', run=_run)
        # continue with the de-duplicated BAM
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)
def processSample(sampleName, sampleNameDict):
    '''Map all libraries of one sample in parallel, then merge, filter and rmdup.

    sampleName     -- sample label used in output BAM and script names.
    sampleNameDict -- mapping of library ID to the per-library data that is
                      handed to processSampleFromLibarary (one child process
                      per key).

    Duplicate removal is switched by the module-level RMDUP flag.  Nothing is
    returned.

    Raises IndexError when no library BAM was reported by the workers.
    '''
    # process communication: Manager-backed list shared with the workers,
    # read back below as the BAM files belonging to this sample
    mng = Manager()
    libraryBamFileList = mng.list()

    # init multi-processing: one worker per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(name=sampleID,
                            target=processSampleFromLibarary,
                            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList))
        Processer.start()
        record.append(Processer)

    # wait for the workers
    for proc in record:
        proc.join()

    if len(libraryBamFileList) == 0:
        # same exception type the bare [0] lookup used to raise, but readable
        raise IndexError("no library BAM file was produced for sample %s" % sampleName)

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

    # merge the bam from each library to one bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir+'/picard_mergebam_'+sampleName+'.sh', run=_run)

    ###################### 3. filter bam ######################
    # Keep properly paired alignments (-f 2) with MAPQ >= 10.
    # No "-S": the input is the merged BAM, and legacy samtools reads the
    # input as SAM when that flag is given.
    command = 'samtools view -b -h -f 2 -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    NGSTools.writeCommands(command, bamFileDir+'/filterBam_'+sampleName+'.sh', run=_run)

    ###################### 4. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir+'/picard_rmdup_'+sampleName+'.sh', run=_run)
        # continue with the de-duplicated BAM
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)
def processSample(line, condition, transcripts, countsFiles, finalBam, expressCXB):
    '''Run the enabled RNA-seq steps for the sample described by one sample-list line.

    Which steps run is decided by the module-level switches QC, Mapping,
    GFold, DESeq, GATK and Cufflinks.  Results are reported through the
    containers passed in; nothing is returned.

    line        -- tab-separated record: name, condition, fq1 and, for
                   paired-end libraries, fq2.
    condition   -- mapping condition -> comma-separated BAM paths; extended
                   when Mapping is on.
    transcripts -- list receiving this sample's cufflinks transcripts.gtf
                   path when Cufflinks is on.
    countsFiles -- mapping HTSeq count file -> "condition|name"; filled when
                   DESeq is on.
    finalBam    -- mapping realigned BAM -> condition; filled when GATK is on.
    expressCXB  -- accepted for the caller's sake; not used in this function.
    '''
    cols = line.strip().split('\t')
    if len(cols) == 3:
        # single end library: '-' stands in for the missing second fastq
        fq2 = '-'
    else:
        # paired end
        fq2 = cols[3]

    sample = {
        'name': cols[0],
        'condition': cols[1],
        'fq1': cols[2],
        'fq2': fq2,
        'bam': '',
    }

    ########################## 0. init #########################
    mySample = NGSTools.NGSTools(sample['name'],
                                 args.outDir,
                                 sample['fq1'],
                                 sample['fq2'],
                                 libType=args.libraryType,
                                 cfgfile=os.path.abspath(args.config))

    if QC:
        #################### 1. Quality Control ####################
        ###### 1.1 cut adapter ######
        # only raw data still carries adapters
        if args.dataType == 'raw':
            # Nextera kit alternative:
            #   mySample.cutadapter(adapter5='CTGTCTCTTATACAC', adapter3='CTGTCTCTTATACAC', run=_run)
            mySample.cutadapter(run=_run)

        ##### 1.2 fastqc #####
        mySample.QC_fastqc(run=_run)

    if Mapping:
        ######################## 2. Mapping ########################
        sample['bam'] = mySample.tophat2(run=_run)

        # NOTE(review): read-modify-write; if `condition` is shared between
        # worker processes, concurrent updates of one condition could be
        # lost -- confirm against the caller.
        if sample['condition'] in condition:
            condition[sample['condition']] += "," + sample['bam']
        else:
            condition[sample['condition']] = sample['bam']

    if GFold:
        # GFold count
        mySample.gfoldCount(run=_run)

    if DESeq:
        # HTSeq counts, later consumed by DESeq
        count = mySample.HTSeq_count(run=_run)
        countsFiles[count] = sample['condition'] + '|' + sample['name']

    if GATK:
        # remove duplicates
        mySample.rmdup(run=_run)
        # picard reorder
        mySample.picard_reorder(run=_run)
        # splitN
        mySample.splitN(run=_run)
        # realign
        realnBam = mySample.realn(run=_run)
        # recalibration needs known SNP sites, so it is skipped:
        #   recalBam = mySample.recal(run=_run)
        finalBam[realnBam] = sample['condition']

        # samtools call SNP/InDel
        mySample.samtools_call(run=_run)
        mySample.samtools_filter(run=_run)

    ######################## DEGs calling preparation ########################
    if Cufflinks:
        ##### 3. cufflinks #####
        cuffdir = os.path.join(args.outDir, 'cufflinks')
        # Create-then-tolerate instead of exists()-then-mkdir(): another
        # sample may create the directory between the check and the mkdir.
        try:
            os.mkdir(cuffdir)
        except OSError:
            if not os.path.isdir(cuffdir):
                raise

        # per-sample cufflinks output directory
        sampleCuffDir = os.path.join(cuffdir, sample['condition'] + '_' + sample['name'])

        command = 'cufflinks --library-type %s -p 4 -g %s -o %s %s' % (
            args.libraryType, cfg.gtf, sampleCuffDir, sample['bam'])
        NGSTools.writeCommands(command, cuffdir + '/cufflinks_%s.sh' % sample['name'], _run)

        transcripts.append(os.path.join(sampleCuffDir, 'transcripts.gtf'))
record.append(P) for P in record: P.join() ######################## DEGs calling ######################## if Cufflinks: # cuffmerge # cuffdir = os.path.join(args.outDir, 'cufflinks') with open(cuffdir + '/assemblies.txt', 'w') as writer: writer.write('\n'.join(transcripts)) command = 'cuffmerge -o %s -g %s -s %s -p 10 %s' % ( cuffdir + '/merged_asm', cfg.gtf, cfg.genome, cuffdir + '/assemblies.txt') NGSTools.writeCommands(command, cuffdir + '/cuffmerge.sh', _run) # cuffnorm # cuffnorm_dir = os.path.join(cuffdir, 'cuffnorm') try: os.mkdir(cuffnorm_dir) except: pass cond = ','.join(condition.keys()) bams = ' '.join(condition.values()) command = 'mkdir cuffnorm\ncuffnorm --library-type %s -o %s -L %s %s %s' % ( args.libraryType, cuffnorm_dir, cond, cfg.gtf, bams) NGSTools.writeCommands(command, cuffdir + '/cuffnorm.sh', _run) # cuffdiff #
def processSample(line, condition, transcripts, countsFiles, finalBam):
    '''Run the enabled RNA-seq steps for the sample described by one sample-list line.

    Which steps run is decided by the module-level switches QC, Mapping,
    GFold, DESeq2, GATK and Cufflinks.  Results are reported through the
    containers passed in; nothing is returned.

    line        -- tab-separated record: name, condition, fq1 and, for
                   paired-end libraries, fq2.
    condition   -- mapping condition -> comma-separated BAM paths; extended
                   when Mapping is on.
    transcripts -- list receiving this sample's cufflinks transcripts.gtf
                   path when Cufflinks is on.
    countsFiles -- mapping HTSeq count file -> condition; filled when DESeq2
                   is on.
    finalBam    -- mapping realigned BAM -> condition; filled when GATK is on.
    '''
    cols = line.strip().split('\t')
    if len(cols) == 3:
        # single end library: '-' stands in for the missing second fastq
        fq2 = '-'
    else:
        # paired end
        fq2 = cols[3]

    sample = {
        'name': cols[0],
        'condition': cols[1],
        'fq1': cols[2],
        'fq2': fq2,
        'bam': '',
    }

    ########################## 0. init #########################
    mySample = NGSTools.NGSTools(sample['name'], args.outDir, sample['fq1'], sample['fq2'],
                                 cfgfile=os.path.abspath(args.config))

    if QC:
        #################### 1. Quality Control ####################
        ###### 1.1 cut adapter ######
        # only raw data still carries adapters
        if args.dataType == 'raw':
            mySample.cutadapter(run=_run)

        ##### 1.2 fastqc #####
        mySample.QC_fastqc(run=_run)

    if Mapping:
        ######################## 2. Mapping ########################
        sample['bam'] = mySample.tophat2(run=_run)

        # NOTE(review): read-modify-write; if `condition` is shared between
        # worker processes, concurrent updates of one condition could be
        # lost -- confirm against the caller.
        if sample['condition'] in condition:
            condition[sample['condition']] += "," + sample['bam']
        else:
            condition[sample['condition']] = sample['bam']

    if GFold:
        # GFold count
        mySample.gfoldCount(run=_run)

    if DESeq2:
        # HTSeq counts, later consumed by DESeq2
        count = mySample.HTSeq_count(run=_run)
        countsFiles[count] = sample['condition']

    if GATK:
        # remove duplicates
        mySample.rmdup(run=_run)
        # picard reorder
        mySample.picard_reorder(run=_run)
        # splitN
        mySample.splitN(run=_run)
        # realign
        realnBam = mySample.realn(run=_run)
        # recalibration needs known SNP sites, so it is skipped:
        #   recalBam = mySample.recal(run=_run)
        finalBam[realnBam] = sample['condition']

        # samtools call SNP/InDel
        mySample.samtools_call(run=_run)
        mySample.samtools_filter(run=_run)

    ######################## DEGs calling preparation ########################
    if Cufflinks:
        ##### 3. cufflinks #####
        cuffdir = os.path.join(args.outDir, 'cufflinks')
        # Create-then-tolerate instead of exists()-then-mkdir(): another
        # sample may create the directory between the check and the mkdir.
        try:
            os.mkdir(cuffdir)
        except OSError:
            if not os.path.isdir(cuffdir):
                raise

        # per-sample cufflinks output directory
        sampleCuffDir = os.path.join(cuffdir, sample['condition']+'_'+sample['name'])

        command = 'cufflinks -p 4 -g %s -o %s %s' % (cfg.gtf, sampleCuffDir, sample['bam'])
        NGSTools.writeCommands(command, cuffdir+'/cufflinks_%s.sh' % sample['name'], _run)

        transcripts.append(os.path.join(sampleCuffDir, 'transcripts.gtf'))
record.append(P) for P in record: P.join() ######################## DEGs calling ######################## if Cufflinks: # cuffmerge # cuffdir = os.path.join(args.outDir, 'cufflinks') with open(cuffdir+'/assemblies.txt', 'w') as writer: writer.write('\n'.join(transcripts)) command = 'cuffmerge -o %s -g %s -s %s -p 10 %s' % (cuffdir+'/merged_asm', cfg.gtf, cfg.genome, cuffdir+'/assemblies.txt') NGSTools.writeCommands(command, cuffdir+'/cuffmerge.sh', _run) # cuffdiff # if len(condition) != 2: print 'WARNING: condition' command = 'cuffdiff -o %s -b %s -p 10 -L %s -u %s %s %s' % (cuffdir+'/cuffdiff', cfg.genome, condition.keys()[0]+','+condition.keys()[1], cuffdir+'/merged_asm/merged.gtf', condition.values()[0], condition.values()[1]) NGSTools.writeCommands(command, cuffdir+'/cuffdiff.sh', _run) if DESeq2: # deseq2 # deseqDir = os.path.join(args.outDir, 'DESeq') try: os.mkdir(deseqDir) except:
def processSample(sampleName, sampleNameDict):
    '''Map all libraries of one sample in parallel, then merge, rmdup and call SNPs.

    sampleName     -- sample label used in output file and script names.
    sampleNameDict -- mapping of library ID to the per-library data that is
                      handed to processSampleFromLibarary (one child process
                      per key).

    The post-merge steps are switched by the module-level RMDUP and MPILEUP
    flags.  Nothing is returned.

    Raises IndexError when no library BAM was reported by the workers.
    '''
    # process communication: Manager-backed list shared with the workers,
    # read back below as the BAM files belonging to this sample
    mng = Manager()
    libraryBamFileList = mng.list()

    # init multi-processing: one worker per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(name=sampleID,
                            target=processSampleFromLibarary,
                            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList))
        Processer.start()
        record.append(Processer)

    # wait for the workers
    for proc in record:
        proc.join()

    if len(libraryBamFileList) == 0:
        # same exception type the bare [0] lookup used to raise, but readable
        raise IndexError("no library BAM file was produced for sample %s" % sampleName)

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # the filter step (2.2) is disabled, so the merged BAM is the final one
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir + '/picard_mergebam_' + sampleName + '.sh', run=_run)

    ###################### 2.2. filter bam ######################
    # disabled; when re-enabled it should write a filtered BAM and point
    # finalBamFilePath at it, e.g.
    #   samtools view -b -h -f 2 -q 10 <merged> > <filtered>

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir + '/picard_rmdup_' + sampleName + '.sh', run=_run)
        # continue with the de-duplicated BAM
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_call_' + sampleName + '.sh', run=_run)

        # str.replace() treats "vcf$" literally and never matched, which made
        # the filter write onto its own input; anchor the suffix with a regex.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        if outVcf == rawVcf:
            # name does not end in "vcf": still keep input and output distinct
            outVcf = rawVcf + '.flt.vcf'

        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_filter_' + sampleName + '.sh', run=_run)