Beispiel #1
0
def processSample(sampleName, sampleNameDict):
    """Run the sample-level pipeline for ``sampleName``.

    One process is launched per key of ``sampleNameDict``; each runs
    ``processSampleFromLibarary`` with the key, its value and a
    manager-backed list shared by all workers.  Once every worker has
    finished, the BAM files held in that list are merged with Picard into
    ``<sampleName>_final.bam`` in the directory of the first listed BAM.
    Duplicate removal and methylation extraction follow when the
    module-level ``RMDUP`` / ``Methylation_extractor`` switches are set.
    """
    # Shared list the workers use to hand back the BAM files of this sample.
    sharedManager = Manager()
    libraryBamFileList = sharedManager.list()

    # Start one worker per library and remember it so it can be joined.
    workers = []
    for libraryID in sampleNameDict:
        worker = Process(
            name=libraryID,
            target=processSampleFromLibarary,
            args=(libraryID, sampleNameDict[libraryID], libraryBamFileList))
        worker.start()
        workers.append(worker)

    # Block until every library has been processed.
    for worker in workers:
        worker.join()

    # ---- 2.1 post mapping: merge the per-library BAMs into one file ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    mergeCommand = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath,
                                         cfg)
    mergeScript = (args.outDir + '/mapping/picard_mergebam_' + sampleName +
                   '.sh')
    NGSTools.writeCommands(mergeCommand, mergeScript, run=_run)

    # ---- 3. remove duplicates ----
    if RMDUP:
        rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        rmdupScript = (args.outDir + '/mapping/picard_rmdup_' + sampleName +
                       '.sh')
        NGSTools.writeCommands(rmdupCommand, rmdupScript, run=_run)

        # Later steps work on the de-duplicated BAM.
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    # ---- 4. DMR calling ----
    if not Methylation_extractor:
        return
    NGSTools.methylation_extractor(finalBamFilePath,
                                   bamFileDir + '/' + sampleName, cfg)
Beispiel #2
0
def processSample(sampleName, sampleNameDict):
    """Merge one sample's library BAMs, optionally de-duplicate, and call SNPs.

    One process is started per key of ``sampleNameDict``; each runs
    ``processSampleFromLibarary`` with the key, its value and a
    manager-backed list shared by all workers.  After all workers have
    been joined, the BAM files in that list are merged with Picard into
    ``<sampleName>_merged.bam`` in the directory of the first listed BAM.

    The module-level switches ``RMDUP`` and ``MPILEUP`` enable duplicate
    removal and bcftools SNP calling/filtering respectively.

    Raises:
        IndexError: if no BAM path was collected from the workers.
    """
    # Shared list the workers use to hand back the BAM files of this sample.
    manager = Manager()
    libraryBamFileList = manager.list()

    # Start one worker per library, then wait for all of them.
    workers = []
    for sampleID in sampleNameDict:
        worker = Process(
            name=sampleID,
            target=processSampleFromLibarary,
            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()

    ######################	2.1 post mapping  ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # The filter step (2.2) that would write a separate filtered BAM is
    # disabled, so the merged BAM is the file every later step must use.
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(
        command, bamFileDir + '/picard_mergebam_' + sampleName + '.sh',
        run=_run)

    ###################### 2.2. filter bam  ######################
    # Disabled.  If re-enabled, point finalBamFilePath at the filtered file.
    #command = 'samtools view -Sb -h -f 2 -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    #command = 'samtools view -Sb -h -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    #NGSTools.writeCommands(command, bamFileDir+'/filterBam_'+sampleName+'.sh', run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(
            command, bamFileDir + '/picard_rmdup_' + sampleName + '.sh',
            run=_run)

        # Presumably picard_rmdup writes "<input>.rmdup.bam" next to its
        # input -- TODO confirm against NGSTools.picard_rmdup.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', mergedBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(
            finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
        NGSTools.writeCommands(
            command, SNP_out + '/bcftools_call_' + sampleName + '.sh',
            run=_run)

        # str.replace() treats "vcf$" literally and never matched, which made
        # the filtered VCF path identical to the raw one; anchor with a regex.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(
            command, SNP_out + '/bcftools_filter_' + sampleName + '.sh',
            run=_run)
Beispiel #3
0
def processSample(sampleName, sampleNameDict):
    """Process every library of ``sampleName`` and post-process the result.

    For each key of ``sampleNameDict`` a child process running
    ``processSampleFromLibarary`` is started; all children share one
    manager-backed list of BAM files.  After the children have been
    joined, the listed BAMs are merged into ``<sampleName>_final.bam``
    (placed beside the first listed BAM).  ``RMDUP`` and
    ``Methylation_extractor`` (module-level switches) enable the
    duplicate-removal and methylation-extraction steps.
    """
    # Cross-process list holding the BAM files of this sample.
    bamRegistry = Manager()
    libraryBamFileList = bamRegistry.list()

    # Launch one child per library.
    children = []
    for libID in sampleNameDict:
        childArgs = (libID, sampleNameDict[libID], libraryBamFileList)
        child = Process(name=libID,
                        target=processSampleFromLibarary,
                        args=childArgs)
        child.start()
        children.append(child)

    # Wait for all children before touching the shared list.
    for child in children:
        child.join()

    # 2.1 post mapping: one merged BAM per sample.
    firstBam = libraryBamFileList[0]
    bamFileDir = os.path.dirname(firstBam)
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    command = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg)
    scriptPath = args.outDir + '/mapping/picard_mergebam_' + sampleName + '.sh'
    NGSTools.writeCommands(command, scriptPath, run=_run)

    # 3. remove duplicates; afterwards use the ".rmdup.bam" file.
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        scriptPath = args.outDir + '/mapping/picard_rmdup_' + sampleName + '.sh'
        NGSTools.writeCommands(command, scriptPath, run=_run)
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    # 4. DMR calling.
    if Methylation_extractor:
        outputPrefix = bamFileDir + '/' + sampleName
        NGSTools.methylation_extractor(finalBamFilePath, outputPrefix, cfg)
Beispiel #4
0
def processSample(sampleName, sampleNameDict):
    """Merge, filter and optionally de-duplicate the BAMs of one sample.

    A process running ``processSampleFromLibarary`` is started for every
    key of ``sampleNameDict``; the processes share a manager-backed list
    of BAM files.  When all of them have finished, the listed BAMs are
    merged into ``<sampleName>_merged.bam`` and filtered with
    ``samtools view -q 10`` into ``<sampleName>_properly.bam``, both in
    the directory of the first listed BAM.  If the module-level ``RMDUP``
    switch is set, Picard duplicate removal runs on the filtered BAM.
    """
    # Shared list through which the workers report their BAM files.
    manager = Manager()
    libraryBamFileList = manager.list()

    # One worker per library.
    running = []
    for libraryKey in sampleNameDict:
        workerArgs = (libraryKey, sampleNameDict[libraryKey],
                      libraryBamFileList)
        child = Process(name=libraryKey,
                        target=processSampleFromLibarary,
                        args=workerArgs)
        child.start()
        running.append(child)

    # Wait until every worker is done.
    for child in running:
        child.join()

    # ---- 2.1 post mapping ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir,
                                    "%s_properly.bam" % sampleName)
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

    # Merge the BAMs of all libraries into a single file.
    mergeCommand = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath,
                                         cfg)
    mergeScript = bamFileDir + '/picard_mergebam_' + sampleName + '.sh'
    NGSTools.writeCommands(mergeCommand, mergeScript, run=_run)

    # ---- 3. filter bam ----
    # An earlier variant of this command also passed "-f 2".
    filterCommand = 'samtools view -Sb -h -q 10 %s > %s ' % (
        mergedBamFilePath, finalBamFilePath)
    filterScript = bamFileDir + '/filterBam_' + sampleName + '.sh'
    NGSTools.writeCommands(filterCommand, filterScript, run=_run)

    # ---- 4. remove duplicates ----
    if not RMDUP:
        return

    rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
    rmdupScript = bamFileDir + '/picard_rmdup_' + sampleName + '.sh'
    NGSTools.writeCommands(rmdupCommand, rmdupScript, run=_run)

    finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)
Beispiel #5
0
def processSample(sampleName, sampleNameDict):
    """Merge, filter and optionally de-duplicate the BAMs of one sample.

    Every key of ``sampleNameDict`` gets its own process running
    ``processSampleFromLibarary``; a manager-backed list shared by all of
    them holds the resulting BAM files.  After the processes have been
    joined, the listed BAMs are merged into ``<sampleName>_merged.bam``
    and filtered with ``samtools view -f 2 -q 10`` into
    ``<sampleName>_properly.bam`` (both beside the first listed BAM).
    Picard duplicate removal runs on the filtered BAM when the
    module-level ``RMDUP`` switch is set.
    """
    # List shared with the worker processes.
    sharedState = Manager()
    libraryBamFileList = sharedState.list()

    # Spawn the workers.
    jobs = []
    for libName in sampleNameDict:
        job = Process(name=libName,
                      target=processSampleFromLibarary,
                      args=(libName, sampleNameDict[libName],
                            libraryBamFileList))
        job.start()
        jobs.append(job)

    # Wait for all of them.
    for job in jobs:
        job.join()

    # 2.1 post mapping
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir,
                                    "%s_properly.bam" % sampleName)
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

    # Merge the per-library BAMs.
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    scriptPath = bamFileDir + '/picard_mergebam_' + sampleName + '.sh'
    NGSTools.writeCommands(command, scriptPath, run=_run)

    # 3. filter bam
    command = 'samtools view -Sb -h -f 2 -q 10 %s > %s ' % (
        mergedBamFilePath, finalBamFilePath)
    scriptPath = bamFileDir + '/filterBam_' + sampleName + '.sh'
    NGSTools.writeCommands(command, scriptPath, run=_run)

    # 4. remove duplicates
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        scriptPath = bamFileDir + '/picard_rmdup_' + sampleName + '.sh'
        NGSTools.writeCommands(command, scriptPath, run=_run)

        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)
Beispiel #6
0
def processSample(line, condition, transcripts, countsFiles, finalBam,
                  expressCXB):
    """Run the per-sample RNA-seq steps for one line of the sample sheet.

    ``line`` is a tab-separated record: name, condition, first FASTQ and,
    for paired-end libraries, a fourth column with the second FASTQ.  A
    three-column line is treated as single-end and ``'-'`` is passed as
    the second FASTQ.  The stages that run are selected by the
    module-level switches ``QC``, ``Mapping``, ``GFold``, ``DESeq``,
    ``GATK`` and ``Cufflinks``.

    Results are reported through the mutable arguments:

    * ``condition``   -- condition name -> comma-joined BAM paths
    * ``countsFiles`` -- HTSeq count file -> ``"<condition>|<name>"``
    * ``finalBam``    -- realigned BAM -> condition name
    * ``transcripts`` -- gains this sample's ``transcripts.gtf`` path

    ``expressCXB`` is accepted but not used by this function.
    """
    cols = line.strip().split('\t')

    # Three columns: single-end library; otherwise column four is read 2.
    fq2 = '-' if len(cols) == 3 else cols[3]

    sample = {
        'name': cols[0],
        'condition': cols[1],
        'fq1': cols[2],
        'fq2': fq2,
        'bam': ''
    }

    ########################## 0. init #########################
    mySample = NGSTools.NGSTools(sample['name'],
                                 args.outDir,
                                 sample['fq1'],
                                 sample['fq2'],
                                 libType=args.libraryType,
                                 cfgfile=os.path.abspath(args.config))

    if QC:
        #################### 1. Quality Control ####################

        ###### 1.1 cut adapter (raw data only) ######
        if args.dataType == 'raw':
            # Explicit adapters can be supplied instead of the defaults, e.g.
            #   cutadapter(adapter5='', adapter3='AATGATACGGCGACCACCGAGATCT', run=_run)
            #   cutadapter(adapter5='CTGTCTCTTATACAC', adapter3='CTGTCTCTTATACAC', run=_run)  # Nextera Kit
            # and low-quality trimming is available via rm_lowQual(run=_run).
            mySample.cutadapter(run=_run)

        ##### 1.2 fastqc #####
        mySample.QC_fastqc(run=_run)

    if Mapping:
        ######################## 2. Mapping ########################
        sample['bam'] = mySample.tophat2(run=_run)

        # Membership test instead of dict.has_key(), which only exists on
        # Python 2 dictionaries.
        # NOTE(review): if `condition` is shared between parallel workers,
        # this read-modify-write is not atomic -- confirm against the caller.
        if sample['condition'] in condition:
            condition[sample['condition']] += "," + sample['bam']
        else:
            condition[sample['condition']] = sample['bam']

        if GFold:
            # GFold count
            mySample.gfoldCount(run=_run)

        if DESeq:
            # DESeq2 input: HTSeq counts keyed by count file
            count = mySample.HTSeq_count(run=_run)
            countsFiles[count] = sample['condition'] + '|' + sample['name']

        if GATK:
            # remove duplicates
            mySample.rmdup(run=_run)

            # picard reorder
            mySample.picard_reorder(run=_run)

            # splitN
            mySample.splitN(run=_run)

            # realign
            realnBam = mySample.realn(run=_run)

            # Recalibration needs known SNP sites, so it is skipped:
            #recalBam = mySample.recal(run = _run)

            finalBam[realnBam] = sample['condition']

            # samtools call SNP/InDel
            mySample.samtools_call(run=_run)
            mySample.samtools_filter(run=_run)

    ########################  DEGs calling preparation ########################

    if Cufflinks:
        ##### 3. cufflinks #####
        cuffdir = os.path.join(args.outDir, 'cufflinks')
        # Create-then-check instead of check-then-create, so a directory made
        # by another sample in the meantime is not an error.
        try:
            os.mkdir(cuffdir)
        except OSError:
            if not os.path.isdir(cuffdir):
                raise

        # cufflinks #
        # NOTE(review): sample['bam'] is still '' when Mapping is disabled.
        sampleOutDir = os.path.join(
            cuffdir, sample['condition'] + '_' + sample['name'])
        command = 'cufflinks --library-type %s -p 4 -g %s -o %s %s' % (
            args.libraryType, cfg.gtf, sampleOutDir, sample['bam'])
        NGSTools.writeCommands(command,
                               cuffdir + '/cufflinks_%s.sh' % sample['name'],
                               _run)

        transcripts.append(os.path.join(sampleOutDir, 'transcripts.gtf'))
Beispiel #7
0
    record.append(P)

# Wait for every per-sample worker before the cross-sample steps start.
for P in record:
    P.join()

########################  DEGs calling ########################
if Cufflinks:
    # cuffmerge: list every per-sample assembly in one manifest file #
    cuffdir = os.path.join(args.outDir, 'cufflinks')
    with open(cuffdir + '/assemblies.txt', 'w') as writer:
        writer.write('\n'.join(transcripts))

    command = 'cuffmerge -o %s -g %s -s %s -p 10 %s' % (
        cuffdir + '/merged_asm', cfg.gtf, cfg.genome,
        cuffdir + '/assemblies.txt')
    NGSTools.writeCommands(command, cuffdir + '/cuffmerge.sh', _run)

    # cuffnorm #
    cuffnorm_dir = os.path.join(cuffdir, 'cuffnorm')
    # Only "directory already exists" is tolerated; any other failure is
    # re-raised instead of being hidden by a bare except.
    try:
        os.mkdir(cuffnorm_dir)
    except OSError:
        if not os.path.isdir(cuffnorm_dir):
            raise

    # Labels and BAM groups are taken from the same mapping so that the
    # i-th label corresponds to the i-th BAM group.
    cond = ','.join(condition.keys())
    bams = ' '.join(condition.values())
    command = 'mkdir cuffnorm\ncuffnorm --library-type %s -o %s -L %s %s %s' % (
        args.libraryType, cuffnorm_dir, cond, cfg.gtf, bams)
    NGSTools.writeCommands(command, cuffdir + '/cuffnorm.sh', _run)

    # cuffdiff #
Beispiel #8
0
def processSample(line, condition, transcripts, countsFiles, finalBam):
    """Run the per-sample RNA-seq steps for one line of the sample sheet.

    ``line`` is a tab-separated record: name, condition, first FASTQ and,
    for paired-end libraries, a fourth column with the second FASTQ.  A
    three-column line is treated as single-end and ``'-'`` is passed as
    the second FASTQ.  The stages that run are selected by the
    module-level switches ``QC``, ``Mapping``, ``GFold``, ``DESeq2``,
    ``GATK`` and ``Cufflinks``.

    Results are reported through the mutable arguments:

    * ``condition``   -- condition name -> comma-joined BAM paths
    * ``countsFiles`` -- HTSeq count file -> condition name
    * ``finalBam``    -- realigned BAM -> condition name
    * ``transcripts`` -- gains this sample's ``transcripts.gtf`` path
    """
    cols = line.strip().split('\t')

    # Three columns: single-end library; otherwise column four is read 2.
    fq2 = '-' if len(cols) == 3 else cols[3]

    sample = {
        'name': cols[0],
        'condition': cols[1],
        'fq1': cols[2],
        'fq2': fq2,
        'bam': ''
    }

    ########################## 0. init #########################
    mySample = NGSTools.NGSTools(sample['name'],
                                 args.outDir,
                                 sample['fq1'],
                                 sample['fq2'],
                                 cfgfile=os.path.abspath(args.config))

    if QC:
        #################### 1. Quality Control ####################

        ###### 1.1 cut adapter (raw data only) ######
        if args.dataType == 'raw':
            # Explicit adapters can be supplied instead of the defaults, e.g.
            #   cutadapter(adapter5='', adapter3='AATGATACGGCGACCACCGAGATCT', run=_run)
            # and low-quality trimming is available via rm_lowQual(run=_run).
            mySample.cutadapter(run=_run)

        ##### 1.2 fastqc #####
        mySample.QC_fastqc(run=_run)

    if Mapping:
        ######################## 2. Mapping ########################
        sample['bam'] = mySample.tophat2(run=_run)

        # Membership test instead of dict.has_key(), which only exists on
        # Python 2 dictionaries.
        # NOTE(review): if `condition` is shared between parallel workers,
        # this read-modify-write is not atomic -- confirm against the caller.
        if sample['condition'] in condition:
            condition[sample['condition']] += "," + sample['bam']
        else:
            condition[sample['condition']] = sample['bam']

        if GFold:
            # GFold count
            mySample.gfoldCount(run=_run)

        if DESeq2:
            # DESeq2 input: HTSeq counts keyed by count file
            count = mySample.HTSeq_count(run=_run)
            countsFiles[count] = sample['condition']

        if GATK:
            # remove duplicates
            mySample.rmdup(run=_run)

            # picard reorder
            mySample.picard_reorder(run=_run)

            # splitN
            mySample.splitN(run=_run)

            # realign
            realnBam = mySample.realn(run=_run)

            # Recalibration needs known SNP sites, so it is skipped:
            #recalBam = mySample.recal(run = _run)

            finalBam[realnBam] = sample['condition']

            # samtools call SNP/InDel
            mySample.samtools_call(run=_run)
            mySample.samtools_filter(run=_run)

    ########################  DEGs calling preparation ########################

    if Cufflinks:
        ##### 3. cufflinks #####
        cuffdir = os.path.join(args.outDir, 'cufflinks')
        # Create-then-check instead of check-then-create, so a directory made
        # by another sample in the meantime is not an error.
        try:
            os.mkdir(cuffdir)
        except OSError:
            if not os.path.isdir(cuffdir):
                raise

        # cufflinks #
        # NOTE(review): sample['bam'] is still '' when Mapping is disabled.
        sampleOutDir = os.path.join(
            cuffdir, sample['condition'] + '_' + sample['name'])
        command = 'cufflinks -p 4 -g %s -o %s %s' % (
            cfg.gtf, sampleOutDir, sample['bam'])
        NGSTools.writeCommands(command,
                               cuffdir + '/cufflinks_%s.sh' % sample['name'],
                               _run)

        transcripts.append(os.path.join(sampleOutDir, 'transcripts.gtf'))
Beispiel #9
0
	record.append(P)

# Wait for every per-sample worker before the cross-sample steps start.
for P in record:
    P.join()


########################  DEGs calling ########################
if Cufflinks:
    # cuffmerge: list every per-sample assembly in one manifest file #
    cuffdir = os.path.join(args.outDir, 'cufflinks')
    with open(cuffdir + '/assemblies.txt', 'w') as writer:
        writer.write('\n'.join(transcripts))

    command = 'cuffmerge -o %s -g %s -s %s -p 10 %s' % (
        cuffdir + '/merged_asm', cfg.gtf, cfg.genome,
        cuffdir + '/assemblies.txt')
    NGSTools.writeCommands(command, cuffdir + '/cuffmerge.sh', _run)

    # cuffdiff #

    # The command below compares exactly the first two conditions.
    if len(condition) != 2:
        print('WARNING: condition')

    # Take one snapshot of the mapping so each label is paired with its own
    # BAM group, and index a real list (dict views are not indexable on
    # Python 3).  Fewer than two conditions raises IndexError, as before.
    conditionPairs = list(condition.items())
    firstLabel, firstBams = conditionPairs[0]
    secondLabel, secondBams = conditionPairs[1]

    command = 'cuffdiff -o %s -b %s -p 10 -L %s -u %s %s %s' % (
        cuffdir + '/cuffdiff', cfg.genome, firstLabel + ',' + secondLabel,
        cuffdir + '/merged_asm/merged.gtf', firstBams, secondBams)
    NGSTools.writeCommands(command, cuffdir + '/cuffdiff.sh', _run)

if DESeq2:
	# deseq2 #
	deseqDir = os.path.join(args.outDir, 'DESeq')
	try:
		os.mkdir(deseqDir)
	except:
Beispiel #10
0
def processSample(sampleName, sampleNameDict):
    """Merge one sample's library BAMs, optionally de-duplicate, and call SNPs.

    One process is started per key of ``sampleNameDict``; each runs
    ``processSampleFromLibarary`` with the key, its value and a
    manager-backed list shared by all workers.  After all workers have
    been joined, the BAM files in that list are merged with Picard into
    ``<sampleName>_merged.bam`` in the directory of the first listed BAM.

    The module-level switches ``RMDUP`` and ``MPILEUP`` enable duplicate
    removal and bcftools SNP calling/filtering respectively.

    Raises:
        IndexError: if no BAM path was collected from the workers.
    """
    # Shared list the workers use to hand back the BAM files of this sample.
    mng = Manager()
    libraryBamFileList = mng.list()

    # Start one worker per library, then wait for all of them.
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(name=sampleID,
                            target=processSampleFromLibarary,
                            args=(
                                sampleID,
                                sampleNameDict[sampleID],
                                libraryBamFileList,
                            ))
        Processer.start()
        record.append(Processer)

    for proc in record:
        proc.join()

    ######################	2.1 post mapping  ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # No filtering step is active, so the merged BAM is what later steps use.
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command,
                           bamFileDir + '/picard_mergebam_' + sampleName +
                           '.sh',
                           run=_run)

    ###################### 2.2. filter bam  ######################
    # Disabled.  If re-enabled, point finalBamFilePath at the filtered file.
    #command = 'samtools view -Sb -h -f 2 -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    #command = 'samtools view -Sb -h -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    #NGSTools.writeCommands(command, bamFileDir+'/filterBam_'+sampleName+'.sh', run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(command,
                               bamFileDir + '/picard_rmdup_' + sampleName +
                               '.sh',
                               run=_run)

        # Escaped dot: only a literal ".bam" suffix is rewritten.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath,
                                                 cfg,
                                                 outdir=SNP_out,
                                                 sampleName=sampleName)
        NGSTools.writeCommands(command,
                               SNP_out + '/bcftools_call_' + sampleName +
                               '.sh',
                               run=_run)

        # str.replace() treats "vcf$" literally and never matched, which made
        # the filtered VCF path identical to the raw one; anchor with a regex.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command,
                               SNP_out + '/bcftools_filter_' + sampleName +
                               '.sh',
                               run=_run)