コード例 #1
0
	def filterAlignmentByReferenceIDs(cls, inputFname, outputFname=None, refNameSet = set(['Contig0', 'Contig1', 'Contig2']), \
									readGroup="", platform='LS454'):
		"""
		2011-7-8
			Traverse the alignment file inputFname and write one BAM file, outputFname,
			whose header keeps only the SQ entries named in refNameSet.
			
			Arguments:
				inputFname: path of the input alignment file, opened via BamFile(inputFname, 'rb').
				outputFname: path of the output BAM file. It is handed to pysam.Samfile as is;
					the default None is not handled here, so callers are expected to supply a path.
				refNameSet: reference (SQ 'SN') names to keep in the output header. It is also
					passed to the read processor. The default set is only read, never mutated.
				readGroup: if non-empty, an RG header entry with this value as ID and SM is appended.
				platform: used as both PL and LB of the appended RG entry.
			
			The per-read selection is delegated to cls.FilterAlignmentByReferenceIDs, which is
			not visible here -- presumably it writes only reads aligned to refNameSet into
			bamOutputF and tags them with readGroup (confirm against that class).
		"""
		import sys
		import pysam, copy
		samfile = BamFile(inputFname, 'rb')
		# work on a copy so that the header of the input file object stays untouched
		header = copy.deepcopy(samfile.header)
		if readGroup:	#add read group if it's missing
			if "RG" not in header:
				header['RG'] = []
			header['RG'].append({'ID':readGroup, 'PL':platform, 'LB':platform, 'SM':readGroup})
		
		# remove SQ entries that are not in refNameSet, from the header
		header["SQ"] = [SQ_entry for SQ_entry in header["SQ"] if SQ_entry['SN'] in refNameSet]
		
		bamOutputF = pysam.Samfile(outputFname, 'wb', header=header)	# template=samfile)
		try:
			sys.stderr.write("Retain reads from %s only from these references %s ...\n"%(inputFname, refNameSet))
			processor = cls.FilterAlignmentByReferenceIDs(refNameSet=refNameSet, readGroup=readGroup, bamOutputF=bamOutputF)
			samfile.traverseBamByRead(processor=processor)
		finally:
			# close (and thereby flush) the output even if the traversal raises
			bamOutputF.close()
		# NOTE(review): samfile itself is never closed here; whether BamFile exposes close() is not visible -- confirm.
コード例 #2
0
	def filterAlignmentByReferenceIDs(cls, inputFname, outputDir=None, refNameSet = set(['Contig0', 'Contig1', 'Contig2']), \
									readGroup="", platform='LS454'):
		"""
		2011-7-8
			Traverse the alignment file inputFname and open one output BAM file per reference
			name in refNameSet, named <input basename without extension>_<refName>.bam inside outputDir.
			
			Arguments:
				inputFname: path of the input alignment file, opened via BamFile(inputFname, 'rb').
				outputDir: directory that receives the per-reference BAM files. It goes straight
					into os.path.join; the default None is not handled here, so callers are
					expected to supply a directory.
				refNameSet: reference names; one output file is opened for each of them. The
					default set is only read, never mutated.
				readGroup: if non-empty, an RG header entry with this value as ID and SM is appended.
				platform: used as both PL and LB of the appended RG entry.
			
			Every output file gets the same header (the input header plus the optional RG entry).
			The per-read routing is delegated to cls.FilterAlignmentByReferenceIDs, which is
			not visible here -- presumably it writes each read into the file of the reference
			it is aligned to (confirm against that class).
		"""
		import os, sys
		import pysam, copy
		samfile = BamFile(inputFname, 'rb')
		# work on a copy so that the header of the input file object stays untouched
		header = copy.deepcopy(samfile.header)
		if readGroup:
			if "RG" not in header:
				header['RG'] = []
			header['RG'].append({'ID':readGroup, 'PL':platform, 'LB':platform, 'SM':readGroup})
		# the prefix depends only on inputFname, so compute it once rather than per reference
		inputFileBaseNamePrefix = os.path.splitext(os.path.basename(inputFname))[0]
		refName2bamOutputF = {}
		try:
			for refName in refNameSet:
				outputFname = os.path.join(outputDir, '%s_%s.bam'%(inputFileBaseNamePrefix,refName))
				refName2bamOutputF[refName] = pysam.Samfile(outputFname, 'wb', header=header)	# template=samfile)
			sys.stderr.write("Retain reads from %s only from these references %s ...\n"%(inputFname, refNameSet))
			processor = cls.FilterAlignmentByReferenceIDs(refName2bamOutputF=refName2bamOutputF, refNameSet=refNameSet, readGroup=readGroup)
			samfile.traverseBamByRead(processor=processor)
		finally:
			# close whatever was opened, even if opening a later file or the traversal raised.
			# .values() works on both Python 2 and 3, unlike .iteritems().
			for bamOutputF in refName2bamOutputF.values():
				bamOutputF.close()
		# NOTE(review): samfile itself is never closed here; whether BamFile exposes close() is not visible -- confirm.