Ejemplo n.º 1
0
def main():
	"""Command-line entry point: extract read clusters from sorted SAM/BAM files.

	Parses the command line, publishes the tuning options as module-level
	globals (RCUT, RLENGTH, USESTRAND, AINSERT, MINSERT, NUMMAPPED) and calls
	getClusters() once per input file.

	Output handling: the i-th name given to -o/--outfiles is used as a prefix
	for the i-th input, producing "<prefix>.bed" and "<prefix>.cov.txt".
	Inputs without a matching prefix have both streams written to stdout.

	The mapped-read value for an input is looked up in the result of
	c2r.getMappedReads(args.mapped) under the lowercased base name of the
	input file, truncated at its first '.'. A missing key propagates as the
	lookup's own exception (presumably KeyError - the return type of
	getMappedReads is not visible here).
	"""
	global RCUT, RLENGTH, USESTRAND, AINSERT, MINSERT, NUMMAPPED

	parser = argparse.ArgumentParser(description="Get clusters from a sorted bam/sam file of mapped reads")
	parser.add_argument('infiles', metavar='I',type=str,nargs='+',help="Sorted sam/bam files to be processed.")
	parser.add_argument('-l','--readLength',type=int,required=True,help="The length of the reads")
	parser.add_argument('-o','--outfiles',type=str,nargs='*',help="Files to write output to. The sequence correspondes to the sequence of input files. If too few outputs, the rest will be output to stdout")
	parser.add_argument('-r','--rcut',type=int,default=0,help="Minimum support")
	parser.add_argument('-s','--strand',action='store_true',default=False,help='Whether to force strandnees')
	parser.add_argument('-i','--insert',type=int,default=150,help="The average insert size.")
	parser.add_argument('-m','--maxInsert',type=int,default=400,help='Maximum insert size.')
	parser.add_argument('-N','--mapped',type=str,required=True,help='Number of unique reads mapped.')

	args = parser.parse_args()
	RCUT = args.rcut
	RLENGTH = args.readLength
	USESTRAND = args.strand
	AINSERT = args.insert
	MINSERT = args.maxInsert

	# With nargs='*' and no default, argparse leaves the attribute as None
	# when -o is omitted; normalise so the length comparison below is safe.
	outfiles = args.outfiles or []
	mapped = c2r.getMappedReads(args.mapped)

	for position, infile in enumerate(args.infiles):
		NUMMAPPED = mapped[infile.split('/')[-1].split('.')[0].lower()]
		if position < len(outfiles):
			prefix = outfiles[position]
			# Context managers make sure both outputs are flushed and closed
			# even if getClusters raises.
			with open(prefix+'.bed','w') as outBed, open(prefix+'.cov.txt','w') as outCov:
				getClusters(infile, outBed, outCov)
		else:
			# No output prefix left for this input: send both streams to
			# stdout instead of reusing (or never defining) file handles.
			getClusters(infile, sys.stdout, sys.stdout)
Ejemplo n.º 2
0
	for r in records:
		if not re.match("^chr(\d+|X|Y|M)$",r['chrom']):
			continue
		count = 0
		for read in sam.fetch(r['chrom'],int(r['chromStart']),int(r['chromEnd'])-1):
			count += 1
		maxCov = 0
		for c in sam.pileup(r['chrom'],int(r['chromStart']), int(r['chromEnd']) -1):
			if c.n > maxCov:
				maxCov = c.n
		
		length = int(r['chromEnd']) - int(r['chromStart'])
		rpkm = gc.calRPKM(count,length,numMapped)
		outBed.write('\t'.join([r['chrom'],r['chromStart'],r['chromEnd'],str(index),str(rpkm),r['strand']]))
		outBed.write('\n')
		outCov.write('\t'.join([str(index),str(count),str(maxCov),str(rpkm)]))
		outCov.write('\n')
		index += 1

if __name__=="__main__":
	# Script entry point.
	#   sys.argv[1]  : argument handed to c2r.getMappedReads()
	#   sys.argv[2:] : one or more sample prefixes
	# For every prefix and every region class in nameMap, the matching
	# "<prefix>.npcrd.unique.<class>.bam" is opened and calculateCov() writes
	# "<prefix>.<class>.nonCluster.bed" and "<prefix>.<class>.nonCluster.cov.txt".
	nameMap = {"exonic":"cds", "intronic":"noncds"}
	mapped = c2r.getMappedReads(sys.argv[1])
	for prefix in sys.argv[2:]:
		# Looked up once per prefix (key: lowercased base name of the prefix)
		# so a missing entry fails before any output file is created.
		# NOTE: `long` is Python 2 only; this script targets Python 2.
		numMapped = long(mapped[prefix.split('/')[-1].lower()])
		for k in nameMap:
			records = fo.getRecords("../genome/hg19/hg19."+nameMap[k]+".bed")
			removeDuplicate(records)
			sam = pysam.Samfile(prefix+".npcrd.unique."+k+".bam","rb")
			try:
				# Close both outputs deterministically so buffered data is
				# flushed and handles do not pile up across iterations.
				with open(prefix+"."+k+".nonCluster.bed",'w') as outBed, open(prefix + "."+k+".nonCluster.cov.txt",'w') as outCov:
					calculateCov(records, sam, outBed, outCov, numMapped)
			finally:
				# Release the BAM handle even if calculateCov raises.
				sam.close()