예제 #1
0
파일: annotate.py 프로젝트: hjanime/CSI
def main():
	"""Append gene-annotation columns to a tab-separated record file.

	Command-line arguments (read from ``sys.argv``):
		1: path of the file to annotate; the result is written to
		   ``<path>.annotation.txt``
		2: path of the gene records file
		3: label written when a record has at least one annotation
		4: label written when a record has no annotation
		5: fraction, passed through to ``fo.getNearFeatures``

	Lines starting with ``#`` are copied with ``type`` and ``gene`` column
	names appended. Every other line is looked up by its fourth column in
	the mapping returned by ``fo.getNearFeatures`` and gets either
	``<within label>\\t<comma-joined annotations>`` or ``<without label>``
	appended.
	"""
	inputPath = sys.argv[1]
	genes = fo.getRecords(sys.argv[2])
	annotatee = fo.getRecords(inputPath)
	withinGeneType = sys.argv[3]
	withoutGeneType = sys.argv[4]
	fraction = float(sys.argv[5])
	# Only the first element of the returned pair is used here.
	ao, _ = fo.getNearFeatures(annotatee, genes, fraction, False, False)
	# Context managers close both handles even if a lookup or write fails;
	# previously the input handle was never closed at all.
	with open(inputPath) as f, open(inputPath + '.annotation.txt', 'w') as out:
		for r in f:
			if r.startswith('#'):
				out.write(r.strip() + "\ttype\tgene" + "\n")
				continue
			tokens = r.strip().split('\t')
			# The fourth column is the key into the annotation mapping.
			tempAnn = ao[tokens[3]]
			if len(tempAnn) > 0:
				annStr = withinGeneType + "\t" + ','.join(tempAnn)
			else:
				# NOTE(review): this row ends up one column shorter than
				# the header (no "gene" field) -- confirm that is intended.
				annStr = withoutGeneType
			tokens.append(annStr)
			out.write('\t'.join(tokens))
			out.write('\n')
예제 #2
0
파일: merge.py 프로젝트: hjanime/CSI
def main():
	"""Merge each prefix's novel and exonic BED files into ``<prefix>.all.bed``.

	Command-line arguments (read from ``sys.argv``):
		1: path of the gene records file
		2..-2: one or more file prefixes; for each, ``<prefix>.novel.bed``
		   and ``<prefix>.exonic.bed`` are read
		-1: fraction, passed through to ``fo.getNearFeatures``

	Every data row gets three extra columns: copies of columns 2 and 3
	(labelled thickStart/thickEnd in the header) and a color picked via
	``getColorId`` from the integer in column 5. Novel rows use
	``INTRONIC_COLORS`` when they have at least one annotation and
	``UNKNOWN_COLORS`` otherwise; exonic rows always use ``EXONIC_COLORS``
	and get an ``e`` prefix on their name (column 4).
	"""
	genes = fo.getRecords(sys.argv[1])
	fraction = float(sys.argv[-1])
	for prefix in sys.argv[2:-1]:
		with open(prefix + '.all.bed', 'w') as out:
			out.write("track name=\"%s\" visibility=2 itemRgb=\"On\"\n"%(prefix+'_all',))
			# The annotation lookup had been commented out, leaving `anns`
			# undefined (NameError on the first data row). Restored here.
			novel = fo.getRecords(prefix + ".novel.bed")
			anns, _ = fo.getNearFeatures(novel, genes, fraction, False, False)
			with open(prefix + '.novel.bed') as f:
				for r in f:
					if r.startswith('#'):
						out.write(r.strip()+"\tthickStart\tthickEnd\titemRgb\n")
						continue
					tokens = r.strip().split('\t')
					tempAnn = anns[tokens[3]]
					tokens.append(tokens[1])
					tokens.append(tokens[2])
					cIndex = getColorId(int(tokens[4]))
					if len(tempAnn) > 0:
						tokens.append(INTRONIC_COLORS[cIndex])
					else:
						tokens.append(UNKNOWN_COLORS[cIndex])
					out.write('\t'.join(tokens))
					out.write('\n')

			with open(prefix + ".exonic.bed") as f:
				for r in f:
					# Header lines were already emitted from the novel file.
					if r.startswith('#'):
						continue
					tokens = r.strip().split('\t')
					# The color of exonic rows does not depend on any
					# annotation, so the former (unused) `anns` lookup is
					# dropped rather than indexed with exonic names.
					tokens.append(tokens[1])
					tokens.append(tokens[2])
					cIndex = getColorId(int(tokens[4]))
					tokens.append(EXONIC_COLORS[cIndex])
					tokens[3] = 'e'+tokens[3]
					out.write('\t'.join(tokens))
					out.write('\n')
예제 #3
0
파일: getCoverage.py 프로젝트: hjanime/CSI
	for r in records:
		if not re.match("^chr(\d+|X|Y|M)$",r['chrom']):
			continue
		count = 0
		for read in sam.fetch(r['chrom'],int(r['chromStart']),int(r['chromEnd'])-1):
			count += 1
		maxCov = 0
		for c in sam.pileup(r['chrom'],int(r['chromStart']), int(r['chromEnd']) -1):
			if c.n > maxCov:
				maxCov = c.n
		
		length = int(r['chromEnd']) - int(r['chromStart'])
		rpkm = gc.calRPKM(count,length,numMapped)
		outBed.write('\t'.join([r['chrom'],r['chromStart'],r['chromEnd'],str(index),str(rpkm),r['strand']]))
		outBed.write('\n')
		outCov.write('\t'.join([str(index),str(count),str(maxCov),str(rpkm)]))
		outCov.write('\n')
		index += 1

if __name__=="__main__":
	# Usage: <script> <mapped-reads file> <prefix> [<prefix> ...]
	# For every prefix and every region kind in nameMap, reads
	# ../genome/hg19/hg19.<cds|noncds>.bed and <prefix>.npcrd.unique.<kind>.bam
	# and writes <prefix>.<kind>.nonCluster.bed and
	# <prefix>.<kind>.nonCluster.cov.txt via calculateCov.
	nameMap = {"exonic":"cds", "intronic":"noncds"}
	mapped = c2r.getMappedReads(sys.argv[1])
	for prefix in sys.argv[2:]:
		# The mapped-read table is keyed by the lowercased last path component
		# of the prefix. Resolve it once per prefix, before any file is
		# opened, so a missing key does not leave empty outputs behind.
		mappedCount = long(mapped[prefix.split('/')[-1].lower()])
		for k in nameMap:
			records = fo.getRecords("../genome/hg19/hg19."+nameMap[k]+".bed")
			removeDuplicate(records)
			sam = pysam.Samfile(prefix+".npcrd.unique."+k+".bam","rb")
			# Previously the BAM handle and both outputs were never closed;
			# release them deterministically, even when calculateCov raises.
			try:
				with open(prefix+"."+k+".nonCluster.bed",'w') as outBed, \
						open(prefix + "."+k+".nonCluster.cov.txt",'w') as outCov:
					calculateCov(records, sam, outBed, outCov, mappedCount)
			finally:
				sam.close()