def main():
    """Append a type column (and a gene column, when annotated) to each row.

    Command-line arguments, read from ``sys.argv``:
        1: tab-separated file to annotate; it is loaded through
           ``fo.getRecords`` and also re-read line by line as text.
        2: gene records file, loaded through ``fo.getRecords``.
        3: label written for rows that have at least one annotation.
        4: label written for rows that have none.
        5: fraction, parsed as float and forwarded to ``fo.getNearFeatures``.

    The result is written to ``<argv[1]>.annotation.txt``.  Lines starting
    with ``#`` are treated as headers and get ``type`` and ``gene`` column
    names appended.
    """
    genes = fo.getRecords(sys.argv[2])
    annotatee = fo.getRecords(sys.argv[1])
    withinGeneType = sys.argv[3]
    withoutGeneType = sys.argv[4]
    fraction = float(sys.argv[5])
    # Only the first returned value is used; it is indexed below by the
    # fourth column of each row.
    ao, _ = fo.getNearFeatures(annotatee, genes, fraction, False, False)

    # Context managers close both handles even if a row fails to parse.
    with open(sys.argv[1]) as f:
        with open(sys.argv[1] + '.annotation.txt', 'w') as out:
            for r in f:
                if r.startswith('#'):
                    out.write(r.strip() + "\ttype\tgene" + "\n")
                    continue
                tokens = r.strip().split('\t')
                tempAnn = ao[tokens[3]]
                if len(tempAnn) > 0:
                    annStr = withinGeneType + "\t" + ','.join(tempAnn)
                else:
                    # Rows without annotations receive the type label only,
                    # with no trailing gene field.
                    annStr = withoutGeneType
                tokens.append(annStr)
                out.write('\t'.join(tokens))
                out.write('\n')
def main():
    """Write one colored BED track per prefix from its novel and exonic files.

    Command-line arguments, read from ``sys.argv``:
        1: gene records file, loaded through ``fo.getRecords``.
        2..-2: one or more file prefixes.
        -1: fraction, parsed as float and forwarded to
            ``fo.getNearFeatures``.

    For every prefix, ``<prefix>.novel.bed`` and ``<prefix>.exonic.bed`` are
    read and ``<prefix>.all.bed`` is written.  Each data row gets three extra
    fields: its own columns 2 and 3 repeated (labelled ``thickStart`` and
    ``thickEnd`` in the header) and a color picked by
    ``getColorId(int(column 5))`` from one of the color tables.
    """
    genes = fo.getRecords(sys.argv[1])
    fraction = float(sys.argv[-1])
    for prefix in sys.argv[2:-1]:
        # The novel rows are colored by whether they have an annotation, so
        # the annotation map has to be built for this prefix before the rows
        # are processed; otherwise the ``anns`` lookup below has no binding.
        novel = fo.getRecords(prefix + ".novel.bed")
        anns, _ = fo.getNearFeatures(novel, genes, fraction, False, False)

        # ``with`` closes every handle even when a row raises.
        with open(prefix + '.all.bed', 'w') as out:
            out.write("track name=\"%s\" visibility=2 itemRgb=\"On\"\n"%(prefix+'_all',))

            with open(prefix + '.novel.bed') as f:
                for r in f:
                    if r.startswith('#'):
                        out.write(r.strip()+"\tthickStart\tthickEnd\titemRgb\n")
                        continue
                    tokens = r.strip().split('\t')
                    tempAnn = anns[tokens[3]]
                    tokens.append(tokens[1])
                    tokens.append(tokens[2])
                    cIndex = getColorId(int(tokens[4]))
                    if len(tempAnn) > 0:
                        tokens.append(INTRONIC_COLORS[cIndex])
                    else:
                        tokens.append(UNKNOWN_COLORS[cIndex])
                    out.write('\t'.join(tokens))
                    out.write('\n')

            with open(prefix + ".exonic.bed") as f:
                for r in f:
                    # The header was already emitted from the novel file.
                    if r.startswith('#'):
                        continue
                    tokens = r.strip().split('\t')
                    tokens.append(tokens[1])
                    tokens.append(tokens[2])
                    cIndex = getColorId(int(tokens[4]))
                    # Exonic rows always use the exonic table, so no
                    # annotation lookup is needed here.
                    tokens.append(EXONIC_COLORS[cIndex])
                    # Prefix the name so exonic rows are distinguishable
                    # from novel rows in the combined track.
                    tokens[3] = 'e'+tokens[3]
                    out.write('\t'.join(tokens))
                    out.write('\n')
# NOTE(review): this loop is the tail of a definition whose header lies above
# the visible part of the file -- presumably calculateCov, judging by the call
# in the driver below; confirm.  ``index`` and ``numMapped`` are bound there.
# Its statements are unchanged and must sit under that header when merged.
for r in records:
    # Keep only chromosomes named chr<digits>, chrX, chrY or chrM.
    if not re.match("^chr(\d+|X|Y|M)$",r['chrom']):
        continue
    # Number of reads sam.fetch yields for the interval.
    count = 0
    for read in sam.fetch(r['chrom'],int(r['chromStart']),int(r['chromEnd'])-1):
        count += 1
    # Largest ``n`` over the pileup columns of the interval.
    maxCov = 0
    for c in sam.pileup(r['chrom'],int(r['chromStart']), int(r['chromEnd']) -1):
        if c.n > maxCov:
            maxCov = c.n
    length = int(r['chromEnd']) - int(r['chromStart'])
    rpkm = gc.calRPKM(count,length,numMapped)
    outBed.write('\t'.join([r['chrom'],r['chromStart'],r['chromEnd'],str(index),str(rpkm),r['strand']]))
    outBed.write('\n')
    outCov.write('\t'.join([str(index),str(count),str(maxCov),str(rpkm)]))
    outCov.write('\n')
    # Records skipped by the chromosome filter do not consume an index.
    index += 1

if __name__=="__main__":
    # Driver: sys.argv[1] is handed to c2r.getMappedReads; every further
    # argument is a file prefix.  For each prefix and each category in
    # nameMap, coverage is computed from <prefix>.npcrd.unique.<category>.bam
    # and written to <prefix>.<category>.nonCluster.bed and
    # <prefix>.<category>.nonCluster.cov.txt.
    nameMap = {"exonic":"cds", "intronic":"noncds"}
    mapped = c2r.getMappedReads(sys.argv[1])
    for prefix in sys.argv[2:]:
        # ``mapped`` is keyed by the lower-cased last path component of the
        # prefix.  Looking it up once per prefix means a missing key fails
        # before any output file is created.  ``long`` is the Python 2
        # builtin this script already relies on.
        mappedCount = long(mapped[prefix.split('/')[-1].lower()])
        for k in nameMap:
            records = fo.getRecords("../genome/hg19/hg19."+nameMap[k]+".bed")
            removeDuplicate(records)
            sam = pysam.Samfile(prefix+".npcrd.unique."+k+".bam","rb")
            try:
                # Close both outputs and the BAM handle deterministically so
                # buffered rows are flushed even if calculateCov raises.
                with open(prefix+"."+k+".nonCluster.bed",'w') as outBed:
                    with open(prefix + "."+k+".nonCluster.cov.txt",'w') as outCov:
                        calculateCov(records, sam, outBed, outCov, mappedCount)
            finally:
                sam.close()