def main():
    """Parse command-line options, publish them as module globals, and run
    getClusters() on every input sam/bam file.

    Output handling: the i-th input file writes to <outfiles[i]>.bed and
    <outfiles[i]>.cov.txt; inputs beyond the supplied outputs fall back to
    writing on stdout.
    """
    parser = argparse.ArgumentParser(description="Get clusters from a sorted bam/sam file of mapped reads")
    parser.add_argument('infiles', metavar='I', type=str, nargs='+', help="Sorted sam/bam files to be processed.")
    parser.add_argument('-l', '--readLength', type=int, required=True, help="The length of the reads")
    parser.add_argument('-o', '--outfiles', type=str, nargs='*', help="Files to write output to. The sequence correspondes to the sequence of input files. If too few outputs, the rest will be output to stdout")
    parser.add_argument('-r', '--rcut', type=int, default=0, help="Minimum support")
    parser.add_argument('-s', '--strand', action='store_true', default=False, help='Whether to force strandnees')
    parser.add_argument('-i', '--insert', type=int, default=150, help="The average insert size.")
    parser.add_argument('-m', '--maxInsert', type=int, default=400, help='Maximum insert size.')
    parser.add_argument('-N', '--mapped', type=str, required=True, help='Number of unique reads mapped.')
    args = parser.parse_args()

    # getClusters() reads its tuning parameters from module-level globals.
    global RCUT, RLENGTH, USESTRAND, AINSERT, MINSERT
    RCUT = args.rcut
    RLENGTH = args.readLength
    USESTRAND = args.strand
    AINSERT = args.insert
    MINSERT = args.maxInsert  # NOTE: despite the name, this holds --maxInsert

    # BUG FIX: nargs='*' with no default leaves args.outfiles as None when -o
    # is omitted, so len(args.outfiles) below raised TypeError.
    outfiles = args.outfiles if args.outfiles is not None else []

    # Per-sample unique-read counts, keyed by lower-cased sample name
    # (basename of the input path up to the first dot).
    mapped = c2r.getMappedReads(args.mapped)

    outCount = 0
    for infile in args.infiles:
        global NUMMAPPED
        NUMMAPPED = mapped[infile.split('/')[-1].split('.')[0].lower()]
        if outCount >= len(outfiles):
            # BUG FIX: the original assigned `out = sys.stdout` here but then
            # passed the undefined/stale `outBed`/`outCov` to getClusters(),
            # raising NameError on the first fallback iteration.  Route both
            # streams to stdout as the help text promises.
            outBed = sys.stdout
            outCov = sys.stdout
        else:
            outBed = open(outfiles[outCount] + '.bed', 'w')
            outCov = open(outfiles[outCount] + '.cov.txt', 'w')
            outCount += 1
        getClusters(infile, outBed, outCov)
# Tail of calculateCov(records, sam, outBed, outCov, numMapped) followed by the
# script driver; the chunk was flattened onto one physical line and the
# enclosing `def` header (including the initialization of `index`) is not
# visible here, so the code is kept byte-identical.
#
# Per BED record on a canonical chromosome (chr1..chrN / chrX / chrY / chrM):
#   * count reads overlapping [chromStart, chromEnd) via sam.fetch()
#   * find the maximum per-base pileup depth via sam.pileup()
#   * compute RPKM with gc.calRPKM(count, length, numMapped)
# then write one BED6 line (name = running index, score = RPKM) to outBed and
# one "index<TAB>count<TAB>maxCov<TAB>rpkm" line to outCov.
#
# The __main__ driver maps {"exonic": "cds", "intronic": "noncds"}, loads the
# matching hg19 annotation BED, de-duplicates it, opens the per-sample unique
# BAM for each prefix given on the command line, and runs calculateCov.
# NOTE(review): `long(...)` makes this Python-2-only; the annotation path
# "../genome/hg19/..." is relative to the working directory — presumably the
# scripts are run from a sibling directory, verify before reuse.
for r in records: if not re.match("^chr(\d+|X|Y|M)$",r['chrom']): continue count = 0 for read in sam.fetch(r['chrom'],int(r['chromStart']),int(r['chromEnd'])-1): count += 1 maxCov = 0 for c in sam.pileup(r['chrom'],int(r['chromStart']), int(r['chromEnd']) -1): if c.n > maxCov: maxCov = c.n length = int(r['chromEnd']) - int(r['chromStart']) rpkm = gc.calRPKM(count,length,numMapped) outBed.write('\t'.join([r['chrom'],r['chromStart'],r['chromEnd'],str(index),str(rpkm),r['strand']])) outBed.write('\n') outCov.write('\t'.join([str(index),str(count),str(maxCov),str(rpkm)])) outCov.write('\n') index += 1 if __name__=="__main__": nameMap = {"exonic":"cds", "intronic":"noncds"} mapped = c2r.getMappedReads(sys.argv[1]) for prefix in sys.argv[2:]: for k in nameMap: records = fo.getRecords("../genome/hg19/hg19."+nameMap[k]+".bed") removeDuplicate(records) sam = pysam.Samfile(prefix+".npcrd.unique."+k+".bam","rb") outBed = open(prefix+"."+k+".nonCluster.bed",'w') outCov = open(prefix + "."+k+".nonCluster.cov.txt",'w') calculateCov(records, sam, outBed, outCov, long(mapped[prefix.split('/')[-1].lower()]))