def mergeFeatures(features):
    """Merge consecutive overlapping features into single intervals.

    Each feature is an indexable record read positionally: chromosome at
    index 0, start at index 1, end at index 2 and strand at index 5.
    Features are scanned in the order given and compared only against the
    interval currently being accumulated, so only adjacent entries can be
    merged -- callers presumably pass input sorted by position (verify
    against caller).

    A feature extends the current interval when it has the same strand and
    chromosome and ``fo.getOverlapOri`` reports a positive value for the two
    coordinate ranges; otherwise the current interval is emitted and a new
    one is started from that feature.

    Args:
        features: Sequence of feature records as described above.

    Returns:
        A list of ``[chrom, start, end, count, 0, strand]`` lists, where
        ``count`` is a 1-based running index of the merged interval. An
        empty input yields an empty list.
    """
    # Guard against empty input: seeding from features[0] would otherwise
    # raise IndexError.
    if not features:
        return []

    first = features[0]
    currChrom, currStart, currEnd, currStrand = first[0], first[1], first[2], first[5]
    merged = []
    count = 1

    # NOTE(review): the loop also visits features[0], comparing it against
    # itself. That is a no-op as long as getOverlapOri reports a positive
    # overlap for identical ranges -- confirm for zero-length features.
    for e in features:
        extends_current = (
            e[5] == currStrand
            and currChrom == e[0]
            and fo.getOverlapOri(currStart, currEnd, e[1], e[2]) > 0
        )
        if extends_current:
            # Grow the running interval to cover this feature.
            currEnd = max(currEnd, e[2])
            currStart = min(currStart, e[1])
        else:
            # Close the running interval and restart from this feature.
            merged.append([currChrom, currStart, currEnd, count, 0, currStrand])
            currChrom, currStart, currEnd, currStrand = e[0], e[1], e[2], e[5]
            count += 1

    # Emit the interval still open when the input is exhausted.
    merged.append([currChrom, currStart, currEnd, count, 0, currStrand])
    return merged
for r in reader: transcripts.append(r) transcripts.sort(key=lambda k: (k["name"], k["chrom"], int(k["chromStart"]), int(k["chromEnd"]))) genes = {} for t in transcripts: if t["name"] not in genes: genes[t["name"]] = [[t["chrom"], t["chromStart"], t["chromEnd"], t["name"], t["score"], t["strand"]]] else: if t["chrom"] != genes[t["name"]][-1][0]: genes[t["name"]].append([t["chrom"], t["chromStart"], t["chromEnd"], t["name"], t["score"], t["strand"]]) else: if ( fo.getOverlapOri( int(t["chromStart"]), int(t["chromEnd"]), int(genes[t["name"]][-1][1]), int(genes[t["name"]][-1][2]) ) > 0 ): if int(t["chromStart"]) < int(genes[t["name"]][-1][1]): genes[t["name"]][-1][1] = t["chromStart"] if int(t["chromEnd"]) > int(genes[t["name"]][-1][2]): genes[t["name"]][-1][2] = t["chromEnd"] else: genes[t["name"]].append( [t["chrom"], t["chromStart"], t["chromEnd"], t["name"], t["score"], t["strand"]] ) records = [] out = open(sys.argv[2], "w") for g in genes: