Example #1
0
def mergeFeatures(features):
    """Collapse runs of consecutive overlapping features into single intervals.

    Features are visited in the order given. A feature is folded into the
    running interval when it has the same strand and chromosome as that
    interval and ``fo.getOverlapOri`` reports a positive overlap with it;
    otherwise the running interval is emitted and a new one is started.
    Only the running interval is compared against each feature, so the input
    is presumably expected to be sorted by position -- TODO confirm with the
    caller.

    Args:
        features: Indexable, sliceable sequence of records in which index 0
            is the chromosome, 1 the start, 2 the end and 5 the strand.

    Returns:
        A list of ``[chrom, start, end, count, 0, strand]`` lists, one per
        merged interval, where ``count`` is the 1-based position of the
        interval in the output. An empty input yields an empty list.
    """
    # Use len() instead of truthiness so array-like inputs keep working.
    if len(features) == 0:
        return []

    first = features[0]
    currChrom = first[0]
    currStart = first[1]
    currEnd = first[2]
    currStrand = first[5]
    merged = []
    count = 1

    # The running interval is seeded from the first feature, so start at the
    # second one; comparing the first feature with itself could emit it twice
    # if the overlap helper returns a non-positive value for that case.
    for e in features[1:]:
        # Keep the cheap strand/chromosome checks first so the overlap helper
        # is only consulted for candidates on the same strand and chromosome.
        if (
            e[5] == currStrand
            and currChrom == e[0]
            and fo.getOverlapOri(currStart, currEnd, e[1], e[2]) > 0
        ):
            # Grow the running interval to cover the overlapping feature.
            currEnd = max(currEnd, e[2])
            currStart = min(currStart, e[1])
        else:
            merged.append([currChrom, currStart, currEnd, count, 0, currStrand])
            currChrom = e[0]
            currStart = e[1]
            currEnd = e[2]
            currStrand = e[5]
            count += 1

    # Flush the interval that was still open when the input ran out.
    merged.append([currChrom, currStart, currEnd, count, 0, currStrand])
    return merged
Example #2
0
# Pull every row from the reader into the transcript list.
transcripts.extend(reader)

# Order by gene name, then chromosome, then numeric start and end, so that
# transcripts of the same gene on the same chromosome end up next to each other.
transcripts.sort(
    key=lambda tx: (tx["name"], tx["chrom"], int(tx["chromStart"]), int(tx["chromEnd"]))
)

# Maps gene name -> list of [chrom, start, end, name, score, strand] intervals.
# Coordinates are stored exactly as read and converted to int only to compare.
genes = {}
for t in transcripts:
    gene_name = t["name"]
    prev_interval = genes[gene_name][-1] if gene_name in genes else None

    # Only the most recent interval of the same gene on the same chromosome
    # is a candidate for merging.
    if prev_interval is not None and t["chrom"] == prev_interval[0]:
        tx_start = int(t["chromStart"])
        tx_end = int(t["chromEnd"])
        prev_start = int(prev_interval[1])
        prev_end = int(prev_interval[2])
        if fo.getOverlapOri(tx_start, tx_end, prev_start, prev_end) > 0:
            # Overlap: widen the existing interval in place rather than
            # adding a new one.
            if tx_start < prev_start:
                prev_interval[1] = t["chromStart"]
            if tx_end > prev_end:
                prev_interval[2] = t["chromEnd"]
            continue

    # First transcript of this gene, a different chromosome, or no overlap:
    # start a new interval.
    new_interval = [t["chrom"], t["chromStart"], t["chromEnd"], gene_name, t["score"], t["strand"]]
    genes.setdefault(gene_name, []).append(new_interval)

records = []
# NOTE(review): this handle is not closed anywhere in the visible code --
# confirm it is closed further down.
out = open(sys.argv[2], "w")
for g in genes: