def clusterTEentries(entries, mmpenalty):
    """
    Cluster neighbouring TE insertions into single entries using a score.

    The entries are expected to belong to one TE family; this function does
    not check that. They are visited in order of ``start``. The first
    unvisited entry seeds a cluster, and the following entry is absorbed
    while both conditions hold:

    * the running score minus the gap penalty is not below zero, where the
      penalty is ``(next.start - cluster_end) * mmpenalty``;
    * that penalised score plus the next entry's own score is at least the
      current running score.

    The first entry failing either condition closes the cluster and seeds
    the next one.

    Args:
        entries: iterable of objects exposing ``chr``, ``source``,
            ``feature``, ``start``, ``end``, ``score``, ``strand``,
            ``frame``, ``comment`` and ``target``. It is not modified.
        mmpenalty: penalty charged per unit of distance between the current
            cluster end and the start of the next entry.

    Returns:
        A list with one new ``GTFEntry`` per cluster. Descriptive fields,
        ``start`` and ``target`` come from the seed entry, ``end`` from the
        last absorbed entry, and ``score`` is the accumulated cluster score.
        An empty input yields an empty list.
    """
    clustered = []
    tes = sorted(entries, key=lambda e: e.start)
    total = len(tes)
    # Walk the sorted list with an index rather than list.pop(0): popping the
    # head shifts every remaining element and makes the loop quadratic.
    i = 0
    while i < total:
        seed = tes[i]
        i += 1
        score = seed.score
        end = seed.end
        while i < total:
            candidate = tes[i]
            gap = candidate.start - end
            scorewithgap = score - gap * mmpenalty
            if scorewithgap < 0:
                # The gap alone uses up the whole running score: stop
                # extending, as in a local-alignment extension.
                break
            extended = scorewithgap + candidate.score
            if extended >= score:
                # New high score: absorb the candidate into the cluster.
                score = extended
                end = candidate.end
                i += 1
            else:
                # No new high score; the candidate seeds the next cluster.
                break

        ne = GTFEntry(seed.chr, seed.source, seed.feature, seed.start, end,
                      score, seed.strand, seed.frame, seed.comment)
        ne.target = seed.target
        clustered.append(ne)
    return clustered
def clusterTEentries(entries, mmpenalty):
    """
    Cluster neighbouring TE insertions into single entries using a score.

    The entries are expected to belong to one TE family; this function does
    not check that. After sorting by ``start``, each cluster begins with a
    seed entry and grows by absorbing the next entry while:

    * ``score - (next.start - end) * mmpenalty`` is not negative, and
    * that value plus ``next.score`` is at least the current ``score``.

    Here ``score`` and ``end`` are the running score and end coordinate of
    the cluster being built. The first entry that fails either test starts a
    new cluster.

    Args:
        entries: iterable of objects exposing ``chr``, ``source``,
            ``feature``, ``start``, ``end``, ``score``, ``strand``,
            ``frame``, ``comment`` and ``target``. It is not modified.
        mmpenalty: penalty per unit of distance between the cluster end and
            the start of the next entry.

    Returns:
        A list of new ``GTFEntry`` objects, one per cluster. Each takes its
        descriptive fields, ``start`` and ``target`` from the seed entry, its
        ``end`` from the last absorbed entry, and the accumulated score.
        An empty input yields an empty list.
    """
    clustered = []
    ordered = sorted(entries, key=lambda e: e.start)
    count = len(ordered)
    # An index cursor replaces repeated list.pop(0), which costs O(n) per
    # call because the remaining elements are shifted every time.
    idx = 0
    while idx < count:
        a = ordered[idx]
        idx += 1
        score, end = a.score, a.end
        while idx < count:
            totest = ordered[idx]
            gappen = (totest.start - end) * mmpenalty
            scorewithgap = score - gappen
            if scorewithgap < 0:
                # Gap penalty exceeds the running score: stop extending.
                break
            totestscore = scorewithgap + totest.score
            if totestscore >= score:
                # Absorbing this entry does not lower the score: extend.
                score = totestscore
                end = totest.end
                idx += 1
            else:
                # No new high score; leave this entry for the next cluster.
                break

        ne = GTFEntry(a.chr, a.source, a.feature, a.start, end, score,
                      a.strand, a.frame, a.comment)
        ne.target = a.target
        clustered.append(ne)
    return clustered
# Example no. 3
# 0
def mergeTEentries(entries, matchscore):
    """
    Merge overlapping TE entries into single spanning entries.

    The entries are expected to belong to one TE family; this function does
    not check that. They are sorted by ``start``. Starting from the first
    unvisited entry, every following entry whose ``start`` is less than or
    equal to the highest ``end`` seen so far in the run is folded into it.
    Entries that merely touch (``start == end``) are therefore merged too.

    Args:
        entries: iterable of objects exposing ``chr``, ``source``,
            ``feature``, ``start``, ``end``, ``strand``, ``frame``,
            ``comment`` and ``target``. It is not modified.
        matchscore: score awarded per unit of merged length.

    Returns:
        A list with one new ``GTFEntry`` per merged run. It spans from the
        run's first ``start`` to its highest ``end``, and its score is
        ``matchscore * float(highestend - start)``. All other fields and
        ``target`` are copied from the first entry of the run. An empty
        input yields an empty list.
    """
    merged = []
    tes = sorted(entries, key=lambda e: e.start)
    total = len(tes)
    # Index cursor instead of list.pop(0): popping the head of a list is
    # O(n) and made the original loop quadratic.
    i = 0
    while i < total:
        first = tes[i]
        i += 1
        start = first.start
        highestend = first.end
        while i < total and tes[i].start <= highestend:
            overlapping = tes[i]
            i += 1
            if overlapping.end > highestend:
                highestend = overlapping.end
        ne = GTFEntry(first.chr, first.source, first.feature, start,
                      highestend, matchscore * float(highestend - start),
                      first.strand, first.frame, first.comment)
        ne.target = first.target
        merged.append(ne)
    return merged
def mergeTEentries(entries, matchscore):
    """
    Merge overlapping TE entries into single spanning entries.

    The entries are expected to belong to one TE family; this function does
    not check that. After sorting by ``start``, consecutive entries are
    collapsed into one run for as long as the next entry's ``start`` is less
    than or equal to the highest ``end`` seen in the run so far. Touching
    entries (``start == end``) are merged as well.

    Args:
        entries: iterable of objects exposing ``chr``, ``source``,
            ``feature``, ``start``, ``end``, ``strand``, ``frame``,
            ``comment`` and ``target``. It is not modified.
        matchscore: score awarded per unit of merged length.

    Returns:
        A list of new ``GTFEntry`` objects, one per run, spanning from the
        run's first ``start`` to its highest ``end``. The score is
        ``matchscore * float(highestend - start)``; the remaining fields and
        ``target`` come from the first entry of the run. An empty input
        yields an empty list.
    """
    merged = []
    ordered = sorted(entries, key=lambda e: e.start)
    count = len(ordered)
    # Advance a cursor rather than calling list.pop(0), which shifts all
    # remaining elements on every call.
    idx = 0
    while idx < count:
        a = ordered[idx]
        idx += 1
        start = a.start
        highestend = a.end
        while idx < count and ordered[idx].start <= highestend:
            b = ordered[idx]
            idx += 1
            if b.end > highestend:
                highestend = b.end
        length = float(highestend - start)
        ne = GTFEntry(a.chr, a.source, a.feature, start, highestend,
                      matchscore * length, a.strand, a.frame, a.comment)
        ne.target = a.target
        merged.append(ne)
    return merged
def get_querygtflist(nucmerentries):
    """
    Build one ``GTFEntry`` per nucmer entry, placed on the query coordinates.

    ``GTFEntry`` arguments are, in order: chr, source, feature, start, end,
    score, strand, frame, comment (the comment is passed unparsed).

    Each result uses ``qname``/``qstart``/``qend`` as chr/start/end, the
    fixed source ``"nucmer"`` and feature ``"orthologous"``, and ``"."`` for
    score, strand and frame. The comment holds the reference location and
    the count as ``"<rname>:<rstart>-<rend> count=<count>"``.

    Returns:
        A list of ``GTFEntry`` objects in the same order as the input.
    """
    def to_gtf(nm):
        # Format the reference side first, then build the query-side entry.
        reference = "{0}:{1}-{2} count={3}".format(
            nm.rname, nm.rstart, nm.rend, nm.count)
        return GTFEntry(nm.qname, "nucmer", "orthologous",
                        nm.qstart, nm.qend, ".", ".", ".", reference)

    return [to_gtf(nm) for nm in nucmerentries]