def CheckSuboptimal(rep_id,
                    exons,
                    eliminated_predictions,
                    other_ids,
                    map_prediction2data,
                    options):
    """Return True if prediction *rep_id* is a suboptimal join of two others.

    First, every id in *other_ids* that is neither *rep_id* itself nor a key
    of *eliminated_predictions* is kept if its exons pass
    ``Exons.CheckOverlap`` against those of *rep_id* but fail
    ``Exons.CheckCoverage`` (called with ``options.max_slippage``).

    Then every unordered pair of kept predictions is examined.  The function
    returns True for the first pair for which all of the following hold:

    * both ``mQuality`` values are in ``options.quality_remove_suboptimal``,
    * the two predictions do not overlap each other (``Exons.CheckOverlap``),
    * ``Exons.CheckContainedAinB`` accepts the exons of *rep_id* against the
      concatenated exons of the pair,
    * both ``mPid`` values are strictly greater than
      ``rep.mPid + options.suboptimal_min_identity_difference``.

    Arguments
    ---------
    rep_id
        Identifier of the prediction under test.
    exons
        Mapping from prediction id to its exons.
    eliminated_predictions
        Container of ids that were already discarded; these are skipped.
    other_ids
        Iterable of prediction ids to compare against.
    map_prediction2data
        Mapping from prediction id to a record exposing ``mPid`` and
        ``mQuality``.
    options
        Settings object; ``max_slippage``,
        ``suboptimal_min_identity_difference``, ``quality_remove_suboptimal``,
        ``loglevel`` and ``stdlog`` are read.

    Returns
    -------
    bool
        True if a qualifying pair was found, otherwise False.
    """
    # Predictions sharing exons with rep_id without passing the coverage test.
    partial = []
    for candidate in other_ids:
        if candidate == rep_id or candidate in eliminated_predictions:
            continue
        if not Exons.CheckOverlap(exons[rep_id], exons[candidate]):
            continue
        if Exons.CheckCoverage(exons[rep_id], exons[candidate],
                               max_slippage=options.max_slippage):
            continue
        partial.append(candidate)

    rep = map_prediction2data[rep_id]
    # Both partners must beat this percent identity threshold.
    identity = rep.mPid + options.suboptimal_min_identity_difference

    for pos, first_id in enumerate(partial[:-1]):
        first = map_prediction2data[first_id]
        for second_id in partial[pos + 1:]:
            second = map_prediction2data[second_id]

            if options.loglevel >= 3:
                # NOTE(review): this diagnostic evaluates CheckCoverageAinB,
                # while the decision below uses CheckContainedAinB - confirm
                # which one is intended.
                options.stdlog.write(
                    "# suboptimal: %s ? %s + %s: %s %s %s %s %i %i %i\n" % (
                        rep_id,
                        first_id,
                        second_id,
                        first.mQuality in options.quality_remove_suboptimal,
                        second.mQuality in options.quality_remove_suboptimal,
                        not Exons.CheckOverlap(exons[first_id],
                                               exons[second_id]),
                        Exons.CheckCoverageAinB(
                            exons[rep_id],
                            exons[first_id] + exons[second_id],
                            min_terminal_exon_coverage=0.0),
                        rep.mPid,
                        first.mPid,
                        second.mPid,
                    ))

            # Guard clauses, evaluated in the same order as the original
            # short-circuiting conjunction.
            if first.mQuality not in options.quality_remove_suboptimal:
                continue
            if second.mQuality not in options.quality_remove_suboptimal:
                continue
            if Exons.CheckOverlap(exons[first_id], exons[second_id]):
                continue
            if not Exons.CheckContainedAinB(
                    exons[rep_id],
                    exons[first_id] + exons[second_id],
                    min_terminal_exon_coverage=0.0):
                continue

            if identity < first.mPid and identity < second.mPid:
                if options.loglevel >= 1:
                    options.stdlog.write(
                        "# elimination: %s(%s) joins %s(%s) and %s(%s)\n" %
                        (rep_id, rep.mPid,
                         first_id, first.mPid,
                         second_id, second.mPid))
                return True

    return False
def ProcessChunk(chunk, eliminated_predictions, exons):
    """Eliminate gene-spanning predictions within one cluster.

    *chunk* is a sequence of ``(from, to, id, quality)`` tuples.  According
    to the original author the entries are sorted by first position, so only
    an earlier entry can span a later one.

    An entry ``x`` is eliminated when its quality is not in
    ``options.quality_keep_gene_spanners`` and some later entry ``y``
    satisfies all of:

    * ``xfrom < yfrom`` and ``xto > yto`` (x strictly encloses y),
    * ``Exons.CheckOverlap`` reports no overlap between their exons,
    * the quality of ``y`` is in ``options.quality_remove_gene_spanners``.

    Side effects: ``eliminated_predictions[xid]`` is set to 0 for each
    eliminated entry, and a message is written to ``options.stdlog`` when
    ``options.loglevel >= 1``.

    NOTE(review): ``options`` is not a parameter; it is resolved from the
    surrounding (module) scope at call time.
    NOTE(review): ``exons`` is indexed with ``str(id)`` while
    ``eliminated_predictions`` is keyed with the raw id - confirm that
    callers expect this.

    Returns
    -------
    list
        ``(id, "g")`` tuples, one per eliminated entry, in chunk order.
    """
    eliminated = []

    for pos, (xfrom, xto, xid, xquality) in enumerate(chunk[:-1]):
        if xquality in options.quality_keep_gene_spanners:
            continue

        for yfrom, yto, yid, yquality in chunk[pos + 1:]:
            # x has to enclose y strictly on both sides ...
            if not (xfrom < yfrom and xto > yto):
                continue
            # ... without sharing any exon with it ...
            if Exons.CheckOverlap(exons[str(xid)], exons[str(yid)]):
                continue
            # ... and y must be of a quality that justifies the removal.
            if yquality not in options.quality_remove_gene_spanners:
                continue

            eliminated_predictions[xid] = 0
            eliminated.append((xid, "g"))
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# elimination: %s(%s) spans %s(%s)\n" %
                    (str(xid), xquality, str(yid), yquality))
            # One spanned prediction is enough; move on to the next x.
            break

    return eliminated
def CheckExonSwop(rep_id,
                  exons,
                  eliminated_predictions,
                  other_ids,
                  map_prediction2data,
                  options):
    """Return True if prediction *rep_id* is an exon swopper.

    An exon swop occurs if this prediction joins two other predictions.
    Neither of the joined predictions may pass the ``Exons.CheckCoverage``
    test against the master prediction.

    First, every id in *other_ids* that is neither *rep_id* itself nor a key
    of *eliminated_predictions* is kept if its exons pass
    ``Exons.CheckOverlap`` against those of *rep_id* but fail
    ``Exons.CheckCoverage`` (called with ``options.max_slippage``).

    Then every unordered pair of kept predictions is examined.  The function
    returns True for the first pair for which all of the following hold:

    * both ``mQuality`` values are in ``options.quality_remove_exon_swopper``,
    * the two predictions do not overlap each other,
    * ``Exons.CheckCoverageAinB`` accepts the exons of *rep_id* against the
      concatenated exons of the pair (``min_terminal_num_exons=0``,
      ``min_terminal_exon_coverage=0.7``, ``options.max_slippage``).

    Arguments
    ---------
    rep_id
        Identifier of the prediction to analyse.
    exons
        Mapping from prediction id to its exons.
    eliminated_predictions
        Container of ids that were already discarded; these are skipped.
    other_ids
        Sized iterable of prediction ids to check against.
    map_prediction2data
        Mapping from prediction id to a record exposing ``mQuality``.
    options
        Settings object; ``max_slippage``, ``quality_remove_exon_swopper``,
        ``loglevel`` and ``stdlog`` are read.

    Returns
    -------
    bool
        True if a qualifying pair was found, otherwise False.
    """

    def removable(prediction_id):
        # Kept as a lazy lookup so that records are only fetched when the
        # corresponding test is actually reached.
        return (map_prediction2data[prediction_id].mQuality
                in options.quality_remove_exon_swopper)

    def joined_by_rep(first_id, second_id):
        # Does rep_id cover the concatenated exons of the two predictions?
        return Exons.CheckCoverageAinB(
            exons[rep_id],
            exons[first_id] + exons[second_id],
            min_terminal_num_exons=0,
            min_terminal_exon_coverage=0.7,
            max_slippage=options.max_slippage)

    # Predictions sharing exons with rep_id without passing the coverage test.
    partial = []
    for candidate in other_ids:
        if candidate == rep_id or candidate in eliminated_predictions:
            continue
        if not Exons.CheckOverlap(exons[rep_id], exons[candidate]):
            continue
        if Exons.CheckCoverage(exons[rep_id], exons[candidate],
                               max_slippage=options.max_slippage):
            continue
        partial.append(candidate)

    if options.loglevel >= 3:
        options.stdlog.write(
            "# exon swop: %s overlaps with %i out of %i predictions\n" %
            (rep_id, len(partial), len(other_ids)))
        options.stdlog.flush()

    for pos, first_id in enumerate(partial[:-1]):
        for second_id in partial[pos + 1:]:

            if options.loglevel >= 4:
                options.stdlog.write(
                    "# exon swop: %s ? %s + %s: %s %s %s %s\n" %
                    (rep_id, first_id, second_id,
                     removable(first_id),
                     removable(second_id),
                     not Exons.CheckOverlap(exons[first_id],
                                            exons[second_id]),
                     joined_by_rep(first_id, second_id)))

            # Guard clauses, evaluated in the same order as the original
            # short-circuiting conjunction.
            if not (removable(first_id) and removable(second_id)):
                continue
            if Exons.CheckOverlap(exons[first_id], exons[second_id]):
                continue
            if not joined_by_rep(first_id, second_id):
                continue

            if options.loglevel >= 1:
                options.stdlog.write(
                    "# elimination: %s(%s) joins %s(%s) and %s(%s)\n" %
                    (rep_id, map_prediction2data[rep_id].mQuality,
                     first_id, map_prediction2data[first_id].mQuality,
                     second_id, map_prediction2data[second_id].mQuality))
            return True

    return False