Esempio n. 1
0
def CheckSuboptimal(rep_id, exons, eliminated_predictions, other_ids,
                    map_prediction2data, options):
    """Check whether prediction `rep_id` is a suboptimal join of two others.

    First collect every prediction in `other_ids` (skipping `rep_id` itself
    and anything in `eliminated_predictions`) for which
    ``Exons.CheckOverlap`` against `rep_id` holds while
    ``Exons.CheckCoverage`` does not.  Then test each unordered pair of the
    collected predictions and return True for the first pair where all of
    the following hold:

    * both members have an ``mQuality`` that is in
      ``options.quality_remove_suboptimal``,
    * ``Exons.CheckOverlap`` is false between the two members,
    * ``Exons.CheckContainedAinB`` holds for the exons of `rep_id` against
      the concatenated exons of the pair,
    * both members have an ``mPid`` strictly greater than the ``mPid`` of
      `rep_id` plus ``options.suboptimal_min_identity_difference``.

    Args:
        rep_id: identifier of the prediction being examined.
        exons: mapping from prediction identifier to its exons; the values
            are concatenated with ``+`` and handed to the ``Exons`` helpers.
        eliminated_predictions: container of identifiers to ignore.
        other_ids: iterable of identifiers to compare `rep_id` against.
        map_prediction2data: mapping from identifier to an object with
            ``mPid`` (presumably percent identity -- confirm) and
            ``mQuality`` attributes.
        options: object providing ``max_slippage``,
            ``suboptimal_min_identity_difference``,
            ``quality_remove_suboptimal``, ``loglevel`` and ``stdlog``.

    Returns:
        True if a qualifying pair is found, otherwise False.
    """
    # Predictions that overlap rep_id by exons, but not completely.
    overlaps = []
    for other_id in other_ids:
        if other_id == rep_id or other_id in eliminated_predictions:
            continue
        if not Exons.CheckOverlap(exons[rep_id], exons[other_id]):
            continue
        if Exons.CheckCoverage(exons[rep_id],
                               exons[other_id],
                               max_slippage=options.max_slippage):
            continue
        overlaps.append(other_id)

    rep = map_prediction2data[rep_id]
    # Both members of a pair must exceed this value.
    identity = rep.mPid + options.suboptimal_min_identity_difference

    # Visit every unordered pair (id1, id2) with id1 before id2 in overlaps.
    for x, id1 in enumerate(overlaps[:-1]):
        d1 = map_prediction2data[id1]
        for id2 in overlaps[x + 1:]:
            d2 = map_prediction2data[id2]
            if options.loglevel >= 3:
                # Trace the same four predicates the decision below
                # evaluates, so the debug output matches the outcome.
                options.stdlog.write(
                    "# suboptimal: %s ? %s + %s: %s %s %s %s %i %i %i\n" % (
                        rep_id,
                        id1,
                        id2,
                        d1.mQuality in options.quality_remove_suboptimal,
                        d2.mQuality in options.quality_remove_suboptimal,
                        not Exons.CheckOverlap(exons[id1], exons[id2]),
                        Exons.CheckContainedAinB(
                            exons[rep_id],
                            exons[id1] + exons[id2],
                            min_terminal_exon_coverage=0.0),
                        rep.mPid,
                        d1.mPid,
                        d2.mPid,
                    ))

            if (d1.mQuality in options.quality_remove_suboptimal and
                    d2.mQuality in options.quality_remove_suboptimal and
                    not Exons.CheckOverlap(exons[id1], exons[id2]) and
                    Exons.CheckContainedAinB(
                        exons[rep_id],
                        exons[id1] + exons[id2],
                        min_terminal_exon_coverage=0.0) and
                    identity < d1.mPid and
                    identity < d2.mPid):
                if options.loglevel >= 1:
                    options.stdlog.write(
                        "# elimination: %s(%s) joins %s(%s) and %s(%s)\n" %
                        (rep_id, rep.mPid, id1, d1.mPid, id2, d2.mPid))
                return True

    return False
Esempio n. 2
0
def CheckExonSwop(rep_id, exons, eliminated_predictions, other_ids,
                  map_prediction2data, options):
    """Check whether prediction `rep_id` is an exon swopper.

    Return True if `rep_id` joins two other predictions, i.e. there is a
    pair among the partially overlapping predictions such that

    * both members have an ``mQuality`` that is in
      ``options.quality_remove_exon_swopper``,
    * ``Exons.CheckOverlap`` is false between the two members, and
    * ``Exons.CheckCoverageAinB`` holds for the exons of `rep_id` against
      the concatenated exons of the pair (called with
      ``min_terminal_num_exons=0``, ``min_terminal_exon_coverage=0.7`` and
      ``max_slippage=options.max_slippage``).

    A prediction from `other_ids` counts as partially overlapping when it
    is not `rep_id`, is not in `eliminated_predictions`, and
    ``Exons.CheckOverlap`` against `rep_id` holds while
    ``Exons.CheckCoverage`` does not.

    Arguments:
        rep_id: identifier of the prediction to analyse.
        exons: map of prediction identifier to its exons.
        eliminated_predictions: container of identifiers to skip.
        other_ids: identifiers to check `rep_id` against.
        map_prediction2data: map of identifier to an object with an
            ``mQuality`` attribute.
        options: object providing ``max_slippage``,
            ``quality_remove_exon_swopper``, ``loglevel`` and ``stdlog``.

    Returns True on the first qualifying pair, False if there is none.
    """

    def overlaps_partially(candidate):
        # Shares exons with rep_id without covering it completely.
        return (Exons.CheckOverlap(exons[rep_id], exons[candidate]) and
                not Exons.CheckCoverage(exons[rep_id],
                                        exons[candidate],
                                        max_slippage=options.max_slippage))

    def is_removable(prediction):
        # Quality class is one of those configured for exon-swop removal.
        quality = map_prediction2data[prediction].mQuality
        return quality in options.quality_remove_exon_swopper

    def rep_covered_by(first, second):
        # rep_id's exons are covered by the two predictions taken together.
        return Exons.CheckCoverageAinB(exons[rep_id],
                                       exons[first] + exons[second],
                                       min_terminal_num_exons=0,
                                       min_terminal_exon_coverage=0.7,
                                       max_slippage=options.max_slippage)

    overlaps = [
        candidate for candidate in other_ids
        if candidate != rep_id
        and candidate not in eliminated_predictions
        and overlaps_partially(candidate)
    ]

    if options.loglevel >= 3:
        summary = "# exon swop: %s overlaps with %i out of %i predictions\n" % (
            rep_id, len(overlaps), len(other_ids))
        options.stdlog.write(summary)
        options.stdlog.flush()

    # Walk all unordered pairs, earlier list position first.
    for position, first in enumerate(overlaps[:-1]):
        for second in overlaps[position + 1:]:
            if options.loglevel >= 4:
                options.stdlog.write(
                    "# exon swop: %s ? %s + %s: %s %s %s %s\n" % (
                        rep_id, first, second,
                        is_removable(first),
                        is_removable(second),
                        not Exons.CheckOverlap(exons[first], exons[second]),
                        rep_covered_by(first, second)))

            if not (is_removable(first) and is_removable(second)):
                continue
            if Exons.CheckOverlap(exons[first], exons[second]):
                continue
            if not rep_covered_by(first, second):
                continue

            if options.loglevel >= 1:
                options.stdlog.write(
                    "# elimination: %s(%s) joins %s(%s) and %s(%s)\n" % (
                        rep_id, map_prediction2data[rep_id].mQuality,
                        first, map_prediction2data[first].mQuality,
                        second, map_prediction2data[second].mQuality))
            return True

    return False