def CalKRBias(seq, topo, flank_win, max_dist):
    """Compute the K/R bias of a sequence around its TM segments.

    The TM segments reported by ``myfunc.GetTMPosition(topo)`` split the
    sequence into ``numTM + 1`` regions: the one before the first segment,
    the ones between consecutive segments and the one after the last
    segment.  For every region ``GetKRPos`` is queried on a window that
    reaches ``flank_win`` positions into the neighbouring segment(s) and at
    most ``max_dist`` positions away from them.  A region between two
    segments that is wider than ``2 * max_dist`` is scanned as two separate
    windows, one next to each segment.

    Args:
        seq: sequence to scan (passed through to ``GetKRPos``).
        topo: topology string handed to ``myfunc.GetTMPosition``.
        flank_win: how far a window extends into the adjacent TM segment.
        max_dist: how far a window extends away from the TM segment.

    Returns:
        Tuple ``(kr_bias, KR_pos_list, numTM)``:
          * ``kr_bias`` -- number of hits in regions 0, 2, 4, ... minus the
            number of hits in regions 1, 3, 5, ... (0-based region index);
            ``None`` when ``topo`` has no TM segment.
          * ``KR_pos_list`` -- one ``GetKRPos`` result per region, in
            order; empty when ``topo`` has no TM segment.
          * ``numTM`` -- number of TM segments found.

    NOTE(review): ``GetKRPos`` is defined elsewhere; presumably it returns
    the positions of K/R residues in ``seq[b:e]`` -- confirm.
    """
    tm_segments = myfunc.GetTMPosition(topo)
    # The N-terminal state is not used below; the call is kept so that the
    # function still fails in the same way if the helper rejects `topo`.
    lcmp.GetNtermState(topo)
    numTM = len(tm_segments)
    seq_len = len(seq)

    KR_pos_list = []
    if numTM < 1:
        return (None, KR_pos_list, numTM)

    # hit_totals[0]: regions with an even 0-based index,
    # hit_totals[1]: regions with an odd 0-based index.
    hit_totals = [0, 0]
    for region in range(numTM + 1):
        if region == 0:
            # Region preceding the first TM segment; clamp at sequence start.
            first_begin = tm_segments[0][0]
            win_begin = max(0, first_begin - max_dist)
            win_end = first_begin + flank_win
            hits = GetKRPos(seq, win_begin, win_end)
        elif region == numTM:
            # Region following the last TM segment; clamp at sequence end.
            last_end = tm_segments[region - 1][1]
            win_begin = last_end - flank_win
            win_end = min(seq_len, last_end + max_dist)
            hits = GetKRPos(seq, win_begin, win_end)
        else:
            prev_end = tm_segments[region - 1][1]
            next_begin = tm_segments[region][0]
            if next_begin - prev_end > 2 * max_dist:
                # Wide gap: only look close to each of the two segments.
                hits = GetKRPos(seq, prev_end - flank_win,
                                prev_end + max_dist)
                hits += GetKRPos(seq, next_begin - max_dist,
                                 next_begin + flank_win)
            else:
                # Narrow gap: a single window covers the whole region.
                hits = GetKRPos(seq, prev_end - flank_win,
                                next_begin + flank_win)
        KR_pos_list.append(hits)
        hit_totals[region % 2] += len(hits)

    return (hit_totals[0] - hit_totals[1], KR_pos_list, numTM)
def IsAllIdenticalTopology(topoList):  #{{{
    """Return True when every pair of topologies in *topoList* is identical.

    Each unordered pair ``(i, j)`` with ``i < j`` is checked with
    ``lcmp.IsIdenticalTopology``; the function returns False as soon as one
    pair is reported as not identical.  A list with fewer than two
    topologies is trivially identical and returns True without calling any
    helper.

    Args:
        topoList: sequence of topology strings.

    Returns:
        bool -- True if all pairs are identical (or there are fewer than
        two topologies), otherwise False.
    """
    numSeq = len(topoList)
    if numSeq <= 1:
        return True

    # Derive the per-topology features once instead of once per pair.
    posTMList = [myfunc.GetTMPosition(topo) for topo in topoList]
    NtermStateList = [lcmp.GetNtermState(topo) for topo in topoList]
    numTMList = [len(posTM) for posTM in posTMList]

    # NOTE(review): min_TM_overlap is not defined inside this function; it
    # is presumably a module-level name -- confirm it exists in this module.
    # `range` (not the Python-2-only `xrange`) keeps this loop working on
    # both Python 2 and Python 3.
    for i in range(numSeq - 1):
        for j in range(i + 1, numSeq):
            if not lcmp.IsIdenticalTopology(
                    NtermStateList[i], NtermStateList[j], numTMList[i],
                    numTMList[j], posTMList[i], posTMList[j], topoList[i],
                    topoList[j], min_TM_overlap):
                return False
    return True
# Example #3
# 0
def CountIdenticalTopology(pred_topodict, real_topodict, agreement, TM_type,
                           fpout_wrong, seqDict, pred_method_item):  #{{{
    """Count predicted topologies that are identical / inverted vs. the real ones.

    Every predicted topology is compared with the real topology stored
    under the same sequence id.  When no real topology exists for an id, a
    warning is written to stderr and an all-'i' string of the same length
    as the prediction is used as the real topology.

    Args:
        pred_topodict: mapping seqid -> predicted topology string.
        real_topodict: mapping seqid -> real topology string.
        agreement: label that is only used in the debug output.
        TM_type: not used by this function.
        fpout_wrong: writable file object that receives a record for every
            prediction that is not identical to the real topology, or None
            to disable that output.
        seqDict: mapping seqid -> amino acid sequence; ids missing from it
            simply get no "AASeq" line in the record.
        pred_method_item: method label written into the record header.

    Returns:
        Tuple ``(cntIDT, cntINV)``: the number of predictions reported as
        identical by ``lcmp.IsIdenticalTopology`` and, among the remaining
        ones, the number reported as inverted by ``lcmp.IsInvertedTopology``.
    """
    cntIDT = 0
    cntINV = 0
    cntDIFF = 0
    for seqid, predtopo in pred_topodict.items():
        try:
            realtopo = real_topodict[seqid]
        except KeyError:
            sys.stderr.write(
                "%s a nonTM protein predicted as TM protein\n" % (seqid,))
            realtopo = "i" * len(predtopo)

        pred_NtermState = lcmp.GetNtermState(predtopo)
        # NOTE(review): '.' is mapped to '-' only for the N-terminal state
        # lookup; the TM positions are taken from the unmodified string.
        real_NtermState = lcmp.GetNtermState(realtopo.replace('.', '-'))
        pred_posTM = myfunc.GetTMPosition(predtopo)
        real_posTM = myfunc.GetTMPosition(realtopo)
        pred_numTM = len(pred_posTM)
        real_numTM = len(real_posTM)

        if lcmp.IsIdenticalTopology(pred_NtermState, real_NtermState,
                                    pred_numTM, real_numTM, pred_posTM,
                                    real_posTM, predtopo, realtopo,
                                    g_params['min_TM_overlap']):
            cntIDT += 1
            continue

        if fpout_wrong is not None:
            # Output the wrongly predicted topology.
            fpout_wrong.write(">%s Number %d mtd_%s\n" %
                              (seqid, cntDIFF + 1, pred_method_item))
            try:
                seq = seqDict[seqid]
            except KeyError:
                # No sequence available: leave out the AASeq line.
                pass
            else:
                fpout_wrong.write("%-10s %s\n" % ("AASeq", seq))
            fpout_wrong.write("%-10s %s\n" % ("RealTop", realtopo))
            fpout_wrong.write("%-10s %s\n" % ("PredTop", predtopo))
            fpout_wrong.write("\n")

        if lcmp.IsInvertedTopology(pred_NtermState, real_NtermState,
                                   pred_numTM, real_numTM, pred_posTM,
                                   real_posTM, predtopo, realtopo,
                                   g_params['min_TM_overlap']):
            cntINV += 1
        if g_params['isDEBUG']:
            sys.stderr.write("%-7s(real %3s) nTM=%2d %s\n" % (
                seqid, agreement, real_numTM, realtopo))
            sys.stderr.write("%-7s(pred %3s) nTM=%2d %s\n" % (
                seqid, agreement, pred_numTM, predtopo))
            sys.stderr.write("\n")
        cntDIFF += 1
    return (cntIDT, cntINV)
#border color; used when BORDER_WIDTH is above 0
#BORDER_COLOR\t#0000ff

#=================================================================#
#       Actual data follows after the "DATA" keyword              #
#=================================================================#
DATA

"""

# Write the settings text first (presumably the dataset template that ends
# with the DATA keyword -- its definition is outside this section).
fpout.write(dataset_settings)

# One tab-separated row per sequence: id, n_i, n_o.  The TM count goes to
# n_o when the N-terminal state is 'o' and to n_i otherwise; the other
# column is 0.  The entry named 'Consensus' gets no row.
for i in range(numSeq):
    gid = idList[i]
    if gid == 'Consensus':
        continue
    NtermState = lcmp.GetNtermState(seqList[i])
    numTM = myfunc.CountTM(seqList[i])
    if NtermState == 'o':
        n_i, n_o = 0, numTM
    else:
        n_i, n_o = numTM, 0
    fpout.write("%s\t%d\t%d\n" % (gid, n_i, n_o))
fpout.write("\n")

# Never close stdout; only close a stream that is something else.
if fpout != sys.stdout:
    fpout.close()