def GetTopoAlignStat(topo1, topo2):
    """Describe how topo2 aligns to each TM segment of topo1.

    For every (begin, end) segment returned by myfunc.GetTMPosition(topo1),
    the columns in [begin, end) at which topo1 is 'M' are looked up in topo2
    and classified as 'M' (TM), '-' (gap) or anything else (sequence).

    Returns a list with one dict per segment:
        'seg2'    -- the topo2 states at those columns, joined into a string
        'freqTM'  -- myfunc.FloatDivision(number of 'M', number of columns)
        'freqGap' -- myfunc.FloatDivision(number of '-', number of columns)
        'freqSeq' -- myfunc.FloatDivision(number of others, number of columns)
    """
    result = []
    for (beg, end) in myfunc.GetTMPosition(topo1):
        # topo2 states at the columns where topo1 is in the membrane
        aligned = [topo2[k] for k in range(beg, end) if topo1[k] == 'M']
        numColumn = len(aligned)
        numTM = aligned.count('M')
        numGap = aligned.count('-')
        numOther = numColumn - numTM - numGap

        freqTM = myfunc.FloatDivision(numTM, numColumn)
        freqGap = myfunc.FloatDivision(numGap, numColumn)
        freqSeq = myfunc.FloatDivision(numOther, numColumn)
        result.append({
            'seg2': ''.join(aligned),
            'freqTM': freqTM,
            'freqGap': freqGap,
            'freqSeq': freqSeq,
        })
    return result
def MaskTopologyBySignalPeptide(idList, topoList, signalpDict):
    newTopoList = []
    for i in xrange(len(idList)):
        topo = topoList[i]
        if idList[i] in signalpDict:
            posTMList = myfunc.GetTMPosition(topo)
            try:
                posSigP = signalpDict[idList[i]]
                (b,e) = (posTMList[0][0],posTMList[0][1])
                cov = myfunc.coverage(0, posSigP, b, e)
                if float(cov)/(e-b) > 0.5:
#mask
                    masked_state = topo[e]
                    newTopo = ( "".join([masked_state]*(e)) +
                            topo[e:])
                    newTopoList.append(newTopo)
                    if DEBUG:
                        print
                        print "posTM", (b,e), "SignalPeptide", posSigP
                        print topo
                        print newTopo
                else:
                    newTopoList.append(topo)
            except (KeyError, IndexError):
                newTopoList.append(topo)
        else:
            newTopoList.append(topo)
    return newTopoList
Beispiel #3
0
def main(g_params):  #{{{
    """Print the TM helix positions of every topology in a FASTA file.

    Arguments are read from sys.argv:
        [-i] TOPOFILE   input topology file (read with myfunc.ReadFasta)
        -o OUTFILE      output file, opened through myfunc.myopen with
                        sys.stdout passed as the default handle
        -gapless        strip '-' and '.' from each topology first
        -q              set g_params['isQuiet'] to True
        -h, --help      print the help text
        --              take the next argument as TOPOFILE

    One line "<seqid> <list of TM positions>" is written per record.

    Returns 0 on success and 1 on a usage error, after printing help, or
    when reading or writing fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    outfile = ""
    isGapLess = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            topofile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-i", "--i"):
                # these options take a value; refuse to run past the end
                # of argv instead of dying with an IndexError
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires an argument\n" % (arg))
                    return 1
                if arg in ("-o", "--o"):
                    outfile = argv[i + 1]
                else:
                    topofile = argv[i + 1]
                i += 2
            elif arg in ("-gapless", "--gapless"):
                isGapLess = True
                i += 1
            elif arg == "-q":
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            topofile = arg
            i += 1
    if topofile == "":
        sys.stderr.write("topofile not set. exit\n")
        return 1

    fpout = None
    try:
        (idList, annoList, seqList) = myfunc.ReadFasta(topofile)
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        for (idx, seqid) in enumerate(idList):
            topo = seqList[idx]
            if isGapLess:
                topo = topo.replace("-", "").replace(".", "")
            posTMList = myfunc.GetTMPosition(topo)
            fpout.write("%s %s\n" % (seqid, posTMList))
    except (IOError, IndexError) as err:
        # report the failure instead of silently producing partial output
        sys.stderr.write(
            "Failed to process topofile %s: %s\n" % (topofile, err))
        return 1
    finally:
        if fpout is not None:
            myfunc.myclose(fpout)
    return 0
Beispiel #4
0
def IsIdenticalTopology_simple( topo1, topo2, min_TM_overlap = 5):#{{{
    """Return True when topo1 and topo2 describe the same topology.

    Following Krogh et al. 2001, two topologies are treated as identical if
      1. they have the same number of TM helices (myfunc.CountTM),
      2. they have the same N-terminal state (GetNtermState), i.e. the
         helices are oriented the same way, and
      3. for every pair of corresponding helices the common region is
         non-empty and contains at least min_TM_overlap positions that are
         'M' in both topologies.
    """
    numTM1 = myfunc.CountTM(topo1)
    numTM2 = myfunc.CountTM(topo2)
    Nterm1 = GetNtermState(topo1)
    Nterm2 = GetNtermState(topo2)
    posTM1 = myfunc.GetTMPosition(topo1)
    posTM2 = myfunc.GetTMPosition(topo2)

    if numTM1 != numTM2 or Nterm1 != Nterm2:
        return False

    for idxTM in range(numTM1):
        (beg1, end1) = posTM1[idxTM]
        (beg2, end2) = posTM2[idxTM]
        sharedBeg = max(beg1, beg2)
        sharedEnd = min(end1, end2)
        if sharedEnd - sharedBeg <= 0:
            # the two helices do not overlap at all
            return False

        numSharedM = 0
        for pos in range(sharedBeg, sharedEnd):
            if topo1[pos] == 'M' and topo2[pos] == 'M':
                numSharedM += 1
            if numSharedM >= min_TM_overlap:
                # enough common residues found, no need to scan further
                break
        if numSharedM < min_TM_overlap:
            return False
    return True
Beispiel #5
0
def MatchTopology(targetTopo, topoList, min_TM_overlap = 5, seqid = ""):#{{{
    """Compare targetTopo with every topology in topoList.

    Returns (matchList, numIDTtopo, numPredictor), where matchList holds one
    code per entry of topoList:
         1  IsIdenticalTopology reports it identical to targetTopo
         0  it differs from targetTopo
        -1  the entry is an empty string
    numIDTtopo is the number of 1 codes and numPredictor the number of
    non-empty entries (1 and 0 codes together).

    seqid does not take part in the computation.
    """
    NtermStateTarget = GetNtermState(targetTopo)
    posTMtarget = myfunc.GetTMPosition(targetTopo)
    numTMtarget = len(posTMtarget)

    matchList = []
    for topo in topoList:
        if topo == "":
            matchList.append(-1)
            continue
        NtermState = GetNtermState(topo)
        posTM = myfunc.GetTMPosition(topo)
        isSame = IsIdenticalTopology(NtermStateTarget, NtermState,
                numTMtarget, len(posTM), posTMtarget, posTM, targetTopo,
                topo, min_TM_overlap)
        matchList.append(1 if isSame else 0)

    numIDTtopo = matchList.count(1)
    # every code other than -1 stems from a non-empty topology
    numPredictor = len(matchList) - matchList.count(-1)
    return (matchList, numIDTtopo, numPredictor)
def CalKRBias(seq, topo, flank_win, max_dist):
    """Compute the K/R bias over the loops flanking the TM helices of topo.

    With numTM helices there are numTM + 1 loops, indexed 0..numTM. For each
    loop GetKRPos(seq, begin, end) is called on a window built as follows:
      * first loop: from at most max_dist before the first helix start
        (not below 0) to flank_win past that start;
      * last loop: from flank_win before the last helix end to at most
        max_dist after it (not beyond len(seq));
      * inner loop longer than 2 * max_dist: two windows, one around the
        preceding helix end and one around the following helix start,
        whose results are concatenated;
      * any other inner loop: the whole loop extended by flank_win into
        both neighbouring helices.

    Returns (kr_bias, KR_pos_list, numTM). KR_pos_list holds the GetKRPos
    result of every loop; kr_bias is the summed result length of the loops
    with even index minus that of the loops with odd index, or None when
    topo has no TM helix.
    """
    KR_pos_list = []  # one GetKRPos result per loop
    posTM = myfunc.GetTMPosition(topo)
    NtermState = lcmp.GetNtermState(topo)  # not used below; call kept
    numTM = len(posTM)
    seqLength = len(seq)
    if numTM < 1:
        return (None, KR_pos_list, numTM)

    sumByParity = [0, 0]  # [loops with even index, loops with odd index]
    for idxLoop in range(numTM + 1):
        if idxLoop == 0:
            firstBeg = posTM[0][0]
            KRpos = GetKRPos(seq, max(0, firstBeg - max_dist),
                             firstBeg + flank_win)
        elif idxLoop == numTM:
            lastEnd = posTM[numTM - 1][1]
            KRpos = GetKRPos(seq, lastEnd - flank_win,
                             min(seqLength, lastEnd + max_dist))
        else:
            prevEnd = posTM[idxLoop - 1][1]
            nextBeg = posTM[idxLoop][0]
            if nextBeg - prevEnd > 2 * max_dist:
                # long loop: only look at the parts close to the membrane
                KRpos = GetKRPos(seq, prevEnd - flank_win,
                                 prevEnd + max_dist)
                KRpos += GetKRPos(seq, nextBeg - max_dist,
                                  nextBeg + flank_win)
            else:
                KRpos = GetKRPos(seq, prevEnd - flank_win,
                                 nextBeg + flank_win)
        KR_pos_list.append(KRpos)
        sumByParity[idxLoop % 2] += len(KRpos)

    kr_bias = sumByParity[0] - sumByParity[1]
    return (kr_bias, KR_pos_list, numTM)
def IsAllIdenticalTopology(topoList):  #{{{
    """Return True when all topologies in topoList are pairwise identical.

    TM positions, N-terminal states and helix counts are computed once for
    every topology; each unordered pair is then checked with
    lcmp.IsIdenticalTopology, stopping at the first pair that differs.
    A list with zero or one topology is trivially identical.

    NOTE(review): min_TM_overlap is neither a parameter nor a local of this
    function, so it has to be defined at module level -- confirm.
    """
    numSeq = len(topoList)
    if numSeq <= 1:
        return True

    posTMList = [myfunc.GetTMPosition(tp) for tp in topoList]
    NtermStateList = [lcmp.GetNtermState(tp) for tp in topoList]
    numTMList = [len(pos) for pos in posTMList]

    # all() short-circuits on the first non-identical pair
    return all(
        lcmp.IsIdenticalTopology(
            NtermStateList[a], NtermStateList[b], numTMList[a],
            numTMList[b], posTMList[a], posTMList[b], topoList[a],
            topoList[b], min_TM_overlap)
        for a in range(numSeq - 1)
        for b in range(a + 1, numSeq))
Beispiel #8
0
def RunHHSearchPairwise(tableinfoFile,  #{{{
        hhprofilepathList, hhprofilepathMapDictList,
        hhsearchpathList, hhsearchpathMapDictList,
        topoDict, outpath, dupfile):
    """Run hhsearch for every sequence pair listed in tableinfoFile.

    Every line of tableinfoFile that is non-empty and does not start with
    '#' must begin with two sequence IDs. For each pair:
      * the sequence with the shorter topology in topoDict (missing
        topologies count as length 0) is the query (.a3m profile) and the
        other one the template (.hhm profile);
      * the result file is <outpath>/<id1>_<id2>.hhr, with the IDs in the
        order they appear in the file, unless
        g_params['isUsePreBuildHHSearchResult'] is set and a prebuilt
        result is found through GetProfileFileName;
      * hhsearch is run when the result file does not exist or
        g_params['isForceOverWrite'] is set. It writes to a temporary
        "<hhrfile>.tmp" that is renamed on success;
      * when dupfile is non-empty, the outcome of CheckDuplication is
        appended to it as one line per pair.

    Returns 0 on completion and 1 when tableinfoFile cannot be read.
    """
    # local import: the top-of-file import block is not visible from here
    import subprocess

    fpoutDup = None
    if dupfile != "":
        fpoutDup = myfunc.myopen(dupfile, sys.stdout, "w", False)

    hdl = myfunc.ReadLineByBlock(tableinfoFile)
    if hdl.failure:
        # do not leak the duplication output handle on the early exit
        myfunc.myclose(fpoutDup)
        return 1

    cnt = 0
    try:
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                if not line or line[0] == "#":
                    continue
                strs = line.split()
                if len(strs) < 2:
                    sys.stderr.write("Bad record line \"%s\"\n" % (line))
                    continue
                (idFirst, idSecond) = (strs[0], strs[1])

                # order the pair shorter -> longer by topology length;
                # sorted() is stable, so ties keep the order of the file
                pairlist = sorted(
                        [(idFirst, topoDict.get(idFirst, "")),
                         (idSecond, topoDict.get(idSecond, ""))],
                        key=lambda x: len(x[1]))

                # the result file is named after the IDs in file order
                hhrfile = "%s%s%s_%s.hhr" % (outpath, os.sep, idFirst,
                        idSecond)
                if g_params['isUsePreBuildHHSearchResult']:
                    keystr = "%s_%s" % (idFirst, idSecond)
                    tmp_hhrfile = GetProfileFileName(hhsearchpathList,
                            hhsearchpathMapDictList, keystr, ".hhr")
                    if os.path.exists(tmp_hhrfile):
                        hhrfile = tmp_hhrfile
                    else:
                        sys.stderr.write("hhrfile %s does not exist in"
                                " the prebuilt path\n" % (hhrfile))

                (seqid1, topo1) = pairlist[0]  # shorter sequence, query
                (seqid2, topo2) = pairlist[1]  # longer sequence, template
                numTM1 = len(myfunc.GetTMPosition(topo1))
                numTM2 = len(myfunc.GetTMPosition(topo2))

                if not os.path.exists(hhrfile) or g_params['isForceOverWrite']:
                    a3mfile = GetProfileFileName(hhprofilepathList,
                            hhprofilepathMapDictList, seqid1, ".a3m")
                    hhmfile = GetProfileFileName(hhprofilepathList,
                            hhprofilepathMapDictList, seqid2, ".hhm")
                    if a3mfile == "" or not os.path.exists(a3mfile):
                        sys.stderr.write(
                            "a3mfile not found for %s. Ignore.\n" % (seqid1))
                    elif hhmfile == "" or not os.path.exists(hhmfile):
                        sys.stderr.write(
                            "hhmfile not found for %s. Ignore.\n" % (seqid2))
                    else:
                        tmp_hhrfile = "%s.tmp" % (hhrfile)
                        # argument list, no shell: IDs and paths containing
                        # spaces or shell metacharacters are passed verbatim
                        cmd = ["hhsearch", "-i", a3mfile, "-d", hhmfile,
                               "-o", tmp_hhrfile, "-v", "0", "-nocons",
                               "-nopred", "-nodssp"]
                        try:
                            subprocess.call(cmd)
                        except OSError as err:
                            sys.stderr.write(
                                "Failed to run hhsearch for %s: %s\n" % (
                                    hhrfile, err))
                        if os.path.exists(tmp_hhrfile):
                            try:
                                # same directory, so a plain rename suffices
                                os.rename(tmp_hhrfile, hhrfile)
                            except OSError as err:
                                sys.stderr.write(
                                    "Failed to move %s to %s: %s\n" % (
                                        tmp_hhrfile, hhrfile, err))
                            else:
                                sys.stdout.write("%s output\n" % (hhrfile))

                if fpoutDup and os.path.exists(hhrfile):
                    hitinfo = CheckDuplication(hhrfile, seqid1, seqid2,
                            topoDict, cnt)
                    if hitinfo:
                        fpoutDup.write("%s-%s %s %4d %4d %4d %4d" % (
                            seqid1, seqid2, hitinfo['isDup'],
                            len(topo1), len(topo2), numTM1, numTM2))
                        for hit in hitinfo.get('hit', []):
                            ss_hit = "%d-%d(nTM=%d) %d-%d(nTM=%d)" % (
                                    hit['posQuery'][0], hit['posQuery'][1],
                                    hit['numTM1'],
                                    hit['posTemplate'][0],
                                    hit['posTemplate'][1], hit['numTM2'])
                            fpoutDup.write(" | %35s" % (ss_hit))
                        fpoutDup.write("\n")
                cnt += 1

            lines = hdl.readlines()
    finally:
        # close both handles even when processing a pair raises
        hdl.close()
        myfunc.myclose(fpoutDup)
    return 0
Beispiel #9
0
def CheckDuplication(hhrfile, seqid1, seqid2, topoDict, cnt):#{{{
    """Parse an hhsearch result file and report whether the pair is duplicated.

    hhrfile  -- path of the hhsearch result file
    seqid1   -- ID of the query; its topology is looked up in topoDict
    seqid2   -- ID of the template; its topology is looked up in topoDict
    topoDict -- mapping from sequence ID to topology string; a missing ID
                is treated as an empty topology
    cnt      -- running number, only used in the progress line on stdout

    The hit table after the " No Hit" header is read line by line with
    ExtractHit until a blank line, an unparsable line or a hit with an
    e-value above EVALUE_THRESHOLD is met. Every accepted hit gets
    'numTM1' and 'numTM2', the number of TM helices inside its aligned
    query and template region.

    Returns {} when the file cannot be read or its "Match_columns" line is
    malformed. Otherwise returns a dict with
        'seqLen1' -- value of the "Match_columns" line (if one is present)
        'hit'     -- accepted hits, sorted by ascending e-value
        'isDup'   -- 'y' if there are at least two hits and IsDuplicated
                     says so, else 'n'
    """
    try:
        with open(hhrfile, "r") as fpin:
            lines = fpin.readlines()
    except IOError:
        sys.stderr.write("Failed to read hhrfile %s\n" % (hhrfile))
        return {}

    topo1 = topoDict.get(seqid1, "")
    topo2 = topoDict.get(seqid2, "")

    hitinfo = {}
    hitList = []
    for (idx, line) in enumerate(lines):
        if line.startswith("Match_columns"):
            try:
                lengthQuery = int(line.split()[1])
                hitinfo['seqLen1'] = lengthQuery
            except (IndexError, ValueError):
                sys.stderr.write(
                    "Error in hhrfile %s. Ignore\n" % (hhrfile))
                return {}
        elif line.startswith(" No Hit"):
            for hitline in lines[idx + 1:]:
                # readlines() keeps the newline, so a blank line is "\n",
                # not ""; it marks the end of the hit table
                if hitline.strip() == "":
                    break
                hit = ExtractHit(hitline)
                if not hit or not (hit['evalue'] <= EVALUE_THRESHOLD):
                    break
                posQuery = hit['posQuery']
                posTemplate = hit['posTemplate']
                if topo1 != "":
                    hit['numTM1'] = len(myfunc.GetTMPosition(
                        topo1[posQuery[0]:posQuery[1]]))
                else:
                    hit['numTM1'] = 0
                if topo2 != "":
                    hit['numTM2'] = len(myfunc.GetTMPosition(
                        topo2[posTemplate[0]:posTemplate[1]]))
                else:
                    hit['numTM2'] = 0
                hitList.append(hit)
            # only the first hit table of the file is read
            break

    hitList = sorted(hitList, key=lambda x: x['evalue'])
    hitinfo['hit'] = hitList
    numHit = len(hitList)
    # there should be at least two hits for a duplication
    isDup = numHit >= 2 and IsDuplicated(hitList, len(topo1), len(topo2))

    if isDup:
        ss_isdup = 'y'
    else:
        ss_isdup = 'n'
    hitinfo['isDup'] = ss_isdup
    sys.stdout.write("%d: %s-%s %s numHit=%d\n" % (cnt, seqid1,
        seqid2, ss_isdup, numHit))
    return hitinfo
Beispiel #10
0
def main(g_params):#{{{
    """Write every TM segment of each topology in a FASTA file.

    Arguments are read from sys.argv:
        TOPOFILE        input topology file in FASTA format
        -outfile FILE   output file, opened through myfunc.myopen with
                        sys.stdout passed as the default handle
        -seqdb FILE     accepted, but not used by this function
        -q              set g_params['isQuiet'] to True
        -h, --help      print the help text
        --              take the next argument as TOPOFILE

    The input is read in chunks of BLOCK_SIZE. For every TM segment one
    line "<seqid>\\t<helix number>\\t<segment>" is written, with the helix
    number starting at 1.

    Returns 0 on success and 1 on a usage error, after printing help, or
    when the input file cannot be opened.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    seqdbfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outfile", "--outfile", "-seqdb", "--seqdb"):
                # these options take a value; refuse to run past the end
                # of argv instead of dying with an IndexError
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires an argument\n" % (arg))
                    return 1
                if arg in ("-outfile", "--outfile"):
                    outfile = argv[i + 1]
                else:
                    seqdbfile = argv[i + 1]  # parsed for compatibility only
                i += 2
            elif arg == "-q":
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            infile = arg
            i += 1
    if infile == "":
        sys.stderr.write("topofile not set\n")
        return 1
    elif not os.path.exists(infile):
        sys.stderr.write("topofile %s does not exist\n" % (infile))
        return 1

    # open() raises on failure rather than returning a false value
    try:
        fpin = open(infile, "rb")
    except IOError:
        sys.stderr.write("Failed to open input file %s\n" % (infile))
        return 1

    fpout = None
    try:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        unprocessedBuffer = ""
        isEOFreached = False
        while True:
            buff = fpin.read(BLOCK_SIZE)
            if len(buff) < BLOCK_SIZE:
                isEOFreached = True
            # prepend what the previous round could not parse yet
            buff = unprocessedBuffer + buff
            recordList = []
            unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                buff, recordList, isEOFreached)
            for record in recordList:
                seqid = record[0]
                topo = record[2]
                posTM = myfunc.GetTMPosition(topo)
                for (cnt, (b, e)) in enumerate(posTM):
                    fpout.write("%s\t%4d\t%s\n" % (seqid, cnt + 1, topo[b:e]))
            if isEOFreached:
                break
    finally:
        fpin.close()
        if fpout is not None:
            myfunc.myclose(fpout)
    return 0
Beispiel #11
0
def CountIdenticalTopology(pred_topodict, real_topodict, agreement, TM_type,
                           fpout_wrong, seqDict, pred_method_item):  #{{{
    """Count predictions that are identical or inverted w.r.t. the real topology.

    Every topology in pred_topodict is compared with the entry of the same
    ID in real_topodict. An ID missing from real_topodict is reported on
    stderr and compared against an all-'i' topology of the same length.

    A prediction that is not identical is written to fpout_wrong (unless it
    is None) together with its sequence from seqDict, if any, and is then
    tested with lcmp.IsInvertedTopology. g_params['isDEBUG'] additionally
    dumps both topologies to stderr, tagged with agreement.

    return (cntIDT, cntINV)
    """
    cntIDT = 0
    cntINV = 0
    cntDIFF = 0
    for seqid in pred_topodict:
        predtopo = pred_topodict[seqid]
        try:
            realtopo = real_topodict[seqid]
        except KeyError:
            sys.stderr.write(
                "%s a nonTM protein predicted as TM protein" % (seqid) + "\n")
            realtopo = "i" * len(predtopo)

        pred_NtermState = lcmp.GetNtermState(predtopo)
        real_NtermState = lcmp.GetNtermState(realtopo.replace('.', '-'))
        pred_posTM = myfunc.GetTMPosition(predtopo)
        real_posTM = myfunc.GetTMPosition(realtopo)
        pred_numTM = len(pred_posTM)
        real_numTM = len(real_posTM)

        if lcmp.IsIdenticalTopology(pred_NtermState, real_NtermState,
                                    pred_numTM, real_numTM, pred_posTM,
                                    real_posTM, predtopo, realtopo,
                                    g_params['min_TM_overlap']):
            cntIDT += 1
            continue

        # from here on the prediction is wrong
        if fpout_wrong is not None:
            header = ">%s Number %d mtd_%s\n" % (
                seqid, cntDIFF + 1, pred_method_item)
            fpout_wrong.write(header)
            try:
                seq = seqDict[seqid]
                fpout_wrong.write("%-10s %s\n" % ("AASeq", seq))
            except KeyError:
                seq = ""
            fpout_wrong.write("%-10s %s\n" % ("RealTop", realtopo))
            fpout_wrong.write("%-10s %s\n" % ("PredTop", predtopo))
            fpout_wrong.write("\n")

        isInverted = lcmp.IsInvertedTopology(
            pred_NtermState, real_NtermState, pred_numTM, real_numTM,
            pred_posTM, real_posTM, predtopo, realtopo,
            g_params['min_TM_overlap'])
        if isInverted:
            cntINV += 1

        if g_params['isDEBUG']:
            sys.stderr.write("%-7s(real %3s) nTM=%2d %s" % (
                seqid, agreement, real_numTM, realtopo) + "\n")
            sys.stderr.write("%-7s(pred %3s) nTM=%2d %s" % (
                seqid, agreement, pred_numTM, predtopo) + "\n")
            sys.stderr.write("\n")
        cntDIFF += 1
    return (cntIDT, cntINV)
Beispiel #12
0
def WriteInfo(groupList, seqlenDict, seqannoDict, pfamidDefDict,#{{{
        clanidDefDict, topoDict, groupedPfamScanDict, htmlname, fpout):
    """Write a text report for every domain group to fpout.

    Each item of groupList is indexed as
        [0] tab-separated family IDs of the group
        [1] number of sequences
        [2] list of sequence IDs
    A family ID starting with 'P' is described through pfamidDefDict, any
    other through clanidDefDict.

    For every sequence one row is written with its number, ID, length
    (-1 if not in seqlenDict) and number of TM helices, followed, per
    family, by the aligned region taken from groupedPfamScanDict and the
    TM helices it covers according to GetCoveredTM. A sequence without a
    topology only gets the first three columns and is not counted in the
    row numbering.

    A KeyError or IndexError while handling a group is reported on stderr
    and ends the output of that group.
    """
    for (idxGroup, tup) in enumerate(groupList):
        try:
            ss = tup[0]
            seqidlist = tup[2]
            famidlist = ss.split("\t")
            fpout.write("Group %d: %d seqs, %d domains "%(idxGroup+1, tup[1],
                len(famidlist)))
            for famid in famidlist:
                famdef = (pfamidDefDict[famid] if famid[0] == 'P'
                        else clanidDefDict[famid])
                fpout.write(" %s (%s)"%(famid, famdef))
            fpout.write("\n")

            fpout.write("#%-3s %10s %4s %3s %15s %5s\n"%("No",
                "SeqID", "Len", "nTM", "DomainCoverage", "nTM_within"))
            numWritten = 0
            for seqid in seqidlist:
                try:
                    seqlen = seqlenDict[seqid]
                except KeyError:
                    seqlen = -1
                fpout.write("%-4d %10s %4d"%(numWritten+1, seqid, seqlen))

                try:
                    topo = topoDict[seqid]
                    posTM = myfunc.GetTMPosition(topo)
                except KeyError:
                    sys.stderr.write("topo not found for %s"%seqid + "\n")
                    fpout.write("\n")
                    continue

                fpout.write(" %3d"%(len(posTM)))

                pfamscan_hit = groupedPfamScanDict[seqid]
                for famid in famidlist:
                    try:
                        b1 = pfamscan_hit[famid]['alnBeg']
                        e1 = pfamscan_hit[famid]['alnEnd']
                        (posTM_covered, indexList_covered) = GetCoveredTM(
                                (b1,e1), posTM)
                        ss_region = "(%d,%d)"%(b1,e1)
                        ss_numTM = "%d TM"%(len(posTM_covered))
                        # first and last covered helix, numbered from 1
                        ss_range = "%d-%d"%(indexList_covered[0]+1,
                                indexList_covered[-1]+1)
                        fpout.write("%15s %5s %4s"%(ss_region, ss_numTM,
                            ss_range))
                    except (KeyError):
                        sys.stderr.write(
                                "%s not in pfamscan_hit"%(famid) + "\n")
                fpout.write("\n")
                numWritten += 1
        except (KeyError, IndexError):
            sys.stderr.write("Error for %s"%(tup[0]) + "\n")
        fpout.write("\n")
Beispiel #13
0
def AnaLocalTopoAln(idList, topoList, localseqpairDict, fpout, fpout1):
    """Analyse the unaligned terminals of locally aligned topology pairs.

    idList and topoList are read as consecutive pairs: entries 2*i and
    2*i + 1 form pair i. For every pair, element [2] of
    localseqpairDict[(id1, id2)] is scanned with myfunc.GetSegPos(..., "1");
    exactly one segment is expected, and it is used as the aligned region
    (presumably "1" marks aligned columns -- confirm against the producer
    of localseqpairDict). The parts of both topologies before and after
    that region are the unaligned N- and C-terminals.

    Pairs without a local alignment, or with a segment count other than
    one, are reported on stderr and skipped. Pairs whose aligned region
    spans the whole alignment are reported on stdout.

    Two kinds of report lines are written, depending on the status values
    returned by GetUnglianedTermStatus and GetUnglianedTermStatus1 (their
    exact meaning is defined outside this block):
      fpout  -- pairs where one unaligned end has no TM region
      fpout1 -- pairs where one unaligned end has a single TM region of
                fewer than 5 residues
    """
    # fpout: write result for those one end with no TM region
    # fpout1: write result for those one end with one TM region but fewer
    # than 5 residues of TM region
    numseq = len(idList)
    # integer division under Python 2; a trailing unpaired entry is ignored
    numpair = numseq / 2
    for i in xrange(numpair):
        id1 = idList[2 * i]
        id2 = idList[2 * i + 1]
        topo1 = topoList[2 * i]
        topo2 = topoList[2 * i + 1]
        lengthAln = len(topo1)
        try:
            unaligned_str = localseqpairDict[(id1, id2)][2]
        except (KeyError, IndexError):
            print >> sys.stderr, "no local alignment found for %s %s" % (id1,
                                                                         id2)
            continue
        alignedPosList = myfunc.GetSegPos(unaligned_str, "1")
        if len(alignedPosList) != 1:
            print >> sys.stderr, "aligned region not equal 1 for %s %s" % (id1,
                                                                           id2)
            continue
        else:
            alignedPos = alignedPosList[0]
        if alignedPos[0] == 0 and alignedPos[1] == lengthAln:
            print "%s %s: Full aligned" % (id1, id2)
        else:
            alignedPos = alignedPosList[0]
            # split both topologies into the parts before and after the
            # aligned region
            topo_Nterm1 = topo1[:alignedPos[0]]
            topo_Cterm1 = topo1[alignedPos[1]:]
            topo_Nterm2 = topo2[:alignedPos[0]]
            topo_Cterm2 = topo2[alignedPos[1]:]
            # TM positions are relative to the start of each terminal part
            posTM_Nterm1 = myfunc.GetTMPosition(topo_Nterm1)
            posTM_Cterm1 = myfunc.GetTMPosition(topo_Cterm1)
            posTM_Nterm2 = myfunc.GetTMPosition(topo_Nterm2)
            posTM_Cterm2 = myfunc.GetTMPosition(topo_Cterm2)

            s_Nterm = GetUnglianedTermStatus(posTM_Nterm1, posTM_Nterm2)
            s_Cterm = GetUnglianedTermStatus(posTM_Cterm1, posTM_Cterm2)
            # s1_Nterm, s1_Cterm are used to record the status of those
            # unaligned terminals where one has a split TM helix and
            # the other has >= 1 TM helix
            s1_Nterm = GetUnglianedTermStatus1(posTM_Nterm1, posTM_Nterm2)
            s1_Cterm = GetUnglianedTermStatus1(posTM_Cterm1, posTM_Cterm2)

            # Report 1 (fpout): neither terminal has status >= 2 and at
            # least one terminal has a non-zero status
            if s_Nterm < 2 and s_Cterm < 2 and (s_Nterm + s_Cterm) > 0:
                # N-terminal: the side with TM helices supplies the helix
                # count and the distance from its last helix to the aligned
                # region; the other side supplies the number of unaligned
                # (non-gap) residues
                if len(posTM_Nterm1) > 0:
                    num_res_unaligned_Nterm = len(topo_Nterm2.replace("-", ""))
                    numTM_unaligned_Nterm = len(posTM_Nterm1)
                    num_res_to_TM_Nterm = len(topo_Nterm1) - posTM_Nterm1[
                        len(posTM_Nterm1) - 1][1]
                elif len(posTM_Nterm2) > 0:
                    num_res_unaligned_Nterm = len(topo_Nterm1.replace("-", ""))
                    numTM_unaligned_Nterm = len(posTM_Nterm2)
                    num_res_to_TM_Nterm = len(topo_Nterm2) - posTM_Nterm2[
                        len(posTM_Nterm2) - 1][1]
                else:
                    num_res_unaligned_Nterm = 0
                    numTM_unaligned_Nterm = 0
                    num_res_to_TM_Nterm = 0

                # C-terminal: same scheme, with the distance measured from
                # the aligned region to the start of the first helix
                if len(posTM_Cterm1) > 0:
                    num_res_unaligned_Cterm = len(topo_Cterm2.replace("-", ""))
                    numTM_unaligned_Cterm = len(posTM_Cterm1)
                    num_res_to_TM_Cterm = posTM_Cterm1[0][0]
                elif len(posTM_Cterm2) > 0:
                    num_res_unaligned_Cterm = len(topo_Cterm1.replace("-", ""))
                    numTM_unaligned_Cterm = len(posTM_Cterm2)
                    num_res_to_TM_Cterm = posTM_Cterm2[0][0]
                else:
                    num_res_unaligned_Cterm = 0
                    numTM_unaligned_Cterm = 0
                    num_res_to_TM_Cterm = 0

                ss = "%s %s %4d %4d %4d           %4d %4d %4d"
                print >> fpout, ss % (
                    id1, id2, num_res_unaligned_Nterm, num_res_to_TM_Nterm,
                    numTM_unaligned_Nterm, num_res_unaligned_Cterm,
                    num_res_to_TM_Cterm, numTM_unaligned_Cterm)
            # Report 2 (fpout1): at least one terminal has status 1.5 and
            # neither has status >= 2
            if ((s1_Nterm == 1.5 or s1_Cterm == 1.5) and s1_Nterm < 2
                    and s1_Cterm < 2):

                # -1 marks a value that does not apply to this pair
                num_res_unaligned_Nterm = -1
                numRes_PartHelix_Nterm = -1
                numTM_unaligned_Nterm = -1
                num_res_to_TM_Nterm = -1
                num_res_unaligned_Cterm = -1
                numRes_PartHelix_Cterm = -1
                numTM_unaligned_Cterm = -1
                num_res_to_TM_Cterm = -1
                if s1_Nterm == 1.5:
                    # the side holding a single helix shorter than 5
                    # residues is taken as the partial helix; the helix
                    # count and distance come from the other side
                    if len(
                            posTM_Nterm1
                    ) == 1 and posTM_Nterm1[0][1] - posTM_Nterm1[0][0] < 5:
                        num_res_unaligned_Nterm = len(
                            topo_Nterm1.replace("-", ""))
                        numRes_PartHelix_Nterm = posTM_Nterm1[0][
                            1] - posTM_Nterm1[0][0]
                        numTM_unaligned_Nterm = len(posTM_Nterm2)
                        num_res_to_TM_Nterm = len(topo_Nterm2) - posTM_Nterm2[
                            len(posTM_Nterm2) - 1][1]
                    elif len(
                            posTM_Nterm2
                    ) == 1 and posTM_Nterm2[0][1] - posTM_Nterm2[0][0] < 5:
                        num_res_unaligned_Nterm = len(
                            topo_Nterm2.replace("-", ""))
                        numRes_PartHelix_Nterm = posTM_Nterm2[0][
                            1] - posTM_Nterm2[0][0]
                        numTM_unaligned_Nterm = len(posTM_Nterm1)
                        num_res_to_TM_Nterm = len(topo_Nterm1) - posTM_Nterm1[
                            len(posTM_Nterm1) - 1][1]

                if s1_Cterm == 1.5:
                    # NOTE(review): the C-terminal distance is measured here
                    # from the END of the last helix to the end of the
                    # terminal, whereas report 1 uses the start of the first
                    # helix (posTM_Cterm*[0][0]) -- confirm this is intended
                    if len(
                            posTM_Cterm1
                    ) == 1 and posTM_Cterm1[0][1] - posTM_Cterm1[0][0] < 5:
                        num_res_unaligned_Cterm = len(
                            topo_Cterm1.replace("-", ""))
                        numRes_PartHelix_Cterm = posTM_Cterm1[0][
                            1] - posTM_Cterm1[0][0]
                        numTM_unaligned_Cterm = len(posTM_Cterm2)
                        num_res_to_TM_Cterm = len(topo_Cterm2) - posTM_Cterm2[
                            len(posTM_Cterm2) - 1][1]
                    elif len(
                            posTM_Cterm2
                    ) == 1 and posTM_Cterm2[0][1] - posTM_Cterm2[0][0] < 5:
                        num_res_unaligned_Cterm = len(
                            topo_Cterm2.replace("-", ""))
                        numRes_PartHelix_Cterm = posTM_Cterm2[0][
                            1] - posTM_Cterm2[0][0]
                        numTM_unaligned_Cterm = len(posTM_Cterm1)
                        num_res_to_TM_Cterm = len(topo_Cterm1) - posTM_Cterm1[
                            len(posTM_Cterm1) - 1][1]

                ss = "%s %s %4d %4d %4d %4d          %4d %4d %4d %4d"
                print >> fpout1, ss % (
                    id1, id2, num_res_unaligned_Nterm, numRes_PartHelix_Nterm,
                    num_res_to_TM_Nterm, numTM_unaligned_Nterm,
                    num_res_unaligned_Cterm, numRes_PartHelix_Cterm,
                    num_res_to_TM_Cterm, numTM_unaligned_Cterm)