Python RemoveUnnecessaryGap 예제들, libtopologycmp.RemoveUnnecessaryGap Python 예제들

예제 #1

0

파일 보기

파일: comptopo.py 프로젝트: vam-sin/bioinfo-toolbox

def CompareToposGaplesslyNew(strTop1, strTop2, strProtein1, strProtein2,
                             fpLog):  #{{{
    """Compare two aligned topologies using only the columns without gaps.

    After lcmp.RemoveUnnecessaryGap has been applied, every alignment column
    in which either topology holds a gap ('-') is discarded, e.g.

        -----iiiMMMooo  --->  iMMMooo
        iiiiii--MMMMoo        iMMMMoo

    The remaining strings are passed through filterTopo, counted with
    counttopo and finally handed to compareTopos.

    Args:
        strTop1, strTop2: The two aligned topology strings.
        strProtein1, strProtein2: Labels used in log and error messages.
        fpLog: Destination for log output; pass 0 to disable logging.

    Returns:
        ("DIFF", 0, 0) when both gapless topologies are empty, otherwise the
        result of compareTopos. The process exits with status 1 when exactly
        one of the gapless topologies is empty.
    """
    (strTop1, strTop2) = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])

    def _logPair(title, top1, top2):
        # Write a titled snapshot of both topologies, followed by a blank line.
        if fpLog != 0:
            print >> fpLog, title
            print >> fpLog, "%-20s:%s" % (strProtein1, top1)
            print >> fpLog, "%-20s:%s" % (strProtein2, top2)
            print >> fpLog

    _logPair("Unnecessary gaps removed", strTop1, strTop2)

    kept1 = []
    kept2 = []
    for (pos, state1) in enumerate(strTop1):
        # strTop2 is only looked at when strTop1 has no gap at this column.
        if state1 != '-' and strTop2[pos] != '-':
            kept1.append(state1)
            kept2.append(strTop2[pos])
    strNewTop1 = filterTopo(''.join(kept1))
    strNewTop2 = filterTopo(''.join(kept2))

    _logPair("Gapless", strNewTop1, strNewTop2)

    numState1 = len(strNewTop1)
    numState2 = len(strNewTop2)
    if numState1 == 0 and numState2 == 0:
        return ("DIFF", 0, 0)
    if numState1 == 0 or numState2 == 0:
        # Exactly one side is empty: the pair cannot be compared.
        print >> sys.stderr, "%s %s gapless length does not match" % (
            strProtein1, strProtein2)
        sys.exit(1)

    (intMems1, N1) = counttopo(strNewTop1)
    (intMems2, N2) = counttopo(strNewTop2)
    return compareTopos(intMems1, intMems2, strNewTop1, strNewTop2, N1, N2)

예제 #2

0

파일 보기

파일: comptopo.py 프로젝트: nanjiangshu/TMplot

def CompareToposGloballyNew(strTop1, strTop2, strProtein1, strProtein2,
                            fpLog):  #{{{
    """Compare two aligned topologies over their whole length.

    Both topologies are passed through lcmp.RemoveUnnecessaryGap, then
    trimTopo and filterTopo, before they are counted with counttopo and
    handed to compareTopos.

    Args:
        strTop1, strTop2: The two aligned topology strings.
        strProtein1, strProtein2: Labels used in log and error messages.
        fpLog: Destination for log output; pass 0 to disable logging.

    Returns:
        ("DIFF", 0, 0) when both processed topologies are empty, otherwise
        the result of compareTopos. The process exits with status 1 when
        exactly one processed topology is empty.
    """
    (top1, top2) = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])
    # Trim both topologies before filtering either of them.
    (top1, top2) = [trimTopo(topo) for topo in (top1, top2)]
    (top1, top2) = [filterTopo(topo) for topo in (top1, top2)]

    if fpLog != 0:
        logLines = (
            "Global",
            "%-20s:%s" % (strProtein1, top1),
            "%-20s:%s" % (strProtein2, top2),
            "",
        )
        for logLine in logLines:
            print(logLine, file=fpLog)

    length1 = len(top1)
    length2 = len(top2)
    if length1 == 0 and length2 == 0:
        return ("DIFF", 0, 0)
    if length1 == 0 or length2 == 0:
        # Exactly one side is empty: the pair cannot be compared.
        print("%s %s global length does not match" %
              (strProtein1, strProtein2),
              file=sys.stderr)
        sys.exit(1)

    (numMem1, nterm1) = counttopo(top1)
    (numMem2, nterm2) = counttopo(top2)
    return compareTopos(numMem1, numMem2, top1, top2, nterm1, nterm2)

예제 #3

0

파일 보기

def WriteSeqAlnHTML(seqAlnFileList, extTopoMSA, outfile):  # {{{
    """Write the given alignments, highlighted by TM regions, to an HTML file.

    For every alignment file the companion topology MSA is looked up at the
    same path with its extension replaced by ``extTopoMSA``. When either file
    of a pair is missing, a message is written to stderr and the pair is
    skipped. Both files are read with myfunc.ReadFasta.

    Args:
        seqAlnFileList: Paths of the sequence alignment files.
        extTopoMSA: Extension (without leading dot) of the topology MSA files.
        outfile: Path of the HTML file to create.

    Returns:
        0 on success, 1 when ``outfile`` cannot be opened for writing.
    """
    try:
        fpout = open(outfile, "w")
    except IOError:
        print("Failed to write to %s" % (outfile), file=sys.stderr)
        return 1

    # Close the output file even when one of the helpers raises.
    try:
        WriteHTMLHeader(
            'Alignment highlighted by <font color=%s>TM regions</font>' %
            ('red'), fpout)
        print("Processed alignments:")
        for alnfile in seqAlnFileList:
            alnroot = os.path.splitext(alnfile)[0]
            rootname_alnfile = os.path.basename(alnroot)
            topomsafile = '.'.join([alnroot, extTopoMSA])

            missingInput = False
            if not os.path.exists(alnfile):
                sys.stderr.write('alnfile %s does not exist\n' % (alnfile))
                missingInput = True
            if not os.path.exists(topomsafile):
                sys.stderr.write('topomsafile %s does not exist\n' %
                                 (topomsafile))
                missingInput = True
            if missingInput:
                continue

            (seqIDList, seqAnnoList, seqList) = myfunc.ReadFasta(alnfile)
            (topoIDList, topoAnnoList,
             topoList) = myfunc.ReadFasta(topomsafile)
            if g_params['removeUnnecessaryGap']:
                seqList = lcmp.RemoveUnnecessaryGap(seqList)
                topoList = lcmp.RemoveUnnecessaryGap(topoList)

            # since there is no shrinking, index map is always p->p
            final2seq_idxMapList = [
                {pos: pos for pos in range(len(seqList[i]))}
                for i in range(len(seqIDList))
            ]

            print(('\t' + rootname_alnfile))
            # NOTE(review): topoList is passed twice here; confirm this
            # matches the parameters WriteHTMLAlignment2 expects.
            WriteHTMLAlignment2(rootname_alnfile, topoIDList, topoAnnoList,
                                topoList, topoList, seqList,
                                final2seq_idxMapList, fpout)

        WriteHTMLTail(fpout)
    finally:
        fpout.close()
    return 0

예제 #4

0

파일 보기

파일: comptopo.py 프로젝트: vam-sin/bioinfo-toolbox

def compareToposLocally(strAlitopFile):  #{{{
    # Cut off unaligned ends:
    # Modified by Nanjiang from the original code 2010-08-11
    # compareToposLocally, that is
    # -----iiiMMMooo-- ___\ iiiMMMooo
    # iiiiii--MMMMoooo    / i--MMMMoo
    print "###########\nCompareToposLocally\n"
    #debug
    (strTop1, strTop2) = readAliTopo(strAlitopFile)

    [strTop1, strTop2] = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])
    print "strTop1:%s" % (strTop1)
    #debug
    print "strTop2:%s" % (strTop2)
    #debug

    # 1. treat the beginning
    nbegin = 0
    if (strTop1[0] == '-' or strTop2[0] == '-'):
        i = 0
        while (i < len(strTop1) and (strTop1[i] == '-' or strTop2[i] == '-')):
            i = i + 1
        nbegin = i
# 2. treat the ending
    nend = len(strTop1)
    if (strTop1[len(strTop1) - 1] == '-' or strTop2[len(strTop1) - 1] == '-'):
        i = len(strTop1) - 1
        while (i >= 0 and (strTop1[i] == '-' or strTop2[i] == '-')):
            i = i - 1
        nend = i + 1

    tmpStrTop1 = strTop1[nbegin:nend]
    tmpStrTop2 = strTop2[nbegin:nend]
    # 3. remove unnecessary gaps
    strNewTop1 = ''
    strNewTop2 = ''
    for i in range(len(tmpStrTop1)):
        if not (tmpStrTop1[i] == '-' and tmpStrTop2[i] == '-'):
            strNewTop1 += tmpStrTop1[i]
            strNewTop2 += tmpStrTop2[i]

    print "After local treatment\n"
    #debug
    print "strTop1:%s" % (strNewTop1)
    #debug
    print "strTop2:%s" % (strNewTop2)
    #debug

    strNewTop1 = trimTopo(strNewTop1)
    #after local treatment, gaps may still exist in the alignment, use the function trimTopo to remove these gaps
    strNewTop2 = trimTopo(strNewTop2)

    strNewTop1 = filterTopo(strNewTop1)
    strNewTop2 = filterTopo(strNewTop2)
    (intMems1, N1) = counttopo(strNewTop1)
    (intMems2, N2) = counttopo(strNewTop2)
    return compareTopos(intMems1, intMems2, strNewTop1, strNewTop2, N1, N2)

예제 #5

0

파일 보기

파일: comptopo.py 프로젝트: vam-sin/bioinfo-toolbox

def CompareToposLocallyNew(strTop1, strTop2, strProtein1, strProtein2,
                           fpLog):  #{{{
    """Compare two aligned topologies on their local overlap.

    The leading and trailing stretches of columns in which either topology
    has a gap are cut off, e.g.

        -----iiiMMMooo--  --->  iiiMMMooo
        iiiiii--MMMMoooo        i--MMMMoo

    Columns that are gaps in both topologies are then dropped and the result
    goes through trimTopo, filterTopo and counttopo before compareTopos.

    An empty alignment no longer raises IndexError while trimming; it falls
    through to the empty-result handling below.

    Args:
        strTop1, strTop2: The two aligned topology strings.
        strProtein1, strProtein2: Labels used in log and error messages.
        fpLog: Destination for log output; pass 0 to disable logging.

    Returns:
        ("DIFF", 0, 0) when both processed topologies are empty, otherwise
        the result of compareTopos. The process exits with status 1 when
        exactly one processed topology is empty.
    """
    [strTop1, strTop2] = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])
    alnLength = len(strTop1)

    # 1. treat the beginning: skip leading columns that contain a gap
    nbegin = 0
    while nbegin < alnLength and (strTop1[nbegin] == '-'
                                  or strTop2[nbegin] == '-'):
        nbegin += 1

    # 2. treat the ending: skip trailing columns that contain a gap
    nend = alnLength
    while nend > 0 and (strTop1[nend - 1] == '-'
                        or strTop2[nend - 1] == '-'):
        nend -= 1

    # 3. remove unnecessary gaps (columns that are gaps in both topologies)
    kept1 = []
    kept2 = []
    for (state1, state2) in zip(strTop1[nbegin:nend], strTop2[nbegin:nend]):
        if not (state1 == '-' and state2 == '-'):
            kept1.append(state1)
            kept2.append(state2)

    # After the local treatment gaps may still exist in the alignment;
    # trimTopo is used to remove them.
    strNewTop1 = trimTopo(''.join(kept1))
    strNewTop2 = trimTopo(''.join(kept2))

    strNewTop1 = filterTopo(strNewTop1)
    strNewTop2 = filterTopo(strNewTop2)

    if fpLog != 0:
        print >> fpLog, "Locally"
        print >> fpLog, "%-20s:%s" % (strProtein1, strNewTop1)
        print >> fpLog, "%-20s:%s" % (strProtein2, strNewTop2)
        print >> fpLog

    if len(strNewTop1) == 0 and len(strNewTop2) == 0:
        return ("DIFF", 0, 0)
    elif len(strNewTop1) == 0 or len(strNewTop2) == 0:
        # Exactly one side is empty: the pair cannot be compared.
        print >> sys.stderr, "%s %s local length does not match" % (
            strProtein1, strProtein2)
        sys.exit(1)

    (intMems1, N1) = counttopo(strNewTop1)
    (intMems2, N2) = counttopo(strNewTop2)
    return compareTopos(intMems1, intMems2, strNewTop1, strNewTop2, N1, N2)

예제 #6

0

파일 보기

파일: removeUnnecessaryGap.py 프로젝트: nanjiangshu/TMplot

def action(method, alnfile, outfile):
    """Remove unnecessary gaps from an alignment file and write the result.

    The alignment is read with myfunc.ReadFasta. ``method == 0`` selects
    lcmp.RemoveUnnecessaryGap_old; any other value selects
    lcmp.RemoveUnnecessaryGap. Each record is written as a ``>annotation``
    line followed by the processed sequence.

    Args:
        method: 0 for the old implementation, anything else for the new one.
        alnfile: Path of the input alignment file.
        outfile: Path of the output file; "" writes to stdout.

    Returns:
        0 on success, 1 when opening or writing the output raises IOError.
    """
    (seqidList, seqAnnoList, seqList) = myfunc.ReadFasta(alnfile)
    if (method == 0):
        newSeqList = lcmp.RemoveUnnecessaryGap_old(seqList)
    else:
        newSeqList = lcmp.RemoveUnnecessaryGap(seqList)
    try:
        if outfile == "":
            fpout = sys.stdout
        else:
            fpout = open(outfile, "w")
        try:
            for i in range(len(seqidList)):
                fpout.write(">%s\n" % (seqAnnoList[i]))
                fpout.write("%s\n" % (newSeqList[i]))
        finally:
            # Close our own file even if a write failed; never close stdout.
            if fpout is not sys.stdout:
                fpout.close()
        return 0
    except IOError:
        click.echo("Failed to write to file %s" % (outfile))
        return 1

예제 #7

0

파일 보기

def AddPairwiseAlignmentFactor(pairlistDict, msapath, msaext, #{{{
        isLocalAlignment):
    cntfamid = 0
    verbose = g_params['verbose']
    for famid in pairlistDict:
        cntfamid += 1
        if verbose >= 2:
            print "Add pairwise alignment factor for %d: %s"%(cntfamid, famid)
        msafile = msapath + os.sep + famid + msaext
        if not os.path.exists(msafile):
            print >> sys.stderr, "msafile %s does not exist. Ignore" % msafile
            continue
        (idList, annoList, seqList) = myfunc.ReadFasta(msafile)
        msaDict = {}
        for i in xrange(len(idList)):
            msaDict[idList[i]] = seqList[i]
        pairlist = pairlistDict[famid]
        #print "pairlist=", pairlist
        for i in xrange(len(pairlist)):
            pair = pairlist[i]
            #print "pair = ", pair
            seq1 = ""
            seq2 = ""
            id1 = pair[0]
            id2 = pair[1]
            if id1 in msaDict and id2 in msaDict:
                seq1 = msaDict[id1] 
                seq2 = msaDict[id2]
                [seq1, seq2] = lcmp.RemoveUnnecessaryGap([seq1, seq2])
                if len(seq1) != len(seq2):
                    print >> sys.stderr, "Bad alignment for %s and %s" %(id1,id2)
                else:
                    alignFactor = lcmp.GetAlignmentFactorFromPairAlignment(
                            seq1,seq2, isLocalAlignment)
                    pair.append(alignFactor)
            else:
                if id1 not in msaDict:
                    print >> sys.stderr, "%s not in msafile %s"%(id1, msafile)
                if id2 not in msaDict:
                    print >> sys.stderr, "%s not in msafile %s"%(id2, msafile)
    return 0

예제 #8

0

파일 보기

파일: comptopo.py 프로젝트: vam-sin/bioinfo-toolbox

def compareToposGaplessly(strAlitopFile):  #{{{
    # 1st version:
    # Cut off unaligned ends:
    # By Nanjiang 2010-08-11: this actually cut off all unaligned regions, not only
    # the two endings. The code really deal with the function described below in
    # written in another def, and compareToposLocally is renamed as
    # compareToposGaplessly
    # that is
    # -----iiiMMMooo ___\ iMMMooo
    # iiiiii--MMMMoo    / iMMMMoo
    print "##########\nCompareToposGaplessly\n"
    #debug
    (strTop1, strTop2) = readAliTopo(strAlitopFile)
    [strTop1, strTop2] = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])

    print "strTop1:%s" % (strTop1)
    #debug
    print "strTop2:%s" % (strTop2)
    #debug

    strNewTop1 = ''
    strNewTop2 = ''
    for i in range(len(strTop1)):
        if not (strTop1[i] == '-' or strTop2[i] == '-'):
            strNewTop1 += strTop1[i]
            strNewTop2 += strTop2[i]

    print "After Gapless treatment\n"
    #debug
    print "strTop1:%s" % (strNewTop1)
    #debug
    print "strTop2:%s" % (strNewTop2)
    #debug

    strNewTop1 = filterTopo(strNewTop1)
    strNewTop2 = filterTopo(strNewTop2)
    (intMems1, N1) = counttopo(strNewTop1)
    (intMems2, N2) = counttopo(strNewTop2)
    return compareTopos(intMems1, intMems2, strNewTop1, strNewTop2, N1, N2)

예제 #9

0

파일 보기

파일: comptopo.py 프로젝트: vam-sin/bioinfo-toolbox

def compareToposGlobally(strAlitopFile):  #{{{
    print "########\nCompareToposGlobally"
    #debug
    (strTop1, strTop2) = readAliTopo(strAlitopFile)
    [strTop1, strTop2] = lcmp.RemoveUnnecessaryGap([strTop1, strTop2])
    print "strTop1:%s" % (strTop1)
    #debug
    print "strTop2:%s" % (strTop2)
    #debug
    strTop1 = trimTopo(strTop1)
    strTop2 = trimTopo(strTop2)
    print "After trimming"
    #debug
    print "strTop1:%s" % (strTop1)
    #debug
    print "strTop2:%s" % (strTop2)
    #debug
    strTop1 = filterTopo(strTop1)
    strTop2 = filterTopo(strTop2)
    (intNumMem1, Nterm1) = counttopo(strTop1)
    (intNumMem2, Nterm2) = counttopo(strTop2)
    return compareTopos(intNumMem1, intNumMem2, strTop1, strTop2, Nterm1,
                        Nterm2)

예제 #10

0

파일 보기

def WritePairAln(pairlistDict, msapath, msaext, outname):#{{{
    verbose = g_params['verbose']
    outAlnFile = outname + ".pairaln"
    outTableFile = outname + ".tableinfo"
    outSelPairList = outname + ".pairlistwithpfamid"
    try:
        fpout_aln = open(outAlnFile, "w")
    except IOError:
        print >> sys.stderr, "Failed to write to file", outAlnFile
        return 1
    try:
        fpout_table = open(outTableFile, "w")
    except IOError:
        print >> sys.stderr, "Failed to write to file", outTableFile
        return 1

    try:
        fpout_list = open(outSelPairList, "w")
    except IOError:
        print >> sys.stderr, "Failed to write to file", outSelPairList
        return 1

    fpout_table.write("#%-15s %-15s %6s %6s %9s %6s %6s %9s %6s %6s %6s %6s %6s\n" % (
        "Seq1","Seq2", "IDT0", "SIM0", "AlnLength", "Len1","Len2",
        "Score","N_IDT", "N_SIM", "N_GAP", "IDT1", "IDT2"))

    for famid in pairlistDict:
        if verbose >= 2:
            print "Write pairwise alignment for %s"%(famid)
        msafile = msapath + os.sep + famid + msaext
        if not os.path.exists(msafile):
            print >> sys.stderr, "msafile %s does not exist. Ignore" % msafile
            continue
        (idList, annoList, seqList) = myfunc.ReadFasta(msafile)
        msaDict = {}
        annoDict = {}
        for i in xrange(len(idList)):
            msaDict[idList[i]] = seqList[i]
            annoDict[idList[i]] = annoList[i]
        pairlist = pairlistDict[famid]
        #print "pairlist2=", pairlist
        for pair in pairlist:
            #print "pair2 = ", pair
            seq1 = ""
            seq2 = ""
            id1 = pair[0]
            id2 = pair[1]
            if id1 in msaDict and id2 in msaDict:
                seq1 = msaDict[id1] 
                seq2 = msaDict[id2]
                [seq1, seq2] = lcmp.RemoveUnnecessaryGap([seq1, seq2])
                if len(seq1) != len(seq2):
                    print >> sys.stderr, "Bad alignment for %s and %s" %(id1,id2)
                else:
                    rd = pair[2]
                    fpout_aln.write(">%s aligned_to=%s seqIDT=%.1f seqIDT1=%.1f\n"%(
                        annoDict[id1], id2, rd['seqidt0'], rd['seqidt1']))
                    fpout_aln.write("%s\n"%seq1)
                    fpout_aln.write(">%s aligned_to=%s seqIDT=%.1f seqIDT1=%.1f\n"%(
                        annoDict[id2], id1, rd['seqidt0'], rd['seqidt1']))
                    fpout_aln.write("%s\n"%seq2)
                    fpout_table.write("%-16s %-15s %6.1f %6.1f %9d %6d %6d %9.1f %6d %6d %6d %6.1f %6.1f\n"% (
                        id1, id2, rd['seqidt0'], -1.0,
                        rd['alnLength'],
                        rd['seqLength1'], rd['seqLength2'],
                        -1.0,
                        rd['numIDT'], -1, rd['numGap'],
                        rd['seqidt1'], rd['seqidt2']))
                    fpout_list.write("%s %s %s\n"%(id1, id2, famid))
    fpout_aln.close()
    fpout_table.close()
    fpout_list.close()
    print "Result output to "
    print "\t%s"%outAlnFile
    print "\t%s"%outTableFile

    return 0

예제 #11

0

파일 보기

파일: test.py 프로젝트: vam-sin/bioinfo-toolbox

def main():  #{{{
    if 0:  #{{{
        strTop1 = "---MMMM-----i-i-i---MMM----MMMM-ooo"
        strTop2 = "----MMMM-----i-ii-----MMM---MMM--oo"
        strProtein1 = "id1"
        strProtein2 = "id2"
        fpLog = sys.stdout
        class_gapless, num1_gapless, num2_gapless = ct.CompareToposGaplesslyNew(
            strTop1, strTop2, strProtein1, strProtein2, fpLog)
        # Note: calling the int, float, string will not change their original value
        # calling the dict, list will change their original value
        print "strTop1:", strTop1
        print "strTop2:", strTop2
#}}}
    if 0:  #{{{
        PrintFuncName()
        print("this file name is: %s" % __file__)
#}}}
    if 0:  #{{{
        # filename="/nanjiang/data/blastdb/uniprot_KW181_idt50.fasta"
        filename = sys.argv[1]
        print filename
        fp = open(filename, "r")
        lines = fp.readlines()
        fp.close()
#}}}
    if 0:  #{{{
        # filename="/nanjiang/data/blastdb/uniprot_KW181_idt50.fasta"
        filename = sys.argv[1]
        print filename
        BLOCK_SIZE = 100000
        fp = open(filename, "r")
        buff = fp.read(BLOCK_SIZE)
        while buff:
            buff = fp.read(BLOCK_SIZE)
        fp.close()
#}}}
    if 0:  #{{{
        # filename="/nanjiang/data/blastdb/uniprot_KW181_idt50.fasta"
        filename = sys.argv[1]
        print filename
        fp = open(filename, "r")
        line = fp.readline()
        while line:
            line = fp.readline()
        fp.close()
        #}}}
    if 0:  #{{{
        try:
            BLOCK_SIZE = 100000
            infile = sys.argv[1]
            fpin = open(infile, 'rb')
            unprocessedBuffer = ""
            isEOFreached = False
            while 1:
                buff = fpin.read(BLOCK_SIZE)
                if len(buff) < BLOCK_SIZE:
                    isEOFreached = True
                buff = unprocessedBuffer + buff
                recordList = []
                unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                    buff, recordList, isEOFreached)
                if len(recordList) > 0:
                    for record in recordList:
                        sys.stdout.write(">%s\n" % record[1])
                        sys.stdout.write("%s\n" % record[2])
                if isEOFreached == True:
                    break
            fpin.close()
        except IOError:
            raise
            #}}}
    if 0:  #{{{
        try:
            infile = sys.argv[1]
            (annoList, seqList) = myfunc.ReadFasta_without_id(infile)
            for i in xrange(len(seqList)):
                sys.stdout.write(">%s\n" % annoList[i])
                sys.stdout.write("%s\n" % seqList[i])
        except IOError:
            raise
            #}}}
    if 0:  #{{{
        hhrfile = "hhsearch/A1RZ92-Q74DY9.hhr"
        if IsDuplicatedByHHSearch(hhrfile):
            print "yes"

#}}}
    if 0:  #{{{
        import pairlistwithfamid2pairaln_by_msa
        seq1 = "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------MLSSTATTMLRAGVSRSSGALQPMLLRSAACPCSPFSMNTKLSQPTSV-----RPLSTSPSALVLRFRAQQQAQLAQQQLRRASSSSSSSSSSTRPRSDAELDANAAEAAAAAQSAAHAGEPVLDWNTFFKLRKTRRRVQLAFSVIMTLITSGAGGAVLSTGVADAMVAQVPLEPMFAVGLMTASFGALGWLMGPAMGGMVFNALKSKYRGQMEIKEGQFFARIKKHRVDPSASSMGNPVPDFYGEKISSVAGYRQWLKDQRAFNKKRTTFV"
        seq2 = "MDILLAVLEQGFIFSIVCFGVYITYKILDFPDLSVDGTFPLGAAVAAAFLVKGYSPVLSSLAALVAGAIAGGITGILHVKFKITNLLSGILVMVGLYSINLRIMGKSNIPLFNKIHLFSDTMNPIIIITVFLLICKITLDLFLKTKAGFILKATGDNEQLVLSLGVNKDLVKIMGLMLSNALVALGGALMAQYQGFSDVGMGTGIVVMGLASVIIGESLFGRIKALNATTRVLLGALVYKLSVSI---ALTVGLAP-------TDLKLVTAIIVVIALSLNKNPLKIITKQKTKEGGIL------NASNTKSAQSVQ-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
        seq1 = "---------------------------------------------------------------------------------------------------------------------------------------MALSSLFFTASALLLMFLAFLGGARNSNPLDRIYWLEAATGNIPGAPALSRWTYWNLCAVNSEGHNECGKSYPDYPFDPPSHRNFNTHVNIPAAFIGTRHYFLTSRFMFPFHIIALFFATCSLLTGFLAMCTRIGNWVSAFSAYFALTFQTITTCLMTAVYVQGRDKFNNNGQSSHLGVKAFAFMWTSVALLFLSCVIYCMGGAVGRKDGGYSGREQRRRGFFNSHRSGSLRSNKETAP"
        seq2 = "MRKIAAIGGIVFISFILTIVAMFTKLWISWSIGKFSYGIGIVPYHSNSAGWFTAASWMVFISFGLFIPLILVVLFTAYKVHHDGCCHSIRHCFNSICLICSIIAVLEIIAFVLMAVNASRYVKGASISEKKSLLQLGSSAYLDLVSAILIIVATVLSGHASHHDCH----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
        alignFactor = pairlistwithfamid2pairaln_by_msa.GetAlignmentFactorFromPairAlignment(
            seq1, seq2)
        print alignFactor
#}}}
    if 0:  #{{{
        try:
            dbname = sys.argv[1]
            print dbname
            from myfunc import MyDB
            cls = MyDB(dbname)
            #            print cls.idList
            record = cls.GetRecord("A0FGX9")
            if record:
                print record
                #             for rd in  cls.GetAllRecord():
                #                 print rd
                (seqid, anno, seq) = myfunc.ExtractFromSeqWithAnno(record)
                print(seqid, anno, seq)
        except IndexError:
            pass

#}}}
    if 0:  #{{{
        import my_extractdb
        # mimicking my_extractdb.py to see which one is faster
        try:
            dbname = sys.argv[1]
            idlistfile = sys.argv[2]
            cls = myfunc.MyDB(dbname)
            if cls.failure:
                print >> sys.stderr, "MyDB init failed"
            else:
                idlist = open(idlistfile, "r").read().split("\n")
                fpout = sys.stdout
                for seqid in idlist:
                    if seqid:
                        record = cls.GetRecord(seqid)
                        fpout.write(record)
            #             for rd in  cls.GetAllRecord():
            #                 print rd
#                (seqid, anno, seq) = myfunc.ExtractFromSeqWithAnno(record)
#                print (seqid, anno, seq)
        except IndexError:
            print "error"
            pass
#}}}
    if 0:  #{{{ #test ReadLineByBlock
        try:
            infile = sys.argv[1]
            from myfunc import ReadLineByBlock
            cls = ReadLineByBlock(infile)
            lines = cls.readlines()
            while lines != None:
                for line in lines:
                    print line
                lines = cls.readlines()

        except IndexError:
            pass
#}}}
    if 0:  #{{{ #test speed of ReadLineByBlock
        # ReadLineByBlock is about 3 times faster than file.readline()
        try:
            from myfunc import ReadLineByBlock
            infile = sys.argv[1]

            start = time.time()
            hdl = ReadLineByBlock(infile)
            lines = hdl.readlines()
            while lines != None:
                lines = hdl.readlines()
            hdl.close()
            end = time.time()
            msg = "Reading %s by ReadLineByBlock costs %.3fs seconds"
            print msg % (infile, (end - start))

            start = time.time()
            hdl = open(infile, "r")
            line = hdl.readline()
            while line:
                line = hdl.readline()
            hdl.close()
            end = time.time()
            msg = "Reading %s by readline() costs %.3fs seconds"
            print msg % (infile, (end - start))

        except IndexError:
            pass
#}}}
    if 0:  #{{{ #test readline
        try:
            infile = sys.argv[1]
            fp = open(infile, "r")
            line = fp.readline()
            while line:
                print line
                line = fp.readline()
            fp.close()
        except IndexError:
            pass
#}}}
    if 0:  #{{{ #test the speed of GetFirstWord
        try:
            nloop = int(sys.argv[1])
            string = "kjdafk jasdfj j"
            #string = "askdf askdf "
            #            string = "kajsdfasdfsdfjakasjdfka"
            #            string = "kajsdfasdf,sdfjakasjdfka"
            delimiter = " \t\r,.\n"
            delimiter = " "
            for i in xrange(nloop):
                #firstword = myfunc.GetFirstWord(string, delimiter)
                #firstword = string.split()[0]
                #firstword = string.partition(" ")[0]
                firstword = myfunc.GetFirstWord(string)
                #pass
                #print firstword
        except (IndexError, ValueError):
            pass
#}}}
    if 0:  #{{{ # read seq by SeqIO
        from Bio import SeqIO
        try:
            seqfile = sys.argv[1]
            # 1. SeqIO ####################
            start = time.time()
            handle = open(seqfile, "rU")
            cnt = 0
            for record in SeqIO.parse(handle, "fasta"):
                cnt += 1
            handle.close()
            end = time.time()
            msg = "Reading %d sequences by SeqIO costs %.3fs seconds"
            print msg % (cnt, (end - start))

            # 2. ReadFasta ####################
            start = time.time()
            seqfile = sys.argv[1]
            (idList, annoList, seqList) = myfunc.ReadFasta(seqfile)
            end = time.time()
            msg = "Reading %d sequences by ReadFasta costs %.3fs seconds"
            print msg % (len(idList), (end - start))

            # 3. ReadFasta from buffer
            BLOCK_SIZE = 100000
            start = time.time()
            cnt = 0
            fpin = open(seqfile, 'rb')
            unprocessedBuffer = ""
            isEOFreached = False
            while 1:
                buff = fpin.read(BLOCK_SIZE)
                if len(buff) < BLOCK_SIZE:
                    isEOFreached = True
                buff = unprocessedBuffer + buff
                recordList = []
                unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                    buff, recordList, isEOFreached)
                cnt += len(recordList)
                if isEOFreached == True:
                    break
            fpin.close()
            end = time.time()
            msg = "Reading %d sequences by ReadFastaFromBuffer costs %.3fs seconds"
            print msg % (cnt, (end - start))

            # 4. ReadFastaByBlock ####################
            start = time.time()
            seqfile = sys.argv[1]
            hdl = myfunc.ReadFastaByBlock(seqfile, 0, 0)
            if hdl.failure:
                print >> sys.stderr, "Failed to init ReadFastaByBlock"
                return 1
            recordList = hdl.readseq()
            cnt = 0
            while recordList != None:
                cnt += len(recordList)
                #                 for rd in recordList:
                #                     print ">%s"%rd.description
                #                     print rd.seq
                recordList = hdl.readseq()
            hdl.close()
            end = time.time()
            msg = "Reading %d sequences by ReadFastaByBlock costs %.3fs seconds"
            print msg % (cnt, (end - start))
        except (IndexError, ValueError):
            pass
#}}}
    if 0:  #{{{ #test RemoveUnnecessaryGap
        try:
            infile = sys.argv[1]
            start = time.time()
            (idList, seqList) = myfunc.ReadFasta_without_annotation(infile)
            seqList = lcmp.RemoveUnnecessaryGap_old(seqList)
            end = time.time()
            msg = "Run RemoveUnnecessaryGap_old for %s costs %.3fs seconds"
            print >> sys.stderr, msg % (infile, (end - start))
            for seq in seqList:
                print seq

            start = time.time()
            (idList, seqList) = myfunc.ReadFasta_without_annotation(infile)

            seqList = lcmp.RemoveUnnecessaryGap(seqList)
            end = time.time()
            msg = "Run RemoveUnnecessaryGap for %s costs %.3fs seconds"
            print >> sys.stderr, msg % (infile, (end - start))
            for seq in seqList:
                print seq

        except IndexError:
            pass
#}}}
    if 0:  #{{{ #test ReadMPAByBlock
        try:
            infile = sys.argv[1]
            hdl = myfunc.ReadMPAByBlock(infile)
            if hdl.failure:
                return
            recordList = hdl.readseq()
            while recordList != None:
                for rd in recordList:
                    #print rd.seqid
                    print ">%s" % (rd.description)
                    print "%s" % (myfunc.mpa2seq(rd.mpa))
                recordList = hdl.readseq()
            hdl.close()
        except IndexError:
            pass
#}}}
    if 0:  #{{{
        try:
            dbname = sys.argv[1]
            print dbname
            from myfunc import MyDB
            cls = MyDB(dbname)
            #            print cls.idList
            record = cls.GetRecord("A0FGX9")
            if record:
                print record
                #             for rd in  cls.GetAllRecord():
                #                 print rd
                (seqid, anno, seq) = myfunc.ExtractFromSeqWithAnno(record)
                print(seqid, anno, seq)
        except IndexError:
            pass

#}}}
    if 0:  #{{{ #test subprocess
        import glob
        #invoke shell explicitly, not very good, may have security problems
        subprocess.call("seq 10", shell=True)
        subprocess.call("echo wait for 2 seconds...; sleep 2", shell=True)
        subprocess.call("ls topo*.py", shell=True)
    if 1:  #{{{ #test subprocess
        import glob
        #invoke shell implicitly, recommended way
        subprocess.call(["seq", "10"], shell=False)
        subprocess.call(["echo", "wait for 1 seconds..."])
        subprocess.call(["sleep", "1"])
        try:
            print subprocess.check_call(["ls",
                                         "topo*.py"])  #This will not work
        except subprocess.CalledProcessError, e:
            print "error message:", e
        subprocess.call(["ls"] + glob.glob("topo*.py"))