Example #1
0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5):#{{{
    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])

    outfile = "%s%s%s"%(outpath, os.sep, rootname)

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout == None:
        return 1

    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        return -1

    myset = set([])

    recordList = hdl.readseq()
    while recordList != None:
        for rd in recordList:
            if method == "id":
                key = rd.seqid
            elif method == "seq":
                if isUseMD5:
                    key = md5.new(rd.seq).digest()
                else:
                    key = rd.seq
            if not key in myset:
                myset.add(key)
                fpout.write(">%s\n%s\n"%(rd.description, rd.seq))
        recordList = hdl.readseq()

    hdl.close()
    myfunc.myclose(fpout)
    return 0
Example #2
0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5):  #{{{
    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])

    outfile = "%s%s%s" % (outpath, os.sep, rootname)

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout == None:
        return 1

    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        return -1

    myset = set([])

    recordList = hdl.readseq()
    while recordList != None:
        for rd in recordList:
            if method == "id":
                key = rd.seqid
            elif method == "seq":
                if isUseMD5:
                    key = md5.new(rd.seq).digest()
                else:
                    key = rd.seq
            if not key in myset:
                myset.add(key)
                fpout.write(">%s\n%s\n" % (rd.description, rd.seq))
        recordList = hdl.readseq()

    hdl.close()
    myfunc.myclose(fpout)
    return 0
Example #3
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6),
                     (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10),
                     (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13),
                     (11, 13), (12, 14)]

    outfile = ""

    infile = ""
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % (
        DATADIR3)

    #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3)
    #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3)
    seqid2clanidMapFile = ""
    seqid2pfamidMapFile = ""
    tm_pfamidListFile = ""
    tm_clanidListFile = ""
    pfamid2seqidMapFile = ""
    clanid2seqidMapFile = ""
    dbname_predTM = ""
    pairlistwithpfamidFile = ""

    pfamtype = ""

    pairListFile = ""

    #classList_TableNumTMHeatMap = ["ALL", "RMSP"]
    classList_TableNumTMHeatMap = ["ALL"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                g_params[
                    'pairwise_comparison_method'], i = myfunc.my_getopt_int(
                        argv, i)
            elif argv[i] in ["-mindiffpair", "--mindiffpair"]:
                g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-pfamtype", "--pfamtype"]:
                pfamtype, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanidlist", "--clanidlist"]:
                (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamidlist", "--pfamidlist"]:
                (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2clanid", "--seqid2clanid"]:
                (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2pfamid", "--seqid2pfamid"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamid2seqid", "--pfamid2seqid"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanid2seqid", "--clanid2seqid"]:
                (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlistwithpfamid", "--pairlistwithpfamid"]:
                (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-predTMdbname", "--predTMdbname"]:
                (dbname_predTM, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlist", "--pairlist"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-winsize", "--winsize"]:
                (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-outname", "--outname"]:
                (g_params['outname'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-prokar", "--prokar"]:
                g_params['isOnlyAnaProkar'] = True
                i += 1
            elif argv[i] in ["-eukar", "--eukar"]:
                g_params['isOnlyAnaEukar'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(
            infile, "%s (line %d): infile" %
        (__file__, inspect.currentframe().f_lineno)) != 0:
        return 1

    dirpath = myfunc.my_dirname(infile)

    # try to obtain Pfam family tag
    tag = ""
    if pfamtype != "":
        if pfamtype.upper().find("FAM") != -1:
            tag = ".Family"
        elif pfamtype.upper().find("DOM") != -1:
            tag = ".Domain"
        elif pfamtype.upper().find("REP") != -1:
            tag = ".Repeat"
        elif pfamtype.upper().find("MOT") != -1:
            tag = ".Motif"
        else:
            tag = ""
    else:
        if infile.find(".Family.") != -1:
            tag = ".Family"
        elif infile.find(".Domain.") != -1:
            tag = ".Domain"
        elif infile.find(".Repeat.") != -1:
            tag = ".Repeat"
        elif infile.find(".Motif.") != -1:
            tag = ".Motif"
        else:
            tag = ""

    if seqid2clanidMapFile == "":
        seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % (
            DATADIR3)
    if myfunc.checkfile(
            seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if seqid2pfamidMapFile == "":
        seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if pfamid2seqidMapFile == "":
        pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % (
            DATADIR3)
    if myfunc.checkfile(
            pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if clanid2seqidMapFile == "":
        clanid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_pfamidListFile == "":
        tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_clanidListFile == "":
        tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % (
            DATADIR3)
    if myfunc.checkfile(
            tm_clanidListFile, "%s (line %d): tm_clanidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if dbname_predTM == "":
        dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % (
            DATADIR3)
    if myfunc.checkfile(
            "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if g_params['isOnlyAnaProkar']:
        prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % (
            DATADIR3)
        g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile))
        if len(g_params['prokarSeqIDSet']) < 1:
            return 1
    if g_params['isOnlyAnaEukar']:
        eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % (
            DATADIR3)
        g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile))
        if len(g_params['eukarSeqIDSet']) < 1:
            return 1

    if pairlistwithpfamidFile == "":
        pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % (
            dirpath)
    if myfunc.checkfile(
            pairlistwithpfamidFile, "%s (line %d): pairlistwithpfamidFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile)
    usedPfamIDSet = set(
        pfamid_2_seqidpair_Dict.keys())  # pfamids used in pair selection

    if pairListFile != "":
        li = myfunc.ReadPairList(pairListFile)
        SPE_PAIR_LIST = []
        for tup in li:
            SPE_PAIR_LIST.append((int(tup[0]), int(tup[1])))

    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)
    signalpDict = lcmp.ReadSignalPDict(signalpFile)

    seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile)
    seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)

    clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile)
    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile)
    tm_clanidList = myfunc.ReadIDList(tm_clanidListFile)

    tm_pfamidSet = set(tm_pfamidList)
    tm_clanidSet = set(tm_clanidList)

    hdl_predTM = myfunc.MyDB(dbname_predTM)
    if not hdl_predTM.failure:
        idSet_TMpro = set(hdl_predTM.indexedIDList)
    else:
        idSet_TMpro = set([])

    #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"]
    #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED']
    alignrangeList = ['FULL_ALIGNED']

    if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']):
        cmd = ["mkdir", "-p", g_params['outpath']]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1
Example #4
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    threshold_Fraction_Group_2 = 0.05
    threshold_NumSeq_Group_2 = 2
    tableinfoFile = ""
    pdbtospFile = ""
    sprotACListFile = ""

    threshold_g12_seqidt = 20.0

    topoalnFile = ""
    aapath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqidttype", "--seqidttype"]:
                (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-tableinfo", "--tableinfo"]:
                (tableinfoFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topoaln", "--topoaln"]:
                (topoalnFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-aapath", "--aapath"]:
                (aapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-thncls2", "--thncls2"]:
                (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-thfrac2", "--thfrac2"]:
                (threshold_Fraction_Group_2,
                 i) = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif (argv[i] in ["-pdbtosp", "--pdbtosp"]):
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif (argv[i] in ["-sprot", "--sprot"]):
                sprotACListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)
    if len(fileList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    if myfunc.checkfile(topoalnFile, "topoalnFile") != 0:
        return 1
    if myfunc.checkfile(aapath, "aapath") != 0:
        return 1
    if outfile == "":
        print >> sys.stderr, "outfile not set. Exit"
        return 1

    outpath = myfunc.my_dirname(outfile)
    if not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1
Example #5
0
        for tt in fracList:
            fpout.write(" %7.2f" % (tt * 100))
        fpout.write("\n")
    myfunc.myclose(fpout)
    print "file %s output" % (outfile1)
    # make plot
    cmd = ["%s/plotMaxFracFamilyWithTopoVariation.sh" %
           (binpath)] + outfileList
    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError, e:
        print e

# output selected pairlist and draw pairwise topology alignment for selected
# pairs
    outpath = myfunc.my_dirname(outfile)
    ext_topoaln = ".topoaln.fa"
    outfile_selected_pair = outfile + ".selected.pairlistwithfamid"
    fpout = myfunc.myopen(outfile_selected_pair, None, "w", True)
    print >> fpout, "#%-6s %6s %6s %7s %15s %10s %10s %6s %4s %4s %5s %5s" % (
        "seqid1", "seqid2", "seqidt", "famid", "pfamdef", "numSeqCls1",
        "numSeqCls2", "numSeq", "nTM1", "nTM2", "isSP", "isPDB")
    for li in selectedPairList:
        print >> fpout, "%-7s %6s %6.1f %7s %15s %10d %10d %6d %4d %4d %5d %5d" % (
            li[0], li[1], li[2], li[3], li[4], li[5], li[6], li[7], li[8],
            li[9], li[10], li[11])
    myfunc.myclose(fpout)

    cmd = [
        "%s/selectPairaln.py" % (binpath), "-pairaln", topoalnFile, "-l",
        outfile_selected_pair, "-split", "-outpath", outpath, "-ext",
        cntf = 0
        while i < numInput:
            splitList = []
            cntscore = 0
            j = 0
            while i+j < numInput:
                pair = inputList[i+j]
                j += 1
                seqfilename = pair[0]
                numseq = pair[1]
                if numseq >= 2: # There should be at least two sequences to run msa
                    rootname = os.path.basename(os.path.splitext(seqfilename)[0])
                    if outpath != "":
                        cur_outpath = outpath
                    else:
                        cur_outpath = myfunc.my_dirname(seqfilename)
                    gzfile = cur_outpath + os.sep + "%s.%s.mfa.gz"%(rootname, prog)
                    msafile = cur_outpath + os.sep + "%s.%s.mfa"%(rootname, prog)
                    if ((g_params['isGzip'] == True and not os.path.exists(gzfile)) or 
                            (g_params['isGzip'] == False and not os.path.exists(msafile))):
                        cntscore += pair[1]**2
                        splitList.append(pair[0])
                        if cntscore > threshold:
                            break
                    else:
                        print "%s already exist. Ignore"%(gzfile);

            if len(splitList) > 0:
                splitidlistfile = workdir + os.sep \
                        + "splitidlist.%s.%d.idlist"%(prog, cntf)
                splitscriptfile = workdir + os.sep \
Example #7
0
def Build_seqid2pfamid(infile, g_outpath):  #{{{
    outpath = ""
    dirname_infile = myfunc.my_dirname(infile)
    if g_outpath != "":
        outpath = g_outpath
    else:
        outpath = dirname_infile

    rootname = os.path.basename(os.path.splitext(infile)[0])

    domainfile = "%s/%s.domainlistperseq" % (outpath, rootname)
    seqid2pfamidfile = "%s/%s.seqid2pfamid" % (outpath, rootname)
    if os.path.exists(domainfile) and os.path.exists(
            seqid2pfamidfile) and not g_params['isOverwrite']:
        print >> sys.stderr, "result file %s and %s exist. Ignore" % (
            domainfile, seqid2pfamidfile)
        return 1

    fpout_domain = myfunc.myopen(domainfile, None, "w", True)
    fpout_table = myfunc.myopen(seqid2pfamidfile, None, "w", True)

    evalue_threshold = g_params['evalue_threshold']
    hdl = myfunc.ReadLineByBlock(infile)
    queue = deque([])
    if hdl.failure:
        return 1
    lines = hdl.readlines()
    while lines != None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            rd = ScanfHmmscanRecord(line)
            #             print rd
            if rd == None:
                print >> sys.stderr, "%s: bad record. line=\"%s\"" % (infile,
                                                                      line)
            else:
                evalue = rd[2]
                if evalue <= evalue_threshold:
                    queue.append(rd)
        # scan queue and output result
        idlist = [x[0] for x in queue]
        #         print idlist
        # the top records is complete for one query if there are more than one unique seqids
        idlist_unique = myfunc.uniquelist(idlist)
        idlist_complete = idlist_unique[:-1]  # remove the last item
        idset_complete = set(idlist_complete)
        if len(idset_complete) > 0:
            recordDict = {}
            cnt_used_rd = 0
            for i in xrange(len(queue)):
                seqid = queue[i][0]
                if not seqid in idset_complete:
                    continue
                if not seqid in recordDict:
                    recordDict[seqid] = []
                recordDict[seqid].append(queue[i])
                cnt_used_rd += 1
            #output
            for seqid in idlist_complete:
                try:
                    li = recordDict[seqid]
                except KeyError:
                    print "seqid=%s" % (
                        seqid), "idlist_complete=", idlist_complete
                    raise
                famidlist = [x[1] for x in li]
                famidlist = myfunc.uniquelist(famidlist)
                fpout_table.write("%s %d" % (seqid, len(famidlist)))
                for pfamid in famidlist:
                    fpout_table.write(" %s" % (pfamid))
                fpout_table.write("\n")

                fpout_domain.write("%s %d" % (seqid, len(li)))
                for rd in li:
                    fpout_domain.write(" %s,%d,%d" % (rd[1], rd[5], rd[6]))
                fpout_domain.write("\n")
            # pop up queue
            for i in xrange(cnt_used_rd):
                queue.popleft()
        lines = hdl.readlines()

    if len(queue) > 0:  # output the last item
        seqid = queue[0][0]
        li = queue
        famidlist = [x[1] for x in li]
        famidlist = myfunc.uniquelist(famidlist)
        fpout_table.write("%s %d" % (seqid, len(famidlist)))
        for pfamid in famidlist:
            fpout_table.write(" %s" % (pfamid))
        fpout_table.write("\n")

        fpout_domain.write("%s %d" % (seqid, len(li)))
        for rd in li:
            fpout_domain.write(" %s,%d,%d" % (rd[1], rd[5], rd[6]))
        fpout_domain.write("\n")

    hdl.close()
    myfunc.myclose(fpout_domain)
    myfunc.myclose(fpout_table)

    return 0