def ReadPercentTM(infile):#{{{
    """Read a six-column table into {pfamid: [numTM, numSeq]}.

    Blank lines and lines starting with "#" are skipped, and so are lines
    that do not split into exactly six whitespace-separated fields.
    Field 0 is the key, field 1 is numTM and field 3 is numSeq (both
    converted to int).  A line whose numeric fields cannot be converted is
    reported on stderr together with its line number and then skipped.

    Returns:
        dict mapping pfamid to [numTM, numSeq]; an empty dict when
        myfunc.ReadLineByBlock reports failure for infile.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    percentTMDict = {}
    cntline = 0
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            cntline += 1  # counts every line, including skipped ones
            if not line or line[0] == "#":
                continue
            strs = line.split()
            if len(strs) != 6:
                continue
            try:
                percentTMDict[strs[0]] = [int(strs[1]), int(strs[3])]
            except (IndexError, ValueError):
                # sys.stderr.write behaves the same on Python 2 and 3,
                # unlike the "print >>" statement used before.
                msg = "Error in mapfile %s at line %d: \"%s\""
                sys.stderr.write(msg % (infile, cntline, line) + "\n")
        lines = hdl.readlines()
    hdl.close()
    return percentTMDict
def MPA2MSA_old(infile, output_format, fpout): #{{{
    """Expand two-line compact alignment records and write them to fpout.

    The input is consumed as consecutive pairs of lines.  The first line of
    each pair is written unchanged.  The second line is split on
    whitespace: a token containing "-" is read as "b-e" and replaced by
    (e - b) gap characters, any other token is written as is.

    Args:
        infile: path handed to myfunc.ReadLineByBlock.
        output_format: accepted for interface compatibility; not used here.
        fpout: writable file-like object.

    Returns:
        0 on success, 1 when the input could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    remainLineList = []
    lines = hdl.readlines()
    while lines is not None:
        # A block may end in the middle of a record; the unpaired line is
        # carried over and prepended to the next block.
        lines = remainLineList + lines
        numLine = len(lines)
        numRD = numLine // 2  # floor division: a record count on Py2 and Py3
        for i in range(numRD):
            fpout.write("%s\n" % lines[2 * i])
            for ss in lines[2 * i + 1].split():
                if "-" in ss:
                    strs1 = ss.split("-")
                    b = int(strs1[0])
                    e = int(strs1[1])
                    fpout.write("-" * (e - b))
                else:
                    fpout.write(ss)
            fpout.write("\n")
        if numRD * 2 < numLine:
            remainLineList = [lines[numLine - 1]]
        else:
            remainLineList = []
        lines = hdl.readlines()
    hdl.close()
    return 0
def ReadDupPairDict(infile):#{{{
    """Collect the pairs flagged 'y' in a duplication table.

    A line is used when it is neither empty nor a "#" comment, has at least
    two whitespace-separated fields, its second field is 'y' and its first
    field splits on "-" into exactly two ids.  Every "|"-separated segment
    after the first is stripped and passed to ParseDupHit.

    Returns:
        dict keyed by (id1, id2) with values
        {'isDup': 'y', 'hit': [parsed hits]}; an empty dict when the file
        could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    dupPairs = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            fields = record.split()
            if len(fields) < 2 or fields[1] != 'y':
                continue  # not marked as a duplicated pair
            ids = fields[0].split("-")
            if len(ids) != 2:
                continue
            entry = {}
            entry['isDup'] = 'y'
            # each hit is whatever ParseDupHit builds from one segment
            entry['hit'] = [ParseDupHit(segment.strip())
                            for segment in record.split('|')[1:]]
            dupPairs[(ids[0], ids[1])] = entry
    hdl.close()
    return dupPairs
def MPA2MSA(infile, output_format, fpout): #{{{
    """Expand two-line compact alignment records and write them to fpout.

    The input is consumed as consecutive pairs of lines.  The first line of
    each pair is written unchanged.  The second line is split on
    whitespace: a token whose first character is a digit is read as an
    integer gap length and replaced by that many "-" characters, any other
    token is kept as is.  The expanded tokens are joined without separator.

    Args:
        infile: path handed to myfunc.ReadLineByBlock.
        output_format: accepted for interface compatibility; not used here.
        fpout: writable file-like object.

    Returns:
        0 on success, 1 when the input could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    remainLineList = []
    lines = hdl.readlines()
    while lines is not None:
        # A block may end in the middle of a record; the unpaired line is
        # carried over and prepended to the next block.
        lines = remainLineList + lines
        numLine = len(lines)
        numRD = numLine // 2  # floor division: a record count on Py2 and Py3
        for i in range(numRD):
            fpout.write("%s\n" % lines[2 * i])
            li = []
            for ss in lines[2 * i + 1].split():
                if ss[0].isdigit():
                    li.append("-" * int(ss))
                else:
                    li.append(ss)
            fpout.write("%s\n" % ("".join(li)))
        if numRD * 2 < numLine:
            remainLineList = [lines[numLine - 1]]
        else:
            remainLineList = []
        lines = hdl.readlines()
    hdl.close()
    return 0
def ReadPairInfo(infile): #{{{
    """Read a nine-column pair table into a list of records.

    Empty lines and "#" comment lines are skipped.  Every other line is
    split on whitespace and must provide at least nine fields; a short line
    raises IndexError and a non-numeric value raises ValueError.

    Returns:
        list of [seqid1, seqid2, NtermState1, NtermState2, numTM1, numTM2,
        seqLen1, seqLen2, seqidt], with fields 4-7 as int and field 8 as
        float; an empty list when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return []
    pairs = []
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            f = record.split()
            pairs.append([
                f[0], f[1], f[2], f[3],
                int(f[4]), int(f[5]),
                int(f[6]), int(f[7]),
                float(f[8]),
            ])
    hdl.close()
    return pairs
def ReadPfamScan2(infile):#{{{
    """Read domain hits per sequence, keeping those within the e-value cut.

    A quick solution, to save a little memory: each hit is stored as a
    plain tuple.  Lines that are empty, start with "#" or have fewer than
    15 whitespace-separated fields are skipped.  A hit is kept when field
    12 (as float) is <= g_params['evalue_threshold'].

    Returns:
        dict mapping field 0 (seqid) to a list of
        (alnBeg, alnEnd, pfamid, clanid) tuples, where alnBeg/alnEnd are
        fields 1 and 2 as int, pfamid is field 5 up to its first "." and
        clanid is field 14; an empty dict when the file could not be
        opened.
    """
    maxEvalue = g_params['evalue_threshold']
    hitsBySeqID = {}
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            cols = record.split()
            if len(cols) < 15:
                continue
            seqid = cols[0]
            hit = (int(cols[1]), int(cols[2]), cols[5].split('.')[0])
            evalue = float(cols[12])
            hit = hit + (cols[14],)
            if evalue <= maxEvalue:
                hitsBySeqID.setdefault(seqid, []).append(hit)
    if hdl:
        hdl.close()
    return hitsBySeqID
def Filter_seqid2fam_map(infile, keyIDSet, contentIDSet, isKeyIDSet,
        isContentIDSet, fpout):
    """Filter a "key count id1 id2 ..." map file by key and by member id.

    Each non-empty line is split on whitespace into a key, a count and a
    list of ids.  The line is kept when isKeyIDSet is false or the key is
    in keyIDSet; its ids are kept when isContentIDSet is false or the id is
    in contentIDSet.  Lines with at least one surviving id are written as
    "key n id1 ... idn", with n recomputed from the surviving ids.

    Returns:
        0 on success; 1 when the input could not be opened or a line has
        fewer than two fields (reported on stderr).

    Raises:
        ValueError: when the count field of a line is not an integer.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    try:
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                if not line:
                    continue
                strs = line.split()
                try:
                    key = strs[0]
                    # The stored count is only validated; the written
                    # count is recomputed after filtering.
                    int(strs[1])
                except IndexError:
                    msg = "Error in infile %s with line \"%s\""
                    sys.stderr.write(msg % (infile, line) + "\n")
                    return 1
                if isKeyIDSet and key not in keyIDSet:
                    continue
                tmp_idlist = [idd for idd in strs[2:]
                              if (not isContentIDSet) or (idd in contentIDSet)]
                if tmp_idlist:
                    fpout.write("%s %d" % (key, len(tmp_idlist)))
                    for idd in tmp_idlist:
                        fpout.write(" %s" % (idd))
                    fpout.write("\n")
            lines = hdl.readlines()
    finally:
        # Also runs on the early error return, which used to leak the
        # handle.
        hdl.close()
    return 0
def ReadDGScore(infile):#{{{
    """Read per-sequence scores from a two- or three-column table.

    Empty lines and "#" comment lines are skipped.  Field 0 is the sequence
    id and the last field is the score (field 1 of a two-field line, field
    2 of a three-field line).  Lines with any other number of fields are
    skipped, as are lines whose score is not a valid float.

    Returns:
        dict mapping seqid to the list of its scores in file order; an
        empty dict (after a message on stderr) when the file could not be
        opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        msg = "Failed to read file %s in function %s"
        print(msg%(infile, sys._getframe().f_code.co_name), file=sys.stderr)
        return {}
    dgScoreDict = {}
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            strs = line.split()
            # Only 2- and 3-field layouts define where the score is.  Longer
            # lines used to reuse the score of the previous line (or fail
            # on an unbound name), so they are skipped explicitly.
            if len(strs) not in (2, 3):
                continue
            try:
                dgscore = float(strs[-1])
            except (ValueError, TypeError):
                continue
            dgScoreDict.setdefault(strs[0], []).append(dgscore)
        lines = hdl.readlines()
    hdl.close()
    return dgScoreDict
def ReadPSIPREDSS2(infile):
    """Read a six-column per-residue table (presumably PSIPRED ss2 output).

    Only lines with exactly six whitespace-separated fields whose first
    field is all digits are used.  Field 1 is appended to the amino-acid
    sequence and field 2 to the secondary-structure sequence.  Fields 3-5
    are floats that are multiplied by 1000, truncated to int and stored in
    three array('h') columns.

    Returns:
        (aaSeq, ssSeq, arrayList) where arrayList holds the three arrays;
        (None, None, None) when the file could not be opened or a record
        holds a non-numeric value (reported on stderr).
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return (None, None, None)
    aaSeqList = []
    ssSeqList = []
    arrayList = [array('h') for _ in range(3)]
    try:
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                strs = line.split()
                if len(strs) != 6 or not strs[0].isdigit():
                    continue
                aaSeqList.append(strs[1])
                ssSeqList.append(strs[2])
                for i in range(3):
                    try:
                        arrayList[i].append(int(float(strs[i + 3]) * 1000))
                    except (ValueError, IndexError):
                        msg = "Bad record \"%s\" in file %s"
                        sys.stderr.write(msg % (line, infile) + "\n")
                        return (None, None, None)
            lines = hdl.readlines()
    finally:
        # Also runs on the bad-record return, which used to leak the
        # handle.
        hdl.close()
    return ("".join(aaSeqList), "".join(ssSeqList), arrayList)
def GetFullSeq(infile, hdl_seqdb, fpout):#{{{
    """Write the database record of every id found in infile to fpout.

    The ids are gathered block by block with GetDatabaseIDList, made unique
    with myfunc.uniquelist and then looked up one at a time through
    hdl_seqdb.GetRecord.  Records that evaluate as true are written to
    fpout; ids without a record are reported on stderr.

    Returns:
        (status, numID, cntRetrieved): status is 0 on success and 1 when
        the input could not be opened (the counts are then 0); numID is the
        number of unique ids and cntRetrieved the number of records
        written.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return (1, 0, 0)
    idList = []
    lines = hdl.readlines()
    while lines is not None:
        idList += GetDatabaseIDList(lines)
        lines = hdl.readlines()
    hdl.close()

    idList = myfunc.uniquelist(idList)
    cntRetrieved = 0
    for seqid in idList:
        record = hdl_seqdb.GetRecord(seqid)
        if record:
            fpout.write(record)
            cntRetrieved += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and 3, unlike
            # the "print >>" statement used before.
            msg = "Failed to retrieve record for ID %s"
            sys.stderr.write(msg % (seqid) + "\n")
    return (0, len(idList), cntRetrieved)
def ReadPairAlnTableInfo(infile):#{{{
    """Read a 13-column pairwise alignment table.

    Empty lines, "#" comment lines and lines that do not have exactly 13
    whitespace-separated fields are skipped, as are lines with a numeric
    field that fails to convert.

    Returns:
        dict keyed by "id1-id2" (fields 0 and 1) with values
        {'seqidt': field 2, 'seqidt1': field 11, 'seqidt2': field 12,
        'seqLength1': field 5, 'seqLength2': field 6}; the seqidt values
        are floats and the lengths ints.  An empty dict (after a message on
        stderr) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        msg = "Failed to read file %s in function %s"
        print(msg%(infile, sys._getframe().f_code.co_name), file=sys.stderr)
        return {}
    pairalnStat = {}
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            strs = line.split()
            if len(strs) != 13:
                continue
            try:
                # Field 4 (alignment length) is not stored, but a row where
                # it is not numeric is still rejected as before.
                float(strs[4])
                record = {
                    'seqidt': float(strs[2]),
                    'seqidt1': float(strs[11]),
                    # was assigned seqidt1 by mistake
                    'seqidt2': float(strs[12]),
                    'seqLength1': int(strs[5]),
                    'seqLength2': int(strs[6]),
                }
            except (IndexError, ValueError, TypeError, KeyError):
                continue
            pairalnStat[strs[0] + '-' + strs[1]] = record
        lines = hdl.readlines()
    hdl.close()
    return pairalnStat
def ReadPfamDefFile(infile):#{{{
    """Read short definitions of Pfam families and clans.

    Each line is split on tab characters; lines with fewer than four fields
    are ignored.  Field 0 is the family id, field 1 the clan id, field 2
    the clan definition and field 3 the family definition.  A family whose
    clan id is the literal backslash-N marker is entered in the clan table
    under its own family id and definition.

    Returns:
        (dtPfam, dtClan): two dicts mapping id to short definition;
        ({}, {}) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return ({}, {})
    pfamDefs = {}
    clanDefs = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            cols = record.split("\t")
            if len(cols) < 4:
                continue
            famID, clanID, clanShort, famShort = cols[0], cols[1], cols[2], cols[3]
            pfamDefs[famID] = famShort
            if clanID == r"\N":
                clanDefs[famID] = famShort
            else:
                clanDefs[clanID] = clanShort
    hdl.close()
    return (pfamDefs, clanDefs)
def CountUniquePairInvertedInfo(infile, pfamidDefDict, fpout):
    """Summarise one inverted-pair report as a single line on fpout.

    Lines starting with "General" provide numInvPair, numAllPair and ratio
    (fields 1-3); the values of the last such line are used.  Lines
    starting with "Pair" provide two ids (fields 1, 2), the N-terminal
    state of the first (field 3) and numTM (field 5).  The id whose state
    is 'i' goes to the first id set and its partner to the second.

    The family id is the base name of infile up to its first "."; its
    definition is looked up in pfamidDefDict ("N/A" when missing).  One
    formatted summary line is written when either id set is non-empty.

    Returns:
        0 on success, 1 when the input could not be opened.  (Success used
        to fall off the end and return None; 0 matches the sibling
        functions and is equally falsy.)
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    idset1 = set()
    idset2 = set()
    numTMSet = set()
    numInvPair = 0
    numAllPair = 0
    ratio = 0.0
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if line.startswith("General"):
                strs = line.split()
                numInvPair = int(strs[1])
                numAllPair = int(strs[2])
                ratio = float(strs[3])
            elif line.startswith("Pair"):
                strs = line.split()
                id1 = strs[1]
                id2 = strs[2]
                NtermState1 = strs[3]
                numTMSet.add(int(strs[5]))
                if NtermState1 == 'i':
                    idset1.add(id1)
                    idset2.add(id2)
                else:
                    idset1.add(id2)
                    idset2.add(id1)
        lines = hdl.readlines()
    hdl.close()

    pfamid = os.path.basename(infile).split(".")[0]
    try:
        pfamdef = pfamidDefDict[pfamid]
    except KeyError:
        pfamdef = "N/A"
    if idset1 or idset2:
        fpout.write("%-8s %20s %4d %4d %2d %8s %5d %5d %6.3f\n"%(
            pfamid, pfamdef, len(idset1), len(idset2), len(numTMSet),
            str(list(numTMSet)), numInvPair, numAllPair, ratio
            ))
    return 0
def IDMap2SeqID(infile, fpout):
    """Write every id from the third field onwards, one per line.

    Empty lines and "#" comment lines are skipped.  Each remaining line is
    split on whitespace; when it has more than two fields, fields 2..end
    are written to fpout, each followed by a newline.

    Returns:
        0 on success, 1 when the input could not be opened.  (Success used
        to fall off the end and return None; 0 matches the sibling
        functions and is equally falsy.)
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            # fpout.write behaves the same on Python 2 and 3, unlike the
            # "print >> fpout" statement used before.
            for ss in line.split()[2:]:
                fpout.write("%s\n" % ss)
        lines = hdl.readlines()
    hdl.close()
    return 0
def ReadSeqPathMapDict(infile):#{{{
    """Read a two-column table into a dict.

    Empty lines and "#" comment lines are skipped.  Lines that split on
    whitespace into exactly two fields map field 0 to field 1; all other
    lines are ignored.

    Returns:
        dict mapping field 0 to field 1; an empty dict when the file could
        not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    dt = {}
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            strs = line.split()
            if len(strs) == 2:
                dt[strs[0]] = strs[1]
        lines = hdl.readlines()
    hdl.close()  # was missing: the handle stayed open after reading
    return dt
def ReadGOTerm(infile): #{{{
    """Read a tab-separated table of GO ids and their second column.

    Empty lines and "#" comment lines are skipped.  Lines with at least two
    tab-separated fields map the stripped field 0 to the stripped field 1.

    Returns:
        dict mapping goid to the stripped second field, or the integer 1
        (not a dict) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    goTerms = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            cols = record.split("\t")
            if len(cols) < 2:
                continue
            goTerms[cols[0].strip()] = cols[1].strip()
    hdl.close()
    return goTerms
def ReadGOInfo(infile): #{{{
    """Parse every data line with ScanfGOInfo and collect the results.

    Empty lines and "#" comment lines are skipped.  A parsed result equal
    to {} is dropped.

    Returns:
        list of the values returned by ScanfGOInfo, in file order, or the
        integer 1 (not a list) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    parsed = []
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            info = ScanfGOInfo(record)
            if info != {}:
                parsed.append(info)
    hdl.close()
    return parsed
def ReadSignalPFile(infile): #{{{
    """Index the data lines of a file by sequence id.

    Empty lines and "#" comment lines are skipped.  The key of each
    remaining line is myfunc.GetSeqIDFromAnnotation(line); a later line
    with the same key replaces an earlier one.

    Returns:
        dict mapping seqid to the complete line, or the integer 1 (not a
        dict) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    lineBySeqID = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if record != "" and record[0] != "#":
                lineBySeqID[myfunc.GetSeqIDFromAnnotation(record)] = record
    hdl.close()
    return lineBySeqID
def ReadGOAnc(infile):
    """Index the data lines of a ";"-separated file by their first field.

    Empty lines and "#" comment lines are skipped.  The key is the text
    before the first ";" with surrounding whitespace removed; the value is
    the complete line.

    Returns:
        dict mapping goid to the complete line, or the integer 1 (not a
        dict) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    ancestorLines = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if record == "" or record[0] == "#":
                continue
            goid = record.partition(";")[0].strip()
            ancestorLines[goid] = record
    hdl.close()
    return ancestorLines
def ReadDupPairList(infile):#{{{
    """List the pairs flagged 'y' in a duplication table.

    A line contributes when it has at least two whitespace-separated
    fields, its second field is 'y' and its first field splits on "-" into
    exactly two ids.

    Returns:
        list of (id1, id2) tuples in file order; an empty list when the
        file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return []
    dupPairs = []
    for block in iter(hdl.readlines, None):
        for record in block:
            fields = record.split()
            if len(fields) < 2 or fields[1] != 'y':
                continue
            ids = fields[0].split("-")
            if len(ids) == 2:
                dupPairs.append(tuple(ids))
    hdl.close()
    return dupPairs
def ReadSeqLengthDict(infile):#{{{
    """Read a two-column "seqid length" table.

    Empty lines and "#" comment lines are skipped, as are lines that do not
    split on whitespace into exactly two fields.  A non-integer length
    raises ValueError.

    Returns:
        dict mapping seqid to its length as int; an empty dict when the
        file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    lengthBySeqID = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if not record or record[0] == "#":
                continue
            fields = record.split()
            if len(fields) != 2:
                continue
            lengthBySeqID[fields[0]] = int(fields[1])
    hdl.close()
    return lengthBySeqID
def ReadIDWithAnnoInfo(infile):#{{{
    """Read a two-column, tab-separated "seqid annotation" table.

    Empty lines and "#" comment lines are skipped, as are lines that do not
    split on tab into exactly two fields.  The annotation is stripped of
    surrounding whitespace; the id is stored as is.

    Returns:
        dict mapping seqid to annotation; an empty dict when the file could
        not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    annoBySeqID = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if not record or record[0] == "#":
                continue
            cols = record.split("\t")
            if len(cols) == 2:
                annoBySeqID[cols[0]] = cols[1].strip()
    hdl.close()
    return annoBySeqID
def SelectLineByID(infile, idListSet, fpout):#{{{
    """Copy to fpout the lines of infile whose id is in idListSet.

    Empty lines and "#" comment lines are always copied.  For every other
    line the id is extracted according to g_params['method_getid']:

        0: first whitespace-separated word
        1: first word, cut at its first ";"
        2: myfunc.GetSeqIDFromAnnotation(line)
        3: column(s) named by g_params['sel_field_list'] (1-based): a
           single column gives a string id, two or more give a tuple.  An
           empty list selects index -1, i.e. the last column.

    A line from which no id can be extracted is reported on stderr and
    skipped.  Previously such a line was tested with the id left over from
    the preceding line, or raised NameError when it came first.

    Returns:
        0 on success; 1 when the input could not be opened or
        method_getid is not one of 0-3.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    method_getid = g_params['method_getid']
    sel_field_list = g_params['sel_field_list']
    if method_getid not in (0, 1, 2, 3):
        # An unknown method can never yield an id, so fail once up front
        # instead of printing the value for every line.
        sys.stderr.write("Unrecognized method_getid: %s\n" % (method_getid,))
        hdl.close()
        return 1
    sel_field = 0
    if method_getid == 3 and len(sel_field_list) == 1:
        sel_field = sel_field_list[0]

    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                fpout.write("%s\n"%line)
                continue
            try:
                if method_getid == 0:
                    idd = line.split(None, 1)[0]
                elif method_getid == 1:
                    idd = (line.split(None, 1)[0]).partition(";")[0]
                elif method_getid == 2:
                    idd = myfunc.GetSeqIDFromAnnotation(line)
                elif len(sel_field_list) < 2:
                    idd = line.split()[sel_field-1]
                else:
                    strs = line.split()
                    idd = tuple([strs[ff-1] for ff in sel_field_list])
            except IndexError:
                # Two newlines reproduce the former "print" output exactly.
                sys.stderr.write("Bad line \"%s\"\n\n" % line)
                continue
            if idd in idListSet:
                fpout.write("%s\n"%line)
        lines = hdl.readlines()
    hdl.close()
    return 0
def ReadMapFile(infile):#{{{
    """Read a tab-separated table, mapping field 0 to field 1.

    Empty lines and "#" comment lines are skipped, as are lines without a
    second tab-separated field.  Fields beyond the second are ignored.

    Returns:
        dict mapping field 0 to field 1; an empty dict when the file could
        not be opened.
    """
    mapping = {}
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return mapping
    for block in iter(hdl.readlines, None):
        for record in block:
            if not record or record[0] == "#":
                continue
            cols = record.split("\t")
            if len(cols) >= 2:
                mapping[cols[0]] = cols[1]
    hdl.close()
    return mapping
def ReadRLTYInfo(infile):#{{{
    """Read a two-column "seqid value" table with float values.

    Only lines that split on whitespace into exactly two fields are used;
    a line whose second field is not a valid float is skipped.

    Returns:
        dict mapping seqid to float; an empty dict (after a message on
        stderr) when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        msg = "Failed to read file %s in function %s"
        print(msg%(infile, sys._getframe().f_code.co_name), file=sys.stderr)
        return {}
    rltyBySeqID = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            fields = record.split()
            if len(fields) != 2:
                continue
            try:
                rltyBySeqID[fields[0]] = float(fields[1])
            except (ValueError, TypeError, KeyError):
                continue
    hdl.close()
    return rltyBySeqID
def ReadPairInfo(infile): #{{{
    """Read a pair list file and group its pairs by family.

    Format of the pairlistfile:
    #seqid1 seqid2 seqidt famid pfamdef numSeqCls1 numSeqCls2 numSeq nTM1 nTM2 isSP isPDB

    Empty lines, "#" comment lines and lines with fewer than 12
    whitespace-separated fields are skipped.  The family-level values are
    taken from the first line seen for each famid.

    Output:
        pairInfoDict {pfamid: {'pfamdef', 'numSeqCls1', 'numSeqCls2',
        'numseq', 'nTM_Group1', 'nTM_Group2', 'pairlist'}} where
        'pairlist' holds (seqid1, seqid2, seqidt, isSP, isPDB) tuples;
        an empty dict when the file could not be opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    famInfo = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if not record or record[0] == "#":
                continue
            f = record.split()
            if len(f) < 12:
                continue
            famid = f[3]
            if famid not in famInfo:
                famInfo[famid] = {
                    'pfamdef': f[4],
                    'numSeqCls1': int(f[5]),
                    'numSeqCls2': int(f[6]),
                    'numseq': int(f[7]),
                    'nTM_Group1': int(f[8]),
                    'nTM_Group2': int(f[9]),
                    'pairlist': [],
                }
            pair = (f[0], f[1], float(f[2]), int(f[10]), int(f[11]))
            famInfo[famid]['pairlist'].append(pair)
    hdl.close()
    return famInfo
def ReadSignalPDict(infile):#{{{
    """Read a "SeqID location ..." table into {seqid: location}.

    Format of the signalp file: SeqID location Y

    Empty lines and "#" comment lines are skipped.  Lines with at least two
    whitespace-separated fields map field 0 to field 1 as int; a line whose
    second field is not an integer is skipped.

    Returns:
        dict mapping seqid to int; an empty dict when the file could not be
        opened.
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return {}
    locationBySeqID = {}
    for block in iter(hdl.readlines, None):
        for record in block:
            if not record or record[0] == "#":
                continue
            fields = record.split()
            if len(fields) < 2:
                continue
            try:
                locationBySeqID[fields[0]] = int(fields[1])
            except ValueError:
                continue
    hdl.close()
    return locationBySeqID
def FilterUniprotIDMap(infile, fpout):
    """Copy a tab-separated id map to fpout, dropping self-referring lines.

    For each line, when field 2 contains field 0 as a substring, an id is
    extracted from field 2 with GetUniprotIDFromLongName:

        - id equals field 0: the line is dropped;
        - id is non-empty but differs: "Error" plus the line goes to
          stderr and the line is still written;
        - id is empty: "Null" plus the line goes to stderr and the line is
          still written.

    Lines with fewer than three fields are reported on stderr as
    "IndexError" and are not written.

    Returns:
        0 on success, 1 when the input could not be opened.  (Failure was
        previously tested with "not hdl", unlike every sibling reader which
        checks hdl.failure; success returned None and the handle was never
        closed.)
    """
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            isIgnore = False
            strs = line.split("\t")
            try:
                if strs[2].find(strs[0]) != -1:
                    uniprotid = GetUniprotIDFromLongName(strs[2])
                    if uniprotid != "":
                        if uniprotid == strs[0]:
                            isIgnore = True
                        else:
                            sys.stderr.write("Error\t%s\n" % line)
                    else:
                        sys.stderr.write("Null\t%s\n" % line)
                # NOTE(review): the write is assumed to apply to every
                # well-formed line, not only to lines whose field 2
                # contains field 0 - confirm against the intended filter.
                if not isIgnore:
                    fpout.write("%s\n" % line)
            except IndexError:
                sys.stderr.write("IndexError\t%s\n" % line)
        lines = hdl.readlines()
    hdl.close()
    return 0
def main(g_params): #{{{
    """Select, per sequence, the SignalP line matching its taxonomic group.

    Command line: [-o OUTFILE] [-gram+ FILE] [-gram- FILE] [-euk FILE]
    [-q] [-debug] [--] INFILE.  The three group files are indexed with
    ReadSignalPFile.  INFILE is read as tab-separated lines with exactly
    three fields; field 0 is the sequence id and field 2 the group
    ("Gram+", "Gram-" or "Euk", also accepted with a lower-case first
    letter).  When the id is found in the dictionary of its group, the
    stored line is written to the output.

    Args:
        g_params: dict of run-time flags; 'isQuiet' and 'isDEBUG' are set
            from the command line and 'isDEBUG' is read while processing.

    Returns:
        0 on success; 1 after printing help, on a wrong argument, or when
        any input file fails its check or cannot be opened.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramPositiveFile = ""
    gramNegativeFile = ""
    eukFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):  # also safe for an empty argument
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram+", "--gram+"]:
                (gramPositiveFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram-", "--gram-"]:
                (gramNegativeFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-euk", "--euk"]:
                (eukFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % argv[i])
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile, "taxidwithtaxoFile") != 0:
        return 1
    if myfunc.checkfile(gramPositiveFile, "gramPositiveFile") != 0:
        return 1
    if myfunc.checkfile(gramNegativeFile, "gramNegativeFile") != 0:
        return 1
    if myfunc.checkfile(eukFile, "eukFile") != 0:
        return 1

    gramPositiveDict = ReadSignalPFile(gramPositiveFile)
    gramNegativeDict = ReadSignalPFile(gramNegativeFile)
    eukDict = ReadSignalPFile(eukFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        myfunc.myclose(fpout)  # do not leave the output handle open
        return 1
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            strs = line.split("\t")
            if len(strs) != 3:
                continue
            seqid = strs[0].strip()
            taxo = strs[2].strip()
            info = ""
            try:
                if taxo == "Gram+" or taxo == "gram+":
                    info = gramPositiveDict[seqid]
                elif taxo == "Gram-" or taxo == "gram-":
                    info = gramNegativeDict[seqid]
                elif taxo == "Euk" or taxo == "euk":
                    info = eukDict[seqid]
                # NOTE(review): the debug trace is assumed to follow the
                # whole if/elif chain rather than only the "Euk" branch -
                # confirm.
                if g_params['isDEBUG']:
                    sys.stderr.write("%s: %s\n" % (seqid, taxo))
            except KeyError:
                info = ""
            if info != "":
                fpout.write("%s\n" % info)
        lines = hdl.readlines()
    hdl.close()
    myfunc.myclose(fpout)
    return 0
def _RunExternalCommand(cmdArgs):
    """Run an external program from an argument list, without a shell.

    A program that cannot be started is reported on stderr instead of
    raising.  Returns the exit status, or None when it could not start.
    """
    import subprocess  # local import: the file's import block is not visible here
    try:
        return subprocess.call(cmdArgs)
    except OSError as e:
        sys.stderr.write("Failed to run \"%s\": %s\n" % (" ".join(cmdArgs), e))
        return None


def RunHHSearchPairwise(tableinfoFile, #{{{
        hhprofilepathList, hhprofilepathMapDictList,
        hhsearchpathList, hhsearchpathMapDictList,
        topoDict, outpath, dupfile):
    """Run hhsearch for every pair in tableinfoFile and record duplications.

    Each data line supplies two sequence ids (fields 0 and 1).  The result
    file is "<outpath>/<id1>_<id2>.hhr" in the order given in the table,
    or the prebuilt file found by GetProfileFileName when
    g_params['isUsePreBuildHHSearchResult'] is set and that file exists.
    The two ids are then ordered by the length of their topology in
    topoDict (missing ids count as length 0): the shorter is the query
    (.a3m profile) and the longer the template (.hhm profile).  hhsearch
    runs when the result file is missing or g_params['isForceOverWrite'] is
    set; it writes to "<hhrfile>.tmp", which is moved over the result file
    when it exists.

    When dupfile is not "" and the result file exists, CheckDuplication is
    called and, if it returns a non-empty dict, one line per pair is
    written to dupfile: "id1-id2 isDup len1 len2 numTM1 numTM2" followed
    by one " | ..." column per hit.

    The external commands are passed as argument lists rather than shell
    strings, so ids and paths containing spaces or shell metacharacters
    are handled literally.

    Returns:
        0 on success, 1 when tableinfoFile could not be opened.
    """
    fpoutDup = None
    if dupfile != "":
        fpoutDup = myfunc.myopen(dupfile, sys.stdout, "w", False)
    hdl = myfunc.ReadLineByBlock(tableinfoFile)
    if hdl.failure:
        myfunc.myclose(fpoutDup)  # do not leave the dup file handle open
        return 1

    cnt = 0
    lines = hdl.readlines()
    while lines is not None:
        for line in lines:
            if not line or line[0] == "#":
                continue
            strs = line.split()
            try:
                seqid1 = strs[0]
                seqid2 = strs[1]
            except (IndexError, ValueError):
                sys.stderr.write("Bad record line \"%s\"\n" % (line))
                continue

            # Result file and prebuilt key use the ids in table order.
            hhrfile = "%s%s%s_%s.hhr"%(outpath, os.sep, seqid1, seqid2)
            if g_params['isUsePreBuildHHSearchResult']:
                keystr = "%s_%s"%(seqid1, seqid2)
                tmp_hhrfile = GetProfileFileName(hhsearchpathList,
                        hhsearchpathMapDictList, keystr, ".hhr")
                if os.path.exists(tmp_hhrfile):
                    hhrfile = tmp_hhrfile
                else:
                    sys.stderr.write("hhrfile %s does not exist in"
                            " the prebuilt path\n" % (hhrfile))

            # Order the pair shorter - longer by topology length.  The
            # sort is stable, so equal lengths keep the table order.
            pairlist = []
            for seqid in (seqid1, seqid2):
                try:
                    topo = topoDict[seqid]
                except KeyError:
                    topo = ""
                pairlist.append((seqid, topo))
            pairlist.sort(key=lambda x: len(x[1]))
            (seqid1, topo1) = pairlist[0]  # shorter sequence: query
            (seqid2, topo2) = pairlist[1]  # longer sequence: template
            numTM1 = len(myfunc.GetTMPosition(topo1))
            numTM2 = len(myfunc.GetTMPosition(topo2))

            if not os.path.exists(hhrfile) or g_params['isForceOverWrite']:
                a3mfile = GetProfileFileName(hhprofilepathList,
                        hhprofilepathMapDictList, seqid1, ".a3m")
                hhmfile = GetProfileFileName(hhprofilepathList,
                        hhprofilepathMapDictList, seqid2, ".hhm")
                if a3mfile == "" or not os.path.exists(a3mfile):
                    sys.stderr.write(
                        "a3mfile not found for %s. Ignore.\n" % (seqid1))
                elif hhmfile == "" or not os.path.exists(hhmfile):
                    sys.stderr.write(
                        "hhmfile not found for %s. Ignore.\n" % (seqid2))
                else:
                    # Write to a temporary name first so that an
                    # interrupted run does not leave a partial .hhr file.
                    tmp_hhrfile = "%s.tmp"%(hhrfile)
                    _RunExternalCommand(["hhsearch", "-i", a3mfile,
                        "-d", hhmfile, "-o", tmp_hhrfile, "-v", "0",
                        "-nocons", "-nopred", "-nodssp"])
                    if os.path.exists(tmp_hhrfile):
                        _RunExternalCommand(
                            ["/bin/mv", "-f", tmp_hhrfile, hhrfile])
                        # NOTE(review): reported only when hhsearch left
                        # an output file - confirm this matches the
                        # original nesting.
                        sys.stdout.write("%s output\n" % hhrfile)

            if fpoutDup and os.path.exists(hhrfile):
                hitinfo = CheckDuplication(hhrfile, seqid1, seqid2,
                        topoDict, cnt)
                if hitinfo != {}:
                    fpoutDup.write("%s-%s %s %4d %4d %4d %4d" %(
                        seqid1, seqid2, hitinfo['isDup'],
                        len(topo1), len(topo2), numTM1, numTM2))
                    if 'hit' in hitinfo:
                        for hit in hitinfo['hit']:
                            ss_hit = "%d-%d(nTM=%d) %d-%d(nTM=%d)"%(
                                hit['posQuery'][0], hit['posQuery'][1],
                                hit['numTM1'],
                                hit['posTemplate'][0], hit['posTemplate'][1],
                                hit['numTM2'])
                            fpoutDup.write(" | %35s"%(ss_hit))
                    fpoutDup.write("\n")
            cnt += 1
        lines = hdl.readlines()
    hdl.close()
    myfunc.myclose(fpoutDup)
    return 0