def readTransfer(file, flag='symbol', head=1):
    """Read a tab-separated locus/symbol/name table into a dict of sets.

    Every non-blank data line must split on tabs into exactly three
    fields, in this order: locus, symbol, name.

    Args:
        file: Path of the table to read.
        flag: 'symbol' to key the result by upper-cased symbol, or
            'locus' to key the result by locus (used as read).
        head: Number of leading lines to skip as header.

    Returns:
        A dict mapping each key to the set of upper-cased names seen for
        it.  When the symbol matches the module-level ``pattern`` and
        differs from the upper-cased name, the pair is written to stderr
        and the symbol is stored in place of the name.

    Raises:
        SystemExit: With exit code 1 (after a message on stderr) when
            ``flag`` is neither 'symbol' nor 'locus'.
        ValueError: When a non-blank data line does not split into
            exactly three tab-separated fields.
    """
    if flag not in ('symbol', 'locus'):
        # sys.stderr.write behaves the same on Python 2 and Python 3,
        # unlike the Python-2-only "print >> sys.stderr" statement.
        sys.stderr.write("Wrong flag, should be 'symbol' or 'locus'\n")
        sys.exit(1)

    by_symbol = flag == 'symbol'
    result = {}
    # Work on a private counter so None/False simply mean "no header".
    header_left = int(head or 0)

    # The context manager closes the file even if a malformed line raises.
    with open(file) as handle:
        for line in handle:
            if header_left > 0:
                header_left -= 1
                continue

            line = line.rstrip()
            if not line:
                # Blank lines carry no record; skip them instead of
                # failing the three-way unpack below.
                continue

            locus, symbol, name = line.split('\t')
            name = name.upper()
            if by_symbol:
                symbol = symbol.upper()

            # NOTE(review): the original nesting of this check could not be
            # recovered with certainty; it is applied in both modes here --
            # confirm that 'locus' mode is meant to run it as well.
            if pattern.match(symbol) and symbol != name:
                sys.stderr.write('%s %s\n' % (symbol, name))
                name = symbol

            # Keys can repeat, so names are accumulated in a set per key.
            key = symbol if by_symbol else locus
            result.setdefault(key, set()).add(name)

    return result
def readTransfer(file, flag='symbol', head=1):
    """Read a tab-separated locus/symbol/name table into a dict of sets.

    Every non-blank data line must split on tabs into exactly three
    fields, in this order: locus, symbol, name.

    Args:
        file: Path of the table to read.
        flag: 'symbol' to key the result by upper-cased symbol, or
            'locus' to key the result by locus (used as read).
        head: Number of leading lines to skip as header.

    Returns:
        A dict mapping each key to the set of upper-cased names seen for
        it.  When the symbol matches the module-level ``pattern`` and
        differs from the upper-cased name, the pair is written to stderr
        and the symbol is stored in place of the name.

    Raises:
        SystemExit: With exit code 1 (after a message on stderr) when
            ``flag`` is neither 'symbol' nor 'locus'.
        ValueError: When a non-blank data line does not split into
            exactly three tab-separated fields.
    """
    if flag not in ('symbol', 'locus'):
        # sys.stderr.write behaves the same on Python 2 and Python 3,
        # unlike the Python-2-only "print >> sys.stderr" statement.
        sys.stderr.write("Wrong flag, should be 'symbol' or 'locus'\n")
        sys.exit(1)

    by_symbol = flag == 'symbol'
    result = {}
    # Work on a private counter so None/False simply mean "no header".
    header_left = int(head or 0)

    # The context manager closes the file even if a malformed line raises.
    with open(file) as handle:
        for line in handle:
            if header_left > 0:
                header_left -= 1
                continue

            line = line.rstrip()
            if not line:
                # Blank lines carry no record; skip them instead of
                # failing the three-way unpack below.
                continue

            locus, symbol, name = line.split('\t')
            name = name.upper()
            if by_symbol:
                symbol = symbol.upper()

            # NOTE(review): the original nesting of this check could not be
            # recovered with certainty; it is applied in both modes here --
            # confirm that 'locus' mode is meant to run it as well.
            if pattern.match(symbol) and symbol != name:
                sys.stderr.write('%s %s\n' % (symbol, name))
                name = symbol

            # Keys can repeat, so names are accumulated in a set per key.
            key = symbol if by_symbol else locus
            result.setdefault(key, set()).add(name)

    return result
def readInterpro(interprofile, locusL):
    """Collect Pfam, InterPro and GO annotations for the selected loci.

    Each line of ``interprofile`` is split on tabs.  Rows whose first
    column is not in ``locusL`` are ignored.  For the remaining rows,
    columns 4/5 are passed to ``add2dict`` for the Pfam dict, columns
    11/12 are passed to ``add2dict`` for the InterPro dict, and, when the
    row has exactly 14 columns, column 13 is passed to ``addgo`` together
    with the three GO dicts.

    The dicts are filled by ``add2dict`` / ``addgo`` (defined elsewhere).
    Per the original notes, the intended shapes look like this -- confirm
    against those helpers:

        pfamDict = {'PF01209':['AT5G57290.3',], 'Ubie':[AT5G57290.3]}
        iprDict = {'IPR001813':['AT5G57290.3',], 'Ribosomal':[AT5G57290.3]}
        mfDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        bpDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        ccDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}

    Args:
        interprofile: Path of the tab-separated annotation file.
        locusL: Container of locus identifiers to keep.

    Returns:
        The tuple ``(pfamDict, iprDict, bpDict, ccDict, mfDict)``.

    Raises:
        IndexError: When a selected row has fewer than 13 columns after
            trailing whitespace is stripped.
        SystemExit: With exit code 1 when the module-level ``TESTdict``
            debug switch is truthy (after dumping the Pfam dict).
    """
    pfamDict = {}
    iprDict = {}
    mfDict = {}
    bpDict = {}
    ccDict = {}

    # Membership is tested once per row; hash a list/tuple once up front
    # instead of scanning it linearly for every line of the file.
    wanted = locusL
    if isinstance(locusL, (list, tuple)):
        try:
            wanted = frozenset(locusL)
        except TypeError:
            # Unhashable members: keep the original linear lookup.
            wanted = locusL

    # The context manager closes the file even if a short row raises.
    with open(interprofile) as handle:
        for line in handle:
            fields = line.rstrip().split('\t')
            locus = fields[0]
            if locus not in wanted:
                continue

            add2dict(pfamDict, fields[4], fields[5], locus)
            add2dict(iprDict, fields[11], fields[12], locus)

            # NOTE(review): rows with more than 14 columns skip GO
            # extraction because the check is an exact match -- confirm
            # that this is intended.
            if len(fields) == 14:
                addgo(fields[13], bpDict, ccDict, mfDict, locus)

    if TESTdict:
        # Debug hook: dump the Pfam dict and stop the program.
        ct_rdict(pfamDict)
        sys.stderr.write("Test finished\n")
        sys.exit(1)

    return (pfamDict, iprDict, bpDict, ccDict, mfDict)
def readInterpro(interprofile, locusL):
    """Collect Pfam, InterPro and GO annotations for the selected loci.

    Each line of ``interprofile`` is split on tabs.  Rows whose first
    column is not in ``locusL`` are ignored.  For the remaining rows,
    columns 4/5 are passed to ``add2dict`` for the Pfam dict, columns
    11/12 are passed to ``add2dict`` for the InterPro dict, and, when the
    row has exactly 14 columns, column 13 is passed to ``addgo`` together
    with the three GO dicts.

    The dicts are filled by ``add2dict`` / ``addgo`` (defined elsewhere).
    Per the original notes, the intended shapes look like this -- confirm
    against those helpers:

        pfamDict = {'PF01209':['AT5G57290.3',], 'Ubie':[AT5G57290.3]}
        iprDict = {'IPR001813':['AT5G57290.3',], 'Ribosomal':[AT5G57290.3]}
        mfDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        bpDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        ccDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}

    Args:
        interprofile: Path of the tab-separated annotation file.
        locusL: Container of locus identifiers to keep.

    Returns:
        The tuple ``(pfamDict, iprDict, bpDict, ccDict, mfDict)``.

    Raises:
        IndexError: When a selected row has fewer than 13 columns after
            trailing whitespace is stripped.
        SystemExit: With exit code 1 when the module-level ``TESTdict``
            debug switch is truthy (after dumping the Pfam dict).
    """
    pfamDict = {}
    iprDict = {}
    mfDict = {}
    bpDict = {}
    ccDict = {}

    # Membership is tested once per row; hash a list/tuple once up front
    # instead of scanning it linearly for every line of the file.
    wanted = locusL
    if isinstance(locusL, (list, tuple)):
        try:
            wanted = frozenset(locusL)
        except TypeError:
            # Unhashable members: keep the original linear lookup.
            wanted = locusL

    # The context manager closes the file even if a short row raises.
    with open(interprofile) as handle:
        for line in handle:
            fields = line.rstrip().split('\t')
            locus = fields[0]
            if locus not in wanted:
                continue

            add2dict(pfamDict, fields[4], fields[5], locus)
            add2dict(iprDict, fields[11], fields[12], locus)

            # NOTE(review): rows with more than 14 columns skip GO
            # extraction because the check is an exact match -- confirm
            # that this is intended.
            if len(fields) == 14:
                addgo(fields[13], bpDict, ccDict, mfDict, locus)

    if TESTdict:
        # Debug hook: dump the Pfam dict and stop the program.
        ct_rdict(pfamDict)
        sys.stderr.write("Test finished\n")
        sys.exit(1)

    return (pfamDict, iprDict, bpDict, ccDict, mfDict)