예제 #1
0
def readTransfer(file, flag='symbol', head=1):
    """Read a tab-separated locus/symbol/name table into a dict of name sets.

    Every data line must hold exactly three tab-separated fields: locus,
    symbol and name.  Names are upper-cased before they are stored.

    In 'symbol' mode the symbol is upper-cased as well.  When the
    module-level ``pattern`` matches the symbol and the name differs from
    it, the pair is reported on stderr and the symbol is stored in place
    of the name.

    Args:
        file: Path of the table to read.
        flag: 'symbol' to key the result by upper-cased symbol, or 'locus'
            to key it by locus.
        head: Number of leading lines to skip before parsing.

    Returns:
        dict mapping each key to the set of names seen for that key.

    Raises:
        SystemExit: With status 1 when ``flag`` is neither 'symbol' nor
            'locus' (a message is written to stderr first).
        ValueError: When a data line does not split into exactly three
            fields.
    """
    if flag not in ('symbol', 'locus'):
        # sys.stderr.write works on Python 2 and 3; "print >>" is 2-only.
        sys.stderr.write("Wrong flag, should be 'symbol' or 'locus'\n")
        sys.exit(1)
    by_symbol = flag == 'symbol'
    result = {}
    # The context manager closes the file even if a line fails to parse.
    with open(file) as handle:
        for line in handle:
            if head:
                head -= 1
                continue
            locus, symbol, name = line.rstrip().split('\t')
            name = name.upper()
            if by_symbol:
                symbol = symbol.upper()
                if pattern.match(symbol) and symbol != name:
                    sys.stderr.write("%s %s\n" % (symbol, name))
                    name = symbol
                key = symbol
            else:
                key = locus
            # Keys can repeat, so the names are gathered in a set per key.
            result.setdefault(key, set()).add(name)
    return result
예제 #2
0
def readTransfer(file, flag = 'symbol', head = 1):
    """Read a tab-separated locus/symbol/name table into a dict of name sets.

    Every data line must hold exactly three tab-separated fields: locus,
    symbol and name.  Names are upper-cased before they are stored.

    In 'symbol' mode the symbol is upper-cased as well.  When the
    module-level ``pattern`` matches the symbol and the name differs from
    it, the pair is reported on stderr and the symbol is stored in place
    of the name.

    Args:
        file: Path of the table to read.
        flag: 'symbol' to key the result by upper-cased symbol, or 'locus'
            to key it by locus.
        head: Number of leading lines to skip before parsing.

    Returns:
        dict mapping each key to the set of names seen for that key.

    Raises:
        SystemExit: With status 1 when ``flag`` is neither 'symbol' nor
            'locus' (a message is written to stderr first).
        ValueError: When a data line does not split into exactly three
            fields.
    """
    if flag not in ('symbol', 'locus'):
        # sys.stderr.write works on Python 2 and 3; "print >>" is 2-only.
        sys.stderr.write("Wrong flag, should be 'symbol' or 'locus'\n")
        sys.exit(1)
    by_symbol = flag == 'symbol'
    result = {}
    # The context manager closes the file even if a line fails to parse.
    with open(file) as handle:
        for line in handle:
            if head:
                head -= 1
                continue
            locus, symbol, name = line.rstrip().split('\t')
            name = name.upper()
            if by_symbol:
                symbol = symbol.upper()
                if pattern.match(symbol) and symbol != name:
                    sys.stderr.write("%s %s\n" % (symbol, name))
                    name = symbol
                key = symbol
            else:
                key = locus
            # Keys can repeat, so the names are gathered in a set per key.
            result.setdefault(key, set()).add(name)
    return result
예제 #3
0
def readInterpro(interprofile, locusL):
    """Collect Pfam, InterPro and GO annotations for the selected loci.

    Reads the tab-separated file ``interprofile``.  For every line whose
    first column is contained in ``locusL``:

    * columns at index 4 and 5 are passed to ``add2dict`` for the Pfam dict,
    * columns at index 11 and 12 are passed to ``add2dict`` for the
      InterPro dict,
    * the column at index 13 is passed to ``addgo`` together with the three
      GO dicts, but only when the line has exactly 14 columns.

    Dict shapes as noted by the original author (the dicts are filled by
    ``add2dict`` / ``addgo``, which are defined elsewhere):

        pfamDict = {'PF01209':['AT5G57290.3',], 'Ubie':[AT5G57290.3]}
        iprDict = {'IPR001813':['AT5G57290.3',], 'Ribosomal':[AT5G57290.3]}
        mfDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        bpDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        ccDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}

    Args:
        interprofile: Path of the tab-separated annotation file.
        locusL: Container of locus identifiers to keep; any object that
            supports ``in`` works.

    Returns:
        The tuple ``(pfamDict, iprDict, bpDict, ccDict, mfDict)``.

    Raises:
        IndexError: When a selected line has fewer than 13 columns.
        SystemExit: With status 1 when the module-level ``TESTdict`` is
            truthy (after ``ct_rdict(pfamDict)`` has been called).
    """
    pfamDict = {}
    iprDict = {}
    mfDict = {}
    bpDict = {}
    ccDict = {}
    # A list/tuple would be rescanned for every input line; hash it once.
    # Other containers (sets, dicts, ...) are used exactly as given.
    wanted = locusL
    if isinstance(locusL, (list, tuple)):
        try:
            wanted = frozenset(locusL)
        except TypeError:
            # Unhashable entries: fall back to the plain linear lookup.
            wanted = locusL
    # The context manager closes the file even if a line fails to parse.
    with open(interprofile) as handle:
        for line in handle:
            lineL = line.rstrip().split("\t")
            locus = lineL[0]
            if locus not in wanted:
                continue
            add2dict(pfamDict, lineL[4], lineL[5], locus)
            add2dict(iprDict, lineL[11], lineL[12], locus)
            if len(lineL) == 14:
                addgo(lineL[13], bpDict, ccDict, mfDict, locus)
    if TESTdict:
        ct_rdict(pfamDict)
        # sys.stderr.write works on Python 2 and 3; "print >>" is 2-only.
        sys.stderr.write("Test finished\n")
        sys.exit(1)

    return (pfamDict, iprDict, bpDict, ccDict, mfDict)
예제 #4
0
def readInterpro(interprofile, locusL):
    """Collect Pfam, InterPro and GO annotations for the selected loci.

    Reads the tab-separated file ``interprofile``.  For every line whose
    first column is contained in ``locusL``:

    * columns at index 4 and 5 are passed to ``add2dict`` for the Pfam dict,
    * columns at index 11 and 12 are passed to ``add2dict`` for the
      InterPro dict,
    * the column at index 13 is passed to ``addgo`` together with the three
      GO dicts, but only when the line has exactly 14 columns.

    Dict shapes as noted by the original author (the dicts are filled by
    ``add2dict`` / ``addgo``, which are defined elsewhere):

        pfamDict = {'PF01209':['AT5G57290.3',], 'Ubie':[AT5G57290.3]}
        iprDict = {'IPR001813':['AT5G57290.3',], 'Ribosomal':[AT5G57290.3]}
        mfDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        bpDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}
        ccDict = {'GO:0008168':[AT5G57300.2], 'methy':[AT5G57300.2]}

    Args:
        interprofile: Path of the tab-separated annotation file.
        locusL: Container of locus identifiers to keep; any object that
            supports ``in`` works.

    Returns:
        The tuple ``(pfamDict, iprDict, bpDict, ccDict, mfDict)``.

    Raises:
        IndexError: When a selected line has fewer than 13 columns.
        SystemExit: With status 1 when the module-level ``TESTdict`` is
            truthy (after ``ct_rdict(pfamDict)`` has been called).
    """
    pfamDict = {}
    iprDict = {}
    mfDict = {}
    bpDict = {}
    ccDict = {}
    # A list/tuple would be rescanned for every input line; hash it once.
    # Other containers (sets, dicts, ...) are used exactly as given.
    wanted = locusL
    if isinstance(locusL, (list, tuple)):
        try:
            wanted = frozenset(locusL)
        except TypeError:
            # Unhashable entries: fall back to the plain linear lookup.
            wanted = locusL
    # The context manager closes the file even if a line fails to parse.
    with open(interprofile) as handle:
        for line in handle:
            lineL = line.rstrip().split('\t')
            locus = lineL[0]
            if locus not in wanted:
                continue
            add2dict(pfamDict, lineL[4], lineL[5], locus)
            add2dict(iprDict, lineL[11], lineL[12], locus)
            if len(lineL) == 14:
                addgo(lineL[13], bpDict, ccDict, mfDict, locus)
    if TESTdict:
        ct_rdict(pfamDict)
        # sys.stderr.write works on Python 2 and 3; "print >>" is 2-only.
        sys.stderr.write("Test finished\n")
        sys.exit(1)

    return (pfamDict, iprDict, bpDict, ccDict, mfDict)