Beispiel #1
0
def __findTs(edges,ciss,proms):
    '''
    Collect transaction lists indexed by TF name plus cis-element length.

    Parameters
    -----------
    edges : dict mapping TF name -> list of cis-element ids
    ciss : dict mapping cis-element id -> cis record (uses .promname, .slen)
    proms : transformed promoter dictionary, keyed by promoter name

    Returns
    ----------
    result : dict keyed by TF name concatenated with str(slen); each
             promoter of a TF contributes at most once per key.
    '''
    result = dict()
    for tf_name, cis_lst in edges.items():
        seen_proms = []
        for cis_id in cis_lst:
            prom_name = ciss[cis_id].promname
            # Each promoter is counted only once per TF.
            if prom_name in seen_proms:
                continue
            seen_proms.append(prom_name)
            group_key = tf_name + str(ciss[cis_id].slen)
            dictInsert(result, group_key, proms[prom_name])
    return result
Beispiel #2
0
def getEntropyWeights(e,ciss):
    '''
    getEntropyWeights calculates entropy-based per-position weights for
    every cis element: the cis elements of each partial edge are grouped
    by sequence length, one PWM is built per (edge, length) group, and
    every member of a group shares the PWM's entropy weights.

    Parameters
    -----------
    e : dict mapping edge key -> list of cis-element ids
    ciss : dict mapping cis-element id -> cis record (uses .slen, .seq)

    Returns
    -----------
    weights: a dictionary of weights associated with each cis-element,
             e.g. {2019:[0.1,0.4,0.9...]}. Elements not covered by any
             edge keep the default weight 1.0 at every position.
    '''
    # Default: weight 1.0 at every sequence position.
    weights = dict()
    for key in ciss.keys():
        weights[key] = [1.0] * ciss[key].slen

    # Group the cis ids of each non-empty edge by sequence length.
    # BUGFIX: the original pre-filled groups via dict.fromkeys(e.keys()),
    # so an edge with an empty cis list left groups[key] as None and the
    # PWM loop below crashed on None.keys(). Now empty edges simply get
    # no entry.
    groups = dict()
    for key in e.keys():
        all_cis = e[key]
        if len(all_cis) == 0:
            continue
        groups[key] = dict()
        for cis in all_cis:
            dictInsert(groups[key], ciss[cis].slen, cis)

    # One PWM per (edge, length) group; all members share its weights.
    for key in groups.keys():
        for l in groups[key].keys():
            pwm = PWM()
            for cisId in groups[key][l]:
                pwm.addSeq(ciss[cisId].seq)
            tmp_weights = pwm.getEntropyWeights()
            for cisId in groups[key][l]:
                weights[cisId] = tmp_weights

    return weights
Beispiel #3
0
def listToDict(lst):
    """
    Rebuild a dictionary from a list of (key, value) tuples; this is the
    inverse of dictToList() (see its reference).
    """
    result = dict()
    for pair in lst:
        # dictInsert accumulates values that share the same key.
        dictInsert(result, pair[0], pair[1])
    return result
Beispiel #4
0
def summarizePtns(pPtns, nPtns):
    """
    summarizePtns will summarize the patterns into succinct rules.

    Parameters
    ----------
    pPtns : dict mapping key -> list of positive pattern sets
    nPtns : dict mapping key -> list of negative pattern sets (same keys)

    Returns
    -------
    (ppatterns, npatterns) : per key, the positive patterns all of whose
    strict supersets are negative, and the negative patterns all of whose
    strict subsets are positive.
    """

    # the edges are going from super set to subset
    ppatterns = dict()
    # the edges are going from subset to super set
    npatterns = dict()

    for key in pPtns:
        sbNet = dict()  # pattern -> its strict supersets
        spNet = dict()  # pattern -> its strict subsets
        A = pPtns[key]
        B = nPtns[key]
        C = list(set(A + B))
        tempP = []
        tempN = []
        for i in C:
            for j in C:
                if i == j:
                    continue
                if i.issubset(j):
                    dictInsert(sbNet, i, j)
                elif i.issuperset(j):
                    dictInsert(spNet, i, j)

        # Keep a positive pattern when it has no strict superset, or when
        # every superset belongs to the negative side.
        for x in A:
            if x not in sbNet:
                tempP.append(x)
                continue
            plst = sbNet[x]
            # BUGFIX: removed leftover debug statement
            # (print "#####", x, plst) that polluted stdout and used
            # Python-2-only print syntax.
            if all([y in B for y in plst]):
                tempP.append(x)

        # Symmetric rule for negative patterns and their strict subsets.
        for x in B:
            if x not in spNet:
                tempN.append(x)
                continue
            plst = spNet[x]
            if all([y in A for y in plst]):
                tempN.append(x)

        ppatterns[key] = tempP
        npatterns[key] = tempN

    return (ppatterns, npatterns)
Beispiel #5
0
def getCis(path):

    """
    getCis takes 1 argument as the file path to 'BindingSiteSet.txt'
    and returns the result as a tuple of three dictionaries:
    1. Dictionary edges: key=TF name, data=key of attrs
    2. Dictionary attrs: key=key of attrs (row #), data=Cis record of
       detailed attributes
    3. Dictionary proms: key=gene name, data=key of attrs

    Lines starting with '#' are comments and are skipped; rows whose
    binding-site sequence column is empty after cleaning are ignored.
    """
    edges = dict()
    attrs = dict()
    proms = dict()
    result = (edges, attrs, proms)
    # Column layout of BindingSiteSet.txt, for reference:
    # cnames = [
    #     'TFId','TFName','TFBSID','lend',
    #     'rend','strand','interactionID',
    #     'TU','type','promoter','pos',
    #     'BSSeq','evidence'
    # ]
    eindx = 0
    # NOTE: opened in text mode ('rb' in the original added nothing on
    # POSIX and breaks str comparisons under Python 3); the renamed
    # handle also no longer shadows the builtin 'file'.
    with open(path, 'r') as infile:
        for line in infile:
            if line.startswith('#'):
                continue
            tokens = line.split('\t')
            # Keep the raw binding-site sequence before attrCleaner
            # normalizes the tokens in place.
            aseq = tokens[-3]
            attrCleaner(tokens)
            if len(tokens[-3]) <= 0:
                # Skip rows without a binding-site sequence.
                continue
            attrs[eindx] = Cis(tokens[0], tokens[2],
                               tokens[1], tokens[9],
                               tokens[3], tokens[4],
                               tokens[10], tokens[11],
                               tokens[8], tokens[13],
                               tokens[5], aseq)
            dictInsert(edges, tokens[1], eindx)
            dictInsert(proms, tokens[9], eindx)
            eindx += 1
    # BUGFIX: dropped the redundant explicit close(); the 'with' block
    # already closes the file.

    return result
Beispiel #6
0
def constPatternReal(ciss,proms,degree=2):
    '''
    constPatternReal groups the patterns produced by constPattern()
    according to their real labelings: each pattern is filed under the
    concatenation of the distinct TF names of its cis elements (in
    first-seen order). Patterns with fewer than `degree` distinct TFs
    are dropped.
    '''
    result = dict()
    raw = constPattern(ciss, proms, degree)
    for prom_key in raw.keys():
        for pattern in raw[prom_key]:
            seen = []
            label = ''
            for cis in pattern.ciss:
                tf = cis.tfname
                if tf not in seen:
                    seen.append(tf)
                    label += tf
            if len(seen) >= degree:
                dictInsert(result, label, pattern)
    return result
Beispiel #7
0
def constPattern(ciss,proms,degree=2):
    '''
    constPattern builds new data points: ordered tuples of cis elements
    (sorted by their left end) drawn from every promoter that carries at
    least `degree` cis elements.
    '''
    result = dict()
    for prom_key, cis_ids in proms.items():
        # Promoters with too few cis elements cannot form a pattern.
        if len(cis_ids) < degree:
            continue
        # Materialize the cis records and order them along the sequence.
        elems = [ciss[c] for c in cis_ids]
        elems.sort(key=lambda e: float(e.lend))

        for subset in __getSubset(elems, degree):
            dictInsert(result, prom_key, Pattern(subset, prom_key))
    return result
Beispiel #8
0
def evalCisCollection(rules,weighted=False):
    '''
    evalCisCollection stores, per key, the tuples of
    (length, support, confidence) for the single-antecedent rules whose
    antecedent contains the key, and tracks the rule with the largest
    support per key.

    Returns
    -------
    (perfs, maks) : perfs maps rule length -> confidences (each repeated
    support-many times when weighted=True); maks maps key -> the rule
    tuple with the largest support seen for that key.
    '''
    results = dict()
    maks = dict()
    for key in rules.keys():
        entries = rules[key]
        if not entries:
            continue
        best_len = 0
        for entry in entries:
            antecedent = entry[0]
            # Only single-element antecedents that mention the key.
            if len(antecedent) > 1:
                continue
            if not antecedent.issuperset({key}):
                continue
            dictInsert(results, key,
                       (len(entry[1]), entry[2], entry[3]))
            if len(entry[1]) > best_len:
                maks[key] = (entry[1], entry[2], entry[3])
                best_len = len(entry[1])

    perfs = dict()
    for key in results.keys():
        for stats in results[key]:
            length = stats[0]
            if weighted:
                # Repeat the confidence once per supporting instance.
                for _ in range(stats[1]):
                    dictInsert(perfs, length, stats[2])
            else:
                dictInsert(perfs, length, stats[2])

    return (perfs, maks)
Beispiel #9
0
# Load the pickled network data: edges (TF -> cis ids), ciss (cis id ->
# cis record), proms (promoter -> cis ids).
netpath='netdata.p'
(edges,ciss,proms)=exp.getPickle(netpath)
#sims=exp.getPickle('dmSimsL.p')
# Build degree-4 patterns labeled by their real TF names, then summarize
# and sort them.
patterns=exp.constPatternReal(ciss,proms,degree=4)
sorted_sums=vs.sortSum(vs.summ(patterns,4,oU=True))


# Regroup the cis elements; presumably maps cis id -> group id — verify
# against exp.__regroup.
groups=exp.__regroup(ciss)


# Per group, collect the distinct cis sequences (unique) and the member
# cis ids (max_group).
max_group=dict()
unique=dict()
for key in groups.keys():
    # NOTE(review): the duplicate check reads unique[key] (keyed by cis
    # id) while the insertions below store under groups[key] (the group
    # id), so this check can never match an inserted entry — likely a
    # bug; confirm whether unique[groups[key]] was intended.
    if key in unique.keys():
        if ciss[key].seq in unique[key]:
            continue
    dictInsert(unique,groups[key],ciss[key].seq)
    dictInsert(max_group,groups[key],key)


# Find the group id with the most member cis elements.
max_len=0
result=0
for key in max_group.keys():
    temp=len(max_group[key])
    if temp>max_len:
        max_len=temp
        result=key