Ejemplo n.º 1
0
def genes2phens(p2entrez_file, e2z=None):
    """
    Compose a gene mapping with the inverse of the mapping loaded from
    p2entrez_file.

    p2entrez_file: file read with ut.load_dict_sets; every value in it is
        expected to contain a ":" and only the field right after the first
        ":" is kept.
    e2z: optional dict of sets used as the left side of the composition.  Any
        falsy value (None, empty dict) falls back to the project's
        'convert/Hs2Hs_entrez.tab' table.
    Returns the result of ut.compose_dict_sets.
    NOTE(review): judging by the names, this maps Ensembl genes to phenotypes
    through Entrez ids -- confirm against the data files.
    """
    def strip_prefix(prefixed, sep):
        # Keep only the second sep-delimited field of every value.
        return dict((key, set(item.split(sep)[1] for item in items))
                    for key, items in prefixed.items())

    if e2z:
        ensg2z = e2z
    else:
        ensg2z = ut.load_dict_sets(ut.proj_path('convert', 'Hs2Hs_entrez.tab'))
    p2z = strip_prefix(ut.load_dict_sets(p2entrez_file), ":")
    z2p = ut.dict_inverse_sets(p2z)
    return ut.compose_dict_sets(ensg2z, z2p)
Ejemplo n.º 2
0
def load_havug_cxs(convert_ensg=True):
    """
    Load the complexes stored in the project's 'havug_cxs' file, optionally
    running them through convert_complexes.

    convert_ensg: when true, the complexes are numbered with enumerate and
        handed to convert_complexes together with the inverted Hs2Hs_uni.tab
        mapping and the proteins loaded from the canonical Hs fasta file.
        When false, the complexes are returned exactly as loaded.
    Returns the output of convert_complexes if convert_ensg is set, otherwise
    the list produced by ut.load_list_of_type(fname, set).
    """
    hcxs = ut.load_list_of_type(ut.proj_path('havug_cxs'), set)
    if not convert_ensg:
        return hcxs
    # The conversion table and the fasta file are only needed for the
    # conversion, so they are not read at all when convert_ensg is false.
    u2e = ut.dict_inverse_sets(ut.load_dict_sets(
        '../../data/convert/Hs2Hs_uni.tab'))
    prots = seqs.load_prots_from_fasta('../../data/sequences/canon/Hs.fasta')
    return convert_complexes(list(enumerate(hcxs)), u2e, prots)
Ejemplo n.º 3
0
def ogroup_size_dict(odict):
    """
    Given an odict of fromid: set(toids), return a dict of fromid: size of the
    "from" side of the orthogroup that fromid belongs to.

    The size is the length of the inverse-mapping entry (as built by
    ut.dict_inverse_sets) for one of the fromid's toids.
    """
    inverse = ut.dict_inverse_sets(odict)
    sizes = {}
    for fromid, toids in odict.items():
        # Orthogroups are cohesive, so any single toid stands for the whole
        # group; the first one in iteration order is used.
        representative = list(toids)[0]
        sizes[fromid] = len(inverse[representative])
    return sizes
Ejemplo n.º 4
0
def ogroup_size_dict(odict):
    """
    Build a dict of fromid: orthogroup size from an odict of
    fromid: set(toids).

    Each size is the length of the entry that ut.dict_inverse_sets(odict)
    holds for the first toid of that fromid.
    """
    by_toid = ut.dict_inverse_sets(odict)

    def group_size(toids):
        # One toid is enough to look the group up, because orthogroups are
        # cohesive.
        return len(by_toid[list(toids)[0]])

    return dict((fromid, group_size(odict[fromid])) for fromid in odict)
Ejemplo n.º 5
0
def remove_multi_keys(d, max_keys=1):
    """
    Given a dict of key: set(vs), return a shallow copy without the keys that
    share a value with too many other keys.

    A key is dropped as soon as one of its values v has more than max_keys
    entries in the inverse mapping built by ut.dict_inverse_sets.  The input
    dict d is not modified.

    d: dict of key: set(vs).
    max_keys: largest number of keys allowed per value (default 1, i.e. every
        remaining value belongs to a single key).
    """
    newd = d.copy()
    dinv = ut.dict_inverse_sets(newd)
    # Collect the keys first and delete afterwards: deleting while iterating
    # over newd.items() raises RuntimeError on Python 3.
    crowded = [k for k, vs in newd.items()
               if any(len(dinv[v]) > max_keys for v in vs)]
    for k in crowded:
        del newd[k]
    return newd
Ejemplo n.º 6
0
def remove_multi_keys(d, max_keys=1):
    """
    Return a shallow copy of d (a dict of key: set(vs)) from which every key
    owning an over-shared value has been removed.

    A value counts as over-shared when its entry in the inverse mapping built
    by ut.dict_inverse_sets has more than max_keys elements.  d itself is left
    untouched.

    d: dict of key: set(vs).
    max_keys: maximum number of keys that may map to the same value
        (default 1).
    """
    newd = d.copy()
    dinv = ut.dict_inverse_sets(newd)

    def has_shared_value(vs):
        # Stops at the first offending value, like the original early break.
        return any(len(dinv[v]) > max_keys for v in vs)

    # Decide what to drop before mutating: removing entries while looping
    # over newd.items() fails with RuntimeError on Python 3.
    doomed = [k for k in newd if has_shared_value(newd[k])]
    for k in doomed:
        del newd[k]
    return newd
Ejemplo n.º 7
0
def load_seq_pairs(fname, metab_exclude=None):
    """
    Load metabolic data from fname and return the output of seq_pairs for it.

    fname: passed to load_metabolic_data.
    metab_exclude: sized collection of metabolites to leave out; should be in
        sequential_metab/metabolites_exclude.txt.  When given and non-empty,
        the data is first reduced with filter_rxns_metabs; otherwise nothing
        is filtered.
    Returns the result of seq_pairs, called with the inverse of
    orth.convert_dict('Hs','Hs_entrez') as conv_dict.
    """
    S, entrez_enzymes, rnames, mnames = load_metabolic_data(fname)
    ez2en = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    # print(...) with a single argument produces the same output as a
    # Python 2 statement and as a Python 3 function call.
    if metab_exclude:
        print("Excluding %s metabolites, filtering rxns" % len(metab_exclude))
        S, entrez_enzymes = filter_rxns_metabs(S, entrez_enzymes, rnames,
                mnames, metab_exclude)
    else:
        print("No filtering of metabolites and rxns.")
    return seq_pairs(S, entrez_enzymes, conv_dict=ez2en)
Ejemplo n.º 8
0
def load_kegg_sequentials(fname, do_convert=True):
    """
    Load a KEGG brite file and return pairs of items that are adjacent in a
    path.

    Every non-empty value of load_kegg_brite(fname) contributes one path
    (ut.i1 of the value).  Consecutive path entries form group pairs; each
    group pair is expanded into all (xi, yi) combinations of its two entries,
    and the combinations are deduplicated with pu.dedupe.

    fname: passed to load_kegg_brite.
    do_convert: when true, the unique pairs are run through
        convert_pairs_singles using the inverse of
        orth.convert_dict('Hs','Hs_entrez'); when false they are returned
        unconverted.
    Returns the converted pairs or the unique pairs, depending on do_convert.
    """
    dkegg = load_kegg_brite(fname)
    kegg_paths = [ut.i1(v) for v in dkegg.values() if v]

    def path_pairs(list_path):
        # Consecutive (current, next) entries along one path.
        return list(zip(list_path, list_path[1:]))

    group_pairs = ut.flatten([path_pairs(lpath) for lpath in kegg_paths])
    single_pairs = [(xi,yi) for x,y in group_pairs for xi in x for yi in y]
    unique_pairs = pu.dedupe(single_pairs)
    # print(...) with a single argument produces the same output as a
    # Python 2 statement and as a Python 3 function call.
    print("%s total, %s single, %s unique pairs returned" % (
            len(group_pairs), len(single_pairs), len(unique_pairs)))
    if not do_convert:
        return unique_pairs
    conv_dict = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
    print("%s converted pairs with 1-1 matches" % len(conv_pairs))
    return conv_pairs
Ejemplo n.º 9
0
def load_havug_ppis():
    """
    Load the rows of SupplementaryTableS2.tab and translate the first two
    columns of each row through the inverted Hs2Hs_uni.tab mapping.

    Each returned row is [id1, id2, third_column].  An id with no entry in the
    mapping becomes 'NoTranslation'; when an id has several translations, the
    first one in iteration order is used.
    """
    rows = ut.load_list_of_lists('../../docs/SupplementaryTableS2.tab')
    u2e = ut.dict_inverse_sets(ut.load_dict_sets('../../data/convert/Hs2Hs_uni.tab'))

    def translate(protein):
        # The fallback is a one-element list so the [0] lookup also works for
        # ids that are missing from the mapping.
        candidates = u2e.get(protein, ['NoTranslation'])
        return list(candidates)[0]

    return [[translate(protein) for protein in row[:2]] + [row[2]]
            for row in rows]