def genes2phens(p2entrez_file, e2z=None):
    """
    Compose an id -> entrez mapping with the inverse of the mapping loaded
    from p2entrez_file (presumably phenotype -> entrez ids; confirm against
    the file format).

    e2z: optional dict of id: set(entrez ids). When missing or empty, the
    table at proj_path('convert', 'Hs2Hs_entrez.tab') is loaded instead.

    Every value in p2entrez_file must contain a ":"; only the second
    ":"-separated field of each value is kept.
    """
    gene2entrez = e2z or ut.load_dict_sets(
        ut.proj_path('convert', 'Hs2Hs_entrez.tab'))
    prefixed = ut.load_dict_sets(p2entrez_file)
    # Drop the "<prefix>:" part in front of every value.
    phen2entrez = {}
    for phen, prefixed_ids in prefixed.items():
        phen2entrez[phen] = set(pid.split(":")[1] for pid in prefixed_ids)
    return ut.compose_dict_sets(gene2entrez,
                                ut.dict_inverse_sets(phen2entrez))
def load_havug_cxs(convert_ensg=True):
    """
    Load the complexes stored at proj_path('havug_cxs'), one set per entry.

    convert_ensg: when true, the complexes are numbered by position and
    passed through convert_complexes together with the inverted
    Hs2Hs_uni.tab table and the proteins from the canonical Hs fasta.
    When false, the complexes are returned exactly as loaded.
    """
    hcxs = ut.load_list_of_type(ut.proj_path('havug_cxs'), set)
    if not convert_ensg:
        return hcxs
    # The conversion table and sequences are only needed on this path, so
    # they are not read from disk unless a conversion was requested.
    u2e = ut.dict_inverse_sets(
        ut.load_dict_sets('../../data/convert/Hs2Hs_uni.tab'))
    prots = seqs.load_prots_from_fasta(
        '../../data/sequences/canon/Hs.fasta')
    return convert_complexes(list(enumerate(hcxs)), u2e, prots)
def ogroup_size_dict(odict):
    """
    For an odict of fromid: set(toids), return a dict of fromid: number of
    fromids on that side of its orthogroup.
    """
    to2from = ut.dict_inverse_sets(odict)
    # Orthogroups are cohesive, so a single toid is enough to look up the
    # whole group; the first one the set yields is used.
    return dict((fromid, len(to2from[list(toids)[0]]))
                for fromid, toids in odict.items())
def remove_multi_keys(d, max_keys=1):
    """
    Given a dict of key: set(vs), return a copy of it without any key that
    has at least one v shared by more than max_keys keys (sharing is
    measured with ut.dict_inverse_sets). The input dict is left untouched.

    d: dict of key: set(vs).
    max_keys: largest number of keys a single v may belong to before every
        key holding that v is dropped.
    """
    newd = d.copy()
    dinv = ut.dict_inverse_sets(newd)
    # Walk a snapshot of the original items and delete from the copy only:
    # removing entries from a dict while iterating over that same dict
    # raises RuntimeError on Python 3.
    for k, vs in list(d.items()):
        if any(len(dinv[v]) > max_keys for v in vs):
            del newd[k]
    return newd
def load_seq_pairs(fname, metab_exclude=None):
    """
    Load the metabolic data in fname and return the result of seq_pairs on
    it, using the inverted Hs -> Hs_entrez table as conversion dict.

    metab_exclude: should be in sequential_metab/metabolites_exclude.txt.
    When given and non-empty, the data is first passed through
    filter_rxns_metabs; otherwise it is used unfiltered.
    """
    S, enzymes, rxn_names, metab_names = load_metabolic_data(fname)
    from_entrez = ut.dict_inverse_sets(orth.convert_dict('Hs', 'Hs_entrez'))
    if not metab_exclude:
        print("No filtering of metabolites and rxns.")
    else:
        print("Excluding %s metabolites, filtering rxns" % len(metab_exclude))
        S, enzymes = filter_rxns_metabs(S, enzymes, rxn_names, metab_names,
                                        metab_exclude)
    return seq_pairs(S, enzymes, conv_dict=from_entrez)
def load_kegg_sequentials(fname, do_convert=True):
    """
    Build pairs of ids that sit in consecutive steps of the paths loaded
    from the KEGG brite file fname.

    Each non-empty entry of the loaded brite dict contributes one path
    (ut.i1 of the entry). Neighbouring steps of a path form a group pair,
    and every group pair is expanded into all (x, y) combinations of its
    members before duplicates are removed with pu.dedupe.

    do_convert: when true, the unique pairs are run through
    convert_pairs_singles with the inverted Hs -> Hs_entrez table and the
    converted pairs are returned; otherwise the unique pairs are returned.
    """
    brite = load_kegg_brite(fname)
    kegg_paths = [ut.i1(entry) for entry in brite.values() if entry]

    def neighbours(steps):
        # Each step paired with the step that follows it.
        return list(zip(steps, steps[1:]))

    group_pairs = ut.flatten([neighbours(path) for path in kegg_paths])
    single_pairs = []
    for first_group, second_group in group_pairs:
        for member_a in first_group:
            for member_b in second_group:
                single_pairs.append((member_a, member_b))
    unique_pairs = pu.dedupe(single_pairs)
    print("%s total, %s single, %s unique pairs returned" % (
        len(group_pairs), len(single_pairs), len(unique_pairs)))
    if not do_convert:
        return unique_pairs
    conv_dict = ut.dict_inverse_sets(orth.convert_dict('Hs', 'Hs_entrez'))
    conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
    print("%s converted pairs with 1-1 matches" % len(conv_pairs))
    return conv_pairs
def load_havug_ppis():
    """
    Load the rows of docs/SupplementaryTableS2.tab and translate the ids in
    their first two columns with the inverted Hs2Hs_uni.tab table.

    Returns a list of [id1, id2, third column] lists. An id with no entry
    in the table becomes 'NoTranslation'; the third column is kept as is.
    """
    rows = ut.load_list_of_lists('../../docs/SupplementaryTableS2.tab')
    u2e = ut.dict_inverse_sets(
        ut.load_dict_sets('../../data/convert/Hs2Hs_uni.tab'))

    def translate(pid):
        # When an id has several translations, whichever one the set
        # yields first is used.
        return list(u2e.get(pid, ['NoTranslation']))[0]

    return [[translate(pid) for pid in row[:2]] + [row[2]] for row in rows]