def genes2phens(p2entrez_file, e2z=None):
    """
    Compose a gene-id conversion table with the inverse of the table stored
    in p2entrez_file and return the result of ut.compose_dict_sets.

    p2entrez_file: path handed to ut.load_dict_sets; every value in the
        loaded sets must contain a ":" (only the field right after the first
        ":" is kept).
    e2z: optional pre-built conversion dict.  When it is falsy (None or
        empty), the table at proj_path('convert', 'Hs2Hs_entrez.tab') is
        loaded instead.

    Going by the names, the result presumably maps gene ids to sets of
    phenotypes -- confirm against the ut helpers.
    """
    if e2z:
        ensg2z = e2z
    else:
        ensg2z = ut.load_dict_sets(ut.proj_path('convert', 'Hs2Hs_entrez.tab'))
    raw_p2z = ut.load_dict_sets(p2entrez_file)
    # Drop the "<prefix>:" part of every value, keeping the second
    # ":"-separated field.
    p2z = {
        key: {item.split(":")[1] for item in items}
        for key, items in raw_p2z.items()
    }
    return ut.compose_dict_sets(ensg2z, ut.dict_inverse_sets(p2z))
def convert_dict(fromtype, totype):
    """
    Return a conversion dict from fromtype to totype.

    A direct conversion from convert_dict_single is preferred.  Failing that,
    the conversion is split in two hops through totype[:2] (the leading
    two-character code of totype, treated as the target species) and the two
    hops are composed.
    Returns None if not necessary or not found.
    """
    direct = convert_dict_single(fromtype, totype)
    if direct:
        return direct
    # No usable direct table: go fromtype -> target species -> totype.
    species = totype[:2]
    to_species = convert_dict_single(fromtype, species)
    within_species = convert_dict_single(species, totype)
    if not (to_species and within_species):
        return None
    return ut.compose_dict_sets(to_species, within_species)
def convert_dict(fromtype, totype):
    """
    Look up a one-step conversion first; when there is none, chain two steps
    via totype[:2] (the first two characters of totype, used as the
    intermediate id type for the second species).
    Gives back None when the conversion is not necessary or not found.
    """
    result = convert_dict_single(fromtype, totype)
    if not result:
        # Fall back to: fromtype -> second species -> totype.
        midtype = totype[:2]
        first_hop = convert_dict_single(fromtype, midtype)
        second_hop = convert_dict_single(midtype, totype)
        if first_hop and second_hop:
            result = ut.compose_dict_sets(first_hop, second_hop)
        else:
            result = None
    return result
def orth_indices(sp_base, sp_target, prot_list, remove_multi_base):
    """
    Map base species gene ids to sets of positions in prot_list.

    prot_list holds target species gene ids (one per row of the target
    species score matrix), so the positions double as row/column indices in
    the square interaction score matrix.  When base and target species are
    the same, each id simply maps to the singleton set of its own index.
    Otherwise the orthology from orth.odict(sp_base, sp_target) is composed
    with that index mapping, and base ids left with no indices are dropped.

    remove_multi_base: when true, the orthology is first passed through
        remove_multi_keys.
    """
    index_of = ut.list_inv_to_dict(prot_list)
    targ2inds = {gene: {idx} for gene, idx in index_of.items()}
    if sp_base == sp_target:
        return targ2inds
    orthologs = orth.odict(sp_base, sp_target)
    if remove_multi_base:
        orthologs = remove_multi_keys(orthologs)
    composed = ut.compose_dict_sets(orthologs, targ2inds)
    # Keep only base ids that actually hit at least one row of prot_list.
    return {
        gene: inds for gene, inds in composed.items() if len(inds) > 0
    }