def found_orth_prots(base_set_prots, sp, base_sp):
    """
    Return the genes of species `sp` that map onto `base_set_prots`.

    When `sp` differs from `base_sp`, every gene from all_genes(sp) is looked
    up in the orth.odict(sp, base_sp) mapping and kept if any of its mapped
    values is contained in `base_set_prots`; the result is a new set.
    When the two species are the same, `base_set_prots` itself is returned
    unchanged (same object, not a copy).
    """
    # Built before the species comparison, as in the original call order.
    od = orth.odict(sp, base_sp)
    if sp != base_sp:
        return set(
            gene for gene in all_genes(sp)
            # Genes missing from the mapping contribute no candidates.
            if any(mapped in base_set_prots for mapped in od.get(gene, []))
        )
    return base_set_prots
def supporting_ppis(ppis, fnames, score_keys, sp_base, cutoff=0.5, verbose=True):
    """
    Attach to every ppi the orthologous pairs found among the passing pairs.

    Each file in `fnames` is loaded with load_elution. For every combination
    of loaded elution and score key, passing_pairs(elut, skey, cutoff) is
    computed, and each pair produced by orth.orth_pairs for a ppi's first two
    elements is recorded for that ppi if the passing pairs contain it.

    ppis: sequence of interactions; only the first two items of each are used
        for the orthology lookup.
    verbose: when true, print the score key and short file name per step.

    Returns a list parallel to `ppis`: list(ppi) with one extra trailing
    element, the keys of the PairDict of pairs collected for that ppi.
    """
    support = [pd.PairDict([]) for _ in ppis]
    elutions = [load_elution(fname) for fname in fnames]
    for elut, skey in it.product(elutions, score_keys):
        if verbose:
            # Single-argument parenthesised print: same output as the
            # two-item Python 2 print statement.
            print("%s %s" % (skey, ut.shortname(elut.filename)))
        od = orth.odict(sp_base, file_sp(elut.filename))
        new_pairs = passing_pairs(elut, skey, cutoff)
        for ppi, found in zip(ppis, support):
            for opair in orth.orth_pairs(ppi[:2], od):
                opair = tuple(opair)
                if new_pairs.contains(opair):
                    # Only the key matters; the stored value is a placeholder.
                    found.set(opair, None)
    return [list(ppi) + [found.d.keys()] for ppi, found in zip(ppis, support)]
def supporting_ppis(ppis, fnames, score_keys, sp_base, cutoff=0.5, verbose=True):
    """
    Collect, for each ppi, the orthologous pairs present in the passing pairs.

    All files in `fnames` are loaded up front with load_elution. Then, for
    each (elution, score key) combination, the result of
    passing_pairs(elut, skey, cutoff) is queried with every pair that
    orth.orth_pairs yields for the ppi's first two elements, using the
    orth.odict mapping between `sp_base` and the file's species (file_sp).

    Returns one list per ppi: the ppi's own items followed by a single extra
    element holding the keys of its supporting-pair PairDict.
    """
    ppis_support = [pd.PairDict([]) for _ in ppis]
    eluts = [load_elution(f) for f in fnames]
    for elut, skey in it.product(eluts, score_keys):
        if verbose:
            print("%s %s" % (skey, ut.shortname(elut.filename)))
        od = orth.odict(sp_base, file_sp(elut.filename))
        new_pairs = passing_pairs(elut, skey, cutoff)
        # ppis_support is parallel to ppis, so pair them up by position.
        for idx, ppi in enumerate(ppis):
            pdsupport = ppis_support[idx]
            for raw_pair in orth.orth_pairs(ppi[:2], od):
                candidate = tuple(raw_pair)
                if new_pairs.contains(candidate):
                    pdsupport.set(candidate, None)
    augmented = []
    for ppi, pdsupport in zip(ppis, ppis_support):
        augmented.append(list(ppi) + [pdsupport.d.keys()])
    return augmented
def pairs_notfound_sps(df, fs, sps="Hs Mm Sp Dm Ce".split()):
    """
    Per species, tally the pairs whose evidence column is not "frac".

    df: dataframe with id1, id2, and the <sp>_evidence columns.
    fs: all the elution filenames; those containing "<sp>_" are treated as
        the fractionations for species <sp>.
    sps: species codes to process; each needs a "<sp>_evidence" column in df.

    Returns (sps, results), where `sps` is a list copy of the species codes
    and `results` holds, in the same order, one tuple per species:
    (number of pairs, orths, counts). `orths` is pairs_found(pairs, odict),
    or the number of pairs when orth.odict("Hs", sp) is falsy; `counts` is
    pairs_orth_found(pairs, odict, allps).
    """
    # Work on a copy: the default list is created once and shared between
    # calls, and it is handed back to the caller, who could otherwise mutate
    # the default seen by every later call.
    sps = list(sps)
    results = []
    for sp in sps:
        not_frac = df[df[sp + "_evidence"] != "frac"]
        pairs = [(row["id1"], row["id2"]) for _, row in not_frac.iterrows()]
        print("%s pairs for %s" % (len(pairs), sp))
        odict = orth.odict("Hs", sp)
        # A falsy odict is treated as the same-species case: all pairs count.
        orths = pairs_found(pairs, odict) if odict else len(pairs)
        fs_sp = [f for f in fs if (sp + "_") in f]
        print("%s fractionations for %s" % (len(fs_sp), sp))
        allps = el.all_prots(fs_sp)
        counts = pairs_orth_found(pairs, odict, allps)
        results.append((len(pairs), orths, counts))
    return sps, results
def orth_indices(sp_base, sp_target, prot_list, remove_multi_base):
    """
    Map base-species gene ids to sets of positions in `prot_list`.

    `prot_list` is the list of target-species gene ids (corresponding to the
    rows of the target species score matrix), so the returned positions are
    also row/column indices in the square interaction score matrix.

    If the base and target species are the same, the mapping is simply each
    id to a one-element set with its position. Otherwise the orthology
    mapping orth.odict(sp_base, sp_target) is composed with that mapping
    (ut.compose_dict_sets), and base ids left with an empty set are dropped.

    remove_multi_base: when true, the orthology mapping is first passed
        through remove_multi_keys.
    """
    id2pos = ut.list_inv_to_dict(prot_list)
    targ2inds = dict((gene, set([pos])) for gene, pos in id2pos.items())
    if sp_base == sp_target:
        return targ2inds
    base2targ = orth.odict(sp_base, sp_target)
    if remove_multi_base:
        base2targ = remove_multi_keys(base2targ)
    composed = ut.compose_dict_sets(base2targ, targ2inds)
    # Keep only base ids that actually hit at least one index.
    return dict(
        (base_id, inds) for base_id, inds in composed.items() if len(inds) > 0
    )
def supporting_ppis_separate(ppis, fnames, score_keys, sp_base, cutoff=0.5, verbose=True):
    """
    Attach to every ppi its supporting orthologous pairs, split by species.

    The species set is derived from `fnames` via file_sp and printed. For
    each (loaded elution, score key) combination, passing_pairs is computed;
    if it raises IOError a message is printed and that combination is
    skipped. Otherwise each pair from orth.orth_pairs for a ppi's first two
    elements is recorded under the elution's species when the passing pairs
    contain it.

    Returns a list parallel to `ppis`: list(ppi) followed by one
    single-element list per species (in the iteration order of the species
    set), each wrapping the keys of that species' PairDict.
    """
    sps = set(file_sp(fname) for fname in fnames)
    print("Species: %s" % " ".join(sps))
    ppis_support = [
        dict((species, pd.PairDict([])) for species in sps) for _ in ppis
    ]
    eluts = [load_elution(fname) for fname in fnames]
    for elut, skey in it.product(eluts, score_keys):
        sp = file_sp(elut.filename)
        if verbose:
            print("%s %s" % (skey, ut.shortname(elut.filename)))
        od = orth.odict(sp_base, sp)
        try:
            new_pairs = passing_pairs(elut, skey, cutoff)
        except IOError:
            print("No file for %s %s" % (ut.shortname(elut.filename), skey))
            continue
        for ppi, by_species in zip(ppis, ppis_support):
            for opair in orth.orth_pairs(ppi[:2], od):
                opair = tuple(opair)
                if new_pairs.contains(opair):
                    # Only the key matters; the stored value is a placeholder.
                    by_species[sp].set(opair, None)
    return [
        list(ppi) + [[by_species[species].d.keys()] for species in sps]
        for ppi, by_species in zip(ppis, ppis_support)
    ]
def supporting_ppis_separate(ppis, fnames, score_keys, sp_base, cutoff=0.5, verbose=True):
    """
    Gather per-species supporting pairs for each ppi.

    Species are taken from the file names (file_sp) and printed once. Every
    file is loaded with load_elution, then each (elution, score key)
    combination is processed: a combination whose passing_pairs call raises
    IOError is reported and skipped; for the rest, pairs yielded by
    orth.orth_pairs for the ppi's first two elements are stored in the
    PairDict of the elution's species whenever the passing pairs contain
    them.

    Returns, for each ppi, its items followed by one single-element list per
    species (species-set iteration order) holding that species' PairDict
    keys.
    """
    sps = set([file_sp(f) for f in fnames])
    print("Species: %s" % ' '.join(sps))
    ppis_support = []
    for _ in ppis:
        ppis_support.append(dict((s, pd.PairDict([])) for s in sps))
    eluts = [load_elution(f) for f in fnames]
    for elut, skey in it.product(eluts, score_keys):
        sp = file_sp(elut.filename)
        if verbose:
            print("%s %s" % (skey, ut.shortname(elut.filename)))
        od = orth.odict(sp_base, sp)
        try:
            new_pairs = passing_pairs(elut, skey, cutoff)
        except IOError:
            print("No file for %s %s" % (ut.shortname(elut.filename), skey))
            continue
        # ppis_support is parallel to ppis, so pair them up by position.
        for idx, ppi in enumerate(ppis):
            dsupport = ppis_support[idx]
            for raw_pair in orth.orth_pairs(ppi[:2], od):
                candidate = tuple(raw_pair)
                if new_pairs.contains(candidate):
                    dsupport[sp].set(candidate, None)
    rows = []
    for ppi, dsupport in zip(ppis, ppis_support):
        per_species = [[dsupport[s].d.keys()] for s in sps]
        rows.append(list(ppi) + per_species)
    return rows
def targ2base(sp_base, sp_target):
    """
    Return the orth.odict(sp_target, sp_base) mapping, or None.

    None is returned when `sp_base` is falsy or when it equals `sp_target`;
    in those cases orth.odict is not called.
    """
    if sp_base and sp_base != sp_target:
        return orth.odict(sp_target, sp_base)
    return None