def score_pairs_nonpairs(pairs, elut_fs, spbase, scores=['poisson'],
                         score_cutoff=0.25, do_nonpairs=False):
    """
    Label the given pairs (and optionally generated non-pairs) and score them.

    pairs: collection of pairs used to build a pd.PairDict; every key is
        labeled 1.
    elut_fs, spbase, score_cutoff: passed through unchanged to the ppi
        scoring helpers.
    scores: score names handed to ppi.new_score_array and
        ppi.score_and_filter.
    do_nonpairs: when true, len(pairs) entries from pu.nonpairs_gen are added
        with label 0.

    Returns whatever ppi.score_and_filter returns when called with
    do_filter=False.
    """
    labeled = pd.PairDict(pairs)
    for key in labeled.d:
        labeled.d[key] = 1  # marked as true paralog
    if do_nonpairs:
        for negative in pu.nonpairs_gen(pairs, len(pairs)):
            labeled.set(negative, 0)  # marked as non-paralog
    unscored = ppi.new_score_array(labeled, scores, elut_fs, [])
    del labeled  # lots of memory
    return ppi.score_and_filter(unscored, scores, elut_fs, score_cutoff,
                                spbase, [], '', do_filter=False)
def hist_pairs_nonpairs(scores, pairs, negmult=1, do_plot=True, **kwargs):
    """
    Make a histogram for scores of pairs against non-pairs drawn (via
    pu.nonpairs_gen) from the set of ids making up pairs.

    scores: list of tuples (id1, id2, score)
    pairs: list of tuples (id1, id2)
    negmult: len(pairs)*negmult non-pairs are requested from pu.nonpairs_gen.
    do_plot: when true, call hist() on the pair scores and then on the
        non-pair scores.
    kwargs: forwarded unchanged to both hist() calls.

    Returns (pscores, nscores): the score lists for pairs and for non-pairs.
    A pair with no entry in scores contributes 0.
    """
    assert len(pairs[0])==2, "Too many data points"
    nonpairs = pu.nonpairs_gen(pairs, len(pairs)*negmult)
    # Build the score lookup once; it is identical for pairs and non-pairs.
    pdscores = pd.PairDict([s[:3] for s in scores])

    def scorelist_pairs(candidates):
        # Yield the stored score for each candidate pair, or 0 when the
        # lookup has no entry for it.
        for p in candidates:
            puse = pdscores.find(p)
            yield float(pdscores.d[puse][0]) if puse else 0

    # The tuple must be parenthesized: the bare `for l in a, b` form inside a
    # list comprehension is a syntax error on Python 3.
    pscores, nscores = [list(scorelist_pairs(group))
                        for group in (pairs, nonpairs)]
    if do_plot:
        hist(pscores, **kwargs)
        hist(nscores, **kwargs)
    return pscores, nscores