Example #1
0
def score_pairs_nonpairs(pairs, elut_fs, spbase, scores=None,
        score_cutoff=0.25, do_nonpairs=False):
    """
    Build a labeled pair set and score it with the ppi scoring pipeline.

    pairs: sequence of pairs (len() is taken on it); every pair is labeled 1
        (true paralog).
    elut_fs, spbase, score_cutoff: forwarded unchanged to the ppi functions.
    scores: list of score names forwarded to ppi; defaults to ['poisson'].
    do_nonpairs: if true, also add len(pairs) entries produced by
        pu.nonpairs_gen, each labeled 0 (non-paralog).

    Returns whatever ppi.score_and_filter returns when called with
    do_filter=False.
    """
    # A fresh list per call: the default is handed to external ppi functions,
    # so a shared default list could leak mutations between calls.
    if scores is None:
        scores = ['poisson']
    pdpairs = pd.PairDict(pairs)
    for p in pdpairs.d:
        pdpairs.d[p] = 1  # marked as true paralog
    if do_nonpairs:
        for nonpair in pu.nonpairs_gen(pairs, len(pairs)):
            pdpairs.set(nonpair, 0)  # marked as non-paralog
    arr = ppi.new_score_array(pdpairs, scores, elut_fs, [])
    del pdpairs  # lots of memory; release it before the scoring step
    scored_arr = ppi.score_and_filter(arr, scores, elut_fs, score_cutoff,
            spbase, [], '', do_filter=False)
    return scored_arr
Example #2
0
def hist_pairs_nonpairs(scores, pairs, negmult=1, do_plot=True, **kwargs):
    """
    Make a histogram for scores of pairs against random sampling of non-pairs
    from the set of ids making up pairs.

    scores: list of tuples (id1, id2, score)
    pairs: list of tuples (id1, id2)
    negmult: the number of non-pairs requested from pu.nonpairs_gen is
        len(pairs)*negmult.
    do_plot: if true, call hist() once on the pair scores and once on the
        non-pair scores.
    kwargs: passed through unchanged to both hist() calls.

    Returns (pscores, nscores), two lists of scores. An entry is 0 when the
    score lookup's find() returns a falsy value for that pair.
    """
    assert len(pairs[0]) == 2, "each pair must be a 2-tuple (id1, id2)"
    nonpairs = pu.nonpairs_gen(pairs, len(pairs) * negmult)
    # Build the score lookup once and share it between both passes instead of
    # rebuilding it from scores for each pass.
    pdscores = pd.PairDict([s[:3] for s in scores])

    def scorelist(candidates):
        found = []
        for p in candidates:
            puse = pdscores.find(p)
            found.append(float(pdscores.d[puse][0]) if puse else 0)
        return found

    # The tuple is parenthesized: the bare "for x in a, b" form inside a list
    # comprehension is a syntax error on Python 3.
    pscores, nscores = [scorelist(group) for group in (pairs, nonpairs)]
    if do_plot:
        hist(pscores, **kwargs)
        hist(nscores, **kwargs)
    return pscores, nscores