예제 #1
0
def ppis_gold_standard(ppis, cxs_splits, species):
    """
    Label predicted interactions as gold-standard or training pairs.

    ppis: predicted interactions; only the first three fields of each item
        are used (presumably id1, id2, score -- confirm against callers).
    cxs_splits: merged complex splits; they are converted with
        cp.unmerged_splits_from_merged_splits and split 0 is treated as the
        training set.
    species: passed through to ppi.load_training_complexes.

    Returns a list of rows built as list(pair key) + list(values) for every
    combined entry whose prediction value (row[2]) is truthy.  row[3] is
    'train' for pairs found in the training split, 'gold' for other gold
    standard pairs, and otherwise whatever pd.pd_union_disjoint_vals fills
    in for pairs missing from the gold standard.
    NOTE(review): the row[2] truthiness filter would also drop a prediction
    whose value is 0 -- confirm that is intended.

    Progress counts are printed to stdout.
    """
    pdppis = pd.PairDict([p[:3] for p in ppis])
    print("%d predicted interactions" % len(pdppis.d))
    ppi_cxs, _, all_cxs = ppi.load_training_complexes(species, None, '') #conv doesn't matter
    pdcorum = pd.PairDict([(i[0], i[1], 'gold') for i in
                        co.pairs_from_complexes(ut.i1(all_cxs))])
    print("%d total gold standard" % len(pdcorum.d))
    pdcomb = pd.pd_union_disjoint_vals(pdppis, pdcorum)
    unmr_splits = cp.unmerged_splits_from_merged_splits(ppi_cxs, cxs_splits)
    print("unmerged split assignment lengths %s"
            % ([len(s) for s in unmr_splits],))
    pdtrainpos = pd.PairDict([(t[0], t[1]) for t in
        co.pairs_from_complexes(unmr_splits[0])])
    print("%d total train interactions" % len(pdtrainpos.d))
    # Training pairs that cannot be matched to a 'gold' entry are counted
    # instead of asserted on: the gold standard may have changed (e.g.
    # filter_methods) since the predictions were generated.
    counterrs = 0
    for tpair in pdtrainpos.d:
        cpair = pdcomb.find(tpair)
        if cpair is None or pdcomb.d[cpair][1] != 'gold':
            counterrs += 1
        else:
            # Relabel in place: this gold pair belongs to the training split.
            pdcomb.d[cpair][1] = 'train'
    if counterrs:
        print("number of training not found in gold std: %d" % counterrs)
    comblist = [list(k) + list(v) for k, v in pdcomb.d.items()]
    # Format each message as one string: a parenthesised pair after a
    # Python 2 print statement would be printed as a tuple.
    n_gold = sum(1 for p in comblist if p[2] and p[3] == 'gold')
    print("%d ppis in gold not train" % n_gold)
    n_train = sum(1 for p in comblist if p[2] and p[3] == 'train')
    print("%d ppis in train" % n_train)
    # only return those that are predictions
    return [p for p in comblist if p[2]]
예제 #2
0
def ppis_scatter(ppis1, ppis2, useinds=range(3)):
    """
    Line up the values of two interaction lists pair by pair.

    ppis1, ppis2: sequences of interactions, each indexable by the entries
        of useinds.
    useinds: indexes of the fields taken from every interaction; the first
        two selected fields are handed to pd.PairDict along with the
        remaining len(useinds)-2 value fields.  Set to [0,1,3,2] to take
        ppi.learning_examples output into (score, t/f) tuples; [0,1,3] to
        exclude the class.

    Both lists are merged with pd.pd_union_disjoint_vals using 0 as the
    default for each value field (presumably applied to pairs present in
    only one list -- confirm in the pd module).

    Returns (v1s, v2s): the ut.i0 projection of the merged values coming
    from ppis1 and from ppis2 respectively.
    """
    n_values = len(useinds) - 2

    # Build one PairDict per input list, keeping only the selected fields.
    pairdicts = []
    for interactions in (ppis1, ppis2):
        selected = [[item[i] for i in useinds] for item in interactions]
        pairdicts.append(pd.PairDict(selected))
    first, second = pairdicts

    merged = pd.pd_union_disjoint_vals(
        first,
        second,
        adefaults=[0] * n_values,
        bdefaults=[0] * n_values,
    )

    # Transpose the merged values into columns, split the columns into the
    # two halves, then transpose each half back into per-pair rows.
    columns = list(zip(*ut.i1(merged.d.items())))
    rows_first = zip(*columns[:n_values])
    rows_second = zip(*columns[n_values:])

    v1s = ut.i0(rows_first)
    v2s = ut.i0(rows_second)
    return v1s, v2s