Example #1
0
def ppis_gold_standard(ppis, cxs_splits, species):
    """Label each predicted interaction against the CORUM-derived gold standard.

    ppis: sequence of interaction tuples; only the first three fields
        (id1, id2, value) of each are used here.
    cxs_splits: merged complex splits, unmerged here against the ppi
        training complexes to recover the training split.
    species: species code passed to ppi.load_training_complexes.

    Returns a list of [id1, id2, value, label] rows, restricted to pairs
    that appear in the predictions (truthy third field); label is 'train'
    for pairs in the unmerged training split, otherwise 'gold'.
    """
    # Keep only the pair plus its first value field for lookup/merge.
    pdppis = pd.PairDict([p[:3] for p in ppis])
    print len(pdppis.d), "predicted interactions"
    # Conversion argument is unused for this purpose, hence ''.
    ppi_cxs,_,all_cxs = ppi.load_training_complexes(species, None,'') #conv doesn't matter
    # All gold-standard pairs, each initially tagged 'gold'.
    pdcorum = pd.PairDict([(i[0],i[1],'gold') for i in
                        co.pairs_from_complexes(ut.i1(all_cxs))])
    print len(pdcorum.d), "total gold standard"
    # Union of predictions and gold standard; values from both sides kept
    # side by side (prediction value first, gold tag second).
    pdcomb = pd.pd_union_disjoint_vals(pdppis, pdcorum)
    # Recover which complexes belong to each split before merging.
    unmr_splits = cp.unmerged_splits_from_merged_splits(ppi_cxs,cxs_splits)
    print "unmerged split assignment lengths", [len(s) for s in unmr_splits]
    # Split 0 is treated as the training split — TODO confirm convention.
    pdtrainpos = pd.PairDict([(t[0],t[1]) for t in
        co.pairs_from_complexes(unmr_splits[0])])
    print len(pdtrainpos.d), "total train interactions"
    counterrs = 0
    # Re-tag training pairs from 'gold' to 'train'; count pairs that are
    # missing from the combined dict (training should be a subset of gold).
    for tpair in pdtrainpos.d:
        cpair = pdcomb.find(tpair)
        #assert cpair is not None, "Gold standard problem--filter_methods changed since run?"
        if cpair is None or pdcomb.d[cpair][1] != 'gold':
            #print 'error: train should be subset', tpair
            counterrs += 1
        else:
            # In-place relabel: second value slot holds the gold/train tag.
            pdcomb.d[cpair][1] = 'train'
    if counterrs: print "number of training not found in gold std:", counterrs
    # Flatten to rows: [id1, id2, prediction_value, label].
    comblist = [list(k)+list(v) for k,v in pdcomb.d.items()]
    print (len([1 for p in comblist if p[2] and p[3]=='gold']), 
            "ppis in gold not train")
    print len([1 for p in comblist if p[2] and p[3]=='train']), "ppis in train"
    # only return those that are predictions
    return [p for p in comblist if p[2]]
def triple_venn_consv():
    """Draw a three-way Venn diagram of conserved interaction sets:
    this project's map ("map23"), Havug interactions, and CORUM pairs.

    NOTE(review): relies on `h2d` (presumably a module-level Hs->Dm
    ortholog mapping) and on hard-coded Hs/Dm species and file paths —
    confirm these exist at module scope before calling.
    """
    hints = co.load_havug_ints()
    ppi_cxs, clean_cxs, corconsv = ppi.load_training_complexes("Hs", "Dm")
    cints = co.pairs_from_complexes(ut.i1(ppi_cxs))  # exclude huge ones
    # Second element of the loaded tuple holds the cxppis interactions.
    ints23 = ut.loadpy(ut.bigd("../23_collapsenodes/Hs_filtorth025_withsc_2sp_refilt2sp_cxs_cxppis_clust27_532cxs"))[1]
    # Python 2 tuple-in-for syntax; keep only conserved pairs in each set.
    ints3 = [cp.consv_pairs(i, h2d) for i in ints23, hints, cints]
    cp.triple_venn(ints3, ["map23", "havug", "corum"])
def arrfeats_prep_all_data(arrfeats, ppis, sp="Hs", gold_consv="Dm", cutoff=0.5):
    print "Adding species summary."
    arrfeats = fe.arr_add_spsummary(arrfeats, cutoff)
    print "Adding ppis."
    arrfeats = fe.arrfeats_add_ppis(arrfeats, ppis)
    _, _, all_cxs = ppi.load_training_complexes(sp, None, gold_consv)
    pdgold = pd.PairDict(co.pairs_from_complexes(ut.i1(all_cxs)))
    print "Setting trues."
    arrfeats = fe.arrfeats_set_gold(arrfeats, pdgold)
    return arrfeats
def result_gold(splits, species, split_inds, make_unmerged=False,
        consv_sp='Dm'):
    if make_unmerged: 
        print "Converting to unmerged using conserved:", (consv_sp if consv_sp
                else "None")
        ppi_corum,_,_ = ppi.load_training_complexes(species,'',consv_sp)
        splits = unmerged_splits_from_merged_splits(ut.i1(ppi_corum),
                [[ut.i1(s) for s in split] for split in splits])
    gold = ut.i1(reduce(operator.add, [splits[i] for i in split_inds]))
    return gold
def gold_label_ppis(ppis, merged_splits, sp, gold_nsp):
    """Label ppis as gold positives using the training split pairs.

    ppis: predicted interactions to label.
    merged_splits: merged complex splits; split 0 after unmerging is
        taken as the training set.
    sp: species code for loading training complexes.
    gold_nsp: number of species; >1 switches the gold standard to the
        'Dm'-conserved variant.

    Returns the labeled ppis from cv.gold_label_ppis.
    """
    consv = 'Dm' if gold_nsp > 1 else ''
    cxs,_,_ = ppi.load_training_complexes(sp, '', consv)
    train_cxs = unmerged_splits_from_merged_splits(cxs, merged_splits)[0]
    return cv.gold_label_ppis(ppis, co.pairs_from_complexes(train_cxs))