def ppis_gold_standard(ppis, cxs_splits, species): pdppis = pd.PairDict([p[:3] for p in ppis]) print len(pdppis.d), "predicted interactions" ppi_cxs,_,all_cxs = ppi.load_training_complexes(species, None,'') #conv doesn't matter pdcorum = pd.PairDict([(i[0],i[1],'gold') for i in co.pairs_from_complexes(ut.i1(all_cxs))]) print len(pdcorum.d), "total gold standard" pdcomb = pd.pd_union_disjoint_vals(pdppis, pdcorum) unmr_splits = cp.unmerged_splits_from_merged_splits(ppi_cxs,cxs_splits) print "unmerged split assignment lengths", [len(s) for s in unmr_splits] pdtrainpos = pd.PairDict([(t[0],t[1]) for t in co.pairs_from_complexes(unmr_splits[0])]) print len(pdtrainpos.d), "total train interactions" counterrs = 0 for tpair in pdtrainpos.d: cpair = pdcomb.find(tpair) #assert cpair is not None, "Gold standard problem--filter_methods changed since run?" if cpair is None or pdcomb.d[cpair][1] != 'gold': #print 'error: train should be subset', tpair counterrs += 1 else: pdcomb.d[cpair][1] = 'train' if counterrs: print "number of training not found in gold std:", counterrs comblist = [list(k)+list(v) for k,v in pdcomb.d.items()] print (len([1 for p in comblist if p[2] and p[3]=='gold']), "ppis in gold not train") print len([1 for p in comblist if p[2] and p[3]=='train']), "ppis in train" # only return those that are predictions return [p for p in comblist if p[2]]
def triple_venn_consv():
    """Draw a three-way venn of conserved interaction sets:
    map23 (clustered predictions) vs havug vs corum.

    NOTE(review): h2d is not defined in this function and is not visible
    at module scope in this chunk--presumably an Hs->Dm ortholog mapping
    dict; unless it exists as a module global this raises NameError.
    Confirm before use.
    """
    hints = co.load_havug_ints()
    # clean_cxs and corconsv are unpacked but unused here.
    ppi_cxs, clean_cxs, corconsv = ppi.load_training_complexes("Hs", "Dm")
    cints = co.pairs_from_complexes(ut.i1(ppi_cxs)) # exclude huge ones
    # Second element of the saved tuple holds the complex ppis.
    ints23 = ut.loadpy(ut.bigd("../23_collapsenodes/Hs_filtorth025_withsc_2sp_refilt2sp_cxs_cxppis_clust27_532cxs"))[1]
    # python2-only tuple-iteration syntax: filter each set to conserved pairs.
    ints3 = [cp.consv_pairs(i, h2d) for i in ints23, hints, cints]
    cp.triple_venn(ints3, ["map23", "havug", "corum"])
def arrfeats_prep_all_data(arrfeats, ppis, sp="Hs", gold_consv="Dm", cutoff=0.5): print "Adding species summary." arrfeats = fe.arr_add_spsummary(arrfeats, cutoff) print "Adding ppis." arrfeats = fe.arrfeats_add_ppis(arrfeats, ppis) _, _, all_cxs = ppi.load_training_complexes(sp, None, gold_consv) pdgold = pd.PairDict(co.pairs_from_complexes(ut.i1(all_cxs))) print "Setting trues." arrfeats = fe.arrfeats_set_gold(arrfeats, pdgold) return arrfeats
def result_gold(splits, species, split_inds, make_unmerged=False, consv_sp='Dm'): if make_unmerged: print "Converting to unmerged using conserved:", (consv_sp if consv_sp else "None") ppi_corum,_,_ = ppi.load_training_complexes(species,'',consv_sp) splits = unmerged_splits_from_merged_splits(ut.i1(ppi_corum), [[ut.i1(s) for s in split] for split in splits]) gold = ut.i1(reduce(operator.add, [splits[i] for i in split_inds])) return gold
def gold_label_ppis(ppis, merged_splits, sp, gold_nsp):
    """Label ppis using the training split of the gold-standard complexes.

    ppis: predicted interactions to label.
    merged_splits: merged complex splits; split 0 supplies the training pairs.
    sp: species code for ppi.load_training_complexes.
    gold_nsp: number of species backing the gold standard; conservation
        filtering (vs 'Dm') is applied only when it exceeds one.
    """
    consv = 'Dm' if gold_nsp > 1 else ''
    base_cxs, _, _ = ppi.load_training_complexes(sp, '', consv)
    train_cxs = unmerged_splits_from_merged_splits(base_cxs, merged_splits)[0]
    train_pairs = co.pairs_from_complexes(train_cxs)
    return cv.gold_label_ppis(ppis, train_pairs)