def triple_venn_consv():
    hints = co.load_havug_ints()
    ppi_cxs, clean_cxs, corconsv = ppi.load_training_complexes("Hs", "Dm")
    cints = co.pairs_from_complexes(ut.i1(ppi_cxs)) # exclude huge ones
    ints23 = ut.loadpy(ut.bigd("../23_collapsenodes/Hs_filtorth025_withsc_2sp_refilt2sp_cxs_cxppis_clust27_532cxs"))[1]
    # h2d (presumably the Hs->Dm ortholog mapping) is not defined in this
    # function; it is assumed to be available at module scope.
    ints3 = [cp.consv_pairs(i, h2d) for i in (ints23, hints, cints)]
    cp.triple_venn(ints3, ["map23", "havug", "corum"])
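
# A minimal sketch generalizing the pattern above to arbitrary inputs. It assumes
# only what triple_venn_consv already uses: cp.consv_pairs(pairs, ortho_map)
# returns the conserved subset of a pair list under an ortholog mapping, and
# cp.triple_venn takes three pair lists plus labels. The function name, its
# arguments, and the labels here are hypothetical placeholders.
def _example_triple_venn(pairs_a, pairs_b, pairs_c, ortho_map):
    """Hypothetical: compare conservation of any three interaction lists."""
    consv = [cp.consv_pairs(p, ortho_map) for p in (pairs_a, pairs_b, pairs_c)]
    cp.triple_venn(consv, ["setA", "setB", "setC"])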

def predict_clust(name, sp, nsp, obs=None, exs=None, savef=None, pres=None,
        pd_spcounts=None, cl_kwargs={}, clusts=None, runid=None, count_ext=False,
        cutoff=0.5, n_cvs=7, accept_clust=False, obs_fnames=None,
        base_splits=None, obs_kwargs={}, kfold=3, gold_nspecies=2,
        do_cluster=True, do_2stage_cluster=True, cxs_cxppis=None,
        do_rescue=True, n_rescue=20000, rescue_fracs=20, rescue_score=0.9,
        clstruct=None, **predict_kwargs):
    """
    - obs_kwargs/predict_kwargs: obs_kwargs is combined with predict_kwargs to
      enforce consistency.
    - pd_spcounts: supply from ppi.predict_all if nsp > 1.
    - base_splits: supply exs.splits to generate examples from an existing
      division of complexes.
    - cxs_cxppis: provide if you want to export or do the ppi rescue
      clustering; also must set accept_clust=True, do_rescue=True.
    """
    savef = savef if savef else ut.bigd(name) + '.pyd'
    print "Will save output to", savef
    runid = runid or random.randrange(0, 1000)
    # Score PPIs and build training examples unless they are supplied.
    if clusts is None:
        if pres is None:
            if obs is None:
                obs, pd_spcounts = ppi.predict_all(sp, obs_fnames,
                        save_fname=savef.replace('.pyd', ''), nsp=nsp,
                        **obs_kwargs)
            if exs is None:
                cvtest_kwargs = ut.dict_quick_merge(obs_kwargs, predict_kwargs)
                n_cvs = 1 if base_splits is not None else n_cvs
                cvs, cvstd = cvstd_via_median(name, sp, nsp, obs_fnames, kfold,
                        base_splits, n_cvs, **cvtest_kwargs)
                if n_cvs > 1:
                    ut.savepy(cvs, ut.pre_ext(savef, '_cvs_%s' % n_cvs))
                ut.savepy(cvstd, ut.pre_ext(savef, '_cvstd'))
                exs = cvstd.exs
            pres = predict(name, sp, obs, exs.arrfeats, nsp, **predict_kwargs)
            pres.exs = exs
            ut.savepy(pres, ut.pre_ext(savef, '_pres'), check_exists=True)
        else:
            pres = ut.struct_copy(pres)
            if do_rescue:
                assert obs is not None, "Must supply obs for rescue step"
    merged_splits = pres.exs.splits[1] # splits is (lp_splits, clean_splits)
    # Cluster the scored PPIs into complexes.
    if do_cluster:
        if cxs_cxppis is None and clstruct is None:
            if clusts is None and cxs_cxppis is None:
                #if calc_fracs:
                    #cl_kwargs['fracs'] = [cp.find_inflection(pres.ppis,
                        #merged_splits, pres.species, gold_nspecies)]
                clusts, runid = multi_clust(pres.ppis, savef=savef, runid=runid,
                        pres=pres, gold_splits=merged_splits,
                        gold_nspecies=gold_nspecies, **cl_kwargs)
                ut.savepy(clusts, ut.pre_ext(savef, '_clusts_id%s' % runid))
            if do_2stage_cluster:
                clusts2 = multi_stage2_clust(clusts, pres.ppis, runid=runid,
                        **cl_kwargs)
                clstruct = cp.result_stats(sp, merged_splits, clusts2,
                        gold_nspecies)
                ut.savepy(clstruct, ut.pre_ext(savef, '_clstruct2_id%s' % runid))
            else:
                clstruct = cp.result_stats(sp, merged_splits, clusts, nsp)
                ut.savepy(clstruct, ut.pre_ext(savef, '_clstruct_id%s' % runid))
        # Accept a clustering (best-scoring or the supplied cxs_cxppis),
        # optionally rescue additional PPIs, then export via cyto_export.
        if accept_clust:
            if cxs_cxppis is None:
                pres.cxs, pres.cxppis, pres.ind = cp.select_best(clstruct)
                ut.savepy([pres.cxs, pres.cxppis],
                        ut.pre_ext(savef, '_cxs_cxppis_id%s_ind%s_%scxs'
                            % (runid, pres.ind, len(pres.cxs))))
            else:
                pres.cxs, pres.cxppis = cxs_cxppis
                pres.ind = 0
            if do_rescue:
                # note cl_kwargs aren't passed--would be messy
                pres.cxs, pres.cxppis, pres.ppis_rescue = rescue_ppis(pres,
                        obs, n_rescue, cutoff_fracs=rescue_fracs,
                        cutoff_score=rescue_score)
            cyto_export(pres, merged_splits, name_ext='_clust%s_%scxs'
                    % (pres.ind, len(pres.cxs)), pd_spcounts=pd_spcounts,
                    arrdata=obs, cutoff=cutoff, count_ext=False,
                    arrdata_ppis=None)
            return pres
        else:
            return pres, clstruct
    else:
        return pres
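
# A hypothetical usage sketch, not part of the original module: it assumes the
# module's imports (ppi, cp, ut, random) are available and that the score files
# named in obs_fnames exist. The run name, filenames, and parameter values below
# are illustrative placeholders only.
def _example_predict_clust_usage():
    """Hypothetical end-to-end run of predict_clust; paths are placeholders."""
    obs_fnames = ['Hs_scores_set1.tab', 'Hs_scores_set2.tab'] # placeholder files
    pres = predict_clust('Hs_example_run', 'Hs', 2,
            obs_fnames=obs_fnames,
            kfold=3,              # 3-fold cross-validation, as in the default
            do_cluster=True,
            do_2stage_cluster=True,
            accept_clust=True,    # with do_rescue=True, enables rescue + export
            do_rescue=True)
    # With accept_clust=True, a single result struct is returned; with
    # accept_clust=False, (pres, clstruct) is returned for manual selection.
    return pres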