Esempio n. 1
0
def combine_pres_ppis(resa, resb):
    """
    Build a new result struct holding the combined ppis of resa and resb.

    The returned struct is a copy of resa (via ut.struct_copy) whose name
    records both source names and whose ppis are the pair-wise combination
    of the two inputs (combined with combine_or), ordered by the third
    element of each ppi entry, largest first.
    """
    combined = ut.struct_copy(resa)
    combined.name = 'combined: %s, %s' % (resa.name, resb.name)
    pairs_a = pd.PairDict(resa.ppis)
    pairs_b = pd.PairDict(resb.ppis)
    merged_pairs = pd.pd_combine_ppis(pairs_a, pairs_b, combine_or)
    ppi_rows = pd.pd_lol(merged_pairs)
    # Index 2 of each row is the sort key; highest values come first.
    ppi_rows.sort(key=lambda row: row[2], reverse=True)
    combined.ppis = ppi_rows
    return combined
Esempio n. 2
0
def sort_elution(elution):
    """
    Return a copy of elution with its rows ordered by decreasing row total.

    The copy's mat is converted to a numpy array, and both mat and prots
    are reordered with the same index permutation so they stay aligned.
    The input struct itself is not reassigned.
    """
    ordered = ut.struct_copy(elution)
    matrix = np.array(ordered.mat)
    ordered.mat = matrix
    row_totals = np.sum(matrix, axis=1)
    # argsort is ascending; reversing it yields largest-total-first order.
    order = np.argsort(row_totals)[::-1]
    prot_array = np.array(ordered.prots)
    ordered.prots = list(prot_array[order])
    ordered.mat = matrix[order]
    return ordered
Esempio n. 3
0
def sort_elution(elution):
    """
    Copy elution and reorder its rows from highest to lowest row sum.

    mat is stored on the copy as a numpy array; prots is permuted with the
    same indices as mat so that each protein stays with its row.
    """
    result = ut.struct_copy(elution)
    result.mat = np.array(result.mat)
    sums_per_row = np.sum(result.mat, axis=1)
    ascending = np.argsort(sums_per_row)
    descending = ascending[::-1]  # flip to get the largest sums first
    result.prots = list(np.array(result.prots)[descending])
    result.mat = result.mat[descending]
    return result
Esempio n. 4
0
def convert_elution(elution, odict, gt=None, sep='|'):
    """
    Return a copy of elution whose protein labels are mapped through odict.

    - elution: struct with a prots sequence; it is copied with
      ut.struct_copy and only the copy's prots is replaced.
    - odict: dict-like mapping a protein id to a list of ids.  Ids missing
      from odict are kept as a single-element list of themselves.
    - gt: optional object exposing an id2name dict used to translate each
      mapped id to a name; ids missing from id2name are kept as-is.  When
      gt is None no name translation is done (previously this raised
      AttributeError).
    - sep: string used to join the names of multiple mapped ids into one
      label (previously ignored in favour of a hard-coded '|').
    """
    # Fall back to an empty lookup so every id maps to itself without gt.
    id2name = gt.id2name if gt is not None else {}
    newel = ut.struct_copy(elution)
    newel.prots = [sep.join([id2name.get(g, g) for g in odict.get(xg, [xg])])
                   for xg in elution.prots]
    return newel
Esempio n. 5
0
def random_cxstruct(cxstruct, ps, allppis):
    """
    Return a copy of cxstruct with regenerated complexes and matching ppis.

    The copy's cxs comes from random_complexes(cxstruct.cxs, ps) and its
    cxppis from _filter_ints applied to allppis and those new complexes.
    The input cxstruct is only read, not reassigned.
    """
    randomized = ut.struct_copy(cxstruct)
    new_complexes = random_complexes(cxstruct.cxs, ps)
    randomized.cxs = new_complexes
    randomized.cxppis = _filter_ints(allppis, new_complexes)
    return randomized
Esempio n. 6
0
def addpval(cstr, pval):
    """
    Return a copy of cstr filtered through keep_pvals at threshold pval.

    The copy's params gets a ',pval_filt=<pval to 2 decimals>' suffix, and
    its cxs, pvals and cx_details are replaced by the three values that
    keep_pvals returns for them.
    """
    filtered = ut.struct_copy(cstr)
    filtered.params += ',pval_filt=%0.2f' % pval
    kept = keep_pvals(filtered.cxs, filtered.pvals, filtered.cx_details,
                      pval)
    filtered.cxs, filtered.pvals, filtered.cx_details = kept
    return filtered
Esempio n. 7
0
def predict_clust(name, sp, nsp, obs=None, exs=None, savef=None, pres=None,
        pd_spcounts=None, cl_kwargs={}, clusts=None, runid=None,
        count_ext=False, cutoff=0.5, n_cvs=7, accept_clust=False,
        obs_fnames=None, base_splits=None, obs_kwargs={}, kfold=3,
        gold_nspecies=2, do_cluster=True, do_2stage_cluster=True,
        cxs_cxppis=None, do_rescue=True, n_rescue=20000, rescue_fracs=20,
        rescue_score=0.9, clstruct=None, **predict_kwargs):
    """
    Run the predict -> cluster -> (optionally) accept/rescue/export pipeline,
    reusing any intermediate results that are passed in.

    Stages (each is skipped when its result is supplied):
    1. If clusts and pres are both None: build obs (ppi.predict_all), build
       exs (cvstd_via_median), then pres (predict), and save pres.
    2. If do_cluster and neither cxs_cxppis nor clstruct is given: build
       clusts (multi_clust) if missing, then clstruct via the 2-stage or
       single-stage path.
    3. If accept_clust: pick or take cxs/cxppis, optionally rescue ppis,
       then export with cyto_export.

    Arguments:
    - savef: output file name; defaults to ut.bigd(name)+'.pyd'.  Derived
      names (via ut.pre_ext) are used for the intermediate saves.
    - runid: used in saved file names; a random value in [0,1000) is drawn
      when runid is falsy (so runid=0 is also replaced).
    - obs/test_kwargs: note obs_kwargs is combined with predict_kwargs to enforce
      consistency.
    - pd_spcounts: supply from ppi.predict_all if nsp > 1.
    - base_splits: supply exs.splits to generate examples from existing
      division of complexes.  When given, n_cvs is forced to 1.
    - cxs_cxppis: provide if you want to export, or do the ppi rescue
      clustering--also must set accept_clust=True, do_rescue=True
    - cl_kwargs, obs_kwargs: mutable dict defaults; the visible body only
      unpacks/merges them, so keep it that way (do not mutate in place).

    Returns:
    - pres, if do_cluster is false or accept_clust is true.
    - (pres, clstruct), if do_cluster is true and accept_clust is false.
    """
    savef = savef if savef else ut.bigd(name)+'.pyd'
    print "Will save output to", savef
    runid = runid or random.randrange(0,1000)
    if clusts is None: 
        if pres is None:
            # No predictions supplied: compute observations, examples and
            # predictions from scratch.
            if obs is None:
                obs, pd_spcounts = ppi.predict_all(sp, obs_fnames,
                        save_fname=savef.replace('.pyd',''), nsp=nsp,
                        **obs_kwargs)
            if exs is None:
                cvtest_kwargs = ut.dict_quick_merge(obs_kwargs, predict_kwargs)
                # A fixed set of splits leaves nothing to vary across runs.
                n_cvs = 1 if base_splits is not None else n_cvs
                cvs, cvstd = cvstd_via_median(name, sp, nsp, obs_fnames, kfold,
                        base_splits, n_cvs, **cvtest_kwargs)
                if n_cvs > 1:
                    ut.savepy(cvs, ut.pre_ext(savef, '_cvs_%s' % n_cvs))
                ut.savepy(cvstd, ut.pre_ext(savef, '_cvstd'))
                exs=cvstd.exs
            pres = predict(name, sp, obs, exs.arrfeats, nsp, **predict_kwargs)
            pres.exs = exs
            ut.savepy(pres, ut.pre_ext(savef, '_pres'), check_exists=True) 
        else:
            # Work on a copy so the caller's pres is not reassigned below.
            pres=ut.struct_copy(pres)
            if do_rescue:
                assert obs is not None, "Must supply obs for rescue step"
    # NOTE(review): pres is dereferenced unconditionally here; if clusts is
    # supplied while pres is None this raises AttributeError -- confirm that
    # callers always pass pres together with clusts.
    merged_splits = pres.exs.splits[1] # splits is (lp_splits, clean_splits)
    if do_cluster:
        if cxs_cxppis is None and clstruct is None:
            # NOTE(review): the cxs_cxppis check below is redundant with the
            # enclosing condition.
            if clusts is None and cxs_cxppis is None:
                #if calc_fracs:
                    #cl_kwargs['fracs'] = [cp.find_inflection(pres.ppis, merged_splits,
                        #pres.species, gold_nspecies)]
                clusts, runid = multi_clust(pres.ppis, savef=savef, runid=runid,
                        pres=pres, gold_splits=merged_splits,
                        gold_nspecies=gold_nspecies, **cl_kwargs)
                ut.savepy(clusts, ut.pre_ext(savef, '_clusts_id%s' % runid))
            if do_2stage_cluster:
                clusts2 = multi_stage2_clust(clusts, pres.ppis, runid=runid,
                        **cl_kwargs)
                clstruct = cp.result_stats(sp, merged_splits, clusts2,
                        gold_nspecies) 
                ut.savepy(clstruct, ut.pre_ext(savef, '_clstruct2_id%s' % runid))
            else:
                # NOTE(review): this branch passes nsp where the 2-stage
                # branch passes gold_nspecies -- confirm which is intended.
                clstruct = cp.result_stats(sp, merged_splits, clusts, nsp) 
                ut.savepy(clstruct, ut.pre_ext(savef, '_clstruct_id%s' % runid))
        if accept_clust:
            if cxs_cxppis is None:
                # Choose complexes from the clustering results and save them.
                pres.cxs, pres.cxppis, pres.ind = cp.select_best(clstruct)
                ut.savepy([pres.cxs,pres.cxppis],
                        ut.pre_ext(savef,'_cxs_cxppis_id%s_ind%s_%scxs'
                            % (runid, pres.ind, len(pres.cxs))))
            else:
                # Caller-provided complexes: no selection index, so use 0.
                pres.cxs, pres.cxppis = cxs_cxppis
                pres.ind = 0
            if do_rescue:
                # note cl_kwargs aren't passed--would be messy
                pres.cxs, pres.cxppis, pres.ppis_rescue = rescue_ppis(pres,
                        obs, n_rescue, cutoff_fracs=rescue_fracs,
                        cutoff_score=rescue_score)
            # NOTE(review): the count_ext parameter is not forwarded; False
            # is hard-coded in this call -- confirm whether that is intended.
            cyto_export(pres, merged_splits, name_ext='_clust%s_%scxs' % (pres.ind,
                len(pres.cxs)), pd_spcounts=pd_spcounts, arrdata=obs,
                cutoff=cutoff, count_ext=False, arrdata_ppis=None)
            return pres
        else:
            return pres, clstruct
    else:
        return pres
Esempio n. 8
0
def convert_elution(elution, odict, gt=None, sep="|"):
    """
    Return a copy of elution with each protein label rewritten via odict.

    - elution: struct with a prots sequence; copied with ut.struct_copy,
      and only the copy's prots is replaced.
    - odict: dict-like mapping a protein id to a list of ids; an id absent
      from odict maps to itself.
    - gt: optional object with an id2name dict for translating mapped ids
      to names (unknown ids are kept unchanged).  With gt=None the ids are
      used directly instead of raising AttributeError.
    - sep: separator placed between the names when an id maps to several;
      it is now honoured rather than being overridden by a literal "|".
    """
    # An empty lookup makes .get(g, g) return the id itself when gt is absent.
    id2name = {} if gt is None else gt.id2name
    newel = ut.struct_copy(elution)
    converted = []
    for xg in elution.prots:
        names = [id2name.get(g, g) for g in odict.get(xg, [xg])]
        converted.append(sep.join(names))
    newel.prots = converted
    return newel