def multi_clust(tested, score_cutoffs=None, length_cutoffs=None,
        fracs=[.012,.014], frac_retain=.1, ds=[.1,.25,.3,.35], ms=[.1,.15,.2],
        penalties=[.1,1], overlaps=[.55], haircuts=[0,.2], max_pval=1,
        savef=None, runid=None, show_stats=True, pres=None, gold_nspecies=1,
        gold_splits=None, gold_minlen=3, mdprod_min=.01, **kwargs):
    """
    Run cl.filter_clust once for every combination of the parameter lists
    (fracs, ds, ms, penalties, overlaps, haircuts) and collect the results.

    tested: sequence of scored items; fractions of it (via ut.list_frac) are
        what gets handed to cl.filter_clust.
    fracs: fractions of `tested` to cluster.  Because the default is a
        non-None list, score_cutoffs/length_cutoffs are only consulted when
        the caller explicitly passes fracs=None.
    score_cutoffs: used only if fracs is None; each value s becomes the
        fraction cl.n_thresh(tested, s) / len(tested).
    length_cutoffs: used only if fracs and score_cutoffs are both None; each
        value le becomes the fraction le / len(tested).
    frac_retain: fraction of `tested` passed as the second argument of
        cl.filter_clust.
    ds, ms, penalties, haircuts: passed to cl.filter_clust as min_density,
        negmult, penalty and haircut respectively.
    overlaps: each value is passed as BOTH merge_cutoff and max_overlap.
    mdprod_min: combinations with d*m below this value are skipped.
    savef: if truthy, the results so far are written with ut.savepy whenever
        len(clusts) % 10 == 1.
    show_stats, pres, gold_splits, gold_nspecies, gold_minlen: when
        show_stats is true and the newest result has any cxs, statistics for
        it are computed through cp.result_stats / cp.select_best; this
        requires both pres and gold_splits.
    kwargs: forwarded unchanged to cl.filter_clust.

    Returns (clusts, runid): the list of results, each with a `.params`
    string describing its parameter combination, and the run id used.

    Raises ValueError if fracs, score_cutoffs and length_cutoffs are all None.

    NOTE: the list defaults are shared between calls; they are only read
    here, never mutated.
    """
    runid = runid or random.randrange(1,1000)
    if fracs is None:
        # Divide by a float: with plain ints, Python 2's `/` truncates and
        # every derived fraction would collapse to 0.
        n_tested = float(len(tested))
        if score_cutoffs is not None:
            fracs = [cl.n_thresh(tested, s) / n_tested for s in score_cutoffs]
        elif length_cutoffs is not None:
            fracs = [le / n_tested for le in length_cutoffs]
        else:
            raise ValueError("multi_clust: one of fracs, score_cutoffs or "
                    "length_cutoffs is required")
    print("random id: %s" % runid)
    clusts = []
    can_show_stats = pres is not None and gold_splits is not None
    params = [fracs, ds, ms, penalties, overlaps, haircuts]
    for (f,d,m,p,o,h) in it.product(*params):
        if not (d*m >= mdprod_min):
            continue
        cxstruct = cl.filter_clust(ut.list_frac(tested, f),
                ut.list_frac(tested, frac_retain), merge_cutoff=o, negmult=m,
                min_density=d, runid=runid, penalty=p, max_pval=max_pval,
                max_overlap=o, haircut=h, **kwargs)
        cxstruct.params = ('density=%s,frac=%s,f_retain=%s,negmult=%s,penalty=%s,max_overlap=%s,haircut=%s'
                % (d,f,frac_retain,m,p,o,h))
        clusts.append(cxstruct)
        if show_stats and len(cxstruct.cxs)>0:
            if can_show_stats:
                # Return value is not used here; the call is kept for
                # whatever reporting it performs.
                cp.select_best(cp.result_stats(pres.species, gold_splits,
                    clusts[-1:], gold_nspecies, min_gold_size=gold_minlen))
            else:
                print("Can't show stats: pres and gold_splits required.")
        # Periodic checkpoint of everything gathered so far.
        if savef and (len(clusts) % 10 == 1):
            ut.savepy(clusts, ut.pre_ext(savef, "clusts_temp_%s_%s" %
                (ut.date(), runid)))
    return clusts, runid
def multi_stage2_clust(clusts, ppis_all, runid=None, frac_retain=.1,
        I_params=[2,3,4], **kwargs):
    """
    Apply cl.stage2_clust to every first-stage result in `clusts`, once per
    value in I_params.

    Each call receives the first-stage result's cxppis, the frac_retain
    fraction of ppis_all (via ut.list_frac), and the first-stage cxs, plus
    runid, I and any extra kwargs.  The `.params` string of each new result
    is the first-stage params with ",I_mcl=<I>" appended.

    A random run id in [1, 1000) is drawn when runid is falsy.

    Returns the flat list of second-stage results, ordered by first-stage
    result and then by I value.
    """
    if not runid:
        runid = random.randrange(1,1000)
    second_stage = []
    for first in clusts:
        for i_value in I_params:
            retained = ut.list_frac(ppis_all, frac_retain)
            refined = cl.stage2_clust(first.cxppis, retained, first.cxs,
                    runid=runid, I=i_value, **kwargs)
            suffix = ",I_mcl=%s" % i_value
            refined.params = first.params + suffix
            second_stage.append(refined)
    return second_stage
def rescue_ppis(pres, obs, n_rescue, cutoff_fracs=20, cutoff_score=0.9,
        exclude_ppis=None, frac_retain=0.1, cltype='mcl', merge_cutoff=0.55,
        I=3, negmult=1):
    """
    Select additional PPIs from the top n_rescue of pres.ppis and re-run
    cl.filter_clust with them, adding to the existing complexes.

    Candidates come from ev.ppis_fracspassing_counts over pres.ppis[:n_rescue]
    and obs.  A candidate is rescued when its last element exceeds
    cutoff_fracs or its element at index 2 exceeds cutoff_score (presumably
    a fractions-passing count and a score, going by the names -- confirm
    against ev.ppis_fracspassing_counts).

    exclude_ppis: PPIs to leave out of consideration; any falsy value
        (including an empty list) falls back to pres.cxppis.
    frac_retain: fraction of pres.ppis passed as the second argument of
        cl.filter_clust.
    cltype, merge_cutoff, I, negmult: forwarded to cl.filter_clust.

    Each rescued PPI is also added as a two-member set to pres.cxs, and that
    combined list is passed as add_to_cxs.

    Returns (cxs, cxppis, ppis_rescue): the cxs and cxppis of the new
    clustering result, and the list of rescued PPIs.
    """
    ppis_counted = ev.ppis_fracspassing_counts(pres.ppis[:n_rescue], obs,
            exclude_ppis=(exclude_ppis or pres.cxppis))
    print("%s PPIs considered for possible rescue" % len(ppis_counted))
    ppis_rescue = [p for p in ppis_counted
            if p[-1] > cutoff_fracs or p[2] > cutoff_score]
    # Report each count under its own label (score first, then fracs).
    n_over_score = sum(1 for p in ppis_counted if p[2] > cutoff_score)
    n_over_fracs = sum(1 for p in ppis_counted if p[-1] > cutoff_fracs)
    print("%s ppis exceeding score cutoff, %s exceeding fracs cutoff" %
            (n_over_score, n_over_fracs))
    print("%s PPIs to be rescued" % len(ppis_rescue))
    ppis_rescue_as_cxs = [set((p[0],p[1])) for p in ppis_rescue]
    additional_cxs = pres.cxs + ppis_rescue_as_cxs
    # Honor the caller's cltype rather than a hard-coded 'mcl'.
    cxstruct_rescue = cl.filter_clust(ppis_rescue,
            ut.list_frac(pres.ppis, frac_retain), cltype=cltype,
            merge_cutoff=merge_cutoff, I=I, negmult=negmult,
            add_to_cxs=additional_cxs)
    n_before = len([c for c in pres.cxs if len(c)>=3])
    n_after = len([c for c in cxstruct_rescue.cxs if len(c)>=3])
    print("Total complexes >= size 3: was %s, now %s" % (n_before, n_after))
    return cxstruct_rescue.cxs, cxstruct_rescue.cxppis, ppis_rescue