def plot_profiles(prots, eluts, sp='Hs', plot_sums=True, shape=None, min_count=1):
    """
    Draw one subplot per selected elution showing the profiles of prots,
    then add a legend listing every protein in the last grid cell.

    prots: protein names.  They are converted with seqs.names2ids for the
        elution selection and with seqs.GTrans().name2id for plotting.
    eluts: [el.NormElut(f, sp, norm_rows=False, norm_cols=False) for f in fs]
        Only the elutions returned by elutions_containing_prots are drawn.
    sp: forwarded to elutions_containing_prots.
    plot_sums: if true, overlay on each subplot a thin black line built from
        the log2 of the axis-0 sums of e.normarr, rescaled using the largest
        plotted protein value.
    shape: (m,n) = m rows, n columns.  When not given it is
        ut.sqrt_shape(number of drawn elutions + 1); the extra cell is the
        one used for the legend.
    min_count: forwarded to elutions_containing_prots.
    """
    import plotting as pl
    gt = seqs.GTrans()
    use_eluts = elutions_containing_prots(eluts, sp, seqs.names2ids(prots),
                                          min_count)
    shape = shape if shape else ut.sqrt_shape(len(use_eluts) + 1)
    nrows, ncols = shape[0], shape[1]
    pl.figure()
    # The id list does not depend on the elution, so build it once.
    pids = [gt.name2id[p] for p in prots]
    for i, e in enumerate(use_eluts):
        pl.subplot(nrows, ncols, i + 1)
        pl.title(ut.shortname(e.filename))
        # Largest value over every row that belongs to one of the proteins
        # present in this elution.
        protsmax = max([np.max(e.normarr[r])
                        for p in pids if p in e.baseid2inds
                        for r in e.baseid2inds[p]])
        plot_prots(e, pids, e.baseid2inds, protsmax)
        if plot_sums:
            # plot total spectral counts normalized to match biggest peak
            # NOTE(review): the [0,:] / shape[1] indexing presumes normarr is
            # 2-D matrix-like so the axis-0 sum stays 2-D -- confirm.
            sums = np.sum(e.normarr, axis=0)
            fmax = np.max(sums)
            pl.plot(range(sums.shape[1]),
                    np.log2(sums[0, :]).T * np.log2(protsmax) * len(pids)
                    / np.log2(fmax),
                    color='k', linestyle='-', linewidth=.5)
    # make legend with all prots
    # The legend goes in the last grid cell.  Subplot index 0 used to be
    # accepted as an alias for that cell but is rejected by current
    # matplotlib (assuming plotting.subplot is matplotlib's), so the cell is
    # addressed explicitly.
    pl.subplot(nrows, ncols, nrows * ncols)
    for p in prots:
        pl.plot(0, label=p)
    pl.legend()
def plot_sums(fs, shape=None):
    """
    For every file in fs, load its elution and plot the axis-0 sums of the
    elution's matrix in its own subplot, titled with the file's short name.

    fs: sequence of elution filenames, each passed to el.load_elution.
    shape: (rows, columns) of the subplot grid; when not given it is
        ut.sqrt_shape(len(fs)).
    """
    import plotting as pl
    grid = shape or ut.sqrt_shape(len(fs))
    for cell, fname in enumerate(fs, 1):
        elution = el.load_elution(fname)
        pl.subplot(grid[0], grid[1], cell)
        pl.title(ut.shortname(fname))
        # NOTE(review): the [0, :] / shape[1] indexing presumes the axis-0
        # sum stays 2-D (e.g. a numpy matrix) -- confirm.
        totals = np.sum(elution.mat, axis=0)
        pl.plot(range(totals.shape[1]), totals[0, :].T)
def plot_cdf_pos_randoms(pospairs, ppis):
    """
    Open a new figure and draw three CDFs over bins 0..1 (step 0.01):
    the first and second results of pl.hist_pairs_nonpairs(ppis, pospairs)
    with negmult=1, and the second result again with negmult=100.
    The curves are labelled 'Sequentials', 'Size-matched reshuffled' and
    '100x larger set of reshuffled' respectively.

    pospairs, ppis: forwarded unchanged to pl.hist_pairs_nonpairs.
    """
    import plotting as pl
    pl.figure()
    # The first element of the negmult=1 result is discarded; only the one
    # from the negmult=100 call is plotted, as before.
    _, matched_neg = pl.hist_pairs_nonpairs(ppis, pospairs, negmult=1,
                                            do_plot=False)
    positives, large_neg = pl.hist_pairs_nonpairs(ppis, pospairs,
                                                  negmult=100, do_plot=False)
    for population in (positives, matched_neg, large_neg):
        pl.cdf(population, bins=np.arange(0, 1.01, .01))
    pl.xlabel("PPI score")
    pl.ylabel("Cumulative fraction of population")
    pl.title('Several percent of sequential enzymes are high-scoring,\n'
             'compared to much less than one percent for random shufflings')
    curve_labels = ['Sequentials',
                    'Size-matched reshuffled',
                    '100x larger set of reshuffled']
    pl.legend(curve_labels, loc=4)
def plot_pairs_randoms_etc(sequentials, score_ppis, plusns=None):
    """
    Bar-plot -log10 of the two-sample K/S p-value for four sets of pairs.

    The four sets, in plotting order, are: sequentials ('n,n+1'), plus2s
    ('n,n+2'), plus3s4s ('n,n+3/4') and random_pairs(sequentials,
    len(sequentials)) ('random').  For each set, pl.hist_pairs_nonpairs
    (negmult=10, do_plot=False) supplies the two samples handed to ks_2samp.

    sequentials: the base collection of pairs; its length also sets the
        number of random pairs requested.
    score_ppis: forwarded to pl.hist_pairs_nonpairs as its first argument.
    plusns: optional (plus2s, plus3s4s) pair; when falsy it is computed
        with plusn(sequentials).

    Returns the list of four -log10(p-value) numbers, in the order above.
    """
    import plotting as pl
    pl.figure()
    plus2s, plus3s4s = plusns or plusn(sequentials)
    rand_pairs = random_pairs(sequentials, len(sequentials))
    # The tuple must be parenthesized: a bare tuple as the iterable of a
    # list comprehension is only legal in Python 2.
    scores = [pl.hist_pairs_nonpairs(score_ppis, pairs, negmult=10,
                                     do_plot=False)
              for pairs in (sequentials, plus2s, plus3s4s, rand_pairs)]
    ks_pvals = [ks_2samp(pos, neg)[1] for pos, neg in scores]  # [1] is p-value
    logvals = [-np.log10(pval) for pval in ks_pvals]
    set_names = 'n,n+1 n,n+2 n,n+3/4 random'.split()
    pl.bar_plot(['%s\np < %0.3g' % (x, y)
                 for x, y in zip(set_names, ks_pvals)],
                logvals)
    pl.ylabel('-log10(p-value) : two-sample K/S test')
    pl.title('Intersection of recon and kegg sequential pairs\n'
             'Npairs=%s; %s n+2s, %s n+3s,n+4s'
             % (len(sequentials), len(plus2s), len(plus3s4s)))
    return logvals