def plotErrorByDeltaGBin(self, binedges=np.arange(-13, -6.1, 0.1), ylim=[0,0.8],
                         min_n=5, xlim=None, ax=None, color='r', marker='>'):
    """Plot the confidence-interval half-width of dG grouped by dG bin.

    Args:
        binedges: evenly spaced bin edges handed to np.digitize.
        ylim: y-axis limits.
        min_n: only variants with numTests >= min_n are grouped.
        xlim: x-axis limits; None leaves the current limits in place.
        ax: axes to draw on. A new 3x3 inch figure is created when None.
        color: face color of the markers and color passed to errorbar.
        marker: marker style for the scatter points.

    Side effects:
        Writes the columns 'ci_width' and 'dG_bin' onto self.variant_table.
    """
    variant_table = self.variant_table
    # NOTE: these assignments modify self.variant_table itself, not a copy.
    variant_table.loc[:, 'ci_width'] = (variant_table.dG_ub - variant_table.dG_lb)/2
    variant_table.loc[:, 'dG_bin'] = np.digitize(variant_table.dG, binedges)

    # returnFractionGroupedBy is defined elsewhere; presumably it returns the
    # group keys, a per-group average and its error -- TODO confirm.
    x, y, yerr = returnFractionGroupedBy(
        variant_table.loc[variant_table.numTests >= min_n], 'dG_bin', 'ci_width')

    # np.digitize yields 0 below the first edge and len(binedges) above the
    # last, so the centers start half a bin below binedges[0].
    binwidth = (binedges[1] - binedges[0])
    bincenters = np.arange(binedges[0]-binwidth*.5, binedges[-1]+2*binwidth, binwidth)
    x = bincenters[np.array(x).astype(int)]

    if ax is None:
        fig = plt.figure(figsize=(3,3))
        ax = fig.add_subplot(111)
        fig.subplots_adjust(left=0.2, bottom=0.2, top=0.95, right=0.95)

    ax.scatter(x, y, c=color, edgecolor='k', linewidth=0.5, marker=marker, s=20)
    ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=0, capthick=1,
                color=color, linestyle='', ecolor='k', linewidth=0.5)

    # Label the axes we drew on; pyplot calls would hit whichever axes is
    # current, which need not be a caller-supplied `ax`.
    ax.set_xlabel(r'$\Delta G$ (kcal/mol)')
    ax.set_ylabel('average error (kcal/mol)')
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    fix_axes(ax)
def plotHex(self, xlim=[0, 800], min_fluorescence=None):
    """Hexbin self.x against self.y and overlay the median-ratio line.

    Args:
        xlim: two-element list used for both axes and, doubled, as the
            hexbin extent.
        min_fluorescence: forwarded to self.getGoodClusters.

    Returns:
        Tuple (x, y, index) where index is what getGoodClusters returned.
    """
    signal_1 = self.x
    signal_2 = self.y

    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(111, aspect='equal')
    hexes = ax.hexbin(signal_1, signal_2, cmap='Spectral_r', mincnt=1,
                      bins='log', extent=xlim+xlim)
    plt.colorbar(hexes)
    ax.set_xlim(xlim)
    ax.set_ylim(xlim)

    # slope is the median of the per-cluster y/x ratio over the good clusters
    index = self.getGoodClusters(min_fluorescence=min_fluorescence)
    ratios = signal_2.loc[index]/signal_1.loc[index]
    slope = ratios.median()

    # dotted line: median slope through the origin; grey line: identity
    plt.plot(xlim, np.array(xlim)*slope, 'k:')
    plt.plot(xlim, xlim, color='0.5', alpha=0.5)

    plt.xlabel('signal in image 1')
    plt.ylabel('signal in image 2')
    plt.annotate('slope (median)=%4.2f'%(slope),
                 xy=(.05, .95),
                 xycoords='axes fraction',
                 horizontalalignment='left',
                 verticalalignment='top')
    plt.tight_layout()
    fix_axes(ax)
    return signal_1, signal_2, index
def plotFractionNotDifferentByN(self, offset=0, xlim=[0,25], rep=1):
    """Plot the 'within_bound' fraction grouped by the column 'rep<rep>_n'.

    Args:
        offset: forwarded to self.findCombinedTable.
        xlim: x-axis limits.
        rep: replicate number used to build the grouping column name.
    """
    # gather the grouped values to plot
    group_column = 'rep%d_n'%rep
    combined = self.findCombinedTable(offset=offset)
    x, y, yerr = returnFractionGroupedBy(combined, group_column, 'within_bound')

    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(111)
    ax.scatter(x, y, c='r', edgecolor='k', linewidth=0.5, marker='>')
    ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=2, capthick=1,
                color='r', linestyle='', ecolor='k', linewidth=1)

    # major ticks every 5 with integer labels, minor ticks every 1
    xaxis = ax.xaxis
    xaxis.set_major_locator(mpl.ticker.MultipleLocator(5))
    xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
    xaxis.set_minor_locator(mpl.ticker.MultipleLocator(1))

    plt.xlabel('number of measurements')
    plt.ylabel('fraction not different from replicate')
    plt.ylim(0, 1)
    plt.xlim(xlim)
    fix_axes(ax)
    plt.tight_layout()
def plotBootstrappedDist(self, variant, param, log_axis=False):
    """Histogram one parameter for a variant with its table value and bounds.

    The values come from self.cluster_table rows selected by the index of
    self.getVariantBindingSeries(variant). Vertical lines mark the variant's
    entry in self.variant_table (solid) and the '<param>_lb' / '<param>_ub'
    entries (dotted).

    Args:
        variant: row label in self.variant_table.
        param: column name present in both tables.
        log_axis: if True, everything is plotted as log10.
    """
    variant_table = self.variant_table
    cluster_table = self.cluster_table
    member_index = self.getVariantBindingSeries(variant).index

    values = cluster_table.loc[member_index, param].dropna()
    center = variant_table.loc[variant, param]
    upper = variant_table.loc[variant, param+'_ub']
    lower = variant_table.loc[variant, param+'_lb']
    xlabel = param

    if log_axis:
        values, center, upper, lower = [
            np.log10(item) for item in (values, center, upper, lower)]
        xlabel = 'log '+param

    plt.figure(figsize=(4,3))
    sns.distplot(values, color='r', kde=False)
    for position, style in ((center, '-'), (lower, ':'), (upper, ':')):
        plt.axvline(position, color='0.5', linestyle=style)
    plt.xlabel(xlabel)
    fix_axes(plt.gca())
    plt.tight_layout()
def plotKdVersusKoff(self):
    """Hexbin dG against koff (log-scaled y) for self.all_variants.

    dG is read from self.affinityData.variant_table and koff from
    self.offRate.variant_table, both restricted to self.all_variants.
    """
    results_off = self.offRate.variant_table.loc[self.all_variants]
    dG = self.affinityData.variant_table.dG.loc[self.all_variants]

    plt.figure(figsize=(4,4))
    plt.hexbin(dG, results_off.koff, yscale='log', mincnt=1, cmap='Spectral_r')
    # raw string: '\D' is not a valid escape sequence in a normal literal
    plt.xlabel(r'$\Delta G$ (kcal/mol)')
    plt.ylabel('$k_{off}$ (s)')
    fix_axes(plt.gca())
    plt.tight_layout()
def plotNumberOfMeasurments(self, xlim=[1,100], ax=None):
    """Plot a histogram of self.variant_table.numTests.

    Args:
        xlim: two-element sequence; unpacked into np.arange to build
            unit-width bins and also applied as the x-axis limits.
        ax: axes to draw on. A new 4x3 inch figure is created when None.
    """
    variant_table = self.variant_table
    if ax is None:
        fig = plt.figure(figsize=(4,3))
        ax = fig.add_subplot(111)
        fig.subplots_adjust(left=0.15, bottom=0.15, top=0.95, right=0.95)

    # Pass `ax` explicitly so a caller-supplied axes is actually drawn on,
    # instead of whichever axes happens to be current.
    sns.distplot(variant_table.numTests, bins=np.arange(*xlim), kde=False,
                 hist_kws={'histtype':'stepfilled', 'linewidth':1},
                 color='0.5', ax=ax)
    ax.set_xlabel('number of measurements')
    ax.set_ylabel('number of variants')
    ax.set_xlim(xlim)
    fix_axes(ax)
def plotSlopeVersusSignal(self, min_fluorescence=None):
    """Hexbin log2(y/x) against the mean of x and y for the good clusters.

    Args:
        min_fluorescence: forwarded to self.getGoodClusters, whose result
            selects the clusters that are plotted.
    """
    first = self.x
    second = self.y
    good = self.getGoodClusters(min_fluorescence=min_fluorescence)

    ratio = second.loc[good]/first.loc[good]
    average = (first + second).loc[good]/2.

    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(111,)
    ax.hexbin(average, np.log2(ratio), cmap='Spectral_r', mincnt=1, bins='log')
    plt.xlabel('mean signal per cluster')
    plt.ylabel('signal in image2/image1')
    plt.tight_layout()
    fix_axes(plt.gca())
def findOptimalOffset(self):
    """Scan offsets and return the one with the largest within-bound fraction.

    For each of 100 offsets between -1 and 1, the sum of the 'within_bound'
    column of self.findCombinedTable(offset=offset) is divided by the number
    of rows of self.findCombinedTable(). The curve is plotted.

    Returns:
        The offset (index label) at which the fraction is maximal.
    """
    offsets = np.linspace(-1, 1, 100)
    total_number = float(len(self.findCombinedTable()))

    # Build the Series with an explicit float dtype: an index-only Series
    # defaults to object dtype on current pandas, which breaks plotting
    # and idxmax.
    number = pd.Series(
        [self.findCombinedTable(offset=offset).within_bound.sum()/total_number
         for offset in offsets],
        index=offsets, dtype=float)

    plt.figure(figsize=(4,3))
    plt.plot(offsets, number)
    plt.xlabel('offset')
    plt.ylabel('number within bound')
    plt.tight_layout()
    fix_axes(plt.gca())
    return number.idxmax()
def plotEquilibrationTimes(self, concentration, wait_time, initial=1E-9):
    """Overlay measured rates on contours of percent equilibrated.

    Contours come from fraction_equilibrated (defined elsewhere) evaluated
    at 10**x + 10**y over a log-spaced grid; the hexbin shows log10(koff)
    against log10(concentration*koff/Kd) for the good variants.

    Args:
        concentration: multiplies koff/Kd for the y values; also sets the
            lower y limit relative to `initial` and the title.
        wait_time: second argument to fraction_equilibrated.
        initial: reference concentration for the y-limit shift.
    """
    parameters = fitting.fittingParameters()
    good = self.getGoodVariants()
    koff = self.offRate.variant_table.loc[good].koff
    dG = self.affinityData.variant_table.loc[good].dG
    # presumably converts nM to M -- TODO confirm units of find_Kd_from_dG
    kds = parameters.find_Kd_from_dG(dG.astype(float))*1E-9

    # log10 grid for the contour plot
    kobs_bounds = [-5, 0]
    koff_bounds = [-4, -2]
    grid_x, grid_y = np.meshgrid(np.linspace(*koff_bounds, num=200),
                                 np.linspace(*kobs_bounds, num=200))

    # contour levels as fractions; multiplied by 100 when drawn
    labels = np.array([0, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99, 1-1E-12])
    min_kobs = kobs_bounds[0] + np.log10(concentration/initial)

    plt.figure(figsize=(4,4))
    percent = 100*fraction_equilibrated(
        np.power(10, grid_x)+np.power(10, grid_y), wait_time)
    contours = plt.contour(grid_x, grid_y, percent, labels*100,
                           colors='k', linewidths=1)
    plt.clabel(contours, inline=1, fontsize=10, fmt='%1.0f')

    plt.hexbin(np.log10(koff), np.log10(concentration*koff/kds),
               mincnt=1, cmap='Spectral_r',
               extent=koff_bounds+[min_kobs, min_kobs+2], gridsize=150)
    plt.xlabel('log$(k_{off})$')
    plt.ylabel('log$([$flow$] k_{on})$')
    plt.title('%.2e'%(concentration*1E9))
    plt.ylim(min_kobs, min_kobs+2)
    fix_axes(plt.gca())
    plt.tight_layout()
def plotErrorByNumberofMeasurements(self, xlim=[1,100], ylim=[0,1], ax=None,
                                    color='r', marker='>'):
    """Plot the dG confidence-interval half-width grouped by numTests.

    Args:
        xlim: x-axis limits.
        ylim: y-axis limits.
        ax: axes to draw on. A new 4x3 inch figure is created when None.
        color: face color of the markers and color passed to errorbar.
        marker: marker style for the scatter points.

    Side effects:
        Writes the column 'ci_width' onto self.variant_table.
    """
    variant_table = self.variant_table
    # NOTE: this assignment modifies self.variant_table itself, not a copy.
    variant_table.loc[:, 'ci_width'] = (variant_table.dG_ub - variant_table.dG_lb)/2

    # returnFractionGroupedBy is defined elsewhere; presumably it returns the
    # group keys, a per-group average and its error -- TODO confirm.
    x, y, yerr = returnFractionGroupedBy(variant_table, 'numTests', 'ci_width')

    if ax is None:
        fig = plt.figure(figsize=(4,3))
        ax = fig.add_subplot(111)
        fig.subplots_adjust(left=0.15, bottom=0.15, top=0.95, right=0.95)

    ax.scatter(x, y, c=color, edgecolor='k', linewidth=0.5, marker=marker, s=5)
    ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=0, capthick=1,
                color=color, linestyle='', ecolor='k', linewidth=0.5)

    # Label the axes we drew on; pyplot calls would hit whichever axes is
    # current, which need not be a caller-supplied `ax`.
    ax.set_xlabel('number of measurements')
    ax.set_ylabel('average error (kcal/mol)')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    fix_axes(ax)
def plotFractionFit(self):
    """Overlay histograms of fitFraction for variants at/below and above
    a pvalue cutoff of 0.01."""
    table = self.variant_table
    pvalue_cutoff = 0.01

    # 0.01-wide bins spanning 0 to 1
    binwidth = 0.01
    bins = np.arange(0, 1+binwidth, binwidth)

    # Both masks are built from explicit comparisons so that rows with a
    # NaN pvalue land in neither histogram.
    groups = [
        (table.pvalue <= pvalue_cutoff, 'red', 'passing cutoff'),
        (table.pvalue > pvalue_cutoff, 'grey', 'fails cutoff'),
    ]

    plt.figure(figsize=(4, 3.5))
    for mask, color, label in groups:
        plt.hist(table.loc[mask].fitFraction.values, alpha=0.5, color=color,
                 bins=bins, label=label)
    plt.ylabel('number of variants')
    plt.xlabel('fraction fit')
    plt.legend(loc='upper left')
    plt.tight_layout()
    fix_axes(plt.gca())
def plotFractionNotDifferentByDeltaG(self, offset=0, binedges=np.linspace(-12, -6, 12), min_n=0):
    """Plot the 'within_bound' fraction grouped by the 'rep1_bin' column.

    Args:
        offset: forwarded to self.findCombinedTable.
        binedges: forwarded to self.findCombinedTable and used to label
            the x ticks.
        min_n: accepted for interface compatibility; not used here.
    """
    combined = self.findCombinedTable(offset=offset, binedges=binedges)
    x, y, yerr = returnFractionGroupedBy(combined, 'rep1_bin', 'within_bound')

    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(111)
    ax.scatter(x, y, c='r', edgecolor='k', linewidth=0.5, marker='o')
    ax.errorbar(x, y, yerr, fmt='-', elinewidth=1, capsize=2, capthick=1,
                color='r', linestyle='', ecolor='k', linewidth=1)

    # raw string: '\D' is not a valid escape sequence in a normal literal
    plt.xlabel(r'$\Delta G$')
    plt.ylabel('fraction not different from replicate')
    plt.ylim(0, 1)
    # one tick per bin edge, at half-integer positions
    plt.xticks(np.arange(1, len(binedges)+1)-0.5,
               ['%.2f'%i for i in binedges], rotation=90)
    plt.xlim([-0.5, len(binedges)+0.5])
    fix_axes(ax)
    plt.tight_layout()
def compareParam(self, param, log_axes=False, filter_pvalue=False, min_n=0,
                 max_dG=None, variants=None):
    """Hexbin one parameter of expt1 against expt2 and overlay a linear fit.

    By default every variant with a non-null value in both experiments is
    used. Optional cutoffs narrow that set: filter_pvalue (intersect with
    self.getGoodVariants()), min_n (numTests >= min_n in both experiments)
    and max_dG (dG <= max_dG in both experiments). If `variants` is given
    it is used as-is and the cutoffs are not applied.

    Returns:
        Tuple (slope, r_value**2) from scipy's linregress; the regression
        is done on natural logs when log_axes is True.
    """
    x = self.expt1.variant_table.loc[self.all_variants, param]
    y = self.expt2.variant_table.loc[self.all_variants, param]

    if variants is None:
        # start from variants measured (non-null) in both experiments
        variants = pd.concat([x, y], axis=1).notnull().all(axis=1)

        if filter_pvalue:
            variants = variants&self.getGoodVariants()

        if min_n > 0:
            enough_tests = pd.concat(
                [self.expt1.variant_table.numTests >= min_n,
                 self.expt2.variant_table.numTests >= min_n],
                axis=1).all(axis=1)
            variants = (variants&enough_tests)

        if max_dG is not None:
            below_cutoff = pd.concat(
                [self.expt1.variant_table.dG <= max_dG,
                 self.expt2.variant_table.dG <= max_dG],
                axis=1).all(axis=1)
            variants = (variants&below_cutoff)

    x = x.loc[variants]
    y = y.loc[variants]

    hexbin_options = dict(cmap='Spectral_r', mincnt=1)
    if log_axes:
        hexbin_options.update(yscale='log', xscale='log')

    plt.figure(figsize=(4,4))
    plt.hexbin(x, y, **hexbin_options)
    plt.xlabel('expt1 %s'%param)
    plt.ylabel('expt2 %s'%param)
    ax = fix_axes(plt.gca())
    plt.tight_layout()

    # black line: linear fit; dotted red line: unit slope through the means
    xlim = np.array(ax.get_xlim())
    if log_axes:
        slope, intercept, r_value, p_value, std_err = st.linregress(np.log(x), np.log(y))
        fitted = np.power(xlim, slope)*np.exp(intercept)
        reference = xlim/np.mean(x)*np.mean(y)
    else:
        slope, intercept, r_value, p_value, std_err = st.linregress(x, y)
        fitted = xlim*slope+intercept
        reference = xlim-np.mean(x)+np.mean(y)
    plt.plot(xlim, fitted, 'k', linewidth=1)
    plt.plot(xlim, reference, 'r:', linewidth=1)

    return slope, r_value**2
def findAlpha(self, n):
    """Estimate the per-image ratio (y/x)**(1/n) and plot its distribution.

    Only entries where both self.x and self.y are positive are kept.

    Args:
        n: the ratio y/x is raised to the power 1/n.

    Returns:
        Tuple (alpha, lb, ub): the median of the per-image values and the
        two bounds returned by bootstrap.ci for the median (1000 samples).
    """
    x = self.x
    y = self.y
    positive = (x>0)&(y>0)

    per_image = np.power(y/x, 1/float(n)).loc[positive]
    alpha = per_image.median()
    lb, ub = bootstrap.ci(per_image, statfunction=np.median, n_samples=1000)

    xlim = [0.9 ,1.1]
    bins = np.arange(0.898, 1.1, 0.001)

    plt.figure(figsize=(4,3))
    sns.distplot(per_image, bins=bins, hist_kws={'histtype':'stepfilled'},
                 kde_kws={'clip':xlim})
    # solid line at the median, dotted lines at the bootstrap bounds
    plt.axvline(alpha, color='k', linewidth=0.5)
    for bound in (lb, ub):
        plt.axvline(bound, color='k', linestyle=':', linewidth=0.5)
    plt.xlabel('photobleach fraction per image')
    plt.ylabel('probability')
    plt.xlim(xlim)
    fix_axes(plt.gca())
    plt.tight_layout()
    return alpha, lb, ub
def bootstrapSlope(self, min_fluorescence=None, log_axis=False):
    """Plot the distribution of per-cluster slopes y/x and mark its peak.

    Args:
        min_fluorescence: forwarded to self.getGoodClusters, whose result
            selects the clusters used.
        log_axis: if True, use the natural log of y/x with bins on [-3, 3];
            otherwise use y/x with bins on [0, 5] and x limits [0, 2].

    Returns:
        The Series of slopes (log-transformed when log_axis is True).
    """
    x = self.x
    y = self.y
    index = self.getGoodClusters(min_fluorescence=min_fluorescence)

    if log_axis:
        slopes = np.log(y.loc[index]/x.loc[index])
        bins = np.linspace(-3, 3, 100)
        xlim = [-3, 3]
    else:
        slopes = y.loc[index]/x.loc[index]
        bins = np.linspace(0, 5, 100)
        xlim = [0, 2]

    plt.figure(figsize=(4,4))
    sns.distplot(slopes, bins=bins, kde_kws={'clip':xlim},
                 hist_kws={'histtype':'stepfilled'}, color='0.5')
    plt.axvline(slopes.median(), color='k', linewidth=1)

    # Find the most populated histogram bin. np.digitize returns 0 for values
    # below bins[0] and len(bins) for values at/above bins[-1] (and for NaN);
    # those are not real bins, so only labels 1..len(bins)-1 are counted.
    # np.bincount(...).argmax() picks the smallest label on ties, like
    # scipy.stats.mode, without depending on the shape of its return value
    # (scalar vs. array differs across SciPy versions).
    digitized = np.digitize(slopes, bins)
    counts = np.bincount(digitized, minlength=len(bins)+1)[1:len(bins)]
    if counts.any():
        mode = counts.argmax() + 1
        max_prob = (bins[mode] + bins[mode-1])*0.5
        plt.axvline(max_prob, color='r', linewidth=1, linestyle=':',
                    label='max probability')
    else:
        # no slope fell inside the binned range; nothing to mark
        max_prob = np.nan

    annotation_text = ('slope (median)=%4.3f\nslope (max prob)=%4.5f'
                       %(slopes.median(), max_prob))
    plt.annotate(annotation_text,
                 xy=(.05, .95),
                 xycoords='axes fraction',
                 horizontalalignment='left',
                 verticalalignment='top')

    plt.xlabel('slope')
    plt.ylabel('probability distribution')
    plt.xlim(xlim)
    plt.tight_layout()
    fix_axes(plt.gca())
    return slopes
def plotDeltaGDoubleDagger(self, variant=None, dG_cutoff=None, plot_on=False,
                           params=['koff', 'dG'], variants=None):
    """Hexbin the change in dG against the change in a rate-derived dG.

    All values are taken relative to a reference variant. x is dG minus the
    reference dG. y is derived from koff via parameters.find_dG_from_Kd, or,
    when plot_on is True, the negated value derived from koff/Kd.

    Args:
        variant: reference variant label; defaults to 34936 when None.
        dG_cutoff: if given (and `variants` is None), keep only variants
            with dG below this value.
        plot_on: choose the koff/Kd-derived quantity for y instead of the
            koff-derived one.
        params: column names read from the off-rate table (first entry)
            and the affinity table (second entry).
        variants: selection of variants to plot; defaults to
            self.getGoodVariants().

    Returns:
        Tuple (x, y) of the plotted Series.
    """
    parameters = fitting.fittingParameters()
    koff = self.offRate.variant_table.loc[self.all_variants, params[0]].astype(float)
    dG = self.affinityData.variant_table.loc[self.all_variants, params[1]].astype(float)
    if variant is None:
        variant = 34936

    # find dG predicted from off and on rates
    dG_dagger = parameters.find_dG_from_Kd(koff.astype(float))
    kds = parameters.find_Kd_from_dG(dG)
    dG_dagger_on = parameters.find_dG_from_Kd((koff/kds).astype(float))

    # find the variants to plot based on goodness of fit and dG cutoff if given
    # NOTE(review): the cutoff is applied only when `variants` is not
    # supplied, mirroring compareParam -- confirm against the original layout.
    if variants is None:
        variants = self.getGoodVariants()
        if dG_cutoff is not None:
            variants = variants&(dG < dG_cutoff)

    # decide which y to plot based on user input
    x = (dG - dG.loc[variant]).loc[variants]
    if plot_on:
        y = -(dG_dagger_on - dG_dagger_on.loc[variant]).loc[variants]
    else:
        y = (dG_dagger - dG_dagger.loc[variant]).loc[variants]

    # plot on a square extent covering both x and y
    fig = plt.figure(figsize=(3,3))
    ax = fig.add_subplot(111, aspect='equal')
    xlim = [min(x.min(), y.min()), max(x.max(), y.max())]
    plt.hexbin(x, y, extent=xlim+xlim, gridsize=100, cmap='Spectral_r', mincnt=1)
    slope, intercept, r_value, p_value, std_err = st.linregress(x, y)

    # dashed black: linear fit; red: mean of y (plot_on) or the identity line
    xlim = np.array(ax.get_xlim())
    plt.plot(xlim, xlim*slope + intercept, 'k--', linewidth=1)
    if plot_on:
        plt.plot(xlim, [y.mean()]*2, 'r', linewidth=1)
    else:
        plt.plot(xlim, xlim, 'r', linewidth=1)

    # raw strings: '\D' and '\d' are not valid escape sequences
    plt.xlabel(r'$\Delta \Delta G$')
    plt.ylabel(r'$\Delta \Delta G_{off}\dagger$')
    plt.tight_layout()
    fix_axes(ax)

    annotationText = ['slope = %4.2f '%(slope),
                      'intercept = %4.2f'%intercept,
                      'pvalue = %4.1e'%p_value
                      ]
    ax.annotate('\n'.join(annotationText),
                xy=(.05, .95),
                xycoords='axes fraction',
                horizontalalignment='left',
                verticalalignment='top')
    return x, y