def analyze(fig, ax, trade, tradeQuantities):
    m = max(tradeQuantities)
    if m == 0:
        return
    normalized = [x / m for x in tradeQuantities]
    s = sorted(normalized)
    # c = np.cumsum(s)
    x = [x / (len(tradeQuantities) - 1) for x in range(len(tradeQuantities))]
    ax.plot(x, s)
    kde = scipy.stats.gaussian_kde(s)
    x = [x / 100 for x in range(101)]
    y = kde(x).T
    ax.plot(x, y)
    mn = np.median(s)
    sd = np.std(s)
    mnsd = mn + 2 * sd
    if mnsd < 1:
        # Position of the first sorted value above the median + 2*sigma
        # threshold, as a fraction of the sample count.
        crossing = [b > mnsd for b in s].index(True) / len(s)
        xb, xt = ax.get_xlim()
        yb, yt = ax.get_ylim()
        polygonPoints = [[crossing, yt], [crossing, yb], [xt, yb], [xt, yt]]
        polygon = Polygon(polygonPoints, closed=True)
        p = PatchCollection([polygon], alpha=0.3)
        ax.add_collection(p)
    colour_map = recolourMap(normalized, trade)  # recolourMap is defined elsewhere
    fig.savefig('results/trade_' + str(trade) + '.png', bbox_inches='tight')
    if mnsd < 1:
        return np.argmax(normalized)
    return 0
def plot_gkde(data, *args, **kwargs):
    """Plot a Gaussian kernel density estimate.

    *args and **kwargs are passed directly to pyplot.plot().
    """
    kde = scipy.stats.gaussian_kde(data)
    lower = np.mean(data) - 3 * np.std(data)
    upper = np.mean(data) + 3 * np.std(data)
    x = np.linspace(lower, upper, 100)
    y = kde(x)
    pp.plot(x, y, *args, **kwargs)
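# Usage sketch for plot_gkde (hypothetical data; assumes the module imports
# numpy as np, scipy.stats, and matplotlib.pyplot as pp, as the function requires):
data = np.random.normal(loc=0.0, scale=1.0, size=500)
plot_gkde(data, 'r--', label='kde')  # extra args are forwarded to pp.plot()
pp.legend()
pp.show()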
def KDE(a, evalRange=None, bins=1000):
    assert len(a) > 1, "Can't KDE a single value"
    if evalRange == 'padded':
        padding = np.max(a) - np.min(a)
        x = np.linspace(np.min(a) - padding, np.max(a) + padding, bins)
    elif evalRange is None:
        x = np.linspace(np.min(a), np.max(a), bins)
    else:
        x = np.linspace(evalRange[0], evalRange[1], bins)
    pdf = kde(a)  # kde: scipy.stats.gaussian_kde
    y = pdf(x)
    return x, y
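# Usage sketch for KDE (hypothetical data; assumes numpy as np and
# scipy.stats.gaussian_kde bound to the name kde, as the function requires):
a = np.random.normal(size=200)
x, y = KDE(a, evalRange='padded')             # grid padded by one data range
x2, y2 = KDE(a, evalRange=(-4, 4), bins=200)  # or an explicit range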
def __call__(self, observations):
    """Estimate the p_observations_given_state matrix for a set of observations.

    If observations is a list/array of length N, returns an array of shape
    (N, S), where element [t, s] is the probability of the observation at
    time t assuming the system was in fact in state s.
    """
    observations = numpy.asarray(observations)
    if self.continuous:
        state_probabilities = [kde(observations) for kde in self.state_distributions]
    else:
        state_probabilities = [hist[observations] for hist in self.state_distributions]
    return numpy.transpose(state_probabilities)
def decompose(pic):
    # Kernel density clustering: find the local maxima/minima of the
    # grey-level density, the background colour, and the cluster layers.
    print('Decomposing layers by density clustering...')
    d0 = kde(pic.reshape(-1), bw_method=0.2)(np.arange(256))  # kernel density estimate
    d = np.diff(d0)
    d1 = np.where((d[:-1] < 0) * (d[1:] > 0))[0]  # local minima
    d1 = [0] + list(d1) + [256]
    d2 = np.where((d[:-1] > 0) * (d[1:] < 0))[0]  # local maxima
    if d1[1] < d2[0]:
        d2 = [0] + list(d2)
    if d1[-2] > d2[-1]:
        d2 = list(d2) + [255]
    dc = sum(d2[i] * (pic >= d1[i]) * (pic < d1[i + 1]) for i in range(len(d2)))
    print('Decomposition done: %s layers in total.' % len(d2))
    return dc
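# Usage sketch for decompose with a synthetic two-level image (hypothetical
# data; assumes numpy as np and scipy.stats.gaussian_kde bound to kde):
grey = np.concatenate([np.random.normal(60, 10, 2048),
                       np.random.normal(200, 10, 2048)])
pic = grey.clip(0, 255).astype(int).reshape(64, 64)
layers = decompose(pic)  # each pixel is replaced by its cluster's density peak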
def build_kde(self, inputData, columnName):
    if len(inputData) < 5:
        print("Please input at least 5 data points")
        return
    num_div = max(5, int(len(inputData) / 10))
    # Generate the kde function
    kde_function = kde(inputData[columnName])
    # Generate the plot range over the same column the KDE was fit on
    plot_range = numpy.linspace(max(inputData[columnName]),
                                min(inputData[columnName]),
                                num=num_div)
    return {'x_kde': plot_range, 'y_kde': kde_function}
def build_kde(self, inputData, columnName):
    if len(inputData) < 5:
        print("Please input at least 5 data points")
        return
    # Number of data points to be plotted
    num_div = max(5, int(len(inputData) / 10))
    # Generate the plot range
    plot_range = numpy.linspace(max(inputData[columnName]),
                                min(inputData[columnName]),
                                num=num_div)
    # Generate the kde function
    kde_function = kde(inputData[columnName])
    # Plot the KDE and the histogram simultaneously
    # ----------------------------------------------
    # Initialize the plot
    fig, ax1 = plt.subplots()
    # Create the secondary y-axis for the histogram
    ax2 = ax1.twinx()
    # Plot the histogram on the secondary y-axis
    inputData[columnName].hist(bins=num_div, ax=ax2)
    # Plot the KDE on the primary y-axis
    ax1.plot(plot_range, kde_function(plot_range), 'r-',
             plot_range, kde_function(plot_range), 'ko')
    # Label the axes
    ax1.set_xlabel('Returns')
    ax1.set_ylabel('KDE', color='g')
    ax2.set_ylabel('Frequency', color='b')
    return {'x_kde': plot_range, 'y_kde': kde_function}
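# Usage sketch (hedged): this version of build_kde never reads `self`, so for
# a quick test we can pass None in its place. The DataFrame below is
# hypothetical; assumes numpy, pandas, matplotlib.pyplot as plt, and
# gaussian_kde bound to kde, as the method requires.
import pandas as pd
df = pd.DataFrame({'returns': numpy.random.normal(0.0, 0.02, 250)})
out = build_kde(None, df, 'returns')  # returns {'x_kde': ..., 'y_kde': ...}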
def maximize(samples):
    '''
    Use Kernel Density Estimation to find a continuous PDF
    from the discrete sampling. Maximize that distribution.

    Parameters
    ----------
    samples : list (shape = (nsamples, ndim))
        Observations drawn from the distribution which is going to be fit.

    Returns
    -------
    maximum : list(ndim)
        Maxima of the probability distributions along each axis.
    '''
    from scipy.optimize import minimize
    from scipy.stats import gaussian_kde as kde

    # Give the samples array the proper shape.
    samples = np.transpose(samples)
    # Estimate the continuous PDF.
    estimate = kde(samples)

    # Minimize the negated estimate to find the maximum.
    def PDF(x):
        return -estimate(x)[0]  # scalar objective for scipy.optimize.minimize

    # Initial guess on the maximum is the 50th percentile of each dimension.
    p0 = [np.percentile(samples[i], 50) for i in range(samples.shape[0])]
    # Calculate the maximum of the distribution.
    maximum = minimize(PDF, p0).x
    return maximum
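# Usage sketch for maximize (hypothetical data; assumes numpy as np): recover
# the mode of a 2-D Gaussian cloud from its samples.
samples = np.random.multivariate_normal([1.0, -2.0], np.eye(2), size=2000)
peak = maximize(samples)  # should land near [1, -2]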
ptsw_compY = ptsw_data[:, 2]
ptsw_compZ = ptsw_data[:, 3]

# Projection of the converter components #
xmin = -20.
xmax = +10.
ymin = -16.
ymax = 0.
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])

pr = np.vstack([prw_compX[:4000], prw_compY[:4000]])
pdf_2d_pr = kde(pr)
Z_pr = np.reshape(pdf_2d_pr(positions).T, X.shape)

pts = np.vstack([ptsw_compX[:4000], ptsw_compY[:4000]])
pdf_2d_pts = kde(pts)
Z_pts = np.reshape(pdf_2d_pts(positions).T, X.shape)

pts_other = np.vstack([ptsw_compX[15000:21000], ptsw_compY[15000:21000]])
pdf_2d_pts_other = kde(pts_other)
Z_pts_other = np.reshape(pdf_2d_pts_other(positions).T, X.shape)

nlevels = 5
levels_pr = np.linspace(0.0, Z_pr.max(), nlevels + 1)[1:]
levels_pts = np.linspace(0.0, Z_pts.max(), nlevels + 1)[1:]
levels_pts_other = np.linspace(0.0, Z_pts_other.max(), nlevels + 1)[1:]
def plot_contours(x, y, xlabel='', ylabel='', input_x=None, input_y=None,
                  input_color=['red', 'blue', 'magenta'], fontsize=22, title='',
                  plot_samples=False, samples_color='gray', contour_lw=1,
                  savefile=None, plot_contours=True, show_legend=False,
                  fill_contours=True, contour_colors=('SkyBlue', 'Aqua'),
                  alpha=0.7, xmin=None, xmax=None, ymin=None, ymax=None,
                  input_label=None):
    """
    ...
    """
    input_x = np.atleast_1d(input_x)
    input_y = np.atleast_1d(input_y)
    n = 100
    points = np.array([x, y])
    posterior = kde(points)
    if xmin is None:
        xmin = x.min()
    if xmax is None:
        xmax = x.max()
    if ymin is None:
        ymin = y.min()
    if ymax is None:
        ymax = y.max()
    step_x = (xmax - xmin) / n
    step_y = (ymax - ymin) / n
    grid_pars = np.mgrid[0:n, 0:n]
    par_x = grid_pars[0] * step_x + xmin
    par_y = grid_pars[1] * step_y + ymin
    grid_posterior = grid_pars[0] * 0.
    for i in range(n):
        for j in range(n):
            grid_posterior[i][j] = posterior([par_x[i][j], par_y[i][j]])
    ix, iy = np.unravel_index(grid_posterior.argmax(), grid_posterior.shape)
    gridmaxx = par_x[ix, iy]
    gridmaxy = par_y[ix, iy]

    pl.figure()
    ax = pl.gca()
    pl.title(title, fontsize=fontsize)
    fig = pl.gcf()
    xlabel = ax.set_xlabel(xlabel, fontsize=fontsize)
    ylabel = ax.set_ylabel(ylabel, fontsize=fontsize)
    if plot_samples:
        pl.plot(x, y, 'o', ms=1, mfc=samples_color, mec=samples_color)
    for i, (inx, iny) in enumerate(zip(input_x, input_y)):
        if input_label is not None:
            label = input_label[i]
        else:
            label = 'input'
        pl.plot(inx, iny, 'o', mew=0.1, ms=5, color=input_color[i],
                label=label, zorder=4)
    pl.plot(gridmaxx, gridmaxy, 'x', mew=3, ms=5, color='k')
    if plot_contours:
        percentage_integral = np.array([0.95, 0.68, 0.])
        contours = 0. * percentage_integral
        num_epsilon_steps = 1000
        epsilon = grid_posterior.max() / num_epsilon_steps
        epsilon_marks = np.arange(num_epsilon_steps + 1)
        posterior_marks = grid_posterior.max() - epsilon_marks * epsilon
        posterior_norm = grid_posterior.sum()
        # Walk down from the peak until the enclosed mass exceeds each target.
        for j in np.arange(len(percentage_integral)):
            for i in epsilon_marks:
                posterior_integral = grid_posterior[np.where(grid_posterior > posterior_marks[i])].sum() / posterior_norm
                if posterior_integral > percentage_integral[j]:
                    break
            contours[j] = posterior_marks[i]
        contours[-1] = grid_posterior.max()
        pl.contour(par_x, par_y, grid_posterior, contours,
                   linewidths=contour_lw, colors='k', zorder=3)
        if fill_contours:
            pl.contourf(par_x, par_y, grid_posterior, contours,
                        colors=contour_colors, alpha=alpha, zorder=2)
    pl.xlim(xmin=xmin, xmax=xmax)
    pl.ylim(ymin=ymin, ymax=ymax)
    if show_legend:
        pl.legend(prop={'size': 20}, numpoints=1, loc='upper left')
    if savefile is None:
        return par_x, par_y, grid_posterior, contours
    else:
        pl.savefig(savefile, bbox_extra_artists=[xlabel, ylabel], bbox_inches='tight')
def plot_chains(mcmc, dists=None, ax=None, fig=None, multi_chain=False):
    from scipy.stats import gaussian_kde as kde
    from scipy.stats.mstats import mquantiles

    tally = mcmc['tally']
    chain = mcmc['chain'][tally:, :]
    guesses = mcmc['guesses'][tally:, :]
    titles = [[r'$\Omega$', r'$\phi$', r'$\rho$', r'$f_1$', r'$f_2$', r'$f_3$'][i]
              for i in mcmc['active_params']]
    l = len(mcmc['chain']) - tally
    if fig:
        axs = np.array(fig.get_axes()).reshape(len(titles), 2)
    else:
        fig, axs = plt.subplots(len(titles), 2, figsize=(18, 21))
    axs[0, 0].set_title("Chain")
    axs[0, 1].set_title("Distribution")
    # axs[0, 2].set_title("Gelman Rubin")
    for i, name in enumerate(titles):
        if multi_chain:
            ch = chain[:, i]
        else:
            ch = guesses[:, i]
        # LEFT - Chains
        # Limits
        if dists:
            a, b = dists[mcmc['active_params'][i]].args
            a, b = a, a + b
        else:
            a, b = min(ch), max(ch)
        qs = mquantiles(chain[:, i], [0.025, 0.5, 0.975])
        print(qs)
        a *= 0.9
        b *= 1.1
        ax = axs[i, 0]
        if multi_chain:
            my_alpha = 1 if mcmc['active'] else 0.6
            ax.plot(tally + np.arange(l), chain[:, i], label=mcmc['name'],
                    alpha=my_alpha, zorder=5 + 1 / (10 * my_alpha))
        else:
            ax.plot(tally + np.arange(l), guesses[:, i], color='steelblue',
                    ls='--', alpha=0.5, label="Proposed")
            ax.plot(tally + np.arange(l), chain[:, i], color='red', label="Accepted")
        lims = mcmc['tally'], len(mcmc['chain'])
        ax.hlines(qs, *lims, lw=1, linestyles='--', zorder=10)
        ax.set_xlim(*lims)
        ax.set_yticks(qs)
        ax.legend()
        # MIDDLE - Distribution
        try:
            if mcmc['active']:
                density = kde(chain[:, i])
                xs = np.linspace(a, b, 50)
                axs[i, 1].plot(xs, density(xs))
        except Exception:
            print("{} Can't KDE yet on {}".format(mcmc['name'], name))
        axs[i, 1].set_ylabel(name, rotation=0, fontsize=30)
        c, d = axs[i, 1].get_xlim()
        a = min(a, c)
        b = max(b, d)
        axs[i, 1].set_xlim(a, b)
        # RIGHT - Gelman Rubin
        # if plot_gr:
        #     axs[i, 2].plot(mcmc['gelman_rubin'][2:, i], color='green')
        #     axs[i, 2].hlines(1, 0, len(mcmc['gelman_rubin']))
        #     axs[i, 2].hlines(1.1, 0, len(mcmc['gelman_rubin']), linestyles='--')
    plt.tight_layout()
    return fig, axs
def plot_2d_posterior(x, y, xlabel='', ylabel='', input_x=None, input_y=None,
                      input_color='red', fontsize=22,
                      contour_colors=('SkyBlue', 'Aqua'), alpha=0.7,
                      xmin=None, xmax=None, ymin=None, ymax=None, title='',
                      plot_samples=False, samples_color='gray', contour_lw=2,
                      savefile=None, plot_contours=True, show_legend=False):
    """
    The main chains plotting routine, for visualizing 2d posteriors...
    """
    n = 100
    points = np.array([x, y])
    posterior = kde(points)
    if xmin is None:
        xmin = x.min()
    if xmax is None:
        xmax = x.max()
    if ymin is None:
        ymin = y.min()
    if ymax is None:
        ymax = y.max()
    step_x = (xmax - xmin) / n
    step_y = (ymax - ymin) / n
    grid_pars = np.mgrid[0:n, 0:n]
    par_x = grid_pars[0] * step_x + xmin
    par_y = grid_pars[1] * step_y + ymin
    grid_posterior = grid_pars[0] * 0.
    for i in range(n):
        for j in range(n):
            grid_posterior[i][j] = posterior([par_x[i][j], par_y[i][j]])

    pl.figure()
    ax = pl.gca()
    pl.title(title, fontsize=fontsize)
    fig = pl.gcf()
    xlabel = ax.set_xlabel(xlabel, fontsize=fontsize)
    ylabel = ax.set_ylabel(ylabel, fontsize=fontsize)
    if plot_samples:
        pl.plot(x, y, 'o', ms=1, mfc=samples_color, mec=samples_color)
    pl.plot(input_x, input_y, 'x', mew=3, ms=15, color=input_color,
            label='input', zorder=4)
    if plot_contours:
        percentage_integral = np.array([0.95, 0.68, 0.])
        contours = 0. * percentage_integral
        num_epsilon_steps = 1000
        epsilon = grid_posterior.max() / num_epsilon_steps
        epsilon_marks = np.arange(num_epsilon_steps + 1)
        posterior_marks = grid_posterior.max() - epsilon_marks * epsilon
        posterior_norm = grid_posterior.sum()
        # Walk down from the peak until the enclosed mass exceeds each target.
        for j in np.arange(len(percentage_integral)):
            for i in epsilon_marks:
                posterior_integral = grid_posterior[np.where(grid_posterior > posterior_marks[i])].sum() / posterior_norm
                if posterior_integral > percentage_integral[j]:
                    break
            contours[j] = posterior_marks[i]
        contours[-1] = grid_posterior.max()
        pl.contour(par_x, par_y, grid_posterior, contours,
                   linewidths=contour_lw, colors='k', zorder=3)
        pl.contourf(par_x, par_y, grid_posterior, contours,
                    colors=contour_colors, alpha=alpha, zorder=2)
    pl.xlim(xmin=xmin, xmax=xmax)
    pl.ylim(ymin=ymin, ymax=ymax)
    if show_legend:
        pl.legend(prop={'size': 20}, numpoints=1)
    if savefile is None:
        return par_x, par_y, grid_posterior, contours
    else:
        pl.savefig(savefile, bbox_extra_artists=[xlabel, ylabel], bbox_inches='tight')
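# Usage sketch for plot_2d_posterior (hypothetical chain; assumes numpy as np,
# pylab as pl, and gaussian_kde bound to kde, as the function requires):
xs = np.random.normal(0.3, 0.05, 1000)
ys = 2.0 * xs + np.random.normal(0.0, 0.02, 1000)
par_x, par_y, grid_post, contours = plot_2d_posterior(
    xs, ys, xlabel='$x$', ylabel='$y$', input_x=0.3, input_y=0.6)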
levels2 = [1., 3, 5., 7, 10., 15.]
#~ levels2 = levels[::2]
cax2 = ax.contour(X, Y, F, colors="black", levels=levels2, linewidths=1.0)
ax.clabel(cax2, fontsize=12, inline=1, fmt="%i")
#~ fig.colorbar(cax)

X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])

pdf_pr1 = np.reshape(kde(np.vstack([x_prw, dr_kink_prw]))(positions).T, X.shape)
pdf_pr2 = np.reshape(kde(np.vstack([x_prwr, dr_kink_prwr]))(positions).T, X.shape)
pdf_pr3 = np.reshape(kde(np.vstack([x_prwr2, dr_kink_prwr2]))(positions).T, X.shape)
pdf_pts1 = np.reshape(kde(np.vstack([x_ptsw, dr_kink_ptsw]))(positions).T, X.shape)
pdf_pts2 = np.reshape(kde(np.vstack([x_ptswr, dr_kink_ptswr]))(positions).T, X.shape)
pdf_pts3 = np.reshape(kde(np.vstack([x_ptswr2, dr_kink_ptswr2]))(positions).T, X.shape)
pdf_ppa1 = np.reshape(kde(np.vstack([x_ppaw, dr_kink_ppaw]))(positions).T, X.shape)
###########################################
################ Table 2 ##################
###########################################
print('Table 2')
print('Lipschitz bound under certain n and J values')
print(Lip_Bound_matrix)

##### Generate data for the left plot of Fig 1 #####
# Build $\pi_D^Q$ and $\pi_D^{Q,n}$, using 10,000 samples
N_kde = int(1E4)
N_mc = int(1E4)
np.random.seed(123456)
initial_sample = np.random.normal(size=N_kde)
pfprior_sample = Qexact(initial_sample)
pfprior_dens = kde(pfprior_sample)


def pfprior_dens_n(n, x):
    pfprior_sample_n = Q(n, initial_sample)
    pdf = kde(pfprior_sample_n)
    return pdf(x)


error_r_D = np.zeros((5, 5))
np.random.seed(123456)
qsample = np.random.uniform(1, 4, N_mc)
for i in range(5):
    for j in range(5):
        error_r_D[i, j] = (np.mean((np.abs(pfprior_dens(qsample) -
# coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde
from scipy.stats import norm

N = 1000
sigma = 0.5
sample = np.r_[np.random.normal(-1, sigma, N), np.random.normal(1, sigma, N)]

x = np.linspace(-5, 5, 1000)
y = kde(sample).evaluate(x)
z = norm(-1, sigma).pdf(x) + norm(1, sigma).pdf(x)
z = z / np.trapz(z, x)  # normalize the mixture to integrate to one

plt.plot(x, y, label="kde")
plt.plot(x, z, label="sampling distribution")
plt.hist(sample, density=True, bins=100)  # `normed` was removed from matplotlib
plt.legend()
plt.show()
def main(args):
    """
    Main entrypoint for example generation.
    """
    args = parse_args(args)
    setup_logging(args.loglevel)
    np.random.seed(args.seed)
    fdir = "figures/comparison"
    check_dir(fdir)
    presentation = False
    save = True
    if not presentation:
        plt.rcParams["mathtext.fontset"] = "stix"
        plt.rcParams["font.family"] = "STIXGeneral"

    # Number of samples from the initial distribution, and the observed
    # mean (mu) and standard deviation (sigma).
    N, mu, sigma = int(1e3), 0.25, 0.1
    lam = np.random.uniform(low=-1, high=1, size=N)
    # Evaluate the QoI map on this initial sample set to form predicted data
    qvals_predict = QoI(lam, 5)  # evaluate lam^5 samples
    # Estimate the push-forward density for the QoI
    pi_predict = kde(qvals_predict)
    tick_fsize = 28
    legend_fsize = 24
    # Compute more observations for use in the BIP
    for num_data in [1, 5, 10, 20]:
        np.random.seed(123456)  # just for reproducibility; comment out if you want
        data = norm.rvs(loc=mu, scale=sigma**2, size=num_data)
        # We estimate the observed distribution parametrically to keep the
        # assumptions as similar as possible between the BIP and the SIP:
        # sigma is assumed known, and the unknown mean mu is estimated from
        # data to fit a Gaussian distribution.
        mu_est = np.mean(data)
        r_approx = np.divide(norm.pdf(qvals_predict, loc=mu_est, scale=sigma),
                             pi_predict(qvals_predict))
        # Use r to compute a weighted KDE approximating the updated density
        update_kde = kde(lam, weights=r_approx)
        # Construct the estimated push-forward of this updated density
        pf_update_kde = kde(qvals_predict, weights=r_approx)
        likelihood_vals = np.zeros(N)
        for i in range(N):
            likelihood_vals[i] = data_likelihood(qvals_predict[i], data, num_data, sigma)
        # Compute normalizing constants
        C_nonlinear = np.mean(likelihood_vals)
        data_like_normalized = likelihood_vals / C_nonlinear
        posterior_kde = kde(lam, weights=data_like_normalized)
        # Construct the push-forward of the statistical Bayesian posterior
        pf_posterior_kde = kde(qvals_predict, weights=data_like_normalized)

        # Plot the initial, updated, and posterior densities
        fig, ax = plt.subplots(figsize=(10, 10))
        lam_plot = np.linspace(-1, 1, num=1000)
        ax.plot(lam_plot, uniform.pdf(lam_plot, loc=-1, scale=2), "b--",
                linewidth=4, label="Initial/Prior")
        ax.plot(lam_plot, update_kde(lam_plot), "k-.", linewidth=4, label="Update")
        ax.plot(lam_plot, posterior_kde(lam_plot), "g:", linewidth=4, label="Posterior")
        ax.set_xlim([-1, 1])
        if num_data > 1:
            plt.annotate(f"$N={num_data}$", (-0.75, 5), fontsize=legend_fsize * 1.5)
        ax.set_ylim([0, 28])  # fix axis height for comparisons
        ax.tick_params(axis="x", labelsize=tick_fsize)
        ax.tick_params(axis="y", labelsize=tick_fsize)
        ax.set_xlabel("$\\Lambda$", fontsize=1.25 * tick_fsize)
        ax.legend(fontsize=legend_fsize, loc="upper left")
        if save:
            fig.savefig(f"{fdir}/bip-vs-sip-{num_data}.png", bbox_inches="tight")
        plt.close(fig)

        # Plot the push-forward of the initial and the observed density, plus
        # the push-forwards of the pullback and of the statistical posterior
        fig, ax = plt.subplots(figsize=(10, 10))
        qplot = np.linspace(-1, 1, num=1000)
        ax.plot(qplot, norm.pdf(qplot, loc=mu, scale=sigma), "r-",
                linewidth=6, label="$N(0.25, 0.1^2)$")
        ax.plot(qplot, pi_predict(qplot), "b-.", linewidth=4, label="PF of Initial")
        ax.plot(qplot, pf_update_kde(qplot), "k--", linewidth=4, label="PF of Update")
        ax.plot(qplot, pf_posterior_kde(qplot), "g:", linewidth=4, label="PF of Posterior")
        ax.set_xlim([-1, 1])
        if num_data > 1:
            plt.annotate(f"$N={num_data}$", (-0.75, 5), fontsize=legend_fsize * 1.5)
        ax.set_ylim([0, 28])  # fix axis height for comparisons
        ax.tick_params(axis="x", labelsize=tick_fsize)
        ax.tick_params(axis="y", labelsize=tick_fsize)
        ax.set_xlabel("$\\mathcal{D}$", fontsize=1.25 * tick_fsize)
        ax.legend(fontsize=legend_fsize, loc="upper left")
        if save:
            fig.savefig(f"{fdir}/bip-vs-sip-pf-{num_data}.png", bbox_inches="tight")
        plt.close(fig)
def _get_uni_kde(data, bw_method="silverman"):
    # Fit a univariate Gaussian KDE with the given bandwidth rule.
    return kde(data, bw_method=bw_method)
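# Usage sketch (hypothetical data; assumes numpy as np and
# scipy.stats.gaussian_kde bound to kde):
pdf = _get_uni_kde(np.random.normal(size=100))
density_at_zero = pdf(0.0)  # evaluate the fitted density at a point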
ax = axes[i]
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_edgecolor('#444444')
ax.spines['bottom'].set_linewidth(2.0)
ax.spines['bottom'].set_zorder(3)
ax.get_yaxis().set_ticks([])
ax.get_yaxis().set_ticklabels([])

cv = data_i[::nskip, cv_index]
cv_pdf = kde(cv)
yy = cv_pdf(xx)
density_max = np.max([density_max, np.max(yy)])
if i == 0:
    ax.plot(xx, yy, color='orangered', linewidth=2.0, label=r"$P_1$")
else:
    ax.plot(xx, yy, color='orangered', linewidth=2.0)
ax.fill_between(xx, yy, facecolor='orange', alpha=0.7)

cv = data_i[::nskip, cv_index + 1]
cv_pdf = kde(cv)
def kde(self, x=None, y=None, bw_method=None, alpha=None, ax=None,
        interactive=True, width=450, height=300, **kwds):
    """Kernel Density Estimate plot for DataFrame data

    >>> dataframe.vgplot.kde()  # doctest: +SKIP

    Parameters
    ----------
    x : string, optional
        the column to use as the x-axis variable. If not specified, the
        index will be used.
    y : string, optional
        the column to use as the y-axis variable. If not specified, all
        columns (except x if specified) will be used.
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable. See
        `scipy.stats.gaussian_kde` for more details.
    alpha : float, optional
        transparency level, 0 <= alpha <= 1
    ax : Axes, optional
        If specified, add the plot as a layer to the given axis
    interactive : bool, optional
        if True (default) then produce an interactive plot
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels

    Returns
    -------
    axes : pdvega.Axes
        The vega-lite plot
    """
    from scipy.stats import gaussian_kde as kde

    if x is not None:
        raise NotImplementedError('"x" argument to df.vgplot.kde()')
    if y is not None:
        df = self._data[y].to_frame()
    else:
        df = self._data

    tmin, tmax = df.min().min(), df.max().max()
    trange = tmax - tmin
    t = np.linspace(tmin - 0.5 * trange, tmax + 0.5 * trange, 1000)

    kde_df = pd.DataFrame({col: kde(df[col], bw_method=bw_method).evaluate(t)
                           for col in df}, index=t)
    kde_df.index.name = ' '

    f = FramePlotMethods(kde_df)
    return f.line(value_name='Density', alpha=alpha, ax=ax,
                  interactive=interactive, width=width, height=height, **kwds)