class EstimatedKDE(object):
    """Convenience wrapper around KDEUnivariate with summary statistics."""
    eps = 0.05
    points = 10000

    def __init__(self):
        super(EstimatedKDE, self).__init__()
        self.dist = None

    def fit(self, data):
        self.min = np.min(data)
        self.max = np.max(data)
        self.mean = np.mean(data)
        self.std = np.std(data)
        self.dist = KDEUnivariate(data)
        self.dist.fit()
        return self

    def mode(self):
        x = np.linspace(self.min, self.max, self.points)
        y = self.dist.evaluate(x)
        return x[np.argmax(y)]

    def median(self):
        # Invert the fitted CDF at 0.5; indexing icdf at a fixed position
        # does not correspond to the 50th percentile.
        return np.interp(0.5, self.dist.cdf, self.dist.support)

    def pdf(self, x):
        return self.dist.evaluate(x)
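# Added usage sketch (not from the original source): fit EstimatedKDE on a
# synthetic unimodal sample and query its summary statistics. Assumes
# `import numpy as np` and the KDEUnivariate import used by the class above.
rng = np.random.default_rng(0)
sample = rng.normal(loc=5.0, scale=2.0, size=1000)
est = EstimatedKDE().fit(sample)
print("mode:  ", est.mode())               # close to 5.0 for this sample
print("median:", est.median())             # also close to 5.0
print("pdf(5):", est.pdf(np.array([5.0])))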
def kde_param_reflection(distro):
    ### this version is very susceptible to local maxima...
    ### kde_param_reflection tries to ensure correct handling of
    ### multimodal distributions near the data boundaries
    distro = distro[np.isfinite(distro)]
    MIN, MAX = min(distro), max(distro)
    span = np.linspace(MIN, MAX, 200)

    ### create distribution reflections about each boundary
    lower = MIN - abs(distro - MIN)
    upper = MAX + abs(distro - MAX)

    ### staple them together
    merge = np.concatenate([lower, distro, upper])

    ### compute the kernel density estimate for both
    KDE_MAIN = KDEUnivariate(distro)
    KDE_FULL = KDEUnivariate(merge)

    ### fit both, using the std from the main distribution
    KDE_MAIN.fit(bw=np.std(distro) / 4.)
    KDE_FULL.fit(bw=np.std(distro) / 4.)

    ### use the main KDE to rescale the full (reflected) one
    scale = np.median(np.divide(KDE_MAIN.evaluate(span), KDE_FULL.evaluate(span)))

    ### now maximize the full KDE, using the main KDE's argmax as the starting guess
    result = minimize(lambda x: -1 * KDE_FULL.evaluate(x),
                      x0=span[KDE_MAIN.evaluate(span) == max(KDE_MAIN.evaluate(span))],
                      method='Powell')  # Powell has been working pretty well

    return {'result': float(result['x']),
            'kde': KDE_MAIN,
            'kde_reflect': interp1d(span, KDE_FULL.evaluate(span) * scale)}
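# Added demo sketch: the reflection above is a standard boundary-bias
# correction. On half-normal data (hard boundary at 0) a plain KDE leaks
# mass below zero, while the reflected estimate keeps it at the boundary.
# Assumes numpy, scipy.optimize.minimize, and scipy.interpolate.interp1d
# are imported as the function above requires.
rng = np.random.default_rng(1)
halfnorm = np.abs(rng.normal(size=2000))
out = kde_param_reflection(halfnorm)
print("estimated mode:   ", out['result'])                      # should sit near 0
print("boundary density: ", out['kde_reflect'](halfnorm.min()))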
def kde_hist_weight(data, xra, nbin=50, bandwidth=None, density=False,
                    weights=None, err=None, mirror=False, cdf=False):
    data = data[np.isfinite(data)]
    xmin, xmax = xra

    if mirror:
        idx = (data < xmin + 0.3)
        data = np.append(data, 2.0 * xmin - data[idx])

    x_plot = np.linspace(xmin, xmax, nbin)
    kde_est = KDEUnivariate(data)

    # FFT evaluation is only available for the unweighted case
    if weights is None:
        fft_opt = True
        weights_sum = len(data) * 1.0
    else:
        fft_opt = False
        weights_sum = np.sum(weights)

    if bandwidth is not None:
        bw_in = bandwidth
    else:
        bw_in = 'normal_reference'
    kde_est.fit(bw=bw_in, weights=weights, fft=fft_opt)

    if density:
        result = kde_est.evaluate(x_plot)
    else:
        result = kde_est.evaluate(x_plot) * weights_sum
    result_x = x_plot

    if cdf:
        func = lambda x: kde_est.evaluate(x)
        cdf_vals = []
        for xx in x_plot:
            vv, _ = quad(func, xmin, xx)
            cdf_vals.append(vv)
        return result, result_x, np.array(cdf_vals)
    return result, result_x
def calc_bayes_factor(prior_samples, posterior_samples, x=0):
    '''Returns the ratio of prior to posterior density at `x` (a
    Savage-Dickey-style density ratio). Values > 1 indicate the density
    at `x` dropped after conditioning on the data, i.e. there is less
    support for `x` under the posterior than under the prior.
    '''
    kde = KDEUnivariate(prior_samples)
    kde.fit()
    prior_density_at_x = kde.evaluate([x])

    kde = KDEUnivariate(posterior_samples)
    kde.fit()
    posterior_density_at_x = kde.evaluate([x])

    BF_prior_post = prior_density_at_x / posterior_density_at_x
    return BF_prior_post[0]
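# Added illustration: with a prior centered on x = 0 and a posterior pulled
# away from it, the density at 0 drops, so the ratio comes out well above 1.
rng = np.random.default_rng(2)
prior = rng.normal(0.0, 1.0, 5000)
posterior = rng.normal(2.0, 0.5, 5000)
print(calc_bayes_factor(prior, posterior))   # >> 1: mass moved away from 0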
def uniform_kde_sample(frame, variable, bounds, p_scale=0.7, cut=True):
    ### updated uniform sample function to
    ### homogenize the distribution of the training variable
    print("... uniform_kde_sample")
    if variable == 'TEFF':
        kde_width = 100
    else:
        kde_width = 0.15

    ### Basics
    var_min, var_max = min(frame[variable]), max(frame[variable])
    distro = np.array(frame[variable])

    ### Handle boundaries by reflecting about each edge
    lower = var_min - abs(distro - var_min)
    upper = var_max + abs(distro - var_max)
    merge = np.concatenate([lower, upper, distro])

    ### KDE
    KDE_MERGE = KDEUnivariate(merge)
    KDE_MERGE.fit(bw=kde_width)

    #### interpolate KDE_MERGE for computation speed
    span = np.linspace(var_min, var_max, 100)
    KDE_FUN = interp1d(span, KDE_MERGE.evaluate(span))

    ### Rescale: this rescales the original distribution's KDE function
    full_c = len(distro) / integrate.quad(KDE_MERGE.evaluate, var_min, var_max)[0]

    ### respan, to avoid penalizing low counts outside the variable range
    respan = np.linspace(bounds[0], bounds[1], 100)
    scale = np.percentile(KDE_MERGE.evaluate(respan) * full_c, p_scale * 100.)

    ### Accept-reject sampling
    sample = np.random.uniform(0, 1, len(distro)) * KDE_FUN(distro) * full_c
    boo_array = sample < scale

    selection = frame.iloc[boo_array].copy()
    shuffle = selection.iloc[np.random.permutation(len(selection))].copy()
    return shuffle
def reweight(rc, metad_traj, cv_columns, v_minus_c_col, rc_bins=20, kt=2.5):
    """
    Reweight a biased MD trajectory to an unbiased probability along a
    given reaction coordinate, using the rbias column from COLVAR to
    perform the reweighting per Tiwary and Parrinello.
    """
    # read in parameters from sgoop object
    colvar = metad_traj[cv_columns].values
    v_minus_c = metad_traj[v_minus_c_col].values

    # calculate rc observable for each frame
    colvar_rc = np.sum(colvar * rc, axis=1)

    # calculate frame weights, per Tiwary and Parrinello, JPCB 2015 (c(t) method)
    weights = np.exp(v_minus_c / kt)
    norm_weights = weights / weights.sum()

    # fit weighted KDE with the statsmodels method
    kde = KDEUnivariate(colvar_rc)
    kde.fit(weights=norm_weights, bw=0.05, fft=False)

    # evaluate pdf on a grid for use in SGOOP
    grid = np.linspace(colvar_rc.min(), colvar_rc.max(), num=rc_bins)
    pdf = kde.evaluate(grid)
    pdf = pdf / pdf.sum()

    return pdf, grid
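# Added invocation sketch; the column names and rbias values here are
# hypothetical stand-ins for a real COLVAR-derived DataFrame.
import pandas as pd

rng = np.random.default_rng(3)
traj = pd.DataFrame({
    "cv1": rng.normal(size=500),
    "cv2": rng.normal(size=500),
    "rbias": rng.uniform(0, 5, size=500),  # stand-in for V(s) - c(t)
})
pdf, grid = reweight(rc=np.array([0.8, 0.2]), metad_traj=traj,
                     cv_columns=["cv1", "cv2"], v_minus_c_col="rbias")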
def _kde_plot(
    values: ndarray, grid: ndarray, axes: Axes, bw: Union[float, str] = "scott"
) -> None:
    """Calculate KDE for observed spacings.

    Parameters
    ----------
    values: ndarray
        the values used to compute (fit) the kernel density estimate
    grid: ndarray
        the grid of values over which to evaluate the computed KDE curve
    axes: pyplot.Axes
        the current axes object to be modified
    bw: bandwidth
        the `bw` argument for the statsmodels KDEUnivariate `.fit`

    Notes
    -----
    We do this manually because we want to ensure consistency of the KDE
    calculation and remove Seaborn's control over the process, while also
    avoiding inconsistent behaviours like
    https://github.com/mwaskom/seaborn/issues/938 and
    https://github.com/mwaskom/seaborn/issues/796
    """
    values = values[values > 0]  # prevent floating-point bad behaviour
    kde = KDE(values)
    kde.fit(kernel="gau", bw=bw, cut=0)
    evaluated = np.empty_like(grid)
    for i, _ in enumerate(evaluated):
        evaluated[i] = kde.evaluate(grid[i])
    kde_curve = axes.plot(grid, evaluated, label="Kernel Density Estimate")
    plt.setp(kde_curve, color="black")
def kde_statsmodels_u(self, x_grid, bandwidth=0.2, **kwargs):
    """Univariate Kernel Density Estimation with Statsmodels."""
    from statsmodels.nonparametric.kde import KDEUnivariate
    kde = KDEUnivariate(self.data)
    kde.fit(bw=bandwidth, **kwargs)
    return kde.evaluate(x_grid)
def gaussian_density_estimation(samples, weights, grid, bw=0.1):
    """
    Kernel density estimation with a Gaussian kernel.

    Parameters
    ----------
    samples : np.ndarray
        Array of sample values.
    weights : np.ndarray
        Array of sample weights. If None, unweighted KDE will be performed.
    grid : np.ndarray
        Grid points at which the KDE function should be evaluated.
    bw : float
        Bandwidth parameter for kernel density estimation. Corresponds to
        sigma in the case of a Gaussian kernel.

    Returns
    -------
    np.ndarray
        The probability density values at the supplied grid points.
    """
    # KDE for fine-grained optimization
    kde = KDEUnivariate(samples)
    kde.fit(weights=weights, bw=bw, fft=False)

    # evaluate pdf on a grid for use in SGOOP
    # TODO: area under curve between points instead of pdf at point
    return kde.evaluate(grid)
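# Added usage sketch: a weighted KDE on a bimodal sample, where the weights
# deliberately up-weight the right-hand mode.
rng = np.random.default_rng(4)
samp = np.concatenate([rng.normal(-2, 0.5, 500), rng.normal(2, 0.5, 500)])
wts = np.concatenate([np.full(500, 0.5), np.full(500, 1.5)])
dens = gaussian_density_estimation(samp, wts, np.linspace(-4, 4, 200), bw=0.2)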
def draw_hist_and_kde(sample, grid, true_pdf):
    # histogram
    plt.hist(sample, 20, range=(grid.min(), grid.max()), density=True,
             label='histogram')
    # kernel density estimate
    kernel_density = KDEUnivariate(sample)
    kernel_density.fit()
    plt.plot(grid, kernel_density.evaluate(grid), color='green',
             linewidth=2, label='kde')
    # true density
    plt.plot(grid, true_pdf(grid), color='red', linewidth=2, alpha=0.3,
             label='true pdf')
    plt.legend()
    plt.show()
def gen_kde_pdf(distribution, bounds=None, kde_width=None):
    ## boundary correction for KDE
    if bounds is None:
        print("\t setting bounds to max value")
        var_min, var_max = min(distribution), max(distribution)
    else:
        distribution = distribution[np.where((distribution > bounds[0]) &
                                             (distribution < bounds[1]))]
        var_min, var_max = bounds[0], bounds[1]

    # reflect the data about each boundary and merge
    lower = var_min - abs(distribution - var_min)
    upper = var_max + abs(distribution - var_max)
    merge = np.concatenate([lower, upper, distribution])

    if kde_width is None:
        print("... setting kde_width")
        kde_width = S_MAD(distribution) / 2.

    KDE_MERGE = KDEUnivariate(merge)
    KDE_MERGE.fit(bw=kde_width)

    # renormalize so the pdf integrates to one over [var_min, var_max]
    SCALE = np.divide(1., integrate.quad(KDE_MERGE.evaluate, var_min, var_max)[0])
    return lambda X: SCALE * KDE_MERGE.evaluate(X)
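# Added usage sketch: a boundary-corrected pdf for data truncated to [0, 1].
# Passing kde_width explicitly sidesteps the S_MAD dependency above.
rng = np.random.default_rng(5)
trunc = rng.uniform(0.0, 1.0, 2000)
pdf = gen_kde_pdf(trunc, bounds=(0.0, 1.0), kde_width=0.05)
print(pdf(np.array([0.01, 0.5, 0.99])))   # roughly flat, near 1.0 throughout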
def compute_kde(data, test_x):
    data = data.flatten()
    test_x = test_x.flatten()
    kde = KDEUnivariate(data)
    kde.fit(kernel="gau", bw="silverman")
    dens = kde.evaluate(test_x)
    return dens, None
def kde_1d(signal, x_grid=None):
    """
    Return the 1d KDE of a vector signal (Created 01/24/2015)

    Todo: how are the KDEs normalized? (I want the kde to sum to 1....)

    https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/
    http://glowingpython.blogspot.com/2012/08/kernel-density-estimation-with-scipy.html

    Usage
    -----
    >>> x = np.linspace(0,1,401)
    >>> kde, x = tw.kde_1d(signal, x)
    >>> plt.plot(x, kde)
    >>> plt.grid('on')
    """
    from statsmodels.nonparametric.kde import KDEUnivariate

    kde = KDEUnivariate(signal)
    kde.fit()

    if x_grid is None:
        x_grid = np.linspace(0, 1, 401)

    # kde estimate, normalized to sum to one over the grid
    # (need to come back on this.... multiply by bin spacing??)
    kde_est = kde.evaluate(x_grid)
    kde_est /= kde_est.sum()
    return kde_est, x_grid
def fit_kde(x, grid):
    kde = KDEUnivariate(x)
    kde.fit()
    return kde.evaluate(grid)
def pdf(self, token, years, bw=5, *args, **kwargs):
    """
    Estimate a density function from a token's ratio series.

    Args:
        token (str)
        years (iter)
        bw (int)

    Returns:
        OrderedDict {year: density}
    """
    series = self.clean_series(token, *args, **kwargs)

    # Use the ratio values as weights.
    weights = np.array(list(series.values()))

    # Fit the density estimate.
    density = KDEUnivariate(list(series.keys()))
    density.fit(fft=False, weights=weights, bw=bw)

    samples = OrderedDict()
    for year in years:
        samples[year] = density.evaluate(year)[0]

    return samples
def calcKDE(kd_bw=0.1):
    """KDE using statsmodels; `nao_rn` and `x_kde` are module-level globals."""
    kde = KDEUnivariate(nao_rn)
    kde.fit(bw=kd_bw)
    return kde.evaluate(x_kde)
def weighted_kernel_density_1d(values, weights, bw='silverman', plot=False):
    from statsmodels.nonparametric.kde import KDEUnivariate

    kden = KDEUnivariate(values)
    kden.fit(weights=weights, bw=bw, fft=False)

    if plot:
        import matplotlib.pyplot as plt
        plt.plot(kden.support, [kden.evaluate(xi) for xi in kden.support], 'o-')
    return kden
def PSTH(spike_times, bw_psth=BW_PSTH, mirror=False, trial_time=None,
         norm=True, trial_duration=2.5, **kwargs):
    num = len(spike_times)
    spike_times_flat = flatten(spike_times)
    total_spikes = len(spike_times_flat)

    if trial_time is None:
        trial_time = (numpy.min(spike_times_flat), numpy.max(spike_times_flat))

    # mirror the spikes about both ends of the trial to reduce boundary bias
    if mirror:
        spike_times_flat = numpy.hstack((-1 * spike_times_flat + 2 * trial_time[0],
                                         spike_times_flat,
                                         -1 * spike_times_flat + 2 * trial_time[1]))

    kde = KDEUnivariate(spike_times_flat)
    if bw_psth is not None:
        kde.fit(bw=bw_psth)
    else:
        kde.fit()

    # scale the density so it integrates to the mean spike count per trial
    if norm:
        pre_factor = total_spikes / (num * quad(lambda x: kde.evaluate([x])[0],
                                                trial_time[0], trial_time[1])[0])
    else:
        pre_factor = 1.

    return lambda x: pre_factor * kde.evaluate([x])[0]
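# Added usage sketch with toy spike trains. Assumes the module-level
# `flatten` collapses a list of per-trial arrays into one, e.g. something like
# flatten = lambda trials: numpy.concatenate([numpy.asarray(t) for t in trials]).
trials = [numpy.sort(numpy.random.uniform(0, 2.5, 40)) for _ in range(3)]
rate = PSTH(trials, bw_psth=0.1, trial_time=(0.0, 2.5))
print(rate(1.0))   # smoothed firing-rate estimate at t = 1.0 s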
def find_outiers_kde(x):
    x_scaled = scale(list(map(float, x)))
    kde = KDEUnivariate(x_scaled)
    kde.fit(bw="scott", fft=True)
    pred = kde.evaluate(x_scaled)
    # flag points whose estimated density falls below 0.5
    n = sum(pred < 0.5)
    outlierindices = np.asarray(pred).argsort()[:n]
    outliervalue = np.asarray(x)[outlierindices]
    return outlierindices, outliervalue
def bootstrap_stats(
        args: Dict[str, Any], out_q: Optional[mp.Queue] = None) -> Union[None, Dict[str, Any]]:
    r'''
    Computes statistics and KDEs of data via sampling with replacement

    Arguments:
        args: dictionary of arguments. Possible keys are:
            data - data to resample
            name - name prepended to returned keys in result dict
            weights - array of weights matching length of data to use for weighted resampling
            n - number of times to resample data
            x - points at which to compute the kde values of resampled data
            kde - whether to compute the kde values at x-points for resampled data
            mean - whether to compute the means of the resampled data
            std - whether to compute standard deviation of resampled data
            c68 - whether to compute the width of the absolute central 68.2 percentile of the resampled data
        out_q: if using multiprocessing can place result dictionary in provided queue

    Returns:
        Result dictionary if `out_q` is `None` else `None`.
    '''
    out_dict, mean, std, c68, boot = {}, [], [], [], []
    name = '' if 'name' not in args else args['name']
    weights = None if 'weights' not in args else args['weights']
    if 'n' not in args: args['n'] = 100
    if 'kde' not in args: args['kde'] = False
    if 'mean' not in args: args['mean'] = False
    if 'std' not in args: args['std'] = False
    if 'c68' not in args: args['c68'] = False
    if args['kde'] and args['data'].dtype != 'float64':
        data = np.array(args['data'], dtype='float64')
    else:
        data = args['data']

    len_d = len(data)
    np.random.seed()
    for i in range(args['n']):
        points = np.random.choice(data, len_d, replace=True, p=weights)
        if args['kde']:
            kde = KDEUnivariate(points)
            kde.fit()
            boot.append([kde.evaluate(x) for x in args['x']])
        if args['mean']: mean.append(np.mean(points))
        if args['std']: std.append(np.std(points, ddof=1))
        if args['c68']: c68.append(np.percentile(np.abs(points), 68.2))

    if args['kde']: out_dict[f'{name}_kde'] = boot
    if args['mean']: out_dict[f'{name}_mean'] = mean
    if args['std']: out_dict[f'{name}_std'] = std
    if args['c68']: out_dict[f'{name}_c68'] = c68
    if out_q is not None:
        out_q.put(out_dict)
    else:
        return out_dict
def kde_param(distribution, x0):
    ### kde_param tries to ensure correct handling of multimodal distributions
    ### compute the kernel density estimate
    KDE = KDEUnivariate(distribution)
    KDE.fit(bw=np.std(distribution) / 3.0)
    result = scipy.optimize.minimize(lambda x: -1 * KDE.evaluate(x),
                                     x0=x0,
                                     method='Powell')  # Powell has been working pretty well
    return {'result': float(result['x']), 'kde': KDE}
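# Added usage sketch: the Powell search refines whichever mode of a
# multimodal KDE the starting guess x0 is nearest to. Assumes numpy and
# scipy are imported as the function above requires.
rng = np.random.default_rng(6)
bimodal = np.concatenate([rng.normal(-1, 0.2, 300), rng.normal(3, 0.4, 700)])
print(kde_param(bimodal, x0=2.0)['result'])   # converges near 3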
def estimate(self, data, d_min, d_max):
    d_range = (d_min - 0.5 * (d_max - d_min), d_max + 0.5 * (d_max - d_min))
    gridsize = (d_range[1] - d_range[0]) / self.num_bins
    bw = gridsize * self.num_bins / 100
    self.grid = [(d_range[0] + i * gridsize) for i in range(self.num_bins + 1)]
    self.mids = self.grid[:-1] + np.diff(self.grid) / 2
    try:
        kde = KDEUnivariate(data)
        kde.fit(bw=bw, kernel=self.kernel, fft=False)
        # clip negative density estimates to zero
        self.values = [max(kde.evaluate(i)[0], 0) for i in self.mids]
    except Exception:
        print("KDE did not work, data length =", len(data),
              ", d_range =", d_range, ", gridsize =", gridsize)
        self.values = [0] * len(self.mids)
def kde_param(distribution, x0):
    ### compute the kernel density estimate
    KDE = KDEUnivariate(distribution)
    KDE.fit(bw=np.std(distribution) / 3.0)
    result = scipy.optimize.minimize(lambda x: -1 * KDE.evaluate(x),
                                     x0=x0,
                                     method='Powell')
    # print(result)
    return {'result': float(result['x']), 'kde': KDE}
def sample_pdf(catalog, parameter, pdf_fun, params, bounds):
    ## catalog: pd.DataFrame() input catalog with arbitrary distribution function
    ## pdf_fun: desired distribution of the sample
    ## bounds: range of `parameter` to sample within
    param_span = np.linspace(min(catalog[parameter]), max(catalog[parameter]), 100)

    print("... determine master KDE")
    KDE = KDEUnivariate(catalog[parameter])
    KDE.fit(bw=np.std(catalog[parameter]) / 3)
    KDE_FUN = interp1d(param_span, KDE.evaluate(param_span))

    ## need to rescale within the bounds
    NORM = np.divide(
        1.,
        integrate.quad(KDE.evaluate, bounds[0], bounds[1],
                       points=param_span[np.where((param_span > bounds[0]) &
                                                  (param_span < bounds[1]))],
                       limit=200)[0])

    N = len(catalog[catalog[parameter].between(*bounds)])

    ### we need the scale from the other function
    result, kde_fun = determine_scale(catalog, parameter, pdf_fun, params,
                                      bounds=bounds)

    ### accept-reject against the target pdf
    sample = np.random.uniform(0.0, 1.0, len(catalog)) * len(catalog) * NORM * \
        KDE_FUN(catalog[parameter])
    boo_array = sample < result['x'] * pdf_fun(catalog[parameter], *params)

    return catalog[boo_array & (catalog[parameter].between(
        bounds[0], bounds[1], inclusive=True))].copy()
def reweight(rc, metad_traj, cv_columns, v_minus_c_col, rc_bins=20, kt=2.5,
             kde=False):
    """
    Reweight a biased MD trajectory to an unbiased probability along a
    given reaction coordinate, using the rbias column from COLVAR to
    perform the reweighting per Tiwary and Parrinello.
    """
    # read in parameters from sgoop object
    colvar = metad_traj[cv_columns].values
    v_minus_c = metad_traj[v_minus_c_col].values

    # calculate rc observable for each frame
    colvar_rc = np.sum(colvar * rc, axis=1)

    # calculate frame weights, per Tiwary and Parrinello, JPCB 2015 (c(t) method)
    weights = np.exp(v_minus_c / kt)
    norm_weights = weights / weights.sum()

    if kde:
        # KDE for fine-grained optimization
        kde = KDEUnivariate(colvar_rc)
        kde.fit(weights=norm_weights, bw=0.1, fft=False)

        # evaluate pdf on a grid for use in SGOOP
        # TODO: area under curve between points instead of pdf at point
        grid = np.linspace(colvar_rc.min(), colvar_rc.max(), num=rc_bins)
        pdf = kde.evaluate(grid)
        return pdf, grid

    # histogram density for coarse optimization
    hist, bin_edges = np.histogram(colvar_rc, weights=norm_weights,
                                   bins=rc_bins, density=True,
                                   range=(colvar_rc.min(), colvar_rc.max()))
    # set grid points to the centers of the bins
    bin_width = bin_edges[1] - bin_edges[0]
    grid = bin_edges[:-1] + bin_width / 2
    pdf = hist
    return pdf, grid
def get_kde(self, forecast_data, bandwidth=None):
    kde = KDEUnivariate(forecast_data)
    # never go below the Silverman rule-of-thumb bandwidth
    silverman_bw = bw_silverman(forecast_data)
    if bandwidth is None or bandwidth < silverman_bw:
        kde.fit(bw=silverman_bw)
    else:
        kde.fit(bw=bandwidth)
    return kde
def __get_kde_values(
    self,
    spacings_range: Tuple[float, float],
    nnnsd: bool = False,
    kde_gridsize: int = 1000,
) -> np.ndarray:
    """Fit / derive the KDE using the entire set of unfolded values, but
    evaluate it only over the given `spacings_range`.
    """
    spacings = np.sort(self.vals[2:] - self.vals[:-2]) if nnnsd else self.spacings
    kde = KDE(spacings)
    kde.fit(kernel="gau", bw="scott", cut=0, fft=False, gridsize=10000)
    s = np.linspace(spacings_range[0], spacings_range[1], kde_gridsize)
    evaluated = kde.evaluate(s)
    return evaluated
def md_prob(rc, max_cal_traj, rc_bins, bandwidth=0.02, **storage_dict):
    # Calculates probability along a given RC
    data_array = max_cal_traj.values
    proj = np.sum(data_array * rc, axis=1)

    # get probability with the statsmodels KDE
    kde = KDEUnivariate(proj)
    kde.fit(bw=bandwidth)

    grid = np.linspace(proj.min(), proj.max(), num=rc_bins)
    prob = kde.evaluate(grid)
    # prob = prob / prob.sum()  # normalize

    if storage_dict.get('prob_list') is not None:
        storage_dict['prob_list'].append(prob)

    return prob, grid
def mimic_arviz_posterior(context: ParameterContext, state: SequentialAlgorithmState,
                          num_cols: int = 3, ax: Axes = None, **kwargs) -> Axes:
    """
    Helper function for mimicking arviz plotting functionality.

    Args:
        context: parameter context to plot.
        state: associated state.
        num_cols: the number of columns.
        ax: pre-defined axes to use.
    """
    if ax is None:
        num_rows = len(context.parameters) // num_cols
        _, ax = plt.subplots(num_rows, num_cols)

    w = state.normalized_weights().cpu().numpy()

    flat_axes = ax.ravel()
    handled = list()
    for ax_, (p, v) in zip(flat_axes, context.parameters.items()):
        v_numpy = v.cpu().numpy()

        kde = KDEUnivariate(v_numpy)
        kde.fit(weights=w, fft=False)

        x_linspace = np.linspace(v_numpy.min(), v_numpy.max(), 250)

        ax_.plot(x_linspace, kde.evaluate(x_linspace), **kwargs)
        ax_.spines["top"].set_visible(False)
        ax_.spines["right"].set_visible(False)
        ax_.spines["left"].set_visible(False)
        ax_.axes.get_yaxis().set_visible(False)
        ax_.set_title(p)

        handled.append(ax_)

    for ax_ in (ax_ for ax_ in flat_axes if ax_ not in handled):
        ax_.axis("off")

    return ax
def empiricalPDF(data):
    """
    Evaluate a probability density function using kernel density
    estimation for input data.

    :param data: :class:`numpy.ndarray` of data values.

    :returns: PDF values at the data points.
    """
    LOG.debug("Calculating empirical PDF")
    sortedmax = np.sort(data)
    kde = KDEUnivariate(sortedmax)
    kde.fit()
    try:
        res = kde.evaluate(sortedmax)
    except MemoryError:
        res = np.zeros(len(sortedmax))
    return res
def cluster(x, adjust=1 / 3):
    xs = np.asanyarray(x).ravel(order='K')
    idx = np.argsort(xs)
    xs = xs[idx]

    kde = KDEUnivariate(xs)
    kde.fit(adjust=adjust)
    pdf = kde.evaluate(xs)

    # Take the local minima of the density as cluster boundaries, then
    # cumulatively count how many boundaries each value exceeds.
    boundinds, = argrelmin(pdf)
    bounds = xs[boundinds]

    invidx = np.argsort(idx)
    clusters = np.sum(xs.reshape(-1, 1) >= bounds, axis=1)
    clustinds = clusters[invidx].reshape(x.shape)
    return clustinds, kde
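# Added usage sketch: two well-separated blobs should come back with two
# distinct labels, with the boundary taken from the local minimum of the KDE.
# Assumes `from scipy.signal import argrelmin` as the function above requires.
rng = np.random.default_rng(7)
blobs = np.concatenate([rng.normal(0, 0.3, 200), rng.normal(5, 0.3, 200)])
labels, kde = cluster(blobs)
print(np.unique(labels))   # expect [0 1]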
def kde_statsmodels_u(data, grid, **kwargs):
    """
    Univariate Kernel Density Estimation with Statsmodels

    Parameters
    ----------
    data : numpy.array
        Data points used to compute the density estimator. It has
        `n x 1` dimensions, representing n points.
    grid : numpy.array
        Data points at which the density will be estimated. It has
        `m x 1` dimensions, representing m points.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions.
    """
    kde = KDEUnivariate(data)
    kde.fit(**kwargs)
    return kde.evaluate(grid)
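# Added usage sketch: the same sample under two bandwidth choices; the small
# bandwidth undersmooths and produces a spikier estimate.
rng = np.random.default_rng(8)
pts = rng.standard_normal(500)
grid = np.linspace(-3, 3, 101)
smooth = kde_statsmodels_u(pts, grid, bw=0.5)
rough = kde_statsmodels_u(pts, grid, bw=0.05)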
    ])
except AttributeError:
    # this fails intermittently for unclear reasons; works on ROOT 6
    HAS_ROOT = False

##################################################################
# ... and plot everything.
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(6 * 2, 4 * 2))

axes[0, 0].hist(resid, bins='auto', density=True)
axes[0, 0].plot(x, lg.pdf(x), label='Log Norm')
axes[0, 0].plot(x, hc.pdf(x), label='Half Cauchy')
if HAS_ROOT:
    axes[0, 0].plot(x, land, label='Landau', color='blue')
axes[0, 0].plot(x, dens.evaluate(x), label='KDE')
axes[0, 0].set_xlabel('x')
axes[0, 0].set_xlim(0, 0.3)
axes[0, 0].set_ylabel('PDF(x)')
axes[0, 0].legend()

axes[0, 1].hist(resid, bins='auto', density=True)
axes[0, 1].plot(x, lg.pdf(x), label='Log Norm')
axes[0, 1].plot(x, hc.pdf(x), label='Half Cauchy')
if HAS_ROOT:
    axes[0, 1].plot(x, land, label='Landau', color='blue')
axes[0, 1].plot(x, dens.evaluate(x), label='KDE')
axes[0, 1].set_xlabel('x')
axes[0, 1].set_ylabel('PDF(x)')
axes[0, 1].set_yscale('log')
axes[0, 1].legend()
def kde_statsmodels_u(x, x_grid, bandwidth=0.2, **kwargs):
    """Univariate Kernel Density Estimation with Statsmodels."""
    kde = KDEUnivariate(x)
    kde.fit(bw=bandwidth, **kwargs)
    return kde.evaluate(x_grid)