def kdepy_fftkde(data, a, b, num_bin_joint):
    """Estimate a posterior density on a regular grid with KDEpy's FFTKDE.

    FFTKDE is given a single scalar bandwidth here. To make one bandwidth
    meaningful for every parameter, the samples are first mapped linearly
    from ``[a, b]`` onto ``[-1, 1]`` in each dimension; the per-dimension
    Scott bandwidths returned by ``bw_from_kdescipy`` are then averaged.

    :param data: array of parameter samples, one row per sample
    :param a: list of left boundaries (one per parameter)
    :param b: list of right boundaries (one per parameter)
    :param num_bin_joint: number of bins (cells) per dimension
    :return: density values reshaped to ``(num_bin_joint + 1,) * dimensions``
    """
    n_dims = len(data[0])
    logging.info('KDEpy.FFTKDe: Gaussian KDE {} dimensions'.format(n_dims))
    started = time()

    # Widen the box by a tiny margin before rescaling.
    # NOTE(review): presumably so boundary samples fall strictly inside
    # the evaluation grid - confirm against grid_for_kde.
    lower = np.array(a) - 1e-10
    upper = np.array(b) + 1e-10
    scaled = 2 * (data - lower) / (upper - lower) - 1

    per_dim_bandwidth = bw_from_kdescipy(scaled, 'scott')
    _, grid_ravel = grid_for_kde(-1 * np.ones(n_dims), np.ones(n_dims),
                                 num_bin_joint)

    estimator = FFTKDE(kernel='gaussian', bw=np.mean(per_dim_bandwidth))
    estimator.fit(scaled)
    density = estimator.evaluate(grid_ravel.T)
    density = density.reshape((num_bin_joint + 1,) * n_dims)

    finished = time()
    timer(started, finished, "Time for kdepy_fftkde")
    return density
class Gaussian_Density_Estimator:
    """Small convenience wrapper around a single ``FFTKDE`` instance.

    Example (adapted from the author's scratch code)::

        data = np.random.randn(2**6)
        density_estimator = Gaussian_Density_Estimator()
        density_estimator.train(data)
        x, y = density_estimator.score_samples()
    """

    def __init__(self, kernel='gaussian', bw='silverman'):
        """Create the underlying FFTKDE with the given kernel and bandwidth."""
        self.estimator = FFTKDE(kernel=kernel, bw=bw)

    def train(self, data, weights=None):
        """Fit the estimator on ``data``, optionally with per-sample weights."""
        self.estimator.fit(data, weights=weights)

    def score_samples(self, input_x=None):
        """Evaluate the fitted density.

        With no argument the estimator chooses its own grid and the pair
        ``(x, y)`` is returned. Otherwise ``input_x`` is forwarded to
        ``FFTKDE.evaluate`` and whatever that call returns is passed back
        unchanged.
        """
        if input_x is not None:
            return self.estimator.evaluate(input_x)
        grid, values = self.estimator.evaluate()
        return grid, values
def find_kde(distribution, bw='silverman', npoints=512, kernel='gaussian'):
    """Return a KDE-based estimate of an image histogram as ``(x, y)`` points.

    ``distribution`` is fitted with an FFTKDE using the given kernel and
    bandwidth and evaluated on ``npoints`` grid points. When the bandwidth
    was requested as ``'silverman'`` and the fitted estimator reports a
    bandwidth below 1, the fit is redone with a fixed bandwidth of 1 (the
    original author's correction for Silverman bias on small datasets).
    Only grid points with ``0 <= x <= 255`` are returned.
    """

    def _fit_and_evaluate(bandwidth):
        model = FFTKDE(kernel=kernel, bw=bandwidth)
        grid, values = model.fit(distribution).evaluate(npoints)
        return model, grid, values

    estimator, x, y = _fit_and_evaluate(bw)

    # Fix silverman bias in small datasets
    if (bw == 'silverman') and (estimator.bw < 1):
        estimator, x, y = _fit_and_evaluate(1)

    # Keep only the valid 8-bit intensity range.
    in_range = (x >= 0) & (x <= 255)
    return (x[in_range], y[in_range])
def _estimate_pdf(self, sample_id: str, features: list, df: pd.DataFrame, **kwargs) -> None:
    """
    Estimate the PDF of one sample's events by KDE and cache the result.

    The bandwidth comes from ``self.kde_bw``; the special value ``"cv"``
    triggers ``bw_optimisation`` on the full dataframe first. Only the
    numeric columns among ``features`` are passed to the estimator. The
    density values (second element of ``FFTKDE.evaluate()``) are stored in
    ``self.kde_cache[sample_id]``.

    Parameters
    ----------
    sample_id: str
        Key under which the density values are cached
    features: list
        Columns of ``df`` to use
    df: Pandas.DataFrame
        Events dataframe for the sample
    kwargs:
        Forwarded to ``bw_optimisation`` when ``self.kde_bw == "cv"``

    Returns
    -------
    None
    """
    bandwidth = self.kde_bw
    if bandwidth == "cv":
        bandwidth = bw_optimisation(data=df, features=features, **kwargs)

    numeric_events = df[features].copy().select_dtypes(include=['number'])
    estimator = FFTKDE(kernel=self.kde_kernel, bw=bandwidth, norm=self.kde_norm)
    evaluation = estimator.fit(numeric_events.values).evaluate()
    self.kde_cache[sample_id] = evaluation[1]
def get_silvermans_bandwidth(X, kernel, bandwidth):
    """Return the bandwidth FFTKDE reports after fitting 1-D data ``X``.

    ``X`` must be a plain one-dimensional ``numpy.ndarray`` and
    ``bandwidth`` must be the string ``"silverman"``; both are enforced
    with assertions. The estimator is fitted and evaluated once on
    ``2**10`` grid points, after which its ``bw`` attribute is returned.
    """
    # Validate X: present, exactly an ndarray (no subclasses), and 1-D.
    assert X is not None
    assert type(X) is np.ndarray
    assert X.ndim == 1

    # The kernel is deliberately not validated here.

    # Only Silverman's rule is supported.
    assert bandwidth in ("silverman", )

    estimator = FFTKDE(bw=bandwidth, kernel=kernel)
    fitted = estimator.fit(X)
    fitted(2**10)
    return estimator.bw
def get_kde(gridArray):
    """
    Apply a KDE to a risk matrix to reduce its sparsity.

    The coordinates of the non-zero cells are used as 2-D sample points and
    the cell values as their weights. The fitted density is evaluated on a
    grid with ``gridArray.shape[0]`` points per axis and returned as a
    matrix of that size.

    :param gridArray: risk matrix to process
    :return: risk matrix after KDE (the input itself if it is all zeros)
    """
    # An all-zero matrix has nothing to smooth: return it unchanged.
    if not np.any(gridArray != 0):
        return gridArray

    # Locate the non-zero cells; their (row, col) indices are the samples.
    nonzero = np.nonzero(gridArray)
    row_idx, col_idx = nonzero[0], nonzero[1]
    data = np.column_stack((row_idx, col_idx)).astype(float)

    # Each sample is weighted by the value stored in its cell.
    weights = [gridArray[r, c] for r, c in zip(row_idx, col_idx)]

    # Number of rows (assumed equal to the number of columns by the
    # square reshape below).
    grid_points = gridArray.shape[0]
    kde = FFTKDE(kernel='gaussian', norm=2, bw=0.5)
    grid, points = kde.fit(data, weights=weights).evaluate(grid_points)
    resultArray = points.reshape(grid_points, grid_points).T
    return resultArray
interp_dt=0.5)
# NOTE(review): the line above closes a call that starts outside this view.
#Extract the axes from the axis list
# Reshape the flat distribution to one row per entry of `ws` and one column
# per entry of `nus`.
fake_dist = np.reshape(dist, [len(ws), len(nus)])
plt.plot(unw, unNu_overW, color='k', label='analytics', linewidth=4)
#plt.imshow(fake_dist.T,aspect = 'auto',origin='lower',extent = [np.min(ws/20),np.max(ws/20),0,np.max(unNu_overW)])
plt.contour(ws / 20, nus, fake_dist.T)
plt.xlim([0, 0.6])
plt.show(block=False)
from KDEpy import FFTKDE
import KDEpy
# 2-D KDE of (meanW_burst, meanSC_burst) with an exponential kernel,
# evaluated on a 100 x 200 grid.
kde = FFTKDE(bw=0.05, kernel='exponential')
grid_points = 100, 200
# `na` is presumably an alias for np.array - verify against the imports.
grid, points = kde.fit(
    na([np.hstack(meanW_burst), np.hstack(meanSC_burst)]).T).evaluate(grid_points)
# Recover the per-axis grid coordinates and the density as a 2-D array.
x, y = np.unique(grid[:, 0]), np.unique(grid[:, 1])
z = points.reshape(grid_points[0], grid_points[1]).T
# Plot the kernel density estimate
# N is passed to plt.contour as the number of contour levels.
N = 2
plt.plot(np.hstack(meanW_burst), np.hstack(meanSC_burst), '.', alpha=0.2)
plt.contour(x, y, z, N, linewidths=0.8, colors='k')
plt.show(block=False)
# Second estimator on the same samples; `kde` is rebound here.
# NOTE(review): KernelDensity looks like scikit-learn's estimator - confirm.
kde = KernelDensity(bandwidth=0.03)
kde.fit(na([np.hstack(meanW_burst), np.hstack(meanSC_burst)]).T)
samples = kde.score_samples(na(ev_par))
idx_ones = index_digit_ones(Y_train)
print("> idx_ones = ", idx_ones)

# Hand-picked training indices. Per the original author's note these are
# indices of digit 1, except idx=12 which is a digit 3.
idxs = [12, 3, 6, 14, 23, 24, 40, 59, 67]
data = [X_train[idx] for idx in idxs]

fig = plt.figure()
# more styles: https://matplotlib.org/gallery/lines_bars_and_markers/line_styles_reference.html
line_styles = ['--', '-', ':', ':', '-', ':', ':', ':', ':']

# One KDE curve per selected sample, each with its own line style.
for i, (idx, style) in enumerate(zip(idxs, line_styles)):
    estimator = FFTKDE(kernel='gaussian', bw='silverman')
    x, y = estimator.fit(data[i], weights=None).evaluate()
    plt.plot(x, y, linestyle=style,
             label=f'IDX={idx!s}; Digit={Y_train[idx]!s}')

plt.legend()
plt.show()
fig.savefig('hw5/results/visualize_kde.png', dpi=fig.dpi)

# Map the selected samples back through the PCA and plot them as digits.
new_data = pca.inverse_transform(data)
plot_digit_data(new_data, 'test_kde_plot_digits')
class KDE(Translation):
    def __init__(self, source, *args, bw=None, kernel='gaussian', density=True, **kwargs):
        '''Run a KDE on a regular grid.

        Parameters:
        -----------
        source : GridData or PointData
        bw : str or float or iterable
            Defaults to 'silverman' for 1d data and to 1 otherwise.
            Choices of 'silverman', 'scott', 'ISJ' for 1d data.
            A float specifies a fixed bandwidth; an iterable gives a
            separate, fixed bandwidth per dimension.
        kernel : str
            choices of 'gaussian', 'exponential', 'box', 'tri', 'epa',
            'biweight', 'triweight', 'tricube', 'cosine'
        density : bool (optional)
            if False, rescale the output so that it sums to the sum of the
            data (the number of points when no weights are given)

        Raises:
        -------
        TypeError
            if the destination grid is not regular
        '''
        super().__init__(source, *args, dest_needs_grid=True, **kwargs)
        if bw is None:
            bw = 'silverman' if self.dest.grid.nax == 1 else 1.
        self.bw = bw
        self.kernel = kernel
        self.density = density
        if density:
            self.additional_runs = {'density': None}
        else:
            self.additional_runs = {'counts': None}
        if not self.dest.grid.regular:
            raise TypeError('dest must have regular grid')

    def _has_per_axis_bw(self):
        '''Return True when `bw` is a sequence with one bandwidth per axis.'''
        return isinstance(self.bw, (np.ndarray, list, tuple))

    def setup(self):
        '''Prepare the masked source sample, destination sample and FFTKDE.'''
        self.prepare_source_sample(stacked=False)

        # every point must be inside output grid (requirement of KDEpy)
        masks = [
            np.logical_and(self.source_sample[i] > dim.points[0],
                           self.source_sample[i] < dim.points[-1])
            for i, dim in enumerate(self.dest.grid)
        ]
        self.mask = np.all(masks, axis=0)

        sample = [s[self.mask] for s in self.source_sample]
        # shape (n_points, n_axes), the layout FFTKDE.fit is given below
        self.source_sample = np.stack(sample).T
        self.prepare_dest_sample(transposed=True)

        if self._has_per_axis_bw():
            # FFTKDE is constructed with one scalar bandwidth, so
            # per-axis bandwidths are emulated by rescaling each axis
            # and fitting with a unit bandwidth.
            for i in range(self.dest.grid.nax):
                self.source_sample[:, i] /= self.bw[i]
                self.dest_sample[:, i] /= self.bw[i]
            bw = 1
        else:
            bw = self.bw
        self.kde = FFTKDE(bw=bw, kernel=self.kernel)

    def _evaluate_kde(self, weights=None):
        '''Fit on the masked sample and evaluate on the destination sample.

        The result is normalised according to `self.density`:
        counts mode rescales it to sum to the total weight (the number of
        points when `weights` is None); density mode with per-axis
        bandwidths undoes the axis rescaling done in `setup`.
        '''
        values = self.kde.fit(self.source_sample,
                              weights=weights).evaluate(self.dest_sample)
        if not self.density:
            if weights is None:
                # Number of points, NOT `.size`: the sample has shape
                # (n_points, n_axes), so `.size` would over-count by a
                # factor of n_axes for multi-dimensional grids.
                total = self.source_sample.shape[0]
            else:
                total = np.sum(weights)
            values *= total / np.sum(values)
        elif self._has_per_axis_bw():
            # since we scale the inputs, we need to re-scale the
            # densities such that they integrate out to being 1 again
            values *= 1. / np.prod(self.bw)
        return values

    def eval(self, source_data):
        '''Evaluate the KDE, using `source_data` as weights when given.

        Parameters:
        -----------
        source_data : object with a `flat()` method, or None
            None gives an unweighted KDE of the source points. Otherwise
            the flattened data, restricted to the points inside the grid,
            are used as weights; data with extra trailing dimensions are
            processed one slice at a time.

        Returns:
        --------
        array reshaped to the destination shape (plus one trailing axis
        when `source_data` has extra dimensions)
        '''
        if source_data is None:
            return self._evaluate_kde().reshape(self.dest.shape)

        source_data = source_data.flat()
        if source_data.ndim > 1:
            out_array = self.get_empty_output_array(source_data.shape[1:],
                                                    flat=True)
            for idx in np.ndindex(*source_data.shape[1:]):
                selection = (Ellipsis, ) + idx
                out_array[selection] = self._evaluate_kde(
                    source_data[selection][self.mask])
            out_shape = (self.dest.shape) + (-1, )
            return out_array.reshape(out_shape)

        return self._evaluate_kde(
            source_data[self.mask]).reshape(self.dest.shape)
def density(values, bw='silverman', npoints=512):
    """Gaussian KDE of ``values`` restricted to the range ``[0, 255]``.

    Returns the grid points, the density at those points, and the ``bw``
    attribute of the fitted estimator.
    """
    model = FFTKDE(kernel='gaussian', bw=bw)
    grid, pdf = model.fit(values).evaluate(npoints)
    inside = (grid >= 0) & (grid <= 255)
    return grid[inside], pdf[inside], model.bw
def density(values, bw='silverman', npoints=512, kernel='gaussian'):
    """KDE of ``values`` clipped to ``0 <= x <= 255``.

    The estimator is an FFTKDE with the given ``kernel`` and ``bw``,
    evaluated on ``npoints`` grid points. Returns ``(x, y, bandwidth)``
    where ``bandwidth`` is the estimator's ``bw`` attribute after fitting.
    """
    estimator = FFTKDE(kernel=kernel, bw=bw)
    fitted = estimator.fit(values)
    points, heights = fitted.evaluate(npoints)

    # One mask, applied to both arrays so they stay aligned.
    keep = (points >= 0) & (points <= 255)
    points = points[keep]
    heights = heights[keep]
    return points, heights, estimator.bw
# Load the image and flatten it into a 1-D array of pixel values.
image = imageio.imread(image_filename)
distribution = image.ravel()
fig, axes = plt.subplots(figsize=(6, 4), dpi=150)

nbins, npoints, kernel = 256, 512, 'gaussian'

# Normalised histogram of the pixel values (one bin per value 0..255).
x = np.arange(nbins)
counts = np.bincount(distribution, minlength=nbins)
y = counts / np.sum(counts)
hst_xy = (x, y)

# KDE of the pixel values, clipped to 0 <= x <= 255.
estimator = FFTKDE(kernel=kernel, bw='silverman')
kx, ky = estimator.fit(distribution).evaluate(npoints)
in_range = (kx >= 0) & (kx <= 255)
ky = ky[in_range]
kx = kx[in_range]
kde_xy = (kx, ky)

# Peaks of the KDE curve and their widths at half height.
peaks_idx, _ = find_peaks(ky)
half = peak_widths(ky, peaks_idx, rel_height=0.5)[:2]
peaks = (peaks_idx, half)

# Second KDE, fitted on the density values themselves; its bandwidth
# seeds the bandwidth sweep below.
estimator = FFTKDE(kernel=kernel, bw='silverman')
kernel_points, kernel_values = estimator.fit(ky).evaluate(npoints)
kx, ky, bw = kernel_points, kernel_values, estimator.bw

# 101 log-spaced bandwidths from 10*bw down to 0.1*bw.
bws = bw * np.logspace(1, -1, 101)
mode_lst = find_modeid(distribution, bws)
def density(self, compare=None):
    """Plot Gaussian KDEs of every input band and of the outputs.

    One subplot is drawn per entry of ``self.bands`` plus a final one for
    ``self.outputs``. Each KDE uses a fixed bandwidth of 0.13 and is
    evaluated on 100 evenly spaced points.

    Parameters
    ----------
    compare : object or None
        Optional second dataset exposing ``inputs``, ``outputs`` and
        ``name``. When given, the grid of each subplot spans the range
        returned by ``self.domain(own, other)``, and the subplot also
        shows the compare density, the difference (compare minus own) and
        a dashed zero line across the range of the own data.
    """
    bandwidth = 0.13
    n_grid = 100
    # Small margin added on both sides of the evaluation grid.
    # NOTE(review): presumably because FFTKDE needs every data point to
    # lie inside the grid - confirm.
    ep = 1e-10

    has_compare = compare is not None
    _, ax = plt.subplots(1, len(self.bands) + 1, figsize=(30, 5))

    def make_grid(own, other):
        """Evaluation grid covering `own` (and `other` when present)."""
        if other is None:
            low, high = own.min(), own.max()
        else:
            low, high = self.domain(own, other)
        return np.linspace(low - ep, high + ep, n_grid)

    def kde_on(values, grid):
        """Fit a fresh KDE on `values` and evaluate it on `grid`."""
        return FFTKDE('gaussian', bw=bandwidth).fit(values).evaluate(grid)

    # (axes, own data, compare data or None, flatten before fit?, title part)
    panels = []
    for i, band in enumerate(self.bands):
        other = compare.inputs[:, :, :, i] if has_compare else None
        panels.append((ax[i], self.inputs[:, :, :, i], other, True, band))
    panels.append((ax[-1], self.outputs,
                   compare.outputs if has_compare else None, False,
                   "Outputs"))

    for axis, own, other, flatten, title_part in panels:
        grid = make_grid(own, other)
        own_density = kde_on(own.ravel() if flatten else own, grid)
        axis.plot(grid, own_density, label=self.name)
        title = f"{self.name} {title_part}"

        if other is not None:
            other_density = kde_on(other.ravel() if flatten else other,
                                   grid)
            axis.plot(grid, other_density, label=compare.name)
            axis.plot(grid, other_density - own_density,
                      label="Difference")
            # Dashed zero reference for the difference curve.
            axis.plot([own.min(), own.max()], [0.0, 0.0],
                      linestyle='--', alpha=0.3)
            title += f" | Compare: {compare.name}"

        axis.set_title(title)

    plt.legend()
    plt.show()
def bootstrap(year1, team1, year2, team2, mongo):
    """Predict one real and 99 randomised games and summarise the results.

    The prepared feature row is joined by 99 rows from ``randomize_data``;
    all rows are standardised with ``ml_stats_mean`` / ``ml_stats_std``
    and passed to ``model.predict``. Column 0 of the prediction is
    treated as home points and column 1 as away points. The returned dict
    holds string-formatted values: win estimate, KDE-smoothed over/under
    and spread distributions binned to whole numbers, raw point
    predictions and per-game colours/markers.
    """
    global graph

    num_trials = 99
    num_games = num_trials + 1
    output = {}

    data, tc1, tc2 = prepare_data(year1, team1, year2, team2, mongo)
    data_df = pd.DataFrame(data.reshape(1, 111))
    data_copy = data.copy()
    for _ in range(num_trials):
        data_df.loc[len(data_df)] = randomize_data(
            year1, team1, year2, team2, data_copy)

    # Standardise the features with the training statistics.
    data_df -= ml_stats_mean
    data_df = data_df / ml_stats_std

    with graph.as_default():
        prediction = model.predict(data_df)

    home_points = prediction[:, 0]
    away_points = prediction[:, 1]
    over_under = list(home_points + away_points)
    spread = list(away_points - home_points)

    home_wins = sum(1 for i in range(num_games)
                    if prediction[i, 0] > prediction[i, 1])
    home_win_pct = home_wins / num_games
    # Map the win fraction from [0, 1] onto [-100, 100].
    est_win_pct = round((home_win_pct * 200) - 100)
    output['win_bar_color'] = str(tc2 if est_win_pct < 0 else tc1)
    output['est_win_pct'] = str(-1 * est_win_pct)

    estimator = FFTKDE(kernel='gaussian', bw='silverman')

    def whole_number_histogram(samples):
        """KDE of `samples` summed into whole-number bins.

        Returns the bin values and the bin masses normalised to sum to 1.
        """
        low = np.floor(np.min(samples))
        high = np.ceil(np.max(samples))
        # 100 grid points per unit of the sample range.
        n_grid = int(high - low) * 100
        grid_x, grid_y = estimator.fit(samples, weights=None).evaluate(n_grid)
        frame = pd.DataFrame({'x': grid_x, 'y': grid_y})
        frame['x_round'] = round(frame['x'])
        per_bin = frame.groupby('x_round')['y'].sum()
        bin_values = list(per_bin.index)
        bin_mass = list(per_bin)
        total = sum(bin_mass)
        return bin_values, [mass / total for mass in bin_mass]

    oe_x, oe_y_norm = whole_number_histogram(over_under)
    s_x, s_y_norm = whole_number_histogram(spread)

    # Symmetric axis bounds around zero for the spread chart.
    min_spread = np.floor(min(s_x))
    max_spread = np.ceil(max(s_x))
    spread_bound = max(abs(min_spread), abs(max_spread))
    output['spread_bounds'] = [str(-1 * spread_bound), str(spread_bound)]
    output['spread_colors'] = [tc1 if value <= 0 else tc2 for value in s_x]

    output['home_points'] = [str(p) for p in home_points]
    output['away_points'] = [str(p) for p in away_points]
    output['over_under_x'] = [str(v) for v in oe_x]
    output['over_under_y'] = [str(v * 100) for v in oe_y_norm]
    output['spread_x'] = [str(v) for v in s_x]
    output['spread_y'] = [str(v * 100) for v in s_y_norm]
    output['over_under'] = str(round(np.mean(over_under), 1))
    output['spread'] = str(round(np.mean(spread), 1))

    home_ahead = [home > away
                  for home, away in zip(home_points, away_points)]
    output['scatter_color'] = [tc1 if ahead else tc2 for ahead in home_ahead]
    output['scatter_marker'] = ['circle' if ahead else 'rect'
                                for ahead in home_ahead]

    output['home_point_prediction'] = str(int(round(
        np.mean(list(home_points)))))
    output['away_point_prediction'] = str(int(round(
        np.mean(list(away_points)))))
    return output
def density(values, bw='silverman', npoints=512, kernel='gaussian'):
    """Fit an FFTKDE on ``values`` and evaluate it on ``npoints`` points.

    Returns ``(grid_points, density_values, bandwidth)`` where
    ``bandwidth`` is the estimator's ``bw`` attribute after fitting.
    """
    model = FFTKDE(kernel=kernel, bw=bw)
    grid, pdf = model.fit(values).evaluate(npoints)
    return grid, pdf, model.bw
def _interpolate(
    *,
    data: ndarray,
    x_position: ndarray,
    y_position: ndarray,
    z_position: Optional[ndarray] = None,
    extent: Tuple[float, float, float, float],
    smoothing_length: ndarray,
    particle_mass: ndarray,
    number_of_pixels: Tuple[float, float],
    cross_section: Optional[float] = None,
    density_weighted: Optional[bool] = None,
) -> ndarray:
    """Interpolate particle data onto a pixel grid with a Gaussian FFTKDE.

    Particles are kept when their smoothing length is positive and they
    lie inside ``extent`` (``xmin, xmax, ymin, ymax``); for a cross
    section they must also be within two smoothing lengths of
    ``cross_section`` in z. The kept particles are fed to the KDE with
    weights built from ``data``, ``particle_mass`` and
    ``smoothing_length``, and the result is re-gridded onto ``extent``
    with a bivariate spline. No bandwidth is passed to FFTKDE, so the
    smoothing length only enters through the mask and the weights.

    Raises
    ------
    ValueError
        If ``cross_section`` is given without ``z_position``.

    Returns
    -------
    ndarray
        The re-gridded image, transposed.
    """
    density_weighted = bool(density_weighted)
    # Density-weighted output is divided by a second, data-free KDE.
    normalized = density_weighted
    is_cross_section = cross_section is not None

    in_x = (x_position >= extent[0]) & (x_position <= extent[1])
    in_y = (y_position >= extent[2]) & (y_position <= extent[3])
    mask = (smoothing_length > 0.0) & (in_x & in_y)
    if is_cross_section:
        if z_position is None:
            raise ValueError('Must specify z position for cross section')
        near_plane = np.abs(z_position - cross_section) < 2 * smoothing_length
        mask = mask & near_plane

    xy = np.vstack((x_position[mask], y_position[mask])).T
    scalar = data[mask]
    h = smoothing_length[mask]
    m = particle_mass[mask]

    if density_weighted and is_cross_section:
        weights = scalar * m / h * _C_NORM_3D
    elif density_weighted:
        weights = scalar * m
    elif is_cross_section:
        weights = scalar * h**2 * _C_NORM_3D / _H_FACT**3
    else:
        weights = scalar * h**3 / _H_FACT**3

    if normalized:
        weights_norm = weights / scalar

    kde = FFTKDE(kernel='gaussian')
    grid, points = kde.fit(xy, weights=weights).evaluate(number_of_pixels)
    z = points.reshape(number_of_pixels)

    if normalized:
        _, points_norm = kde.fit(
            xy, weights=weights_norm).evaluate(number_of_pixels)
        z /= points_norm.reshape(number_of_pixels)

    normalization = np.sum(weights)
    if normalized:
        normalization /= np.sum(m)
    z *= normalization

    # The KDE picks its own grid; resample onto the requested extent.
    nx, ny = number_of_pixels[0], number_of_pixels[1]
    kde_x = np.linspace(grid[0, 0], grid[-1, 0], nx)
    kde_y = np.linspace(grid[0, 1], grid[-1, 1], ny)
    spline = RectBivariateSpline(kde_x, kde_y, z)
    x_regrid = np.linspace(*extent[:2], nx)
    y_regrid = np.linspace(*extent[2:], ny)
    return spline(x_regrid, y_regrid).T
def analysis(self, algorithms=["tULA", "RWM"], measure="histogram", bins=10, repeat=1, experiment_mode=False):
    """Sample with each algorithm, then plot or store the chosen measure.

    For every algorithm, ``self.N_sim`` simulations are run through
    ``self.sampler.get_samples`` and reduced to one measurement each.

    Parameters
    ----------
    algorithms : list of str
        Algorithm names passed to the sampler; also used as plot labels.
        (The default list is never mutated here.)
    measure : str
        One of "first_moment", "second_moment", "trace", "scatter",
        "histogram", "FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW",
        "KL_divergence", "total_variation", "sliced_wasserstein",
        "sliced_wasserstein_no_histogram".
    bins : int
        Bin count for the histogram-based measures.
    repeat : int
        Not used by this method.
    experiment_mode : bool
        When True nothing is printed as a header or plotted; for the
        score/moment measures the results are stored in
        ``self.experiment_data["results"]`` instead of being box-plotted.
    """
    if not experiment_mode:
        # Print information about the analysis
        print('\n####### Initializing analysis #########\n' + '#'*39)
        print(' ALGORITHMS: {:s}'.format(str(algorithms)))
        print(' MEASURE: {:s}'.format(measure))
        print(' PARAMETERS:')
        parameters = [
            ('Potential', self.potential), ('Dimension', self.dim),
            ('x0', self.x0), ('Step', self.step),
            ('Number of iterations', self.N),
            ('Burn-in period', self.burn_in),
            ('Number of simulations', self.N_sim),
            ('Number of chains', self.N_chains),
            ('Measuring points', self.measuring_points),
            ('Time allocation', self.timer),
        ]
        for p in parameters:
            print(' ' + '{:>22}: {:s}'.format(*map(str, p)))
        print('#'*39 + '\n')

    # Measures whose measurement is simply the raw sample array.
    raw_sample_measures = ["trace", "scatter",
                           "FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW",
                           "sliced_wasserstein_no_histogram"]
    histogramdd_measures = ["KL_divergence", "total_variation",
                            "sliced_wasserstein"]

    # Collect the measurements.
    # For N_sim simulations, we store the measurement we are interested in
    # (first moment, second moment, all samples...)
    measurements = {}
    for algo in algorithms:
        measurements[algo] = []
        for s in range(self.N_sim):
            samples = self.sampler.get_samples(
                algorithm=algo, burn_in=self.burn_in,
                n_chains=self.N_chains, n_samples=self.N,
                measuring_points=self.measuring_points, timer=self.timer)
            if measure == "first_moment":
                measurement = np.sum(samples, axis=0)/len(samples)
            elif measure == "second_moment":
                measurement = np.sum(samples**2, axis=0)/len(samples)
            elif measure in raw_sample_measures:
                measurement = samples
            elif measure == "histogram":
                measurement = np.histogram(samples, bins=bins,
                                           range=(-5, 5), density=True)
            elif measure in histogramdd_measures:
                try:
                    # some algorithms blow up
                    measurement = np.histogramdd(samples, bins=bins)
                except Exception:
                    # Mark the simulation as unusable; it is skipped when
                    # scoring. `Exception` (not a bare except) so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    measurement = None, None
            measurements[algo].append(measurement)
            print(' Algorithm: {:>5}, simulation {:d}, collected {:d} samples.'.format(algo, s, len(samples)))
    print()

    # Plot the results
    if measure in ["first_moment", "second_moment"]:
        # Only the first coordinate of each moment is reported.
        data = [[m[0] for m in measurements[algo]] for algo in algorithms]
        if not experiment_mode:
            plt.boxplot(data, labels=algorithms)
        else:
            self.experiment_data["results"] = data

    elif measure == "trace":
        if not experiment_mode:
            for algo in algorithms:
                # Points with norm >= 1e6 are dropped from the plot.
                kept = [p for p in measurements[algo][0] if norm(p) < 1e6]
                plt.plot([p[0] for p in kept], [p[1] for p in kept],
                         '-', linewidth=1, alpha=0.8)
            plt.legend(algorithms)

    elif measure == "scatter":
        if not experiment_mode:
            if self.dim == 2:
                for algo in algorithms:
                    kept = [p for p in measurements[algo][0]
                            if norm(p) < 1e6]
                    plt.scatter([p[0] for p in kept],
                                [p[1] for p in kept], s=1)
                plt.legend(algorithms)
            elif self.dim == 3:
                fig = plt.figure()
                ax = fig.add_subplot(111, projection='3d')
                for algo in algorithms:
                    kept = [p for p in measurements[algo][0]
                            if norm(p) < 1e6]
                    ax.scatter(xs=[p[0] for p in kept],
                               ys=[p[1] for p in kept],
                               zs=[p[2] for p in kept], s=1)
                ax.legend(algorithms)

    elif measure == "histogram":
        if not experiment_mode:
            for algo in algorithms:
                # Only the first simulation of each algorithm is drawn.
                hist, edges = measurements[algo][0]
                width = 0.85 * (edges[1] - edges[0])
                center = (edges[:-1] + edges[1:])/2
                plt.bar(center, hist, align='center', width=width,
                        alpha=0.6)
            self.sampler.potential.plot_density()
            plt.legend(['true density'] + algorithms)

    elif measure in ["FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW"]:
        data = []
        for algo in algorithms:
            scores = []
            for s in range(self.N_sim):
                # Weights 1..n: later samples count more in the KDE.
                # (The original author noted the purpose is unclear.)
                weights = np.arange(len(measurements[algo][s])) + 1
                estimator = FFTKDE(kernel = 'gaussian')
                # 30 grid points is arbitrary
                x, ys = estimator.fit(measurements[algo][s],
                                      weights=weights).evaluate(30)
                true_ys = self.sampler.potential.get_density(x)
                est_prob = ys/np.sum(ys)
                true_prob = true_ys/np.sum(true_ys)
                if measure == "FFTKDE_KL":
                    scores.append(entropy(est_prob, true_prob))
                elif measure == "FFTKDE_TV":
                    scores.append(sum(abs(est_prob - true_prob))/2)
                elif measure == "FFTKDE_SW":
                    scores.append(sliced_wasserstein_distance(
                        est_prob, true_prob, x, self.dim))
            data.append(scores)
        if not experiment_mode:
            plt.boxplot(data, labels=algorithms)
        else:
            self.experiment_data["results"] = data

    elif measure in ["KL_divergence", "total_variation", "sliced_wasserstein"]:
        data = []
        for algo in algorithms:
            scores = []
            for p, edges in measurements[algo]:
                if p is None:
                    # histogramdd failed for this simulation
                    continue
                # true distribution histogram
                q, bin_coors = self.sampler.potential.get_histogram(edges)
                if measure == "KL_divergence":
                    ps, qs = p.flatten(), q.flatten()
                    scores.append(entropy(ps/sum(ps), qs/sum(qs)))
                elif measure == "total_variation":
                    ps, qs = p.flatten(), q.flatten()
                    scores.append(sum(abs(ps/sum(ps) - qs/sum(qs)))/2)
                elif measure == "sliced_wasserstein":
                    scores.append(sliced_wasserstein_distance(
                        p/np.sum(p), q/np.sum(q), bin_coors, self.dim))
            data.append(scores)
        if not experiment_mode:
            plt.boxplot(data, labels=algorithms)
        else:
            self.experiment_data["results"] = data

    elif measure == "sliced_wasserstein_no_histogram":
        data = []
        for algo in algorithms:
            scores = []
            for p in measurements[algo]:
                scores.append(sliced_wasserstein_no_histogram(
                    p, self.sampler.potential.get_density(p)))
            data.append(scores)
        if not experiment_mode:
            plt.boxplot(data, labels=algorithms)
        else:
            self.experiment_data["results"] = data

    if not experiment_mode:
        # Label and show
        plt.title('Measure: {:s}, '.format(measure) + '\nPotential: {:s}'.format(self.potential))
        plt.show()