def update(attr, old, new):
    inds = np.array(new["1d"]["indices"])  # error when crosshair is added
    # for zero selected or all selected
    if len(inds) == 0:
        hist1 = np.zeros_like(edges[:-1])  # one value per bin, matching np.histogram output
        u_scat_data = np.array([np.zeros_like(source.data["y"]),
                                np.zeros_like(source.data["y"])])
    # update hist values on selection
    else:
        hist1, _ = np.histogram(source.data["y"][inds], bins=edges, density=True)
        u_scat_data = np.array([source.data["y"][inds], source2.data["y"][inds]])
    if len(inds) > 2:
        kde_span = np.linspace(np.min(source.data["y"][inds]),
                               np.max(source.data["y"][inds]),
                               np.size(source.data["y"][inds]))
        kde_data = gkde(source.data["y"][inds]).evaluate(kde_span)
    else:
        kde_data = np.zeros(2)
        kde_span = np.zeros(2)
    # update plotting data sources
    u_hist.data_source.data['right'] = hist1
    kde_line.data_source.data['x'] = kde_data
    kde_line.data_source.data['y'] = kde_span
    u_scat_points.data_source.data['x'] = u_scat_data[0]
    u_scat_points.data_source.data['y'] = u_scat_data[1]
    print(inds)  # to see response on server (will be removed)
def plot_2d_contour(samples_x, samples_y, num=1, title='', xlim=None, ylim=None,
                    xlabel="$x$", ylabel="$y$"):
    if len(str(num)) >= 3:
        plt.subplot(num)
    else:
        plt.figure(num)
    if xlim is None:
        xlim = [np.min(samples_x), np.max(samples_x)]
    if ylim is None:
        ylim = [np.min(samples_y), np.max(samples_y)]
    xy_kde = gkde(np.vstack([samples_x, samples_y]))
    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 80),
                         np.linspace(ylim[0], ylim[1], 80))
    zz = np.reshape(xy_kde(np.vstack([xx.ravel(), yy.ravel()])).T, xx.shape)
    ax = plt.gca()
    cfset = ax.contourf(xx, yy, zz, cmap='Blues', alpha=1.0)
    cset = ax.contour(xx, yy, zz, colors='k', alpha=1.0, linewidths=0.5)
    ax.clabel(cset, fontsize=4)
    plt.colorbar(cfset)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.title(title)
def do_kde(y, x=None, scipykde=False, norm=False):
    if scipykde:
        # x must be provided when scipykde=True
        from scipy.stats import gaussian_kde as gkde
        pdf = gkde(y)(x)
        bw = None
    else:
        from .kde.kde import kde
        bw, x, pdf = kde(y)
    dx = x[1] - x[0]
    peaki = pdf.argmax()
    peakx = x[peaki]
    nval = (pdf * dx).sum()
    try:
        ret = []
        for i in range(x.size):
            if i <= peaki:
                ret.append((pdf[:i + 1] * dx).sum() / nval)
            if i > peaki:
                ret.append((pdf[i - 1:] * dx).sum() / nval)
        ret = np.array(ret)
        foo = x[np.abs(ret - .16).argsort()]
        msig, psig = foo[foo < peakx][0], foo[foo > peakx][0]  # 1-sigma values
    except IndexError:
        msig, psig, ret = np.nan, np.nan, None
    if norm:
        pdf /= pdf.max()
    return x, pdf, (msig, psig), {'ppf': ret, 'bw': bw, 'norm': nval}
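# A minimal usage sketch of do_kde on synthetic draws (assuming numpy is bound
# to np as in the snippets above; the grid and sample sizes are arbitrary).
# For a standard normal, the returned (msig, psig) pair should land near -1/+1:
import numpy as np

y = np.random.default_rng(0).normal(size=5000)  # synthetic N(0,1) samples
x = np.linspace(-4., 4., 400)                   # evaluation grid
grid, pdf, (msig, psig), info = do_kde(y, x=x, scipykde=True)
print(msig, psig)  # approximately -1 and +1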
def plot_2d_scatter(samples_x, samples_y, marker='o', num=1, title='', xlim=None,
                    ylim=None, xlabel="$x$", ylabel="$y$"):
    if len(str(num)) >= 3:
        plt.subplot(num)
    else:
        plt.figure(num)
    x = samples_x
    y = samples_y
    xy = np.vstack([x, y])
    z = gkde(xy)(xy)
    # sort points by density so the densest are drawn on top
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]
    plt.scatter(x, y, c=z, s=50, edgecolor='none', marker=marker)
    if xlim is None:
        xlim = [np.min(x), np.max(x)]
    if ylim is None:
        ylim = [np.min(y), np.max(y)]
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.title(title)
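# Usage sketch for the two plotting helpers above on correlated toy data
# (assumes matplotlib.pyplot as plt and the gkde alias are imported at module
# level, as the function bodies require):
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(1)
xs = rng.normal(size=2000)
ys = 0.6 * xs + 0.8 * rng.normal(size=2000)  # correlated samples
plot_2d_contour(xs, ys, num=1, title='KDE contours')
plot_2d_scatter(xs, ys, num=2, title='Density-coloured scatter')
plt.show()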
def create_kernel_density(self):
    if self.kernel_density is not None:
        warnings.warn('Found existing kernel density. Overwriting.')
    # warn, then actually overwrite below
    if self.n_dim < 4:
        self.kernel_density = gkde(np.squeeze(self.samples).T)
    else:
        print('Attempting KDE in %d dimensions. Aborting.' % self.n_dim)
        exit()
def p_Xw_i_outlier(mock, ell=0, rebin=None, krange=None, method='choletsky'):
    ''' Examine the pdf of X_w^i components that deviate significantly from N(0,1) '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method)  # whitened data
    # calculate the chi-squared values of each p(X_w^i)
    x = np.arange(-5., 5.1, 0.1)
    chi2 = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]):
        kern = gkde(X_w[:, i_bin])  # Gaussian KDE with Scott's rule-of-thumb bandwidth
        chi2[i_bin] = np.sum((UT.gauss(x, 1., 0.) - kern.evaluate(x))**2) / float(len(x))
    # plot the most discrepant components
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(chi2)
    print('outlier bins = ', i_sort[-5:])
    for i_bin in i_sort[-10:]:
        kern = gkde(X_w[:, i_bin])
        sub.plot(x, kern.evaluate(x))
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X^{i}_{W}}$', fontsize=25)
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X^{i}_{W})}$', fontsize=25)
    sub.legend(loc='upper right')
    if rebin is None:
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock,
                     '.ell', str(ell), '.png'])
    else:
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock,
                     '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight')
    return None
def __init__(self, samples, rv_name='$x$', label='p(x)',
             rv_transform=lambda x: x, kde=True):
    self.rv_name = rv_name
    self.label = label
    self.samples = samples
    self.n_samples = np.shape(samples)[0]
    self.n_dim = np.shape(samples)[1]
    self.rv_transform = rv_transform
    self.kde_evals = None
    if self.n_dim < 4 and kde:
        self.kernel_density = gkde(np.squeeze(samples).T)
    elif kde:
        print('Attempting KDE in %d dimensions. Aborting.' % self.n_dim)
        exit()
    else:
        self.kernel_density = None
def weight_pdf(dpca, marginalization, axes=None, cellnames=None, color=None):
    if axes is None:
        fig, axes = plt.subplots(1)
        fig.suptitle('PDF marginalization weights')
    else:
        fig = axes.figure
    dd = dpca.P[marginalization][:, 0]  # Neurons x Components
    pdf = gkde(dd)
    x = np.linspace(-1.2, 1.2, 100, endpoint=False)
    axes.plot(x, pdf(x), color=color, linewidth=2)
    axes = sn.swarmplot(x=dd, color=color, ax=axes)
    axes.set_ylim(-0.4, np.max(pdf(x)) + 0.2)
    axes.set_yticks([0, 1, 2])
    axes.set_yticklabels([0, 1, 2])
    axes.set_xlabel('encoder weight')
    axes.set_ylabel('probability density')
    # axes.scatter(dd, np.zeros(len(dd)) + shuff, color=color, alpha=0.5)
    #
    # if cellnames is not None:
    #     cellnames = [cell[-4:] for cell in cellnames]
    #     ticks = dd.tolist()
    #     ticks.extend((-1, 1))
    #     print(ticks)
    #     tick_lables = cellnames.copy()
    #     tick_lables.extend(('-1', '1'))
    #     print(tick_lables)
    #     # TODO: differentiate between minor and major ticks
    #     axes.set_xticks(ticks)
    #     axes.set_xticklabels(tick_lables, rotation='vertical')
    topcell = cellnames[np.argmax(dd)] if cellnames is not None else None
    return fig, axes, topcell
def make_figure():
    # %% Create Time Series Graph
    # Create Time Series plot area
    time_plot = figure(plot_height=400, plot_width=800, title="",
                       x_axis_label='Time', tools='', y_axis_label='l1013aspv',
                       toolbar_location="left", x_axis_type="datetime",
                       y_range=(min(data_source.data["y1"]) - min(data_source.data["y1"] * 0.1),
                                max(data_source.data["y1"]) + max(data_source.data["y1"] * 0.1)))
    # Modify the BoxSelectTool:
    #   dimensions = the dimension in which the box selection is free
    #   select_every_mousemove = select points as the box moves
    time_plot.add_tools(BoxSelectTool(dimensions=["width"], select_every_mousemove=True))
    # Add another axis
    time_plot.extra_y_ranges = {"foo": Range1d(
        start=min(data_source.data["y2"]) - min(data_source.data["y1"] * 0.1),
        end=max(data_source.data["y2"]) + max(data_source.data["y1"] * 0.1))}
    # Add data to scatter plot (data points on time plot)
    time_scat = time_plot.scatter("x", "y1", source=data_source, size=1, color="green")
    time_scat2 = time_plot.scatter("x", "y2", source=data_source, size=1, color="blue",
                                   y_range_name="foo")
    # Add time series lines
    time_plot.line("x", "y1", source=data_source, color=time_scat.glyph.fill_color, alpha=0.5)
    time_plot.line("x", "y2", source=data_source, color=time_scat2.glyph.fill_color,
                   alpha=0.5, y_range_name="foo")
    # Customize time_plot grid lines
    time_plot.xgrid.grid_line_color = None
    time_plot.ygrid.grid_line_alpha = 0.2
    # First axis styling
    time_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
    time_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
    time_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
    time_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
    time_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color
    # Add second axis to time_plot and style it
    time_plot.add_layout(LinearAxis(y_range_name="foo",
                                    axis_line_color=str(time_scat2.glyph.fill_color),
                                    major_label_text_color=str(time_scat2.glyph.fill_color),
                                    axis_label_text_color=str(time_scat2.glyph.fill_color),
                                    major_tick_line_color=str(time_scat2.glyph.fill_color),
                                    minor_tick_line_color=str(time_scat2.glyph.fill_color),
                                    axis_label="l1015asop"), "left")

    # %% Create Marginal Histogram and KDE
    # Marginal histogram for y-axis data density
    hist_plot = figure(plot_height=400, plot_width=200, y_range=time_plot.y_range)
    # Add second axis to histogram
    hist_plot.extra_y_ranges = {"foo": Range1d(
        start=min(data_source.data["y2"]) - min(data_source.data["y1"] * 0.1),
        end=max(data_source.data["y2"]) + max(data_source.data["y1"] * 0.1))}
    # Customize hist_plot grid lines
    hist_plot.xgrid.grid_line_alpha = 0.2
    hist_plot.ygrid.grid_line_alpha = 0.5
    # Histogram data
    hist, edges = histogram(data_source.data["y1"], density=True, bins=20)
    hist2, edges2 = histogram(data_source.data["y2"], density=True, bins=20)
    # Style histogram axes
    hist_plot.xaxis.axis_label = ""
    hist_plot.yaxis.axis_label = ""
    hist_plot.xaxis.visible = False
    # Add Gaussian kernel density estimator
    y_span = linspace(min(data_source.data["y1"]), max(data_source.data["y1"]),
                      size(data_source.data["y1"]))
    kde = gkde(data_source.data["y1"]).evaluate(y_span)
    y_span2 = linspace(min(data_source.data["y2"]), max(data_source.data["y2"]),
                       size(data_source.data["y2"]))
    kde2 = gkde(data_source.data["y2"]).evaluate(y_span2)
    # Histogram first axis styling
    hist_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
    hist_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
    hist_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
    hist_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
    hist_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color
    # Histogram second axis styling
    hist_plot.add_layout(LinearAxis(y_range_name="foo",
                                    axis_line_color=str(time_scat2.glyph.fill_color),
                                    major_label_text_color=str(time_scat2.glyph.fill_color),
                                    axis_label_text_color=str(time_scat2.glyph.fill_color),
                                    major_tick_line_color=str(time_scat2.glyph.fill_color),
                                    minor_tick_line_color=str(time_scat2.glyph.fill_color)),
                         "left")

    # %% Create Scatter Graph
    scat_plot = figure(plot_height=400, plot_width=400, title="",
                       x_axis_label='l1015asop', y_axis_label='l1013aspv')
    # Scatter plot axis customization
    scat_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
    scat_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
    scat_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
    scat_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
    scat_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color
    scat_plot.xaxis.axis_line_color = time_scat2.glyph.fill_color
    scat_plot.xaxis.minor_tick_line_color = time_scat2.glyph.fill_color
    scat_plot.xaxis.major_tick_line_color = time_scat2.glyph.fill_color
    scat_plot.xaxis.axis_label_text_color = time_scat2.glyph.fill_color
    scat_plot.xaxis.major_label_text_color = time_scat2.glyph.fill_color

    # %% Add data to histogram and scatter plot (updated in the callback function)
    # Create updatable data sources
    u_hist_source = ColumnDataSource(data=dict(top=edges[1:], bottom=edges[:-1],
                                               left=zeros_like(hist), right=hist))
    u_hist_source2 = ColumnDataSource(data=dict(top=edges2[1:], bottom=edges2[:-1],
                                                left=zeros_like(hist2), right=hist2))
    u_kde_source = ColumnDataSource(data=dict(x=kde, y=y_span))
    u_kde_source2 = ColumnDataSource(data=dict(x=kde2, y=y_span2))
    scat_data = ColumnDataSource(data=dict(x=[0], y=[0]))
    # Updatable histograms
    hist_plot.quad(top='top', bottom='bottom', left='left', right='right',
                   source=u_hist_source, fill_color=time_scat.glyph.fill_color, alpha=0.5)
    hist_plot.quad(top='top', bottom='bottom', left='left', right='right',
                   source=u_hist_source2, fill_color=time_scat2.glyph.fill_color,
                   alpha=0.3, y_range_name="foo")
    # Updatable KDE lines
    hist_plot.line('x', 'y', source=u_kde_source, line_color="#008000")
    hist_plot.line('x', 'y', source=u_kde_source2, line_color="#000099", y_range_name="foo")
    # Updatable scatter plot
    scat_plot.scatter('x', 'y', source=scat_data, size=2, alpha=0.3)

    # %% Updating function
    data_source.callback = CustomJS(
        args=dict(hist_data=u_hist_source, hist_data2=u_hist_source2,
                  kde_d=u_kde_source, kde_d2=u_kde_source2, sc=scat_data),
        code="""
        Update_ALL_Figures(cb_obj, hist_data, hist_data2, kde_d, kde_d2, sc)
        """)

    # %% Create plot layout
    layout = gridplot([[time_plot, hist_plot], [scat_plot, None]])
    return layout  # the layout must be returned
# Normal distribution
# Arithmetic mean
mu = np.mean(source.data["y"])
# Standard deviation
sigma = np.std(source.data["y"])
# Normal distribution (probability density function)
y_span = np.linspace(np.min(source.data["y"]), np.max(source.data["y"]),
                     np.size(source.data["y"]))
# nd = 1/(np.sqrt(2*np.pi)*sigma)*np.exp(-(y_span - mu)**2/(2*sigma**2))
# Construct the normal distribution line
# hist_plot.line(nd, y_span, line_color="#668cff", line_width=1, alpha=0.5)

# Add Gaussian kernel density estimator
kde = gkde(source.data["y"]).evaluate(y_span)
# Construct the Gaussian kernel density estimator line
hist_plot.line(kde, y_span, line_color="#ff6666", line_width=1, alpha=0.5)

# Create updatable plots
u_hist = hist_plot.quad(top=edges[1:], bottom=edges[:-1], left=0,
                        right=np.zeros_like(edges[1:]),
                        fill_color=time_scat.glyph.fill_color, alpha=0.5)
kde_data = np.zeros(len(kde))
kde_line = hist_plot.line(kde_data, y_span, line_color="red")

# Create scatter plot of the two data sets
scat_plot = figure(plot_height=400, plot_width=400, title="",
                   x_axis_label='', y_axis_label='')
n, bins, patches = ax1.hist(y, num_bins, density=1, histtype='stepfilled',
                            facecolor=pdf_col, alpha=0.5)

# Establish the y-axis label of the pdf and the shared x-axis limits
ylabel("Probability density")
plt.ylim(0, 3.0)
plt.xlim(0, 3.0)
plt.xticks(np.arange(0, 3.0, step=0.5))

# Overlay the line of the pdf to give the edge definition
ax2 = fig1.add_subplot(111, sharex=ax1, sharey=ax1, frameon=False)
# n2, bins2, patches2 = ax2.hist(y, num_bins, density=1, histtype='step', color='black', alpha=0.5)
density = gkde(y)
xs = np.linspace(0, 3, 300)
density.covariance_factor = lambda: 0.05  # fix the KDE bandwidth manually
density._compute_covariance()
ax2.plot(xs, density(xs))

# Create the cdf subplot
ax3 = fig1.add_subplot(111, sharex=ax1, frameon=False)
ax3.hist(y, 1200, density=1, color='navy', histtype='step', cumulative=True)

# Establish the y-axis of the cdf
ax3.yaxis.tick_right()
ax3.yaxis.set_label_position("right")
xlabel("RMSD (Angstroms)")
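# Overriding covariance_factor and calling the private _compute_covariance()
# works, but relies on SciPy internals. The public bw_method argument sets the
# same fixed bandwidth: a scalar is used directly as the covariance factor.
# Minimal equivalent sketch (y is assumed to hold the RMSD samples used above):
import numpy as np
from scipy.stats import gaussian_kde as gkde

density = gkde(y, bw_method=0.05)  # same effect as covariance_factor = lambda: 0.05
xs = np.linspace(0, 3, 300)
curve = density(xs)                # smoothed pdf on the grid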
        wc_corcoefs[k] = (np.corrcoef(g.X[key][(cc[key] if coclust else c) == k].T) -
                          np.diag(np.ones(g.m)))[:, :30]
    f, ax = plt.subplots(1, 1)
    ax.set(xlabel='Correlation coefficient $\\rho_{ij}^{(k)}$')
    ax.hist(wc_corcoefs[:, 2:, 2:].flatten(), color='gray', bins=30,
            label='Histogram of $\\rho_{ij}^{(k)}$ for $X_{d:}^' + key + '$',
            density=True)
    ax.scatter(wc_corcoefs[:, 0, 1].flatten(),
               np.zeros(g.K[key] if coclust else g.K), color='red',
               label='$\\rho_{ij}^{(k)}$ for $X_{:d}^' + key + '$')
    xx = np.linspace(-.2, .2, 500)
    kde = gkde(wc_corcoefs[:, 2:, 2:].flatten())
    ax.plot(xx, kde(xx), color='black')
    ax.legend()
    if dest_folder != '':
        plt.savefig(dest_folder + '/corr_sim_' + key + '.pdf')
    else:
        plt.savefig('corr_sim_' + key + '.pdf')
else:
    plt.figure()
    wc_corcoefs = np.zeros((g.K, g.m, 30))
    corcoefs = (np.corrcoef(g.X.T) - np.diag(np.ones(g.m)))[:, :30]
    for k in range(g.K):
        wc_corcoefs[k] = (np.corrcoef(g.X[c == k].T) -
                          np.diag(np.ones(g.m)))[:, :30]
    f, ax = plt.subplots(1, 1)
    ax.set(xlabel='Correlation coefficient $\\rho_{ij}^{(k)}$')
    mA = dist_a.mean()
    mB = dist_b.mean()
    ax.axvline(mA, c='b')
    ax.axvline(mB, c='xkcd:orange')
    ax.plot(x, pdf_a, label=f'A ({mA:1.2e})', c='b')
    ax.plot(x, pdf_b, label=f'B ({mB:1.2e})', c='xkcd:orange')
    ax.legend()
    if pval < 0.05:
        color = 'k'
    else:
        color = 'r'
    ax.set_title(f"p-value: {pval:1.2e}", fontsize=24, c=color)
    ax.set_ylim([0, 1.05 * np.max(np.concatenate([pdf_a, pdf_b]))])
    st.pyplot(fig)

kde = gkde(perm_replicates)
x0 = np.linspace(min(perm_replicates),
                 max(max(perm_replicates), empirical_diff_means), 100)
p_y = kde.pdf(x0)
# If the two distributions have the same mean, the probability that the
# difference in means equals 0 should approach 1 (full confidence) as the
# sample size increases.
with _lock:
    fig_p, ax_p = plt.subplots()
    ax_p.plot(x0, p_y, c='k', lw=2)
    ax_p.axvline(empirical_diff_means, c='r', lw=2, ls='--')
    section = np.linspace(empirical_diff_means, max(x0), 100)
    ax_p.fill_between(section, kde.pdf(section), color='r')
    ax_p.set_title('Permuted Samples of Test Statistic: Density Estimate')
    st.pyplot(fig_p)
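# The shaded tail in the figure above corresponds to a one-sided permutation
# p-value. A minimal sketch of computing that tail mass directly from the same
# KDE (perm_replicates and empirical_diff_means here are hypothetical stand-ins
# for the values defined above):
import numpy as np
from scipy.stats import gaussian_kde as gkde

rng = np.random.default_rng(0)
perm_replicates = rng.normal(size=10000)   # stand-in for the permuted statistics
empirical_diff_means = 1.5                 # stand-in for the observed statistic

kde = gkde(perm_replicates)
# Tail mass of the fitted KDE beyond the observed statistic
p_kde = kde.integrate_box_1d(empirical_diff_means, np.inf)
# Direct empirical estimate, for comparison
p_emp = np.mean(perm_replicates >= empirical_diff_means)
print(p_kde, p_emp)  # both near the true N(0,1) tail probability of about 0.067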
def predict_and_sample(self, x_pred):
    if self.regression_model is None:
        print('No regression model available. Make sure you have called '
              'build_regression_predict_and_sample().')
        exit()
    mu = np.zeros(x_pred.shape)
    sigma = np.zeros(x_pred.shape)
    hf_model_evals_pred = np.zeros(x_pred.shape)
    if self.regression_type in ['gaussian_process',
                                'heteroscedastic_gaussian_process']:
        for i in range(x_pred.shape[1]):
            # Predict q_l|q_l-1 at all low-fidelity samples
            mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(mu.shape[0]):
                hf_model_evals_pred[j, i] = mu[j, i] + sigma[j, i] * np.random.randn()
    elif self.regression_type in ['consistent_gaussian_process',
                                  'consistent_heteroscedastic_gaussian_process']:
        for i in range(x_pred.shape[1]):
            # Enrich x_pred with the previously predicted QoIs
            if i > 0:
                x_pred = np.hstack([x_pred, hf_model_evals_pred[:, 0:i].reshape((-1, i))])
            # Predict q_l|q_l-1 at all low-fidelity samples
            mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(mu.shape[0]):
                hf_model_evals_pred[j, i] = mu[j, i] + sigma[j, i] * np.random.randn()
    elif self.regression_type in ['decoupled_gaussian_process',
                                  'decoupled_heteroscedastic_gaussian_process']:
        for i in range(x_pred.shape[1]):
            # Predict q_l|q_l-1 at all low-fidelity samples
            mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                np.expand_dims(x_pred[:, i], axis=1), return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(mu.shape[0]):
                hf_model_evals_pred[j, i] = mu[j, i] + sigma[j, i] * np.random.randn()
    elif self.regression_type == 'gaussian_process_kde':
        for i in range(x_pred.shape[1]):
            # Predict q_l|q_l-1 at all low-fidelity samples
            mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                x_pred, return_std=True)
            mu_train, sigma_train = self.regression_model[i].predict(
                self.x_train, return_std=True)
            noise_train = self.y_train[:, i] - mu_train
            joint_kde = gkde([self.x_train[:, i], noise_train])
            nmin = np.min(noise_train)
            nmax = np.max(noise_train)
            # Generate high-fidelity samples from the predictions
            for j in range(mu.shape[0]):
                hf_model_evals_pred[j, i] = mu[j, i]
                # Given x_pred, generate samples of y and average to get the
                # normalizing factor for this slice of the KDE
                Nsamp = 100
                nvals = np.random.uniform(low=nmin, high=nmax, size=Nsamp)
                kde_slice_samp = joint_kde([x_pred[j, i] * np.ones(Nsamp), nvals])
                normfactor = np.mean(kde_slice_samp)
                kde_slice_samp *= 1.0 / normfactor
                ratio = np.divide(kde_slice_samp, 1.0 / (nmax - nmin) * np.ones(Nsamp))
                ratio *= 1.0 / np.max(ratio)
                # Rejection sampling over the proposed noise values; bound the
                # loop so it cannot run past the proposals
                foundsamp = False
                ii = 0
                while not foundsamp and ii < Nsamp:
                    if ratio[ii] > np.random.uniform(low=0, high=1, size=1):
                        foundsamp = True
                        hf_model_evals_pred[j, i] += nvals[ii]
                    else:
                        ii += 1
    else:
        print('Unknown regression model %s.' % self.regression_type)
        exit()
    return hf_model_evals_pred
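# The 'gaussian_process_kde' branch above draws additive noise from a slice of
# the joint KDE p(x, noise) by rejection sampling against a uniform proposal.
# Self-contained sketch of that one step on hypothetical heteroscedastic toy
# data (names and sizes here are illustrative, not the class's own):
import numpy as np
from scipy.stats import gaussian_kde as gkde

rng = np.random.default_rng(42)
x_train = rng.uniform(0., 1., 500)
noise_train = (0.1 + 0.2 * x_train) * rng.normal(size=500)  # toy noise model
joint_kde = gkde(np.vstack([x_train, noise_train]))

def sample_noise(x_query, n_prop=100):
    """Draw one sample ~ p(noise | x=x_query) by rejection sampling."""
    nmin, nmax = noise_train.min(), noise_train.max()
    nvals = rng.uniform(nmin, nmax, n_prop)                         # uniform proposals
    dens = joint_kde(np.vstack([np.full(n_prop, x_query), nvals]))  # KDE slice at x_query
    accept = rng.uniform(size=n_prop) < dens / dens.max()           # acceptance test
    return nvals[accept][0] if accept.any() else 0.0  # fall back to the mean prediction

print(sample_noise(0.8))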
def build_regression_predict_and_sample(self):
    hf_model_evals_pred = None
    if self.regression_type == 'gaussian_process':
        self.regression_model = []
        n_qoi = self.x_train.shape[1]
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(n_qoi):
            # Fit a GP regression model to approximate p(q_l|q_l-1)
            kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(n_qoi)) + WhiteKernel()
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
            self.regression_model[i].fit(self.x_train, self.y_train[:, i])
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                self.x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    elif self.regression_type == 'consistent_gaussian_process':
        self.regression_model = []
        n_qoi = self.x_train.shape[1]
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(n_qoi):
            # Enrich x_train
            if i > 0:
                x_train = np.hstack([self.x_train, self.y_train[:, 0:i].reshape((-1, i))])
            else:
                x_train = self.x_train
            # Fit a GP regression model to approximate p(q_l|q_l-1)
            kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(n_qoi + i)) + WhiteKernel()
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
            self.regression_model[i].fit(x_train, self.y_train[:, i])
            # Enrich x_pred
            if i > 0:
                x_pred = np.hstack([self.x_pred, hf_model_evals_pred[:, 0:i].reshape((-1, i))])
                # x_pred = np.hstack([self.x_pred, self.mu[:, 0:i].reshape((-1, i))])
            else:
                x_pred = self.x_pred
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    elif self.regression_type == 'gaussian_process_kde':
        self.regression_model = []
        n_qoi = self.x_train.shape[1]
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(n_qoi):
            # Fit a GP regression model to approximate p(q_l|q_l-1)
            kernel = ConstantKernel() + RBF(np.ones(n_qoi)) + WhiteKernel()
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
            self.regression_model[i].fit(self.x_train, self.y_train[:, i])
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                self.x_pred, return_std=True)
            mu_train = self.regression_model[i].predict(self.x_train, return_std=False)
            noise_train = self.y_train[:, i] - mu_train
            nmin = np.min(noise_train)
            nmax = np.max(noise_train)
            # plt.figure(1)
            # samples = np.vstack([np.squeeze(self.x_train), np.squeeze(noise_train)])
            # df = pd.DataFrame(samples.T, columns=['$Q_1$', 'Noise'])
            # g = sns.jointplot(x='$Q_1$', y='Noise', data=df, kind='kde', color='C0',
            #                   shade=True, shade_lowest=True, cmap='Blues')
            # g.plot_joint(plt.scatter, c='k', alpha=0.3, s=20, linewidth=0.0, marker='o')
            # g.ax_joint.collections[0].set_alpha(0)
            # g.ax_joint.legend_.remove()
            # g.set_axis_labels('$Q_1$', 'Noise')
            # plt.subplots_adjust(top=0.95)
            # plt.gcf().subplots_adjust(left=0.15)
            # plt.show()
            # plt.gcf().savefig('output/mfmc_noise_model.pdf', dpi=300)
            joint_kde = gkde([self.x_train[:, i], noise_train])
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i]
                # Given x_pred, generate samples of y and average to get the
                # normalizing factor for this slice of the KDE
                Nsamp = 100
                nvals = np.random.uniform(low=nmin, high=nmax, size=Nsamp)
                kde_slice_samp = joint_kde([self.x_pred[j, i] * np.ones(Nsamp), nvals])
                normfactor = np.mean(kde_slice_samp)
                kde_slice_samp *= 1.0 / normfactor
                ratio = np.divide(kde_slice_samp, 1.0 / (nmax - nmin) * np.ones(Nsamp))
                ratio *= 1.0 / np.max(ratio)
                foundsamp = False
                ii = 0
                while not foundsamp and ii < Nsamp:
                    if ratio[ii] > np.random.uniform(low=0, high=1, size=1):
                        foundsamp = True
                        hf_model_evals_pred[j, i] += nvals[ii]
                    else:
                        ii += 1
                if not foundsamp:
                    print('Rejection sampling failed. Using the mean prediction without noise.')
    elif self.regression_type == 'decoupled_gaussian_process':
        self.regression_model = []
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(self.x_train.shape[1]):
            # Fit a GP regression model to approximate p(q_l|q_l-1)
            kernel = ConstantKernel() + ConstantKernel() * RBF() + WhiteKernel()
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6))
            self.regression_model[i].fit(np.expand_dims(self.x_train[:, i], axis=1),
                                         self.y_train[:, i])
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                np.expand_dims(self.x_pred[:, i], axis=1), return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    elif self.regression_type == 'heteroscedastic_gaussian_process':
        self.regression_model = []
        n_qoi = self.x_train.shape[1]
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(n_qoi):
            # Fit a heteroscedastic GP regression model with spatially varying
            # noise to approximate p(q_l|q_l-1).
            # See https://github.com/jmetzen/gp_extras/ for more info.
            prototypes = KMeans(n_clusters=5).fit(self.x_train).cluster_centers_
            kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(self.y_train.shape[1])) \
                + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1),
                                                  gamma=5.0, gamma_bounds="fixed")
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6))
            self.regression_model[i].fit(self.x_train, self.y_train[:, i])
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                self.x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    elif self.regression_type == 'consistent_heteroscedastic_gaussian_process':
        self.regression_model = []
        n_qoi = self.x_train.shape[1]
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(n_qoi):
            # Enrich x_train
            if i > 0:
                x_train = np.hstack([self.x_train, self.y_train[:, 0:i].reshape((-1, i))])
            else:
                x_train = self.x_train
            # Fit a heteroscedastic GP regression model with spatially varying
            # noise to approximate p(q_l|q_l-1).
            # See https://github.com/jmetzen/gp_extras/ for more info.
            prototypes = KMeans(n_clusters=5).fit(x_train).cluster_centers_
            kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(n_qoi + i)) \
                + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1),
                                                  gamma=5.0, gamma_bounds="fixed")
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6))
            self.regression_model[i].fit(x_train, self.y_train[:, i])
            # Enrich x_pred
            if i > 0:
                x_pred = np.hstack([self.x_pred, hf_model_evals_pred[:, 0:i].reshape((-1, i))])
                # x_pred = np.hstack([self.x_pred, self.mu[:, 0:i].reshape((-1, i))])
            else:
                x_pred = self.x_pred
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                x_pred, return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    elif self.regression_type == 'decoupled_heteroscedastic_gaussian_process':
        self.regression_model = []
        self.mu = np.zeros(self.x_pred.shape)
        self.sigma = np.zeros(self.x_pred.shape)
        hf_model_evals_pred = np.zeros(self.x_pred.shape)
        for i in range(self.x_train.shape[1]):
            # Fit a heteroscedastic GP regression model with spatially varying
            # noise to approximate p(q_l|q_l-1).
            # See https://github.com/jmetzen/gp_extras/ for more info.
            prototypes = KMeans(n_clusters=5).fit(
                np.expand_dims(self.x_pred[:, i], axis=1)).cluster_centers_
            kernel = ConstantKernel() + ConstantKernel() * RBF() \
                + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1),
                                                  gamma=5.0, gamma_bounds="fixed")
            self.regression_model.append(gaussian_process.GaussianProcessRegressor(
                kernel=kernel, alpha=1e-6))
            self.regression_model[i].fit(np.expand_dims(self.x_train[:, i], axis=1),
                                         self.y_train[:, i])
            # Predict q_l|q_l-1 at all low-fidelity samples
            self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                np.expand_dims(self.x_pred[:, i], axis=1), return_std=True)
            # Generate high-fidelity samples from the predictions
            for j in range(self.mu.shape[0]):
                hf_model_evals_pred[j, i] = self.mu[j, i] + self.sigma[j, i] * np.random.randn()
    else:
        print('Unknown regression model %s.' % self.regression_type)
        exit()

    # Replace INF or NAN predictions
    if np.isinf(hf_model_evals_pred).any() or np.isnan(hf_model_evals_pred).any():
        warnings.warn('Detected INF or NAN values in the predictions, '
                      'replacing them with the sample mean.')
        mask = np.isnan(hf_model_evals_pred) | np.isinf(hf_model_evals_pred)
        hf_model_evals_pred[mask] = np.mean(hf_model_evals_pred[~mask], axis=0)
    return hf_model_evals_pred