def update(attr, old, new):    
    inds = np.array(new["1d"]["indices"])  #error when crosshair is added
    
    #for zero selected or all selected 
    if len(inds) == 0:
        hist1 = np.zeros(len(edges) - 1)  #match np.histogram output length (quad needs equal-length columns)
        u_scat_data = np.array([np.zeros_like(source.data["y"]),
                                np.zeros_like(source.data["y"])])
    #update hist values on selection
    else:
        hist1, _ = np.histogram(source.data["y"][inds], bins=edges, density=True)
        u_scat_data = np.array([source.data["y"][inds], source2.data["y"][inds]])
        
    if len(inds) > 2:
        kde_span = np.linspace(np.min(source.data["y"][inds]),
                               np.max(source.data["y"][inds]),
                               np.size(source.data["y"][inds]))
        kde_data = gkde(source.data["y"][inds]).evaluate(kde_span)
    else:
        kde_data = np.zeros(2)
        kde_span = np.zeros(2)
    #update plotting data sources
    u_hist.data_source.data['right'] = hist1
    kde_line.data_source.data['x'] = kde_data
    kde_line.data_source.data['y'] = kde_span
    u_scat_points.data_source.data['x'] = u_scat_data[0]
    u_scat_points.data_source.data['y'] = u_scat_data[1]
    print(str(inds)) #to see response on server (will be removed)
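# Hedged usage sketch (not from the original snippet): one way this callback
# could be wired up on a Bokeh server. The new["1d"]["indices"] access above
# matches the legacy selection dict of Bokeh 0.12.x; on Bokeh >= 1.0 one would
# attach source.selected.on_change('indices', ...) instead and receive the
# index list directly.
from bokeh.io import curdoc

source.on_change('selected', update)  # legacy API, assumed Bokeh 0.12.x
curdoc().add_root(make_figure())      # make_figure() as defined later on this page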
Example #2
def plot_2d_contour(samples_x, samples_y, num=1, title='', xlim=None, ylim=None, xlabel="$x$", ylabel="$y$"):

    if len(str(num)) >= 3:
        plt.subplot(num)  # a 3-digit num is treated as a subplot code
    else:
        plt.figure(num)

    if xlim is None:
        xlim = [np.min(samples_x), np.max(samples_x)]

    if ylim is None:
        ylim = [np.min(samples_y), np.max(samples_y)]

    xy_kde = gkde(np.vstack([samples_x, samples_y]))
    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 80), np.linspace(ylim[0], ylim[1], 80))
    zz = np.reshape(xy_kde(np.vstack([xx.ravel(), yy.ravel()])).T, xx.shape)
    ax = plt.gca()
    cfset = ax.contourf(xx, yy, zz, cmap='Blues', alpha=1.0)
    cset = ax.contour(xx, yy, zz, colors='k', alpha=1.0, linewidths=0.5)
    ax.clabel(cset, fontsize=4)
    plt.colorbar(cfset)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.title(title)
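# Hedged usage sketch for plot_2d_contour above (assumes gkde is
# scipy.stats.gaussian_kde, as elsewhere on this page):
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as gkde

rng = np.random.default_rng(0)
x = rng.normal(size=500)
y = 0.5 * x + rng.normal(scale=0.5, size=500)  # correlated toy samples
plot_2d_contour(x, y, num=1, title='Joint KDE of toy samples')
plt.show()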
Example #3
def do_kde(y, x=None, scipykde=False, norm=False):
    if scipykde:
        from scipy.stats import gaussian_kde as gkde
        pdf=gkde(y)(x)
        bw=None
    else:
        from .kde.kde import kde
        bw,x,pdf=kde(y)

    dx=x[1]-x[0]
    
    peaki=pdf.argmax()
    peakx=x[peaki]
    nval=(pdf*dx).sum()
    try:
        ret=[]
        for i in range(x.size):
            if i<=peaki: ret.append((pdf[:i+1]*dx).sum()/nval)
            if i>peaki: ret.append((pdf[i-1:]*dx).sum()/nval)
        ret=np.array(ret)
        foo=x[np.abs(np.array(ret)-.16).argsort()]
        msig,psig=foo[foo<peakx][0],foo[foo>peakx][0] #1sigma values
    except IndexError:
        msig,psig,ret=np.nan,np.nan,None
    if norm: pdf/=pdf.max()

    return x,pdf,(msig,psig),{'ppf':ret,'bw':bw,'norm':nval}
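# Hedged usage sketch for do_kde above (scipykde=True avoids the package-local
# .kde.kde import; x must be supplied on that branch):
import numpy as np

y = np.random.normal(loc=1.0, scale=0.3, size=2000)
x = np.linspace(0.0, 2.0, 400)
x, pdf, (msig, psig), info = do_kde(y, x=x, scipykde=True)
print('peak at', x[pdf.argmax()], '1-sigma interval:', msig, psig)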
Example #4
def plot_2d_scatter(samples_x, samples_y, marker='o', num=1, title='', xlim=None, ylim=None, xlabel="$x$", ylabel="$y$"):

    if len(str(num)) >= 3:
        plt.subplot(num)
    else:
        plt.figure(num)

    x = samples_x
    y = samples_y
    xy = np.vstack([x, y])
    z = gkde(xy)(xy)        # KDE density evaluated at each sample point
    idx = z.argsort()       # draw densest points last so they stay visible
    x, y, z = x[idx], y[idx], z[idx]
    plt.scatter(x, y, c=z, s=50, edgecolor='none', marker=marker)

    if xlim is None:
        xlim = [np.min(x), np.max(x)]
    if ylim is None:
        ylim = [np.min(y), np.max(y)]

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.title(title)
Example #5
    def create_kernel_density(self):
        if self.kernel_density is not None:
            warnings.warn('Found existing kernel density. Overwriting.')
        if self.n_dim < 4:
            self.kernel_density = gkde(np.squeeze(self.samples).T)
        else:
            print('Attempting KDE in %d dimensions. Aborting.' % self.n_dim)
            exit()
Example #6
def p_Xw_i_outlier(mock, ell=0, rebin=None, krange=None, method='choletsky'):
    ''' Examine the pdf of X_w^i components that deviate significantly from  
    N(0,1) 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    # calculate the chi-squared values of each p(X_w^i)  
    x = np.arange(-5., 5.1, 0.1)
    chi2 = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]): 
        kern = gkde(X_w[:,i_bin]) # gaussian KDE kernel using "rule of thumb" scott's rule. 
        chi2[i_bin] = np.sum((UT.gauss(x, 1., 0.) - kern.evaluate(x))**2)/float(len(x))
    
    # plot the most discrepant components. 
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(chi2)
    print('outlier bins = ', i_sort[-5:])
    for i_bin in i_sort[-10:]: 
        kern = gkde(X_w[:,i_bin]) # gaussian KDE kernel using "rule of thumb" scott's rule. 
        sub.plot(x, kern.evaluate(x))
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X^{i}_{W}}$', fontsize=25)
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X^{i}_{W})}$', fontsize=25)
    sub.legend(loc='upper right') 
    
    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
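# Hedged standalone sketch of the discrepancy measure above: the mean squared
# difference between a component's KDE and the N(0,1) pdf on a fixed grid,
# with scipy.stats.norm.pdf standing in for UT.gauss:
import numpy as np
from scipy.stats import norm
from scipy.stats import gaussian_kde as gkde

x = np.arange(-5., 5.1, 0.1)
samples = np.random.standard_normal(1000)  # one whitened component
kern = gkde(samples)                       # Scott's rule bandwidth by default
chi2 = np.sum((norm.pdf(x) - kern.evaluate(x))**2) / float(len(x))
print('discrepancy from N(0,1):', chi2)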
Example #7
    def __init__(self,
                 samples,
                 rv_name='$x$',
                 label='p(x)',
                 rv_transform=lambda x: x,
                 kde=True):
        self.rv_name = rv_name
        self.label = label
        self.samples = samples
        self.n_samples = np.shape(samples)[0]
        self.n_dim = np.shape(samples)[1]
        self.rv_transform = rv_transform
        self.kde_evals = None

        if self.n_dim < 4 and kde:
            self.kernel_density = gkde(np.squeeze(samples).T)
        elif kde:
            print('Attempting KDE in %d dimensions. Aborting.' % self.n_dim)
            exit()
        else:
            self.kernel_density = None
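# Hedged sketch of the shape convention assumed above: gaussian_kde expects
# data as (n_dim, n_samples), hence the transpose of the (n_samples, n_dim)
# sample array; np.squeeze keeps degenerate 1-d sample vectors working too.
import numpy as np
from scipy.stats import gaussian_kde as gkde

samples = np.random.randn(500, 2)      # (n_samples, n_dim)
kd = gkde(np.squeeze(samples).T)       # KDE sees shape (2, 500)
print(kd.evaluate(np.zeros((2, 1))))   # density at the origin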
Example #8
def weight_pdf(dpca, marginalization, axes=None, cellnames=None, color=None):

    if axes is None:
        fig, axes = plt.subplots(1)
        fig.suptitle('PDF marginalization weights')

    else:
        fig = axes.figure


    dd = dpca.P[marginalization][:,0]  # first component of the Neurons x Components weights
    pdf = gkde(dd)
    x = np.linspace(-1.2, 1.2, 100, endpoint=False)
    axes.plot(x, pdf(x), color=color, linewidth=2)
    axes = sn.swarmplot(x= dd, color=color, ax=axes)
    axes.set_ylim(-0.4, np.max(pdf(x)) + 0.2  )
    axes.set_yticks([0,1,2])
    axes.set_yticklabels([0, 1, 2])
    axes.set_xlabel('encoder weight')
    axes.set_ylabel('probability density')
    # axes.scatter(dd, np.zeros(len(dd)) + shuff, color=color, alpha=0.5)

    #
    # if cellnames is not None:
    #     cellnames = [cell[-4:] for cell in cellnames]
    #     ticks = dd.tolist()
    #     ticks.extend((-1, 1))
    #     print(ticks)
    #     tick_lables = cellnames.copy()
    #     tick_lables.extend(('-1', '1'))
    #     print(tick_lables)
    #     # TODO: differentiate between minor and major ticks
    #     axes.set_xticks(ticks)
    #     axes.set_xticklabels(tick_lables, rotation='vertical')

    topcell = cellnames[np.argmax(dd)] if cellnames is not None else None

    return fig, axes, topcell
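# Hedged usage sketch with a made-up stand-in for the dPCA object: the real
# dpca.P maps marginalization names to Neurons x Components weight arrays,
# and weight_pdf above also relies on module-level plt, gkde, and sn:
import numpy as np
import seaborn as sn

class FakeDPCA:
    P = {'time': np.random.randn(20, 3)}  # 20 neurons, 3 components

cells = ['cell%02d' % i for i in range(20)]
fig, axes, topcell = weight_pdf(FakeDPCA(), 'time', cellnames=cells, color='C0')
print('cell with the largest first-component weight:', topcell)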
Example #9
    def make_figure():
#%% Create Time Series Graph
        #Create Time Series plot area
        time_plot = figure(plot_height= 400, plot_width= 800, title="", x_axis_label ='Time', 
                    tools='', y_axis_label = 'l1013aspv', toolbar_location="left",
                    x_axis_type="datetime",
                    y_range=(min(data_source.data["y1"]) -min(data_source.data["y1"]*0.1 ),
                             max(data_source.data["y1"]) + max(data_source.data["y1"]*0.1)))
                       
        #modify the BoxSelectTool 
        #dimensions = specify the dimension in which the box selection is free in
        #select_every_mousemove = select points as box moves over
        time_plot.add_tools(BoxSelectTool(dimensions = ["width"], select_every_mousemove = True))

        #add another axis
        time_plot.extra_y_ranges = {"foo": Range1d(start = min(data_source.data["y2"])
                                                        - min(data_source.data["y2"]*0.1),
                                                  end = max(data_source.data["y2"]) + max(data_source.data["y2"]*0.1))}
                                                  
        #add data to scatter plot (data points on time plot)
        time_scat = time_plot.scatter("x", "y1", source = data_source,size = 1, color = "green")
        time_scat2 = time_plot.scatter("x", "y2", source = data_source,size= 1, color = "blue", y_range_name = "foo")
           
        #add time series line
        time_plot.line("x","y1",source=data_source,color = time_scat.glyph.fill_color,
                                   alpha=0.5)
                                   
        time_plot.line("x","y2",source=data_source,color= time_scat2.glyph.fill_color,
                                    alpha=0.5,y_range_name="foo")
                                    
        #Customize time_plot grid lines
        time_plot.xgrid.grid_line_color = None
        time_plot.ygrid.grid_line_alpha = 0.2
        #First axes styling
        time_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
        time_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
        time_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
        time_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
        time_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color
                                    
        #add second axis to time_plot and styling
        time_plot.add_layout(LinearAxis(y_range_name = "foo",
                                        axis_line_color = str(time_scat2.glyph.fill_color),
                                        major_label_text_color = str(time_scat2.glyph.fill_color), 
                                        axis_label_text_color = str(time_scat2.glyph.fill_color),
                                        major_tick_line_color = str(time_scat2.glyph.fill_color),
                                        minor_tick_line_color = str(time_scat2.glyph.fill_color),
                                        axis_label= "l1015asop"), "left")
                                    
#%% Create Marginal Histogram and KDE
        #Create marginal histogram for y-axis data density
        #set up figure
        hist_plot = figure(plot_height = 400, plot_width = 200, y_range = time_plot.y_range)
        
        #add second axis to histogram
        hist_plot.extra_y_ranges = {"foo": 
            Range1d(start = min(data_source.data["y2"]) - min(data_source.data["y2"]*0.1),
                    end = max(data_source.data["y2"]) + max(data_source.data["y2"]*0.1))}
        
        #Customize hist_plot grid lines
        hist_plot.xgrid.grid_line_alpha = 0.2
        hist_plot.ygrid.grid_line_alpha = 0.5
                
        #get histogram data 
        hist, edges = histogram(data_source.data["y1"], density = True, bins = 20)
        hist2, edges2 = histogram(data_source.data["y2"], density = True, bins = 20)
        
        #styling histogram axes
        hist_plot.xaxis.axis_label = ""
        hist_plot.yaxis.axis_label = ""
        hist_plot.xaxis.visible = False
                    
        #add gaussian kernel density estimator
        y_span = linspace(min(data_source.data["y1"]),
                             max(data_source.data["y1"]), size(data_source.data["y1"]))
        kde = gkde(data_source.data["y1"]).evaluate(y_span)
        
        y_span2 = linspace(min(data_source.data["y2"]),
                             max(data_source.data["y2"]), size(data_source.data["y2"]))
        kde2 = gkde(data_source.data["y2"]).evaluate(y_span2)                             
                                    
                                    
        #Histogram First axes styling
        hist_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
        hist_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
        hist_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
        hist_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
        hist_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color        
        #Histogram second axes styling
        hist_plot.add_layout(LinearAxis(y_range_name = "foo",
                                        axis_line_color = str(time_scat2.glyph.fill_color),
                                        major_label_text_color = str(time_scat2.glyph.fill_color), 
                                        axis_label_text_color = str(time_scat2.glyph.fill_color),
                                        major_tick_line_color = str(time_scat2.glyph.fill_color),
                                        minor_tick_line_color = str(time_scat2.glyph.fill_color)), "left")
                                        
#%% Create Scatter Graph       
        scat_plot = figure(plot_height = 400, plot_width = 400, title = "", x_axis_label = 'l1015asop', 
                    y_axis_label = 'l1013aspv')
        
        #scatter plot axis customization
        scat_plot.yaxis.axis_line_color = time_scat.glyph.fill_color
        scat_plot.yaxis.minor_tick_line_color = time_scat.glyph.fill_color
        scat_plot.yaxis.major_tick_line_color = time_scat.glyph.fill_color
        scat_plot.yaxis.axis_label_text_color = time_scat.glyph.fill_color
        scat_plot.yaxis.major_label_text_color = time_scat.glyph.fill_color
        
        scat_plot.xaxis.axis_line_color = time_scat2.glyph.fill_color
        scat_plot.xaxis.minor_tick_line_color = time_scat2.glyph.fill_color
        scat_plot.xaxis.major_tick_line_color = time_scat2.glyph.fill_color
        scat_plot.xaxis.axis_label_text_color = time_scat2.glyph.fill_color
        scat_plot.xaxis.major_label_text_color = time_scat2.glyph.fill_color 
                
        
#%% Add data to Histogram and scatter plot (this data is updated in the callback function)
        #Create updateable plots
        u_hist_source = ColumnDataSource(data=dict(top=edges[1:],bottom=edges[:-1],left=zeros_like(edges),right=hist))
        u_hist_source2 = ColumnDataSource(data=dict(top=edges2[1:],bottom=edges2[:-1],left=zeros_like(edges2),right=hist2))
        u_kde_source = ColumnDataSource(data=dict(x = kde, y = y_span))
        u_kde_source2 = ColumnDataSource(data=dict(x = kde2, y = y_span2))
        scat_data = ColumnDataSource(data=dict(x=[0],y=[0]))

        #Updatable histogram
        hist_plot.quad(top = 'top', bottom = 'bottom', left = 'left', right = 'right', source = u_hist_source,
                                fill_color = time_scat.glyph.fill_color, alpha = 0.5)
                                
        hist_plot.quad(top = 'top', bottom = 'bottom', left = 'left', right = 'right', source = u_hist_source2,
                                fill_color = time_scat2.glyph.fill_color, alpha = 0.3, y_range_name = "foo")
        #Updatable kde line
        hist_plot.line('x', 'y', source=u_kde_source ,line_color = "#008000")
        hist_plot.line('x', 'y', source=u_kde_source2 ,line_color = "#000099", y_range_name = "foo")
        
        
        
        #Updatable scatter plot
        scat_plot.scatter('x', 'y', source=scat_data,size=2, alpha=0.3)

#%% Updating function
        data_source.callback = CustomJS(args=dict(hist_data=u_hist_source, hist_data2=u_hist_source2,
                                        kde_d = u_kde_source, kde_d2 = u_kde_source2, sc=scat_data),
                                code="""
                            Update_ALL_Figures(cb_obj, hist_data, hist_data2, kde_d, kde_d2, sc)
                                    """)
#%% create plot layout
                                    
        layout = gridplot([[time_plot, hist_plot], [scat_plot, None]])
        return layout #need to return the layout
#normal distribution
#calculate arithmetic mean
mu = np.mean(source.data["y"])
    
#calculating standard deviation
sigma = np.std(source.data["y"])
    
#calculating normal distribution (probability density function)
y_span = np.linspace(np.min(source.data["y"]),
                     np.max(source.data["y"]),np.size(source.data["y"]))
#nd = 1/(2*np.pi*sigma)*np.exp(-(y_span - mu)**2/(2*sigma**2))
#construct normal distribution lines
#hist_plot.line(nd,y_span,line_color="#668cff", line_width=1,alpha=0.5)
    
#add gaussian kernel density estimator
kde = gkde(source.data["y"]).evaluate(y_span)
#construct gaussian kernel density estimator lines
hist_plot.line(kde,y_span,line_color="#ff6666",line_width=1,alpha=0.5)
    
#Create updateable plots
u_hist = hist_plot.quad(top=edges[1:], bottom=edges[:-1], left=0,
                        right=np.zeros(len(edges) - 1),
                        fill_color=time_scat.glyph.fill_color, alpha = 0.5)
                   
kde_data = np.zeros((len(kde)))
kde_line = hist_plot.line(kde_data,y_span,line_color="red")

#create scatter plot of the data sets
scat_plot = figure(plot_height= 400, plot_width= 400, title="", x_axis_label ='', 
            y_axis_label = '')
           
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
n1, bins1, patches1 = ax1.hist(y, num_bins,
                               density=1,
                               histtype='stepfilled',
                               facecolor=pdf_col,
                               alpha=0.5)

# Establishing the y-axis of the pdf and the x-axis
plt.ylabel("Probability density")
plt.ylim(0, 3.0)
plt.xlim(0, 3.0)
plt.xticks(np.arange(0, 3.0, step=0.5))

# Overlaying the line of the pdf to give the edge definition
ax2 = fig1.add_subplot(111, sharex=ax1, sharey=ax1, frameon=False)
#n2, bins2, patches2    =       ax2.hist(y, num_bins, density=1, histtype='step', color='black', alpha=0.5)

density = gkde(y)
xs = np.linspace(0, 3, 300)

density.covariance_factor = lambda: 0.05
density._compute_covariance()

ax2.plot(xs, density(xs))

# Creating the cdf subplot
ax3 = fig1.add_subplot(111, sharex=ax1, frameon=False)
ax3.hist(y, 1200, density=1, color='navy', histtype='step', cumulative=True)

# Establishing the y-axis of the cdf
ax3.yaxis.tick_right()
ax3.yaxis.set_label_position("right")
plt.xlabel("RMSD (Angstroms)")
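# Hedged note on the bandwidth hack above: overriding covariance_factor and
# calling the private _compute_covariance() predates the public API; on
# SciPy >= 0.11 the same narrow bandwidth can be set directly:
density = gkde(y)
density.set_bandwidth(bw_method=0.05)  # public equivalent of the hack above
ax2.plot(xs, density(xs))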
Example #12
                 np.corrcoef(g.X[key][(cc[key] if coclust else c) == k].T) -
                 np.diag(np.ones(g.m)))[:, :30]
         f, ax = plt.subplots(1, 1)
         ax.set(xlabel='Correlation coefficient $\\rho_{ij}^{(k)}$')
         ax.hist(wc_corcoefs[:, 2:, 2:].flatten(),
                 color='gray',
                 bins=30,
                 label='Histogram of $\\rho_{ij}^{(k)}$ for $X_{d:}^' +
                 key + '$',
                 density=True)
         ax.scatter(wc_corcoefs[:, 0, 1].flatten(),
                    np.zeros(g.K[key] if coclust else g.K),
                    color='red',
                    label='$\\rho_{ij}^{(k)}$ for $X_{:d}^' + key + '$')
         xx = np.linspace(-.2, .2, 500)
         kde = gkde(wc_corcoefs[:, 2:, 2:].flatten())
         ax.plot(xx, kde(xx), color='black')
         ax.legend()
         if dest_folder != '':
             plt.savefig(dest_folder + '/corr_sim_' + key + '.pdf')
         else:
             plt.savefig('corr_sim_' + key + '.pdf')
 else:
     plt.figure()
     wc_corcoefs = np.zeros((g.K, g.m, 30))
     corcoefs = (np.corrcoef(g.X.T) - np.diag(np.ones(g.m)))[:, :30]
     for k in range(g.K):
         wc_corcoefs[k] = (np.corrcoef(g.X[c == k].T) -
                           np.diag(np.ones(g.m)))[:, :30]
     f, ax = plt.subplots(1, 1)
     ax.set(xlabel='Correlation coefficient $\\rho_{ij}^{(k)}$')
    mA = dist_a.mean()
    mB = dist_b.mean()
    ax.axvline(mA, c='b')
    ax.axvline(mB, c='xkcd:orange')
    ax.plot(x, pdf_a, label=f'A ({mA:1.2e})', c='b')
    ax.plot(x, pdf_b, label=f'B ({mB:1.2e})', c='xkcd:orange')
    ax.legend()
    if pval < 0.05:
        color = 'k'
    else:
        color = 'r'
    ax.set_title(f"p-value: {pval:1.2e}", fontsize=24, c=color)
    ax.set_ylim([0, 1.05 * np.max(np.concatenate([pdf_a, pdf_b]))])
    st.pyplot(fig)

kde = gkde(perm_replicates)
x0 = np.linspace(min(perm_replicates),
                 max(max(perm_replicates), empirical_diff_means), 100)
p_y = kde.pdf(x0)

# if dists have same mean, the probability that the difference in means = 0
# should approach 1 (full confidence) as sample size increases
with _lock:
    fig_p, ax_p = plt.subplots()
    ax_p.plot(x0, p_y, c='k', lw=2)
    ax_p.axvline(empirical_diff_means, c='r', lw=2, ls='--')
    section = np.linspace(empirical_diff_means, max(x0), 100)
    ax_p.fill_between(section, kde.pdf(section), color='r')
    ax_p.set_title('Permuted Samples of Test Statistic: Density Estimate')
    st.pyplot(fig_p)
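# Hedged follow-up sketch: the shaded tail above is the KDE estimate of the
# one-sided p-value, which gaussian_kde can also integrate directly:
p_kde = kde.integrate_box_1d(empirical_diff_means, np.inf)
print('KDE-estimated one-sided p-value:', p_kde)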
Example #14
    def predict_and_sample(self, x_pred):

        if self.regression_model is None:
            print(
                'No regression model available. Make sure you have called build_regression_predict_and_sample().'
            )
            exit()

        mu = np.zeros(x_pred.shape)
        sigma = np.zeros(x_pred.shape)
        hf_model_evals_pred = np.zeros(x_pred.shape)

        if self.regression_type in [
                'gaussian_process', 'heteroscedastic_gaussian_process'
        ]:

            for i in range(x_pred.shape[1]):

                # Predict q_l|q_l-1 at all low-fidelity samples
                mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                    x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(mu.shape[0]):
                    hf_model_evals_pred[
                        j, i] = mu[j, i] + sigma[j, i] * np.random.randn()

        elif self.regression_type in [
                'consistent_gaussian_process',
                'consistent_heteroscedastic_gaussian_process'
        ]:

            for i in range(x_pred.shape[1]):

                # Enrich x_pred
                if i > 0:
                    x_pred = np.hstack(
                        [x_pred, hf_model_evals_pred[:, 0:i].reshape((-1, i))])

                # Predict q_l|q_l-1 at all low-fidelity samples
                mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                    x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(mu.shape[0]):
                    hf_model_evals_pred[
                        j, i] = mu[j, i] + sigma[j, i] * np.random.randn()

        elif self.regression_type in [
                'decoupled_gaussian_process',
                'decoupled_heteroscedastic_gaussian_process'
        ]:

            for i in range(x_pred.shape[1]):

                # Predict q_l|q_l-1 at all low-fidelity samples
                mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                    np.expand_dims(x_pred[:, i], axis=1), return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(mu.shape[0]):
                    hf_model_evals_pred[
                        j, i] = mu[j, i] + sigma[j, i] * np.random.randn()

        elif self.regression_type == 'gaussian_process_kde':

            for i in range(x_pred.shape[1]):

                # Predict q_l|q_l-1 at all low-fidelity samples
                mu[:, i], sigma[:, i] = self.regression_model[i].predict(
                    x_pred, return_std=True)

                mu_train, sigma_train = self.regression_model[i].predict(
                    self.x_train, return_std=True)
                noise_train = self.y_train[:, i] - mu_train
                joint_kde = gkde([self.x_train[:, i], noise_train])
                nmin = np.min(noise_train)
                nmax = np.max(noise_train)

                # Generate high-fidelity samples from the predictions
                for j in range(mu.shape[0]):
                    hf_model_evals_pred[j, i] = mu[j, i]

                    # given x_pred, generate samples of y and average to get normalizing factor for slice of kde
                    Nsamp = 100
                    nvals = np.random.uniform(low=nmin, high=nmax, size=Nsamp)
                    kde_slice_samp = joint_kde(
                        [x_pred[j, i] * np.ones(Nsamp), nvals])
                    normfactor = np.mean(kde_slice_samp)
                    kde_slice_samp *= 1.0 / normfactor
                    ratio = np.divide(kde_slice_samp,
                                      1.0 / (nmax - nmin) * np.ones(Nsamp))
                    ratio *= 1.0 / np.max(ratio)
                    foundsamp = False
                    ii = 0
                    while not foundsamp and ii < Nsamp:
                        if ratio[ii] > np.random.uniform(low=0, high=1,
                                                         size=1):
                            foundsamp = True
                            hf_model_evals_pred[j, i] += nvals[ii]
                        else:
                            ii += 1
                    if not foundsamp:
                        print(
                            'Rejection sampling failed. Using the mean prediction without noise.'
                        )

        else:
            print('Unknown regression model %s.' % self.regression_type)
            exit()

        return hf_model_evals_pred
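# Hedged standalone sketch of the noise model used in the 'gaussian_process_kde'
# branch above: fit a joint KDE over (input, residual) pairs, then
# rejection-sample a residual from the conditional slice at a query input.
# All names here are illustrative, not from the original class.
import numpy as np
from scipy.stats import gaussian_kde as gkde

rng = np.random.default_rng(1)
x_train = rng.uniform(-1, 1, 400)
noise_train = (0.1 + 0.2 * np.abs(x_train)) * rng.standard_normal(400)
joint_kde = gkde(np.vstack([x_train, noise_train]))

x0, Nsamp = 0.5, 100
nvals = rng.uniform(noise_train.min(), noise_train.max(), Nsamp)
ratio = joint_kde(np.vstack([np.full(Nsamp, x0), nvals]))
ratio /= ratio.max()                      # acceptance probabilities in (0, 1]
accept = rng.uniform(size=Nsamp) < ratio  # vectorized accept/reject
sampled = nvals[accept][0] if accept.any() else 0.0
print('sampled residual at x0 = %.1f: %.4f' % (x0, sampled))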
Example #15
    def build_regression_predict_and_sample(self):

        hf_model_evals_pred = None

        if self.regression_type == 'gaussian_process':

            self.regression_model = []
            n_qoi = self.x_train.shape[1]
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(n_qoi):

                # Fit a GP regression model to approximate p(q_l|q_l-1)
                kernel = ConstantKernel() + ConstantKernel() * RBF(
                    np.ones(n_qoi)) + WhiteKernel()
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(
                        kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
                self.regression_model[i].fit(self.x_train, self.y_train[:, i])

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    self.x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        elif self.regression_type == 'consistent_gaussian_process':

            self.regression_model = []
            n_qoi = self.x_train.shape[1]
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(n_qoi):

                # Enrich x_train
                if i > 0:
                    x_train = np.hstack(
                        [self.x_train, self.y_train[:, 0:i].reshape((-1, i))])
                else:
                    x_train = self.x_train

                # Fit a GP regression model to approximate p(q_l|q_l-1)
                kernel = ConstantKernel() + ConstantKernel() * RBF(
                    np.ones(n_qoi + i)) + WhiteKernel()
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(
                        kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
                self.regression_model[i].fit(x_train, self.y_train[:, i])

                # Enrich x_pred
                if i > 0:
                    x_pred = np.hstack([
                        self.x_pred, hf_model_evals_pred[:, 0:i].reshape(
                            (-1, i))
                    ])
                    # x_pred = np.hstack([self.x_pred, self.mu[:, 0:i].reshape((-1, i))])
                else:
                    x_pred = self.x_pred

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        elif self.regression_type == 'gaussian_process_kde':

            self.regression_model = []
            n_qoi = self.x_train.shape[1]
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(n_qoi):

                # Fit a GP regression model to approximate p(q_l|q_l-1)
                kernel = ConstantKernel() + RBF(np.ones(n_qoi)) + WhiteKernel()
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(
                        kernel=kernel, alpha=1e-6, n_restarts_optimizer=0))
                self.regression_model[i].fit(self.x_train, self.y_train[:, i])

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    self.x_pred, return_std=True)

                mu_train = self.regression_model[i].predict(self.x_train,
                                                            return_std=False)
                noise_train = self.y_train[:, i] - mu_train
                nmin = np.min(noise_train)
                nmax = np.max(noise_train)

                # plt.figure(1)
                # samples = np.vstack([np.squeeze(self.x_train), np.squeeze(noise_train)])
                # df = pd.DataFrame(samples.T, columns=['$Q_1$', 'Noise'])
                # g = sns.jointplot(x='$Q_1$', y='Noise', data=df, kind='kde', color='C0', shade=True,
                #                   shade_lowest=True, cmap='Blues')
                # g.plot_joint(plt.scatter, c='k', alpha=0.3, s=20, linewidth=0.0, marker='o')
                # g.ax_joint.collections[0].set_alpha(0)
                # # g.ax_joint.legend_.remove()
                # g.set_axis_labels('$Q_1$', 'Noise')
                # plt.subplots_adjust(top=0.95)
                # plt.gcf().subplots_adjust(left=0.15)
                # # plt.show()
                # plt.gcf().savefig('output/mfmc_noise_model.pdf', dpi=300)

                joint_kde = gkde([self.x_train[:, i], noise_train])

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[j, i]

                    # given x_pred, generate samples of y and average to get normalizing factor for slice of kde
                    Nsamp = 100
                    nvals = np.random.uniform(low=nmin, high=nmax, size=Nsamp)
                    kde_slice_samp = joint_kde(
                        [self.x_pred[j, i] * np.ones(Nsamp), nvals])
                    normfactor = np.mean(kde_slice_samp)
                    kde_slice_samp *= 1.0 / normfactor
                    ratio = np.divide(kde_slice_samp,
                                      1.0 / (nmax - nmin) * np.ones(Nsamp))
                    ratio *= 1.0 / np.max(ratio)
                    foundsamp = False
                    ii = 0
                    while not foundsamp and ii < Nsamp:
                        if ratio[ii] > np.random.uniform(low=0, high=1,
                                                         size=1):
                            foundsamp = True
                            hf_model_evals_pred[j, i] += nvals[ii]
                        else:
                            ii += 1
                    if not foundsamp:
                        print(
                            'Rejection sampling failed. Using the mean prediction without noise.'
                        )

        elif self.regression_type == 'decoupled_gaussian_process':

            self.regression_model = []
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(self.x_train.shape[1]):

                # Fit a GP regression model to approximate p(q_l|q_l-1)
                kernel = ConstantKernel(
                ) + ConstantKernel() * RBF() + WhiteKernel()
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(kernel=kernel,
                                                              alpha=1e-6))
                self.regression_model[i].fit(
                    np.expand_dims(self.x_train[:, i], axis=1),
                    self.y_train[:, i])

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    np.expand_dims(self.x_pred[:, i], axis=1), return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        elif self.regression_type == 'heteroscedastic_gaussian_process':

            self.regression_model = []
            n_qoi = self.x_train.shape[1]
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(n_qoi):

                # Fit a heteroscedastic GP regression model with spatially varying noise to approximate p(q_l|q_l-1)
                # See here for more info: https://github.com/jmetzen/gp_extras/
                prototypes = KMeans(n_clusters=5).fit(
                    self.x_train).cluster_centers_
                kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(self.y_train.shape[1])) \
                    + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1), gamma=5.0, gamma_bounds="fixed")
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(kernel=kernel,
                                                              alpha=1e-6))
                self.regression_model[i].fit(self.x_train, self.y_train[:, i])

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    self.x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        elif self.regression_type == 'consistent_heteroscedastic_gaussian_process':

            self.regression_model = []
            n_qoi = self.x_train.shape[1]
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(n_qoi):

                # Enrich x_train
                if i > 0:
                    x_train = np.hstack(
                        [self.x_train, self.y_train[:, 0:i].reshape((-1, i))])
                else:
                    x_train = self.x_train

                # Fit a heteroscedastic GP regression model with spatially varying noise to approximate p(q_l|q_l-1)
                # See here for more info: https://github.com/jmetzen/gp_extras/
                prototypes = KMeans(n_clusters=5).fit(x_train).cluster_centers_
                kernel = ConstantKernel() + ConstantKernel() * RBF(np.ones(n_qoi+i)) \
                    + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1), gamma=5.0, gamma_bounds="fixed")
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(kernel=kernel,
                                                              alpha=1e-6))
                self.regression_model[i].fit(x_train, self.y_train[:, i])

                # Enrich x_pred
                if i > 0:
                    x_pred = np.hstack([
                        self.x_pred, hf_model_evals_pred[:, 0:i].reshape(
                            (-1, i))
                    ])
                    # x_pred = np.hstack([self.x_pred, self.mu[:, 0:i].reshape((-1, i))])
                else:
                    x_pred = self.x_pred

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    x_pred, return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        elif self.regression_type == 'decoupled_heteroscedastic_gaussian_process':

            self.regression_model = []
            self.mu = np.zeros(self.x_pred.shape)
            self.sigma = np.zeros(self.x_pred.shape)
            hf_model_evals_pred = np.zeros(self.x_pred.shape)

            for i in range(self.x_train.shape[1]):

                # Fit a heteroscedastic GP regression model with spatially varying noise to approximate p(q_l|q_l-1)
                # See here for more info: https://github.com/jmetzen/gp_extras/
                prototypes = KMeans(n_clusters=5).fit(
                    np.expand_dims(self.x_pred[:, i], axis=1)).cluster_centers_
                kernel = ConstantKernel() + ConstantKernel() * RBF() \
                         + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 5e1), gamma=5.0,
                                                           gamma_bounds="fixed")
                self.regression_model.append(
                    gaussian_process.GaussianProcessRegressor(kernel=kernel,
                                                              alpha=1e-6))
                self.regression_model[i].fit(
                    np.expand_dims(self.x_train[:, i], axis=1),
                    self.y_train[:, i])

                # Predict q_l|q_l-1 at all low-fidelity samples
                self.mu[:, i], self.sigma[:, i] = self.regression_model[i].predict(
                    np.expand_dims(self.x_pred[:, i], axis=1), return_std=True)

                # Generate high-fidelity samples from the predictions
                for j in range(self.mu.shape[0]):
                    hf_model_evals_pred[j, i] = self.mu[
                        j, i] + self.sigma[j, i] * np.random.randn()

        else:
            print('Unknown regression model %s.' % self.regression_type)
            exit()

        # Replace INF or NAN predictions
        if np.isinf(hf_model_evals_pred).any() or np.isnan(
                hf_model_evals_pred).any():
            warnings.warn(
                'Detected INF or NAN values in the predictions, replacing them with the sample mean.'
            )
            mask = np.isnan(hf_model_evals_pred) | np.isinf(
                hf_model_evals_pred)
            hf_model_evals_pred[mask] = np.mean(hf_model_evals_pred[~mask],
                                                axis=0)

        return hf_model_evals_pred