コード例 #1
0
def kdepy_fftkde(data, a, b, num_bin_joint):
    """Estimate a joint density with KDEpy's FFT-based Gaussian KDE.

    The estimator is given a single scalar bandwidth, so the samples are
    first rescaled linearly from the box [a, b] to [-1, 1] in every
    dimension; this makes the bandwidth independent of the parameter ranges.
    The scalar bandwidth is the mean of the values returned by
    ``bw_from_kdescipy(data, 'scott')`` for the rescaled data.

    :param data: array of parameter samples (one row per sample)
    :param a: list of left boundaries
    :param b: list of right boundaries
    :param num_bin_joint: number of bins (cells) per dimension in estimated posterior
    :return: density values reshaped to (num_bin_joint + 1, ) * dimensions
    """
    n_dims = len(data[0])
    logging.info(f'KDEpy.FFTKDe: Gaussian KDE {n_dims} dimensions')
    started = time()

    # Widen the box by a tiny margin so that samples lying exactly on a
    # boundary end up strictly inside (-1, 1) after rescaling.
    lower = np.array(a) - 1e-10
    upper = np.array(b) + 1e-10
    scaled = 2 * (data - lower) / (upper - lower) - 1

    per_dim_bw = bw_from_kdescipy(scaled, 'scott')
    _, grid_ravel = grid_for_kde(-1 * np.ones(n_dims), np.ones(n_dims), num_bin_joint)

    estimator = FFTKDE(kernel='gaussian', bw=np.mean(per_dim_bw))
    estimator.fit(scaled)
    density = estimator.evaluate(grid_ravel.T)
    density = density.reshape((num_bin_joint + 1, ) * n_dims)

    finished = time()
    timer(started, finished, "Time for kdepy_fftkde")
    return density
コード例 #2
0
class Gaussian_Density_Estimator:
    """Small convenience wrapper around a KDEpy ``FFTKDE`` instance."""

    def __init__(self, kernel='gaussian', bw='silverman'):
        # The wrapped estimator; kernel and bandwidth are passed straight through.
        self.estimator = FFTKDE(kernel=kernel, bw=bw)

    def train(self, data, weights=None):
        """Fit the wrapped estimator on ``data`` (optionally weighted)."""
        self.estimator.fit(data, weights=weights)

    def score_samples(self, input_x=None):
        """Evaluate the fitted estimator.

        With no argument the estimator chooses its own grid and the pair
        ``(x, y)`` is returned; otherwise ``input_x`` is forwarded to
        ``evaluate`` and its result is returned unchanged.
        """
        if input_x is not None:
            return self.estimator.evaluate(input_x)

        grid, values = self.estimator.evaluate()
        return grid, values


# import numpy as np
# import matplotlib.pyplot as plt

# data = np.random.randn(2**6)
# density_estimator = Gaussian_Density_Estimator()
# density_estimator.train(data)
# # x, y = density_estimator.score_samples()
# # print(x.shape, y.shape)

# x, y = density_estimator.score_samples(10)
# print(y)

# plt.plot(x, y); plt.tight_layout()
# plt.show()
コード例 #3
0
def find_kde(distribution, bw='silverman', npoints=512, kernel='gaussian'):
    """Return a KDE-based estimate of the histogram of ``distribution``.

    The density is evaluated on ``npoints`` grid points and then clipped to
    the range [0, 255]. When the Silverman rule is requested and yields a
    bandwidth below 1, the estimate is recomputed with a fixed bandwidth
    of 1. The result is the tuple ``(x, y)`` of grid points and densities.
    """

    def _estimate(bandwidth):
        # Fit a fresh estimator and evaluate it on the requested grid size.
        model = FFTKDE(kernel=kernel, bw=bandwidth)
        grid, values = model.fit(distribution).evaluate(npoints)
        return model, grid, values

    model, x, y = _estimate(bw)

    # Fix silverman bias in small datasets
    if (bw == 'silverman') and (model.bw < 1):
        _, x, y = _estimate(1)

    inside = (x >= 0) & (x <= 255)
    return (x[inside], y[inside])
コード例 #4
0
    def _estimate_pdf(self,
                      sample_id: str,
                      features: list,
                      df: pd.DataFrame,
                      **kwargs) -> None:
        """
        Estimate the PDF of one sample's events with FFTKDE and store the
        evaluated density in ``self.kde_cache`` under ``sample_id``.

        Parameters
        ----------
        sample_id: str
            Key under which the evaluated density is cached
        features: list
            Columns of ``df`` to use; only numeric columns among them are kept
        df: Pandas.DataFrame
            Events of the sample
        kwargs:
            Forwarded to ``bw_optimisation`` when ``self.kde_bw`` is "cv"

        Returns
        -------
        None
        """
        bandwidth = self.kde_bw
        if bandwidth == "cv":
            # Bandwidth is chosen by the optimisation helper instead of being fixed.
            bandwidth = bw_optimisation(data=df, features=features, **kwargs)

        numeric_events = df[features].copy().select_dtypes(include=['number'])
        estimator = FFTKDE(kernel=self.kde_kernel,
                           bw=bandwidth,
                           norm=self.kde_norm)
        fitted = estimator.fit(numeric_events.values)
        # evaluate() yields (grid, density); only the density is cached.
        self.kde_cache[sample_id] = fitted.evaluate()[1]
コード例 #5
0
def get_silvermans_bandwidth(X, kernel, bandwidth):
    """Return the bandwidth FFTKDE selects for 1-D data ``X``.

    ``X`` must be a one-dimensional ``numpy.ndarray`` (exact type, not a
    subclass) and ``bandwidth`` must be ``"silverman"``; violations trigger
    an ``AssertionError``. ``kernel`` is passed to FFTKDE unchecked.
    """
    # Input checks on the data array.
    assert X is not None
    assert type(X) is np.ndarray
    assert X.ndim == 1

    # Only the Silverman rule is accepted as bandwidth selector.
    assert bandwidth in ("silverman", )

    grid_size = 2**10
    estimator = FFTKDE(bw=bandwidth, kernel=kernel)
    # Fit and evaluate once; the bandwidth is read from the estimator afterwards.
    estimator.fit(X)(grid_size)

    return estimator.bw
コード例 #6
0
def get_kde(gridArray):
    """
    Apply a weighted KDE to a risk matrix to reduce its sparsity.

    :param gridArray: risk matrix to process
    :return: risk matrix after KDE; an all-zero matrix is returned as is
    """
    # An all-zero risk matrix has nothing to smooth: return the original.
    if not np.any(gridArray != 0):
        return gridArray

    # Coordinates (row, column) of the non-zero cells, one per row, as floats.
    nonzero_idx = np.nonzero(gridArray)
    coords = np.column_stack((nonzero_idx[0], nonzero_idx[1])).astype(float)

    # Each non-zero cell is weighted by its own value.
    weights = [gridArray[int(r), int(c)] for r, c in coords]

    # Number of rows of the matrix; the same number of grid points is used
    # along both axes, so the result is always square.
    side = gridArray.shape[0]
    estimator = FFTKDE(kernel='gaussian', norm=2, bw=0.5)
    _, density = estimator.fit(coords, weights=weights).evaluate(side)

    return density.reshape(side, side).T
コード例 #7
0
ファイル: MFeqs.py プロジェクト: LevinaLab/PNAS-2021
                                     interp_dt=0.5)
# Extract the axes from the axis list
# Put the flat `dist` values onto a len(ws) x len(nus) grid.
fake_dist = np.reshape(dist, [len(ws), len(nus)])
# Thick black reference curve, labelled 'analytics'.
plt.plot(unw, unNu_overW, color='k', label='analytics', linewidth=4)
#plt.imshow(fake_dist.T,aspect = 'auto',origin='lower',extent = [np.min(ws/20),np.max(ws/20),0,np.max(unNu_overW)])
# Contours of the gridded distribution; the x axis is ws scaled by 1/20.
plt.contour(ws / 20, nus, fake_dist.T)
plt.xlim([0, 0.6])
plt.show(block=False)

from KDEpy import FFTKDE
import KDEpy
# 2-D KDE with an exponential kernel and a fixed bandwidth of 0.05.
kde = FFTKDE(bw=0.05, kernel='exponential')

# Number of grid points along the first and second dimension.
grid_points = 100, 200
# Fit on (meanW_burst, meanSC_burst) pairs, one sample per row.
# NOTE(review): `na` is presumably an alias for np.array -- confirm.
grid, points = kde.fit(
    na([np.hstack(meanW_burst),
        np.hstack(meanSC_burst)]).T).evaluate(grid_points)

# Recover the two grid axes and reshape the flat densities for contouring.
x, y = np.unique(grid[:, 0]), np.unique(grid[:, 1])
z = points.reshape(grid_points[0], grid_points[1]).T

# Plot the kernel density estimate
# N is passed to plt.contour as its levels argument.
N = 2
plt.plot(np.hstack(meanW_burst), np.hstack(meanSC_burst), '.', alpha=0.2)
plt.contour(x, y, z, N, linewidths=0.8, colors='k')
plt.show(block=False)

# Second estimator on the same samples, scored at the points in ev_par.
# NOTE(review): KernelDensity is not imported in the visible part of the
# file (presumably sklearn.neighbors.KernelDensity) -- confirm. The name
# `kde` is rebound here, replacing the FFTKDE instance above.
kde = KernelDensity(bandwidth=0.03)
kde.fit(na([np.hstack(meanW_burst), np.hstack(meanSC_burst)]).T)
samples = kde.score_samples(na(ev_par))
コード例 #8
0
    idx_ones = index_digit_ones(Y_train)
    print("> idx_ones = ", idx_ones)

    idxs = [12, 3, 6, 14, 23, 24, 40, 59, 67] # this is extracted indices of digit=1; with idx=12 as digit=3
    data = []
    for idx in idxs:
        data.append(X_train[idx])

    fig = plt.figure()
    # more styles: https://matplotlib.org/gallery/lines_bars_and_markers/line_styles_reference.html
    line_styles = ['--', '-', ':', ':', '-', ':', ':', ':', ':']
    for i in range(len(idxs)):
        estimator = FFTKDE(kernel='gaussian', bw='silverman')
        # x[i], y[i] = estimator[i].fit(data[i], weights=None).evaluate()
        x, y = estimator.fit(data[i], weights=None).evaluate()

        # plt.plot(x[i], y[i], label='Digit='+str(Y_train[idxs[i]]))
        plt.plot(x, y, linestyle=line_styles[i], label='IDX='+str(idxs[i])+'; Digit='+str(Y_train[idxs[i]]))

    plt.legend()
    plt.show()
    fig.savefig('hw5/results/visualize_kde.png', dpi=fig.dpi)

    new_data = pca.inverse_transform(data)

    plot_digit_data(new_data, 'test_kde_plot_digits')



コード例 #9
0
ファイル: kde.py プロジェクト: philippeller/dama
class KDE(Translation):
    def __init__(self,
                 source,
                 *args,
                 bw=None,
                 kernel='gaussian',
                 density=True,
                 **kwargs):
        '''run KDE on regular grid

        Parameters:
        -----------

        source : GridData or PointData
        bw : str or float or iterable
            Will default to 'silverman' for 1d data and 1 otherwise
            choices of 'silverman', 'scott', 'ISJ' for 1d data
            float specifies fixed bandwidth, in case of iterable a separate,
            fixed bandwidth per dimension
        kernel : str
            choices of 'gaussian', 'exponential', 'box', 'tri', 'epa', 'biweight', 'triweight', 'tricube', 'cosine'
        density : bool (optional)
            if false, multiply output by sum of data

        Raises:
        -------
        TypeError
            if the destination grid is not regular
        '''
        super().__init__(source, *args, dest_needs_grid=True, **kwargs)

        if bw is None:
            bw = 'silverman' if self.dest.grid.nax == 1 else 1.
        self.bw = bw
        self.kernel = kernel
        self.density = density

        if density:
            self.additional_runs = {'density': None}
        else:
            self.additional_runs = {'counts': None}

        if not self.dest.grid.regular:
            raise TypeError('dest must have regular grid')

    def setup(self):
        '''Prepare source/destination samples and build the FFTKDE estimator

        Source points outside the destination grid are masked out (the mask is
        kept in ``self.mask`` so that weights can be masked identically in
        ``eval``). For per-dimension bandwidths both samples are divided by the
        respective bandwidth and a unit bandwidth is handed to FFTKDE.
        '''
        self.prepare_source_sample(stacked=False)
        # every point must be inside output grid (requirement of KDEpy)
        masks = [
            np.logical_and(self.source_sample[i] > dim.points[0],
                           self.source_sample[i] < dim.points[-1])
            for i, dim in enumerate(self.dest.grid)
        ]
        self.mask = np.all(masks, axis=0)
        sample = [s[self.mask] for s in self.source_sample]

        # shape (n_points, n_dims)
        self.source_sample = np.stack(sample).T
        self.prepare_dest_sample(transposed=True)

        if isinstance(self.bw, (np.ndarray, list, tuple)):
            # FFTKDE only takes a scalar bandwidth here: emulate per-axis
            # bandwidths by rescaling the coordinates instead
            for i in range(self.dest.grid.nax):
                self.source_sample[:, i] /= self.bw[i]
                self.dest_sample[:, i] /= self.bw[i]
            bw = 1
        else:
            bw = self.bw

        self.kde = FFTKDE(bw=bw, kernel=self.kernel)

    def eval(self, source_data):
        '''Evaluate the KDE on the destination grid

        Parameters:
        -----------

        source_data : object or None
            if None, an unweighted KDE of the source points is computed;
            otherwise its flattened values are used as weights (one KDE per
            trailing index for multi-dimensional data)

        Returns:
        --------
        array reshaped to the destination shape (with one trailing axis for
        multi-dimensional ``source_data``). Densities if ``self.density``,
        otherwise values normalised such that they sum to the total weight
        (number of points in the unweighted case).
        '''
        # since we scale the inputs, we need to re-scale the
        # densities such that they integrate out to being 1 again
        if self.density and isinstance(self.bw, (np.ndarray, list, tuple)):
            scale = 1. / np.prod(self.bw)
        else:
            scale = None

        if source_data is None:
            out_array = self.kde.fit(self.source_sample).evaluate(
                self.dest_sample)
            out_shape = self.dest.shape
            if not self.density:
                # source_sample has shape (n_points, n_dims): the total count
                # is the number of rows, not the number of array elements
                # (``.size`` would over-count by a factor of n_dims)
                n_points = self.source_sample.shape[0]
                out_array *= n_points / np.sum(out_array)
            elif scale is not None:
                out_array *= scale

        else:
            source_data = source_data.flat()

            if source_data.ndim > 1:
                out_array = self.get_empty_output_array(source_data.shape[1:],
                                                        flat=True)
                for idx in np.ndindex(*source_data.shape[1:]):
                    sel = (Ellipsis, ) + idx
                    weights = source_data[sel][self.mask]
                    out_array[sel] = self.kde.fit(
                        self.source_sample,
                        weights=weights).evaluate(self.dest_sample)
                    if not self.density:
                        out_array[sel] *= np.sum(weights) / np.sum(
                            out_array[sel])
                    elif scale is not None:
                        out_array[sel] *= scale
                out_shape = (self.dest.shape) + (-1, )

            else:
                weights = source_data[self.mask]
                out_array = self.kde.fit(
                    self.source_sample,
                    weights=weights).evaluate(self.dest_sample)
                out_shape = self.dest.shape
                if not self.density:
                    out_array *= np.sum(weights) / np.sum(out_array)
                elif scale is not None:
                    out_array *= scale

        return out_array.reshape(out_shape)
コード例 #10
0
def density(values, bw='silverman', npoints=512):
    """Gaussian KDE of ``values`` on ``npoints`` grid points, clipped to [0, 255].

    Returns the retained grid points, the matching density values and the
    bandwidth attribute of the fitted estimator.
    """
    model = FFTKDE(kernel='gaussian', bw=bw)
    grid, dens = model.fit(values).evaluate(npoints)
    # One mask selects the same positions in both arrays.
    keep = (grid >= 0) & (grid <= 255)
    return grid[keep], dens[keep], model.bw
コード例 #11
0
def density(values, bw='silverman', npoints=512, kernel='gaussian'):
    """Estimate the density of ``values`` with FFTKDE and keep 0 <= x <= 255.

    The estimate is evaluated on ``npoints`` grid points. The function
    returns ``(x, y, bandwidth)`` where ``bandwidth`` is read from the
    fitted estimator.
    """
    fitted = FFTKDE(kernel=kernel, bw=bw).fit(values)
    points, heights = fitted.evaluate(npoints)

    within_range = (points >= 0) & (points <= 255)
    x = points[within_range]
    y = heights[within_range]
    return x, y, fitted.bw
コード例 #12
0
# Load the image and flatten it into a 1-D array of pixel values.
image = imageio.imread(image_filename)
distribution = image.ravel()

fig, axes = plt.subplots(figsize=(6, 4), dpi=150)

nbins = 256      # number of histogram bins (one per value 0..255)
npoints = 512    # number of grid points for the KDE evaluation
kernel = 'gaussian'

# Normalised histogram: relative frequency of each value 0..nbins-1.
x = np.arange(nbins)
y = np.bincount(distribution, minlength=nbins)
y = y / np.sum(y)
hst_xy = (x, y)

# KDE of the pixel values with Silverman bandwidth, clipped to [0, 255].
estimator = FFTKDE(kernel=kernel, bw='silverman')
kx, ky = estimator.fit(distribution).evaluate(npoints)
ky = ky[(kx >= 0) & (kx <= 255)]
kx = kx[(kx >= 0) & (kx <= 255)]
kde_xy = (kx, ky)

# Peaks of the KDE curve and the first two outputs of peak_widths at
# rel_height=0.5.
# NOTE(review): find_peaks / peak_widths are presumably scipy.signal -- confirm.
peaks_idx, _ = find_peaks(kde_xy[1])
half = peak_widths(kde_xy[1], peaks_idx, rel_height=0.5)[:2]
peaks = (peaks_idx, half)

# NOTE(review): this second estimator is fitted on the KDE *density values*
# `ky`, not on the pixel values in `distribution`; the Silverman bandwidth
# used below is therefore derived from `ky` -- confirm this is intended.
estimator = FFTKDE(kernel=kernel, bw='silverman')
kernel_points, kernel_values = estimator.fit(ky).evaluate(npoints)

# kx / ky are rebound here; kde_xy still refers to the clipped first estimate.
kx, ky, bw = kernel_points, kernel_values, estimator.bw
# 101 log-spaced bandwidths running from 10 * bw down to 0.1 * bw.
bws = bw * np.logspace(1, -1, 101)

mode_lst = find_modeid(distribution, bws)
コード例 #13
0
    def density(self, compare=None):
        """Plot Gaussian KDE curves of every input band and of the outputs.

        One subplot is drawn per entry of ``self.bands`` plus a final one for
        ``self.outputs``. Each curve is an FFTKDE estimate (Gaussian kernel,
        bandwidth 0.13) evaluated on 100 grid points.

        Parameters
        ----------
        compare : object, optional
            A second object exposing ``inputs``, ``outputs`` and ``name``.
            When given, both objects are evaluated on a common grid obtained
            from ``self.domain(own, other)``, and each subplot additionally
            shows the compared curve, the difference (compare - self) and a
            dashed zero line spanning the range of this object's data.

        Returns
        -------
        None
            The figure is shown with ``plt.show()``.
        """
        n_bands = len(self.bands)
        _, ax = plt.subplots(1, n_bands + 1, figsize=(30, 5))
        # Tiny padding so that every sample lies strictly inside the grid.
        ep = 1e-10
        # Identity check: `!= None` would invoke a user-defined __ne__.
        has_compare = compare is not None

        def kde_curve(samples, grid):
            # Fit a fresh estimator and evaluate it on the given grid.
            kde = FFTKDE('gaussian', bw=0.13)
            kde.fit(samples)
            return kde.evaluate(grid)

        def draw(axis, title, own, other, flatten):
            # Draw all curves of one subplot. Input bands are flattened
            # before fitting; outputs are fitted as they are.
            if has_compare:
                min_v, max_v = self.domain(own, other)
            else:
                min_v, max_v = own.min(), own.max()
            grid = np.linspace(min_v - ep, max_v + ep, 100)

            own_curve = kde_curve(own.ravel() if flatten else own, grid)
            axis.plot(grid, own_curve, label=self.name)
            if not has_compare:
                axis.set_title(title)
                return

            other_curve = kde_curve(other.ravel() if flatten else other, grid)
            axis.plot(grid, other_curve, label=compare.name)
            axis.plot(grid, other_curve - own_curve, label="Difference")
            axis.set_title(f"{title} | Compare: {compare.name}")
            # Dashed zero reference line for the difference curve.
            axis.plot([own.min(), own.max()], [0.0, 0.0],
                      linestyle='--',
                      alpha=0.3)

        for i in range(n_bands):
            draw(ax[i],
                 f"{self.name} {self.bands[i]}",
                 self.inputs[:, :, :, i],
                 compare.inputs[:, :, :, i] if has_compare else None,
                 flatten=True)

        draw(ax[-1],
             f"{self.name} Outputs",
             self.outputs,
             compare.outputs if has_compare else None,
             flatten=False)

        # NOTE(review): plt.legend() only affects the current axes
        # (presumably the last subplot) -- confirm this is intended.
        plt.legend()
        plt.show()
コード例 #14
0
ファイル: ml_backend.py プロジェクト: bpate05/BracketBuddy
def bootstrap(year1, team1, year2, team2, mongo):
    """Predict 100 games (the real matchup plus 99 randomised variants).

    The predictions are summarised into a dict of strings / lists of strings:
    win bar colour and percentage, KDE-based distributions of the over/under
    and of the spread binned to whole points, per-game scatter styling and
    the mean point predictions.
    """
    global graph
    num_trials = 99
    n_games = num_trials + 1

    data, tc1, tc2 = prepare_data(year1, team1, year2, team2, mongo)
    data_df = pd.DataFrame(data.reshape(1, 111))
    data_copy = data.copy()
    for _ in range(num_trials):
        data_df.loc[len(data_df)] = randomize_data(year1, team1, year2, team2,
                                                   data_copy)
    # Standardise with the statistics the model expects.
    data_df -= ml_stats_mean
    data_df = data_df / ml_stats_std

    with graph.as_default():
        prediction = model.predict(data_df)

    estimator = FFTKDE(kernel='gaussian', bw='silverman')
    over_under = list(prediction[:, 0] + prediction[:, 1])
    spread = list(prediction[:, 1] - prediction[:, 0])

    home_wins = sum(1 for i in range(n_games)
                    if prediction[i, 0] > prediction[i, 1])
    home_win_pct = home_wins / n_games
    est_win_pct = round((home_win_pct * 200) - 100)

    def _binned_density(samples):
        # KDE on a grid of 100 points per whole unit of the (floored/ceiled)
        # sample range, summed into bins of rounded x values.
        lowest = np.floor(np.min(samples))
        highest = np.ceil(np.max(samples))
        n_grid = int(highest - lowest) * 100
        xs, ys = estimator.fit(samples, weights=None).evaluate(n_grid)
        frame = pd.DataFrame({'x': xs, 'y': ys})
        frame['x_round'] = round(frame['x'])
        per_bin = frame.groupby('x_round')['y'].sum()
        return list(per_bin.index), list(per_bin)

    oe_x, oe_y = _binned_density(over_under)
    oe_total = sum(oe_y)
    oe_y_norm = [mass / oe_total for mass in oe_y]

    s_x, s_y = _binned_density(spread)
    s_total = sum(s_y)
    s_y_norm = [mass / s_total for mass in s_y]

    # Symmetric axis bounds around zero for the spread chart.
    spread_bound = max(abs(np.floor(min(s_x))), abs(np.ceil(max(s_x))))
    spread_colors = [tc1 if point <= 0 else tc2 for point in s_x]

    game_rows = range(len(prediction[:, 0]))

    output = {}
    output['win_bar_color'] = str(tc2) if est_win_pct < 0 else str(tc1)
    output['est_win_pct'] = str(-1 * est_win_pct)
    output['spread_bounds'] = [str(-1 * spread_bound), str(spread_bound)]
    output['spread_colors'] = spread_colors
    output['home_points'] = [str(p) for p in prediction[:, 0]]
    output['away_points'] = [str(p) for p in prediction[:, 1]]
    output['over_under_x'] = [str(v) for v in oe_x]
    output['over_under_y'] = [str(v * 100) for v in oe_y_norm]
    output['spread_x'] = [str(v) for v in s_x]
    output['spread_y'] = [str(v * 100) for v in s_y_norm]
    output['over_under'] = str(round(np.mean(over_under), 1))
    output['spread'] = str(round(np.mean(spread), 1))
    output['scatter_color'] = [
        tc1 if prediction[row, 0] > prediction[row, 1] else tc2
        for row in game_rows
    ]
    output['scatter_marker'] = [
        'circle' if prediction[row, 0] > prediction[row, 1] else 'rect'
        for row in game_rows
    ]
    output['home_point_prediction'] = str(
        int(round(np.mean(list(prediction[:, 0])))))
    output['away_point_prediction'] = str(
        int(round(np.mean(list(prediction[:, 1])))))
    return output
コード例 #15
0
def density(values, bw='silverman', npoints=512, kernel='gaussian'):
    """Fit an FFTKDE on ``values`` and evaluate it on ``npoints`` grid points.

    Returns ``(grid_points, density_values, bandwidth)``; the bandwidth is
    taken from the estimator after fitting.
    """
    model = FFTKDE(kernel=kernel, bw=bw)
    model.fit(values)
    grid, heights = model.evaluate(npoints)
    return grid, heights, model.bw
コード例 #16
0
def _interpolate(
    *,
    data: ndarray,
    x_position: ndarray,
    y_position: ndarray,
    z_position: Optional[ndarray] = None,
    extent: Tuple[float, float, float, float],
    smoothing_length: ndarray,
    particle_mass: ndarray,
    number_of_pixels: Tuple[float, float],
    cross_section: Optional[float] = None,
    density_weighted: Optional[bool] = None,
) -> ndarray:
    normalized = False
    if density_weighted is None:
        density_weighted = False
    if density_weighted:
        normalized = True

    mask = smoothing_length > 0.0
    mask = mask & ((x_position >= extent[0])
                   & (x_position <= extent[1])
                   & (y_position >= extent[2])
                   & (y_position <= extent[3]))
    if cross_section is not None:
        if z_position is None:
            raise ValueError('Must specify z position for cross section')
        mask = mask & (np.abs(z_position - cross_section) <
                       2 * smoothing_length)

    xy = np.vstack((x_position[mask], y_position[mask])).T
    scalar = data[mask]
    h = smoothing_length[mask]
    m = particle_mass[mask]

    if density_weighted:
        if cross_section is not None:
            weights = scalar * m / h * _C_NORM_3D
        else:
            weights = scalar * m
    else:
        if cross_section is not None:
            weights = scalar * h**2 * _C_NORM_3D / _H_FACT**3
        else:
            weights = scalar * h**3 / _H_FACT**3
    if normalized:
        weights_norm = weights / scalar

    kde = FFTKDE(kernel='gaussian')
    grid, points = kde.fit(xy, weights=weights).evaluate(number_of_pixels)
    z = points.reshape(number_of_pixels)

    if normalized:
        _, points_norm = kde.fit(
            xy, weights=weights_norm).evaluate(number_of_pixels)
        z_norm = points_norm.reshape(number_of_pixels)
        z /= z_norm

    normalization = np.sum(weights)
    if normalized:
        normalization /= np.sum(m)
    z *= normalization

    x_grid = np.linspace(grid[0, 0], grid[-1, 0], number_of_pixels[0])
    y_grid = np.linspace(grid[0, 1], grid[-1, 1], number_of_pixels[1])
    spl = RectBivariateSpline(x_grid, y_grid, z)
    x_regrid = np.linspace(*extent[:2], number_of_pixels[0])
    y_regrid = np.linspace(*extent[2:], number_of_pixels[1])
    z_regrid = spl(x_regrid, y_regrid)

    return z_regrid.T
コード例 #17
0
ファイル: langevin.py プロジェクト: swyoon/LangevinMC
    def analysis(self, algorithms=["tULA", "RWM"], measure="histogram", bins=10, repeat=1, experiment_mode=False):
        """Run the sampler for each algorithm and plot or store one measure.

        For every algorithm ``self.N_sim`` simulations are drawn via
        ``self.sampler.get_samples`` and reduced to the requested measure.

        Parameters
        ----------
        algorithms : list of str
            Names passed to the sampler. The default list is never mutated
            here. It must stay a list: the "histogram" branch concatenates it
            to another list.
        measure : str
            One of "first_moment", "second_moment", "trace", "scatter",
            "histogram", "FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW",
            "KL_divergence", "total_variation", "sliced_wasserstein",
            "sliced_wasserstein_no_histogram".
        bins : int
            Number of bins for the histogram-based measures.
        repeat : int
            Unused in this method.
        experiment_mode : bool
            If True nothing is printed up front or plotted; for the measures
            that produce scores the result is stored in
            ``self.experiment_data["results"]`` instead.

        Raises
        ------
        ValueError
            If ``measure`` is not one of the supported names (raised when the
            first simulation is processed).
        """
        if not experiment_mode:
            # Print information about the analysis
            print('\n####### Initializing analysis #########\n' + '#'*39)
            print(' ALGORITHMS: {:s}'.format(str(algorithms)))
            print(' MEASURE: {:s}'.format(measure))
            print(' PARAMETERS:')
            for p in [('Potential', self.potential), ('Dimension', self.dim), ('x0', self.x0), ('Step', self.step), ('Number of iterations', self.N), \
                      ('Burn-in period', self.burn_in), ('Number of simulations', self.N_sim), ('Number of chains', self.N_chains), \
                      ('Measuring points', self.measuring_points), ('Time allocation', self.timer)]:
                print('  ' + '{:>22}:   {:s}'.format(*map(str,p)))
            print('#'*39 + '\n')

        # Collect the measurements.
        # For N_sim simulations, we store the measurement we are interested in (first moment, second moment, all samples...)
        measurements = {}
        for algo in algorithms:
            measurements[algo] = []

            for s in range(self.N_sim):
                samples = self.sampler.get_samples(algorithm=algo, burn_in=self.burn_in, n_chains=self.N_chains, n_samples=self.N, measuring_points=self.measuring_points, timer=self.timer)

                if measure == "first_moment":
                    measurement = np.sum(samples, axis=0)/len(samples)

                elif measure == "second_moment":
                    measurement = np.sum(samples**2, axis=0)/len(samples)

                elif measure in ["trace", "scatter"]:
                    measurement = samples

                elif measure == "histogram":
                    measurement = np.histogram(samples, bins=bins, range=(-5, 5), density=True)

                elif measure in ["FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW"]:
                    measurement = samples

                elif measure in ["KL_divergence", "total_variation", "sliced_wasserstein"]:
                    # Some algorithms blow up and produce samples that cannot
                    # be binned; such runs are recorded as (None, None) and
                    # skipped when scoring. Only ordinary errors are caught so
                    # that KeyboardInterrupt / SystemExit still propagate.
                    try:
                        measurement = np.histogramdd(samples, bins=bins)
                    except Exception:
                        measurement = None, None

                elif measure == "sliced_wasserstein_no_histogram":
                    measurement = samples

                else:
                    # Previously this fell through and failed with an
                    # UnboundLocalError on `measurement`.
                    raise ValueError('Unknown measure: {!r}'.format(measure))

                measurements[algo].append(measurement)
                print('   Algorithm: {:>5}, simulation {:d}, collected {:d} samples.'.format(algo, s, len(samples)))
        print()


        # Plot the results
        if measure in ["first_moment", "second_moment"]:
            # Only the first coordinate of each moment is compared.
            data = [[m[0] for m in measurements[algo]] for algo in algorithms]
            # data = [[norm(m) for m in measurements[algo]] for algo in algorithms]

            if not experiment_mode:
                plt.boxplot(data, labels=algorithms)
            else:
                self.experiment_data["results"] = data

        elif measure == "trace":
            if not experiment_mode:
                # First simulation only; points with norm >= 1e6 are dropped.
                for algo in algorithms:
                    plt.plot([p[0] for p in measurements[algo][0] if norm(p)<1e6], [p[1] for p in measurements[algo][0] if norm(p)<1e6], '-', linewidth=1, alpha=0.8)
                plt.legend(algorithms)

        elif measure == "scatter":
            if not experiment_mode:
                if self.dim == 2:
                    for algo in algorithms:
                        plt.scatter([p[0] for p in measurements[algo][0] if norm(p)<1e6], [p[1] for p in measurements[algo][0] if norm(p)<1e6], s=1)
                    plt.legend(algorithms)

                elif self.dim == 3:
                    fig = plt.figure()
                    ax = fig.add_subplot(111, projection='3d')
                    for algo in algorithms:
                        ax.scatter(xs=[p[0] for p in measurements[algo][0] if norm(p)<1e6], ys=[p[1] for p in measurements[algo][0] if norm(p)<1e6], zs=[p[2] for p in measurements[algo][0] if norm(p)<1e6], s=1)
                    ax.legend(algorithms)

        elif measure == "histogram":
            if not experiment_mode:
                for algo in algorithms:
                    hist, bins = measurements[algo][0]
                    width = 0.85 * (bins[1] - bins[0])
                    center = (bins[:-1] + bins[1:])/2
                    plt.bar(center, hist, align='center', width=width, alpha=0.6)
                self.sampler.potential.plot_density()
                plt.legend(['true density'] + algorithms)

        elif measure in ["FFTKDE_KL", "FFTKDE_TV", "FFTKDE_SW"]:
            data = []
            for algo in algorithms:
                scores = []
                for s in range(self.N_sim):
                    # Linearly increasing weights 1..n: later samples count
                    # more in the density estimate.
                    weights = np.arange(len(measurements[algo][s])) + 1
                    estimator = FFTKDE(kernel = 'gaussian')
                    x, ys = estimator.fit(measurements[algo][s], weights=weights).evaluate(30) # 30 is arbitrary
                    true_ys = self.sampler.potential.get_density(x)

                    if measure == "FFTKDE_KL":
                        scores.append( entropy(ys/np.sum(ys), true_ys/np.sum(true_ys) ))
                    if measure == "FFTKDE_TV":
                        scores.append( sum(abs( ys/np.sum(ys) - true_ys/np.sum(true_ys) ))/2 )
                    if measure == "FFTKDE_SW":
                        # print(ys, true_ys, x)
                        scores.append( sliced_wasserstein_distance( ys/np.sum(ys), true_ys/np.sum(true_ys), x, self.dim))
                data.append(scores)

            if not experiment_mode:
                plt.boxplot(data, labels=algorithms)
            else:
                self.experiment_data["results"] = data

        elif measure in ["KL_divergence", "total_variation", "sliced_wasserstein"]:
            data = []
            for algo in algorithms:
                scores = []
                for p, edges in measurements[algo]:
                    # Runs whose histogram could not be built are skipped.
                    if p is None:
                        continue
                    # true distribution histogram
                    q, bin_coors = self.sampler.potential.get_histogram(edges)

                    if measure == "KL_divergence":
                        ps, qs = p.flatten(), q.flatten()
                        scores.append( entropy(ps/sum(ps), qs/sum(qs) ))
                    elif measure == "total_variation":
                        ps, qs = p.flatten(), q.flatten()
                        scores.append( sum(abs( ps/sum(ps) - qs/sum(qs) ))/2 )
                    elif measure == "sliced_wasserstein":
                        scores.append( sliced_wasserstein_distance( p/np.sum(p), q/np.sum(q), bin_coors, self.dim ))
                data.append(scores)

            if not experiment_mode:
                plt.boxplot(data, labels=algorithms)
            else:
                self.experiment_data["results"] = data

        elif measure == "sliced_wasserstein_no_histogram":
            data = []
            for algo in algorithms:
                scores = []
                for p in measurements[algo]:
                    scores.append(sliced_wasserstein_no_histogram(p, self.sampler.potential.get_density(p) ))
                data.append(scores)
            if not experiment_mode:
                plt.boxplot(data, labels=algorithms)
            else:
                self.experiment_data["results"] = data

        if not experiment_mode:
            # Label and show
            plt.title('Measure: {:s}, '.format(measure) + '\nPotential: {:s}'.format(self.potential))
            plt.show()