Example #1
class EstimatedKDE(object):
    """docstring for EstimatedKDE"""
    eps = 0.05
    points = 10000

    def __init__(self):
        super(EstimatedKDE, self).__init__()
        self.dist = None

    def fit(self, data):
        self.min = np.min(data)
        self.max = np.max(data)
        self.mean = np.mean(data)
        self.std = np.std(data)
        self.dist = KDEUnivariate(data)
        self.dist.fit()
        return self

    def mode(self):
        x = np.linspace(self.min, self.max, self.points)
        y = self.dist.evaluate(x)
        return x[np.argmax(y)]

    def median(self):
        # icdf has one entry per grid point; icdf[50] is only the median
        # for a 100-point grid, so take the middle entry instead.
        return self.dist.icdf[len(self.dist.icdf) // 2]

    def pdf(self, x):
        return self.dist.evaluate(x)
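A minimal usage sketch (not part of the original example), assuming NumPy is imported as np and KDEUnivariate comes from statsmodels as the class requires; the sample data is hypothetical:

import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate

data = np.random.normal(loc=2.0, scale=0.5, size=1000)
est = EstimatedKDE().fit(data)
print(est.mode())      # location of the KDE peak, near 2.0
print(est.median())    # middle entry of the fitted icdf
print(est.pdf([2.0]))  # density evaluated at x = 2.0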
Example #2
def kde_param_reflection(distro):
    ### this version is very susceptible to local maxima...
    ### kde_param tries to ensure correct handling of multimodal distributions

    distro = distro[np.isfinite(distro)]

    MIN, MAX = min(distro), max(distro)
    span = np.linspace(MIN, MAX, 200)

    ### create distribution reflection
    lower = MIN - abs(distro - MIN)
    upper = MAX + abs(distro - MAX)

    ### staple them together
    merge = np.concatenate([lower, distro, upper])

    ### compute kernel density estimates for both
    KDE_MAIN = KDEUnivariate(distro)
    KDE_FULL = KDEUnivariate(merge)

    ### fit both, using the std of the main distribution as the bandwidth
    KDE_MAIN.fit(bw=np.std(distro) / 4.0)
    KDE_FULL.fit(bw=np.std(distro) / 4.0)

    ### need to use the main KDE to scale the full
    scale = np.median(np.divide(KDE_MAIN.evaluate(span), KDE_FULL.evaluate(span)))


    ### now maximize the full KDE, using the main KDE's peak as the starting guess
    result = minimize(lambda x: -1.0 * KDE_FULL.evaluate(x),
                      x0=span[np.argmax(KDE_MAIN.evaluate(span))],
                      method='Powell')  ## Powell has been working pretty well.

    return {'result': float(result['x']),
            'kde': KDE_MAIN,
            'kde_reflect': interp1d(span, KDE_FULL.evaluate(span) * scale)}
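A usage sketch on a hypothetical bimodal sample, assuming numpy, scipy.optimize.minimize, scipy.interpolate.interp1d, and KDEUnivariate are imported as the function requires:

import numpy as np

distro = np.concatenate([np.random.normal(0.0, 1.0, 500),
                         np.random.normal(5.0, 1.0, 500)])
out = kde_param_reflection(distro)
print(out['result'])            # location of the recovered dominant mode
print(out['kde_reflect'](2.5))  # boundary-corrected density at x = 2.5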
Example #3
def kde_hist_weight(data,
                    xra,
                    nbin=50,
                    bandwidth=None,
                    density=False,
                    weights=None,
                    err=None,
                    mirror=False,
                    cdf=False):

    data = data[np.isfinite(data)]
    xmin, xmax = xra

    if mirror:
        idx = (data < xmin + 0.3)
        data = np.append(data, 2.0 * xmin - data[idx])

    x_plot = np.linspace(xmin, xmax, nbin)
    kde_est = KDEUnivariate(data)
    fft_opt = False
    if weights is None:
        fft_opt = True
        weights_sum = len(data) * 1.0
    else:
        fft_opt = False  # statsmodels needs fft=False for weighted fits
        weights_sum = np.sum(weights)

    if bandwidth is not None:
        bw_in = bandwidth
    else:
        bw_in = 'normal_reference'

    kde_est.fit(bw=bw_in, weights=weights, fft=fft_opt)
    if density:
        result = kde_est.evaluate(x_plot)
    else:
        result = kde_est.evaluate(x_plot) * weights_sum

    result_x = x_plot

    func = kde_est.evaluate

    if cdf:
        cdf_vals = [quad(func, xmin, xx)[0] for xx in x_plot]
        return result, result_x, np.array(cdf_vals)

    return result, result_x
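A sketch of a typical call with hypothetical inputs, assuming numpy and scipy.integrate.quad are available as the function expects:

import numpy as np

data = np.random.exponential(2.0, 500)
result, result_x, cdf = kde_hist_weight(data, (0.0, 10.0), nbin=100,
                                        density=True, cdf=True)
print(cdf[-1])  # should approach 1 once xmax covers most of the mass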
Example #4
def calc_bayes_factor(prior_samples, posterior_samples, x=0):
    '''Returns the Savage-Dickey style density ratio of prior to posterior
    density at `x`. Values > 1 indicate the density at `x` is higher under
    the prior than under the posterior, i.e. `x` lost support from the data.
    '''
    kde = KDEUnivariate(prior_samples)
    kde.fit()
    prior_density_at_zero = kde.evaluate([x])

    kde = KDEUnivariate(posterior_samples)
    kde.fit()
    posterior_density_at_zero = kde.evaluate([x])
    
    BF_prior_post = prior_density_at_zero/posterior_density_at_zero
    return BF_prior_post[0]
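A sketch with hypothetical MCMC draws to show the direction of the ratio; assumes numpy as np and KDEUnivariate imported as above:

import numpy as np

prior = np.random.normal(0.0, 1.0, 10000)      # broad prior on the effect
posterior = np.random.normal(0.8, 0.3, 10000)  # draws after seeing data
bf = calc_bayes_factor(prior, posterior, x=0)
print(bf)  # > 1 here: the density at 0 dropped, so 0 lost support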
Example #5
def uniform_kde_sample(frame, variable, bounds, p_scale=0.7, cut=True):
    ### updated uniform sample function to
    ### homogenize the distribution of the training variable.

    print("... uniform_kde_sample")

    if variable == 'TEFF':
        kde_width = 100
    else:
        kde_width = 0.15

    ### Basics
    var_min, var_max = min(frame[variable]), max(frame[variable])

    distro = np.array(frame[variable])

    ### Handle boundary solution

    lower = var_min - abs(distro - var_min)
    upper = var_max + abs(distro - var_max)
    merge = np.concatenate([lower, upper, distro])

    ### KDE

    KDE_MERGE = KDEUnivariate(merge)
    KDE_MERGE.fit(bw=kde_width)

    #### interp KDE_MERGE for computation speed
    span = np.linspace(var_min, var_max, 100)
    KDE_FUN = interp1d(span, KDE_MERGE.evaluate(span))

    ### Rescale
    full_c = len(distro) / integrate.quad(KDE_MERGE.evaluate, var_min,
                                          var_max)[0]
    #### This rescales the original distribution KDE function

    ### respan, so low counts outside the variable range are not penalized
    respan = np.linspace(bounds[0], bounds[1], 100)

    scale = np.percentile(KDE_MERGE.evaluate(respan) * full_c, p_scale * 100.)

    ### Accept-Reject sampling
    sample = np.random.uniform(0, 1, len(distro)) * KDE_FUN(distro) * full_c
    boo_array = sample < scale

    selection = frame.iloc[boo_array].copy()
    shuffle = selection.iloc[np.random.permutation(len(selection))].copy()

    return shuffle
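A hypothetical call, assuming pandas as pd plus the numpy/scipy/statsmodels imports the function uses; the goal is to flatten the TEFF distribution of a training set:

import numpy as np
import pandas as pd

frame = pd.DataFrame({'TEFF': np.random.normal(5000., 400., 5000)})
flat = uniform_kde_sample(frame, 'TEFF', bounds=(4000., 6000.))
print(len(flat), "rows kept")  # roughly uniform in TEFF after sampling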
Example #6
def reweight(rc, metad_traj, cv_columns, v_minus_c_col, rc_bins=20, kt=2.5):
    """
    Reweighting biased MD trajectory to unbiased probabilty along
    a given reaction coordinate. Using rbias column from COLVAR to
    perform reweighting per Tiwary and Parinello

    """
    # read in parameters from sgoop object
    colvar = metad_traj[cv_columns].values
    v_minus_c = metad_traj[v_minus_c_col].values

    # calculate rc observable for each frame
    colvar_rc = np.sum(colvar * rc, axis=1)

    # calculate frame weights, per Tiwary and Parrinello, JPCB 2015 (c(t) method)
    weights = np.exp(v_minus_c / kt)
    norm_weights = weights / weights.sum()

    # fit weighted KDE with statsmodels method
    kde = KDEUnivariate(colvar_rc)
    kde.fit(weights=norm_weights, bw=0.05, fft=False)

    # evaluate pdf on a grid for use in SGOOP
    grid = np.linspace(colvar_rc.min(), colvar_rc.max(), num=rc_bins)
    pdf = kde.evaluate(grid)
    pdf = pdf / pdf.sum()

    return pdf, grid
Example #7
def _kde_plot(
    values: ndarray, grid: ndarray, axes: Axes, bw: Union[float, str] = "scott"
) -> None:
    """Calculate KDE for observed spacings.

    Parameters
    ----------
    values: ndarray
        the values used to compute (fit) the kernel density estimate

    grid: ndarray
        the grid of values over which to evaluate the computed KDE curve

    axes: pyplot.Axes
        the current axes object to be modified

    bw: float or str
        the `bw` argument passed to the statsmodels KDEUnivariate `fit` method

    Notes
    -----
    we are doing this manually because we want to ensure consistency of the KDE
    calculation and remove Seaborn control over the process, while also avoiding
    inconsistent behaviours like https://github.com/mwaskom/seaborn/issues/938
    and https://github.com/mwaskom/seaborn/issues/796
    """
    values = values[values > 0]  # prevent floating-point bad behaviour
    kde = KDE(values)
    # kde.fit(kernel="gau", bw="scott", cut=0)
    kde.fit(kernel="gau", bw=bw, cut=0)
    evaluated = np.empty_like(grid)
    for i, _ in enumerate(evaluated):
        evaluated[i] = kde.evaluate(grid[i])
    kde_curve = axes.plot(grid, evaluated, label="Kernel Density Estimate")
    plt.setp(kde_curve, color="black")
Example #8
    def kde_statsmodels_u(self, x_grid, bandwidth=0.2, **kwargs):
        """Univariate Kernel Density Estimation with Statsmodels"""
        from statsmodels.nonparametric.kde import KDEUnivariate

        kde = KDEUnivariate(self.data)
        kde.fit(bw=bandwidth, **kwargs)
        return kde.evaluate(x_grid)
Example #9
def gaussian_density_estimation(samples, weights, grid, bw=0.1):
    """
    Kernel density estimation with Gaussian kernel.


    Parameters
    ----------
    samples : np.ndarray
        Array of sample values.
    weights : np.ndarray
        Array of sample weights. If None, unweighted KDE will be performed.
    grid : np.ndarray
        Grid points at which the KDE function should be evaluated.
    bw : float
        Bandwidth parameter for kernel density estimation. Associated with
        sigma in the case of a Gaussian kernel.

    Returns
    ----------
    np.ndarray
        The probability density values at the supplied grid points.
    """
    # KDE for fine-grained optimization
    kde = KDEUnivariate(samples)
    kde.fit(weights=weights, bw=bw, fft=False)

    # evaluate pdf on a grid for use in SGOOP
    # TODO: area under curve between points instead of pdf at point
    return kde.evaluate(grid)
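A short sketch with hypothetical weighted samples; assumes numpy as np and KDEUnivariate imported as above:

import numpy as np

samples = np.random.normal(0.0, 1.0, 1000)
weights = np.exp(-0.5 * samples**2)  # arbitrary per-sample weights
weights = weights / weights.sum()
grid = np.linspace(-3.0, 3.0, 100)
pdf = gaussian_density_estimation(samples, weights, grid, bw=0.1)
print(pdf.max())  # peak of the weighted density estimate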
Example #10
def draw_hist_and_kde(sample, grid, true_pdf):
    # histogram
    plt.hist(sample,
             20,
             range=(grid.min(), grid.max()),
             density=True,
             label='histogram')

    # kernel density estimate
    kernel_density = KDEUnivariate(sample)
    kernel_density.fit()
    plt.plot(grid,
             kernel_density.evaluate(grid),
             color='green',
             linewidth=2,
             label='kde')

    # true density
    plt.plot(grid,
             true_pdf(grid),
             color='red',
             linewidth=2,
             alpha=0.3,
             label='true pdf')

    plt.legend()
    plt.show()
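A sketch that exercises the plot helper with a standard normal; assumes matplotlib.pyplot as plt and KDEUnivariate imported as above, and uses scipy.stats.norm for the true pdf:

import numpy as np
from scipy.stats import norm

grid = np.linspace(-4.0, 4.0, 200)
draw_hist_and_kde(np.random.normal(size=500), grid, norm.pdf)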
Example #11
def gen_kde_pdf(distribution, bounds=None, kde_width=None):
    ## boundary correction for KDE

    if bounds is None:
        print("\t setting bounds to the data range")
        var_min, var_max = min(distribution), max(distribution)

    else:
        distribution = distribution[np.where((distribution > bounds[0])
                                             & (distribution < bounds[1]))]
        var_min, var_max = bounds[0], bounds[1]

    lower = var_min - abs(distribution - var_min)
    upper = var_max + abs(distribution - var_max)
    merge = np.concatenate([lower, upper, distribution])

    if kde_width is None:
        print("... setting kde_width")
        kde_width = S_MAD(distribution) / 2.

    KDE_MERGE = KDEUnivariate(merge)
    KDE_MERGE.fit(bw=kde_width)

    SCALE = np.divide(1.,
                      integrate.quad(KDE_MERGE.evaluate, var_min, var_max)[0])

    return lambda X: SCALE * KDE_MERGE.evaluate(X)
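A hypothetical call for data supported on [0, 1], where the boundary correction matters most; assumes numpy, scipy.integrate as integrate, and KDEUnivariate imported as above:

import numpy as np

pdf = gen_kde_pdf(np.random.beta(2.0, 5.0, 2000),
                  bounds=(0.0, 1.0), kde_width=0.05)
print(pdf(0.25))  # boundary-corrected density near the bulk of the data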
Example #12
def compute_kde(data, test_x):
    data = data.flatten()
    test_x = test_x.flatten()
    kde = KDEUnivariate(data)
    kde.fit(kernel="gau", bw="silverman")
    dens = kde.evaluate(test_x)
    return dens, None
Example #13
File: core.py Project: wtak23/pytak
def kde_1d(signal, x_grid=None):
    """ Return 1d kde of a vector signal (Created 01/24/2015)

    Todo: how are the kde's normalized?  (i want the kde to sum to 1....)

    https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/
    http://glowingpython.blogspot.com/2012/08/kernel-density-estimation-with-scipy.html

    Usage
    -----
    >>> x = np.linspace(0,1,401)
    >>> kde, x_grid = tw.kde_1d(signal, x)
    >>> plt.plot(x_grid, kde)
    >>> plt.grid('on')
    """
    #    from scipy.stats.kde import gaussian_kde
    #    if x is None:
    #        x = np.linspace(0,1,401)
    #
    #    return gaussian_kde(signal)(x)
    from statsmodels.nonparametric.kde import KDEUnivariate
    kde = KDEUnivariate(signal)
    kde.fit()
    if x_grid is None:
        x_grid = np.linspace(0, 1, 401)
    #bin_space = x_grid[1]-x_grid[0]

    # kde estimate
    kde_est = kde.evaluate(x_grid)

    # normalize so the values sum to 1 (a probability mass per grid point;
    # kde.evaluate already returns a density, so divide the result by the
    # grid spacing if a proper pdf is needed)
    kde_est /= kde_est.sum()

    return kde_est, x_grid
Example #14
def fit_kde(x, grid):
    kde = KDEUnivariate(x)
    kde.fit()
    return kde.evaluate(grid)
Example #15
    def pdf(self, token, years, bw=5, *args, **kwargs):

        """
        Estimate a density function from a token's ratio series.

        Args:
            token (str)
            years (iter)
            bw (int)

        Returns: OrderedDict {year: density}
        """

        series = self.clean_series(token, *args, **kwargs)

        # Use the ratio values as weights.
        weights = np.array(list(series.values()))

        # Fit the density estimate.
        density = KDEUnivariate(list(series.keys()))
        density.fit(fft=False, weights=weights, bw=bw)

        samples = OrderedDict()

        for year in years:
            samples[year] = density.evaluate(year)[0]

        return samples
Example #16
def calcKDE(kd_bw=0.1):
    """ """

    #> KDE using StatsModels
    kde = KDEUnivariate(nao_rn)
    kde.fit(bw=kd_bw)

    return kde.evaluate(x_kde)
Example #17
def weighted_kernel_density_1d(values, weights, bw='silverman', plot=False):
    from statsmodels.nonparametric.kde import KDEUnivariate
    kden= KDEUnivariate(values)
    kden.fit(weights=weights, bw=bw, fft=False)
    if plot:
        import matplotlib.pyplot as plt
        plt.plot(kden.support, [kden.evaluate(xi) for xi in kden.support], 'o-')
    return kden
Example #18
def PSTH(spike_times, bw_psth=BW_PSTH, mirror=False, trial_time=None, norm=True, trial_duration=2.5, **kwargs):
    num = len(spike_times)
    spike_times_flat = flatten(spike_times)
    total_spikes = len(spike_times_flat)
    if trial_time is None:
        trial_time = (numpy.min(spike_times_flat), numpy.max(spike_times_flat))
    if mirror:
        spike_times_flat = numpy.hstack((-1 * spike_times_flat + 2 * trial_time[0], spike_times_flat, -1 * spike_times_flat + 2 * trial_time[1]))
    kde = KDEUnivariate(spike_times_flat)
    if bw_psth is not None:
        kde.fit(bw=bw_psth)
    else:
        kde.fit()
    if norm:
        pre_factor = total_spikes / (num * quad(lambda x: kde.evaluate([x])[0], trial_time[0], trial_time[1])[0])
    else: 
        pre_factor = 1.
    return lambda x: pre_factor * kde.evaluate([x])[0]
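A sketch with hypothetical spike trains, assuming numpy, scipy.integrate.quad, and a `flatten` helper that concatenates the per-trial arrays (as the function above expects):

import numpy as np
import matplotlib.pyplot as plt

spike_times = [np.sort(np.random.uniform(0.0, 2.5, 30)) for _ in range(20)]
rate = PSTH(spike_times, bw_psth=0.05)
t = np.linspace(0.0, 2.5, 250)
plt.plot(t, [rate(x) for x in t])  # smoothed trial-averaged firing rate
plt.show()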
Example #19
def find_outiers_kde(x):
    x_scaled = scale(list(map(float, x)))
    kde = KDEUnivariate(x_scaled)
    kde.fit(bw="scott", fft=True)
    pred = kde.evaluate(x_scaled)

    n = sum(pred < 0.5)
    outlierindices = np.asarray(pred).argsort()[:n]
    outliervalue = np.asarray(x)[outlierindices]
    return outlierindices, outliervalue
Example #20
def find_outiers_kde(x):
    x_scaled = scale(list(map(float, x)))
    kde = KDEUnivariate(x_scaled)
    kde.fit(bw="scott", fft=True)
    pred = kde.evaluate(x_scaled)

    n = sum(pred < 0.5)
    outlierindices = np.asarray(pred).argsort()[:n]
    outliervalue = np.asarray(x)[outlierindices]
    return outlierindices, outliervalue
Example #21
def bootstrap_stats(
        args: Dict[str, Any],
        out_q: Optional[mp.Queue] = None) -> Union[None, Dict[str, Any]]:
    r'''
    Computes statistics and KDEs of data via sampling with replacement

    Arguments:
        args: dictionary of arguments. Possible keys are:
            data - data to resample
            name - name prepended to returned keys in result dict
            weights - array of weights matching length of data to use for weighted resampling
            n - number of times to resample data
            x - points at which to compute the kde values of resample data
            kde - whether to compute the kde values at x-points for resampled data
            mean - whether to compute the means of the resampled data
            std - whether to compute standard deviation of resampled data
            c68 - whether to compute the width of the absolute central 68.2 percentile of the resampled data

        out_q: if using multiprocessing, the result dictionary can be placed in the provided queue

    Returns:
        Result dictionary if `out_q` is `None` else `None`.
    '''

    out_dict, mean, std, c68, boot = {}, [], [], [], []
    name = '' if 'name' not in args else args['name']
    weights = None if 'weights' not in args else args['weights']
    if 'n' not in args: args['n'] = 100
    if 'kde' not in args: args['kde'] = False
    if 'mean' not in args: args['mean'] = False
    if 'std' not in args: args['std'] = False
    if 'c68' not in args: args['c68'] = False
    if args['kde'] and args['data'].dtype != 'float64':
        data = np.array(args['data'], dtype='float64')
    else:
        data = args['data']
    len_d = len(data)

    np.random.seed()
    for i in range(args['n']):
        points = np.random.choice(data, len_d, replace=True, p=weights)
        if args['kde']:
            kde = KDEUnivariate(points)
            kde.fit()
            boot.append([kde.evaluate(x) for x in args['x']])
        if args['mean']: mean.append(np.mean(points))
        if args['std']: std.append(np.std(points, ddof=1))
        if args['c68']: c68.append(np.percentile(np.abs(points), 68.2))

    if args['kde']: out_dict[f'{name}_kde'] = boot
    if args['mean']: out_dict[f'{name}_mean'] = mean
    if args['std']: out_dict[f'{name}_std'] = std
    if args['c68']: out_dict[f'{name}_c68'] = c68
    if out_q is not None: out_q.put(out_dict)
    else: return out_dict
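A usage sketch with hypothetical data, run in a single process (no queue); assumes numpy as np and KDEUnivariate imported as above:

import numpy as np

args = {'data': np.random.normal(0.0, 1.0, 500), 'name': 'sig',
        'n': 200, 'mean': True, 'std': True,
        'kde': True, 'x': np.linspace(-3.0, 3.0, 50)}
res = bootstrap_stats(args)
print(np.mean(res['sig_mean']), np.std(res['sig_mean']))  # bootstrap mean and its spread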
Example #22
def kde_param(distribution, x0):
    ### kde_param tries to ensure correct handling of multimodal distributions

    ### compute kernel density estimate
    KDE = KDEUnivariate(distribution)

    KDE.fit(bw=np.std(distribution)/3.0)

    result = scipy.optimize.minimize(lambda x: -1 * KDE.evaluate(x),
                                     x0=x0, method='Powell')  ## Powell has been working pretty well.

    return {'result' : float(result['x']), 'kde' : KDE}
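A sketch showing the role of the starting guess on hypothetical data; assumes numpy as np, scipy.optimize, and KDEUnivariate imported as above:

import numpy as np

distribution = np.random.normal(10.0, 2.0, 1000)
out = kde_param(distribution, x0=9.0)
print(out['result'])  # should land near the mode at 10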
Example #23
    def estimate(self, data, d_min, d_max):
        d_range = (d_min - 0.5 * (d_max - d_min),
                   d_max + 0.5 * (d_max - d_min))
        gridsize = (d_range[1] - d_range[0]) / self.num_bins
        bw = gridsize * self.num_bins / 100
        self.grid = [(d_range[0] + i * gridsize)
                     for i in range(self.num_bins + 1)]
        self.mids = self.grid[:-1] + np.diff(self.grid) / 2

        try:
            kde = KDEUnivariate(data)
            kde.fit(bw=bw, kernel=self.kernel, fft=False)

            self.values = [max(kde.evaluate(i)[0], 0.0) for i in self.mids]
        except Exception:
            print("KDE did not work, data length =", len(data), ", d_range =",
                  d_range, ", gridsize =", gridsize)
            self.values = [0] * len(self.mids)  # zeros on the grid midpoints
Example #24
def kde_param(distribution, x0):


    ### compute kernel density estimate
    KDE = KDEUnivariate(distribution)

    KDE.fit(bw=np.std(distribution)/3.0)

    result = scipy.optimize.minimize(lambda x: -1 * KDE.evaluate(x),
                                     x0=x0, method='Powell')
    #print(result)

    return {'result' : float(result['x']), 'kde' : KDE}
Example #25
def sample_pdf(catalog, parameter, pdf_fun, params, bounds):

    ## catalog:   pd.DataFrame input catalog with an arbitrary distribution
    ## pdf_fun:   desired distribution of the sample
    ## params:    parameters passed to pdf_fun
    ## bounds:    (min, max) range of `parameter` to draw from

    param_span = np.linspace(min(catalog[parameter]), max(catalog[parameter]),
                             100)

    print("... determine master KDE")

    KDE = KDEUnivariate(catalog[parameter])
    KDE.fit(bw=np.std(catalog[parameter]) / 3)

    KDE_FUN = interp1d(param_span, KDE.evaluate(param_span))

    ## need to rescale within the bounds.

    NORM = np.divide(
        1.,
        integrate.quad(KDE.evaluate,
                       bounds[0],
                       bounds[1],
                       points=param_span[np.where((param_span > bounds[0])
                                                  & (param_span < bounds[1]))],
                       limit=200)[0])

    ##########################################

    N = len(catalog[catalog[parameter].between(*bounds)])

    ############################################

    ### we need the scale from the other function

    result, kde_fun = determine_scale(catalog,
                                      parameter,
                                      pdf_fun,
                                      params,
                                      bounds=bounds)

    sample = np.random.uniform(0.0, 1.0,
                               len(catalog)) * len(catalog) * NORM * KDE_FUN(
                                   catalog[parameter])

    boo_array = sample < result['x'] * pdf_fun(catalog[parameter], *params)

    return catalog[boo_array & (catalog[parameter].between(
        bounds[0], bounds[1], inclusive='both'))].copy()
Example #26
def reweight(rc,
             metad_traj,
             cv_columns,
             v_minus_c_col,
             rc_bins=20,
             kt=2.5,
             kde=False):
    """
    Reweighting biased MD trajectory to unbiased probabilty along
    a given reaction coordinate. Using rbias column from COLVAR to
    perform reweighting per Tiwary and Parinello

    """
    # read in parameters from sgoop object
    colvar = metad_traj[cv_columns].values
    v_minus_c = metad_traj[v_minus_c_col].values

    # calculate rc observable for each frame
    colvar_rc = np.sum(colvar * rc, axis=1)

    # calculate frame weights, per Tiwary and Parrinello, JPCB 2015 (c(t) method)
    weights = np.exp(v_minus_c / kt)
    norm_weights = weights / weights.sum()

    if kde:
        # KDE for fine-grained optimization
        kde = KDEUnivariate(colvar_rc)
        kde.fit(weights=norm_weights, bw=0.1, fft=False)

        # evaluate pdf on a grid for use in SGOOP
        # TODO: area under curve between points instead of pdf at point
        grid = np.linspace(colvar_rc.min(), colvar_rc.max(), num=rc_bins)
        pdf = kde.evaluate(grid)

        return pdf, grid

    # histogram density for coarse optimization
    hist, bin_edges = np.histogram(colvar_rc,
                                   weights=norm_weights,
                                   bins=rc_bins,
                                   density=True,
                                   range=(colvar_rc.min(), colvar_rc.max()))
    # set grid points to center of bins
    bin_width = bin_edges[1] - bin_edges[0]
    grid = bin_edges[:-1] + bin_width
    pdf = hist

    return pdf, grid
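A sketch with a hypothetical COLVAR-style DataFrame (two CVs plus an rbias column); assumes numpy as np, pandas as pd, and KDEUnivariate imported as above:

import numpy as np
import pandas as pd

metad_traj = pd.DataFrame({'cv1': np.random.rand(1000),
                           'cv2': np.random.rand(1000),
                           'rbias': np.random.randn(1000)})
pdf, grid = reweight(np.array([0.7, 0.3]), metad_traj,
                     ['cv1', 'cv2'], 'rbias', rc_bins=50, kde=True)
print(grid[np.argmax(pdf)])  # most probable RC value after reweighting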
Example #27
    def get_kde(self, forecast_data, bandwidth=None):
        kde = KDEUnivariate(forecast_data)
        silverman_bw = bw_silverman(forecast_data)
        if bandwidth is None or bandwidth < silverman_bw:
            kde.fit(bw=silverman_bw)
        else:
            kde.fit(bw=bandwidth)
        return kde


Example #28
    def __get_kde_values(
        self,
        spacings_range: Tuple[float, float],
        nnnsd: bool = False,
        kde_gridsize: int = 1000,
    ) -> np.ndarray:
        """Fit / derive the KDE using the entire set of unfolded values, but
        evaluate it only over the given `spacings_range`."""
        spacings = np.sort(self.vals[2:] - self.vals[:-2]) if nnnsd else self.spacings
        kde = KDE(spacings)
        kde.fit(kernel="gau", bw="scott", cut=0, fft=False, gridsize=10000)
        s = np.linspace(spacings_range[0], spacings_range[1], kde_gridsize)
        evaluated = kde.evaluate(s)
        return evaluated
Example #29
def md_prob(rc, max_cal_traj, rc_bins, bandwidth=0.02, **storage_dict):
    # Calculates probability along a given RC
    data_array = max_cal_traj.values
    proj = np.sum(data_array * rc, axis=1)

    # get probability w/ statstmodels KDE
    kde = KDEUnivariate(proj)
    kde.fit(bw=bandwidth)

    grid = np.linspace(proj.min(), proj.max(), num=rc_bins)
    prob = kde.evaluate(grid)
    # prob = prob / prob.sum()

    if storage_dict.get('prob_list') is not None:
        storage_dict['prob_list'].append(prob)

    return prob, grid
Example #30
def mimic_arviz_posterior(context: ParameterContext,
                          state: SequentialAlgorithmState,
                          num_cols: int = 3,
                          ax: Axes = None,
                          **kwargs) -> Axes:
    """
    Helper function for mimicking arviz plotting functionality.

    Args:
        context: parameter context to plot.
        state: associated state.
        num_cols: the number of columns.
        ax: pre-defined axes to use.
    """

    if ax is None:
        num_rows = -(-len(context.parameters) // num_cols)  # ceil division so every parameter gets an axis
        _, ax = plt.subplots(num_rows, num_cols)

    w = state.normalized_weights().cpu().numpy()
    flat_axes = ax.ravel()

    handled = list()

    for ax_, (p, v) in zip(flat_axes, context.parameters.items()):
        v_numpy = v.cpu().numpy()

        kde = KDEUnivariate(v_numpy)
        kde.fit(weights=w, fft=False)

        x_linspace = np.linspace(v_numpy.min(), v_numpy.max(), 250)

        ax_.plot(x_linspace, kde.evaluate(x_linspace), **kwargs)

        ax_.spines["top"].set_visible(False)
        ax_.spines["right"].set_visible(False)
        ax_.spines["left"].set_visible(False)
        ax_.axes.get_yaxis().set_visible(False)
        ax_.set_title(p)

        handled.append(ax_)

    for ax_ in (ax_ for ax_ in flat_axes if ax_ not in handled):
        ax_.axis("off")

    return ax
Example #31
def empiricalPDF(data):
    """
    Evaluate a probability density function using kernel density
    estimation for input data.

    :param data: :class:`numpy.ndarray` of data values.

    :returns: PDF values at the data points.
    """
    LOG.debug("Calculating empirical PDF")
    sortedmax = np.sort(data)
    kde = KDEUnivariate(sortedmax)
    kde.fit()
    try:
        res = kde.evaluate(sortedmax)
    except MemoryError:
        res = np.zeros(len(sortedmax))
    return res
Example #32
def cluster(x, adjust=1 / 3):
    xs = np.asanyarray(x).ravel(order='K')
    idx = np.argsort(xs)
    xs = xs[idx]

    kde = KDEUnivariate(xs)
    kde.fit(adjust=adjust)
    pdf = kde.evaluate(xs)

    # Cumulatively count how many of the
    # cluster boundaries each value exceeds.
    boundinds, = argrelmin(pdf)
    bounds = xs[boundinds]
    invidx = np.argsort(idx)
    clusters = np.sum(xs.reshape(-1, 1) >= bounds, axis=1)
    clustinds = clusters[invidx].reshape(x.shape)

    return clustinds, kde
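A sketch splitting a hypothetical two-mode sample at the KDE's density minima; assumes numpy as np, scipy.signal.argrelmin, and KDEUnivariate imported as above:

import numpy as np

x = np.concatenate([np.random.normal(0.0, 0.3, 200),
                    np.random.normal(3.0, 0.3, 200)])
labels, kde = cluster(x)
print(np.unique(labels))  # expect two cluster labels, 0 and 1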
Example #33
def kde_statsmodels_u(data, grid, **kwargs):
    """
    Univariate Kernel Density Estimation with Statsmodels

    Parameters
    ----------
    data : numpy.array
        Data points used to compute the density estimator. It
        has `n x 1` dimensions, representing n points of one
        variable.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x 1` dimensions, representing m points of one
        variable.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde = KDEUnivariate(data)
    kde.fit(**kwargs)
    return kde.evaluate(grid)
Example #34
    def kde_statsmodels_u(self, x_grid, bandwidth=0.2, **kwargs):
        """Univariate Kernel Density Estimation with Statsmodels"""
        from statsmodels.nonparametric.kde import KDEUnivariate
        kde = KDEUnivariate(self.data)
        kde.fit(bw=bandwidth, **kwargs)
        return kde.evaluate(x_grid)
Example #35
        ])
    except AttributeError:
        # this can fail on some ROOT builds (works on ROOT 6)
        HAS_ROOT = False

##################################################################
# ... and plot everything.

fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(6 * 2, 4 * 2))

axes[0, 0].hist(resid, bins='auto', density=True)
axes[0, 0].plot(x, lg.pdf(x), label='Log Norm')
axes[0, 0].plot(x, hc.pdf(x), label='Half Cauchy')
if HAS_ROOT:
    axes[0, 0].plot(x, land, label='Landau', color='blue')
axes[0, 0].plot(x, dens.evaluate(x), label='KDE')
axes[0, 0].set_xlabel('x')
axes[0, 0].set_xlim(0, 0.3)
axes[0, 0].set_ylabel('PDF(x)')
axes[0, 0].legend()

axes[0, 1].hist(resid, bins='auto', density=True)
axes[0, 1].plot(x, lg.pdf(x), label='Log Norm')
axes[0, 1].plot(x, hc.pdf(x), label='Half Cauchy')
if HAS_ROOT:
    axes[0, 1].plot(x, land, label='Landau', color='blue')
axes[0, 1].plot(x, dens.evaluate(x), label='KDE')
axes[0, 1].set_xlabel('x')
axes[0, 1].set_ylabel('PDF(x)')
axes[0, 1].set_yscale('log')
axes[0, 1].legend()
def kde_statsmodels_u(x, x_grid, bandwidth=0.2, **kwargs):
    """Univariate Kernel Density Estimation with Statsmodels"""
    kde = KDEUnivariate(x)
    kde.fit(bw=bandwidth, **kwargs)
    return kde.evaluate(x_grid)