Esempio n. 1
0
    def _estimate_pdf(self,
                      sample_id: str,
                      features: list,
                      df: pd.DataFrame,
                      **kwargs) -> None:
        """
        Estimate the probability density function of one sample with FFTKDE and
        store the evaluated density in ``self.kde_cache`` under ``sample_id``.

        Parameters
        ----------
        sample_id: str
            Key used for the entry written to ``self.kde_cache``
        features: list
            Columns of ``df`` to use; non-numeric columns among them are dropped
        df: Pandas.DataFrame
            Events of the sample
        kwargs:
            Forwarded to ``bw_optimisation`` when ``self.kde_bw`` equals "cv"

        Returns
        -------
        None
        """
        bandwidth = self.kde_bw
        if bandwidth == "cv":
            # "cv" is a sentinel: swap it for the value returned by bw_optimisation
            bandwidth = bw_optimisation(data=df, features=features, **kwargs)
        numeric_events = df[features].copy().select_dtypes(include=['number'])
        estimator = FFTKDE(kernel=self.kde_kernel,
                           bw=bandwidth,
                           norm=self.kde_norm)
        evaluated = estimator.fit(numeric_events.values).evaluate()
        # Only the second element of the evaluate() result is cached
        self.kde_cache[sample_id] = evaluated[1]
Esempio n. 2
0
    def setup(self):
        """
        Prepare the source and destination samples and create the FFTKDE estimator.

        Source points that are not strictly between the first and last point of
        every dimension of ``self.dest.grid`` are dropped; the boolean selection
        is stored in ``self.mask``. When ``self.bw`` is an array, list or tuple,
        both samples are divided per dimension by the matching bandwidth and the
        estimator is created with ``bw=1``; otherwise ``self.bw`` is passed on
        unchanged.
        """
        self.prepare_source_sample(stacked=False)
        # every point must be inside output grid (requirement of KDEpy)
        masks = [
            np.logical_and(self.source_sample[i] > dim.points[0],
                           self.source_sample[i] < dim.points[-1])
            for i, dim in enumerate(self.dest.grid)
        ]
        # A point is kept only if it is inside the grid in every dimension
        self.mask = np.all(masks, axis=0)
        #n_masked = np.sum(~mask)
        #if n_masked > 0:
        #    warnings.warn('Excluding %i points that are outside grid'%n_masked, Warning, stacklevel=0)
        sample = [s[self.mask] for s in self.source_sample]

        # Stack the per-dimension arrays and transpose so columns are dimensions
        self.source_sample = np.stack(sample).T
        self.prepare_dest_sample(transposed=True)

        if isinstance(self.bw, (np.ndarray, list, tuple)):
            # Per-dimension bandwidths: rescale the data in place instead and
            # use a unit bandwidth in the estimator.
            # NOTE(review): in-place division presumably requires float samples - confirm
            for i in range(self.dest.grid.nax):
                self.source_sample[:, i] /= self.bw[i]
                self.dest_sample[:, i] /= self.bw[i]
            bw = 1
        else:
            bw = self.bw

        self.kde = FFTKDE(bw=bw, kernel=self.kernel)
Esempio n. 3
0
def kdepy_fftkde(data, a, b, num_bin_joint):
    """ Calculate Kernel Density Estimation (KDE) using KDEpy.FFTKDE.
    Note: KDEpy.FFTKDE can do only symmetric kernel (accept only scalar bandwidth).
    We map data to the [-1, 1] domain to make the bandwidth independent of the parameter range
    and more symmetric, and use the mean of the per-dimension bandwidths returned by
    bw_from_kdescipy(data, 'scott').
    :param data: array of parameter samples
    :param a: list of left boundaries
    :param b: list of right boundaries
    :param num_bin_joint: number of bins (cells) per dimension in estimated posterior
    :return: estimated posterior reshaped to (num_bin_joint + 1, )*dimensions
    """

    N_params = len(data[0])
    logging.info('KDEpy.FFTKDe: Gaussian KDE {} dimensions'.format(N_params))
    time1 = time()
    # Widen the boundaries by 1e-10 on each side before rescaling
    a = np.array(a)-1e-10
    b = np.array(b)+1e-10
    data = 2 * (data - a) / (b - a) - 1     # transform data to be [-1, 1], since gaussian is the same in all directions
    bandwidth = bw_from_kdescipy(data, 'scott')
    # NOTE(review): the reshape below presumably relies on grid_for_kde returning
    # num_bin_joint + 1 points per dimension - confirm against grid_for_kde
    _, grid_ravel = grid_for_kde(-1*np.ones(N_params), np.ones(N_params), num_bin_joint)
    kde = FFTKDE(kernel='gaussian', bw=np.mean(bandwidth))
    kde.fit(data)
    Z = kde.evaluate(grid_ravel.T)
    Z = Z.reshape((num_bin_joint + 1, )*N_params)
    time2 = time()
    timer(time1, time2, "Time for kdepy_fftkde")
    return Z
Esempio n. 4
0
def estimate_pdfs(target: pd.DataFrame,
                  ref: pd.DataFrame,
                  var: str):
    """
    Estimate the PDF of column 'var' in both the target and the reference
    DataFrame with FFTKDE (gaussian kernel, Silverman bandwidth), evaluated on
    one shared grid.

    The grid has 100000 evenly spaced points and spans from 0.1 below the
    smallest to 0.1 above the largest value of 'var' found in either frame.

    Parameters
    ----------
    target: Pandas.DataFrame
    ref: Pandas.DataFrame
    var: str
        Column present in both target and ref

    Returns
    -------
    (numpy.ndarray, numpy.ndarray, numpy.ndarray)
        Target PDF, reference PDF, and grid space
    """
    target_column = target[var]
    ref_column = ref[var]
    lower = np.min([target_column.min(), ref_column.min()])
    upper = np.max([target_column.max(), ref_column.max()])
    grid = np.linspace(lower - 0.1, upper + 0.1, 100000)

    densities = []
    for column in (target_column, ref_column):
        estimator = FFTKDE(kernel="gaussian", bw="silverman")
        densities.append(estimator.fit(column.values).evaluate(grid))
    target_pdf, ref_pdf = densities
    return target_pdf, ref_pdf, grid
Esempio n. 5
0
class Gaussian_Density_Estimator:
    """Thin wrapper exposing train / score_samples on top of an FFTKDE object."""

    def __init__(self, kernel='gaussian', bw='silverman'):
        """Create the wrapped FFTKDE with the given kernel and bandwidth."""
        self.estimator = FFTKDE(kernel=kernel, bw=bw)

    def train(self, data, weights=None):
        """Fit the wrapped estimator to ``data`` with optional ``weights``."""
        self.estimator.fit(data, weights=weights)

    def score_samples(self, input_x=None):
        """
        Evaluate the fitted estimator.

        With ``input_x`` the result of ``evaluate(input_x)`` is returned as is;
        without it, ``evaluate()`` is unpacked into two values and returned as
        an ``(x, y)`` tuple.
        """
        if input_x is not None:
            return self.estimator.evaluate(input_x)
        grid, density = self.estimator.evaluate()
        return grid, density


# import numpy as np
# import matplotlib.pyplot as plt

# data = np.random.randn(2**6)
# density_estimator = Gaussian_Density_Estimator()
# density_estimator.train(data)
# # x, y = density_estimator.score_samples()
# # print(x.shape, y.shape)

# x, y = density_estimator.score_samples(10)
# print(y)

# plt.plot(x, y); plt.tight_layout()
# plt.show()
Esempio n. 6
0
def find_kde(distribution, bw='silverman', npoints=512, kernel='gaussian'):
    """Estimate an image histogram with a kernel density estimate.

    Fits FFTKDE to ``distribution``, evaluates it on ``npoints`` grid points
    and keeps only the grid points inside [0, 255].

    Returns ``((x, y), bandwidth)`` where x, y are the retained datapoints and
    bandwidth is read from the estimator's ``bw`` attribute."""
    estimator = FFTKDE(kernel=kernel, bw=bw)
    grid, density = estimator.fit(distribution).evaluate(npoints)
    in_range = (grid >= 0) & (grid <= 255)
    return (grid[in_range], density[in_range]), estimator.bw
Esempio n. 7
0
def get_kde(gridArray):
    """
    Apply a weighted KDE to a risk matrix to reduce its sparsity.

    Every non-zero cell becomes one data point located at its (row, column)
    index and weighted by the cell value. The density (gaussian kernel,
    norm=2, bw=0.5) is evaluated on a grid with ``gridArray.shape[0]`` points
    per dimension and returned as a transposed square array.

    NOTE(review): the output side length is taken from the number of rows
    only, so the function presumably expects a square matrix - confirm.

    :param gridArray: risk matrix to process (2-D numpy array)
    :return: risk matrix after KDE, or ``gridArray`` itself when it holds no
             non-zero entry
    """
    nonzero_idx = np.nonzero(gridArray)
    # An all-zero matrix has nothing to smooth: return it unchanged
    if nonzero_idx[0].size == 0:
        return gridArray

    row_idx, col_idx = nonzero_idx[0], nonzero_idx[1]
    # Data matrix: one (row, column) coordinate pair per non-zero cell
    data = np.column_stack((row_idx, col_idx)).astype(float)
    # Cell values act as sample weights (fancy indexing replaces a Python loop)
    weights = gridArray[row_idx, col_idx]

    grid_points = gridArray.shape[0]  # number of rows (= columns) of the matrix
    kde = FFTKDE(kernel='gaussian', norm=2, bw=0.5)
    _, points = kde.fit(data, weights=weights).evaluate(grid_points)
    return points.reshape(grid_points, grid_points).T
def loadCautiousDict(filename):
    """
    Load a parameter dictionary from a CSV file indexed by its 'keys' column.

    The first value of every entry is parsed with ``ast.literal_eval`` into a
    ``(prob, length)`` pair. Entries whose ``length`` has more than 200
    elements are replaced by ``(prob, bandwidth, length)``, where bandwidth is
    the ``bw`` attribute of an FFTKDE (gaussian kernel, 'ISJ') fitted to
    ``length``. All other entries keep the raw list read from the file.
    """
    table = pd.read_csv(filename)
    paramDict = table.set_index('keys').T.to_dict('list')

    for key in list(paramDict):
        prob, length = ast.literal_eval(paramDict[key][0])
        if len(length) <= 200:
            continue
        kde = FFTKDE(kernel='gaussian', bw='ISJ').fit(length)
        # Result discarded; presumably run so that kde.bw holds the selected value
        kde.evaluate()
        paramDict[key] = prob, kde.bw, length

    return paramDict
Esempio n. 9
0
def is_outlier(y, x, th=10):
    """
    Flag entries of ``y`` whose binned robust score exceeds ``th``.

    ``y`` is scored with ``robust_scale_binned`` on two sets of bin breaks over
    ``x``: one starting at min(x) and one shifted down by half a bin width
    (plus a small epsilon). The bin width is the range of ``x`` times the
    FFTKDE 'ISJ' bandwidth, halved. An entry is flagged when the smaller of its
    two absolute scores is above ``th``.
    """
    kde = FFTKDE(kernel='gaussian', bw='ISJ').fit(x)
    # Result discarded; presumably run so that kde.bw holds the selected value
    kde.evaluate()
    lowest, highest = min(x), max(x)
    bin_width = (highest - lowest) * kde.bw / 2
    eps = _EPS * 10
    stop = highest + bin_width

    aligned_breaks = np.arange(lowest, stop, bin_width)
    shifted_breaks = np.arange(lowest - eps - bin_width / 2, stop, bin_width)
    scores = np.vstack((robust_scale_binned(y, x, aligned_breaks),
                        robust_scale_binned(y, x, shifted_breaks)))
    return np.abs(scores).min(0) > th
Esempio n. 10
0
    def compute_mi_cost(self, frame=-1):
        """For selected image-scan pair, compute mutual information cost.

        Using locations of projected lidar points that land within image bounds,
        generate vector of the grayscale intensities, and vector of reflection
        intensities. Model distribution of each using a Kernel Density Estimate
        (KDE). Compute the mutual information between the two random variables
        as H(gray) + H(reflectance) - H(joint).

        :param frame: Integer index indicating the image-scan pair.
        :return: Negative of the Mutual information cost; 0 when no projected
                 point is available.
        """
        self.project_point_cloud()
        grayscale_img = cv.cvtColor(self.img_detector.imgs[frame],
                                    cv.COLOR_BGR2GRAY)
        valid = self.projection_mask[frame]
        projected_points_valid = self.projected_points[frame][valid, :]

        # Column 0 is used as the image column index and column 1 as the row index
        grayscale_vector = np.expand_dims(
            grayscale_img[projected_points_valid[:, 1].astype(np.uint),
                          projected_points_valid[:, 0].astype(np.uint)], 1)
        # The `np.int` alias was removed in NumPy 1.24; builtin `int` is the
        # dtype it used to stand for.
        reflectance_vector = np.expand_dims(
            self.pc_detector.reflectances[frame][valid] * 255.0,
            1).astype(int)

        if len(reflectance_vector) == 0 or len(grayscale_vector) == 0:
            return 0

        joint_data = np.hstack([grayscale_vector, reflectance_vector])
        # Evaluation grid slightly wider than the 0..255 intensity range
        intensity_vector = np.linspace(-1, 256, 510)
        grid_x, grid_y = np.meshgrid(intensity_vector, intensity_vector)
        grid_data = np.vstack([grid_y.ravel(), grid_x.ravel()]).T

        gray_probs = FFTKDE(
            bw='silverman').fit(grayscale_vector).evaluate(intensity_vector)
        refl_probs = FFTKDE(bw='silverman').fit(
            reflectance_vector).evaluate(intensity_vector)
        joint_probs = FFTKDE().fit(joint_data).evaluate(grid_data)

        # Turn the evaluated densities into discrete distributions summing to 1
        gray_probs /= np.sum(gray_probs)
        refl_probs /= np.sum(refl_probs)
        joint_probs /= np.sum(joint_probs)

        mi_cost = entropy(gray_probs) + \
            entropy(refl_probs) - entropy(joint_probs)
        return -mi_cost
Esempio n. 11
0
def compare_pdf_Oscillator():
    """
    Plot three weighted KDEs of the last column of "map_samples2D.txt" on a
    log-y axis: scipy's gaussian_kde, KDE_Numba, and KDEpy's FFTKDE using the
    bandwidth taken from the KDE_Numba object.

    The sample weights are the GaussianInputs pdf evaluated at all columns but
    the last one. Shows the figure; returns nothing.
    """

    smpl = np.genfromtxt("map_samples2D.txt")
    ndim = 2
    tf = 25
    nsteps = 1000
    u_init = [0, 0]
    noise = Noise([0, tf])
    lam = noise.get_eigenvalues(ndim)
    mean = np.zeros(ndim)
    cov = np.diag(lam)
    # Symmetric domain of +/- 6 standard deviations per dimension
    domain = [[-a, a] for a in 6.0 * np.sqrt(np.diag(cov))]
    inputs = GaussianInputs(mean, cov, domain)
    weights = inputs.pdf(smpl[:, 0:-1])
    x_d = np.linspace(-3, 3, 500)

    #weights = weights/weights
    pdf_scipy = stats.gaussian_kde(smpl[:, -1], weights=weights)
    pdf_numba = KDE_Numba(smpl[:, -1], weights=weights)

    # Reuse the KDE_Numba bandwidth so the two estimates are comparable
    pdf_kdepy = FFTKDE(bw=pdf_numba.bw).fit(smpl[:, -1], weights)

    plt.semilogy(x_d, pdf_scipy(x_d), lw=3)
    plt.semilogy(x_d, pdf_numba(x_d), '--')
    plt.semilogy(x_d, pdf_kdepy(x_d), '--', lw=0.5)
    plt.xlim(-3, 3)
    plt.ylim(1e-8, 1e2)
    plt.show()
Esempio n. 12
0
def kde_smooth(a):
    '''
    Smooth a 2d array of waveforms along the first dimension (time).

    Each column is resampled into randomly jittered points (more points for
    higher values), a weighted FFTKDE with 'ISJ' bandwidth is evaluated at the
    integer time positions, and the result is rescaled by the column sum.
    '''
    n_time = a.shape[0]
    x = np.arange(n_time)
    # the KDE is evaluated on a grid one step wider than x on both sides
    extended_x = np.concatenate([[-1], x, [n_time]])

    smoothed = []
    for column in range(a.shape[1]):
        y = a[:, column]

        # sample the waveform: the number of points per time step grows with
        # the value at that step, each point jittered by up to +/- 0.5
        points, weights = [], []
        for position, amplitude in zip(x, y):
            count = int(np.ceil(amplitude**0.7 * 0.1))
            points.extend(position + np.random.rand(count) - 0.5)
            weights.extend([count**0.5] * count)

        estimator = FFTKDE(bw='ISJ').fit(np.array(points),
                                         weights=np.array(weights))
        density = estimator(extended_x)
        # drop the two padding positions, then scale by the column sum
        smoothed.append(density[1:-1] * np.sum(y))

    # reassemble the columns into a 2d array
    return np.stack(smoothed).T
Esempio n. 13
0
    def grid_pdf(self, bw=None):
        """
        Compute grid of probability density function with KDEpy.FFTKDE and a
        Gaussian kernel.

        The data are divided by the bandwidths, a unit-bandwidth KDE is fitted
        to the rescaled data, and the returned grid is multiplied by the
        bandwidths again.

        NOTE: The probability density function returned by KDEpy.FFTKDE seems
              not to be normalised for multivariate variables, this function
              should thus be considered with extra care.

        Parameters
        ----------
        bw : 1D array-like
            Bandwidths.
            NOTE: if bw=None then bw=self.h.

        Returns
        -------
        x : 2D array-like
            Coordinates at which the probability density function is evaluated.
        y : array-like
            Values of the probability density function.
        """

        # Identity check: `bw == None` compares element-wise for numpy arrays,
        # which makes the `if` raise for any bandwidth array with > 1 element.
        if bw is None:
            bw = self.h
        bw = np.array(bw, ndmin=2)
        data = self.data / bw  # rescaled data
        x, y = FFTKDE(kernel='gaussian', bw=1).fit(data).evaluate()
        return x * bw, y
def fit_kernel_density(X, xi, weights=None, bw=None):
    """
    Fit a (weighted) FFTKDE to ``X`` and evaluate it at ``xi`` by linear
    interpolation of the KDE grid (extrapolating outside of it).

    When ``bw`` is None the bandwidth is the square root of the covariance of
    scipy's ``gaussian_kde``; it falls back to 1.0 when that estimate fails or
    is below 1e-8.

    :param X: samples, flattened to 1-D before fitting
    :param xi: points at which the density is evaluated
    :param weights: optional sample weights; None means unweighted
    :param bw: optional bandwidth
    :return: density at ``xi``, clipped below at 0 and offset by 1e-8
    """
    X = onp.array(X).flatten()
    # onp.array(None) is a 0-d object array rather than None, so converting
    # unconditionally would break the unweighted default.
    if weights is not None:
        weights = onp.array(weights)

    if bw is None:
        try:
            sc = gaussian_kde(X, weights=weights)
            # X is 1-D, so the covariance has a single entry; FFTKDE takes a
            # scalar bandwidth, hence .item() instead of a 1-element array.
            bw = onp.sqrt(sc.covariance).item()
        except Exception:
            # Best-effort fallback when scipy cannot estimate a bandwidth
            bw = 1.0
        if bw < 1e-8:
            bw = 1.0

    kde_pdf_x, kde_pdf_y = FFTKDE(bw=bw).fit(X, weights).evaluate()

    # Define the interpolation function
    interp1d_fun = interp1d(kde_pdf_x,
                            kde_pdf_y,
                            kind='linear',
                            fill_value='extrapolate')

    # Evaluate the density on the input data
    pdf = interp1d_fun(xi)
    # NOTE(review): `np` is a different module alias than `onp` here; a clip
    # with only a_min presumably relies on that module's signature - confirm.
    return np.clip(pdf, a_min=0.0) + 1e-8
Esempio n. 15
0
    def plot_shift(self,
                   x: np.ndarray,
                   ax: plt.Axes or None = None):
        """
        Draw ``self.original_functions`` and overlay the PDF of the target data
        after it has been passed through ``self.shift_data``.

        The overlaid PDF is an FFTKDE (gaussian kernel, Silverman bandwidth)
        evaluated on 10000 points spanning the range of the unshifted data
        padded by 0.1 on each side.

        Parameters
        ----------
        x: numpy.ndarray
            Target data
        ax: Matplotlib.Axes, optional
            Axes to draw on; a new 5x5 figure is created when omitted

        Returns
        -------
        Matplotlib.Axes
        """
        if not ax:
            _, ax = plt.subplots(figsize=(5, 5))
        shifted = self.shift_data(x)
        # NOTE(review): the grid follows the range of the unshifted data
        grid = np.linspace(np.min(x) - 0.1, np.max(x) + 0.1, 10000)
        estimator = FFTKDE(kernel="gaussian", bw="silverman")
        shifted_pdf = estimator.fit(shifted).evaluate(grid)

        self.original_functions.plot(axes=ax)
        ax.plot(grid, shifted_pdf)
        ax.legend(labels=["Before", "Ref", "After"])
        return ax
Esempio n. 16
0
def OutlierGrade(input_path,fig_name):
    """
    Run the external MGS tool in "OutlierGrade" mode on ``input_path`` and
    derive a score from the density of its first output column.

    MGS writes "1.csv" to the working directory. An FFTKDE (gaussian kernel,
    bw=1) is fitted to the first column, plotted in red and saved to
    ``fig_name``. The score is the total grid length over which the density
    exceeds half of its maximum, divided by 50.

    :param input_path: path handed to MGS via ``-i``
    :param fig_name: file name the density plot is saved to
    :return: summed above-half-maximum length divided by 50
    """
    import subprocess

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters reach MGS intact and cannot inject commands.
    # The exit status is deliberately not checked, as before.
    subprocess.run(["../build/MGS", "-i", input_path,
                    "-o", "1.csv", "-M", "OutlierGrade"])
    table = pd.read_csv("1.csv")
    og = np.asarray(table)[:, 0]
    x, y = FFTKDE(kernel="gaussian", bw=1).fit(og).evaluate()

    above_half = y > np.max(y) * 0.5
    seg_length = 0
    seg_start = None  # grid position where the current segment began
    for i, is_above in enumerate(above_half):
        if is_above and seg_start is None:
            seg_start = x[i]
        elif not is_above and seg_start is not None:
            seg_length += x[i - 1] - seg_start
            seg_start = None
    # A segment still open at the last grid point would otherwise be dropped
    if seg_start is not None:
        seg_length += x[-1] - seg_start

    plt.plot(x, y, 'r')
    plt.savefig(fig_name)
    return seg_length / 50
Esempio n. 17
0
def get_silvermans_bandwidth(X, kernel, bandwidth):
    """
    Return the numeric bandwidth FFTKDE selects for 1-D data ``X``.

    :param X: 1-D numpy array of samples
    :param kernel: kernel name forwarded to FFTKDE
    :param bandwidth: automatic selection method, "silverman" or "ISJ"
    :return: the ``bw`` attribute of the estimator after fitting and
             evaluating on 2**10 grid points
    :raises AssertionError: when an argument fails validation
    """
    # X
    assert X is not None
    assert isinstance(X, np.ndarray)
    assert X.ndim == 1

    # bandwidth: both automatic methods are accepted, since callers in this
    # file forward "ISJ" as well as "silverman"
    assert bandwidth in ("silverman", "ISJ")

    kde = FFTKDE(bw=bandwidth, kernel=kernel)
    # Result discarded; presumably run so that kde.bw holds the selected value
    kde.fit(X)(2**10)

    return kde.bw
Esempio n. 18
0
class KDEpyFFTwithISJBandwidth:
    """Gaussian FFTKDE with 'ISJ' bandwidth, fitted once at construction."""

    description = 'KDE using KDEpy.FFTKDE and ISJ bandwidth'

    def __init__(self, data, bandwidth, xlim=None):
        """Fit the estimator to ``data``; ``bandwidth`` and ``xlim`` are not used."""
        estimator = FFTKDE(kernel="gaussian", bw='ISJ')
        self._instance = estimator.fit(data)

    def pdf(self, x):
        """Evaluate the fitted estimator at ``x.numpy()``."""
        return self._instance.evaluate(x.numpy())
Esempio n. 19
0
def main():
    """
    Render an animation of a KDE whose bandwidth grows frame by frame and save
    it as '_static/img/KDE_bw_animation.gif' next to this file.

    128 samples are drawn from a standard normal distribution (seed 123). The
    figure shows the samples, the true density and the KDE line that is
    redrawn for every frame with bandwidth 1.05**(i - 64).
    """
    here = os.path.abspath(os.path.dirname(__file__))
    save_path = os.path.join(here, r'_static/img/')

    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    # Query the figure's on-screen size and DPI. Note that when saving the figure to
    # a file, we need to provide a DPI for that separately.
    print('fig size: {0} DPI, size in inches {1}'.format(
        fig.get_dpi(), fig.get_size_inches()))

    # Plot a scatter that persists (isn't redrawn) and the initial line.
    np.random.seed(123)
    distribution = stats.norm()
    data = distribution.rvs(128)
    ax.set_title('Kernel density estimation animated', fontsize=16)
    ax.scatter(data,
               np.zeros_like(data),
               color='red',
               marker='|',
               zorder=10,
               label='Data')

    #x = np.arange(0, 20, 0.1)
    #ax.scatter(x, x + np.random.normal(0, 3.0, len(x)))
    x = np.linspace(np.min(data) - 1, np.max(data) + 1, num=2**10)
    # NOTE(review): the initial line uses exponent (0 - 84) while update() uses
    # (i - 64) - confirm the different offset is intended
    y = FFTKDE(bw=1.05**(0 - 84)).fit(data)(x)
    line, = ax.plot(x, y, linewidth=2, label='KDE')
    ax.plot(x, distribution.pdf(x), linewidth=2, label='True distribution')
    ax.grid(True, zorder=-55)

    plt.legend(fontsize=12)

    def update(i):
        """Redraw the KDE line for frame ``i`` and show the bandwidth as xlabel."""
        label = 'timestep {0}'.format(i)

        # Update the line and the axes (with a new xlabel). Return a tuple of
        # "artists" that have to be redrawn for this frame.
        bw = 1.05**(i - 64)
        y = FFTKDE(bw=bw).fit(data)(x)
        line.set_ydata(y)
        # Pad with zeros on the right to at least 5 characters
        bw_formatted = str(round(bw, 3)).ljust(5, '0')
        ax.set_xlabel(f'Bandwidth $h$: {bw_formatted}', fontsize=14)
        print(label, bw)
        return line, ax

    # FuncAnimation will call the 'update' function for each frame; here
    # animating over frames 1..127, with an interval of 25ms between frames.
    anim = FuncAnimation(fig, update, frames=np.arange(1, 128), interval=25)

    # Save the animation as a GIF first; plt.show() afterwards will just loop
    # the animation forever.
    anim.save(os.path.join(save_path, r'KDE_bw_animation.gif'),
              dpi=80,
              writer='imagemagick')
    plt.show()
Esempio n. 20
0
def test_find_local_minima():
    """
    gate.find_local_minima must return the grid position of the lowest density
    between the two peaks of a bimodal sample (modes at 2 and 10).
    """
    left_mode = np.random.normal(loc=2, scale=1, size=1000)
    right_mode = np.random.normal(loc=10, scale=0.5, size=1000)
    data = np.hstack([left_mode, right_mode])
    x, y = FFTKDE(kernel='gaussian', bw='silverman').fit(data).evaluate()

    # Index of the highest density on either side of x = 6
    peak1 = np.flatnonzero(y == y[x < 6].max())[0]
    peak2 = np.flatnonzero(y == y[x > 6].max())[0]
    # Expected minimum: lowest density inside the open interval (4, 7)
    valley = (x > 4) & (x < 7)
    minima = x[np.flatnonzero(y == y[valley].min())[0]]

    found = gate.find_local_minima(p=y, x=x, peaks=np.array([peak1, peak2]))
    assert found == minima
Esempio n. 21
0
def custom_KDE(data, weights=None, bw=None):
    """
    Fit a (weighted) FFTKDE to the flattened ``data`` and return the fitted
    estimator.

    When ``bw`` is None the bandwidth is the square root of the covariance of
    scipy's ``gaussian_kde``; it falls back to 1.0 when that estimate fails or
    is below 1e-8.

    :param data: array of samples, flattened before fitting
    :param weights: optional sample weights
    :param bw: optional bandwidth
    :return: fitted FFTKDE object
    """
    data = data.flatten()
    if bw is None:
        try:
            sc = scipy.stats.gaussian_kde(data, weights=weights)
            # data is 1-D, so the covariance has a single entry; hand FFTKDE a
            # scalar bandwidth rather than a 1-element array.
            bw = np.sqrt(sc.covariance).item()
        except Exception:
            # Best-effort fallback when scipy cannot estimate a bandwidth
            bw = 1.0
        if bw < 1e-8:
            bw = 1.0
    return FFTKDE(bw=bw).fit(data, weights)
Esempio n. 22
0
def multimodality(Xi):
    """
    Flag a cluster as multimodal when any feature's KDE has more than one peak.

    inputs
      Xi: cluster data (nxm), matrix of n vectors with m dimensions
    outputs
      mm: multimodality flag (scalar: 0 or 1)

    Features with zero range are skipped. Each remaining feature is fitted
    with a Silverman-bandwidth FFTKDE; if that fails, a bandwidth of one tenth
    of the feature's range is used instead. Peaks are counted with
    ``find_peaks(prominence=0.5)``.
    """
    n, m = Xi.shape
    bwf = 10  # fallback bandwidth = feature range / bwf
    points = int(50 * (np.log10(n) + 1))
    for i in range(m):
        feat = Xi[:, i].reshape(-1, 1)
        bw = float(feat.max() - feat.min()) / bwf
        if not bw > 0:
            continue
        try:
            _, y = FFTKDE(bw='silverman').fit(feat).evaluate(points)
        except Exception:
            # Fall back to the range-based bandwidth computed above; the
            # divisor `bwf` itself is not a meaningful bandwidth.
            _, y = FFTKDE(bw=bw).fit(feat).evaluate(points)
        peaks, _ = find_peaks(y, prominence=0.5)
        if len(peaks) > 1:
            # One multimodal feature settles the flag
            return 1
    return 0
Esempio n. 23
0
def dds(genes_log10_gmean_step1, grid_points=2 ** 10):
    """
    Density dependent downsampling: return sampling probabilities inversely
    proportional to the estimated density at each input value.

    The density is an FFTKDE (gaussian kernel, Silverman bandwidth) evaluated
    on ``grid_points`` points and linearly interpolated back onto the inputs.
    The returned probabilities sum to 1.
    """
    values = npy.asarray(genes_log10_gmean_step1)
    estimator = FFTKDE(kernel="gaussian", bw="silverman").fit(values)
    grid, pdf = estimator.evaluate(grid_points=grid_points)
    density = interpolate.interp1d(x=grid, y=pdf, assume_sorted=False)

    # Machine epsilon keeps the inverse finite where the density is zero
    inverse_density = 1 / (density(genes_log10_gmean_step1) + npy.finfo(float).eps)
    return inverse_density / inverse_density.sum()
Esempio n. 24
0
def bounded_1d_kde(X,
                   low_bound,
                   high_bound,
                   kernel='triweight',
                   bandwidth=None,
                   output="function",
                   grid=None):
    """
    Estimate a 1-D density restricted to [low_bound, high_bound] by mirroring
    the data about both boundaries.

    The KDE is fitted to the data plus its two reflections (3N points), set to
    zero outside of the domain and multiplied by 3 so that the part inside the
    domain integrates to approximately 1.

    :param X: 1-D numpy array of samples
    :param low_bound: lower boundary of the domain
    :param high_bound: upper boundary of the domain
    :param kernel: kernel name forwarded to FFTKDE
    :param bandwidth: number, "silverman" or "ISJ"; None means "silverman".
        A method name is resolved to a number on the unmirrored data.
    :param output: "function" for a linear interpolator of the density, or
        "discrete_signal" for the (x, y) samples inside the domain
    :param grid: optional evaluation grid; 2**20 automatic points if None
    :return: ``interp1d`` object or ``(x, y)`` tuple, depending on ``output``
    """
    # output
    assert output in ("discrete_signal", "function")

    if bandwidth is None:
        bandwidth = "silverman"

    if bandwidth in ("silverman", "ISJ"):
        # Determining bandwidth
        bandwidth = get_silvermans_bandwidth(X=X,
                                             kernel=kernel,
                                             bandwidth=bandwidth)
        print("Automatic bandwidth selection gave : {}".format(bandwidth))

    # Mirror the original data about each domain boundary. Both reflections
    # must start from X: reflecting the already-mirrored array would add a
    # fourth copy and leave the factor 3 below under-normalising the result.
    data = np.concatenate((X, 2 * low_bound - X, 2 * high_bound - X))

    # Calculating kde
    fftkde = FFTKDE(bw=bandwidth, kernel=kernel).fit(data)
    if grid is not None:
        x, y = grid, fftkde(grid_points=grid)
    else:
        x, y = fftkde(2**20)

    # Setting the KDE to zero outside of the domain
    y[x <= low_bound] = 0
    y[x > high_bound] = 0

    # Normalizing to get integral of ~1 (one third of the mass is in the domain)
    y = y * 3

    if output == "function":
        return interp1d(x, y, kind='linear')

    # Removing out of support samples
    y = y[low_bound <= x]
    x = x[low_bound <= x]
    y = y[x <= high_bound]
    x = x[x <= high_bound]

    return x, y
Esempio n. 25
0
    def update(i):
        """
        Redraw the KDE line for frame ``i`` with bandwidth 1.05**(i - 64) and
        show that bandwidth as the x-axis label.

        Relies on ``data``, ``x``, ``line`` and ``ax`` from the enclosing scope.
        Returns the ``(line, ax)`` tuple.
        """
        label = 'timestep {0}'.format(i)

        # Update the line and the axes (with a new xlabel). Return a tuple of
        # "artists" that have to be redrawn for this frame.
        bw = 1.05**(i - 64)
        y = FFTKDE(bw=bw).fit(data)(x)
        line.set_ydata(y)
        # Pad with zeros on the right to at least 5 characters
        bw_formatted = str(round(bw, 3)).ljust(5, '0')
        ax.set_xlabel(f'Bandwidth $h$: {bw_formatted}', fontsize=14)
        print(label, bw)
        return line, ax
Esempio n. 26
0
def Homogeneity(input_path):
    """
    Run the external MGS tool in "Homogeneity" mode on ``input_path`` and count
    the peaks in the density of its first output column.

    MGS writes "2.csv" to the working directory. An FFTKDE (gaussian kernel,
    bw=0.01) is fitted to the first column; peaks of at least 0.01 height are
    marked with red triangles 0.3 above the curve and the figure is saved as
    "fig/Homogeneity_<name>.png", where <name> is the input file name up to
    its first dot.

    :param input_path: path handed to MGS via ``-i``
    :return: number of detected peaks
    """
    import subprocess

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters reach MGS intact and cannot inject commands.
    # The exit status is deliberately not checked, as before.
    subprocess.run(["../build/MGS", "-i", input_path,
                    "-o", "2.csv", "-M", "Homogeneity"])
    table = pd.read_csv("2.csv")
    uty = np.asarray(table)[:, 0]
    x, y = FFTKDE(kernel="gaussian", bw=0.01).fit(uty).evaluate()

    plt.rcParams.update({'font.size': 22})
    plt.plot(x, y)
    peaks, _ = find_peaks(y, height=0.01)
    # Markers only ("rv"), drawn slightly above each peak
    plt.plot(x[peaks], y[peaks] + 0.3, "rv", markersize=8)

    fname = input_path.split("/")[-1].split(".")[0]
    plt.savefig("fig/Homogeneity_" + fname + ".png")
    return len(peaks)
Esempio n. 27
0
def _KDE(x, y, nGS):
    """Compute a bivariate kde using KDEpy and return E[y | x] per x grid point.

    The KDE (bw=0.025) is evaluated on (nGS + 6) x 2**8 grid points. Entries
    whose x grid value lies below min(x) are removed first; the second removal
    uses the positions of the remaining x grid values above max(x)."""
    n_grid_x = nGS + 6
    n_grid_y = 2**8

    # Stack the data as (observations, 2) and evaluate the KDE
    samples = np.vstack((x, y)).T
    estimator = FFTKDE(bw=0.025).fit(samples)
    grid, density = estimator.evaluate((n_grid_x, n_grid_y))

    # Unique grid coordinates per axis; density as an (x, y) table
    grid_x = np.unique(grid[:, 0])
    grid_y = np.unique(grid[:, 1])
    joint = density.reshape(n_grid_x, n_grid_y)

    # y_pred = E[y | x] = sum_y p(y | x) * y
    conditional = (joint.T / np.sum(joint, axis=1)).T
    y_pred = np.sum(conditional * grid_y, axis=1)

    below = np.where(grid_x < np.min(x))
    grid_x = np.delete(grid_x, below)
    y_pred = np.delete(y_pred, below)
    above = np.where(grid_x > np.max(x))
    return np.delete(y_pred, above)
Esempio n. 28
0
    def plot_kde(self, var: str or list):
        """
        Utility function; draws a KDE plot for a variable in the 'data'
        attribute using a gaussian kernel and Silverman's bandwidth.

        Parameters
        ----------
        var: str or list
            Column name; for a list of names the element-wise sum of the
            listed columns is plotted

        Returns
        -------
        (Matplotlib.Figure, Matplotlib.Axes)
        """
        if isinstance(var, list):
            values = np.sum([self.data[name].values for name in var], axis=0)
        else:
            values = self.data[var].values
        estimator = FFTKDE(kernel="gaussian", bw="silverman")
        grid, density = estimator.fit(values).evaluate()
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.plot(grid, density)
        ax.set_xlabel(var)
        return fig, ax
Esempio n. 29
0
def get_density(nu_po_di, co__=(), pl=True, na_=(), **ke_ar):
    """
    Evaluate an FFTKDE of the points in ``nu_po_di`` on an n-dimensional grid.

    :param nu_po_di: 2-D array of points (rows) by dimensions (columns)
    :param co__: one 1-D coordinate per dimension; regenerated with
        ``make_1d_grid`` from each column's min and max when its length does
        not match the number of dimensions
    :param pl: plot the density when true
    :param na_: names passed to ``plot``, with "Density" appended
    :param ke_ar: keyword arguments forwarded to FFTKDE
    :return: (grid, density), with the density clipped below at
        FLOAT_RESOLUTION
    """
    _, n_dimension = nu_po_di.shape

    if len(co__) != n_dimension:
        print("Making coordinate")
        co__ = [
            make_1d_grid(column.min(), column.max(), 1 / 3, 8)
            for column in nu_po_di.T
        ]

    grid = make_nd_grid(co__)
    estimator = FFTKDE(**ke_ar).fit(nu_po_di)
    density = estimator.evaluate(grid_points=grid).clip(min=FLOAT_RESOLUTION)

    if pl:
        plot(grid, density, na_=list(na_) + ["Density"])

    return grid, density
Esempio n. 30
0
def test_find_inflection_point():
    """
    gate.find_inflection_point must land in (3, 4) with incline=False and in
    (0, 1) with incline=True for a seeded normal sample centred on 2.
    """
    np.random.seed(42)
    sample = np.random.normal(loc=2, scale=1, size=1000)
    x, y = FFTKDE(kernel='gaussian', bw='silverman').fit(sample).evaluate()

    # (incline flag, lower bound, upper bound, plot title)
    cases = (
        (False, 3, 4, "Test inflection point; incline=False"),
        (True, 0, 1, "Test inflection point; incline=True"),
    )
    for incline, lower, upper, title in cases:
        inflection_point = gate.find_inflection_point(
            x=x,
            p=y,
            peak_idx=int(np.argmax(y)),
            incline=incline)
        plt.plot(x, y)
        plt.axvline(inflection_point, c="r")
        plt.title(title)
        plt.show()
        assert lower < inflection_point < upper