Example #1
def plot_calibration_curve_multiple_measurements_withax(
        ypreds_probabilistic,
        yreal_dev,
        nr_quantiles=100,
        debug=False,
        ax=None,
        name=''):
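    # For each measurement, compute the PIT value of the observation: its
    # empirical quantile among samples drawn from the predictive distribution.
    # A well-calibrated forecast yields quantiles that are uniform on [0, 1].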
    quantiles = []
    for i in range(len(ypreds_probabilistic)):
        yp_horizon = ypreds_probabilistic[i]  # 48 * rv_histogram
        yr_horizon = yreal_dev[i]
        for j in range(len(yr_horizon)):
            yp = yp_horizon[j]  # rv
            yr = yr_horizon[j]  # real value
            samples = stats.norm.rvs(
                yp.mean(), yp.std(), 1000,
                random_state=123)  # TODO: correct the sampling
            samples = np.append(samples, yr)
            samples = np.sort(samples)
            yr_idx = np.where(yr == samples)[0]
            yr_idx = yr_idx / (len(samples) - 1)
            quantiles.append(yr_idx[0])
    #plt.hist(quantiles)
    #plt.show()
    X = np.linspace(0, 1, nr_quantiles)
    y = []
    for i in range(len(X)):
        y.append(np.sum([1 for q in quantiles if q <= X[i]]))
    y = np.array(y)
    y = y / len(quantiles)
    ax.plot(np.append(0, X), np.append(0, y), label=name, linewidth=2)
    #ax.plot(X, X, 'k-')
    #plt.title('Calibration Curve %s' % name)
    ax.set_xlabel('Quantiles')
    ax.set_ylabel('Frequency')
    ax.set_xlim([-0.005, 1.002])
    ax.set_ylim([-0.002, 1.002])
    ax.legend()
    #plt.show()
    return
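
    # NOTE: everything below this return is unreachable (dead code from an earlier version)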

    # real data distribution
    data = []
    for y in yreal_dev:
        data.append(list(y))
    rv = stats.rv_histogram(np.histogram(data, bins=10))
    X = np.linspace(0, 1, 101)
    horizon_distribution = []
    # horizon forecast distribution
    for probabilisticForecast in ypreds_probabilistic:
        for fv in probabilisticForecast.forecast_variables:  # renamed to avoid shadowing `rv`
            samples = stats.norm.rvs(fv.mean(),
                                     fv.std(),
                                     1000,
                                     random_state=123)
            horizon_distribution.append(samples)
    rv_horizon = stats.rv_histogram(np.histogram(horizon_distribution,
                                                 bins=50))
    # calibration curve
    plot_calibration_curve_withax(rv, rv_horizon, nr_quantiles, ax, name)
Example #2
def match_rdist(df, sample, rtol=5, seed=0, pltname=None, random=False,
                plotpath='figures/sampling/match_radial_dists/'):
    # get MW satellite distribution
    MC_dwarfs = np.load('data/sampling/'+sample+'.npy')
    dists = np.median(MC_dwarfs[:,6,:], axis=1)

    # get CDF for satellites
    edges = np.arange(301, step=0.1)
    if random:
        hist = rv_histogram(np.histogram(dists, bins=edges))
    else:
        hist = rv_histogram(np.histogram(dists, bins=edges), seed=seed)

    # use inverse transform method with r +- rtol to select subhalos
    subs = df.copy()
    selected = []
    while True:
        r_sample = hist.rvs()
        diff = np.abs(subs.r - r_sample)
        if np.min(diff) < rtol:
            index = np.argmin(np.abs(subs.r.values - r_sample))
            name = subs.iloc[index].name
            subs.drop([name], inplace=True)
            selected.append(name)
        else:
            break
    survived = df.loc[selected]

    # plot if wanted
    if pltname is not None:
        plot_match(plotpath+pltname+'.png', df, survived, dists)

    return survived
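
# Hypothetical usage sketch (the sample name and DataFrame below are placeholders, not from the source):
# matched = match_rdist(subhalo_df, 'mw_satellites', rtol=5, seed=42, pltname=None)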
Example #3
def plot_calibration_curve_single_measurement(probabilisticForecast,
                                              y,
                                              nr_quantiles=21,
                                              debug=False):
    # real data distribution
    rv = stats.rv_histogram(np.histogram(y, bins=10))
    X = np.linspace(0, 1, 11)
    if debug:
        plt.plot(X, rv.pdf(X), label='Data')
    #
    # horizon forecast distribution
    horizon_distribution = []
    for fv in probabilisticForecast.forecast_variables:  # renamed to avoid shadowing `rv` above
        samples = stats.norm.rvs(fv.mean(), fv.std(), 1000, random_state=123)
        horizon_distribution.append(samples)
    rv_horizon = stats.rv_histogram(np.histogram(horizon_distribution,
                                                 bins=10))
    if debug:
        plt.plot(X, rv_horizon.pdf(X), label='Forecast')
        plt.title('PDF')
        plt.xlabel('Power')
        plt.ylabel('Frequency')
        plt.legend()
        plt.show()
    #
    # calibration curve
    plot_calibration_curve(rv, rv_horizon, nr_quantiles)
Example #4
    def createSample(self, numTrials):
        # Determine proportion of each response type
        [unique, counts] = np.unique(self.simDistrChoice, return_counts=True)
        totalSims = np.sum(~np.isnan(self.simDistrChoice))

        simPDF = np.empty(self.model.numAcc, dtype=object)
        numChoices = np.zeros(self.model.numAcc, dtype=int)
        sampleRT = np.empty(self.model.numAcc, dtype=object)
        simSampleHist = np.empty(self.model.numAcc, dtype=object)
        for a in range(self.model.numAcc):
            # Convert simulation histograms into PDFs (normalized so sum = 1 for
            # each accumulator)
            simPDF[a] = rv_histogram([self.simChoiceHist[a], self.bins])
            # Compute number of sim trials for each response type
            if a in unique:
                numChoices[a] = int(np.floor(
                    counts[np.where(unique == a)[0][0]] / totalSims * numTrials))
            # For each response type, draw the corresponding number of sim trials from respective PDFs
            sampleRT[a] = simPDF[a].rvs(size=numChoices[a])
            # Compute histograms for each accumulator's RTs
            [simSampleHist[a],
             _] = np.histogram(sampleRT[a],
                               bins=self.model.maxRT // self.model.timeStep,
                               range=(0, self.model.maxRT))

        # Concatenate generated data across response types
        simSampleChoice = np.repeat(np.arange(self.model.numAcc), numChoices)
        simSampleRT = np.concatenate(sampleRT)

        return simSampleChoice, simSampleRT, simSampleHist
Example #5
    def __init__(self, X, bin_width=None, bin_origin=None):
        """
			Initialisation of Copula
			
			Parameters
			----------
			X           : np.array[ shape = (n_samples,n_features) ]
				Dataset to fit the Copula
			bin_width   : None or np.array[ shape = (n_features) ]
				Length of the cells. Each entry of bin_width is the length of the regular cells along the corresponding dimension
			bin_origin  : None or np.array[ shape = (n_features) ]
				Coordinates of the lower corner of one cell
				
			
			Attributes
			----------
			dim        : int
				Dimension
			muX        : Apyga.stats.SparseHist
				Multivariate histogram
			muXi       : list(scipy.stats.rv_histogram)
				Marginals of X
		"""
        size, self.dim = X.shape
        self.muX = SparseHist(X, bin_width, bin_origin)
        self.muXi = [
            sc.rv_histogram(
                np.histogram(X[:, i],
                             bins=np.arange(X[:, i].min(), X[:, i].max(),
                                            self.muX.bin_width[i])))
            for i in range(self.dim)
        ]
Example #6
def generate_distribution(data, dist_name='hist', bins=200):
    """Generate distribution"""

    _data = data[~np.isnan(data)] 

    out_dist = None
    out_params = None

    if dist_name=='hist':
        h = np.histogram(_data, bins=bins, density=True)
        out_dist = st.rv_histogram(h)
    else:
        y, x = np.histogram(_data, bins=bins, density=True)
        x = (x + np.roll(x, -1))[:-1] / 2.0  # bin centers (computed but unused)
        # Try to fit the distribution

        try:
            # Ignore warnings from data that can't be fit
            with warnings.catch_warnings():
                warnings.filterwarnings('error')
                # fit dist to data
                out_dist = getattr(st, dist_name)
                out_params = out_dist.fit(_data)
        except Exception:
            return (None, None)
    return (out_dist, out_params)
Example #7
    def __init__(
        self,
        X: np.ndarray,
        bins: Union[int, str] = "auto",
        alpha: float = 1e-10,
        bound_ext: float = 0.1,
    ):

        estimators = []

        for iX in X.T:
            diff = iX.max() - iX.min()
            lower_bound = iX.min() - bound_ext * diff
            upper_bound = iX.max() + bound_ext * diff

            # create histogram
            if bins in ["blocks", "knuth"]:
                hist = astro_hist(iX,
                                  bins=bins,
                                  range=(lower_bound, upper_bound))
            else:
                hist = np.histogram(iX,
                                    bins=bins,
                                    range=(lower_bound, upper_bound))

            # create histogram object
            i_estimator = rv_histogram(hist)

            # add some regularization
            i_estimator._hpdf += alpha

            estimators.append(i_estimator)

        self.estimators = estimators
Example #8
def spectra_axial_spray(particle, filename, groupname, fraction_of_shot):
    print(particle, filename, groupname, fraction_of_shot)
    fin = h5py.File(filename, 'r')
    g = fin[groupname]
    photon_energy = g['energy'][:]*MeV
    thetax = g['thetax'][:]*mrad
    thetay = g['thetay'][:]*mrad
    d2W = g['d2W'][:]*joule/(mrad**2*MeV)
    fin.close()
    dthetax = thetax[1]-thetax[0]
    dthetay = thetay[1]-thetay[0]

    spectral_energy_density = d2W.sum(axis=(1,2))*dthetax*dthetay
    spectral_photon_density = spectral_energy_density/photon_energy

    energy = simps(spectral_energy_density, photon_energy)
    num_photons = simps(spectral_photon_density, photon_energy)

    photon_energy_bins = np.append(photon_energy, photon_energy[-1]+1*MeV)
    photon_energy_binwidth = (
        photon_energy_bins[1:] - photon_energy_bins[:-1])
    rv = rv_histogram((spectral_photon_density, photon_energy_bins))

    num_events = int(fraction_of_shot*num_photons)
    print('num_events=', num_events)
    for i in range(num_events):
        energy = rv.rvs()
        print(i, energy/MeV)
        yield particle, g4.G4ThreeVector(), g4.G4ThreeVector(0,0,1), energy
    print('yo')
    return  # PEP 479: raising StopIteration inside a generator raises RuntimeError
Example #9
    def __init__(
            self,
            data,
            bins: int = 20,
    ):
        self.histogram = np.histogram(data, bins=bins)
        self._distribution = rv_histogram(self.histogram)
Example #10
    def generate_distribution(self):
        """Generate distribution"""
        _data = self.data[~np.isnan(self.data)] 

        self.dist = None
        self.params = None

        if self.dist_name=='hist':
            h = np.histogram(_data, bins=self.bins, density=True)
            self.dist = st.rv_histogram(h)
        else:
            # Try to fit the distribution
            try:
                # Ignore warnings from data that can't be fit
                with warnings.catch_warnings():
                    warnings.filterwarnings('error')
                    # fit dist to data
                    self.dist = getattr(st, self.dist_name)
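                    # NOTE: self.params was reset to None above, so the branch
                    # below never runs; only the plain fit in the else clause is used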
                    if self.params is not None:
                        # Separate parts of parameters
                        arg = self.params[:-2]
                        loc = self.params[-2]
                        scale = self.params[-1]
                        self.params = self.dist.fit(_data, loc=loc, scale=scale, *arg)
                    else:
                        self.params = self.dist.fit(_data)
                        # Separate parts of parameters
                        arg = self.params[:-2]
                        loc = self.params[-2]
                        scale = self.params[-1]
                    self.dist = self.dist(loc=loc, scale=scale, *arg)
            except Exception as e:
                print(e)
Example #11
def sample_posterior(posterior, place_bin_edges, n_samples=1000):
    """Samples the posterior positions.

    Parameters
    ----------
    posterior : xarray.DataArray, shape (n_time, n_position_bins) or
        shape (n_time, n_x_bins, n_y_bins)

    Returns
    -------
    posterior_samples : numpy.ndarray, shape (n_time, n_samples)

    """
    # Stack 2D positions into one dimension
    try:
        posterior = posterior.stack(z=["x_position", "y_position"]).values
    except (KeyError, AttributeError):
        posterior = np.asarray(posterior)

    place_bin_edges = place_bin_edges.squeeze()
    n_time = posterior.shape[0]

    posterior_samples = [
        rv_histogram(
            (posterior[time_ind], place_bin_edges)).rvs(size=n_samples)
        for time_ind in range(n_time)
    ]

    return np.asarray(posterior_samples)
Example #12
    def __init__(self, edges, heights):

        self.edges = edges  # list of arrays for bin edges along each dim
        self.heights = heights  # n histogram values -> array of bin heights

        self.nheights = np.abs(self.heights)
        self.nheights = self.nheights - self.nheights.min()
        if len(self.edges) == 1:
            self.dist = scistats.rv_histogram((self.nheights, self.edges[0]))

        elif len(self.edges) == 2:
            #bin_coords = [np.unique(self.edges[:,0]),
            #              np.unique(self.edges[:,1])]
            #bin_widths = [bin_coords[0][1] - bin_coords[0][0],
            #              bin_coords[1][1] - bin_coords[1][0]] # regular grid
            # what about using np.histogramdd ???
            #self.dist = self.nheights / float(np.sum(self.nheights * np.prod(bin_widths)))
            #self._hpdf = np.hstack([0.0, self._hpdf, 0.0])

            # bin-center coordinates
            self.edgesm = []
            for edge in self.edges:
                self.edgesm.append((edge[1:] + edge[:-1]) / 2)
            xgrid, ygrid = np.meshgrid(self.edgesm[0], self.edgesm[1])
            self.bin_coords = np.column_stack([xgrid.ravel(), ygrid.ravel()])

            # self.nheights is already non-negative with its minimum removed
            # (see above), so no further cleanup is needed here
            # normalize PDF
            delta_params = np.outer(np.diff(self.edges[0]),
                                    np.diff(self.edges[1])).flatten()
            norm = np.sum(self.nheights * delta_params)
            self.dist = self.nheights / norm
Example #13
def generate_3d_distrib(xin, yin, zin, pdf3d, num, eps=2.2e-16):
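    # Sample (x, y, z) by the chain rule: draw x from its marginal histogram,
    # then y from p(y|x), then z from p(z|x, y), each via a 1D rv_histogram.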
    samples = np.zeros([num, 3])
    #https://stackoverflow.com/questions/11144513/cartesian-product-of-x-and-y-array-points-into-single-array-of-2d-points
    combined_x_y_arrays = np.transpose([np.tile(xin, len(yin)), np.repeat(yin, len(xin))])
    print(combined_x_y_arrays)
    mytree = cKDTree(combined_x_y_arrays)

    xpdf = pdf3d.sum(axis=(1,2))
    print('xpdf')
    print(xpdf)
    xbins = np.append(xin, xin[-1]+xin[1]-xin[0])
    ybins = np.append(yin, yin[-1]+yin[1]-yin[0])
    zbins = np.append(zin, zin[-1]+zin[1]-zin[0])
    rv = rv_histogram((xpdf, xbins))
    xsamples = rv.rvs(size=num)
    samples[:, 0] = xsamples
    pdf2d = pdf3d.sum(axis=2)
    yfunc = interp1d(xin, pdf2d, axis=0)
    #zfunc = RectBivariateSplineAxis12(xin, yin, pdf3d, axis=1)
    ypdfs = yfunc(xsamples)

    for i in range(num):
        rv = rv_histogram((ypdfs[i], ybins))
        y = rv.rvs()
        samples[i, 1] = y
        if(i%1000==0):
            print(i)
        x = xsamples[i]
        dist, index = mytree.query([x, y])
        #print(dist)
        if True:  # the KD-tree lookup is always used; the spline branch below is dead code
            #print("use kdt tree")
            #xn, yn = combined_x_y_arrays[index]
            #print(xn, yn)
            xindex = index % len(xin)
            yindex = (index - xindex)/len(xin)
            #print(yindex)
            zpdf = pdf3d[xindex, int(yindex), :]
            if(i%1000==0):
                print("zpdf")
                print(repr(zpdf))
            rv = rv_histogram((zpdf, zbins))
        else:
            zpdf = RectBivariateSplineAxis12(xin, yin, pdf3d, xsamples[i], y)
            rv = rv_histogram((zpdf, zbins))
        samples[i, 2] = rv.rvs()
    return samples
Example #14
def calculate_pdf_and_cdf(predictor):
    inferences = np.loadtxt(
        'yips_evaluation/inferences-{}.txt'.format(predictor), delimiter=',')
    labels = np.loadtxt('yips_evaluation/labels.txt', delimiter=',')
    errors = []
    print(inferences.shape)
    for i in range(inferences.shape[0]):
        infer, label = inferences[i], labels[i]
        if label[-1] == 1:
            error = label[:-1] - infer[:-1]
            error[-1] = (error[-1] + np.pi) % (2 * np.pi) - np.pi
            errors.append(list(error))
    errors = np.array(errors)
    print(errors[:, 0])

    fontsize = 70
    error_number = 2
    x_e = errors[:, error_number]
    error_labels = ['$X_e$[m]', '$Y_e$[m]', '$\\Phi_e$[rad]']
    x_es = np.linspace(errors[:, error_number].min(),
                       errors[:, error_number].max(), 300)
    bins = 40
    print('Skewness: {}, kurtosis: {}, K2&P-value: {}'.format(
        st.skew(x_e), st.kurtosis(x_e), st.normaltest(x_e)))
    # ax = new_figure(fontsize=fontsize, y_label='Probability', x_label=error_labels[error_number])
    # ax.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1', alpha=1.0,
    #         cumulative=True, rwidth=0.8, linewidth=12, color='C1', label='Data')
    # ax.plot(x_es, st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es), linewidth=12, color='C0', label='CDF')
    # ax.legend(prop={'size': fontsize}, loc=2)

    ax1 = new_figure(fontsize=fontsize,
                     y_label='',
                     x_label=error_labels[error_number])
    ax1.hist(x_e,
             bins=bins,
             density=1,
             histtype='bar',
             facecolor='C1',
             alpha=1.0,
             cumulative=False,
             rwidth=0.8,
             linewidth=12,
             color='C1',
             label='Data')
    ax1.plot(x_es,
             st.gaussian_kde(x_e).pdf(x_es),
             linewidth=12,
             color='C0',
             label='PDF')
    ax1.plot(x_es,
             st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es),
             linewidth=12,
             color='C3',
             label='CDF')
    ax1.set_xticks([-3, -1.5, 0, 1.5, 3])
    ax1.legend(prop={'size': fontsize}, loc=2, frameon=False)
    plt.show()
Example #15
	def fit( self , Y , X ):
		"""
			Fit of the quantile mapping model
			
			Parameters
			----------
			Y	: np.array[ shape = (n_samples,n_features) ]
				Reference dataset
			X	: np.array[ shape = (n_samples,n_features) ]
				Biased dataset
		"""
		if len(X.shape) == 1:
			X = X.reshape( (X.size,1) )
		if len(Y.shape) == 1:
			Y = Y.reshape( (Y.size,1) )
		self._n_features = X.shape[1]
		
		if self.bins is None:
			self.bins = self._bin_estimator( Y , X )
		self._rvY = [sc.rv_histogram( np.histogram( Y[:,i] , self.bins[i] ) ) for i in range(self._n_features)]
		self._rvX = [sc.rv_histogram( np.histogram( X[:,i] , self.bins[i] ) ) for i in range(self._n_features)]
Example #16
def hist_entropy(
    X: np.ndarray,
    bins: Union[str, int] = "auto",
    correction: bool = True,
    hist_kwargs: Optional[Dict] = {},
) -> float:
    """Calculates the entropy using the histogram of a univariate dataset.
    Option to do a Miller Maddow correction.
    
    Parameters
    ----------
    X : np.ndarray, (n_samples)
        the univariate input dataset
    
    bins : {str, int}, default='auto'
        the number of bins to use for the histogram estimation
    
    correction : bool, default=True
        implements the Miller-Maddow correction for the histogram
        entropy estimation.
    
    hist_kwargs: Optional[Dict], default={}
        the histogram kwargs to be used when constructing the histogram
        See documentation for more details:
        https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html

    Returns
    -------
    H_hist_entropy : float
        the entropy for this univariate histogram

    Example
    -------
    >>> from scipy import stats
    >>> from pysim.information import histogram_entropy
    >>> X = stats.gamma(a=10).rvs(1_000, random_state=123)
    >>> histogram_entropy(X)
    array(2.52771628)
    """
    # get histogram
    hist_counts = np.histogram(X, bins=bins, **hist_kwargs)

    # create random variable
    hist_dist = stats.rv_histogram(hist_counts)

    # calculate entropy
    H = hist_dist.entropy()

    # MLE Estimator with Miller-Maddow Correction
    if correction:
        H += 0.5 * (np.sum(hist_counts[0] > 0) - 1) / hist_counts[0].sum()

    return H
Example #17
def sample_histogram_node(node, n_samples, data, rand_gen):
    assert isinstance(node, Histogram)
    assert n_samples > 0
    # sample the value at each bin according to the densities of each bin
    if node.meta_type == MetaType.DISCRETE or node.meta_type == MetaType.BINARY:
        X = rand_gen.choice(np.array(node.bin_repr_points),
                            p=node.densities,
                            size=n_samples)
    else:
        X = rv_histogram((node.densities,
                          node.breaks)).ppf(rand_gen.random_sample(n_samples))

    return X
Example #18
def do_MLE_withWeights(data, dist, minimum, maximum, bw):
    nbins = int( (maximum - minimum) / bw )
    rawData = data[np.where((minimum<=data)&(maximum>=data))]
    sums, bins = np.histogram(rawData, bins=nbins, range=[minimum, maximum])
    bincenters = (lambda v: 0.5*(v[1:]+v[:-1]))(bins)
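    # re-bin using weights = counts / bin center, i.e. apply a 1/x weighting
    # to the histogram density before fitting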
    sums, bins = np.histogram(bins[:-1], bins=nbins, range=[minimum, maximum], \
                              density=True, weights=sums/bincenters)
    hist_dist = rv_histogram((sums, bins))
    #pars = gamma.fit(weightedData, floc=0.0)
    pars = dist.fit(hist_dist.rvs(size=10000000), floc=0.0)
    a1, loc1, scale1 = pars
    print(minimum, maximum, a1, loc1, scale1, a1 * scale1, a1 * scale1**2)
    return a1, scale1
Example #19
    def plot_result(self):
        fig, axes = plt.subplots(4, 2, figsize=(7, 6))
        xrange = np.arange(2, self.n + 1)
        ax_left = axes[:, 0].ravel()
        ax_right = axes[:, 1].ravel()
        ax_left[0].plot(xrange, self.lhoods[1:], linewidth=0.69)
        ax_left[0].set_title(r'Posterior trace')
        ax_left[1].plot(xrange, self.thetas.T[0][1:], linewidth=0.69)
        ax_left[1].set_title(r'$w_1^t$ (slope) trace')
        ax_left[2].plot(xrange, self.thetas.T[1][1:], linewidth=0.69)
        ax_left[2].set_title(r'$w_0^t$ (intercept) trace')
        ax_left[3].plot(xrange, self.thetas.T[2][1:], linewidth=0.69)
        ax_left[3].set_title(r'$\beta^t$ trace')

        ax_right[0].hist(self.lhoods[1:], bins=40)
        ax_right[0].set_title(r'Posterior hist')
        ax_right[1].hist(self.thetas.T[0][1:], bins=40)
        ax_right[1].set_title(r'$w_1^t$ hist')
        ax_right[2].hist(self.thetas.T[1][1:], bins=40)
        ax_right[2].set_title(r'$w_0^t$ hist')
        ax_right[3].hist(self.thetas.T[2][1:], bins=40)
        ax_right[3].set_title(r'$\beta^t$ hist')
        fig.suptitle('Metropolis')
        fig.tight_layout()
        plt.savefig('metropolis2.pdf')
        plt.show()

        print('Enter burnout cutoff:')
        burnout = int(input())
        xrange = np.linspace(1, 10, 10000)
        slope_hist = rv_histogram(
            np.histogram(self.thetas.T[0][burnout:], bins=100))
        slope = xrange[slope_hist.pdf(xrange).argmax()]

        xrange = np.linspace(1, 5000, 10000)
        intercept_hist = rv_histogram(
            np.histogram(self.thetas.T[1][burnout:], bins=100))
        intercept = xrange[intercept_hist.pdf(xrange).argmax()]
        return intercept, slope
Example #20
def UL_uncert(chain, p=0.95):
    corr = acor(chain)[0]
    N = len(chain)
    Neff = N / corr

    hist = np.histogram(chain, bins=100)
    pdf = ss.rv_histogram(hist).pdf

    UL = np.percentile(chain, 100 * p)  # 95 for 95% (not 0.95)
    pUL = pdf(UL)
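    # standard error of an empirical p-quantile: Var(q_p) ~ p(1-p) / (N_eff * pdf(q_p)^2)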
    dUL = np.sqrt(p * (1 - p) / Neff) / pUL

    return UL, dUL
Example #21
    def predict(self, cond_x, random_x):
        # clip conditioning values into the range covered by the histogram bins
        cond_x_filtered = np.where(cond_x > self.x_bins.max(),
                                   self.x_bins.max(), cond_x)
        cond_x_filtered = np.where(cond_x < self.x_bins.min(),
                                   self.x_bins.min(), cond_x_filtered)
        # map standard-normal draws to uniform percentiles
        random_percentile = norm.cdf(random_x)
        sampled_u = np.zeros(cond_x.shape)
        for c, cond_x_val in enumerate(cond_x_filtered):
            x_bin = np.searchsorted(self.x_bins, cond_x_val)
            # inverse-transform sampling from the conditional histogram for this x bin
            sampled_u[c] = rv_histogram(
                (self.model[:, x_bin[0]], self.u_bins)).ppf(random_percentile[c])
        return sampled_u.ravel()
Example #22
    def texture_stats(self, patch):
        glcm = greycomatrix(
            patch.astype('int'),
            [3],
            [0, 0.25, 0.5],
            256,
            symmetric=True,
            normed=True,
        )
        dissimilarity = greycoprops(glcm, 'dissimilarity')[0, 0]
        correlation = greycoprops(glcm, 'correlation')[0, 0]
        hist = np.histogram(patch, bins='fd')
        distribution = stats.rv_histogram(hist)
        return patch.std(), distribution.entropy(), dissimilarity, correlation
Example #23
def sample_decorrelation_phase(L,
                               coherence,
                               size=1,
                               phi_num=1000,
                               display=False,
                               scale=1.0,
                               font_size=12):
    '''Sample decorrelation phase noise with PDF determined by L and coherence
    Inputs:
        L         - int, multilook number
        coherence - float, spatial coherence
        size      - int, sample number
    Output:
        sample    - 1D np.array in size of (size,), sampled phase
    Example:
        unw_n = sample_decorrelation_phase(L=1, coherence=0.7, size=100000, display=True)
    '''
    size = int(size)

    phiMax = np.pi * float(scale)
    pdf = ifginv.phase_pdf_ds(
        int(L), coherence,
        phi_num=phi_num)[0].flatten()  #for PS: ifginv.phase_variance_ps()
    phi = np.linspace(-phiMax, phiMax, phi_num + 1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))
    #sample = np.nan
    #while sample is np.nan:
    sample = phi_dist.rvs(size=size)

    if display:
        #size = 10000
        fig, ax = plt.subplots(figsize=[5, 3])
        ax.hist(sample,
                bins=50,
                density=True,
                label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.2f, sample size = %d' %
                     (L, coherence, size),
                     fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return sample
Example #24
    def update_data(self, data, debug):
        self.data = data
        bins = np.linspace(0, 1, self.nr_bins)
        hist, edges = np.histogram(self.data, bins=bins, density=True)
        hist /= np.sum(hist)
        self.hist_dist = stats.rv_histogram((hist, edges))
        # calculate the bin edges by dividing the quantiles equally on the CDF:
        quantiles = np.linspace(0, 1, num=self.nr_bins + 1)  # quantile edges from the right
        self.bin_edges = self.hist_dist.ppf(quantiles)  # invCDF to find the bin edges
        self.bin_centers = np.array(self.bin_edges[:-1] +
                                    np.diff(self.bin_edges) / 2)
        if debug:
            self.plot_debug()
Example #25
def _detect_dist_continuous(col_stats):
    """
    Detects type of continuous distribution based on Kolmogorov-Smirnov Goodness-of-fit test, https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test.
    Args:
        col_stats (dict): Column data statistics. The column data must be of a continuous numerical random variable.
    Returns:
        dist (dict): Dictionary stating distribution type along with other parameters for the distribution.
    """
    bin_counts, bin_edges = (
        col_stats["histogram"]["bin_counts"],
        col_stats["histogram"]["bin_edges"],
    )

    # Create a continuous distribution from the histogram and sample data from it
    hist_dist = stats.rv_histogram((bin_counts, bin_edges))
    hist_mean = hist_dist.mean()
    observed_samples = hist_dist.rvs(size=1000)

    # Center the distribution around 0
    observed_samples -= hist_mean

    # Distributions to test against (must be a continuous distribution from scipy.stats)
    # Distribution name -> list of positional arguments for the distribution
    # If the observed histogram is centered around 0, means of distributions set to 0
    test_dists = (
        # norm(loc, scale)
        ("norm", (0, col_stats["stddev"])),
        # skewnorm(a, loc, scale)
        ("skewnorm", (col_stats["skewness"], 0, col_stats["stddev"])),
        # uniform(loc, scale)
        ("uniform", (col_stats["min"], col_stats["max"] - col_stats["min"])),
    )

    dist = {}
    max_p = 0

    for dist_name, dist_args in test_dists:

        # overfitting on purpose for testing
        # method = getattr(stats, dist_name)
        # dist_args = method.fit(observed_samples)

        p = stats.kstest(observed_samples, dist_name, dist_args)[1]
        if p > max_p:
            dist["dist"] = dist_name
            dist["args"] = dist_args
            max_p = p

    return dist
Example #26
def sample_decorrelation_phase(coherence,
                               L,
                               size=1,
                               phi_num=1000,
                               display=False,
                               scale=1.0,
                               font_size=12):
    '''Sample decorrelation phase based on PDF determined by L and coherence value
    Parameters: coherence - float, spatial coherence
                L         - int, number of independent looks
                size      - int, number of samples to draw
                phi_num   - int, number of points at which the phase PDF is evaluated
    Returns:    phase     - 1D np.array in size of (size,), sampled phase
    Examples:
        decor_noise = sample_decorrelation_phase(0.7, L=1, size=1e4, display=True)
    '''
    size = int(size)
    phiMax = np.pi * float(scale)

    # numerical solution of phase PDF for distributed scatterers
    pdf = phase_pdf_ds(int(L), coherence, phi_num=phi_num)[0].flatten()

    # generate phase distribution
    phi = np.linspace(-phiMax, phiMax, phi_num + 1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))

    # sample from the distribution
    phase = phi_dist.rvs(size=size)

    if display:
        fig, ax = plt.subplots(figsize=[5, 3])
        ax.hist(phase,
                bins=50,
                density=True,
                label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.2f, sample size = %d' %
                     (L, coherence, size),
                     fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return phase
Example #27
def calculate_pdf_and_cdf2(targets):
    fontsize = 70
    bins = 40
    error_number = 1
    error_labels = ['$X_e$', '$Y_e$', '$\\Phi_e$']
    plot_labels = ['VGG-19', 'SVG-16', 'VGG-16', 'ResNet-50']
    plot_labels.reverse()
    ax = new_figure(fontsize=fontsize,
                    y_label='',
                    x_label=error_labels[error_number])
    ax1 = new_figure(fontsize=fontsize,
                     y_label='',
                     x_label=error_labels[error_number])
    targets.reverse()
    print(targets)
    for j, tar in enumerate(targets):
        inferences = np.loadtxt(
            'yips_evaluation/inferences-{}.txt'.format(tar), delimiter=',')
        labels = np.loadtxt('yips_evaluation/labels.txt', delimiter=',')
        errors = []
        for i in range(inferences.shape[0]):
            infer, label = inferences[i], labels[i]
            if label[-1] == 1:
                error = label[:-1] - infer[:-1]
                error[-1] = (error[-1] + np.pi) % (2 * np.pi) - np.pi
                errors.append(list(error))
        errors = np.array(errors)

        x_e = errors[:, error_number]
        x_es = np.linspace(errors[:, error_number].min(),
                           errors[:, error_number].max(), 300)
        # ax.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1', alpha=1.0,
        #         cumulative=True, rwidth=0.8, linewidth=12, color='C1', label='Data')
        ax.plot(x_es,
                st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es),
                linewidth=12,
                color='C{}'.format(j),
                label=plot_labels[j])
        ax.legend(prop={'size': fontsize}, loc=2, frameon=False)

        # ax1.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1',
        #          alpha=1.0, cumulative=False, rwidth=0.8, linewidth=12, color='C1', label='Data')
        ax1.plot(x_es,
                 st.gaussian_kde(x_e).pdf(x_es),
                 linewidth=12,
                 color='C{}'.format(j),
                 label=plot_labels[j])
        ax1.legend(prop={'size': fontsize}, loc=2, frameon=False)
    plt.show()
Example #28
def resources(log,
              phi=None,
              sw=None,
              cutoff=None,
              a=1,
              h=1,
              beta=1.1,
              fluid='oil',
              m=1000,
              seed=1706):
    np.random.seed(seed=seed)
    # Define coefficients depending on the fluid
    if fluid == 'oil':
        c = 7.758  #Mbbl
        name = 'OOIP'
    elif fluid == 'gas':
        c = 0.000043560  #Bscf
        name = 'OGIP'
    r = pd.DataFrame()
    for p in phi:
        phieh = np.histogram(log.loc[log[p] > cutoff[0], p])
        phie_dist = st.rv_histogram(phieh)
        phie_random = phie_dist.rvs(size=m)
        for s in sw:
            swh = np.histogram(log.loc[log[s] < cutoff[1], s])
            sw_dist = st.rv_histogram(swh)
            sw_random = sw_dist.rvs(size=m)
            #Original hydrocarbon in place
            orip = c * a * h * phie_random * (1 - sw_random) * (1 / beta)
            orip = pd.DataFrame({name: orip})
            orip['PhieCurve'] = p
            orip['SwCurve'] = s
            orip['Phie'] = phie_random
            orip['Sw'] = sw_random
            r = pd.concat([r, orip])  # DataFrame.append was removed in pandas 2.0
    return r.reset_index(drop=True)
Example #29
    def add_variable(self, scenarios, debug=False):
        samples = np.empty((0, ))
        for scenario in scenarios:
            rv = stats.norm.rvs(scenario.mu, scenario.sigma, 250)
            samples = np.append(samples, rv, axis=0)
        # print(samples.shape)
        bins = np.linspace(0, 1, 100)
        hist, edges = np.histogram(samples, bins=bins, density=True)
        hist = hist / np.sum(hist)
        hist_dist = stats.rv_histogram((hist, edges))
        # debug
        if debug:
            X = np.linspace(0, 1, num=300 + 1)
            plt.plot(X, hist_dist.pdf(X))
            plt.show()
        self.forecast_variables.append(hist_dist)
Example #30
    def ks_test(self, ax: Axes):
        rtts, rtts_control = self.get_comparison_rtts()
        stat, pval = stats.ks_2samp(rtts, rtts_control)
        ax.hist(rtts, color='Orange', bins=1000, alpha=0.5,
                label='Normalized RTTs with flooder')
        ax.hist(rtts_control, color='Blue', bins=1000,
                alpha=0.5, label='Normalized RTTs under control')
        ax.legend()
        ax.set_title('RTT histogram normalized by mean and stdev')

        hist = stats.rv_histogram(np.histogram(rtts_control, bins=1000))
        ax1 = plotter.get_new_subplot(
            'QQ Plot for RTTs with flooder against control')
        stats.probplot(rtts, plot=ax1, fit=True, dist=hist)
        ax1.set_title('RTTs QQ Plot')
        return "Kolmogorov Smirnov Two Sample Test: statistic value: %0.2f, pvalue: %0.2f" % (stat, pval)
Example #31
def sample_decorrelation_phase(L, coherence, size=1, display=False, scale=1.0, font_size=12):
    '''Sample decorrelation phase noise with PDF determined by L and coherence
    Inputs:
        L         - int, multilook number
        coherence - float, spatial coherence
        size      - int, sample number
    Output:
        sample    - 1D np.array in size of (size,), sampled phase
    Example:
        unw_n = sample_decorrelation_phase(L=1, coherence=0.7, size=100000, display=True)
    '''
    phiNum = 100
    phiMax = np.pi * float(scale)
    pdf = ifginv.phase_pdf_ds(int(L), coherence, phi_num=phiNum)[0].flatten()   #for PS: ifginv.phase_variance_ps()
    phi = np.linspace(-phiMax, phiMax, phiNum+1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))
    #sample = np.nan
    #while sample is np.nan:
    sample = phi_dist.rvs(size=size)

    if display:
        #size = 10000
        fig, ax = plt.subplots(figsize=[5,3])
        ax.hist(sample, bins=50, density=True, label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.1f, sample size = %d' % (L, coherence, size), fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return sample
Example #32
# These distributions fail the complex derivative test below.
# Here 'fail' mean produce wrong results and/or raise exceptions, depending
# on the implementation details of corresponding special functions.
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
fails_cmplx = set(['beta', 'betaprime', 'chi', 'chi2', 'dgamma', 'dweibull',
                   'erlang', 'f', 'gamma', 'gausshyper', 'gengamma',
                   'gennorm', 'genpareto', 'halfgennorm', 'invgamma',
                   'ksone', 'kstwobign', 'levy_l', 'loggamma', 'logistic',
                   'maxwell', 'nakagami', 'ncf', 'nct', 'ncx2', 'norminvgauss',
                   'pearson3', 'rice', 't', 'skewnorm', 'tukeylambda',
                   'vonmises', 'vonmises_line', 'rv_histogram_instance'])

_h = np.histogram([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6,
                   6, 6, 6, 7, 7, 7, 8, 8, 9], bins=8)
histogram_test_instance = stats.rv_histogram(_h)


def cases_test_cont_basic():
    for distname, arg in distcont[:] + [(histogram_test_instance, tuple())]:
        if distname == 'levy_stable':
            continue
        elif distname in distslow:
            yield pytest.param(distname, arg, marks=pytest.mark.slow)
        else:
            yield distname, arg


@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
def test_cont_basic(distname, arg):
    # this test skips slow distributions
Example #33
    nb_generated = 10  # number of random phrases to generate and display

    if False:  #NET_CONFIG['arch'] == 'vae':
        # For a variational autoencoder, the decoder input should be
        # normally distributed noise with unit variance.
        X_probe = np.random.normal(loc=1.0, scale=1.0, size=(nb_generated, latent_dim))
    else:
        # Feed the decoder a latent-variable vector whose per-component spread
        # roughly matches the distribution of the training data. Histograms for
        # each latent variable have already been collected; load and use them.
        with open(latent_histos_path, 'rb') as f:
            latent_histos = pickle.load(f)

        pdfs = []
        for idim, histo in enumerate(latent_histos):
            pdfs.append(stats.rv_histogram(histogram=histo))

        X_probe = np.zeros((nb_generated, latent_dim))
        for idim in range(latent_dim):
            p = pdfs[idim].rvs(size=nb_generated)
            X_probe[:, idim] = p

    # Pass the prepared vector through the decoder.
    y_probe = decoder_model.predict(X_probe)

    # decode the model outputs
    result_phrases = w2v_decoder.decode_output(y_probe)
    for phrase in result_phrases:
        print(u'{}'.format(phrase))