def plot_calibration_curve_multiple_measurements_withax(
        ypreds_probabilistic, yreal_dev, nr_quantiles=100, debug=False,
        ax=None, name=''):
    quantiles = []
    for i in range(len(ypreds_probabilistic)):
        yp_horizon = ypreds_probabilistic[i]  # 48 * rv_histogram
        yr_horizon = yreal_dev[i]
        for j in range(len(yr_horizon)):
            yp = yp_horizon[j]  # rv
            yr = yr_horizon[j]  # real value
            samples = stats.norm.rvs(
                yp.mean(), yp.std(), 1000, random_state=123)  # TODO: correct the sampling
            samples = np.append(samples, yr)
            samples = np.sort(samples)
            yr_idx = np.where(yr == samples)[0]
            yr_idx = yr_idx / (len(samples) - 1)
            quantiles.append(yr_idx[0])
    #plt.hist(quantiles)
    #plt.show()

    X = np.linspace(0, 1, nr_quantiles)
    y = []
    for i in range(len(X)):
        y.append(np.sum([1 for q in quantiles if q <= X[i]]))
    y = np.array(y)
    y = y / len(quantiles)

    ax.plot(np.append(0, X), np.append(0, y), label=name, linewidth=2)
    #ax.plot(X, X, 'k-')
    #plt.title('Calibration Curve %s' % name)
    ax.set_xlabel('Quantiles')
    ax.set_ylabel('Frequency')
    ax.set_xlim([-0.005, 1.002])
    ax.set_ylim([-0.002, 1.002])
    ax.legend()
    #plt.show()
    return

    # NOTE: the code below is unreachable (left over after the early return above)
    # real data distribution
    data = []
    for y in yreal_dev:
        data.append(list(y))
    rv = stats.rv_histogram(np.histogram(data, bins=10))
    X = np.linspace(0, 1, 101)

    # horizon forecast distribution
    horizon_distribution = []
    for probabilisticForecast in ypreds_probabilistic:
        for rv in probabilisticForecast.forecast_variables:
            samples = stats.norm.rvs(rv.mean(), rv.std(), 1000, random_state=123)
            horizon_distribution.append(samples)
    rv_horizon = stats.rv_histogram(np.histogram(horizon_distribution, bins=50))

    # calibration curve
    plot_calibration_curve_withax(rv, rv_horizon, nr_quantiles, ax, name)
def match_rdist(df, sample, rtol=5, seed=0, pltname=None, random=False,
                plotpath='figures/sampling/match_radial_dists/'):
    # get MW satellite distribution
    MC_dwarfs = np.load('data/sampling/'+sample+'.npy')
    dists = np.median(MC_dwarfs[:,6,:], axis=1)

    # get CDF for satellites
    edges = np.arange(301, step=0.1)
    if random:
        hist = rv_histogram(np.histogram(dists, bins=edges))
    else:
        hist = rv_histogram(np.histogram(dists, bins=edges), seed=seed)

    # use inverse transform method with r +- rtol to select subhalos
    subs = df.copy()
    selected = []
    while True:
        r_sample = hist.rvs()
        diff = np.abs(subs.r - r_sample)
        if np.min(diff) < rtol:
            index = np.argmin(np.abs(subs.r.values - r_sample))
            name = subs.iloc[index].name
            subs.drop([name], inplace=True)
            selected.append(name)
        else:
            break
    survived = df.loc[selected]

    # plot if wanted
    if pltname is not None:
        plot_match(plotpath+pltname+'.png', df, survived, dists)

    return survived
def plot_calibration_curve_single_measurement(probabilisticForecast, y,
                                              nr_quantiles=21, debug=False):
    # real data distribution
    rv = stats.rv_histogram(np.histogram(y, bins=10))
    X = np.linspace(0, 1, 11)
    if debug:
        plt.plot(X, rv.pdf(X), label='Data')

    # horizon forecast distribution
    # (separate loop variable so the data distribution `rv` is not shadowed)
    horizon_distribution = []
    for fv in probabilisticForecast.forecast_variables:
        samples = stats.norm.rvs(fv.mean(), fv.std(), 1000, random_state=123)
        horizon_distribution.append(samples)
    rv_horizon = stats.rv_histogram(np.histogram(horizon_distribution, bins=10))
    if debug:
        plt.plot(X, rv_horizon.pdf(X), label='Forecast')
        plt.title('PDF')
        plt.xlabel('Power')
        plt.ylabel('Frequency')
        plt.legend()
        plt.show()

    # calibration curve
    plot_calibration_curve(rv, rv_horizon, nr_quantiles)
def createSample(self, numTrials):
    # Determine proportion of each response type
    [unique, counts] = np.unique(self.simDistrChoice, return_counts=True)
    totalSims = np.sum(~np.isnan(self.simDistrChoice))

    simPDF = np.empty(self.model.numAcc, dtype=object)
    numChoices = np.zeros(self.model.numAcc, dtype=int)
    sampleRT = np.empty(self.model.numAcc, dtype=object)
    simSampleHist = np.empty(self.model.numAcc, dtype=object)

    for a in range(self.model.numAcc):
        # Convert simulation histograms into PDFs (normalized so sum = 1 for
        # each accumulator)
        simPDF[a] = rv_histogram([self.simChoiceHist[a], self.bins])

        # Compute number of sim trials for each response type
        if a in unique:
            numChoices[a] = np.floor(
                np.divide(counts[np.where(unique == a)[0]], totalSims,
                          dtype=float) * numTrials)

        # For each response type, draw the corresponding number of sim trials
        # from respective PDFs
        sampleRT[a] = simPDF[a].rvs(size=numChoices[a])

        # Compute histograms for each accumulator's RTs
        [simSampleHist[a], _] = np.histogram(
            sampleRT[a], bins=self.model.maxRT // self.model.timeStep,
            range=(0, self.model.maxRT))

    # Concatenate generated data across response types
    simSampleChoice = np.repeat(np.arange(self.model.numAcc), numChoices)
    simSampleRT = np.concatenate(sampleRT)

    return simSampleChoice, simSampleRT, simSampleHist
def __init__(self, X, bin_width=None, bin_origin=None):
    """
    Initialisation of Copula

    Parameters
    ----------
    X : np.array[ shape = (n_samples,n_features) ]
        Dataset to fit the Copula
    bin_width : None or np.array[ shape = (n_features) ]
        Length of cells. Each dimension of bin_width is the length of regular
        cells in each dimension
    bin_origin : None or np.array[ shape = (n_features) ]
        Coordinate of lower corner of one cell

    Attributes
    ----------
    dim : int
        Dimension
    muX : Apyga.stats.SparseHist
        Multivariate histogram
    muXi : list(scipy.stats.rv_histogram)
        Marginals of X
    """
    size, self.dim = X.shape
    self.muX = SparseHist(X, bin_width, bin_origin)
    self.muXi = [
        sc.rv_histogram(
            np.histogram(X[:, i],
                         bins=np.arange(X[:, i].min(), X[:, i].max(),
                                        self.muX.bin_width[i])))
        for i in range(self.dim)
    ]
def generate_distribution(data, dist_name='hist', bins=200):
    """Generate distribution"""
    _data = data[~np.isnan(data)]
    out_dist = None
    out_params = None
    if dist_name == 'hist':
        h = np.histogram(_data, bins=bins, density=True)
        out_dist = st.rv_histogram(h)
    else:
        y, x = np.histogram(_data, bins=bins, density=True)
        x = (x + np.roll(x, -1))[:-1] / 2.0

        # Try to fit the distribution
        try:
            # Ignore warnings from data that can't be fit
            with warnings.catch_warnings():
                warnings.filterwarnings('error')
                # fit dist to data
                out_dist = getattr(st, dist_name)
                out_params = out_dist.fit(_data)
        except Exception:
            return (None, None)
    return (out_dist, out_params)
def __init__(
    self,
    X: np.ndarray,
    bins: Union[int, str] = "auto",
    alpha: float = 1e-10,
    bound_ext: float = 0.1,
):
    estimators = []
    for iX in X.T:
        diff = iX.max() - iX.min()
        lower_bound = iX.min() - bound_ext * diff
        upper_bound = iX.max() + bound_ext * diff

        # create histogram
        if bins in ["blocks", "knuth"]:
            hist = astro_hist(iX, bins=bins, range=(lower_bound, upper_bound))
        else:
            hist = np.histogram(iX, bins=bins, range=(lower_bound, upper_bound))

        # create histogram object
        i_estimator = rv_histogram(hist)

        # add some regularization
        i_estimator._hpdf += alpha

        estimators.append(i_estimator)

    self.estimators = estimators
def spectra_axial_spray(particle, filename, groupname, fraction_of_shot):
    print(particle, filename, groupname, fraction_of_shot)
    fin = h5py.File(filename, 'r')
    g = fin[groupname]
    photon_energy = g['energy'][:]*MeV
    thetax = g['thetax'][:]*mrad
    thetay = g['thetay'][:]*mrad
    d2W = g['d2W'][:]*joule/(mrad**2*MeV)
    fin.close()

    dthetax = thetax[1]-thetax[0]
    dthetay = thetay[1]-thetay[0]
    spectral_energy_density = d2W.sum(axis=(1,2))*dthetax*dthetay
    spectral_photon_density = spectral_energy_density/photon_energy
    energy = simps(spectral_energy_density, photon_energy)
    num_photons = simps(spectral_photon_density, photon_energy)

    photon_energy_bins = np.append(photon_energy, photon_energy[-1]+1*MeV)
    photon_energy_binwidth = (photon_energy_bins[1:] - photon_energy_bins[:-1])
    rv = rv_histogram((spectral_photon_density, photon_energy_bins))

    num_events = int(fraction_of_shot*num_photons)
    print('num_events=', num_events)
    for i in range(num_events):
        energy = rv.rvs()
        print(i, energy/MeV)
        yield particle, g4.G4ThreeVector(), g4.G4ThreeVector(0, 0, 1), energy
    print('yo')
    # PEP 479: raising StopIteration inside a generator is an error in
    # Python 3.7+; a plain return ends the generator.
    return
def __init__(
    self,
    data,
    bins: int = 20,
):
    self.histogram = np.histogram(data, bins=bins)
    self._distribution = rv_histogram(self.histogram)
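# Hedged usage sketch (not from the original source): assuming the __init__
# above belongs to a thin histogram-distribution wrapper, here given the
# hypothetical name HistogramDistribution, it could be exercised like this.
import numpy as np
from scipy.stats import rv_histogram


class HistogramDistribution:
    def __init__(self, data, bins: int = 20):
        self.histogram = np.histogram(data, bins=bins)
        self._distribution = rv_histogram(self.histogram)


rng = np.random.default_rng(0)
wrapper = HistogramDistribution(rng.normal(size=1000), bins=30)
print(wrapper._distribution.mean())       # histogram-based mean estimate
print(wrapper._distribution.rvs(size=5))  # draw new samples from the fitted histogram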
def generate_distribution(self):
    """Generate distribution"""
    _data = self.data[~np.isnan(self.data)]
    self.dist = None
    self.params = None
    if self.dist_name == 'hist':
        h = np.histogram(_data, bins=self.bins, density=True)
        self.dist = st.rv_histogram(h)
    else:
        # Try to fit the distribution
        try:
            # Ignore warnings from data that can't be fit
            with warnings.catch_warnings():
                warnings.filterwarnings('error')

                # fit dist to data
                self.dist = getattr(st, self.dist_name)

                # NOTE: self.params was reset to None above, so this branch is
                # never taken; kept here as in the original.
                if self.params is not None:
                    # Separate parts of parameters
                    arg = self.params[:-2]
                    loc = self.params[-2]
                    scale = self.params[-1]
                    self.params = self.dist.fit(_data, loc=loc, scale=scale, *arg)
                else:
                    self.params = self.dist.fit(_data)

                # Separate parts of parameters
                arg = self.params[:-2]
                loc = self.params[-2]
                scale = self.params[-1]
                self.dist = self.dist(loc=loc, scale=scale, *arg)
        except Exception as e:
            print(e)
def sample_posterior(posterior, place_bin_edges, n_samples=1000):
    """Samples the posterior positions.

    Parameters
    ----------
    posterior : xarray.DataArray, shape (n_time, n_position_bins) or
        shape (n_time, n_x_bins, n_y_bins)

    Returns
    -------
    posterior_samples : numpy.ndarray, shape (n_time, n_samples)

    """
    # Stack 2D positions into one dimension
    try:
        posterior = posterior.stack(z=["x_position", "y_position"]).values
    except (KeyError, AttributeError):
        posterior = np.asarray(posterior)

    place_bin_edges = place_bin_edges.squeeze()
    n_time = posterior.shape[0]

    posterior_samples = [
        rv_histogram((posterior[time_ind], place_bin_edges)).rvs(size=n_samples)
        for time_ind in range(n_time)
    ]

    return np.asarray(posterior_samples)
def __init__(self, edges, heights):
    self.edges = edges      # list of arrays for bin edges along each dim
    self.heights = heights  # n histogram values -> array of bin heights
    self.nheights = np.abs(self.heights)
    self.nheights = self.nheights - self.nheights.min()
    if len(self.edges) == 1:
        self.dist = scistats.rv_histogram((self.nheights, self.edges[0]))
    elif len(self.edges) == 2:
        #bin_coords = [np.unique(self.edges[:,0]),
        #              np.unique(self.edges[:,1])]
        #bin_widths = [bin_coords[0][1] - bin_coords[0][0],
        #              bin_coords[1][1] - bin_coords[1][0]]  # regular grid
        # what about using np.histogramdd ???
        #self.dist = self.nheights / float(np.sum(self.nheights * np.prod(bin_widths)))
        #self._hpdf = np.hstack([0.0, self._hpdf, 0.0])

        # bin-center coordinates
        self.edgesm = []
        for edge in self.edges:
            self.edgesm.append((edge[1:] + edge[:-1]) / 2)
        xgrid, ygrid = np.meshgrid(self.edgesm[0], self.edgesm[1])
        self.bin_coords = np.column_stack([xgrid.ravel(), ygrid.ravel()])

        # remove irregularities
        self.dist = self.nheights.copy()
        self.dist = np.abs(self.dist)
        self.dist -= np.min(self.dist)

        # normalize PDF
        delta_params = np.outer(np.diff(self.edges[0]),
                                np.diff(self.edges[1])).flatten()
        norm = np.sum(self.nheights * delta_params)
        self.dist = self.nheights / norm
def generate_3d_distrib(xin, yin, zin, pdf3d, num, eps=2.2e-16):
    samples = np.zeros([num, 3])
    # https://stackoverflow.com/questions/11144513/cartesian-product-of-x-and-y-array-points-into-single-array-of-2d-points
    combined_x_y_arrays = np.transpose(
        [np.tile(xin, len(yin)), np.repeat(yin, len(xin))])
    print(combined_x_y_arrays)
    mytree = cKDTree(combined_x_y_arrays)

    xpdf = pdf3d.sum(axis=(1, 2))
    print('xpdf')
    print(xpdf)

    xbins = np.append(xin, xin[-1] + xin[1] - xin[0])
    ybins = np.append(yin, yin[-1] + yin[1] - yin[0])
    zbins = np.append(zin, zin[-1] + zin[1] - zin[0])

    rv = rv_histogram((xpdf, xbins))
    xsamples = rv.rvs(size=num)
    samples[:, 0] = xsamples

    pdf2d = pdf3d.sum(axis=2)
    yfunc = interp1d(xin, pdf2d, axis=0)
    #zfunc = RectBivariateSplineAxis12(xin, yin, pdf3d, axis=1)
    ypdfs = yfunc(xsamples)

    for i in range(num):
        rv = rv_histogram((ypdfs[i], ybins))
        y = rv.rvs()
        samples[i, 1] = y
        if i % 1000 == 0:
            print(i)
        x = xsamples[i]
        dist, index = mytree.query([x, y])
        #print(dist)
        if True:
            #print("use kdt tree")
            #xn, yn = combined_x_y_arrays[index]
            #print(xn, yn)
            xindex = index % len(xin)
            yindex = (index - xindex) / len(xin)
            #print(yindex)
            zpdf = pdf3d[xindex, int(yindex), :]
            if i % 1000 == 0:
                print("zpdf")
                print(repr(zpdf))
            rv = rv_histogram((zpdf, zbins))
        else:
            zpdf = RectBivariateSplineAxis12(xin, yin, pdf3d, xsamples[i], y)
            rv = rv_histogram((zpdf, zbins))
        samples[i, 2] = rv.rvs()
    return samples
def calculate_pdf_and_cdf(predictor):
    inferences = np.loadtxt(
        'yips_evaluation/inferences-{}.txt'.format(predictor), delimiter=',')
    labels = np.loadtxt('yips_evaluation/labels.txt', delimiter=',')
    errors = []
    print(inferences.shape)
    for i in range(inferences.shape[0]):
        infer, label = inferences[i], labels[i]
        if label[-1] == 1:
            error = label[:-1] - infer[:-1]
            error[-1] = (error[-1] + np.pi) % (2 * np.pi) - np.pi
            errors.append(list(error))
    errors = np.array(errors)
    print(errors[:, 0])

    fontsize = 70
    error_number = 2
    x_e = errors[:, error_number]
    error_labels = ['$X_e$[m]', '$Y_e$[m]', '$\\Phi_e$[rad]']
    x_es = np.linspace(errors[:, error_number].min(),
                       errors[:, error_number].max(), 300)
    bins = 40
    print('Skewness: {}, kurtosis: {}, K2&P-value: {}'.format(
        st.skew(x_e), st.kurtosis(x_e), st.normaltest(x_e)))

    # ax = new_figure(fontsize=fontsize, y_label='Probability', x_label=error_labels[error_number])
    # ax.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1', alpha=1.0,
    #         cumulative=True, rwidth=0.8, linewidth=12, color='C1', label='Data')
    # ax.plot(x_es, st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es), linewidth=12, color='C0', label='CDF')
    # ax.legend(prop={'size': fontsize}, loc=2)

    ax1 = new_figure(fontsize=fontsize, y_label='', x_label=error_labels[error_number])
    ax1.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1', alpha=1.0,
             cumulative=False, rwidth=0.8, linewidth=12, color='C1', label='Data')
    ax1.plot(x_es, st.gaussian_kde(x_e).pdf(x_es), linewidth=12, color='C0', label='PDF')
    ax1.plot(x_es, st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es),
             linewidth=12, color='C3', label='CDF')
    ax1.set_xticks([-3, -1.5, 0, 1.5, 3])
    ax1.legend(prop={'size': fontsize}, loc=2, frameon=False)
    plt.show()
def fit( self , Y , X ):
    """ Fit of the quantile mapping model

    Parameters
    ----------
    Y : np.array[ shape = (n_samples,n_features) ]
        Reference dataset
    X : np.array[ shape = (n_samples,n_features) ]
        Biased dataset
    """
    if len(X.shape) == 1:
        X = X.reshape( (X.size,1) )
    if len(Y.shape) == 1:
        Y = Y.reshape( (Y.size,1) )
    self._n_features = X.shape[1]

    if self.bins is None:
        self.bins = self._bin_estimator( Y , X )

    self._rvY = [sc.rv_histogram( np.histogram( Y[:,i] , self.bins[i] ) )
                 for i in range(self._n_features)]
    self._rvX = [sc.rv_histogram( np.histogram( X[:,i] , self.bins[i] ) )
                 for i in range(self._n_features)]
def hist_entropy(
    X: np.ndarray,
    bins: Union[str, int] = "auto",
    correction: bool = True,
    hist_kwargs: Optional[Dict] = {},
) -> float:
    """Calculates the entropy using the histogram of a univariate dataset.
    Option to do a Miller-Maddow correction.

    Parameters
    ----------
    X : np.ndarray, (n_samples)
        the univariate input dataset

    bins : {str, int}, default='auto'
        the number of bins to use for the histogram estimation

    correction : bool, default=True
        implements the Miller-Maddow correction for the histogram
        entropy estimation.

    hist_kwargs: Optional[Dict], default={}
        the histogram kwargs to be used when constructing the histogram
        See documentation for more details:
        https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html

    Returns
    -------
    H_hist_entropy : float
        the entropy for this univariate histogram

    Example
    -------
    >> from scipy import stats
    >> from pysim.information import histogram_entropy
    >> X = stats.gamma(a=10).rvs(1_000, random_state=123)
    >> histogram_entropy(X)
    array(2.52771628)
    """
    # get histogram
    hist_counts = np.histogram(X, bins=bins, **hist_kwargs)

    # create random variable
    hist_dist = stats.rv_histogram(hist_counts)

    # calculate entropy
    H = hist_dist.entropy()

    # MLE Estimator with Miller-Maddow Correction
    if correction:
        H += 0.5 * (np.sum(hist_counts[0] > 0) - 1) / hist_counts[0].sum()

    return H
def sample_histogram_node(node, n_samples, data, rand_gen):
    assert isinstance(node, Histogram)
    assert n_samples > 0

    # sample the value at each bin according to the densities of each bin
    if node.meta_type == MetaType.DISCRETE or node.meta_type == MetaType.BINARY:
        X = rand_gen.choice(np.array(node.bin_repr_points), p=node.densities,
                            size=n_samples)
    else:
        X = rv_histogram((node.densities, node.breaks)).ppf(
            rand_gen.random_sample(n_samples))

    return X
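# Hedged illustration (not part of the original node-sampling code): the
# continuous branch above is inverse-transform sampling, pushing uniform
# draws through the histogram's inverse CDF (ppf). A self-contained sketch
# of that pattern with made-up densities and breaks:
import numpy as np
from scipy.stats import rv_histogram

densities = np.array([0.1, 0.4, 0.3, 0.2])    # bin probabilities (sum to 1)
breaks = np.array([0.0, 1.0, 2.0, 3.0, 4.0])  # bin edges, len(densities) + 1

rand_gen = np.random.RandomState(0)
u = rand_gen.random_sample(5)                 # uniforms on [0, 1)
samples = rv_histogram((densities, breaks)).ppf(u)
print(samples)                                # values distributed per the histogram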
def do_MLE_withWeights(data, dist, minimum, maximum, bw):
    nbins = int((maximum - minimum) / bw)
    rawData = data[np.where((minimum <= data) & (maximum >= data))]
    sums, bins = np.histogram(rawData, bins=nbins, range=[minimum, maximum])
    bincenters = (lambda v: 0.5*(v[1:]+v[:-1]))(bins)
    sums, bins = np.histogram(bins[:-1], bins=nbins, range=[minimum, maximum],
                              density=True, weights=sums/bincenters)
    hist_dist = rv_histogram((sums, bins))
    #pars = gamma.fit(weightedData, floc=0.0)
    pars = dist.fit(hist_dist.rvs(size=10000000), floc=0.0)
    a1, loc1, scale1 = pars
    print(minimum, maximum, a1, loc1, scale1, a1 * scale1, a1 * scale1**2)
    return a1, scale1
def plot_result(self):
    fig, axes = plt.subplots(4, 2, figsize=(7, 6))
    xrange = np.arange(2, self.n + 1)
    ax_left = axes[:, 0].ravel()
    ax_right = axes[:, 1].ravel()

    ax_left[0].plot(xrange, self.lhoods[1:], linewidth=0.69)
    ax_left[0].set_title(r'Posterior trace')
    ax_left[1].plot(xrange, self.thetas.T[0][1:], linewidth=0.69)
    ax_left[1].set_title(r'$w_1^t$ (slope) trace')
    ax_left[2].plot(xrange, self.thetas.T[1][1:], linewidth=0.69)
    ax_left[2].set_title(r'$w_0^t$ (intercept) trace')
    ax_left[3].plot(xrange, self.thetas.T[2][1:], linewidth=0.69)
    ax_left[3].set_title(r'$\beta^t$ trace')

    ax_right[0].hist(self.lhoods[1:], bins=40)
    ax_right[0].set_title(r'Posterior hist')
    ax_right[1].hist(self.thetas.T[0][1:], bins=40)
    ax_right[1].set_title(r'$w_1^t$ hist')
    ax_right[2].hist(self.thetas.T[1][1:], bins=40)
    ax_right[2].set_title(r'$w_0^t$ hist')
    ax_right[3].hist(self.thetas.T[2][1:], bins=40)
    ax_right[3].set_title(r'$\beta^t$ hist')

    fig.suptitle('Metropolis')
    fig.tight_layout()
    plt.savefig('metropolis2.pdf')
    plt.show()

    print('Enter burnout cutoff:')
    burnout = int(input())

    xrange = np.linspace(1, 10, 10000)
    slope_hist = rv_histogram(
        np.histogram(self.thetas.T[0][burnout:], bins=100))
    slope = xrange[slope_hist.pdf(xrange).argmax()]

    xrange = np.linspace(1, 5000, 10000)
    intercept_hist = rv_histogram(
        np.histogram(self.thetas.T[1][burnout:], bins=100))
    intercept = xrange[intercept_hist.pdf(xrange).argmax()]

    return intercept, slope
def UL_uncert(chain, p=0.95):
    corr = acor(chain)[0]
    N = len(chain)
    Neff = N / corr
    hist = np.histogram(chain, bins=100)
    pdf = ss.rv_histogram(hist).pdf
    UL = np.percentile(chain, 100 * p)  # 95 for 95% (not 0.95)
    pUL = pdf(UL)
    dUL = np.sqrt(p * (1 - p) / Neff) / pUL
    return UL, dUL
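# Hedged sketch (illustrative, not from the original analysis): UL_uncert
# estimates the statistical error on the p-th quantile as
# dUL = sqrt(p*(1-p)/Neff) / pdf(UL), with Neff the autocorrelation-corrected
# sample size. A self-contained version using an i.i.d. synthetic chain, so
# Neff == N and no acor call is needed:
import numpy as np
import scipy.stats as ss

p = 0.95
chain = np.random.default_rng(2).exponential(size=50000)  # synthetic posterior samples
Neff = len(chain)                                         # i.i.d. here; use N / tau for MCMC

pdf = ss.rv_histogram(np.histogram(chain, bins=100)).pdf
UL = np.percentile(chain, 100 * p)
dUL = np.sqrt(p * (1 - p) / Neff) / pdf(UL)
print(UL, dUL)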
def predict(self, cond_x, random_x):
    cond_x_filtered = np.where(cond_x > self.x_bins.max(), self.x_bins.max(),
                               cond_x)
    cond_x_filtered = np.where(cond_x < self.x_bins.min(), self.x_bins.min(),
                               cond_x_filtered)
    random_percentile = norm.cdf(random_x)
    sampled_u = np.zeros(cond_x.shape)
    for c, cond_x_val in enumerate(cond_x_filtered):
        x_bin = np.searchsorted(self.x_bins, cond_x_val)
        sampled_u[c] = rv_histogram(
            (self.model[:, x_bin[0]], self.u_bins)).ppf(random_percentile[c])
    return sampled_u.ravel()
def texture_stats(self, patch):
    glcm = greycomatrix(
        patch.astype('int'),
        [3],
        [0, 0.25, 0.5],
        256,
        symmetric=True,
        normed=True,
    )
    dissimilarity = greycoprops(glcm, 'dissimilarity')[0, 0]
    correlation = greycoprops(glcm, 'correlation')[0, 0]
    hist = np.histogram(patch, bins='fd')
    distribution = stats.rv_histogram(hist)
    return patch.std(), distribution.entropy(), dissimilarity, correlation
def sample_decorrelation_phase(L, coherence, size=1, phi_num=1000,
                               display=False, scale=1.0, font_size=12):
    '''Sample decorrelation phase noise with PDF determined by L and coherence
    Inputs:
        L         - int, multilook number
        coherence - float, spatial coherence
        size      - int, sample number
    Output:
        sample    - 1D np.array in size of (size,), sampled phase
    unw_n = sample_decorrelation_phase(L=1, coherence=0.7, size=100000, display=True)
    '''
    size = int(size)
    phiMax = np.pi * float(scale)

    pdf = ifginv.phase_pdf_ds(
        int(L), coherence, phi_num=phi_num)[0].flatten()  # for PS: ifginv.phase_variance_ps()
    phi = np.linspace(-phiMax, phiMax, phi_num + 1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))

    #sample = np.nan
    #while sample is np.nan:
    sample = phi_dist.rvs(size=size)

    if display:
        #size = 10000
        fig, ax = plt.subplots(figsize=[5, 3])
        ax.hist(sample, bins=50, density=True, label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.2f, sample size = %d' %
                     (L, coherence, size), fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return sample
def update_data(self, data, debug):
    self.data = data
    bins = np.linspace(0, 1, self.nr_bins)
    hist, edges = np.histogram(self.data, bins=bins, density=True)
    hist /= np.sum(hist)
    self.hist_dist = stats.rv_histogram((hist, edges))

    # calculate the bin edges by dividing the quantiles equally on the CDF:
    quantiles = np.linspace(0, 1, num=self.nr_bins + 1)  # quantile edges from the right
    self.bin_edges = self.hist_dist.ppf(quantiles)  # invCDF to find the bin edges
    self.bin_centers = np.array(self.bin_edges[:-1] +
                                np.diff(self.bin_edges) / 2)

    if debug:
        self.plot_debug()
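# Hedged standalone sketch (illustrative only): update_data builds
# equal-probability bin edges by evaluating the histogram's inverse CDF (ppf)
# at evenly spaced quantiles, so each bin carries roughly the same mass.
# The synthetic data and bin count below are assumptions for demonstration.
import numpy as np
from scipy import stats

data = np.random.default_rng(1).beta(2, 5, size=10000)  # synthetic data in [0, 1]
nr_bins = 10

hist, edges = np.histogram(data, bins=np.linspace(0, 1, nr_bins), density=True)
hist = hist / np.sum(hist)
hist_dist = stats.rv_histogram((hist, edges))

quantiles = np.linspace(0, 1, num=nr_bins + 1)
bin_edges = hist_dist.ppf(quantiles)                 # edges with ~equal mass per bin
bin_centers = bin_edges[:-1] + np.diff(bin_edges) / 2
print(np.round(bin_edges, 3))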
def _detect_dist_continuous(col_stats):
    """
    Detects type of continuous distribution based on Kolmogorov-Smirnov
    Goodness-of-fit test,
    https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test.

    Args:
        col_stats (dict): Column data statistics. The column data must be of a
            continuous numerical random variable.

    Returns:
        dist (dict): Dictionary stating distribution type along with other
            parameters for the distribution.
    """
    bin_counts, bin_edges = (
        col_stats["histogram"]["bin_counts"],
        col_stats["histogram"]["bin_edges"],
    )

    # Create a continuous distribution from the histogram and sample data from it
    hist_dist = stats.rv_histogram((bin_counts, bin_edges))
    hist_mean = hist_dist.mean()
    observed_samples = hist_dist.rvs(size=1000)

    # Center the distribution around 0
    observed_samples -= hist_mean

    # Distributions to test against (must be a continuous distribution from scipy.stats)
    # Distribution name -> list of positional arguments for the distribution
    # If the observed histogram is centered around 0, means of distributions set to 0
    test_dists = (
        # norm(loc, scale)
        ("norm", (0, col_stats["stddev"])),
        # skewnorm(a, loc, scale)
        ("skewnorm", (col_stats["skewness"], 0, col_stats["stddev"])),
        # uniform(loc, scale)
        ("uniform", (col_stats["min"], col_stats["max"] - col_stats["min"])),
    )

    dist = {}
    max_p = 0
    for dist_name, dist_args in test_dists:
        # overfitting on purpose for testing
        # method = getattr(stats, dist_name)
        # dist_args = method.fit(observed_samples)
        p = stats.kstest(observed_samples, dist_name, dist_args)[1]
        if p > max_p:
            dist["dist"] = dist_name
            dist["args"] = dist_args
            max_p = p

    return dist
def sample_decorrelation_phase(coherence, L, size=1, phi_num=1000,
                               display=False, scale=1.0, font_size=12):
    '''Sample decorrelation phase based on PDF determined by L and coherence value
    Parameters: coherence - float, spatial coherence
                L         - int, number of independent looks
                size      - int, number of samples to draw
                phi_num   - int, number of points for the numerical phase PDF
    Returns:    phase     - 1D np.array in size of (size,), sampled phase
    Examples:   decor_noise = sample_decorrelation_phase(0.7, L=1, size=1e4, display=True)
    '''
    size = int(size)
    phiMax = np.pi * float(scale)

    # numerical solution of phase PDF for distributed scatterers
    pdf = phase_pdf_ds(int(L), coherence, phi_num=phi_num)[0].flatten()

    # generate phase distribution
    phi = np.linspace(-phiMax, phiMax, phi_num + 1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))

    # sample from the distribution
    phase = phi_dist.rvs(size=size)

    if display:
        fig, ax = plt.subplots(figsize=[5, 3])
        ax.hist(phase, bins=50, density=True, label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.2f, sample size = %d' %
                     (L, coherence, size), fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return phase
def calculate_pdf_and_cdf2(targets):
    fontsize = 70
    bins = 40
    error_number = 1
    error_labels = ['$X_e$', '$Y_e$', '$\\Phi_e$']
    plot_labels = ['VGG-19', 'SVG-16', 'VGG-16', 'ResNet-50']
    plot_labels.reverse()
    ax = new_figure(fontsize=fontsize, y_label='', x_label=error_labels[error_number])
    ax1 = new_figure(fontsize=fontsize, y_label='', x_label=error_labels[error_number])
    targets.reverse()
    print(targets)
    for j, tar in enumerate(targets):
        inferences = np.loadtxt(
            'yips_evaluation/inferences-{}.txt'.format(tar), delimiter=',')
        labels = np.loadtxt('yips_evaluation/labels.txt', delimiter=',')
        errors = []
        for i in range(inferences.shape[0]):
            infer, label = inferences[i], labels[i]
            if label[-1] == 1:
                error = label[:-1] - infer[:-1]
                error[-1] = (error[-1] + np.pi) % (2 * np.pi) - np.pi
                errors.append(list(error))
        errors = np.array(errors)
        x_e = errors[:, error_number]
        x_es = np.linspace(errors[:, error_number].min(),
                           errors[:, error_number].max(), 300)

        # ax.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1', alpha=1.0,
        #         cumulative=True, rwidth=0.8, linewidth=12, color='C1', label='Data')
        ax.plot(x_es, st.rv_histogram(np.histogram(x_e, bins=bins)).cdf(x_es),
                linewidth=12, color='C{}'.format(j), label=plot_labels[j])
        ax.legend(prop={'size': fontsize}, loc=2, frameon=False)

        # ax1.hist(x_e, bins=bins, density=1, histtype='bar', facecolor='C1',
        #          alpha=1.0, cumulative=False, rwidth=0.8, linewidth=12, color='C1', label='Data')
        ax1.plot(x_es, st.gaussian_kde(x_e).pdf(x_es), linewidth=12,
                 color='C{}'.format(j), label=plot_labels[j])
        ax1.legend(prop={'size': fontsize}, loc=2, frameon=False)
    plt.show()
def resources(log, phi=None, sw=None, cutoff=None, a=1, h=1, beta=1.1,
              fluid='oil', m=1000, seed=1706):
    np.random.seed(seed=seed)

    # Define coefficients depending on the fluid
    if fluid == 'oil':
        c = 7.758  # Mbbl
        name = 'OOIP'
    elif fluid == 'gas':
        c = 0.000043560  # Bscf
        name = 'OGIP'

    r = pd.DataFrame()
    for p in phi:
        phieh = np.histogram(log.loc[log[p] > cutoff[0], p])
        phie_dist = st.rv_histogram(phieh)
        phie_random = phie_dist.rvs(size=m)
        for s in sw:
            swh = np.histogram(log.loc[log[s] < cutoff[1], s])
            sw_dist = st.rv_histogram(swh)
            sw_random = sw_dist.rvs(size=m)

            # Original hydrocarbon in place
            orip = c * a * h * phie_random * (1 - sw_random) * (1 / beta)
            orip = pd.DataFrame({name: orip})
            orip['PhieCurve'] = p
            orip['SwCurve'] = s
            orip['Phie'] = phie_random
            orip['Sw'] = sw_random
            r = r.append(orip)

    return r.reset_index(drop=True)
def add_variable(self, scenarios, debug=False):
    samples = np.empty((0, ))
    for scenario in scenarios:
        rv = stats.norm.rvs(scenario.mu, scenario.sigma, 250)
        samples = np.append(samples, rv, axis=0)
    # print(samples.shape)

    bins = np.linspace(0, 1, 100)
    hist, edges = np.histogram(samples, bins=bins, density=True)
    hist = hist / np.sum(hist)
    hist_dist = stats.rv_histogram((hist, edges))

    # debug
    if debug:
        X = np.linspace(0, 1, num=300 + 1)
        plt.plot(X, hist_dist.pdf(X))
        plt.show()

    self.forecast_variables.append(hist_dist)
def ks_test(self, ax: Axes):
    rtts, rtts_control = self.get_comparison_rtts()
    stat, pval = stats.ks_2samp(rtts, rtts_control)
    ax.hist(rtts, color='Orange', bins=1000, alpha=0.5,
            label='Normalized RTTs with flooder')
    ax.hist(rtts_control, color='Blue', bins=1000, alpha=0.5,
            label='Normalized RTTs under control')
    ax.legend()
    ax.set_title('RTT histogram normalized by mean and stdev')
    hist = stats.rv_histogram(np.histogram(rtts_control, bins=1000))
    ax1 = plotter.get_new_subplot(
        'QQ Plot for RTTs with flooder against control')
    stats.probplot(rtts, plot=ax1, fit=True, dist=hist)
    ax1.set_title('RTTs QQ Plot')
    return ("Kolmogorov Smirnov Two Sample Test: statistic value: %0.2f, "
            "pvalue: %0.2f" % (stat, pval))
def sample_decorrelation_phase(L, coherence, size=1, display=False, scale=1.0,
                               font_size=12):
    '''Sample decorrelation phase noise with PDF determined by L and coherence
    Inputs:
        L         - int, multilook number
        coherence - float, spatial coherence
        size      - int, sample number
    Output:
        sample    - 1D np.array in size of (size,), sampled phase
    unw_n = sample_decorrelation_phase(L=1, coherence=0.7, size=100000, display=True)
    '''
    phiNum = 100
    phiMax = np.pi * float(scale)

    pdf = ifginv.phase_pdf_ds(
        int(L), coherence, phi_num=phiNum)[0].flatten()  # for PS: ifginv.phase_variance_ps()
    phi = np.linspace(-phiMax, phiMax, phiNum+1, endpoint=True)
    phi_dist = stats.rv_histogram((pdf, phi))

    #sample = np.nan
    #while sample is np.nan:
    sample = phi_dist.rvs(size=size)

    if display:
        #size = 10000
        fig, ax = plt.subplots(figsize=[5,3])
        ax.hist(sample, bins=50, density=True, label='Sample\nHistogram\n(norm)')
        ax.plot(phi, phi_dist.pdf(phi), label='PDF')
        ax.plot(phi, phi_dist.cdf(phi), label='CDF')
        ax.set_xlabel('Phase', fontsize=font_size)
        ax.set_ylabel('Probability', fontsize=font_size)
        ax.set_title(r'L = %d, $\gamma$ = %.1f, sample size = %d' %
                     (L, coherence, size), fontsize=font_size)
        ax.set_xlim([-np.pi, np.pi])
        ax.set_xticks([-np.pi, 0, np.pi])
        ax.set_xticklabels([r'-$\pi$', '0', r'$\pi$'], fontsize=font_size)
        ax.tick_params(direction='in', labelsize=font_size)
        ax.legend(fontsize=font_size)
        plt.savefig('DecorNoiseSampling.jpg', bbox_inches='tight', dpi=600)
        plt.show()
    return sample
# These distributions fail the complex derivative test below.
# Here 'fail' means producing wrong results and/or raising exceptions, depending
# on the implementation details of corresponding special functions.
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
fails_cmplx = set(['beta', 'betaprime', 'chi', 'chi2', 'dgamma', 'dweibull',
                   'erlang', 'f', 'gamma', 'gausshyper', 'gengamma',
                   'gennorm', 'genpareto', 'halfgennorm', 'invgamma',
                   'ksone', 'kstwobign', 'levy_l', 'loggamma', 'logistic',
                   'maxwell', 'nakagami', 'ncf', 'nct', 'ncx2',
                   'norminvgauss', 'pearson3', 'rice', 't', 'skewnorm',
                   'tukeylambda', 'vonmises', 'vonmises_line',
                   'rv_histogram_instance'])

_h = np.histogram([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6,
                   7, 7, 7, 8, 8, 9], bins=8)
histogram_test_instance = stats.rv_histogram(_h)


def cases_test_cont_basic():
    for distname, arg in distcont[:] + [(histogram_test_instance, tuple())]:
        if distname == 'levy_stable':
            continue
        elif distname in distslow:
            yield pytest.param(distname, arg, marks=pytest.mark.slow)
        else:
            yield distname, arg


@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
def test_cont_basic(distname, arg):
    # this test skips slow distributions
nb_generated = 10  # number of random phrases to generate and display

if False:  # NET_CONFIG['arch'] == 'vae':
    # For a variational autoencoder, the decoder input should be fed
    # normally distributed noise with unit variance.
    X_probe = np.random.normal(loc=1.0, scale=1.0, size=(nb_generated, latent_dim))
else:
    # We must feed the decoder a latent-variable vector whose per-component
    # spread roughly matches the distribution seen in the training data.
    # The histograms for each latent variable were already collected;
    # load and use them here.
    with open(latent_histos_path, 'rb') as f:
        latent_histos = pickle.load(f)

    pdfs = []
    for idim, histo in enumerate(latent_histos):
        pdfs.append(stats.rv_histogram(histogram=histo))

    X_probe = np.zeros((nb_generated, latent_dim))
    for idim in range(latent_dim):
        p = pdfs[idim].rvs(size=nb_generated)
        X_probe[:, idim] = p

# Pass the prepared latent vectors through the decoder.
y_probe = decoder_model.predict(X_probe)

# Decode the model output into phrases.
result_phrases = w2v_decoder.decode_output(y_probe)
for phrase in result_phrases:
    print(u'{}'.format(phrase))