def prob_alias(self, plot=False):
    """Returns tuple (threshold, probability).

    A gamma distribution is fitted to the noise distances and another to
    the inter-chip distances.  ``threshold`` is the 99.7th percentile of
    the fitted noise distribution and ``probability`` is the fitted
    inter-chip CDF evaluated at that threshold.  The result is also
    printed.

    With ``plot=True`` the current matplotlib figure is cleared and both
    histograms, both fitted densities and the threshold line are drawn.
    """
    from scipy.stats import gamma
    # scipy-ref.pdf Section 5.13 on page 390
    if plot:
        import matplotlib.pyplot as plt
        plt.ion()
        plt.clf()

    nd = self.get_all_noise_dists()
    a, loc, scale = gamma.fit(nd)
    ndrv = gamma(a, loc, scale)
    if plot:
        # 'density' replaces the 'normed' keyword removed in matplotlib 3.1.
        plt.hist(nd, density=True)
        x = range(max(nd))
        plt.plot(x, ndrv.pdf(x))

    icd = self.get_all_inter_chip_dists()
    a, loc, scale = gamma.fit(icd)
    icdrv = gamma(a, loc, scale)
    if plot:
        plt.hist(icd, density=True)
        x = range(max(icd))
        plt.plot(x, icdrv.pdf(x))

    # Here it goes!
    threshold = ndrv.ppf(0.997)
    if plot:
        plt.axvline(threshold)
    prob = icdrv.cdf(threshold)
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (
        threshold, prob))
    return threshold, prob
def rv_gama(hind, obs, fcst):
    """Correct one forecast value by gamma quantile mapping.

    hind -- model hindcast values
    obs  -- observed values
    fcst -- the forecast value to correct

    Returns 0 when ``fcst`` is below the smallest retained hindcast value,
    otherwise the observed-gamma quantile matching the forecast's
    probability under the hindcast gamma.
    """
    # Fit the gamma distribution to the non-zero observations.
    sorted_obs = np.sort(obs)
    dry_count = len(np.where(sorted_obs == 0)[0])
    dry_fraction = dry_count / float(len(sorted_obs))
    obs_params = gamma.fit(sorted_obs[sorted_obs > 0], floc=0)

    # Fit the gamma distribution to the model data, after dropping as many
    # of its smallest values as there are zero observations.
    wet_hind = np.sort(hind)[dry_count:]
    wet_hind = wet_hind[wet_hind > 0]
    hind_params = gamma.fit(wet_hind, floc=0)

    # Correct the forecast using the gamma functions.
    if fcst < wet_hind[0]:
        return 0

    model_prob = gamma.cdf(fcst, *hind_params)
    mixed_prob = dry_fraction + (1 - dry_fraction) * model_prob
    corrected = gamma.ppf(mixed_prob, *obs_params)
    if str(corrected) == 'inf':
        print('hey there')
    return corrected
def prob_alias(self, plot=False):
    """Returns tuple (threshold, probability).

    A gamma distribution is fitted to the noise distances and another to
    the inter-chip distances.  ``threshold`` is the 99.7th percentile of
    the fitted noise distribution and ``probability`` is the fitted
    inter-chip CDF evaluated at that threshold.  The result is also
    printed.

    With ``plot=True`` the current matplotlib figure is cleared and both
    histograms, both fitted densities and the threshold line are drawn.
    """
    from scipy.stats import gamma
    # scipy-ref.pdf Section 5.13 on page 390
    if plot:
        import matplotlib.pyplot as plt
        plt.ion()
        plt.clf()

    nd = self.get_all_noise_dists()
    a, loc, scale = gamma.fit(nd)
    ndrv = gamma(a, loc, scale)
    if plot:
        # 'density' replaces the 'normed' keyword removed in matplotlib 3.1.
        plt.hist(nd, density=True)
        x = range(max(nd))
        plt.plot(x, ndrv.pdf(x))

    icd = self.get_all_inter_chip_dists()
    a, loc, scale = gamma.fit(icd)
    icdrv = gamma(a, loc, scale)
    if plot:
        plt.hist(icd, density=True)
        x = range(max(icd))
        plt.plot(x, icdrv.pdf(x))

    # Here it goes!
    threshold = ndrv.ppf(0.997)
    if plot:
        plt.axvline(threshold)
    prob = icdrv.cdf(threshold)
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (threshold, prob))
    return threshold, prob
def fit_gamma(self):
    """
    Fit a zero-located gamma distribution to each column of
    ``self.interevent_times`` and to all values pooled together.

    Sets ``gamma_alphas`` (array of per-column shape parameters),
    ``mean_gamma_alpha`` (their mean) and ``mean_gamma_alpha_all`` (shape
    parameter of the pooled fit).
    """
    per_column_shapes = [
        gamma.fit(column, floc=0)[0] for column in self.interevent_times.T
    ]
    # Fit to all data at once for comparison with the per-column mean.
    pooled_fit = gamma.fit(self.interevent_times.flatten(), floc=0)

    self.gamma_alphas = np.array(per_column_shapes)
    self.mean_gamma_alpha = np.mean(self.gamma_alphas)
    self.mean_gamma_alpha_all = pooled_fit[0]
def __init__(self, wind_data):
    """Estimate per-column (alpha, sigma) pairs and fit gammas to them.

    For each of the first three columns of ``wind_data`` the pair
    minimising ``self.weibul_sq_error`` is found with ``fmin`` starting
    from [2, 2].  A gamma distribution is then fitted to the three alpha
    estimates and another to the three sigma estimates.
    """
    self.wind_data = wind_data

    n_columns = 3
    self.alpha_hat = np.empty(n_columns)  # alpha estimates, one per column
    self.sigma_hat = np.empty(n_columns)  # sigma estimates, one per column
    for col in range(n_columns):
        samples = self.wind_data.iloc[:, col].tolist()
        best = fmin(self.weibul_sq_error, [2, 2], args=(samples,))
        self.alpha_hat[col], self.sigma_hat[col] = best

    # Gamma parameters describing the spread of the alpha and sigma estimates.
    alpha_fit = gamma.fit(self.alpha_hat)
    self.shape_alpha, self.location_alpha, self.scale_alpha = alpha_fit
    sigma_fit = gamma.fit(self.sigma_hat)
    self.shape_sigma, self.location_sigma, self.scale_sigma = sigma_fit
def fit(self,obs_data,sim_data):
    """Estimate the parameters from the observed and simulated data.

    For each data set this stores, in ``self.obs_param`` / ``self.sim_param``:
    'c' -- fraction of values equal to zero (dry-day fraction),
    'a' and 'b' -- shape and scale of a gamma fitted to the values > 0
    with the location fixed at 0.

    Returns ``self``.
    """
    targets = ((self.obs_param, obs_data), (self.sim_param, sim_data))

    #: dry day fraction
    for params, sample in targets:
        params['c'] = (sample[sample == 0].shape[0]) / (sample.shape[0])

    #: fit gamma with non zero values with floc=0
    for params, sample in targets:
        shape, _, scale = gamma.fit(sample[sample > 0], floc=0)
        params['a'] = shape
        params['b'] = scale

    return self
def returnDistData(cls, self):
    """Fit seven distributions to ``self.data`` and tabulate their densities.

    Stores a DataFrame in ``self.distDF`` with one column per distribution
    (gamma, Rayleigh, normal, log-normal, Nakagami, exponential,
    exponentiated Weibull), each holding the fitted pdf evaluated at
    ``self.data``.
    """
    data = self.data

    # NOTE(review): the gamma parameters are fitted to 10**(data / 10) while
    # the density is evaluated on the untransformed data -- confirm intent.
    gamma_params = gamma.fit(10**(data / 10))
    densities = [('gammaDist', gamma.pdf(data, *gamma_params))]

    fitted_on_raw_data = (
        ('rayleighDist', rayleigh),
        ('normDist', norm),
        ('lognormDist', lognorm),
        ('nakagamiDist', nakagami),
        ('exponDist', expon),
        ('weibDist', exponweib),
    )
    for label, dist in fitted_on_raw_data:
        params = dist.fit(data)
        densities.append((label, dist.pdf(data, *params)))

    self.distDF = pd.DataFrame(
        np.column_stack([values for _, values in densities]),
        columns=[label for label, _ in densities])
def run_kstests(json_path, run_date, member):
    """Kolmogorov-Smirnov test of a gamma fit for every step of every track.

    Reads all ``mesh_*.json`` files under ``json_path/run_date/member``.
    For each feature of each file, the masked time-step values are fitted
    with a gamma distribution whose location is fixed at ``min - 0.1`` and
    the fit is tested with ``kstest``.  Steps with a p-value below 0.01 are
    printed and saved as ``<step_id>.png`` in the working directory.

    Returns a DataFrame indexed by step id with columns "D" and "p-val".
    Any exception raised while reading or fitting propagates to the caller.
    """
    full_path = json_path + "/{0}/{1}/mesh_*.json".format(run_date, member)
    json_files = sorted(glob(full_path))
    ks_results = {"id": [], "ks": []}
    for json_file in json_files:
        # The context manager closes the file even if json.load raises.
        with open(json_file) as js:
            mesh_track = json.load(js)
        track_id = mesh_track["properties"]["id"]
        for m, mesh_obj in enumerate(mesh_track["features"]):
            step_id = track_id + "_{0:03d}".format(m)
            ts = np.array(mesh_obj["properties"]["timesteps"])
            mask = np.array(mesh_obj["properties"]["masks"])
            vals = ts[mask == 1]
            # Location just below the smallest value keeps every sample
            # strictly inside the support of the fitted distribution.
            gdist = gamma.fit(vals, floc=vals.min()-0.1)
            sig = kstest(vals, gamma(*gdist).cdf)
            ks_results["id"].append(step_id)
            ks_results["ks"].append(sig)
            if sig[1] < 0.01:
                print(step_id,)
                print(sig[1],gdist)
                print(np.sort(vals))
                plt.figure(figsize=(8,8))
                plt.pcolormesh(ts, alpha=0.5, cmap="YlOrRd", vmin=0, vmax=100)
                pc = plt.pcolormesh(np.ma.array(ts, mask=mask==0),
                                    cmap="YlOrRd", vmin=0, vmax=100)
                plt.title(step_id)
                plt.colorbar(pc)
                plt.savefig(step_id + ".png", bbox_inches="tight", dpi=150)
                plt.close()
    ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"],
                            columns=["D", "p-val"])
    print(ks_frame.shape[0])
    return ks_frame
def fit_gamma_distribution(self, desorption_thresh=5, plot=False, bins=15, normed=True):
    """Fit a gamma distribution to the desorption-time distribution.

    desorption_thresh -- passed as ``thresh`` to
        ``self.get_desorption_distribution``
    plot   -- when true, show a histogram of the data with the fitted pdf
    bins   -- number of histogram bins
    normed -- whether the histogram is normalised to a density

    Returns the fitted ``(shape, loc, scale)``.
    """
    # First get the detachment times.
    dt = self.get_desorption_distribution(thresh=desorption_thresh)
    fit_alpha, fit_loc, fit_beta = gamma.fit(dt)

    if plot:
        # The fitted curve is only needed for the figure.
        x = np.linspace(0, dt.max(), 100)
        pdf_fitted = gamma.pdf(x, fit_alpha, fit_loc, fit_beta)
        # matplotlib 3.1 removed hist(normed=...); 'density' is the
        # replacement keyword, so the caller-facing flag is forwarded to it.
        plt.hist(dt, bins=bins, density=normed)
        plt.plot(x, pdf_fitted)
        plt.show()

    return fit_alpha, fit_loc, fit_beta
def match_single_track_dist(model_track, obs_track):
    """Map per-step gamma parameters of an observed track onto a model track.

    For every observed time step a gamma distribution (location fixed at
    the minimum value minus 0.1) is fitted to the masked values above the
    minimum intensity, and the maximum value is recorded.  When both tracks
    have more than one time, the observed parameters are linearly
    interpolated onto the model times after rescaling both time axes to
    [0, 1] (values outside the range become 0); otherwise the parameters
    of the first observed time are copied to every model time.

    Returns a DataFrame indexed by the model times with columns
    Max_Hail_Size, Shape, Location and Scale.
    """
    # NOTE(review): `self` is not a parameter here; it is presumably
    # captured from an enclosing method -- confirm.
    label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
    fit_columns = ["Shape", "Location", "Scale"]
    obs_times = obs_track.times
    model_times = model_track.times
    obs_hail_dists = pd.DataFrame(index=obs_times, columns=label_columns)
    model_hail_dists = pd.DataFrame(index=model_times, columns=label_columns)

    for t, step in enumerate(obs_track.timesteps):
        selected = (obs_track.masks[t] == 1) & (
            obs_track.timesteps[t] > self.mrms_ew.min_intensity)
        step_vals = step[selected]
        min_hail = step_vals.min() - 0.1
        when = obs_times[t]
        obs_hail_dists.loc[when, fit_columns] = gamma.fit(step_vals,
                                                         floc=min_hail)
        obs_hail_dists.loc[when, "Max_Hail_Size"] = step_vals.max()

    if not (obs_times.size > 1 and model_times.size > 1):
        # Too few times to interpolate: reuse the first observed step.
        for param in obs_hail_dists.columns:
            model_hail_dists.loc[model_times, param] = obs_hail_dists.loc[
                obs_times[0], param]
        return model_hail_dists

    normalized_obs_times = 1.0 / (obs_times.max() - obs_times.min()) \
        * (obs_times - obs_times.min())
    normalized_model_times = 1.0 / (model_times.max() - model_times.min()) \
        * (model_times - model_times.min())
    for col in label_columns:
        interp_func = interp1d(normalized_obs_times, obs_hail_dists[col],
                               kind="linear", bounds_error=False,
                               fill_value=0)
        model_hail_dists.loc[model_times, col] = interp_func(
            normalized_model_times)
    return model_hail_dists
def estimate_gamma_dist_scipy(vals):
    """Estimate gamma shape and scale for ``vals`` with the location fixed at 0.

    The estimate is logged at INFO level and returned as ``(shape, scale)``.
    """
    # A shift can happen (e.g. size selection of DNA fragments), but it is
    # tentatively ignored: the location is pinned to zero.
    fitted = gamma.fit(vals, floc=0.0)
    shape_hat = fitted[0]
    scale_hat = fitted[2]
    message = "estimated Gamma dist params are a = %f, b = %f." % (shape_hat, scale_hat)
    logger.info(message)
    return (shape_hat, scale_hat)
def fit_gamma_param(estacion, mes, year_test='None'):
    """Fit gamma parameters to observed and modelled precipitation.

    estacion  -- station, used to select data and to look up the model
                 minimum in the spreadsheet
    mes       -- month, used the same way
    year_test -- accepted but not used by this function

    Returns ``(obs_gamma_param, mod_gamma_param)``, each the result of a
    gamma fit with the location fixed at 0.
    """
    observed, modelled = select_data_period(estacion, 'precip', mes)

    # Precipitation minimums: fixed at 0.1 for observations; for the model,
    # the one that best matches the frequency, read from the spreadsheet.
    obs_minimum = 0.1
    minimums = pd.read_excel('../datos/minimos_pp.xls', index_col=0)
    model_minimum = minimums.loc[mes, estacion]

    # Days with precipitation.
    wet_observed = precipitation_days(observed, obs_minimum)
    wet_modelled = precipitation_days(modelled, model_minimum)

    # Fit a gamma distribution over the days with precipitation.
    return gamma.fit(wet_observed, floc=0), gamma.fit(wet_modelled, floc=0)
def get_tonicdrive_stats(self, remove_bad_data_indices=True, visualize=False):
    """
    Fits a gamma distribution to "tonic drive" values

    :param remove_bad_data_indices: True/False (default True); when true,
        the cells listed in ``self.bad_data_indices`` are left out
    :param visualize: True/False (default False); when true, plots the
        fitted density over a histogram of the data
    :return: the location and scale of the fitted gamma distribution
        (the fitted shape is not returned)
    """
    values = self.read_tonicdrive()
    if remove_bad_data_indices:
        keep = np.setdiff1d(range(self.n_cells), self.bad_data_indices)
        values = values[keep]

    shape, loc, scale = gamma.fit(values)
    print(len(values))

    if visualize:
        lower, upper = gamma.ppf([0.001, 0.999], a=shape, loc=loc, scale=scale)
        grid = np.linspace(lower, upper, 100)
        plt.plot(grid, gamma.pdf(grid, a=shape, loc=loc, scale=scale))
        plt.hist(values, density=True)
        plt.title(self.gc_type + ' ' + self.response_type)
        plt.xlabel('Tonic drive (a.u.)')
        plt.show()

    return loc, scale
def _fit_gamma(sampleses, filename):
    """Fits a gamma distribution to the first 16 samples and plots the results

    Assuming that filename ends with ".pdf".  One figure per sample set is
    written to ``<filename without .pdf>_fit_<padded index>.pdf``; its
    title reports the fitted parameters, the Kolmogorov-Smirnov statistic
    and p-value, and summary statistics of the sample.  The sixteenth
    histogram is additionally saved as ``last.pdf`` before the fitted
    curve is drawn.
    """
    for i, samples in enumerate(sampleses[:16]):
        sample_mean = np.mean(samples)
        sample_var = np.var(samples)
        sample_median = np.median(samples)
        shape, loc, scale = gamma.fit(samples)
        stat, pval = kstest(
            samples, 'gamma', args=(shape, loc, scale))

        fig, axis = plt.subplots(1, 1)
        # 'density' replaces the 'normed' keyword removed in matplotlib 3.1.
        axis.hist(samples, density=True)
        if i == 15:
            fig.savefig('last.pdf')
        plotx = np.linspace(np.min(samples), np.max(samples))
        axis.plot(
            plotx, gamma.pdf(plotx, shape, loc=loc, scale=scale),
            linewidth=3)
        axis.set_title(
            'shape='+str(shape)+'; loc='+str(loc) +
            '; scale='+str(scale)+'\n' +
            'stat='+str(stat)+'; pval='+str(pval)+'\n' +
            'mean='+str(shape*scale)+'; var='+str(shape*scale*scale)+'\n' +
            's_mean='+str(sample_mean)+'; s_var='+str(sample_var)+'\n' +
            's_median='+str(sample_median))
        fig.savefig(
            filename[:-4]+'_fit_'+_pad_num(i+1)+'.pdf',
            bbox_inches='tight')
        # Close this figure explicitly so figures do not accumulate.
        plt.close(fig)
def ssi_gamma(df_SM, acc_per, df_var='sm'):
    """Compute a gamma-based standardized index from a time series.

    df_SM   -- DataFrame with a datetime index
    acc_per -- rolling window (in months) for the accumulation mean
    df_var  -- column of ``df_SM`` to use

    The column is averaged per month, interpolated and smoothed with a
    rolling mean.  Then, per calendar month, a zero-located gamma is
    fitted to the non-zero values, each value is converted to a
    probability (zeros get a fixed plotting-position probability) and
    mapped through the standard normal inverse CDF.

    Returns a Series shaped like the accumulated monthly series.
    """
    # Group data by the desired accumulation period and interpolate.
    monthly = df_SM[df_var].resample('M').mean().interpolate()
    accumulated = monthly.rolling(acc_per).mean()
    result = accumulated.copy()
    index_months = accumulated.index.month

    for month in np.arange(1, 13, 1):
        in_month = index_months == month
        series = accumulated.values[np.where(in_month)]
        valid = ~np.isnan(series)
        observed = series[valid]

        # Plotting position formula Gringorten
        zero_prob = np.float32((np.sum(observed == 0)) + 1) / (2 * (len(observed) + 1))

        shape, loc, scale = gamma.fit(observed[np.nonzero(observed)], floc=0)
        probabilities = gamma.cdf(observed, shape, loc, scale)
        probabilities[observed == 0] = zero_prob

        series[valid] = norm.ppf(probabilities)
        result.iloc[in_month] = series

    return result
def generate_slm_from_txt(training_rows, slm_dir, do_plot=True):
    """Build a segment-length model from text rows and compile it.

    Each row is stripped and split on ``BREAK``; the word count of every
    non-empty segment is tallied.  A zero-located gamma distribution is
    fitted to the segment lengths, written to ``<slm_dir>/slm.fxt`` via
    ``write_slm``, optionally plotted, and finally compiled with
    ``compile_slm``.
    """
    slm_fxt = os.path.join(slm_dir, "slm.fxt")
    slength_counts = Counter()
    maxl = 0
    for r in training_rows:
        r = r.strip()
        segs = r.split(BREAK)  # chop the line up into segments
        for s in segs:
            slen = len(s.split())
            if slen > maxl:
                # Single-argument print calls give identical output on
                # Python 2 and 3; the doubled space reproduces the
                # separator the old print statement inserted.
                print("new max length =  %s" % (slen,))
                maxl = slen
                print("from seg:  %s" % (s,))
            if slen:
                slength_counts[slen] += 1

    # Counter.elements() yields each length n times, where n is its count;
    # materialise it as a list for fitting and plotting.
    els = list(slength_counts.elements())
    print(els)

    x_vals = range(0, max(els)+1)
    (shape, loc, scale) = gamma.fit(els, floc=0)
    # Use these model params to build a frozen gamma distribution.
    gam_gen = gamma(shape, loc, scale)
    write_slm(slm_fxt, x_vals, gam_gen)
    if do_plot:
        plot_graph(x_vals, gam_gen, els)
    # This last step compiles the slm to binary .fst format.
    compile_slm(slm_dir)
def test_random_vars(self):
    """Check the distributions of the generator's random variables.

    ``rs`` and ``exp(ln_cs)`` must each fit a gamma with shape ~2,
    location ~0 and scale ~1; ``betas`` must fit a uniform on ~[0, 1].
    """
    gen = libMHCUDA.minhash_cuda_init(1000, 128, devices=1, verbosity=2)
    rs, ln_cs, betas = libMHCUDA.minhash_cuda_retrieve_vars(gen)
    libMHCUDA.minhash_cuda_fini(gen)

    for sample in (rs, numpy.exp(ln_cs)):
        a, loc, scale = gamma.fit(sample)
        self.assertTrue(1.97 < a < 2.03)
        self.assertTrue(-0.01 < loc < 0.01)
        self.assertTrue(0.98 < scale < 1.02)

    bmin, bmax = uniform.fit(betas)
    self.assertTrue(0 <= bmin < 0.001)
    self.assertTrue(0.999 <= bmax <= 1)
def _fit(self, X):
    """Fit a gamma distribution to ``X`` and keep its parameters.

    The fitted shape, location and scale are stored in ``self._params``
    under the keys 'a', 'loc' and 'scale'.
    """
    shape, location, spread = gamma.fit(X)
    self._params = dict(a=shape, loc=location, scale=spread)
def gamma_plot(cell, savefig = False):
    """Fit a zero-located gamma to the positive speeds of one cell type.

    The data are the ``cell`` column of the concatenated before/after
    speed frames, restricted to values > 0.  The product shape * scale is
    printed and returned.  With ``savefig`` true, a histogram with the
    fitted density and an expectation marker is also written to PNG and
    PDF files under ``N_PATH``.
    """
    combined = pd.concat([speed_pair_before, speed_pair_after])
    data = list(combined[combined[cell] > 0][cell])
    shape, loc, scale = gamma.fit(data, floc = 0)
    expectation = shape * scale
    print (f'{cell} Expected: {expectation}')

    if not savefig:
        return expectation

    x = np.linspace(0, 35, 1000)
    sns.distplot(data, kde = False, norm_hist = True, color = '#3498db',
                 bins = np.linspace(0, 8, 32))
    y = gamma.pdf(x, shape, loc, scale)
    plt.title(f'Distribution of {cell} cells\' speed', fontsize = 15)
    plt.axvline(expectation, linestyle = 'dashed', color = 'black',
                zorder = 1, label = 'Expectation', linewidth = 3)
    plt.xlim([0, 8])
    plt.xticks(fontsize = 12)
    plt.yticks(fontsize = 12)
    plt.xlabel(f'Velocity ({chr(956)}m/s)', fontsize = 12)
    plt.plot(x, y, label = 'Fitted Gamma', linewidth = 3)
    legend = plt.legend(prop = {'size': 12})
    for handle in legend.get_lines():
        handle.set_linewidth(3)
    plt.tight_layout()
    for extension, resolution in (('png', 300), ('pdf', 500)):
        plt.savefig(os.path.join(N_PATH, f'{cell}-Gamma.{extension}'),
                    format = extension, dpi = resolution)
    plt.close()
    return expectation
def do_MLE(data, minimum, maximum):
    """Maximum-likelihood gamma fit restricted to a closed value range.

    Only the entries of ``data`` with ``minimum <= value <= maximum`` are
    used; the location is fixed at 0.  Returns ``(shape, scale)``.
    """
    within_range = (minimum <= data) & (maximum >= data)
    shape, _, scale = gamma.fit(data[within_range], floc=0.0)
    return shape, scale
def fit_gamma_param(df, xmin, mes, year_test='None', option=0):
    """Fit a zero-located gamma to wet-day precipitation around a month.

    df        -- frame with 'month' and 'precip' columns (and 'Fecha' when
                 ``year_test`` is given)
    xmin      -- values must be strictly greater than this to count
    mes       -- central month; the months before and after are included,
                 wrapping around the year
    year_test -- when not 'None', rows dated within that year are excluded
    option    -- 0 returns only the fitted parameters; anything else
                 returns ``(parameters, wet-day values, capped CDF)``

    The CDF is evaluated on the sorted wet-day values and capped at
    0.9999999.
    """
    cdf_cap = .9999999

    if mes <= 1:
        months = [12, 1, 2]
    elif mes >= 12:
        months = [11, 12, 1]
    else:
        months = [mes - 1, mes, mes + 1]

    in_months = df['month'].isin(months)
    if year_test == 'None':
        rows = in_months
    else:
        in_test_year = np.logical_and(df.Fecha >= '01/01/'+str(year_test),
                                      df.Fecha <= '12/31/'+str(year_test))
        # Rows in the selected months and outside the year considered.
        rows = np.logical_and(in_months, np.logical_not(in_test_year))
    datos = df.loc[rows, 'precip'].values

    # Days with precipitation: missing values never qualify.
    not_missing = ~np.isnan(datos)
    wet = np.zeros(datos.shape, dtype=bool)
    wet[not_missing] = datos[not_missing] > xmin
    precdias = datos[wet]

    # Fit a gamma distribution over the days with precipitation.
    param_gamma = gamma.fit(precdias, floc=0)
    gamma_cdf = gamma.cdf(np.sort(precdias), *param_gamma)
    gamma_cdf[gamma_cdf > cdf_cap] = cdf_cap

    if option == 0:
        return param_gamma
    return param_gamma, precdias, gamma_cdf
def precision_prior_params(data, num_classes, pseudo_inputs_per_class):
    """Derive gamma prior parameters for precision and pick pseudo inputs.

    data -- iterable of batches with 'image' and 'label' entries
    num_classes -- number of label values (0 .. num_classes - 1) to sample
    pseudo_inputs_per_class -- images drawn per class, without replacement

    Returns ``(a, b, u)``: the fitted gamma shape, the reciprocal of the
    fitted scale, and the selected images concatenated along axis 0.
    """
    # Load the data into RAM so individual examples can be sampled.
    images, labels = [], []
    for batch in data:
        images.append(batch['image'])
        labels.append(batch['label'])
    x = tf.concat(images, axis=0)
    y = tf.concat(labels, axis=0)

    # Get the distribution of precision across pixel positions; the
    # variance is clipped from below at 1/255 before inverting.
    flat = tf.keras.layers.Flatten()(x)
    variance = tf.math.reduce_variance(flat, axis=0)
    clipped = tf.clip_by_value(variance, clip_value_min=(1 / 255),
                               clip_value_max=np.inf)
    precision = 1 / clipped
    a, _, scale = gamma.fit(precision, floc=0)
    b = 1 / scale

    # Randomly select the pseudo inputs, class by class.
    chosen = []
    for label in range(num_classes):
        candidates = np.where(y == label)[0]
        picks = np.random.choice(candidates, size=pseudo_inputs_per_class,
                                 replace=False)
        chosen.append(tf.gather(params=x, indices=picks, axis=0))
    u = tf.concat(chosen, axis=0)

    return a, b, u
def maximum_likelihood_fit(data):
    """Estimate parameters from samples.

    This is a wrapper around scipy's maximum likelihood estimator to
    estimate the parameters of a gamma distribution from samples.

    Parameters
    ----------
    data : list or list of lists/arrays
        Data to estimate parameters from. Lists of different length may
        be passed.

    Returns
    -------
    parameter : array-like, shape=[..., 2]
        Estimate of parameter obtained by maximum likelihood.
    """
    def is_nested(sample):
        """Check if sample contains an iterable."""
        for el in sample:
            try:
                return iter(el)
            except TypeError:
                return False

    def fit_single(sample):
        """Return the two-entry estimate for one flat sample."""
        kappa, _, scale = gamma.fit(gs.array(sample), floc=0)
        nu = 1 / scale
        return gs.array([kappa, kappa / nu])

    # A flat sample is treated as a batch containing one sample.
    samples = data if is_nested(data) else [data]
    parameters = [fit_single(sample) for sample in samples]

    if len(samples) == 1:
        return parameters[0]
    return gs.stack(parameters)
def getGammaPdf(dataset, nbins, bins):
    """Fit a zero-located gamma to ``dataset`` and sample its density.

    The density is evaluated at ``nbins`` evenly spaced points between the
    smallest and largest entries of ``bins``.  The fitted parameters are
    printed.  Returns ``(x, pdf)``.
    """
    fitted = gamma.fit(dataset, floc=0)
    shape, loc, scale = fitted
    grid = np.linspace(min(bins), max(bins), nbins)
    report = ''.join(['GAM: shape=', str(shape), ', loc=', str(loc),
                      ", scale=", str(scale)])
    print(report)
    return (grid, gamma.pdf(grid, shape, loc, scale))
def gamma_correction(obs_data, mod_data, sce_data, lower_limit=0.1, cdf_threshold=0.9999999):
    """Gamma-based correction of scenario data.

    obs_data, mod_data, sce_data -- observed, modelled and scenario arrays
    lower_limit   -- values at or above this count as rain days
    cdf_threshold -- upper cap applied to every fitted CDF

    A gamma distribution is fitted to the rain days of each data set.  The
    observed and modelled CDFs are resampled to the number of scenario
    rain days, combined with the scenario CDF into an adapted CDF, and
    turned into corrected rain amounts.  The number of corrected rain
    days is scaled by the ratio of scenario to model rain-day frequency.

    Returns an array with the length of ``sce_data``.
    """
    def capped_cdf(sample, params):
        # CDF of the sorted sample under its fit, capped below 1.
        cdf = gamma.cdf(np.sort(sample), *params)
        cdf[cdf > cdf_threshold] = cdf_threshold
        return cdf

    def resample(values, source_len, target_len):
        # Linear interpolation from source_len to target_len positions.
        return np.interp(np.linspace(1, source_len, target_len),
                         np.linspace(1, source_len, source_len),
                         values)

    obs_raindays = obs_data[obs_data >= lower_limit]
    mod_raindays = mod_data[mod_data >= lower_limit]
    sce_raindays = sce_data[sce_data >= lower_limit]
    n_sce = len(sce_raindays)

    obs_gamma = gamma.fit(obs_raindays)
    mod_gamma = gamma.fit(mod_raindays)
    sce_gamma = gamma.fit(sce_raindays)

    obs_cdf = capped_cdf(obs_raindays, obs_gamma)
    mod_cdf = capped_cdf(mod_raindays, mod_gamma)
    sce_cdf = capped_cdf(sce_raindays, sce_gamma)

    obs_cdf_intpol = resample(obs_cdf, len(obs_raindays), n_sce)
    mod_cdf_intpol = resample(mod_cdf, len(mod_raindays), n_sce)

    obs_inverse = 1. / (1. - obs_cdf_intpol)
    mod_inverse = 1. / (1. - mod_cdf_intpol)
    sce_inverse = 1. / (1. - sce_cdf)

    adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse)
    adapted_cdf[adapted_cdf < 0.] = 0.

    initial = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) * gamma.ppf(
        sce_cdf, *sce_gamma) / gamma.ppf(sce_cdf, *mod_gamma)

    mod_frequency = 1. * mod_raindays.shape[0] / mod_data.shape[0]
    sce_frequency = 1. * sce_raindays.shape[0] / sce_data.shape[0]
    days_min = n_sce * sce_frequency / mod_frequency
    n_expected = int(min(days_min, len(sce_data)))

    if n_sce > n_expected:
        initial = resample(initial, n_sce, n_expected)
    else:
        initial = np.hstack((np.zeros(n_expected - n_sce), initial))

    correction = np.zeros(len(sce_data))
    # NOTE(review): the slice takes the first entries of an ascending
    # argsort, i.e. the smallest scenario values -- confirm this is the
    # intended placement of the corrected rain days.
    correction[np.argsort(sce_data)[:n_expected]] = initial
    return correction
def plot_time(time: pandas.Series):
    """
    make a probability density function estimate based on the data in this simulation,
    time interval is same distribution for all sensors and rooms

    The differences between consecutive timestamps (in seconds) are fitted
    with a gamma distribution.  The fitted density is plotted on a new
    figure together with a histogram of the measured intervals.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html
    """
    intervals = time.diff().dropna().dt.total_seconds()
    Nbin = 100
    Fa, Floc, Fscale = gamma.fit(intervals)

    ti = np.arange(0.01, 5, 0.01)  # arbitrary time interval range to plot over
    # Named `density` rather than `pd` so the conventional pandas alias is
    # not shadowed inside this function.
    density = gamma.pdf(ti, Fa, loc=Floc, scale=Fscale)  # fit

    ax = plt.figure().gca()
    ax.plot(ti, density)
    ax.set_xlabel("Time Interval (seconds)")
    ax.set_ylabel("Probability")
    ax.set_title("Time interval observed")
    # Add the measured data to the plot.  The histogram is normalised so it
    # shares the y-scale of the fitted density; raw counts would dwarf it.
    ax.hist(intervals, bins=Nbin, density=True)
def simulate_gamma(psth, trials, duration, num_trials=20):
    """Simulate spike trains from a PSTH using gamma-distributed rescaled ISIs.

    psth       -- intensity per time bin; bins are 1 ms wide (dt = 0.001)
    trials     -- list of spike-time sequences used to estimate the ISIs
    duration   -- length of a trial, in the units of the spike times
    num_trials -- number of trials to simulate

    Each observed inter-spike interval is rescaled to the difference in
    cumulative intensity between its two spikes.  A gamma distribution is
    fitted to the rescaled intervals, and new trials are generated by
    integrating the PSTH until the integral reaches a drawn interval.

    Returns a list of ``num_trials`` lists of spike times.
    Raises ValueError when no valid inter-spike interval is available.
    """
    dt = 0.001
    n_bins = len(psth)
    # Cumulative intensity does not depend on the trial: compute it once.
    cum_intensity = np.cumsum(psth) * dt

    # Rescale the ISIs.
    rs_isis = []
    for trial in trials:
        if len(trial) < 1:
            continue
        for k, ti in enumerate(trial[1:]):
            tj = trial[k]
            if ti > duration or tj > duration or ti < 0.0 or tj < 0.0:
                continue
            # A spike exactly at `duration` would map to index len(psth),
            # one past the end, so clamp to the last bin.
            ti_index = min(int((ti / duration) * n_bins), n_bins - 1)
            tj_index = min(int((tj / duration) * n_bins), n_bins - 1)
            # Rescaled time is the difference in cumulative intensity.
            ui = cum_intensity[ti_index] - cum_intensity[tj_index]
            if ui < 0.0:
                print('ui < 0! ui=%0.6f, csum[ti]=%0.6f, csum[tj]=%0.6f' % (
                    ui, cum_intensity[ti_index], cum_intensity[tj_index]))
            else:
                rs_isis.append(ui)
    rs_isis = np.array(rs_isis)
    if rs_isis.size == 0:
        raise ValueError('no valid inter-spike intervals to fit')

    # Fit a gamma distribution to the rescaled ISIs.
    gamma_alpha, gamma_loc, gamma_beta = gamma.fit(rs_isis)
    print('Rescaled ISI Gamma Fit Params: alpha=%0.3f, beta=%0.3f, loc=%0.3f' % (
        gamma_alpha, gamma_beta, gamma_loc))

    # Simulate new trials using rescaled ISIs.
    new_trials = []
    for _ in range(num_trials):
        ntrial = []
        next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc, scale=gamma_beta)
        integral = 0.0
        for t_index, pval in enumerate(psth):
            integral += pval * dt
            if integral >= next_rs_time:
                # Spike: record it, reset the integral, draw a new ISI.
                ntrial.append(t_index * dt)
                integral = 0.0
                next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc,
                                         scale=gamma_beta)
        new_trials.append(ntrial)

    return new_trials
def train_gamma(X, y):
    """
    Description:
        Train a model that describes every feature with one gamma
        distribution per class.
    @params:
        X: training features, one row per sample
        y: training labels (1 = spam, 0 = nonspam)
    @return:
        model: dict with the class priors ("p_spam", "p_nonspam") and, per
        class, a list with one {"a", "floc", "scale"} dict per feature
        ("gammas_spam", "gammas_nonspam")
    """
    m, n = X.shape

    def fit_features(row_mask):
        # One gamma fit per feature column, restricted to the given rows.
        fitted = []
        for col in range(n):
            a, floc, scale = gamma.fit(asarray(X[row_mask, col]))
            fitted.append({"a": a, "floc": floc, "scale": scale})
        return fitted

    model = {}
    ## class priors
    model["p_spam"] = sum(y == 1) * 1.0 / m
    model["p_nonspam"] = sum(y == 0) * 1.0 / m
    model["gammas_spam"] = fit_features(y == 1)
    model["gammas_nonspam"] = fit_features(y == 0)
    return model
def params_of(strings):
    """Fit a gamma distribution to the total log-probabilities of strings.

    For each string the ``old_logprobs`` entries of its (state, symbol)
    transitions are summed.  Infinite totals are excluded, a gamma is
    fitted to the rest, and a normalised step histogram with the fitted
    density is drawn on the current matplotlib axes.

    Returns the fitted gamma parameters.
    """
    strings_logprobs = np.empty(len(strings))
    for i, string in enumerate(strings):
        strings_logprobs[i] = sum(old_logprobs[state, symbol]
                                  for state, symbol in of(string))

    # Select the finite totals once; they feed both the fit and the plot.
    finite_logprobs = strings_logprobs[strings_logprobs != np.inf]
    strings_params = gamma.fit(finite_logprobs)

    # 'density' replaces the 'normed' keyword removed in matplotlib 3.1.
    _, bins, _ = plt.hist(finite_logprobs, 500, histtype = 'step',
                          density = True)
    plt.plot(bins, gamma.pdf(bins, *strings_params))
    return strings_params
def HSIC_pval(X, Y, N_samp=500, kernelX="Gaussian", kernelY="Gaussian", eta=0.001, sigmaX=None, sigmaY=None, p_method="boots", return_boots=False):
    """ Calculates HSIC and p-value

    Gram matrices are approximated using incomplete Cholesky decomposition.

    X: Data. Each row is a datapoint.
    Y: Data. Each row is a datapoint.
    N_samp: Number of random shuffles of Y used for the null distribution
    kernelX: Kernel to use (Gaussian, Linear, Delta)
    kernelY: Kernel to use (Gaussian, Linear, Delta)
    eta: Threshold for incomplete Cholesky decomposition
    sigmaX: sigma for X when using Gaussian kernel
        (estimated with getSigmaGaussian when None)
    sigmaY: sigma for Y when using Gaussian kernel
        (estimated with getSigmaGaussian when None)
    p_method: "boots" for the empirical p-value from the shuffled
        statistics; any other value fits a gamma distribution to them and
        uses its upper tail
    return_boots: when true, the shuffled statistics are returned as well

    Returns (HSIC, pval), or (HSIC, pval, boots) when return_boots is true.
    """
    m, _ = X.shape
    sigmaX = getSigmaGaussian(X, X, 200) if sigmaX is None else sigmaX
    sigmaY = getSigmaGaussian(Y, Y, 200) if sigmaY is None else sigmaY

    A, _ = incompleteCholeskyKernel(X, m, kernelX, sigmaX, eta)
    B, _ = incompleteCholeskyKernel(Y, m, kernelY, sigmaY, eta)

    centered_A = A.T - A.T.mean(axis=0)
    tmp = B * np.mat(centered_A)
    HSIC = np.trace(tmp * tmp.T) / m**2

    # Null distribution: recompute the statistic with the rows of Y shuffled.
    boots = []
    Yrand = np.copy(Y)
    # range() exists on both Python 2 and 3; xrange is Python 2 only.
    for _ in range(N_samp):
        np.random.shuffle(Yrand)
        B, _ = incompleteCholeskyKernel(Yrand, m, kernelY, sigmaY, eta)
        tmp = np.mat(B) * np.mat(centered_A)
        boots.append(np.trace(tmp * tmp.T) / m**2)
    boots = np.array(boots)

    if p_method == "boots":
        pval = (sum(b >= HSIC for b in boots) + 1) / float(len(boots) + 1)
    else:  # gamma
        fit_alpha, fit_loc, fit_beta = gamma.fit(boots)
        # sf() is the upper tail 1 - cdf, evaluated directly so that very
        # small p-values are not lost to cancellation.
        pval = gamma.sf(HSIC, fit_alpha, scale=fit_beta, loc=fit_loc)

    if return_boots:
        return HSIC, pval, boots
    else:
        return HSIC, pval
def __init__(self, mode=0, elem=None, sample=None):
    """Set up gamma parameters either explicitly or from a sample.

    mode   -- 0 takes the parameters from ``elem``; any other value fits
              them to ``sample``
    elem   -- sequence ``(a, mu, sigma)``: shape, location and scale
    sample -- data to fit when ``mode`` is not 0

    The mean and variance of the resulting distribution are stored in
    ``math_average`` and ``dispersion``.
    """
    if mode == 0:
        shape, location, scale = elem[0], elem[1], elem[2]
    else:
        shape, location, scale = gamma.fit(sample)
    self.a = shape
    self.mu = location
    self.sigma = scale

    self.math_average = gamma.mean(self.a, loc=self.mu, scale=self.sigma)
    self.dispersion = gamma.var(self.a, loc=self.mu, scale=self.sigma)
def fit_gamma_expon_Q(trace,gammafactor=20,exponfactor=2,plot=False):
    """Fit widened distributions to columns 1-4 of ``trace``.

    Columns 1-3 get a zero-located gamma fit, reported as
    ``[shape / gammafactor, loc, scale * gammafactor]``.  Column 4 gets a
    zero-located exponential fit, reported as
    ``[loc, scale * exponfactor]``.  Column 0 and columns past 4 are
    skipped.  ``plot`` is accepted but not used.

    Returns the list of parameter lists in column order.
    """
    params = []
    for col in range(1, len(trace[0])):
        column = trace[:, col]
        if col < 4:
            shape, loc, theta = gamma.fit(column, floc = 0)
            params.append([shape / gammafactor, loc, theta * gammafactor])
        elif col == 4:
            loc, scale = expon.fit(column, floc = 0)
            params.append([loc, scale * exponfactor])
    return params
def get_outliers(data, filter, plotting):
    """Rank rows by joint log-density and return the least likely ones.

    data     -- frame with columns "x1", "x2", "x3" and "class"
    filter   -- when true, the distributions are fitted only to rows
                whose class is 0; otherwise to all rows
    plotting -- when true, diagnostic plots are written under "plots/"

    "x1" and "x3" are modelled with beta distributions and "x2" with a
    gamma distribution; the three log-densities are summed per row.

    Returns an array with up to 100 row labels whose class is -1, ordered
    from lowest to highest summed log-density.
    """
    if plotting:
        for column, limits in [("x1", (0, 1)), ("x2", (0, 30)), ("x3", (0, 1))]:
            plt.violinplot(data[column], vert=False)
            plt.xlim(limits)
            plt.savefig("plots/violin/%s.png" % column)
            plt.clf()

    reference = data[data["class"] == 0] if filter else data

    pdf = pd.DataFrame({})
    a, b, loc, scale = beta.fit(reference["x1"])
    pdf["x1"] = beta.logpdf(data["x1"], a, b, loc=loc, scale=scale)
    a, loc, scale = gamma.fit(reference["x2"])
    pdf["x2"] = gamma.logpdf(data["x2"], a, loc=loc, scale=scale)
    a, b, loc, scale = beta.fit(reference["x3"])
    pdf["x3"] = beta.logpdf(data["x3"], a, b, loc=loc, scale=scale)
    pdfs = pdf["x1"] + pdf["x2"] + pdf["x3"]

    if plotting:
        sns.boxplot(y=pdfs, x="class", data=data)
        plt.savefig("plots/boxplot.png")
        plt.clf()

        plt.plot(np.sort(pdfs))
        splits = [40, 45, 50, 60]
        # The same four threshold lines are drawn once per entry of
        # `splits`; the loop variable itself is overwritten in the body.
        for split in splits:
            split = np.sort(pdfs)[60]
            plt.plot((0, 1000), (split, split), 'k-', lw=0.5)
            split = np.sort(pdfs)[50]
            plt.plot((0, 1000), (split, split), 'k.', lw=0.5)
            split = np.sort(pdfs)[45]
            plt.plot((0, 1000), (split, split), 'k--', lw=0.5)
            split = np.sort(pdfs)[40]
            plt.plot((0, 1000), (split, split), 'k--', lw=0.5)
        plt.savefig("plots/thresholds.png")
        plt.clf()

    ranked = np.argsort(pdfs)
    flagged = [row for row in ranked if data["class"][row] == -1]
    return np.array(flagged[:100])
def continuous():
    """Fit distributions to symptoms' duration data.

    Returns a dict holding the data under 'x' and the fitted parameters of
    a normal, a zero-located log-normal and a zero-located gamma
    distribution under 'norm', 'lognorm' and 'gamma'.
    """
    durations = _symptoms_data()

    fits = {'x': durations}
    fits['norm'] = norm.fit(durations)
    fits['lognorm'] = lognorm.fit(durations, floc=0)
    fits['gamma'] = gamma.fit(durations, floc=0)
    return fits
def fit_gamma_sp(trace,factor=5,plot=False):
    """Fit a widened, zero-located gamma to column 4 of ``trace``.

    The fitted shape is divided by ``factor`` and the fitted scale is
    multiplied by it.  With ``plot`` equal to True, the widened density is
    drawn over a histogram of the column.

    Returns ``(shape / factor, loc, scale * factor)``.
    """
    column = trace[:,4]
    a, loc, theta = gamma.fit(column, floc=0)
    wide_shape = a/factor
    wide_scale = theta*factor

    if plot == True:
        upper = max(column)
        grid = np.linspace(0, upper*2, num=500)
        plt.plot(grid, gamma.pdf(grid, wide_shape, loc, wide_scale))
        plt.hist(column, bins=50, density=True)
        plt.show()

    return wide_shape, loc, wide_scale
def gbmodelpredictrv(rvinput, rvhandperc):
    """Reweight hand ranks by a gamma density fitted near the prediction.

    The model prediction for ``rvinput`` selects the rows of
    ``probdensityrv`` whose column 0 lies strictly within 0.025 of it.  A
    gamma distribution is fitted to their 'test actual' values, and every
    'rank' in ``rvhandperc`` is multiplied by that density evaluated at
    the rank.  ``rvhandperc`` is modified in place and returned.
    """
    prediction = modelefftr.predict(rvinput)
    nearby = (probdensityrv[0] > prediction - .025) & (
        probdensityrv[0] < prediction + .025)
    distribution = probdensityrv[nearby]['test actual']
    shape, loc, scale = gamma.fit(distribution)

    for row in rvhandperc.index:
        rank = rvhandperc.at[row, 'rank']
        weight = gamma.pdf(rvhandperc.at[row, 'rank'], shape, loc=loc,
                           scale=scale)
        rvhandperc.at[row, 'rank'] = rank * weight
    return rvhandperc
def simulate_gamma(psth, trials, duration, num_trials=20):
    """Simulate spike trains from a PSTH using gamma-distributed rescaled ISIs.

    psth       -- intensity per time bin; bins are 1 ms wide (dt = 0.001)
    trials     -- list of spike-time sequences used to estimate the ISIs
    duration   -- length of a trial, in the units of the spike times
    num_trials -- number of trials to simulate

    Each observed inter-spike interval is rescaled to the difference in
    cumulative intensity between its two spikes.  A gamma distribution is
    fitted to the rescaled intervals, and new trials are generated by
    integrating the PSTH until the integral reaches a drawn interval.

    Returns a list of ``num_trials`` lists of spike times.
    Raises ValueError when no valid inter-spike interval is available.
    """
    dt = 0.001
    n_bins = len(psth)
    # Cumulative intensity does not depend on the trial: compute it once.
    cum_intensity = np.cumsum(psth)*dt

    # Rescale the ISIs.
    rs_isis = []
    for trial in trials:
        if len(trial) < 1:
            continue
        for k,ti in enumerate(trial[1:]):
            tj = trial[k]
            if ti > duration or tj > duration or ti < 0.0 or tj < 0.0:
                continue
            # A spike exactly at `duration` would map to index len(psth),
            # one past the end, so clamp to the last bin.
            ti_index = min(int((ti / duration) * n_bins), n_bins - 1)
            tj_index = min(int((tj / duration) * n_bins), n_bins - 1)
            # Rescaled time is the difference in cumulative intensity.
            ui = cum_intensity[ti_index] - cum_intensity[tj_index]
            if ui < 0.0:
                print('ui < 0! ui=%0.6f, csum[ti]=%0.6f, csum[tj]=%0.6f' % (ui, cum_intensity[ti_index], cum_intensity[tj_index]))
            else:
                rs_isis.append(ui)
    rs_isis = np.array(rs_isis)
    if rs_isis.size == 0:
        raise ValueError('no valid inter-spike intervals to fit')

    # Fit a gamma distribution to the rescaled ISIs.
    gamma_alpha,gamma_loc,gamma_beta = gamma.fit(rs_isis)
    print('Rescaled ISI Gamma Fit Params: alpha=%0.3f, beta=%0.3f, loc=%0.3f' % (gamma_alpha, gamma_beta, gamma_loc))

    # Simulate new trials using rescaled ISIs.
    new_trials = []
    for _ in range(num_trials):
        ntrial = []
        next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc,scale=gamma_beta)
        integral = 0.0
        for t_index,pval in enumerate(psth):
            integral += pval*dt
            if integral >= next_rs_time:
                # Spike: record it, reset the integral, draw a new ISI.
                ntrial.append(t_index*dt)
                integral = 0.0
                next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc,scale=gamma_beta)
        new_trials.append(ntrial)

    return new_trials
def _test_fit_trans(self):
    """Fit a gamma to transformed detection data and plot the result.

    Column 1 of ``detection.dat`` from row 77 onwards is passed through
    ``self.trans_conc_particle`` and fitted with a gamma distribution.
    The fitted parameters and the products shape * scale and
    shape * scale**2 are printed, and the density of a gamma with the
    fitted shape and scale is plotted over [1, 250].
    """
    filename = "../../../bridge/cfg/detection.dat"
    dat = np.loadtxt(filename)
    fit_shape, fit_loc, fit_scale = gamma.fit(
        self.trans_conc_particle(dat[77:, 1]))
    # Single-argument print calls give the same space-separated output on
    # Python 2 and Python 3.
    print('%s %s %s' % (fit_shape, fit_loc, fit_scale))

    # NOTE(review): the fitted location is left out of these moments and
    # of the frozen distribution below -- confirm that is intended.
    mean = fit_shape * fit_scale
    variance = mean * fit_scale
    print('%s %s' % (mean, variance))

    x = np.linspace(1, 250, 10000)
    rv = gamma(fit_shape, scale=fit_scale)
    fig, ax = plt.subplots(1, 1)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    plt.show()
def calcSPI(duration, model, cid):
    """Calculate Standardized Precipitation Index for specified month
    *duration*. Need a climatology of precipitation stored in the database
    used in a VIC *model* simulation.

    For *duration* below 1 a warning is printed and an array of zeros is
    returned, with one entry per day between the model start date (offset
    by ``skipyear``) and end date.  Otherwise the first column of the
    forcing file of grid cell *cid* is read, smoothed, fitted with a gamma
    distribution and mapped through the standard normal inverse CDF.
    """
    first_day = date(model.startyear + model.skipyear, model.startmonth,
                     model.startday)
    last_day = date(model.endyear, model.endmonth, model.endday)
    nt = (last_day - first_day).days + 1

    if duration < 1:
        print(
            "WARNING! Cannot calculate SPI with {0} months duration.".format(duration))
        return np.zeros(nt)

    forcing_file = "{0}/forcings/data_{1:.{3}f}_{2:.{3}f}".format(
        model.model_path, model.gid[cid][0], model.gid[cid][1],
        model.grid_decimal)
    values = np.loadtxt(forcing_file)[:, 0]
    origin = date(model.startyear, model.startmonth, model.startday)
    p = pandas.Series(values,
                      [origin + timedelta(t) for t in range(len(values))])

    # NOTE(review): pandas.rolling_mean and resample(how=...) exist only in
    # old pandas releases -- confirm the supported pandas version.
    p[duration:] = pandas.rolling_mean(p.resample(
        'M', how='mean'), duration).values[duration:]
    p[:duration] = 0.0

    g1, g2, g3 = gamma.fit(p)
    cdf = gamma.cdf(p, g1, g2, g3)
    return norm.ppf(cdf)
def generate_slm(training_rows, slm_dir, do_plot=True):
    """Fit a gamma model to segment lengths, then write and compile the SLM.

    A segment is a run of consecutive rows ending at a row whose column 6
    is truthy when converted to ``int``.  The lengths of all completed
    segments are fitted with a gamma distribution whose location is fixed
    at 0.  The frozen distribution is written to ``<slm_dir>/slm.fxt``,
    optionally plotted, and the result is compiled.

    Args:
        training_rows: iterable of rows; element 6 is the boundary flag.
        slm_dir: directory receiving ``slm.fxt`` and the compiled model.
        do_plot: when true, also plot the fit against the raw lengths.
    """
    fxt_path = os.path.join(slm_dir, "slm.fxt")

    # Tally how many segments have each length.  A trailing run that never
    # reaches a boundary row is not counted.
    length_histogram = Counter()
    run_length = 1
    for row in training_rows:
        if int(row[6]):
            length_histogram[run_length] += 1
            run_length = 1
        else:
            run_length += 1

    # Expand the tally back into one sample per segment for fitting/plotting.
    samples = list(length_histogram.elements())
    support = range(max(samples) + 1)

    # Fit with the location pinned at zero and freeze the fitted model.
    length_model = gamma(*gamma.fit(samples, floc=0))

    write_slm(fxt_path, support, length_model)
    if do_plot:
        plot_graph(support, length_model, samples)
    # Compile the written SLM to the binary .fst format.
    compile_slm(slm_dir)
# Plot, for every relationship with at least LENGTH_CUTOFF samples, a
# normalized histogram of its lengths together with a fitted gamma PDF,
# arranged on a near-square grid of subplots.
# NOTE(review): pickle can execute arbitrary code on load -- only use this
# with a lengths_file.pickle you produced yourself.
with open("lengths_file.pickle", "rb") as lengths_file:
    lengths = load(lengths_file)

# Size the grid with the same predicate the plotting loop uses
# (len >= LENGTH_CUTOFF).  Counting with a strict '>' left out data sets of
# exactly LENGTH_CUTOFF samples that the loop still plots, so the subplot
# index could run past the grid.
plot_count = sum(1 for data in lengths.values() if len(data) >= LENGTH_CUTOFF)
# Keep at least a 1x1 grid so an empty selection does not divide by zero.
num_col = max(1, int(floor(sqrt(plot_count))))
num_row = max(1, int(ceil(plot_count / float(num_col))))
sorted_relationships = sorted(lengths.keys(), key = sum)

plt.figure(1)
subplot_num = 1
for relationship in sorted_relationships:
    length_data = lengths[relationship]
    length_data.sort()  # sorted in place so the PDF line is drawn left to right
    if len(length_data) < LENGTH_CUTOFF:
        continue
    plt.subplot(num_row, num_col, subplot_num)
    # NOTE(review): newer matplotlib replaces 'normed' with 'density' --
    # confirm against the matplotlib version in use.
    plt.hist(length_data, bins = 200, normed = True)
    plt.title(str(relationship))
    fit = gamma.fit(length_data)
    pdf = gamma(*fit).pdf(length_data)
    plt.plot(length_data, pdf)
    subplot_num += 1
plt.tight_layout()
plt.show()
def compile(alphabet, words, nonwords):
    """Build the packed transition table and the JavaScript classifier.

    Counts (state, symbol) transitions over ``words``, where a state is the
    up-to-``args.max_state_size`` symbols preceding a position, converts
    the counts to log inverse frequencies, flattens them through a fitted
    gamma CDF, quantizes them to ``args.transition_bits`` bits and packs
    them into bytes.  It then fits gamma distributions to the summed
    per-string scores of ``words`` and ``nonwords`` and emits JavaScript
    that unpacks the table and compares the two fitted densities.

    Reads ``args``, ``tqdm``, ``plt`` and ``Path`` from the enclosing
    module.  Note that this function and its local ``all`` shadow the
    builtins of the same names.

    Args:
        alphabet: sequence of symbols.
        words: strings used for transition counting and for the "words" fit.
        nonwords: strings used for the "nonwords" fit.

    Returns:
        ``(code, data, is_gzipped)``: the encoded JS loader, the payload
        (packed table followed by the encoded JS classifier source, gzipped
        when requested and smaller), and whether the payload was gzipped.
    """
    print(' Generating all possible transitions...')
    from itertools import product
    # Every (state, symbol) pair for state sizes 0..max_state_size; the
    # symbol None marks end of string.
    all = []
    for state_size in range(args.max_state_size + 1):
        all += product(product(alphabet, repeat = state_size), [*alphabet, None])

    def of(string):
        # Yield (preceding context, symbol) for each position, then the
        # final context paired with None.
        for i in range(len(string)):
            yield string[max(0, i - args.max_state_size):i], string[i]
        yield string[max(0, len(string) - args.max_state_size):], None

    from collections import Counter
    counts = Counter()
    for word in tqdm(words, ' Counting transitions', leave = True):
        for state, symbol in of(word):
            counts[state, symbol] += 1
    state_counts = Counter()
    for state, symbol in tqdm(counts, ' Counting states', leave = True):
        state_counts[state] += counts[state, symbol]
    import numpy as np
    logprobs = np.empty(len(all))
    for i, (state, symbol) in enumerate(tqdm(all, ' Computing conditional transition probabilities', leave = True)):
        try:
            # log(state count / transition count); transitions with a zero
            # count divide by zero and are stored as +inf.
            logprobs[i] = np.log(state_counts[state] / counts[state, symbol])
        except ZeroDivisionError:
            logprobs[i] = np.inf
    print(' Fitting flattening distribution...')
    from scipy.stats import gamma
    params = gamma.fit(logprobs[logprobs != np.inf])
    print(' Flattening...')
    # Map every value through the fitted CDF.
    logprobs = gamma.cdf(logprobs, *params)
    lower_bound = np.min(logprobs)
    upper_bound = np.max(logprobs[logprobs != 1])
    new_logprobs = np.empty(len(logprobs), int)
    for i, logprob in enumerate(tqdm(logprobs, ' Discretizing', leave = True)):
        if logprob == 1:
            # The all-ones code is reserved for CDF value 1.
            new_logprobs[i] = 2 ** args.transition_bits - 1
        else:
            # Linear quantization of [lower_bound, upper_bound] onto the
            # remaining codes 0 .. 2**bits - 2.
            new_logprobs[i] = round((logprob - lower_bound) * ((2 ** args.transition_bits - 2) / (upper_bound - lower_bound)))
    logprobs = new_logprobs
    data = bytearray()
    bit_buffer = 0
    bit_buffer_size = 0
    for logprob in tqdm(logprobs, ' Packing', leave = True):
        # Append the code to the bit buffer; flush whenever the buffer
        # holds a whole number of bytes.
        bit_buffer = bit_buffer << args.transition_bits | int(logprob)
        bit_buffer_size += args.transition_bits
        if bit_buffer_size % 8 == 0:
            data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')
            bit_buffer = 0
            bit_buffer_size = 0
    # Pad the tail with zero-valued codes until it is byte aligned.
    while bit_buffer_size % 8 != 0:
        bit_buffer = bit_buffer << args.transition_bits
        bit_buffer_size += args.transition_bits
    data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')
    # Reverse the quantization in Python so that the string-level fits
    # below use the dequantized values rather than the exact ones.
    old_logprobs = np.empty(len(logprobs))
    for i, logprob in enumerate(tqdm(logprobs, ' Undiscretizing...', leave = True)):
        if logprob == 2 ** args.transition_bits - 1:
            old_logprobs[i] = 1
        else:
            old_logprobs[i] = lower_bound + logprob * ((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
    print(' Unflattening...')
    old_logprobs = gamma.ppf(old_logprobs, *params)
    old_logprobs = dict(zip(all, old_logprobs))

    def params_of(strings):
        # Score each string as the sum of its transition values, fit a
        # gamma distribution to the finite scores, and draw the histogram
        # with the fitted PDF on the current matplotlib figure.
        strings_logprobs = np.empty(len(strings))
        for i, string in enumerate(strings):
            strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string))
        strings_params = gamma.fit(strings_logprobs[strings_logprobs != np.inf])
        _, bins, _ = plt.hist(strings_logprobs[strings_logprobs != np.inf], 500, histtype = 'step', normed = True)
        plt.plot(bins, gamma.pdf(bins, *strings_params))
        return strings_params

    print(' Fitting words distribution...')
    words_params = params_of(words)
    print(' Fitting nonwords distribution...')
    nonwords_params = params_of(nonwords)

    def minify(code):
        # Run the JS source through the uglifyjs script located next to
        # this file when args.minify is set; otherwise return it unchanged.
        if args.minify:
            import subprocess
            p = subprocess.run([str(Path(__file__).parent / 'node_modules/uglify-js/bin/uglifyjs'), '--screw-ie8', '--mangle', 'sort,toplevel', '--compress', '--bare-returns', ], input = code.encode(), stdout = subprocess.PIPE, stderr = subprocess.PIPE)
            if p.returncode != 0:
                # Forward the tool's stderr before raising.
                import sys
                sys.stderr.buffer.write(p.stderr)
                p.check_returncode()
            code = p.stdout.decode()
        return code

    print(' Generating JS code...')
    # Loader: the JS source is sliced from the buffer starting at
    # len(data), which at this point is the size of the packed table only.
    code = minify(r''' exports.init = function(buffer) { exports.test = (new Function('buffer', buffer.utf8Slice(''' + str(len(data)) + r''')))(buffer); }; ''').encode()
    # Classifier source, appended after the packed table.  The Python
    # values spliced in are the alphabet, max_state_size, transition_bits,
    # the quantization bounds and the three sets of fitted parameters.
    data += minify(
        r''' var abs = Math.abs; var min = Math.min; var max = Math.max; var alphabet = [ '''
        + r''' '''.join('"' + symbol + '",' for symbol in alphabet)
        + r''' ]; var of; (function() { function fold(string) { string = Array.from(string); for (var 
i = alphabet.length - 1; alphabet[i].length > 1; --i) { for (var j = 0; j <= string.length - alphabet[i].length; ++j) { if (string.slice(j, j + alphabet[i].length).join('') == alphabet[i]) { string.splice(j, alphabet[i].length, alphabet[i]); } } } return string; } of = function(string) { string = fold(string); var ofString = []; for (var i = 0; i < string.length; ++i) { ofString.push([string.slice(max(0, i - '''
        + str(args.max_state_size)
        + r'''), i), string[i]]); } ofString.push([string.slice(max(0, string.length - '''
        + str(args.max_state_size)
        + r''')), null]); return ofString; }; })(); var all; (function() { function product(xs, ys) { var result = []; for (var i = 0; i < xs.length; ++i) { for (var j = 0; j < ys.length; ++j) { result.push([xs[i], ys[j]]); } } return result; } function power(a, k) { if (k == 0) { return [[]]; } var result = []; for (var i = 0; i < a.length; ++i) { var b = power(a, k - 1); for (var j = 0; j < b.length; ++j) { result.push([a[i]].concat(b[j])); } } return result; } all = []; for (var stateSize = 0; stateSize <= '''
        + str(args.max_state_size)
        + r'''; ++stateSize) { all = all.concat(product(power(alphabet, stateSize), alphabet.concat([null]))); } })(); var gammaPdf, gammaPpf; (function() { var pow = Math.pow; var exp = Math.exp; var log = Math.log; var sqrt = Math.sqrt; var cof = [ 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5, ]; function ln(x) { var j = 0; var ser = 1.000000000190015; var xx, y, tmp; tmp = (y = xx = x) + 5.5; tmp -= (xx + 0.5) * log(tmp); for (; j < 6; j++) ser += cof[j] / ++y; return log(2.5066282746310005 * ser / xx) - tmp; } gammaPdf = function(x, a) { if (x < 0) return 0; if (x === 0 && a === 1) return 1; return exp((a - 1) * log(x) - x - ln(a)); }; function lowReg(a, x) { var aln = ln(a); var ap = a; var sum = 1 / a; var del = sum; var b = x + 1 - a; var c = 1 / 1.0e-30; var d = 1 / b; var h = d; var i = 1; var ITMAX = -~(log((a >= 1) ? 
a : 1 / a) * 8.5 + a * 0.4 + 17); var an, endval; if (x < 0 || a <= 0) { return NaN; } else if (x < a + 1) { for (; i <= ITMAX; i++) { sum += del *= x / ++ap; } return sum * exp(-x + a * log(x) - aln); } for (; i <= ITMAX; i++) { an = -i * (i - a); b += 2; d = an * d + b; c = b + an / c; d = 1 / d; h *= d * c; } return 1 - h * exp(-x + a * log(x) - aln); } gammaPpf = function(p, a) { var j = 0; var a1 = a - 1; var EPS = 1e-8; var gln = ln(a); var x, err, t, u, pp, lna1, afac; if (p > 1) return NaN; if (p == 1) return Infinity; if (p < 0) return NaN; if (p == 0) return 0; if (a > 1) { lna1 = log(a1); afac = exp(a1 * (lna1 - 1) - gln); pp = (p < 0.5) ? p : 1 - p; t = sqrt(-2 * log(pp)); x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t; if (p < 0.5) x = -x; x = max(1e-3, a * pow(1 - 1 / (9 * a) - x / (3 * sqrt(a)), 3)); } else { t = 1 - a * (0.253 + a * 0.12); if (p < t) x = pow(p / t, 1 / a); else x = 1 - log(1 - (p - t) / (1 - t)); } for(; j < 12; j++) { if (x <= 0) return 0; err = lowReg(a, x) - p; if (a > 1) t = afac * exp(-(x - a1) + a1 * (log(x) - lna1)); else t = exp(-x + a1 * log(x) - gln); u = err / t; x -= (t = u / (1 - 0.5 * min(1, u * ((a - 1) / x - 1)))); if (x <= 0) x = 0.5 * (x + t); if (abs(t) < EPS * x) break; } return x; }; })(); var logprobs = {}; var bitBuffer = 0, bitBufferSize = 0; var bufferOffset = 0; for (var i = 0; i < all.length; ++i) { while (bitBufferSize < '''
        + str(args.transition_bits)
        + r''') { bitBuffer = bitBuffer << 8 | buffer.readUInt8(bufferOffset++); bitBufferSize += 8; } var logprob = bitBuffer >> (bitBufferSize - '''
        + str(args.transition_bits)
        + r''') & '''
        + hex(2 ** args.transition_bits - 1)
        + r'''; bitBufferSize -= '''
        + str(args.transition_bits)
        + r'''; if (logprob == '''
        + str(2 ** args.transition_bits - 1)
        + r''') { logprob = 1; } else { logprob = '''
        + str(lower_bound)
        + r''' + logprob * '''
        + str((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
        + r'''; } logprob = '''
        + str(params[1]) +
        r''' + gammaPpf(logprob, '''
        + str(params[0])
        + r''') * '''
        + str(params[2])
        + r'''; logprobs[all[i]] = logprob; } return function(string) { var stringLogprob = 0; var ofString = of(string); for (var i = 0; i < ofString.length; ++i) { stringLogprob += logprobs[ofString[i]]; } if (stringLogprob == Infinity) { return false; } var wordsDensity = gammaPdf((stringLogprob - '''
        + str(words_params[1])
        + r''') / '''
        + str(words_params[2])
        + r''', '''
        + str(words_params[0])
        + r''') / '''
        + str(words_params[2])
        + r'''; var nonwordsDensity = gammaPdf((stringLogprob - '''
        + str(nonwords_params[1])
        + r''') / '''
        + str(nonwords_params[2])
        + r''', '''
        + str(nonwords_params[0])
        + r''') / '''
        + str(nonwords_params[2])
        + r'''; if (wordsDensity > nonwordsDensity) { return true; } if (wordsDensity < nonwordsDensity) { return false; } return Math.random() >= 0.5; }; ''').encode()
    data, is_gzipped = bytes(data), False
    if args.gzip:
        import gzip
        print(' Gzipping...')
        gzipped_data = gzip.compress(data)
        # Keep the compressed payload only when it is actually smaller.
        if len(gzipped_data) < len(data):
            data, is_gzipped = gzipped_data, True
    return code, data, is_gzipped
# Script fragment: print summary statistics of ``x`` and set up starting
# values (mixture weights p_m, shapes alpha_m, rates beta_m) before the
# iteration loop.  ``x``, ``k_underline`` and ``random`` come from earlier in
# the file.
print(x.mean(), x.var())
m = 0 # Number of current step
print('m', m)
# Initial values TODO
# NOTE: this first set of starting values is overwritten further down
# before it is used for anything other than printing.
p_m = [0.9, 0.1] # [1 / k for i in k_underline]
alpha_m =[0.2, 0.3]
beta_m = [0.0002, 0.0001]
print('p_m', p_m)
print('alpha_m', alpha_m)
print('beta_m', beta_m)
# Initial values TODO
# Single-gamma fit of all the data; its shape and 1/scale (rate) seed the
# per-component starting values.
# NOTE(review): in scipy's fit(), ``loc=0`` is only a starting guess for the
# location; ``floc=0`` would hold it fixed -- confirm which is intended.
s, loc, t = gamma.fit(x, loc = 0)
alpha_est = s
beta_est = 1 / t
print(alpha_est, loc, beta_est)
p_m = [0.4, 0.6]
# Perturb the single-fit estimates by raising them to a random power drawn
# via random() * 2, once per entry of k_underline.
alpha_m = [alpha_est ** (random() * 2) for i in k_underline]
beta_m = [beta_est ** (random() * 2) for i in k_underline]
# Flat parameter vector: weights, then shapes, then rates.
theta_m = p_m + alpha_m + beta_m
print('p_m', p_m)
print('alpha_m', alpha_m)
print('beta_m', beta_m)
# NOTE(review): no exit condition is visible in this excerpt -- confirm that
# the rest of the loop body breaks out.
while True:
    # Prepare for next step
    m += 1
    print('m', m)
# NOTE(review): taille_A / taille_B are not defined within this excerpt;
# these four plotting calls continue code that starts above it -- confirm
# their indentation against the surrounding file.
plt.plot(taille_A, lw=2)
plt.plot(taille_B, lw=2)
plt.xlabel('Nombre de manches')
plt.grid(True)


# Play N games and collect the statistics.
def play_N_batailles(N):
    """Play ``N`` games and return the number of rounds each one took.

    Each game builds a fresh card set with ``new_card_set()``, splits it
    with ``sort_cards()`` and plays it with ``play_bataille()``; the number
    of rounds is taken as the length of the first sequence that
    ``play_bataille`` returns.
    """
    nb_manches = []
    for idx in range(N):
        cards = new_card_set()
        A, B = sort_cards(cards)
        taille_A, taille_B = play_bataille(A, B)
        nb_manches.append(len(taille_A))
    return nb_manches


N = 10000
nb_manches = np.array(play_N_batailles(N))

# Histogram of the round counts with a fitted gamma curve on top.
plt.figure(2)
plt.hist(nb_manches, bins=500, color='b', alpha=0.5)

from scipy.stats import gamma
params = gamma.fit(nb_manches)
x = np.arange(1, N)
#plt.figure(3)
# The histogram shows raw counts, so the PDF is multiplied by 2*N to put it
# on a comparable scale.
# NOTE(review): the factor 2 presumably stands for the bin width -- confirm.
plt.plot(x, 2*N*gamma.pdf(x, *params[:-2], loc=params[-2], scale=params[-1]), lw=2, color='r')
plt.xlim(0, 300)
23304334, 130258928, 18452922, 42620009, 77351045, 76032324, 44196273, 49036974, 23245119, 50656670, 84837088, 2089074, 52517589, 21469409, 106694589, 67063796, 16053222, 101270899, 15620252, 18355964, 839197, 31083111, 66698677]
# (The list literal closed above starts before this excerpt.)

# Earlier experiment with a truncated normal, kept for reference:
# mean = np.mean(data)
# var = np.var(data)
# fit = truncnorm.fit(data, 0, 2866387308, loc = mean, scale = var)
# print(fit)
# print(truncnorm(*fit).rvs(100))

# Fit a gamma distribution to the sorted lengths stored under key (5, 5)
# and plot the fitted PDF over a normalized histogram of the same data.
# vector = lengths[(2, 2)]
vector = np.array(sorted(lengths[(5, 5)]))
# mean / var / std are only referenced by the commented-out truncnorm code.
mean = np.mean(vector)
var = np.var(vector)
std = np.std(vector)
# fit = truncnorm.fit(vector, a = 0, b = 2866387308, loc = mean, scale = var)
fit = gamma.fit(vector)
print(fit)
# pdf = gamma.pdf(vector, *fit[:-2], loc = fit[-2], scale = fit[-1])
# Evaluate the fitted PDF at the data points themselves; ``vector`` is
# sorted, so the line below is drawn left to right.
pdf = gamma(*fit).pdf(vector)
print(pdf)
# print(gamma.pdf(vector, alpha))
plt.figure()
# NOTE(review): newer matplotlib replaces 'normed' with 'density' -- confirm
# against the matplotlib version in use.
plt.hist(vector, bins = 20, normed = True)
# plt.hist(truncnorm(*fit).rvs(10000), bins = 20, normed = True, alpha = 0.5)
plt.plot(vector, pdf)
# plt.hist(gamma(*fit).rvs(10000), bins = 20, normed = True, alpha = 0.5)
plt.show()
def match_single_track_dist(model_track, obs_track):
    """Map the hail-size distributions of one observed track onto a model track.

    For every observed timestep, the values inside the track mask that
    exceed ``self.mrms_ew.min_thresh`` are fitted with a gamma distribution
    whose location is fixed 0.1 below the smallest kept value.  The fit
    parameters and the maximum value are then carried over to the model
    track's times.  When both tracks have more than one time, each column
    is linearly interpolated over time normalized to [0, 1] (0 outside the
    observed range); otherwise the first observed row is copied to every
    model time.

    ``self`` is not a parameter: it is read from the enclosing scope.

    Returns:
        A DataFrame indexed by ``model_track.times`` with columns
        Max_Hail_Size, Shape, Location and Scale.
    """
    label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
    obs_hail_dists = pd.DataFrame(index=obs_track.times, columns=label_columns)
    model_hail_dists = pd.DataFrame(index=model_track.times, columns=label_columns)

    def _unit_scaled(times):
        # Rescale a time axis linearly so it runs from 0 to 1.
        return 1.0 / (times.max() - times.min()) * (times - times.min())

    # Per-timestep gamma fit of the masked, above-threshold observations.
    for t, step in enumerate(obs_track.timesteps):
        obs_time = obs_track.times[t]
        keep = (obs_track.masks[t] == 1) & (step > self.mrms_ew.min_thresh)
        step_vals = step[keep]
        fixed_loc = step_vals.min() - 0.1
        obs_hail_dists.loc[obs_time, ["Shape", "Location", "Scale"]] = gamma.fit(step_vals, floc=fixed_loc)
        obs_hail_dists.loc[obs_time, "Max_Hail_Size"] = step_vals.max()

    both_have_duration = obs_track.times.size > 1 and model_track.times.size > 1
    if not both_have_duration:
        # Nothing to interpolate over: reuse the first observed row.
        first_obs_time = obs_track.times[0]
        for param in obs_hail_dists.columns:
            model_hail_dists.loc[model_track.times, param] = obs_hail_dists.loc[first_obs_time, param]
        return model_hail_dists

    normalized_obs_times = _unit_scaled(obs_track.times)
    normalized_model_times = _unit_scaled(model_track.times)
    for col in label_columns:
        interp_func = interp1d(normalized_obs_times, obs_hail_dists[col], kind="linear",
                               bounds_error=False, fill_value=0)
        model_hail_dists.loc[model_track.times, col] = interp_func(normalized_model_times)
    return model_hail_dists
'''
import json, sys, collections
import numpy as np
import scipy
from scipy.stats import gamma
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Load the pairwise values from ALL_PAIR_JACCARD.json.
# NOTE(review): presumably a square matrix of Jaccard distances -- confirm.
with open("ALL_PAIR_JACCARD.json") as f:
    all_pairs = json.load(f)

print("Get all pairs distribution")
# Gamma fit over the squares of all values, flattened to one dimension.
all_pair_dists = np.square(np.array(all_pairs).flatten())
gamma_x = gamma.fit(all_pair_dists)

print("Get top k distance distribution")
k = 50
# Sort every row in ascending order, so column i holds each row's
# (i+1)-th smallest value, then square.
topk_dists = np.square(np.sort(all_pairs, axis=1))
gamma_xk = []
for i in range(k):
    # One gamma fit per rank: the i-th column across all rows.
    dists = topk_dists[:,i]
    gamma_xk.append(gamma.fit(dists))

max_x = np.max(all_pair_dists)
# Output: [overall fit parameters, list of per-rank fit parameters,
# largest squared value].
with open("JACCARD_DIST_DIST.json", "w") as f:
    d = [gamma_x, gamma_xk, max_x]
    json.dump(d, f)
print("Output file")
def match_hail_size_step_distributions(self, model_tracks, obs_tracks, track_pairings):
    """Attach observed hail-size distribution parameters to each model track step.

    Every model track receives an ``observations`` DataFrame indexed by its
    times.  Rows of ``track_pairings`` are consumed in order, one per model
    track timestep.  For a step whose "Matched" value is positive, the
    observed values of all paired observed steps are pooled (inside the
    mask and at or above ``self.mrms_ew.min_thresh``) and fitted with a
    gamma distribution whose location is fixed 0.1 below the pooled
    minimum.  Unmatched steps keep zeros.

    Args:
        model_tracks: iterable of objects with a ``times`` attribute.
        obs_tracks: indexable collection of objects with ``timesteps`` and
            ``masks`` sequences.
        track_pairings: DataFrame with "Matched" and "Pairings" columns;
            each "Pairings" entry is an array of
            (observed track index, observed step index) rows.

    Returns:
        None; results are stored on ``model_track.observations``.
    """
    label_columns = ["Matched", "Max_Hail_Size", "Num_Matches", "Shape", "Location", "Scale"]

    def _valid_hail(track_index, step_index):
        # Flattened observed values inside the mask and at/above threshold.
        matched_track = obs_tracks[track_index]
        values = matched_track.timesteps[step_index].ravel()
        mask = matched_track.masks[step_index].ravel()
        return values[(mask == 1) & (values >= self.mrms_ew.min_thresh)]

    pairing_row = 0  # running row position in track_pairings
    for model_track in model_tracks:
        observations = pd.DataFrame(index=model_track.times, columns=label_columns, dtype=np.float64)
        observations.loc[:, :] = 0
        observations["Matched"] = observations["Matched"].astype(np.int32)
        model_track.observations = observations

        for time in model_track.times:
            observations.loc[time, "Matched"] = track_pairings.loc[pairing_row, "Matched"]
            if observations.loc[time, "Matched"] > 0:
                step_pairs = track_pairings.loc[pairing_row, "Pairings"]
                combined_hail_sizes = np.concatenate(
                    [_valid_hail(pair[0], pair[1]) for pair in step_pairs])
                fixed_loc = combined_hail_sizes.min() - 0.1
                observations.loc[time, "Max_Hail_Size"] = combined_hail_sizes.max()
                observations.loc[time, "Num_Matches"] = step_pairs.shape[0]
                observations.loc[time, ["Shape", "Location", "Scale"]] = gamma.fit(
                    combined_hail_sizes, floc=fixed_loc)
            pairing_row += 1