Example #1
    def prob_alias(self, plot=False):
        """Returns tuple (threshold, probability)"""

        from scipy.stats import gamma
        # scipy-ref.pdf Section 5.13 on page 390

        if plot:
            import matplotlib.pyplot as plt
            plt.ion()
            plt.clf()

        nd = self.get_all_noise_dists()
        a, loc, scale = gamma.fit(nd)
        ndrv = gamma(a, loc, scale)
        if plot:
            plt.hist(nd, density=True)  # 'normed' was renamed to 'density' in newer Matplotlib
            x = range(max(nd))
            plt.plot(x, ndrv.pdf(x))

        icd = self.get_all_inter_chip_dists()
        a, loc, scale = gamma.fit(icd)
        icdrv = gamma(a, loc, scale)
        if plot:
            plt.hist(icd, density=True)
            x = range(max(icd))
            plt.plot(x, icdrv.pdf(x))

        # Here it goes!
        threshold = ndrv.ppf(0.997)
        if plot:
            plt.axvline(threshold)
        prob = icdrv.cdf(threshold)
        print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (
            threshold, prob))
        return threshold, prob
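
For reference, a minimal self-contained sketch of the fit / freeze / ppf / cdf pattern this example relies on; the data below are synthetic stand-ins, not values from the original project:

import numpy as np
from scipy.stats import gamma

rng = np.random.default_rng(0)
noise = rng.gamma(shape=2.0, scale=3.0, size=1000)    # stand-in for noise distances
inter = rng.gamma(shape=2.0, scale=30.0, size=1000)   # stand-in for inter-chip distances

a, loc, scale = gamma.fit(noise)                      # gamma.fit returns (shape, loc, scale)
threshold = gamma(a, loc, scale).ppf(0.997)           # 99.7% quantile of the fitted noise model
a, loc, scale = gamma.fit(inter)
prob = gamma(a, loc, scale).cdf(threshold)            # aliasing probability under the inter-chip fit
print('threshold=%f, probability of aliasing=%1.3e' % (threshold, prob))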
Example #2
File: RvGama.py Project: duartejr/pyutils
def rv_gama(hind, obs, fcst):

    # Fit a gamma distribution to the observed data
    obs = np.sort(obs)
    n_zeros_obs = len(np.where(obs == 0)[0])
    q = n_zeros_obs / float(len(obs))
    obs = obs[obs > 0]
    gamma_obs = gamma.fit(obs, floc=0)

    # Fit a gamma distribution to the model data
    hind = np.sort(hind)
    hind = hind[n_zeros_obs:]
    hind = hind[hind > 0]
    gamma_hind = gamma.fit(hind, floc=0)
    # Correct the forecast using the fitted gamma distributions
    if fcst < hind[0]:
        corr = 0
    else:
        prob_mod = gamma.cdf(fcst, *gamma_hind)
        H = q + (1 - q) * prob_mod
        corr = gamma.ppf(H, *gamma_obs)
        if np.isinf(corr):
            print('Warning: corrected forecast is infinite')

    return corr
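
A small illustration of the zero-inflated (mixed dry/wet) gamma mapping used in rv_gama above; the array and the stand-in CDF value are invented for demonstration only:

import numpy as np
from scipy.stats import gamma

obs = np.array([0.0, 0.0, 1.2, 3.4, 5.0, 8.1, 12.3])
q = np.mean(obs == 0)                         # fraction of dry (zero) observations
gamma_obs = gamma.fit(obs[obs > 0], floc=0)   # gamma fitted to wet days only

prob_mod = 0.7                                # stand-in for gamma.cdf(fcst, *gamma_hind)
H = q + (1 - q) * prob_mod                    # blend the dry-day mass with the wet-day CDF
corr = gamma.ppf(H, *gamma_obs)               # map back through the observed distribution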
Example #3
File: chipidentify.py Project: ominux/spat
    def prob_alias(self, plot=False):
        """Returns tuple (threshold, probability)"""

        from scipy.stats import gamma
        # scipy-ref.pdf Section 5.13 on page 390
        
        if plot:
            import matplotlib.pyplot as plt
            plt.ion()
            plt.clf()

        nd = self.get_all_noise_dists()
        a, loc, scale = gamma.fit(nd)
        ndrv = gamma(a, loc, scale)
        if plot:
            plt.hist(nd, density=True)  # 'normed' was renamed to 'density' in newer Matplotlib
            x = range(max(nd))
            plt.plot(x, ndrv.pdf(x))

        icd = self.get_all_inter_chip_dists()
        a, loc, scale = gamma.fit(icd)
        icdrv = gamma(a, loc, scale)
        if plot:
            plt.hist(icd, density=True)
            x = range(max(icd))
            plt.plot(x, icdrv.pdf(x))

        # Here it goes!
        threshold = ndrv.ppf(0.997)
        if plot:
            plt.axvline(threshold)
        prob = icdrv.cdf(threshold)
        print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (threshold, prob))
        return threshold, prob
Example #4
 def fit_gamma(self):
     """ Fit a gamma distribution to each chronology, get mean stats
     """
     self.gamma_alphas = []
     for ie_set in self.interevent_times.T:
         gamfit = gamma.fit(ie_set, floc=0)
         self.gamma_alphas.append(gamfit[0])
     # test fitting to all data at once
     gamfit_all = gamma.fit(self.interevent_times.flatten(), floc=0)
     self.gamma_alphas = np.array(self.gamma_alphas)
     self.mean_gamma_alpha = np.mean(self.gamma_alphas)
     self.mean_gamma_alpha_all = gamfit_all[0]
Example #5
	def __init__(self, wind_data):
		self.wind_data = wind_data
		#self.wind_data_long =pd.melt(wind_data)

		#initialize fitting model
		self.alpha_hat=np.empty(3) #initialize alpha estimates
		self.sigma_hat=np.empty(3) #initialize sigma estimates
		for i in range(3):
			self.alpha_hat[i], self.sigma_hat[i] = fmin(self.weibul_sq_error, [2,2], args=(self.wind_data.iloc[:,i].tolist(),))

		#parameters for gamma distribution of alpha and sigma
		self.shape_alpha, self.location_alpha, self.scale_alpha = gamma.fit(self.alpha_hat)
		self.shape_sigma, self.location_sigma, self.scale_sigma = gamma.fit(self.sigma_hat)
Example #6
    def fit(self,obs_data,sim_data):

        #: estimates parameters from provided data
        #: dry day fraction
        self.obs_param['c'] = (obs_data[obs_data==0].shape[0]) / (obs_data.shape[0])  
        self.sim_param['c'] = (sim_data[sim_data==0].shape[0]) / (sim_data.shape[0])

        #: fit gamma with non zero values with floc=0
        self.obs_param['a'], _, self.obs_param['b'] = gamma.fit(obs_data[obs_data>0], floc=0)
        self.sim_param['a'], _, self.sim_param['b'] = gamma.fit(sim_data[sim_data>0], floc=0)
        
        
        return self
Example #7
    def returnDistData(cls, self):
        gammaParam = gamma.fit(10**(self.data / 10))
        gammaDist = gamma.pdf(self.data, *gammaParam)

        rayleighParam = rayleigh.fit(self.data)
        rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

        normParam = norm.fit(self.data)
        normDist = norm.pdf(self.data, *normParam)

        logNormParam = lognorm.fit(self.data)
        lognormDist = lognorm.pdf(self.data, *logNormParam)

        nakagamiParam = nakagami.fit(self.data)
        nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

        exponParam = expon.fit(self.data)
        exponDist = expon.pdf(self.data, *exponParam)

        exponweibParam = exponweib.fit(self.data)
        weibDist = exponweib.pdf(self.data, *exponweibParam)

        distDF = pd.DataFrame(np.column_stack([
            gammaDist, rayleighDist, normDist, lognormDist, nakagamiDist,
            exponDist, weibDist
        ]),
                              columns=[
                                  'gammaDist', 'rayleighDist', 'normDist',
                                  'lognormDist', 'nakagamiDist', 'exponDist',
                                  'weibDist'
                              ])
        self.distDF = distDF
Example #8
def run_kstests(json_path, run_date, member):
    try:
        full_path = json_path + "/{0}/{1}/mesh_*.json".format(run_date, member)
        json_files = sorted(glob(full_path))
        ks_results = {"id":[], "ks":[]}
        for json_file in json_files:
            js = open(json_file)
            mesh_track = json.load(js)
            js.close()
            id = mesh_track["properties"]["id"]
            for m, mesh_obj in enumerate(mesh_track["features"]):
                step_id = id + "_{0:03d}".format(m)
                ts = np.array(mesh_obj["properties"]["timesteps"])
                mask = np.array(mesh_obj["properties"]["masks"])
                vals = ts[mask == 1]
                gdist = gamma.fit(vals, floc=vals.min()-0.1)
                sig = kstest(vals, gamma(*gdist).cdf)
                ks_results["id"].append(step_id)
                ks_results["ks"].append(sig)
                if sig[1] < 0.01:
                    print(step_id,)
                    print(sig[1],gdist)
                    print(np.sort(vals))
                    plt.figure(figsize=(8,8))
                    plt.pcolormesh(ts, alpha=0.5, cmap="YlOrRd", vmin=0, vmax=100)
                    pc = plt.pcolormesh(np.ma.array(ts, mask=mask==0), cmap="YlOrRd", vmin=0, vmax=100)
                    plt.title(step_id)
                    plt.colorbar(pc)
                    plt.savefig(step_id + ".png", bbox_inches="tight", dpi=150)
                    plt.close()
        ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"],columns=["D", "p-val"])
        print(ks_frame.shape[0])
    except Exception as e:
        raise e
    return ks_frame
Example #9
    def fit_gamma_distribution(self, desorption_thresh=5, plot=False, bins=15, normed=True):

        #  first get the detachment time
        dt = self.get_desorption_distribution(thresh=desorption_thresh)

        # you need to invert the data - to be able to fit gamma
        # dt = dt.max() - dt

        fit_alpha, fit_loc, fit_beta = gamma.fit(dt)

        x = np.linspace(0, dt.max(), 100)

        pdf_fitted = gamma.pdf(x, fit_alpha, fit_loc, fit_beta)

        # normalize
        # pdf_fitted = pdf_fitted / pdf_fitted.max()

        # this is the maximum of the distribution
        mode = x[pdf_fitted.argmax()]

        if plot:
            x = np.linspace(0, dt.max(), 100)

            plt.hist(dt, bins=bins, normed=normed)
            plt.plot(x, pdf_fitted)
            plt.show()

        return fit_alpha, fit_loc, fit_beta
Example #10
 def match_single_track_dist(model_track, obs_track):
     label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
     obs_hail_dists = pd.DataFrame(index=obs_track.times,
                                   columns=label_columns)
     model_hail_dists = pd.DataFrame(index=model_track.times,
                                     columns=label_columns)
     for t, step in enumerate(obs_track.timesteps):
         step_vals = step[(obs_track.masks[t] == 1) & (
             obs_track.timesteps[t] > self.mrms_ew.min_intensity)]
         min_hail = step_vals.min() - 0.1
         obs_hail_dists.loc[obs_track.times[t],
                            ["Shape", "Location", "Scale"]] = gamma.fit(
                                step_vals, floc=min_hail)
         obs_hail_dists.loc[obs_track.times[t],
                            "Max_Hail_Size"] = step_vals.max()
     if obs_track.times.size > 1 and model_track.times.size > 1:
         normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \
                                * (obs_track.times - obs_track.times.min())
         normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \
                                  * (model_track.times - model_track.times.min())
         for col in label_columns:
             interp_func = interp1d(normalized_obs_times,
                                    obs_hail_dists[col],
                                    kind="linear",
                                    bounds_error=False,
                                    fill_value=0)
             model_hail_dists.loc[model_track.times, col] = interp_func(
                 normalized_model_times)
     else:
         for param in obs_hail_dists.columns:
             model_hail_dists.loc[model_track.times,
                                  param] = obs_hail_dists.loc[
                                      obs_track.times[0], param]
     return model_hail_dists
Example #11
File: lq_gamma.py Project: mjafin/LongQC
def estimate_gamma_dist_scipy(vals):
    # shifting can happen (e.g. size selection of DNA fragments), but tentatively ignores this.
    alpha_hat, loc_hat, beta_hat = gamma.fit(vals, floc=0.0)

    logger.info("estimated Gamma dist params are a = %f, b = %f." % (alpha_hat, beta_hat))

    return (alpha_hat, beta_hat)
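
A quick illustration (synthetic data) of what floc=0 does in the call above: it pins the location parameter at zero, so the fit effectively estimates only the shape and scale:

import numpy as np
from scipy.stats import gamma

vals = np.random.default_rng(1).gamma(3.0, 2.0, size=1000)
a_free, loc_free, scale_free = gamma.fit(vals)               # three free parameters
a_fixed, loc_fixed, scale_fixed = gamma.fit(vals, floc=0.0)  # loc_fixed is exactly 0.0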
Example #12
def fit_gamma_param(estacion, mes, year_test='None'):
    # Fit gamma parameters for precipitation
    do, dm = select_data_period(estacion, 'precip', mes)
    cdf_limite = .9999999
    # Precipitation minimums (obs fixed at 0.1; for the model, the value that best fits the frequency)
    xo_min = 0.1
    minimos_pp = pd.read_excel('../datos/minimos_pp.xls', index_col=0)
    xm_min = minimos_pp.loc[mes, estacion]
    # Days with precipitation
    ppo_data = precipitation_days(do, xo_min)
    ppm_data = precipitation_days(dm, xm_min)
    # Fit a Gamma distribution over days with precipitation
    obs_gamma_param = gamma.fit(ppo_data, floc=0)
    mod_gamma_param = gamma.fit(ppm_data, floc=0)

    return obs_gamma_param, mod_gamma_param
Example #13
    def get_tonicdrive_stats(self,
                             remove_bad_data_indices=True,
                             visualize=False):  # Obs?
        """
        Fits a gamma distribution to "tonic drive" values

        :param remove_bad_data_indices: True/False (default True)
        :param visualize: True/False (default False)
        :return: loc (mean) and scale (SD) of the fitted gamma distribution
        """
        tonicdrive = self.read_tonicdrive()

        if remove_bad_data_indices:
            good_indices = np.setdiff1d(range(self.n_cells),
                                        self.bad_data_indices)
            tonicdrive = tonicdrive[good_indices]

        skew, mean, sd = gamma.fit(tonicdrive)
        print(len(tonicdrive))

        if visualize:
            x_min, x_max = gamma.ppf([0.001, 0.999],
                                     a=skew,
                                     loc=mean,
                                     scale=sd)
            xs = np.linspace(x_min, x_max, 100)
            plt.plot(xs, gamma.pdf(xs, a=skew, loc=mean, scale=sd))
            plt.hist(tonicdrive, density=True)
            plt.title(self.gc_type + ' ' + self.response_type)
            plt.xlabel('Tonic drive (a.u.)')
            plt.show()

        return mean, sd
Example #14
def _fit_gamma(sampleses, filename):
    """Fits a gamma distribution to the first 16 samples and plots the results

    Assuming that filename ends with ".pdf"
    """
    for i, samples in enumerate(sampleses[:16]):
        sample_mean = np.mean(samples)
        sample_var = np.var(samples)
        sample_median = np.median(samples)
        shape, loc, scale = gamma.fit(samples)
        stat, pval = kstest(
            samples,
            'gamma',
            args=(shape, loc, scale))
        fig, axis = plt.subplots(1, 1)
        axis.hist(samples, density=True)
        if i == 15:
            fig.savefig('last.pdf')
        plotx = np.linspace(np.min(samples), np.max(samples))
        axis.plot(
            plotx,
            gamma.pdf(plotx, shape, loc=loc, scale=scale),
            linewidth=3)
        axis.set_title(
            'shape='+str(shape)+'; loc='+str(loc) +
            '; scale='+str(scale)+'\n' +
            'stat='+str(stat)+'; pval='+str(pval)+'\n' +
            'mean='+str(shape*scale)+'; var='+str(shape*scale*scale)+'\n' +
            's_mean='+str(sample_mean)+'; s_var='+str(sample_var)+'\n' +
            's_median='+str(sample_median))
        fig.savefig(
            filename[:-4]+'_fit_'+_pad_num(i+1)+'.pdf',
            bbox_inches='tight')
        plt.close()
Example #15
def ssi_gamma(df_SM, acc_per, df_var='sm'):

    # Group data by desired accumulation period and interpolate
    month_values = df_SM[df_var].resample('M').mean()
    month_values = month_values.interpolate()
    accum_period = month_values.rolling(acc_per).mean()

    SSI_gamma = accum_period.copy()

    mesi = np.arange(1, 13, 1)
    #npixel=np.arange(0,len(SSI.columns))

    for jj in mesi:
        dfM = np.where(accum_period.index.month == jj)
        series = accum_period.values[dfM]
        wh = ~np.isnan(series)
        series1 = series[~np.isnan(series)]
        bp = np.float32((np.sum(series1 == 0)) + 1) / (2 * (len(series1) + 1))
        series2 = series1[np.nonzero(series1)]
        alpha, loc, beta = gamma.fit(series2, floc=0)
        val = gamma.cdf(series1, alpha, loc, beta)

        for ii in range(len(series1)):
            if series1[ii] == 0:
                val[ii] = bp

            # Plotting position formula Gringorten
        sta_inv = norm.ppf(val)
        series[wh] = sta_inv
        SSI_gamma.iloc[accum_period.index.month == jj] = series

    return SSI_gamma
Example #16
def generate_slm_from_txt(training_rows, slm_dir, do_plot=True):
    slm_fxt = os.path.join(slm_dir, "slm.fxt")
    slength_counts = Counter()
    slen=1
    maxl=0
    #print training_rows
    for r in training_rows:
        r = r.strip()
        
        segs = r.split(BREAK) # chop the line up into segments
        for s in segs:
            slen = len(s.split())

            if slen > maxl:
                print("new max length = ", slen)
                maxl = slen
                print("from seg: ", s)
#                print "from row: ", r

            if slen:
                slength_counts[slen]+=1

    #_ = raw_input("hit key")
                           
    els = list( slength_counts.elements() ) #Counter.elements() returns iterator that iterates across n instances of each element e where slength_counts[e]=n .. we make this into a list for plotting
    print(els)
    x_vals = range(0, max(els)+1)
    
    (shape, loc, scale) = gamma.fit(els, floc=0)
    gam_gen = gamma(shape, loc, scale) #use these model params to build a new gamma distrib/n generator
    write_slm(slm_fxt, x_vals, gam_gen)
    if do_plot:
        plot_graph(x_vals, gam_gen, els)
    compile_slm(slm_dir) #this last step compiles the slm to binary .fst format
Example #17
 def test_random_vars(self):
     gen = libMHCUDA.minhash_cuda_init(1000, 128, devices=1, verbosity=2)
     rs, ln_cs, betas = libMHCUDA.minhash_cuda_retrieve_vars(gen)
     libMHCUDA.minhash_cuda_fini(gen)
     cs = numpy.exp(ln_cs)
     a, loc, scale = gamma.fit(rs)
     self.assertTrue(1.97 < a < 2.03)
     self.assertTrue(-0.01 < loc < 0.01)
     self.assertTrue(0.98 < scale < 1.02)
     a, loc, scale = gamma.fit(cs)
     self.assertTrue(1.97 < a < 2.03)
     self.assertTrue(-0.01 < loc < 0.01)
     self.assertTrue(0.98 < scale < 1.02)
     bmin, bmax = uniform.fit(betas)
     self.assertTrue(0 <= bmin < 0.001)
     self.assertTrue(0.999 <= bmax <= 1)
Example #18
 def _fit(self, X):
     a, loc, scale = gamma.fit(X)
     self._params = {
         'a': a,
         'loc': loc,
         'scale': scale,
     }
Example #19
def gamma_plot(cell, savefig = False):
    data = pd.concat([speed_pair_before, speed_pair_after])
    data = list(data[data[cell] > 0][cell])

    x = np.linspace(0, 35, 1000)
    shape, loc, scale = gamma.fit(data, floc = 0)
    print (f'{cell} Expected: {shape * scale}')

    if savefig:
        sns.distplot(data, kde = False, norm_hist = True, color = '#3498db', bins = np.linspace(0, 8, 32))
        y = gamma.pdf(x, shape, loc, scale)

        plt.title(f'Distribution of {cell} cells\' speed', fontsize = 15)
        plt.axvline(shape * scale, linestyle = 'dashed', color = 'black', zorder = 1, 
            label = 'Expectation', linewidth = 3)
        plt.xlim([0, 8])
        plt.xticks(fontsize = 12)
        plt.yticks(fontsize = 12)
        plt.xlabel(f'Velocity ({chr(956)}m/s)', fontsize = 12)
        plt.plot(x, y, label = 'Fitted Gamma', linewidth = 3)

        leg = plt.legend(prop = {'size': 12})
        for line in leg.get_lines():
            line.set_linewidth(3)
        plt.tight_layout()
        plt.savefig(os.path.join(N_PATH, f'{cell}-Gamma.png'), format = 'png', dpi = 300)
        plt.savefig(os.path.join(N_PATH, f'{cell}-Gamma.pdf'), format = 'pdf', dpi = 500)
        plt.close()

    return shape * scale
Example #20
def do_MLE(data, minimum, maximum):
    pars = gamma.fit(data[np.where((minimum<=data)&(maximum>=data))], floc=0.0)
    a1, loc1, scale1 = pars
    #print minimum, maximum, a1, loc1, scale1, \
    #      gamma.nnlf(pars, data[np.where((minimum<=data)&(maximum>=data))]), \
    #      a1 * scale1, a1 * scale1**2
    return a1, scale1
Example #21
def fit_gamma_param(df, xmin, mes, year_test='None', option=0):
    """
    """
    cdf_limite = .9999999
    if mes - 1 <= 0:
        cnd = [12, 1, 2]
    elif mes + 1 >= 13:
        cnd = [11, 12, 1]
    else:
        cnd = [mes - 1, mes, mes + 1]
    if year_test == 'None':
        datos = df.loc[df['month'].isin(cnd), 'precip'].values
    else:
        id_fm = np.logical_and(df.Fecha >= '01/01/'+str(year_test),
                               df.Fecha <= '12/31/'+str(year_test))
        # generate index to work in cnd and out of year considered.
        im_tot = np.logical_and(df['month'].isin(cnd), np.logical_not(id_fm))
        # extract data to generate the distribution of historical data.
        #print(np.unique(pd.DatetimeIndex(df.loc[im_tot, 'Fecha']).year.to_numpy()))
        #print(np.unique(pd.DatetimeIndex(df.loc[im_tot, 'Fecha']).month.to_numpy()))
        datos = df.loc[im_tot, 'precip'].values
    # Days with precipitation
    in_dato = np.array([e > xmin if ~np.isnan(e) else False
                        for e in datos], dtype=bool)
    precdias = datos[in_dato]
    # Fit a Gamma distribution over days with precipitation
    param_gamma = gamma.fit(precdias, floc=0)
    gamma_cdf = gamma.cdf(np.sort(precdias), *param_gamma)
    gamma_cdf[gamma_cdf > cdf_limite] = cdf_limite
    if option == 0:
        return param_gamma
    else:
        return param_gamma, precdias, gamma_cdf
Example #22
    def fit_gamma_distribution(self,
                               desorption_thresh=5,
                               plot=False,
                               bins=15,
                               normed=True):

        #  first get the detachment time
        dt = self.get_desorption_distribution(thresh=desorption_thresh)

        # you need to invert the data - to be able to fit gamma
        # dt = dt.max() - dt

        fit_alpha, fit_loc, fit_beta = gamma.fit(dt)

        x = np.linspace(0, dt.max(), 100)

        pdf_fitted = gamma.pdf(x, fit_alpha, fit_loc, fit_beta)

        # normalize
        # pdf_fitted = pdf_fitted / pdf_fitted.max()

        # this is the maximum of the distribution
        mode = x[pdf_fitted.argmax()]

        if plot:
            x = np.linspace(0, dt.max(), 100)

            plt.hist(dt, bins=bins, normed=normed)
            plt.plot(x, pdf_fitted)
            plt.show()

        return fit_alpha, fit_loc, fit_beta
Example #23
def precision_prior_params(data, num_classes, pseudo_inputs_per_class):

    # load the data into RAM to support sample with replacement
    x = []
    y = []
    for batch in data:
        x.append(batch['image'])
        y.append(batch['label'])
    x = tf.concat(x, axis=0)
    y = tf.concat(y, axis=0)

    # fit the distribution of precision across pixel positions
    variance = tf.math.reduce_variance(tf.keras.layers.Flatten()(x), axis=0)
    precision = 1 / tf.clip_by_value(
        variance, clip_value_min=(1 / 255), clip_value_max=np.inf)
    a, _, b_inv = gamma.fit(precision, floc=0)
    b = 1 / b_inv

    # randomly select pseudo inputs
    u = []
    for i in range(num_classes):
        i_choice = np.random.choice(np.where(y == i)[0],
                                    size=pseudo_inputs_per_class,
                                    replace=False)
        u.append(tf.gather(params=x, indices=i_choice, axis=0))
    u = tf.concat(u, axis=0)

    return a, b, u
Example #24
    def maximum_likelihood_fit(data):
        """Estimate parameters from samples.

        This is a wrapper around scipy's maximum likelihood estimator to
        estimate the parameters of a gamma distribution from samples.

        Parameters
        ----------
        data : list or list of lists/arrays
            Data to estimate parameters from. Lists of
            different length may be passed.

        Returns
        -------
        parameter : array-like, shape=[..., 2]
            Estimate of parameter obtained by maximum likelihood.
        """
        def is_nested(sample):
            """Check if sample contains an iterable."""
            for el in sample:
                try:
                    return iter(el)
                except TypeError:
                    return False

        if not is_nested(data):
            data = [data]
        parameters = []
        for sample in data:
            sample = gs.array(sample)
            kappa, _, scale = gamma.fit(sample, floc=0)
            nu = 1 / scale
            parameters.append(gs.array([kappa, kappa / nu]))
        return parameters[0] if len(data) == 1 else gs.stack(parameters)
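
A note on the parameterization above, as a hedged sketch with made-up sample values: scipy's fit reports (shape, loc, scale), while this wrapper converts scale to a rate nu = 1/scale and returns (kappa, kappa/nu), i.e. the shape and the fitted mean:

from scipy.stats import gamma

kappa, _, scale = gamma.fit([1.2, 0.8, 2.5, 1.9, 3.1], floc=0)
nu = 1 / scale
mean_estimate = kappa / nu   # identical to kappa * scale, the mean of the fitted gamma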
Example #25
def getGammaPdf(dataset, nbins, bins):
    shape, loc, scale = gamma.fit(dataset, floc=0)
    x = np.linspace(min(bins), max(bins), nbins)
    print('GAM: shape=' + str(shape) + ', loc=' + str(loc) + ", scale=" +
          str(scale))
    pdf = gamma.pdf(x, shape, loc, scale)
    return (x, pdf)
Example #26
def gamma_correction(obs_data,
                     mod_data,
                     sce_data,
                     lower_limit=0.1,
                     cdf_threshold=0.9999999):
    obs_raindays, mod_raindays, sce_raindays = [
        x[x >= lower_limit] for x in [obs_data, mod_data, sce_data]
    ]
    obs_gamma, mod_gamma, sce_gamma = [
        gamma.fit(x) for x in [obs_raindays, mod_raindays, sce_raindays]
    ]

    obs_cdf = gamma.cdf(np.sort(obs_raindays), *obs_gamma)
    mod_cdf = gamma.cdf(np.sort(mod_raindays), *mod_gamma)
    sce_cdf = gamma.cdf(np.sort(sce_raindays), *sce_gamma)

    obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold
    mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold
    sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold

    obs_cdf_intpol = np.interp(
        np.linspace(1, len(obs_raindays), len(sce_raindays)),
        np.linspace(1, len(obs_raindays), len(obs_raindays)), obs_cdf)

    mod_cdf_intpol = np.interp(
        np.linspace(1, len(mod_raindays), len(sce_raindays)),
        np.linspace(1, len(mod_raindays), len(mod_raindays)), mod_cdf)

    obs_inverse, mod_inverse, sce_inverse = [
        1. / (1. - x) for x in [obs_cdf_intpol, mod_cdf_intpol, sce_cdf]
    ]

    adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse)
    adapted_cdf[adapted_cdf < 0.] = 0.

    initial = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) * gamma.ppf(
        sce_cdf, *sce_gamma) / gamma.ppf(sce_cdf, *mod_gamma)

    obs_frequency = 1. * obs_raindays.shape[0] / obs_data.shape[0]
    mod_frequency = 1. * mod_raindays.shape[0] / mod_data.shape[0]
    sce_frequency = 1. * sce_raindays.shape[0] / sce_data.shape[0]

    days_min = len(sce_raindays) * sce_frequency / mod_frequency

    expected_sce_raindays = int(min(days_min, len(sce_data)))

    sce_argsort = np.argsort(sce_data)
    correction = np.zeros(len(sce_data))

    if len(sce_raindays) > expected_sce_raindays:
        initial = np.interp(
            np.linspace(1, len(sce_raindays), expected_sce_raindays),
            np.linspace(1, len(sce_raindays), len(sce_raindays)), initial)
    else:
        initial = np.hstack(
            (np.zeros(expected_sce_raindays - len(sce_raindays)), initial))

    correction[sce_argsort[:expected_sce_raindays]] = initial
    #correction = pd.Series(correction, index=sce_data.index)
    return correction
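
Hypothetical usage of gamma_correction on synthetic daily rainfall series (the generator settings and dry-day masks below are arbitrary illustrations, not values from the original project):

import numpy as np

rng = np.random.default_rng(2)
obs = rng.gamma(2.0, 4.0, size=365) * (rng.random(365) < 0.4)   # roughly 60% dry days
mod = rng.gamma(1.5, 6.0, size=365) * (rng.random(365) < 0.5)
sce = rng.gamma(1.5, 7.0, size=365) * (rng.random(365) < 0.5)
corrected = gamma_correction(obs, mod, sce, lower_limit=0.1)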
Example #27
def plot_time(time: pandas.Series):
    """
    make a probability density function estimate based on the data

    in this simulation, time interval is same distribution for all sensors and rooms

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html
    """

    intervals = time.diff().dropna().dt.total_seconds()
    Nbin = 100

    Fa, Floc, Fscale = gamma.fit(intervals)

    ti = np.arange(0.01, 5, 0.01)  # arbitrary time interval range to plot over
    pd = gamma.pdf(ti, Fa, loc=Floc, scale=Fscale)  # fit

    ax = plt.figure().gca()
    ax.plot(ti, pd)
    ax.set_xlabel("Time Interval (seconds)")
    ax.set_ylabel("Probability")
    ax.set_title("Time interval observed")

    # add the measured data to the plot
    ax.hist(intervals, bins=Nbin)
Example #28
File: spikes.py Project: mschachter/LaSP
def simulate_gamma(psth, trials, duration, num_trials=20):

    #rescale the ISIs
    dt = 0.001
    rs_isis = []
    for trial in trials:
        if len(trial) < 1:
            continue
        csum = np.cumsum(psth) * dt
        for k, ti in enumerate(trial[1:]):
            tj = trial[k]
            if ti > duration or tj > duration or ti < 0.0 or tj < 0.0:
                continue
            ti_index = int((ti / duration) * len(psth))
            tj_index = int((tj / duration) * len(psth))
            #print 'k=%d, ti=%0.6f, tj=%0.6f, duration=%0.3f' % (k, ti, tj, duration)
            #print '  ti_index=%d, tj_index=%d, len(psth)=%d, len(csum)=%d' % (ti_index, tj_index, len(psth), len(csum))
            #get rescaled time as difference in cumulative intensity
            ui = csum[ti_index] - csum[tj_index]
            if ui < 0.0:
                print('ui < 0! ui=%0.6f, csum[ti]=%0.6f, csum[tj]=%0.6f' % (
                    ui, csum[ti_index], csum[tj_index]))
            else:
                rs_isis.append(ui)
    rs_isis = np.array(rs_isis)
    rs_isi_x = np.arange(rs_isis.min(), rs_isis.max(), 1e-5)

    #fit a gamma distribution to the rescaled ISIs
    gamma_alpha, gamma_loc, gamma_beta = gamma.fit(rs_isis)
    gamma_pdf = gamma.pdf(rs_isi_x,
                          gamma_alpha,
                          loc=gamma_loc,
                          scale=gamma_beta)
    print('Rescaled ISI Gamma Fit Params: alpha=%0.3f, beta=%0.3f, loc=%0.3f' % (
        gamma_alpha, gamma_beta, gamma_loc))

    #simulate new trials using rescaled ISIs
    new_trials = []
    for nt in range(num_trials):
        ntrial = []
        next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc, scale=gamma_beta)
        csum = 0.0
        for t_index, pval in enumerate(psth):
            csum += pval * dt
            if csum >= next_rs_time:
                #spike!
                t = t_index * dt
                ntrial.append(t)
                #reset integral and generate new rescaled ISI
                csum = 0.0
                next_rs_time = gamma.rvs(gamma_alpha,
                                         loc=gamma_loc,
                                         scale=gamma_beta)
        new_trials.append(ntrial)
    #plt.figure()
    #plt.hist(rs_isis, bins=20, normed=True)
    #plt.plot(rs_isi_x, gamma_pdf, 'r-')
    #plt.title('Rescaled ISIs')

    return new_trials
Example #29
def train_gamma(X, y):
    """
	Description: Trained using the density of a gamma distribution fitted to each feature (spam and non-spam separately).

	@params:
		X: training features
		y: training y
	@return:
		model:

	"""
    m, n = X.shape

    model = {}
    ## calculate prob of spam and nonspam
    p_spam = sum(y == 1) * 1.0 / m
    p_nonspam = sum(y == 0) * 1.0 / m

    model["p_spam"] = p_spam
    model["p_nonspam"] = p_nonspam

    index_spam = y == 1
    index_nonspam = y == 0
    gammas_spam = []
    gammas_nonspam = []

    for i in range(n):
        ga = {}
        x_spam = asarray(X[index_spam, i])
        a, floc, scale = gamma.fit(x_spam)
        ga["a"] = a
        ga["floc"] = floc
        ga["scale"] = scale
        gammas_spam.append(ga)

        ga = {}
        x_nonspam = asarray(X[index_nonspam, i])
        a, floc, scale = gamma.fit(x_nonspam)
        ga["a"] = a
        ga["floc"] = floc
        ga["scale"] = scale
        gammas_nonspam.append(ga)

    model["gammas_spam"] = gammas_spam
    model["gammas_nonspam"] = gammas_nonspam

    return model
Example #30
 def params_of(strings):
     strings_logprobs = np.empty(len(strings))
     for i, string in enumerate(strings):
         strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string))
     strings_params = gamma.fit(strings_logprobs[strings_logprobs != np.inf])
      _, bins, _ = plt.hist(strings_logprobs[strings_logprobs != np.inf], 500, histtype = 'step', density = True)
     plt.plot(bins, gamma.pdf(bins, *strings_params))
     return strings_params
Example #31
def HSIC_pval(X,
              Y,
              N_samp=500,
              kernelX="Gaussian",
              kernelY="Gaussian",
              eta=0.001,
              sigmaX=None,
              sigmaY=None,
              p_method="boots",
              return_boots=False):
    """ Calculates HSIC and p-value 
    
    Gram matrices are approximated using incomplete Cholesky decomposition.
    
    X: Data. Each row is a datapoint.
    Y: Data. Each row is a datapoint.
    N_samp: Number of samples
    kernelX: Kernel to use (Gaussian, Linear, Delta)
    kernelY: Kernel to use (Gaussian, Linear, Delta)
    eta: Threshold for incomplete Cholesky decomposition
    sigmaX: sigma for X when using Gaussian kernel
    sigmaY: sigma for Y when using Gaussian kernel
    """
    timeA = time.time()
    m, _ = X.shape

    sigmaX = getSigmaGaussian(X, X, 200) if sigmaX is None else sigmaX
    sigmaY = getSigmaGaussian(Y, Y, 200) if sigmaY is None else sigmaY

    A, max_rankA = incompleteCholeskyKernel(X, m, kernelX, sigmaX, eta)
    B, max_rankB = incompleteCholeskyKernel(Y, m, kernelY, sigmaY, eta)

    centered_A = A.T - A.T.mean(axis=0)
    tmp = B * np.mat(centered_A)
    HSIC = np.trace(tmp * tmp.T) / m**2

    boots = []
    Yrand = np.copy(Y)
    for _ in range(N_samp):
        np.random.shuffle(Yrand)

        B, max_rankB = incompleteCholeskyKernel(Yrand, m, kernelY, sigmaY, eta)

        tmp = np.mat(B) * np.mat(centered_A)
        boots.append(np.trace(tmp * tmp.T) / m**2)

    boots = np.array(boots)

    if p_method == "boots":
        pval = (sum(b >= HSIC for b in boots) + 1) / float(len(boots) + 1)
    else:  #gamma
        fit_alpha, fit_loc, fit_beta = gamma.fit(boots)
        pval = 1 - gamma.cdf(HSIC, fit_alpha, scale=fit_beta, loc=fit_loc)

    if return_boots:
        return HSIC, pval, boots
    else:
        return HSIC, pval
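
A compact sketch of the gamma-approximated p-value computed in the else branch above; the bootstrap statistics and the observed value are synthetic stand-ins:

import numpy as np
from scipy.stats import gamma

boots = np.random.default_rng(3).gamma(2.0, 1.0, size=500)   # stand-in permutation statistics
observed = 5.0                                               # stand-in observed HSIC value
fit_alpha, fit_loc, fit_beta = gamma.fit(boots)
pval = 1 - gamma.cdf(observed, fit_alpha, scale=fit_beta, loc=fit_loc)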
Example #32
 def __init__(self, mode=0, elem=None, sample=None):
     if mode == 0:
         self.a = elem[0]
         self.mu = elem[1]
         self.sigma = elem[2]
     else:
         self.a, self.mu, self.sigma = gamma.fit(sample)
     self.math_average = gamma.mean(self.a, loc=self.mu, scale=self.sigma)
     self.dispersion = gamma.var(self.a, loc=self.mu, scale=self.sigma)
Example #33
def fit_gamma_expon_Q(trace,gammafactor=20,exponfactor=2,plot=False):
    params = []
    for i in range(1,len(trace[0])):
        if i < 4:
            a,loc,theta = gamma.fit(trace[:,i],floc = 0)
            params.append([a/gammafactor,loc,theta*gammafactor])
        if i == 4:
            loc,scale = expon.fit(trace[:,i],floc = 0)
            params.append([loc,scale*exponfactor])
    return params
Example #34
def get_outliers(data, filter, plotting):
    if plotting:
        for x, r in [("x1", (0, 1)), ("x2", (0, 30)), ("x3", (0, 1))]:
            plt.violinplot(data[x], vert=False)
            plt.xlim(r)
            plt.savefig("plots/violin/%s.png" % x)
            plt.clf()

    if filter:
        data_fl = data[data["class"] == 0]
    else:
        data_fl = data

    pdf = pd.DataFrame({})

    a, b, loc, scale = beta.fit(data_fl["x1"])
    pdf["x1"] = beta.logpdf(data["x1"], a, b, loc=loc, scale=scale)

    a, loc, scale = gamma.fit(data_fl["x2"])
    pdf["x2"] = gamma.logpdf(data["x2"], a, loc=loc, scale=scale)

    a, b, loc, scale = beta.fit(data_fl["x3"])
    pdf["x3"] = beta.logpdf(data["x3"], a, b, loc=loc, scale=scale)

    pdfs = pdf["x1"] + pdf["x2"] + pdf["x3"]

    if plotting:
        sns.boxplot(y=pdfs, x="class", data=data)
        plt.savefig("plots/boxplot.png")
        plt.clf()

    if plotting:
        plt.plot(np.sort(pdfs))
        splits = [40, 45, 50, 60]
        for split in splits:
            split = np.sort(pdfs)[60]
            plt.plot((0, 1000), (split, split), 'k-', lw=0.5)
            split = np.sort(pdfs)[50]
            plt.plot((0, 1000), (split, split), 'k.', lw=0.5)
            split = np.sort(pdfs)[45]
            plt.plot((0, 1000), (split, split), 'k--', lw=0.5)
            split = np.sort(pdfs)[40]
            plt.plot((0, 1000), (split, split), 'k--', lw=0.5)

        plt.savefig("plots/thresholds.png")
        plt.clf()

    outliers = np.argsort(pdfs)

    final = []
    for outlier in outliers:
        if data["class"][outlier] == -1:
            final.append(outlier)

    return np.array(final[:100])
Example #35
def continuous():
    """Fit distributions to symptoms' duration data."""
    # fetch data
    x = _symptoms_data()
    # fit distributions
    return {
        'x': x,
        'norm': norm.fit(x),
        'lognorm': lognorm.fit(x, floc=0),
        'gamma': gamma.fit(x, floc=0)
    }
Example #36
def fit_gamma_sp(trace,factor=5,plot=False):
    a,loc,theta = gamma.fit(trace[:,4],floc=0)
    if plot == True:
        xmax = max(trace[:,4])
        xmin = min(trace[:,4])
        xdata = np.linspace(0,xmax*2,num=500)
        #plt.plot(xdata,gamma.pdf(xdata,a,loc,theta))
        plt.plot(xdata,gamma.pdf(xdata,a/factor,loc,theta*factor))        
        plt.hist(trace[:,4],bins=50,density=True)
        plt.show()
    return a/factor, loc, theta*factor
Example #37
def gbmodelpredictrv(rvinput, rvhandperc):
    prediction = modelefftr.predict(rvinput)
    distribution = probdensityrv[(probdensityrv[0] > prediction - .025) & (
        probdensityrv[0] < prediction + .025)]['test actual']
    fit_alpha, fit_loc, fit_beta = gamma.fit(distribution)
    #flhandperc = tools.handprobability()
    #flhandperc = flhandperc.sort_values('rank')
    for i in rvhandperc.index:
        rvhandperc.at[i, 'rank'] = rvhandperc.at[i, 'rank'] * gamma.pdf(
            rvhandperc.at[i, 'rank'], fit_alpha, loc=fit_loc, scale=fit_beta)
    return rvhandperc
Example #38
File: spikes.py Project: mschachter/LaSP
def simulate_gamma(psth, trials, duration, num_trials=20):

    #rescale the ISIs
    dt = 0.001
    rs_isis = []
    for trial in trials:
        if len(trial) < 1:
            continue
        csum = np.cumsum(psth)*dt
        for k,ti in enumerate(trial[1:]):
            tj = trial[k]
            if ti > duration or tj > duration or ti < 0.0 or tj < 0.0:
                continue
            ti_index = int((ti / duration) * len(psth))
            tj_index = int((tj / duration) * len(psth))
            #print 'k=%d, ti=%0.6f, tj=%0.6f, duration=%0.3f' % (k, ti, tj, duration)
            #print '  ti_index=%d, tj_index=%d, len(psth)=%d, len(csum)=%d' % (ti_index, tj_index, len(psth), len(csum))
            #get rescaled time as difference in cumulative intensity
            ui = csum[ti_index] - csum[tj_index]
            if ui < 0.0:
                print('ui < 0! ui=%0.6f, csum[ti]=%0.6f, csum[tj]=%0.6f' % (ui, csum[ti_index], csum[tj_index]))
            else:
                rs_isis.append(ui)
    rs_isis = np.array(rs_isis)
    rs_isi_x = np.arange(rs_isis.min(), rs_isis.max(), 1e-5)

    #fit a gamma distribution to the rescaled ISIs
    gamma_alpha,gamma_loc,gamma_beta = gamma.fit(rs_isis)
    gamma_pdf = gamma.pdf(rs_isi_x, gamma_alpha, loc=gamma_loc, scale=gamma_beta)
    print('Rescaled ISI Gamma Fit Params: alpha=%0.3f, beta=%0.3f, loc=%0.3f' % (gamma_alpha, gamma_beta, gamma_loc))

    #simulate new trials using rescaled ISIs
    new_trials = []
    for nt in range(num_trials):
        ntrial = []
        next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc,scale=gamma_beta)
        csum = 0.0
        for t_index,pval in enumerate(psth):
            csum += pval*dt
            if csum >= next_rs_time:
                #spike!
                t = t_index*dt
                ntrial.append(t)
                #reset integral and generate new rescaled ISI
                csum = 0.0
                next_rs_time = gamma.rvs(gamma_alpha, loc=gamma_loc,scale=gamma_beta)
        new_trials.append(ntrial)
    #plt.figure()
    #plt.hist(rs_isis, bins=20, normed=True)
    #plt.plot(rs_isi_x, gamma_pdf, 'r-')
    #plt.title('Rescaled ISIs')

    return new_trials
Example #39
File: gamma.py Project: dtbinh/Sniffer2
    def _test_fit_trans(self):
        filename = "../../../bridge/cfg/detection.dat"
        dat = np.loadtxt(filename)

        fit_shape, fit_loc, fit_scale = gamma.fit(self.trans_conc_particle(dat[77:, 1]))
        print(fit_shape, fit_loc, fit_scale)

        mean = fit_shape * fit_scale
        variance = mean * fit_scale
        print(mean, variance)

        x = np.linspace(1, 250, 10000)
        # ax.plot(x, gamma.pdf(x, shape), 'r-', lw=5, alpha=0.6, label='gamma pdf')

        rv = gamma(fit_shape, scale=fit_scale)

        fig, ax = plt.subplots(1, 1)
        ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

        plt.show()
Example #40
def calcSPI(duration, model, cid):
    """Calculate Standardized Precipitation Index for specified month
    *duration*. Need a climatology of precipitation stored in the database
    used in a VIC *model* simulation."""
    nt = (date(model.endyear, model.endmonth, model.endday) -
          date(model.startyear + model.skipyear, model.startmonth, model.startday)).days + 1
    # tablename = "precip."+model.precip
    if duration < 1:
        print(
            "WARNING! Cannot calculate SPI with {0} months duration.".format(duration))
        spi = np.zeros(nt)
    else:
        p = np.loadtxt("{0}/forcings/data_{1:.{3}f}_{2:.{3}f}".format(model.model_path,
                                                                      model.gid[cid][0], model.gid[cid][1], model.grid_decimal))[:, 0]
        p = pandas.Series(p, [date(model.startyear, model.startmonth,
                                   model.startday) + timedelta(t) for t in range(len(p))])
        p[duration:] = pandas.rolling_mean(p.resample(
            'M', how='mean'), duration).values[duration:]
        p[:duration] = 0.0
        g1, g2, g3 = gamma.fit(p)
        cdf = gamma.cdf(p, g1, g2, g3)
        spi = norm.ppf(cdf)
    return spi
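
The SPI step at the core of calcSPI, stripped down to a self-contained sketch on a synthetic monthly precipitation series:

import numpy as np
from scipy.stats import gamma, norm

precip = np.random.default_rng(4).gamma(2.0, 30.0, size=120)   # ten years of monthly totals
g1, g2, g3 = gamma.fit(precip)
spi = norm.ppf(gamma.cdf(precip, g1, g2, g3))                  # gamma CDF mapped to a standard normal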
Example #41
def generate_slm(training_rows, slm_dir, do_plot=True):
    slm_fxt = os.path.join(slm_dir, "slm.fxt")
    slength_counts = Counter()
    slen=1
    for r in training_rows:
        #print r
        is_boundary = int(r[6])
        if(not is_boundary):
            slen += 1
        else:
#             print "adding slen=",(slen+1)
            slength_counts[slen]+=1
            slen = 1   
    els = list( slength_counts.elements() ) #Counter.elements() returns iterator that iterates across n instances of each element e where slength_counts[e]=n .. we make this into a list for plotting
    #print els
    x_vals = range(0, max(els)+1)
    
    (shape, loc, scale) = gamma.fit(els, floc=0)
    gam_gen = gamma(shape, loc, scale) #use these model params to build a new gamma distrib/n generator
    write_slm(slm_fxt, x_vals, gam_gen)
    if do_plot:
        plot_graph(x_vals, gam_gen, els)
    compile_slm(slm_dir) #this last step compiles the slm to binary .fst format
Example #42
with open("lengths_file.pickle", "rb") as lengths_file:
    lengths = load(lengths_file)

plot_count = len(list(filter(lambda x: LENGTH_CUTOFF < len(x),
                             lengths.values())))
num_col = int(floor(sqrt(plot_count)))
num_row = int(ceil(plot_count / num_col))

sorted_relationships = sorted(lengths.keys(), key = sum)

plt.figure(1)
subplot_num = 1
for relationship in sorted_relationships:
    length_data = lengths[relationship]
    length_data.sort()
    if LENGTH_CUTOFF > len(length_data):
        continue
    plt.subplot(num_row, num_col, subplot_num)
    plt.hist(length_data, bins = 200, density = True)
    plt.title(str(relationship))

    fit = gamma.fit(length_data)
    pdf = gamma(*fit).pdf(length_data)
    plt.plot(length_data, pdf)
    subplot_num += 1

plt.tight_layout()
plt.show()
    
Example #43
    def compile(alphabet, words, nonwords):
        print('  Generating all possible transitions...')
        from itertools import product
        all = []
        for state_size in range(args.max_state_size + 1):
            all += product(product(alphabet, repeat = state_size), [*alphabet, None])

        def of(string):
            for i in range(len(string)):
                yield string[max(0, i - args.max_state_size):i], string[i]
            yield string[max(0, len(string) - args.max_state_size):], None

        from collections import Counter
        counts = Counter()
        for word in tqdm(words, '  Counting transitions', leave = True):
            for state, symbol in of(word):
                counts[state, symbol] += 1
        state_counts = Counter()
        for state, symbol in tqdm(counts, '  Counting states', leave = True):
            state_counts[state] += counts[state, symbol]

        import numpy as np
        logprobs = np.empty(len(all))
        for i, (state, symbol) in enumerate(tqdm(all, '  Computing conditional transition probabilities', leave = True)):
            try:
                logprobs[i] = np.log(state_counts[state] / counts[state, symbol])
            except ZeroDivisionError:
                logprobs[i] = np.inf

        print('  Fitting flattening distribution...')
        from scipy.stats import gamma
        params = gamma.fit(logprobs[logprobs != np.inf])

        print('  Flattening...')
        logprobs = gamma.cdf(logprobs, *params)
        lower_bound = np.min(logprobs)
        upper_bound = np.max(logprobs[logprobs != 1])
        new_logprobs = np.empty(len(logprobs), int)
        for i, logprob in enumerate(tqdm(logprobs, '  Discretizing', leave = True)):
            if logprob == 1:
                new_logprobs[i] = 2 ** args.transition_bits - 1
            else:
                new_logprobs[i] = round((logprob - lower_bound) * ((2 ** args.transition_bits - 2) / (upper_bound - lower_bound)))
        logprobs = new_logprobs

        data = bytearray()

        bit_buffer = 0
        bit_buffer_size = 0
        for logprob in tqdm(logprobs, '  Packing', leave = True):
            bit_buffer = bit_buffer << args.transition_bits | int(logprob)
            bit_buffer_size += args.transition_bits
            if bit_buffer_size % 8 == 0:
                data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')
                bit_buffer = 0
                bit_buffer_size = 0
        while bit_buffer_size % 8 != 0:
            bit_buffer = bit_buffer << args.transition_bits
            bit_buffer_size += args.transition_bits
        data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')

        old_logprobs = np.empty(len(logprobs))
        for i, logprob in enumerate(tqdm(logprobs, '  Undiscretizing...', leave = True)):
            if logprob == 2 ** args.transition_bits - 1:
                old_logprobs[i] = 1
            else:
                old_logprobs[i] = lower_bound + logprob * ((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
        print('  Unflattening...')
        old_logprobs = gamma.ppf(old_logprobs, *params)
        old_logprobs = dict(zip(all, old_logprobs))

        def params_of(strings):
            strings_logprobs = np.empty(len(strings))
            for i, string in enumerate(strings):
                strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string))
            strings_params = gamma.fit(strings_logprobs[strings_logprobs != np.inf])
            _, bins, _ = plt.hist(strings_logprobs[strings_logprobs != np.inf], 500, histtype = 'step', density = True)
            plt.plot(bins, gamma.pdf(bins, *strings_params))
            return strings_params

        print('  Fitting words distribution...')
        words_params = params_of(words)

        print('  Fitting nonwords distribution...')
        nonwords_params = params_of(nonwords)

        def minify(code):
            if args.minify:
                import subprocess
                p = subprocess.run([str(Path(__file__).parent / 'node_modules/uglify-js/bin/uglifyjs'),
                    '--screw-ie8',
                    '--mangle', 'sort,toplevel',
                    '--compress',
                    '--bare-returns',
                ], input = code.encode(),
                   stdout = subprocess.PIPE,
                   stderr = subprocess.PIPE)
                if p.returncode != 0:
                    import sys
                    sys.stderr.buffer.write(p.stderr)
                    p.check_returncode()
                code = p.stdout.decode()
            return code

        print('  Generating JS code...')
        code = minify(r'''
            exports.init = function(buffer) {
                exports.test = (new Function('buffer', buffer.utf8Slice(''' + str(len(data)) + r''')))(buffer);
            };
        ''').encode()
        data += minify(r'''
            var abs = Math.abs;
            var min = Math.min;
            var max = Math.max;

            var alphabet = [
                ''' + r'''
                '''.join('"' + symbol + '",' for symbol in alphabet) + r'''
            ];

            var of; (function() {
                function fold(string) {
                    string = Array.from(string);
                    for (var i = alphabet.length - 1; alphabet[i].length > 1; --i) {
                        for (var j = 0; j <= string.length - alphabet[i].length; ++j) {
                            if (string.slice(j, j + alphabet[i].length).join('') == alphabet[i]) {
                                string.splice(j, alphabet[i].length, alphabet[i]);
                            }
                        }
                    }
                    return string;
                }

                of = function(string) {
                    string = fold(string);
                    var ofString = [];
                    for (var i = 0; i < string.length; ++i) {
                        ofString.push([string.slice(max(0, i - ''' + str(args.max_state_size) + r'''), i), string[i]]);
                    }
                    ofString.push([string.slice(max(0, string.length - ''' + str(args.max_state_size) + r''')), null]);
                    return ofString;
                };
            })();

            var all; (function() {
                function product(xs, ys) {
                    var result = [];
                    for (var i = 0; i < xs.length; ++i) {
                        for (var j = 0; j < ys.length; ++j) {
                            result.push([xs[i], ys[j]]);
                        }    
                    }
                    return result;
                }

                function power(a, k) {
                    if (k == 0) {
                        return [[]];    
                    }
                    var result = [];
                    for (var i = 0; i < a.length; ++i) {
                        var b = power(a, k - 1);
                        for (var j = 0; j < b.length; ++j) {
                            result.push([a[i]].concat(b[j]));
                        }    
                    }
                    return result;
                }

                all = [];
                for (var stateSize = 0; stateSize <= ''' + str(args.max_state_size) + r'''; ++stateSize) {
                    all = all.concat(product(power(alphabet, stateSize), alphabet.concat([null])));
                }
            })();

            var gammaPdf, gammaPpf; (function() {
                var pow = Math.pow;
                var exp = Math.exp;
                var log = Math.log;
                var sqrt = Math.sqrt;

                var cof = [
                    76.18009172947146,
                    -86.50532032941677,
                    24.01409824083091,
                    -1.231739572450155,
                    0.1208650973866179e-2,
                    -0.5395239384953e-5,
                ];

                function ln(x) {
                    var j = 0;
                    var ser = 1.000000000190015;
                    var xx, y, tmp;

                    tmp = (y = xx = x) + 5.5;
                    tmp -= (xx + 0.5) * log(tmp);
                    for (; j < 6; j++)
                        ser += cof[j] / ++y;
                    return log(2.5066282746310005 * ser / xx) - tmp;
                }

                gammaPdf = function(x, a) {
                    if (x < 0)
                        return 0;
                    if (x === 0 && a === 1)
                        return 1;
                    return exp((a - 1) * log(x) - x - ln(a));
                };

                function lowReg(a, x) {
                    var aln = ln(a);
                    var ap = a;
                    var sum = 1 / a;
                    var del = sum;
                    var b = x + 1 - a;
                    var c = 1 / 1.0e-30;
                    var d = 1 / b;
                    var h = d;
                    var i = 1;
                    var ITMAX = -~(log((a >= 1) ? a : 1 / a) * 8.5 + a * 0.4 + 17);
                    var an, endval;

                    if (x < 0 || a <= 0) {
                        return NaN;
                    } else if (x < a + 1) {
                        for (; i <= ITMAX; i++) {
                            sum += del *= x / ++ap;
                        }
                        return sum * exp(-x + a * log(x) - aln);
                    }

                    for (; i <= ITMAX; i++) {
                        an = -i * (i - a);
                        b += 2;
                        d = an * d + b;
                        c = b + an / c;
                        d = 1 / d;
                        h *= d * c;
                    }

                    return 1 - h * exp(-x + a * log(x) - aln);
                }

                gammaPpf = function(p, a) {
                    var j = 0;
                    var a1 = a - 1;
                    var EPS = 1e-8;
                    var gln = ln(a);
                    var x, err, t, u, pp, lna1, afac;

                    if (p > 1)
                        return NaN;
                    if (p == 1)
                        return Infinity;
                    if (p < 0)
                        return NaN;
                    if (p == 0)
                        return 0;
                    if (a > 1) {
                        lna1 = log(a1);
                        afac = exp(a1 * (lna1 - 1) - gln);
                        pp = (p < 0.5) ? p : 1 - p;
                        t = sqrt(-2 * log(pp));
                        x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t;
                        if (p < 0.5)
                            x = -x;
                        x = max(1e-3, a * pow(1 - 1 / (9 * a) - x / (3 * sqrt(a)), 3));
                    } else {
                        t = 1 - a * (0.253 + a * 0.12);
                        if (p < t)
                            x = pow(p / t, 1 / a);
                        else
                            x = 1 - log(1 - (p - t) / (1 - t));
                    }

                    for(; j < 12; j++) {
                        if (x <= 0)
                            return 0;
                        err = lowReg(a, x) - p;
                        if (a > 1)
                            t = afac * exp(-(x - a1) + a1 * (log(x) - lna1));
                        else
                            t = exp(-x + a1 * log(x) - gln);
                        u = err / t;
                        x -= (t = u / (1 - 0.5 * min(1, u * ((a - 1) / x - 1))));
                        if (x <= 0)
                            x = 0.5 * (x + t);
                        if (abs(t) < EPS * x)
                            break;
                    }

                    return x; 
                };
            })();

            var logprobs = {};
            var bitBuffer = 0, bitBufferSize = 0;
            var bufferOffset = 0;
            for (var i = 0; i < all.length; ++i) {
                while (bitBufferSize < ''' + str(args.transition_bits) + r''') {
                    bitBuffer = bitBuffer << 8 | buffer.readUInt8(bufferOffset++); bitBufferSize += 8;
                }

                var logprob = bitBuffer >> (bitBufferSize - ''' + str(args.transition_bits) + r''') & ''' + hex(2 ** args.transition_bits - 1) + r'''; bitBufferSize -= ''' + str(args.transition_bits) + r''';

                if (logprob == ''' + str(2 ** args.transition_bits - 1) + r''') {
                    logprob = 1;
                } else {
                    logprob = ''' + str(lower_bound) + r''' + logprob * ''' + str((upper_bound - lower_bound) / (2 ** args.transition_bits - 2)) + r''';
                }
                logprob = ''' + str(params[1]) + r''' + gammaPpf(logprob, ''' + str(params[0]) + r''') * ''' + str(params[2]) + r''';
            
                logprobs[all[i]] = logprob;
            }

            return function(string) {
                var stringLogprob = 0;
                var ofString = of(string);
                for (var i = 0; i < ofString.length; ++i) {
                    stringLogprob += logprobs[ofString[i]];    
                }
                if (stringLogprob == Infinity) {
                    return false;    
                }
                var wordsDensity = gammaPdf((stringLogprob - ''' + str(words_params[1]) + r''') / ''' + str(words_params[2]) + r''', ''' + str(words_params[0]) + r''') / ''' + str(words_params[2]) + r''';
                var nonwordsDensity = gammaPdf((stringLogprob - ''' + str(nonwords_params[1]) + r''') / ''' + str(nonwords_params[2]) + r''', ''' + str(nonwords_params[0]) + r''') / ''' + str(nonwords_params[2]) + r''';
                if (wordsDensity > nonwordsDensity) {
                    return true;
                }
                if (wordsDensity < nonwordsDensity) {
                    return false;
                }
                return Math.random() >= 0.5;
            };
        ''').encode()

        data, is_gzipped = bytes(data), False

        if args.gzip:
            import gzip
            print('  Gzipping...')
            gzipped_data = gzip.compress(data)
            if len(gzipped_data) < len(data):
                data, is_gzipped = gzipped_data, True

        return code, data, is_gzipped
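The generated JavaScript above evaluates a standard-gamma quantile with gammaPpf and then rescales it as params[1] + gammaPpf(...) * params[2], i.e. loc + scale * quantile. A minimal Python sketch, with illustrative shape/loc/scale values standing in for params[0..2], checking that this rescaling agrees with scipy's three-parameter ppf:

from scipy.stats import gamma

# Illustrative values only; params[0..2] would come from gamma.fit on the log-probabilities.
shape, loc, scale = 2.0, -35.0, 4.0
for p in (0.01, 0.5, 0.997):
    standard = gamma.ppf(p, shape)           # standard-gamma quantile (what gammaPpf computes)
    rescaled = loc + standard * scale        # the rescaling done in the generated JS
    direct = gamma.ppf(p, shape, loc=loc, scale=scale)
    assert abs(rescaled - direct) < 1e-9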
예제 #44
0
파일: EM.py 프로젝트: grv87/thesis-code
	print(x.mean(),  x.var())
	
	m = 0 # Number of current step
	print('m', m)
	
	# Initial values TODO
	p_m = [0.9, 0.1] # [1 / k for i in k_underline]
	alpha_m =[0.2,  0.3]
	beta_m = [0.0002,  0.0001]

	print('p_m', p_m)
	print('alpha_m', alpha_m)
	print('beta_m', beta_m)

	# Initial values TODO
	s, loc, t = gamma.fit(x, loc = 0)
	alpha_est = s
	beta_est = 1 / t
	print(alpha_est, loc, beta_est)
	p_m = [0.4, 0.6]
	alpha_m = [alpha_est ** (random() * 2) for i in k_underline]
	beta_m = [beta_est ** (random() * 2) for i in k_underline]
	theta_m = p_m + alpha_m + beta_m
	print('p_m', p_m)
	print('alpha_m', alpha_m)
	print('beta_m', beta_m)
	
	while True:
		# Prepare for next step
		m += 1
		print('m', m)
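The snippet above seeds a two-component gamma mixture from a single gamma.fit and then enters the EM loop, which is truncated here. The following is only a sketch of what one EM iteration could look like under those assumptions; the names mirror the snippet (p_m, alpha_m, beta_m with scale = 1 / beta), and a weighted moment-matching M-step stands in for the proper weighted maximum-likelihood update:

import numpy as np
from scipy.stats import gamma

def em_step(x, p_m, alpha_m, beta_m):
    # E-step: responsibility of each component for each observation
    dens = np.array([p_m[j] * gamma.pdf(x, alpha_m[j], scale=1.0 / beta_m[j])
                     for j in range(len(p_m))])
    resp = dens / dens.sum(axis=0)
    # M-step: mixing weights, then alpha/beta via weighted moment matching
    p_new, alpha_new, beta_new = [], [], []
    for w in resp:
        mean = np.average(x, weights=w)
        var = np.average((x - mean) ** 2, weights=w)
        p_new.append(w.mean())                # updated mixing weight
        alpha_new.append(mean ** 2 / var)     # shape from weighted mean and variance
        beta_new.append(mean / var)           # rate from weighted mean and variance
    return p_new, alpha_new, beta_new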
예제 #45
0
plt.plot(taille_A, lw=2)
plt.plot(taille_B, lw=2)  
plt.xlabel('Number of rounds')
plt.grid(True)          

# play N games and collect the number of rounds in each
def play_N_batailles(N):
    nb_manches = []
    for idx in range(N):
        cards = new_card_set()
        A, B = sort_cards(cards)
        taille_A, taille_B = play_bataille(A, B)
        nb_manches.append(len(taille_A))
    return nb_manches

N = 10000
nb_manches = np.array(play_N_batailles(N))


plt.figure(2)
plt.hist(nb_manches, bins=500, color='b', alpha=0.5)

from scipy.stats import gamma 

params = gamma.fit(nb_manches)

x = np.arange(1, N)
#plt.figure(3)
plt.plot(x, 2*N*gamma.pdf(x, *params[:-2], loc=params[-2], scale=params[-1]),
                          lw=2, color='r')
plt.xlim(0, 300)
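The 2*N factor above converts the fitted density into approximate histogram counts, presumably because the bins come out roughly two rounds wide; in general the factor is N times the bin width. A sketch of the same overlay without a hand-tuned count factor, assuming a matplotlib version that supports the density keyword:

# Normalise the histogram to a density and plot the fitted gamma pdf directly.
plt.figure(3)
plt.hist(nb_manches, bins=500, density=True, color='b', alpha=0.5)
x = np.arange(1, nb_manches.max())
plt.plot(x, gamma.pdf(x, *params), lw=2, color='r')  # params = (shape, loc, scale) from gamma.fit
plt.xlim(0, 300)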
예제 #46
0
23304334, 130258928, 18452922, 42620009, 77351045, 76032324, 44196273,
49036974, 23245119, 50656670, 84837088, 2089074, 52517589, 21469409,
106694589, 67063796, 16053222, 101270899, 15620252, 18355964, 839197,
31083111, 66698677]

# mean = np.mean(data)
# var = np.var(data)
# fit = truncnorm.fit(data, 0, 2866387308, loc = mean, scale = var)
# print(fit)
# print(truncnorm(*fit).rvs(100))
# vector = lengths[(2, 2)]
vector = np.array(sorted(lengths[(5, 5)]))

mean = np.mean(vector)
var = np.var(vector)
std = np.std(vector)
# fit = truncnorm.fit(vector, a = 0, b = 2866387308, loc = mean, scale = var)
fit = gamma.fit(vector)
print(fit)
# pdf = gamma.pdf(vector, *fit[:-2], loc = fit[-2], scale = fit[-1])
pdf = gamma(*fit).pdf(vector)
print(pdf)
# print(gamma.pdf(vector, alpha))

plt.figure()
plt.hist(vector, bins = 20, density = True)
# plt.hist(truncnorm(*fit).rvs(10000), bins = 20, normed = True, alpha = 0.5)
plt.plot(vector, pdf)
# plt.hist(gamma(*fit).rvs(10000), bins = 20, normed = True, alpha = 0.5)
plt.show()
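A quick goodness-of-fit check for the fit above, sketched with scipy's one-sample Kolmogorov-Smirnov test; because the parameters were estimated from the same data, the resulting p-value is optimistic:

from scipy.stats import kstest

# Compare the empirical distribution of `vector` with the fitted gamma(shape, loc, scale).
statistic, p_value = kstest(vector, 'gamma', args=fit)
print(statistic, p_value)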
예제 #47
0
 def match_single_track_dist(self, model_track, obs_track):
     label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
     obs_hail_dists = pd.DataFrame(index=obs_track.times,
                                   columns=label_columns)
     model_hail_dists = pd.DataFrame(index=model_track.times,
                                     columns=label_columns)
     for t, step in enumerate(obs_track.timesteps):
         step_vals = step[(obs_track.masks[t] == 1) & (obs_track.timesteps[t] > self.mrms_ew.min_thresh)]
         min_hail = step_vals.min() - 0.1
         obs_hail_dists.loc[obs_track.times[t], ["Shape", "Location", "Scale"]] = gamma.fit(step_vals,
                                                                                            floc=min_hail)
         obs_hail_dists.loc[obs_track.times[t], "Max_Hail_Size"] = step_vals.max()
     if obs_track.times.size > 1 and model_track.times.size > 1:
         normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \
                                * (obs_track.times - obs_track.times.min())
         normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \
                                  * (model_track.times - model_track.times.min())
         for col in label_columns:
             interp_func = interp1d(normalized_obs_times, obs_hail_dists[col], kind="linear",
                                    bounds_error=False, fill_value=0)
             model_hail_dists.loc[model_track.times, col] = interp_func(normalized_model_times)
     else:
         for param in obs_hail_dists.columns:
             model_hail_dists.loc[model_track.times, param] = obs_hail_dists.loc[obs_track.times[0], param]
     return model_hail_dists
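Both time axes in match_single_track_dist are rescaled to [0, 1] before the gamma parameters are interpolated, so observed and model tracks of different lengths and offsets line up. A standalone sketch with hypothetical times and Shape values (not taken from any real track):

import numpy as np
from scipy.interpolate import interp1d

obs_times = np.array([0.0, 5.0, 10.0, 15.0])       # hypothetical observation times
model_times = np.array([2.0, 7.0, 12.0])           # hypothetical model times
obs_shape = np.array([2.1, 2.5, 3.0, 2.8])         # hypothetical fitted Shape values

norm_obs = (obs_times - obs_times.min()) / (obs_times.max() - obs_times.min())
norm_model = (model_times - model_times.min()) / (model_times.max() - model_times.min())
interp_func = interp1d(norm_obs, obs_shape, kind="linear", bounds_error=False, fill_value=0)
print(interp_func(norm_model))                     # Shape values on the model track's time steps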
예제 #48
0
'''

import json, sys, collections
import numpy as np
import scipy
from scipy.stats import gamma
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

with open("ALL_PAIR_JACCARD.json") as f:
    all_pairs = json.load(f)

print("Get all pairs distribution")
all_pair_dists = np.square(np.array(all_pairs).flatten())
gamma_x = gamma.fit(all_pair_dists)

print("Get top k distance distribution")
k = 50
topk_dists = np.square(np.sort(all_pairs, axis=1))
gamma_xk = []
for i in range(k):
    dists = topk_dists[:,i]
    gamma_xk.append(gamma.fit(dists))
max_x = np.max(all_pair_dists)
with open("JACCARD_DIST_DIST.json", "w") as f:
    d = [gamma_x, gamma_xk, max_x] 
    json.dump(d, f)
print("Output file")

예제 #49
0
    def match_hail_size_step_distributions(self, model_tracks, obs_tracks, track_pairings):
        """
        Given a matching set of observed tracks for each model track, fit gamma distributions to the
        observed hail sizes at each matched model time step and store the parameters on the model track.

        Args:
            model_tracks: sequence of model storm tracks to annotate with observed hail statistics.
            obs_tracks: sequence of observed storm tracks supplying the hail size fields.
            track_pairings: table of matches, indexed by a running model time-step counter, with
                "Matched" flags and "Pairings" lists of (observed track, time step) index pairs.

        Returns:

        """
        label_columns = ["Matched", "Max_Hail_Size", "Num_Matches", "Shape", "Location", "Scale"]
        s = 0
        for m, model_track in enumerate(model_tracks):
            model_track.observations = pd.DataFrame(index=model_track.times, columns=label_columns, dtype=np.float64)
            model_track.observations.loc[:, :] = 0
            model_track.observations["Matched"] = model_track.observations["Matched"].astype(np.int32)
            for t, time in enumerate(model_track.times):
                model_track.observations.loc[time, "Matched"] = track_pairings.loc[s, "Matched"]
                if model_track.observations.loc[time, "Matched"] > 0:
                    all_hail_sizes = []
                    step_pairs = track_pairings.loc[s, "Pairings"]
                    for step_pair in step_pairs:
                        obs_step = obs_tracks[step_pair[0]].timesteps[step_pair[1]].ravel()
                        obs_mask = obs_tracks[step_pair[0]].masks[step_pair[1]].ravel()
                        all_hail_sizes.append(obs_step[(obs_mask == 1) & (obs_step >= self.mrms_ew.min_thresh)])
                    combined_hail_sizes = np.concatenate(all_hail_sizes)
                    min_hail = combined_hail_sizes.min() - 0.1
                    model_track.observations.loc[time, "Max_Hail_Size"] = combined_hail_sizes.max()
                    model_track.observations.loc[time, "Num_Matches"] = step_pairs.shape[0]
                    model_track.observations.loc[time, ["Shape", "Location", "Scale"]] = gamma.fit(combined_hail_sizes,
                                                                                                   floc=min_hail)
                s += 1