def nc_of_expon():
    # 1st and 2nd non-central moments of an exponential distribution with lambda = 0.5
    E1 = lambda x: x * 0.5 * np.exp(-x / 2)
    E2 = lambda x: x ** 2 * 0.5 * np.exp(-x / 2)
    print(integrate.quad(E1, 0, np.inf))
    print(integrate.quad(E2, 0, np.inf))
    print(expon(scale=2).moment(1))
    print(expon(scale=2).moment(2))  # second non-central moment, for comparison with E2
def metro_exp_poison(chute=None, N=1000):
    # Metropolis sampler: exponential prior, Poisson likelihood
    # (note: expon(1) freezes a shifted exponential with loc=1 and scale=1)
    valores = list(chute) if chute is not None else [1]
    taxa = []
    priori = expon(1)
    for i in range(N):
        expo_aux = expon(1)
        valor = expo_aux.rvs()
        U = random.rand()
        x_dado_y = poisson(valores[-1])
        y_dado_x = poisson(valor)
        teste = (priori.pdf(valor) * x_dado_y.pmf(int(valores[-1]))) / \
                (priori.pdf(valores[-1]) * y_dado_x.pmf(int(valor)))
        if min([teste, 1]) > U:
            valores.append(valor)
            taxa.append(1)
        else:
            valores.append(valores[-1])
            taxa.append(0)
    return {"valores": valores, "taxa": sum(taxa) / len(taxa)}
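# Usage sketch (illustrative only): run the chain above and inspect the acceptance rate.
# The imports mirror what metro_exp_poison itself needs and are assumptions about the
# surrounding module.
from scipy.stats import expon, poisson
from numpy import random

out = metro_exp_poison(N=2000)
print("acceptance rate:", out["taxa"])
print("chain mean:", sum(out["valores"]) / len(out["valores"]))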
def testExponentialSampleMultiDimensional(self):
    with self.test_session():
        batch_size = 2
        lam_v = [3.0, 22.0]
        lam = constant_op.constant([lam_v] * batch_size)
        exponential = exponential_lib.Exponential(rate=lam)
        n = 100000
        samples = exponential.sample(n, seed=138)
        self.assertEqual(samples.get_shape(), (n, batch_size, 2))
        sample_values = samples.eval()
        self.assertFalse(np.any(sample_values < 0.0))
        for i in range(2):
            self.assertLess(
                stats.kstest(sample_values[:, 0, i],
                             stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
            self.assertLess(
                stats.kstest(sample_values[:, 1, i],
                             stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
def do_train_rand(train, valid, params=None, max_models=32):
    """Do randomized hyper-parameter search

    Args:
        train (SFrame): training set
        valid (SFrame): validation set
        params (dict): parameters for random search
        max_models (int): maximum number of models to run

    Returns:
        res (SFrame): table of choices of parameters sorted by valid RMSE
    """
    if not params:
        params = {'user_id': ['username'],
                  'item_id': ['course_id'],
                  'target': ['label'],
                  'binary_target': [True],
                  'num_factors': stats.randint(4, 128),
                  'regularization': stats.expon(scale=1e-4),
                  'linear_regularization': stats.expon(scale=1e-7)}
    job = gl.toolkits.model_parameter_search \
        .random_search.create((train, valid),
                              gl.recommender.factorization_recommender.create,
                              params, max_models=max_models)
    try:
        res = job.get_results()
        res = res.sort('validation_rmse')
        print('Best params for random search are: {}'.format(res[0]))
        res.save('rand_search.csv', format='csv')
    except Exception:
        print(job.get_metrics())
        res = None
    return res
def get_param_grid(cur_model, points, rand):
    print('\nRetrieving parameter grid...')
    try:
        c_range = 10.0 ** np.arange(-2, 3)
        # print 'Getting Parameter grid...'
        # out_txt.write('Getting Parameter grid...')
        gamma_range = [0, .01, .1, .3]
        # neighbor_range = np.arange(2, points, step=5)
        # leaf_range = np.arange(10, points, step=5)
        neighbor_range = np.arange(2, 17, step=5)
        leaf_range = np.arange(10, 60, step=5)
        if not rand:
            grid_params = {'SVC()': [{'C': c_range, 'kernel': ['poly'], 'degree': [3, 5, 8],
                                      'gamma': gamma_range, 'probability': [True],
                                      'class_weight': ['auto', None]},
                                     {'C': c_range, 'kernel': ['rbf', 'sigmoid'],
                                      'gamma': gamma_range, 'probability': [True],
                                      'class_weight': ['auto', None]},
                                     {'C': c_range, 'kernel': ['linear'], 'random_state': [10],
                                      'probability': [True], 'class_weight': ['auto', None]}],
                           'KNeighborsClassifier()': [{'n_neighbors': neighbor_range,
                                                       'weights': ['uniform'],
                                                       'algorithm': ['brute'],
                                                       'metric': ['euclidean', 'manhattan']},
                                                      {'n_neighbors': neighbor_range,
                                                       'weights': ['uniform'],
                                                       'algorithm': ['ball_tree', 'kd_tree'],
                                                       'metric': ['euclidean', 'manhattan'],
                                                       'leaf_size': leaf_range}],
                           'LogisticRegression()': [{'penalty': ['l1', 'l2'], 'C': c_range,
                                                     'class_weight': [None, 'auto']}]}
            return grid_params[cur_model]
        else:
            rand_params = {'SVC()': {'C': stats.expon(scale=300),
                                     'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                                     'degree': [3, 4, 5, 6, 7, 8],
                                     'gamma': stats.expon(scale=1 / 3),
                                     'random_state': [10], 'probability': [True],
                                     'class_weight': ['auto', None]},
                           'KNeighborsClassifier()': {'n_neighbors': stats.randint(low=2, high=20),
                                                      'weights': ['uniform', 'distance'],
                                                      'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                                                      'metric': ['euclidean', 'manhattan'],
                                                      'leaf_size': stats.randint(low=10, high=60)},
                           'LogisticRegression()': {'penalty': ['l1', 'l2'],
                                                    'C': stats.expon(scale=300),
                                                    'class_weight': [None, 'auto']}}
            return rand_params[cur_model]
    except KeyError:
        print('could not get parameter grid')
def generate_receiver_events(self, trial_start_time, trial_duration_seconds,
                             num_receivers_at_time_zero, arrival_rate, service_rate):
    """Generates receiver init and termination events.

    Receiver initialization events are generated as a Poisson process with arrival rate:
    arrival_rate (in receivers per second). Each receiver has an exponential service time
    with rate: service_rate. This should be called at the start of a simulation run, just
    after initialization of mininet.
    """
    if self.event_list is not None:
        return
    self.event_list = []
    self.past_event_list = []
    self.trial_start_time = trial_start_time

    # First, generate the initial receivers (active at time 0)
    for i in range(0, num_receivers_at_time_zero):
        # Generate a service time
        service_time = expon(loc=0, scale=(1.0 / service_rate)).rvs(1)[0]
        # Select a host through a uniform random distribution
        receiver = self.net_hosts[randint(0, len(self.net_hosts))]
        receiver = MulticastReceiverApplication(receiver, self.group_ip, self.mcast_port,
                                                self.echo_port, trial_start_time, service_time)
        self.receiver_applications.append(receiver)
        self.event_list.append((trial_start_time,
                                DynamicMulticastGroupDefinition.EVENT_RECEIVER_INIT, receiver))
        self.event_list.append((trial_start_time + service_time,
                                DynamicMulticastGroupDefinition.EVENT_RECEIVER_TERMINATION, receiver))

    # Alternative: Generate inter-arrival times using exponential distribution
    arrival_times = []
    expo_rv = expon(loc=0, scale=(1.0 / arrival_rate))
    last_arrival_time = 0
    while last_arrival_time < trial_duration_seconds:
        next_arrival_time = expo_rv.rvs(1)[0] + last_arrival_time
        if next_arrival_time < trial_duration_seconds:
            arrival_times.append(next_arrival_time)
        last_arrival_time = next_arrival_time

    # Alternative Method
    # Find the number of arrivals in the interval [0, trial_duration_seconds]
    # Size = trial_duration_seconds, since we want the number of arrivals in trial_duration_seconds time units
    # num_arrivals = sum(poisson.rvs(arrival_rate, size=trial_duration_seconds))
    # Once the number of arrivals is known, generate arrival times uniform on [0, trial_duration_seconds]
    # arrival_times = []
    # for i in range(0, num_arrivals):
    #     arrival_times.append(uniform(0, trial_duration_seconds))

    # Now, for each arrival, generate a corresponding receiver application and events
    for arrival_time in arrival_times:
        # Generate a service time
        service_time = expon(loc=0, scale=(1.0 / service_rate)).rvs(1)[0]
        # Select a host through a uniform random distribution
        receiver = self.net_hosts[randint(0, len(self.net_hosts))]
        receiver = MulticastReceiverApplication(receiver, self.group_ip, self.mcast_port,
                                                self.echo_port, trial_start_time + arrival_time,
                                                service_time)
        self.receiver_applications.append(receiver)
        self.event_list.append((trial_start_time + arrival_time,
                                DynamicMulticastGroupDefinition.EVENT_RECEIVER_INIT, receiver))
        self.event_list.append((trial_start_time + arrival_time + service_time,
                                DynamicMulticastGroupDefinition.EVENT_RECEIVER_TERMINATION, receiver))

    # Sort the event list by time
    self.event_list = sorted(self.event_list, key=lambda tup: tup[0])

    # Debug printing
    for event in self.event_list:
        print('Time:' + str(event[0]) + ' ' + str(event[1]) + ' ' + str(event[2]))
def test_randomized_search_grid_scores():
    # Make a dataset with a lot of noise to get various kind of prediction
    # errors across CV folds and parameter settings
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # XXX: as of today (scipy 0.12) it's not possible to set the random seed
    # of scipy.stats distributions: the assertions in this test should thus
    # not depend on the randomization
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    n_cv_iter = 3
    n_search_iter = 30
    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
                                param_distributions=params, iid=False)
    search.fit(X, y)
    assert_equal(len(search.grid_scores_), n_search_iter)

    # Check consistency of the structure of each cv_score item
    for cv_score in search.grid_scores_:
        assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
        # Because we set iid to False, the mean_validation score is the
        # mean of the fold mean scores instead of the aggregate sample-wise
        # mean score
        assert_almost_equal(np.mean(cv_score.cv_validation_scores),
                            cv_score.mean_validation_score)
        assert_equal(list(sorted(cv_score.parameters.keys())),
                     list(sorted(params.keys())))

    # Check the consistency with the best_score_ and best_params_ attributes
    sorted_grid_scores = list(sorted(search.grid_scores_,
                                     key=lambda x: x.mean_validation_score))
    best_score = sorted_grid_scores[-1].mean_validation_score
    assert_equal(search.best_score_, best_score)
    tied_best_params = [s.parameters for s in sorted_grid_scores
                        if s.mean_validation_score == best_score]
    assert_true(search.best_params_ in tied_best_params,
                "best_params_={0} is not part of the"
                " tied best models: {1}".format(search.best_params_, tied_best_params))
def nc_of_expon():
    # 1st non-center moment of expon distribution whose lambda is 0.5
    E1 = lambda x: x * 0.5 * np.exp(-x / 2)
    # 2nd non-center moment of expon distribution whose lambda is 0.5
    E2 = lambda x: x ** 2 * 0.5 * np.exp(-x / 2)
    print(integrate.quad(E1, 0, np.inf))
    print(integrate.quad(E2, 0, np.inf))
    print(expon(scale=2).moment(1))
    print(expon(scale=2).moment(2))
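# Quick closed-form check of the quadratures above: for an exponential with rate
# lambda = 0.5 (scale = 2), E[X] = 1/lambda = 2 and E[X^2] = 2/lambda^2 = 8, which is
# exactly what expon(scale=2).moment(1) and .moment(2) return.
import numpy as np
from scipy import integrate
from scipy.stats import expon

assert np.isclose(integrate.quad(lambda x: x * 0.5 * np.exp(-x / 2), 0, np.inf)[0],
                  expon(scale=2).moment(1))
assert np.isclose(integrate.quad(lambda x: x ** 2 * 0.5 * np.exp(-x / 2), 0, np.inf)[0],
                  expon(scale=2).moment(2))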
def SalehValenzuela(**kwargs):
    """ generic Saleh and Valenzuela Model

    Parameters
    ----------
    Lam : clusters Poisson Process parameter (ns)
    lam : rays Poisson Process parameter (ns)
    Gam : clusters exponential decay factor
    gam : rays exponential decay factor
    T   : observation duration
    """
    defaults = {'Lam': .1,
                'lam': .5,
                'Gam': 30,
                'gam': 5,
                'T': 100}

    for k in defaults:
        if k not in kwargs:
            kwargs[k] = defaults[k]

    Lam = kwargs['Lam']
    lam = kwargs['lam']
    Gam = kwargs['Gam']
    gam = kwargs['gam']
    T = kwargs['T']
    Nr = int(1.2 * T / Lam)
    Nc = int(1.2 * T / lam)
    e1 = st.expon(scale=1. / Lam)
    e2 = st.expon(scale=1. / lam)
    # cluster time of arrival
    tc = np.cumsum(e1.rvs(Nr))
    tc = tc[np.where(tc < T)]
    Nc = len(tc)
    tauc = np.kron(tc, np.ones((1, Nr)))[0, :]
    # rays time of arrival
    taur = np.cumsum(e2.rvs((Nr, Nc)), axis=0).ravel()
    # exponential decays of cluster and rays
    etc = np.exp(-tauc / (1.0 * Gam))
    etr = np.exp(-taur / (1.0 * gam))
    et = etc * etr
    tau = tauc + taur
    # filtering < T and reordering in delay domain
    u = np.where(tau < T)
    tau = tau[u]
    et = et[u]
    v = np.argsort(tau)
    taus = tau[v]
    ets = et[v] * np.sign(np.random.rand(len(v)) - 0.5)
    SVir = bs.TBsignal(taus, ets)
    return SVir
def testExponential1(self):
    distV = expon(scale=5)
    distW = expon(scale=3)
    v = 2
    w = 3
    expected = ([0, 1], [0, 3])
    bids = [v, w]
    distributions = [distV, distW]
    obtained = myersonAuction(bids, distributions)
    self.assertEqual(expected, obtained,
                     msg="Myerson auction with inputs: " + str(bids) + ", " +
                         str(distributions) + ". Expected " + str(expected) +
                         " but obtained " + str(obtained) + ".")
def sgd(pd, pl, qd, ql):
    params = {'loss': ['squared_loss', 'huber', 'epsilon_insensitive',
                       'squared_epsilon_insensitive'],
              'alpha': expon(scale=1),
              'epsilon': expon(scale=1),
              'l1_ratio': uniform(),
              'penalty': ['l2', 'l1', 'elasticnet']}
    clf = SGDRegressor()
    #clf = RandomizedSearchCV(clf, params, n_jobs=2, n_iter=10, verbose=10)
    print("Training SGD regressor")
    clf.fit(pd, pl)
    print("Score: " + str(clf.score(qd, ql)))
    return clf
def __init__(self, scenario_flag="Freeway_Free"):
    """
    Totally five scenarios are supported here:
    Freeway_Night, Freeway_Free, Freeway_Rush;
    Urban_Peak, Urban_Nonpeak.
    The PDFs of the vehicle speed and the inter-vehicle space are adapted
    from existing references.
    """
    if scenario_flag == "Freeway_Night":
        self.headway_random = expon(0.0, 1.0 / 256.41)
        meanSpeed = 30.93  # m/s
        stdSpeed = 1.2  # m/s
    elif scenario_flag == "Freeway_Free":
        self.headway_random = lognorm(0.75, 0.0, np.exp(3.4))
        meanSpeed = 29.15  # m/s
        stdSpeed = 1.5  # m/s
    elif scenario_flag == "Freeway_Rush":
        self.headway_random = lognorm(0.5, 0.0, np.exp(2.5))
        meanSpeed = 10.73  # m/s
        stdSpeed = 2.0  # m/s
    elif scenario_flag == "Urban_Peak":
        scale = 1.096
        c = 0.314
        loc = 0.0
        self.headway_random = fisk(c, loc, scale)
        meanSpeed = 6.083  # m/s
        stdSpeed = 1.2  # m/s
    elif scenario_flag == "Urban_Nonpeak":
        self.headway_random = lognorm(0.618, 0.0, np.exp(0.685))
        meanSpeed = 12.86  # m/s
        stdSpeed = 1.5  # m/s
    else:
        raise ValueError("unsupported scenario_flag: %s" % scenario_flag)
    self.speed_random = norm(meanSpeed, stdSpeed)
def test_random_vector(self):
    comp = (stats.expon(), stats.beta(0.4, 0.8), stats.norm())
    rv = best.random.RandomVectorIndependent(comp)
    print(str(rv))
    x = rv.rvs()
    print('One sample: ', x)
    print('pdf:', rv.pdf(x))
    x = rv.rvs(size=10)
    print('10 samples: ', x)
    print('pdf: ', rv.pdf(x))
    print(rv.mean())
    print(rv.var())
    print(rv.std())
    print(rv.stats())
    # Split it in two:
    rv1, rv2 = rv.split(0)
    print(str(rv1))
    x = rv1.rvs(size=5)
    print(x)
    print(rv1.pdf(x))
    print(rv2.pdf(x))
    print(str(rv2))
    print(x)
    x = rv2.rvs(size=5)
    print(rv2.pdf(x))
    rv3, rv4 = rv1.split(0)
    print(str(rv3))
    print(str(rv4))
    rv5, rv6 = rv3.split(1)
    print(str(rv5))
    print(str(rv6))
    rv7, rv8 = rv5.split(2)
    print(str(rv7))
    print(str(rv8))
def gillespie_logistique2(taille_ini, b1, b2, d1, d2, temps):
    """Another implementation of the Gillespie algorithm:
    the population size is only recorded at predefined instants."""
    taille = zeros(temps.size)  # preallocate memory
    # initialise the current time and population size
    temps_courant, taille_courante = 0.0, taille_ini
    t_nais = (b1 + b2 * taille_courante) * taille_courante  # birth rate
    t_mort = (d1 + d2 * taille_courante) * taille_courante  # death rate
    tau = t_nais + t_mort  # total rate
    ee = expon()
    uu = uniform()
    delta_temps = ee.rvs() / tau
    for k in range(temps.size):
        # simulate without going past temps[k]
        while temps_courant + delta_temps < temps[k]:
            temps_courant += delta_temps  # update the current time
            if uu.rvs() < t_nais / tau:  # birth with probability t_nais / tau
                taille_courante += 1  # birth
            else:
                taille_courante -= 1  # death
            t_nais = (b1 + b2 * taille_courante) * taille_courante  # birth rate
            t_mort = (d1 + d2 * taille_courante) * taille_courante  # death rate
            tau = t_nais + t_mort  # total rate
            delta_temps = ee.rvs() / tau  # sojourn time
        taille[k] = taille_courante
    return taille
def test_conditional_rv(self):
    return
    px = stats.expon()
    py = best.random.RandomVariableConditional(px, (1, 2),
                                               name='Conditioned Exponential')
    print(str(py))
    print(py.rvs(size=10))
    print(py.interval(0.5))
    print(py.median())
    print(py.mean())
    print(py.var())
    print(py.std())
    print(py.stats())
    print(py.moment(10))
    return
    i = (0, 4)
    t = np.linspace(i[0], i[1], 100)
    plt.plot(t, py.pdf(t), t, py.cdf(t), linewidth=2.)
    plt.legend(['PDF', 'CDF'])
    #plt.show()
    py1, py2 = py.split()
    print(str(py1))
    print(str(py2))
    plt.plot(t, py1.pdf(t), t, py1.cdf(t), t, py2.pdf(t), t, py2.cdf(t),
             linewidth=2)
    plt.legend(['PDF $y_1$', 'CDF $y_1$', 'PDF $y_2$', 'CDF $y_2$'])
def get_value(self):
    fig = figure()
    xname = "invariantMass"
    xmin, xmax, xbins = -5., 15., 50
    index = "run*"
    x, counts = vis_bokeh.get_1d_hist(xname, xmin, xmax, xbins, es, index=index)
    deltas = np.sqrt(counts)
    fig = vis_bokeh.whiskered_histogram(xmin, xmax, xbins, counts, deltas, -deltas)
    # fit params
    model = mix
    model.fit(sample_weight=counts)  # ,values_init={'sig_weightlog':np.log(0.4),'bck_weightlog':np.log(0.6)})
    parameters = model.parameters
    w_sig, w_bkg = np.exp(parameters['sig_weightlog']), np.exp(parameters['bck_weightlog'])
    w_sum = w_sig + w_bkg
    w_sig, w_bkg = w_sig / w_sum, w_bkg / w_sum
    n_events = np.sum(counts)
    norm = n_events * (xmax - xmin) / xbins
    # plot lines
    expo = lambda x_arr: st.expon(0, 1. / parameters['slope']).pdf(x_arr) * w_bkg * norm
    gauss = lambda x_arr: st.norm(parameters['mean'], parameters['sigma']).pdf(x_arr) * w_sig * norm
    pdf_x = np.arange(1000, dtype='float') / 1000. * (xmax - xmin) + xmin
    fig.line(pdf_x, expo(pdf_x), legend="Background", line_width=2, color='red')
    fig.line(pdf_x, gauss(pdf_x), legend="Signal", line_width=2, color='blue')
    fig.line(pdf_x, gauss(pdf_x) + expo(pdf_x), legend="Sum", line_width=2, color='green')
    fig.xaxis.axis_label = time.strftime("%H:%M:%S")
    return vis_bokeh.fig_to_html(fig)
def fit_exponential(vect):
    exponential_dist = stats.expon(scale=mean(vect))  # lambda = 1 / mean
    print('Performing the KS test on exponential data:')
    # kstest expects the CDF of the reference distribution, not the PDF
    dstat, p = stats.kstest(vect, exponential_dist.cdf)
    print('D-Statistic:\t{}'.format(dstat))
    print('P Value:\t\t{}'.format(p))
    print('----------------------------')
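# Small self-contained check of the KS-test-against-a-fitted-exponential pattern above,
# on synthetic data (illustrative only): the MLE of the scale is the sample mean, and
# kstest takes the CDF callable of the reference distribution.
import numpy as np
from scipy import stats

data = stats.expon(scale=3.0).rvs(size=1000, random_state=0)
fitted = stats.expon(scale=np.mean(data))
dstat, p = stats.kstest(data, fitted.cdf)
print(dstat, p)  # large p-value: data consistent with the fitted exponential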
def particlesF(t, pRecs, tol, xs, sim, distFunc, ii):
    '''
    # Filter particles step.
    '''
    p_num = len(pRecs)
    np.random.seed()
    sys.stdout.flush()
    a_star = np.zeros(p_num)
    rho = tol + 1.
    N = np.size(pRecs[0], axis=1)  # number of particles. Should check all pRec match.
    n = 1000  # upper bound for number of samples rejected before raising error.
    rejects = 0  # count number of rejects.
    while (rho > tol and rejects < n):
        # FIXED: if no particles are accepted after a number of steps tolerance may be too low
        # fix by adding condition to raise error after n particles being rejected.
        r = np.random.randint(0, high=N)
        for i in range(p_num):
            if (pRecs[i][t - 1, r] > 0):
                a_star[i] = stats.gamma.rvs(pRecs[i][t - 1, r] / rw_var, scale=rw_var)  # p1A[t-1,r] + stats.norm.rvs(scale=0.1)
            else:
                a_star[i] = stats.expon(scale=0.1).rvs()
        ys = sim(*a_star)  # sim(a_star[0],a_star[1],ii)
        rho = distFunc(ys, xs)
        rejects += 1
    if (rejects >= n):
        raise NameError('Rejected all particles. Try increasing tolerances or increasing number of particles to reject.')
    res = a_star.tolist()
    res.append(rho)
    return res  # return parameters and accepted distance.
def svc_appr():
    """
    Best params: {'C': 0.022139881953014046}

    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.grid_search import RandomizedSearchCV
    from scipy.stats import expon

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    svc = LinearSVC(dual=False, class_weight='auto')
    rs = RandomizedSearchCV(svc, n_iter=50, scoring='roc_auc', n_jobs=-1,
                            cv=StratifiedKFold(y, 5), verbose=2,
                            param_distributions={'C': expon()})
    rs.fit(X_scaled, y)

    logger.debug('Got best SVC.')
    logger.debug('Best params: %s', rs.best_params_)
    logger.debug('Grid scores:')
    for i, grid_score in enumerate(rs.grid_scores_):
        print('\t%s' % grid_score)
    logger.debug('Best score (E_val): %s', rs.best_score_)
    logger.debug('E_in: %f', Util.auc_score(rs, X_scaled, y))
def setup_logistic():
    """ Creates clf pipeline for Logistic Regression
        Returns pipeline and parameters for GridSearchCV
    """
    pipeline = Pipeline(steps=[('scaler', MinMaxScaler()),
                               ('kbest', SelectKBest(score_func=f_classif)),
                               ('pca', PCA()),
                               ('clf', LogisticRegression())
                               ])

    params = {'kbest__k': range(3, 30),
              'pca__whiten': (True, False),
              # 'clf__C': [0.001, 0.1, 10, 10**2, 10**5, 10**10],
              'clf__C': expon(),
              'clf__class_weight': [{False: 1, True: 12}, {False: 1, True: 10},
                                    {False: 1, True: 8}, {False: 1, True: 6},
                                    {False: 1, True: 4}],
              'clf__tol': [2**i for i in range(-20, -1)],
              'clf__penalty': ('l1', 'l2')
              }

    return pipeline, params
def create_svm(pd, pl, qd, ql):
    lsvc = LinearSVC()
    params = {'C': expon(scale=100)}
    svm = RandomizedSearchCV(lsvc, params, n_jobs=4, n_iter=10, verbose=10)
    print("Training Linear SVM Randomly")
    svm.fit(pd, pl)
    print("SVM Score: " + str(svm.score(qd, ql)))
    return svm
def exponential(location=0.0, scale=1.0, N=None, quantiles=None):
    # Exponential distribution
    # similar usage to scipy.stats.expon(loc, scale)
    # The scale parameter is equal to scale = 1.0 / lambda
    # see http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
    from scipy.stats import expon
    ppf_engine = expon(location, scale).ppf
    return continuous(ppf=ppf_engine,
                      quantiles=quantiles,
                      N=N,
                      Str='exponential distribution with location = %g and scale = %g'
                          % (location, scale))
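# Illustration of the ppf engine wrapped above: with loc = 0 and scale = 1/lambda,
# expon(loc, scale).ppf(q) returns -ln(1 - q)/lambda, so the median is ln(2)/lambda and
# the (1 - e^-1) quantile is the mean 1/lambda.
import numpy as np
from scipy.stats import expon

lam = 2.0
ppf = expon(0.0, 1.0 / lam).ppf
assert np.isclose(ppf(0.5), np.log(2) / lam)
assert np.isclose(ppf(1 - np.exp(-1)), 1 / lam)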
def __init__(self, rate_func, dt):
    SpikingInputStream.__init__(self)
    self.dt = dt
    self.length = 1
    self.rate_func = rate_func
    self.spikes = np.ones([1, SPIKE_HISTORY_LENGTH]) * -10000.0
    self.rv_expon = expon()
    self.reset()
def test_random_vector_independent(self):
    return
    comp = (stats.expon(), stats.beta(0.4, 0.8), stats.norm())
    rv = best.random.RandomVectorIndependent(comp)
    subdomain = [[0.1, 4.], [0.1, 0.8], [-1., 1.]]
    rvc = best.random.RandomVectorConditional(rv, subdomain)
    print(str(rvc))
    return
def nc_of_expon():
    rv = expon(scale=2)
    print(rv.mean())
    print(rv.var())
    print(rv.moment(1))
    print(rv.moment(2))
    print(rv.moment(3))
    print(rv.moment(4))
    print(rv.stats(moments='mvsk'))
def Exp(lamda, tag=None):
    """
    An Exponential random variate

    Parameters
    ----------
    lamda : scalar
        The inverse scale (as shown on Wikipedia), FYI: mu = 1/lamda.
    """
    return uv(rv=ss.expon(scale=1. / lamda), tag=tag)
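# Parameterization check (scipy only; the `uv` wrapper above is assumed to come from an
# uncertainty-propagation package and is not exercised here): with rate lamda,
# ss.expon(scale=1/lamda) has mean 1/lamda and variance 1/lamda**2.
import numpy as np
import scipy.stats as ss

lamda = 4.0
rv = ss.expon(scale=1. / lamda)
assert np.isclose(rv.mean(), 1 / lamda)
assert np.isclose(rv.var(), 1 / lamda ** 2)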
def test_random_search_cv_results():
    # Make a dataset with a lot of noise to get various kind of prediction
    # errors across CV folds and parameter settings
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # scipy.stats dists now supports `seed` but we still support scipy 0.12
    # which doesn't support the seed. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_splits = 3
    n_search_iter = 30
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits,
                                       iid=False, param_distributions=params)
    random_search.fit(X, y)
    random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits,
                                           iid=True, param_distributions=params)
    random_search_iid.fit(X, y)

    param_keys = ('param_C', 'param_gamma')
    score_keys = ('mean_test_score', 'mean_train_score', 'rank_test_score',
                  'split0_test_score', 'split1_test_score', 'split2_test_score',
                  'split0_train_score', 'split1_train_score', 'split2_train_score',
                  'std_test_score', 'std_train_score',
                  'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')
    n_cand = n_search_iter

    for search, iid in zip((random_search, random_search_iid), (False, True)):
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert_false(any(cv_results['param_C'].mask) or
                     any(cv_results['param_gamma'].mask))
        check_cv_results_grid_scores_consistency(search)
def __init__(self, totaljobs, numServers, labda, mu, failurerate, maintenance):
    np.random.seed(1)
    self.rho = labda / mu
    self.scheduler = Scheduler()
    now = self.scheduler.now

    self.sender = Sender()
    arrival = expon(scale=1. / labda)
    service = expon(scale=1. / mu)
    self.sender.setTotalJobs(totaljobs)
    self.sender.setTimeBetweenConsecutiveJobs(arrival)

    self.queue = Server()
    self.queue.setServiceTimeDistribution(service)

    self.sink = Sink()

    self.sender.Out = self.queue
    self.queue.In = self.sender
    self.queue.Out = self.sink
    self.sink.In = self.queue

    self.scheduler.register(self.sender, self.queue)
    self.sender.start()
def get_simulation(dv=.001, update_method='approx', approx_order=None, tol=1e-8):
    import scipy.stats as sps

    # Create simulation:
    b1 = ExternalPopulation(100)
    i1 = InternalPopulation(v_min=0, v_max=.02, dv=dv, update_method=update_method,
                            approx_order=approx_order, tol=tol)
    b1_i1 = Connection(b1, i1, 1, delays=0.0, weights=(sps.expon(0, .005), 201))
    simulation = Network([b1, i1], [b1_i1])

    return simulation
def test_random_vector_poly(self):
    return
    comp = (stats.expon(), stats.beta(0.4, 0.8), stats.norm())
    rv = best.random.RandomVectorIndependent(comp)
    print(str(rv))
    prod = best.gpc.ProductBasis(degree=5, rv=rv)
    print(str(prod))
    x = rv.rvs(num_samples=10)
    print(x)
    print(prod(x))
def ppf(self, p):
    if self.rtype == "n":
        return norm(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "ln":
        return lognorm(s=self.args[1], scale=math.exp(self.args[0])).ppf(p)
    elif self.rtype == "g":
        return gumbel_r(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "e":
        return expon(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "u":
        return uniform_r(loc=self.args[0], scale=self.args[1]).ppf(p)
    else:
        print("distribution {0} not found".format(self.rtype))
        return "error - distribution"
def part2():
    def create_exponential_estimator(k):
        k_factorial = scipy.special.gamma(k + 1)  # k! = gamma(k + 1)

        def exponential_estimator(samples):
            # note the parentheses: the exponent is 1/k, not (x**1)/k
            return (k_factorial / (samples**k).mean(axis=1)) ** (1 / k)

        return exponential_estimator

    exponential_estimators = [(create_exponential_estimator(k), '$k = {}$'.format(k))
                              for k in range(1, 5 + 1)]
    grid_for_tetta = np.arange(0.01, 5 + 0.01, 0.01)
    draw_risk(grid_for_tetta, exponential_estimators,
              lambda tetta: sps.expon(scale=1 / tetta))
def get_area_distribution(tracks, fit=False):
    area = np.sum(tracks > 0, axis=(1, 2))

    if not fit:
        count = np.bincount(area)
        probability = count / float(np.sum(count))
        return stats.rv_discrete(a=0, b=np.max(probability.shape[0]),
                                 name='signal distribution',
                                 values=(np.arange(count.shape[0]), probability))
    else:
        exp_params = stats.expon.fit(area)
        return stats.expon(*exp_params)
def shop_4():
    N, L = 10, 100
    G = uniform(loc=4, scale=2)  # G is called a frozen distribution.
    a = superposition(G.rvs((N, L)))
    labda = 1.0 / 5
    E = expon(scale=1.0 / (N * labda))
    print(E.mean())
    x, y = cdf(a)
    dist_name = "U[4,6]"
    plot_distributions(x, y, N, L, E, dist_name)
    print(KS(a, E))
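# The helpers used above (`superposition`, `cdf`) are not defined in this excerpt.
# A hedged sketch of one plausible reading, for reference only: `superposition` merges
# the arrival epochs of the N renewal processes and returns the pooled inter-arrival
# times (which, by the Palm-Khintchine argument, look increasingly exponential as N
# grows), and `cdf` returns the empirical CDF as (x, y) arrays.
import numpy as np

def superposition(interarrival_matrix):
    # rows = processes, columns = successive inter-arrival times
    epochs = np.sort(np.cumsum(interarrival_matrix, axis=1).ravel())
    return np.diff(epochs)

def cdf(a):
    x = np.sort(a)
    y = np.arange(1, len(x) + 1) / len(x)
    return x, y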
def histexponencial(self):
    np.random.seed(2016)  # reproducible random numbers
    # seaborn styling parameters
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    a = float(self.para.text())
    exponencial = stats.expon(scale=a)  # scale parameter (= 1 / lambda)
    aleatorios = exponencial.rvs(1000)  # generate random samples
    cuenta, cajas, ignorar = plt.hist(aleatorios, 20)
    plt.ylabel('frequencia')
    plt.xlabel('valores')
    plt.title('Histograma Exponencial')
    plt.show()
def KSTestCDFPlot(Stat_Stn, Stn, Setting):
    MonthlyStat = Stat_Stn["MonthlyStat"]
    Prep = Stat_Stn["PrepDF"]["P"]
    fig, axs = plt.subplots(nrows=4, ncols=3, sharex=True, sharey=True)
    m = 0
    for i in range(3):
        for j in range(4):
            Prep_m = Prep[Prep.index.month == (m + 1)].dropna()
            Prep_m = Prep_m[Prep_m != 0]
            coef1 = MonthlyStat.loc[m + 1, "exp"]
            coef2 = MonthlyStat.loc[m + 1, "gamma"]
            coef3 = MonthlyStat.loc[m + 1, "weibull"]
            coef4 = MonthlyStat.loc[m + 1, "lognorm"]
            # Plot
            ecdf = ECDF(Prep_m)
            x = np.arange(0, max(Prep_m), 0.1)
            axs[j, i].plot(x, expon(coef1[0], coef1[1]).cdf(x), label='exp', linestyle=':')
            axs[j, i].plot(x, gamma(coef2[0], coef2[1], coef2[2]).cdf(x), label='gamma', linestyle=':')
            axs[j, i].plot(x, weibull_min(coef3[0], coef3[1], coef3[2]).cdf(x), label='weibull', linestyle=':')
            xlog = np.arange(min(np.log(Prep_m)), max(np.log(Prep_m)), 0.1)
            axs[j, i].plot(np.exp(xlog), norm(coef4[0], coef4[1]).cdf(xlog), label='lognorm', linestyle=':')
            axs[j, i].plot(ecdf.x, ecdf.y, label='ecdf', color="red")
            axs[j, i].axvline(x=130, color="black", linestyle="--", linewidth=1)  # Definition of storm defined by CWB
            axs[j, i].set_title(str(m + 1))
            axs[j, i].legend()
            m += 1
    fig.suptitle("KStest CDF " + Stn, fontsize=16)
    # Add common axis label
    fig.text(0.5, 0.04, 'Precipitation (mm)', ha='center', fontsize=14)
    fig.text(0.05, 0.5, 'CDF', va='center', rotation='vertical', fontsize=14)
    fig.set_size_inches(18.5, 10.5)
    plt.tight_layout(rect=[0.06, 0.05, 0.94, 0.94])  # rect: tuple (left, bottom, right, top)
    SaveFig(fig, "KStest CDF " + Stn, Setting)
    plt.show()
    return None
def _impose_white_noise(self, data):
    import scipy.stats as stats
    original_shape = data.shape
    noise_area_distr = stats.expon(scale=self._white_noise_rate)
    data = data.reshape(data.shape[0], -1)
    s = data.shape[1]
    for i in range(data.shape[0]):
        n_white_noise = int(np.minimum(noise_area_distr.rvs(size=1),
                                       self._white_noise_maximum) * s)
        indx = np.random.choice(s, size=n_white_noise, replace=False)
        data[i, indx] = self._signal_level
    return data.reshape(original_shape)
def simulation_4():
    N = 1  # number of customers
    L = 300
    labda = 1.0 / 5  # lambda is a function in python. Hence we write labda
    E = expon(scale=1.0 / labda)
    print(E.mean())  # to check that we chose the right scale
    a = E.rvs(L)
    print(KS(a, E))
    x, y = cdf(a)
    dist_name = "U[4,6]"
    plot_distributions(x, y, N, L, E, dist_name)
def shop_3():
    N, L = 3, 100
    G = uniform(loc=4, scale=2)
    a = superposition(G.rvs((N, L)))
    labda = 1.0 / 5
    E = expon(scale=1.0 / (N * labda))
    print(E.mean())
    x, y = cdf(a)
    dist_name = "U[4,6]"
    plot_distributions(x, y, N, L, E, dist_name)
    print(KS(a, E))  # Compute KS statistic using the function defined earlier
def testExponentialSampleMultiDimensional(self):
    batch_size = 2
    lam_v = [3.0, 22.0]
    lam = tf.constant([lam_v] * batch_size)

    exponential = tfd.Exponential(rate=lam, validate_args=True)

    n = 100000
    samples = exponential.sample(n, seed=test_util.test_seed())
    self.assertEqual(samples.shape, (n, batch_size, 2))

    sample_values = self.evaluate(samples)

    self.assertFalse(np.any(sample_values < 0.0))
    for i in range(2):
        self.assertLess(
            sp_stats.kstest(sample_values[:, 0, i],
                            sp_stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
        self.assertLess(
            sp_stats.kstest(sample_values[:, 1, i],
                            sp_stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
def __init__(self, pose, agent=None, sensor=None, color="black", \ noise_per_meter=5, noise_std=math.pi/60,\ bias_rate_stds=(0.1,0.1),\ expected_stuck_time=1e100, expected_escape_time=1e-100,\ expected_kidnap_time=1e100, kidnap_range_x=(-5.0,5.0), kidnap_range_y=(-5.0,5.0)): super().__init__(pose,agent,sensor,color) self.noise_pdf = expon(scale=1.0/(1e-100+noise_per_meter))#exponは指数分布の関数なので、パラメーターを入れるだけでよい(λを掛けなくてよい) self.distance_until_noise = self.noise_pdf.rvs() self.theta_noise = norm(scale=noise_std) self.bias_rate_nu = norm.rvs(loc=1.0,scale=bias_rate_stds[0]) self.bias_rate_omega = norm.rvs(loc=1.0,scale=bias_rate_stds[1]) self.stuck_pdf = expon(scale=expected_stuck_time)#回数の逆数は間隔なのでそのまま入れる self.escape_pdf = expon(scale=expected_escape_time) self.time_until_stuck = self.stuck_pdf.rvs() self.time_until_escape = self.escape_pdf.rvs() self.is_stuck = False self.kidnap_pdf = expon(scale=expected_kidnap_time) self.time_until_kidnap = self.kidnap_pdf.rvs() rx, ry =kidnap_range_x, kidnap_range_y self.kidnap_dist = uniform(loc=(rx[0],ry[0],0.0),scale=(rx[1]-rx[0],ry[1]-ry[0],2*math.pi))#x,y,theta の3次元
def testExponentialSample(self):
    lam = tf.constant([3.0, 4.0])
    lam_v = [3.0, 4.0]
    n = tf.constant(100000)
    exponential = exponential_lib.Exponential(rate=lam)
    samples = exponential.sample(n, seed=tfp_test_util.test_seed())
    sample_values = self.evaluate(samples)
    self.assertEqual(sample_values.shape, (100000, 2))
    self.assertFalse(np.any(sample_values < 0.0))
    for i in range(2):
        self.assertLess(
            sp_stats.kstest(sample_values[:, i],
                            sp_stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
def record_factory():
    """Generator for fake sales records."""
    pid = 0
    while True:
        pid += 1
        gender = random.choice(["male", "female"])
        if gender == "male":
            first_name = fkr.first_name_male()
        else:
            first_name = fkr.first_name_female()
        data = {
            "id": pid,
            "first_name": first_name,
            "last_name": fkr.last_name(),
            "birthdate": fkr.date_between(start_date="-30y", end_date="-18y"),
            "gender": gender,
            "city": random.choice(
                ["Amsterdam", "Den Haag", "Eindhoven", "Utrecht", "Rotterdam"]),
            "orders": 1 + int(stats.expon(0.1).rvs() * 3),
            "order_amount": 1 + int(stats.expon(0.1).rvs() * 20),
            "opt_in": random.choice([True, False]),
        }
        yield data
def __init__(self, pose, agent=None, sensor=None, color="black", \ noise_per_meter=5, noise_std=math.pi/60, \ bias_rate_stds=(0.1,0.1), \ expected_stuck_time = 1e100, expected_escape_time = 1e-100, \ expected_kidnap_time=1e100, kidnap_range_x=(-5.0,5.0), kidnap_range_y=(-5.0,5.0)): super().__init__(pose, agent, sensor, color) # IdeealRobotの__init__メソッドを呼び出す # 踏み石用確率密度関数の作成(指数分布) self.noise_pdf = expon(scale=1.0 / (1e-100 + noise_per_meter)) # 最初に小石を踏むまでの道のり self.distance_until_noise = self.noise_pdf.rvs() # thetaに加えるノイズ self.theta_noise = norm(scale=noise_std) # ロボット固有のバイアスの作成 self.bias_rate_nu = norm.rvs(loc=1.0, scale=bias_rate_stds[0]) self.bias_rate_omega = norm.rvs(loc=1.0, scale=bias_rate_stds[1]) # スタック用確率密度関数の作成(指数分布) self.stuck_pdf = expon(scale=expected_stuck_time) self.escape_pdf = expon(scale=expected_escape_time) # 時間の初期化 self.time_until_stuck = self.stuck_pdf.rvs() self.time_until_escape = self.escape_pdf.rvs() # ロボットがスタック中が表すグラフ self.is_stuck = False # 誘拐が起こる確率密度関数(指数分布) self.kidnap_pdf = expon(scale=expected_kidnap_time) self.time_until_kidnap = self.kidnap_pdf.rvs() # 誘拐後のロボット位置の範囲 rx, ry = kidnap_range_x, kidnap_range_y # 誘拐後のロボットの位置・姿勢の確率密度関数(一様分布) self.kidnap_dist = uniform(loc=(rx[0], ry[0], 0.0), scale=(rx[1] - rx[0], ry[1] - ry[0], 2 * math.pi))
def random_point(self, shape):
    """
    Sample uniformly from the constraint set.
    L1 and L2 are implemented here. Linf implemented in the subclass.
    https://arxiv.org/abs/math/0503650
    """
    if self.p == 2:
        distrib = Normal(0, 1)
    elif self.p == 1:
        distrib = Laplace(0, 1)
    x = distrib.sample(shape)
    # note: expon(.5) is a scipy frozen exponential with loc=0.5 and scale=1
    e = expon(.5).rvs()
    denom = torch.sqrt(e + (x**2).sum())
    return self.alpha * x / denom
def get_simulation(dv=.001, update_method='approx', tol=1e-8):
    import scipy.stats as sps

    # Create simulation:
    b1 = ExternalPopulation(50)
    b2 = ExternalPopulation(1000)
    i1 = InternalPopulation(v_min=-.04, v_max=.02, dv=dv, update_method=update_method, tol=tol)
    b1_i1 = Connection(b1, i1, 1, delays=0.0, weights=(sps.expon(0, .00196), 301))
    b2_i1 = Connection(b2, i1, 1, delays=0.0, weights=(sps.expon(0, .001), 301))
    simulation = Network([b1, b2, i1], [b1_i1, b2_i1])

    return simulation
def checkFitService(vData, dMu, sDistribution):
    dMax = np.max(vData)
    vK = np.arange(dMax)
    iScale = 1 / dMu
    vDistribution = st.expon.pdf(vK, scale=iScale)

    plt.figure()
    plt.subplot(1, 2, 1)
    plotDistribution(vData, vDistribution, sDistribution)
    plt.subplot(1, 2, 2)
    st.probplot(vData, dist=st.expon(scale=iScale), plot=plt)
    plt.grid()
    plt.tight_layout()
    plt.show()
def generate_exp():
    a = request.args.get('a', 0, type=float)

    # generate pdf and return json results
    thetas = np.linspace(0, 5, 200)
    # expon is a standardized version of exponential dist
    # set scale to 1/lambda for non-scaled version
    prior = st.expon(scale=(1 / a))
    ydat = prior.pdf(thetas)
    ycdf = prior.cdf(thetas)
    validIndex = ~np.isinf(ydat)

    d = collections.OrderedDict()
    d['x'] = list(thetas[validIndex])
    d['y'] = list(ydat[validIndex])
    d['cdf'] = list(ycdf[validIndex])
    return jsonify(result=d)
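# Quick check of the parameterization used above (scale = 1/lambda), outside of any
# Flask request context: for rate a = 2.0 the pdf at 0 equals a and the mean is 1/a.
import numpy as np
import scipy.stats as st

a = 2.0
prior = st.expon(scale=1 / a)
assert np.isclose(prior.pdf(0.0), a)
assert np.isclose(prior.mean(), 1 / a)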
def IMPORTANCE_SAMPLING(q, Ns):
    lbd1 = [lbdi + qi for qi, lbdi in zip(q, lbd)]
    X = [expon(scale=1 / lbdi).rvs(Ns) for lbdi in lbd1]
    W = [0] * Ns
    C = [0] * Ns
    for ns in range(Ns):
        if ns != 0 and ns % 10**4 == 0:
            print(ns)
        x = [Xi[ns] for Xi in X]
        W[ns] = exp(CALC_LOGW(x, lbd, lbd1, T))
        C[ns] = CALC_F(x)
        # print ns, W[ns], C[ns], x
    return W, C
def validlhs_regular():
    from pyDOE import lhs as lhsd
    n_samples = 0
    while n_samples != 300:
        lhs = lhsd(6, 2800)
        # expand x, y coordinates to their real values
        lhs[:, 0] = expon(scale=10).ppf(lhs[:, 0])
        lhs[:, 1] = norm(0, 2.5).ppf(lhs[:, 1])
        lhs[:, 2] = expon(scale=10).ppf(lhs[:, 2])
        lhs[:, 3] = norm(0, 2.5).ppf(lhs[:, 3])
        lhs[:, 4] = expon(scale=10).ppf(lhs[:, 4])
        lhs[:, 5] = norm(0, 2.5).ppf(lhs[:, 5])
        # exclude points where turbine 2 is closer than turbine 1
        valid_1_2 = calculate_distance(lhs[:, 2], lhs[:, 3]) \
            - calculate_distance(lhs[:, 0], lhs[:, 1])
        lhs = lhs[valid_1_2 > 0]
        # exclude points where turbine 3 is closer than turbine 2
        valid_2_3 = calculate_distance(lhs[:, 4], lhs[:, 5]) \
            - calculate_distance(lhs[:, 2], lhs[:, 3])
        lhs = lhs[valid_2_3 > 0]
        # exclude points where turbines are closer than 2D to origin
        dist_1 = np.sqrt(lhs[:, 0]**2 + lhs[:, 1]**2)
        lhs = lhs[dist_1 > 2]
        dist_2 = np.sqrt(lhs[:, 2]**2 + lhs[:, 3]**2)
        lhs = lhs[dist_2 > 2]
        dist_3 = np.sqrt(lhs[:, 4]**2 + lhs[:, 5]**2)
        lhs = lhs[dist_3 > 2]
        # exclude points where turbines are closer than 2D from each other
        dist_1_2 = np.sqrt((lhs[:, 0] - lhs[:, 2])**2 + (lhs[:, 1] - lhs[:, 3])**2)
        lhs = lhs[dist_1_2 > 2]
        dist_2_3 = np.sqrt((lhs[:, 2] - lhs[:, 4])**2 + (lhs[:, 3] - lhs[:, 5])**2)
        lhs = lhs[dist_2_3 > 2]
        dist_3_1 = np.sqrt((lhs[:, 4] - lhs[:, 0])**2 + (lhs[:, 5] - lhs[:, 1])**2)
        lhs = lhs[dist_3_1 > 2]
        n_samples = len(lhs)
        print(n_samples)
    # return to transformed coordinates
    lhs[:, 0] = expon(scale=10).cdf(lhs[:, 0])
    lhs[:, 1] = norm(0, 2.5).cdf(lhs[:, 1])
    lhs[:, 2] = expon(scale=10).cdf(lhs[:, 2])
    lhs[:, 3] = norm(0, 2.5).cdf(lhs[:, 3])
    lhs[:, 4] = expon(scale=10).cdf(lhs[:, 4])
    lhs[:, 5] = norm(0, 2.5).cdf(lhs[:, 5])
    # replace lhs points with nearest regular arrays
    X_reg_tran = np.loadtxt('regular_arrays_no_rot_transformed.txt')
    min_dist = np.zeros(len(X_reg_tran))
    for i in range(len(lhs)):
        diff = np.linalg.norm(X_reg_tran - lhs[i, :], axis=1)
        lhs[i, :] = X_reg_tran[np.argmin(diff), :]
    return lhs
def KRRModel(n_jobs=int(os.getenv("SLURM_CPUS_PER_TASK", 3)), cv=5, n_iter=30,
             verbose=2, best_params=None):
    if best_params is None:
        kr = RandomizedSearchCV(KernelRidge(kernel="rbf"),
                                param_distributions={
                                    "alpha": expon(scale=.02),
                                    "gamma": expon(scale=.06)
                                },
                                verbose=verbose, n_jobs=n_jobs, cv=cv, n_iter=n_iter)
    else:
        if "kernel" not in best_params:
            best_params["kernel"] = "rbf"
        kr = KernelRidge(**best_params)
    model = make_pipeline(StandardScaler(), kr)
    return model
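# Usage sketch (illustrative, random data): the returned object is an ordinary
# scikit-learn pipeline, so it can be fit and used for prediction directly. The imports
# below mirror what KRRModel itself needs and are assumptions about the surrounding module.
import os
import numpy as np
from scipy.stats import expon
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X = np.random.rand(100, 5)
y = np.random.rand(100)
model = KRRModel(n_jobs=1, cv=3, n_iter=5, verbose=0)
model.fit(X, y)
print(model.predict(X[:3]))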
def __init__(self, pose, agent=None, sensor=None, color="black", noise_per_meter=5, noise_std=math.pi / 60): super().__init__(pose, agent, sensor, color) # IdeealRobotの__init__メソッドを呼び出す # 指数分布のオブジェクト self.noise_pdf = expon(scale=1.0 / (1e-100 + noise_per_meter)) # 最初に小石を踏むまでの道のり self.distance_until_noise = self.noise_pdf.rvs() # thetaに加えるノイズ self.theta_noise = norm(scale=noise_std)
def __init__(self, numBeams=41, sparsity=1):
    self.pHit = 0.95
    self.pShort = 0.02
    self.pMax = 0.02
    self.pRand = 0.01
    self.sigmaHit = 0.05
    self.lambdaShort = 1
    self.zMax = 20
    self.zMaxEps = 0.02
    self.Angles = np.linspace(-np.pi, np.pi, numBeams)  # array of angles
    self.Angles = self.Angles[::sparsity]
    # Pre-compute for efficiency
    self.normal = norm(0, self.sigmaHit)
    # scipy's expon takes a scale (= 1 / rate), not the rate itself
    self.exponential = expon(scale=1.0 / self.lambdaShort)
def mpdf(x, Ns, Nb, mu, sg, lb, comps=["sig", "bkg"]):
    sig = norm(mu, sg)
    sigN = np.diff(sig.cdf(mrange))
    bkg = expon(mrange[0], lb)
    bkgN = np.diff(bkg.cdf(mrange))

    tot = 0
    if "sig" in comps:
        tot += Ns * sig.pdf(x) / sigN
    if "bkg" in comps:
        tot += Nb * bkg.pdf(x) / bkgN
    return tot
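# Sanity-check sketch for the window normalization used above: dividing each pdf by its
# cdf difference over `mrange` makes mpdf integrate to Ns + Nb on the window. The window
# and parameter values below are illustrative assumptions only; the original module is
# expected to define `mrange` and the norm/expon imports itself.
import numpy as np
from scipy.stats import norm, expon

mrange = (5000., 5600.)        # assumed fit window
Ns, Nb = 100., 400.            # assumed yields
mu, sg, lb = 5280., 30., 400.  # assumed shape parameters
xg = np.linspace(*mrange, 2001)
dx = xg[1] - xg[0]
print((mpdf(xg, Ns, Nb, mu, sg, lb) * dx).sum())  # ~ Ns + Nb = 500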
def __init__(self, pose, agent=None, sensor=None, color="black", noise_per_meter=5, noise_std=math.pi/60, bias_rate_stds=(0.1, 0.1), expected_stuck_time=1e100, expected_escape_time=1e-100, expected_kidnap_time=1e100, kidnap_range_x=(-5.0, 5.0), kidnap_range_y=(-5.0, 5.0)): super().__init__(pose, agent, sensor, color) # noise # self.noise_pdf = expon(scale=1.0/(1e-100 + noise_per_meter)) self.distance_until_noise = self.noise_pdf.rvs() self.theta_noise = norm(scale=noise_std) # bias # self.bias_rate_nu = norm.rvs(loc=1.0, scale=bias_rate_stds[0]) self.bias_rate_omega = norm.rvs(loc=1.0, scale=bias_rate_stds[1]) # stuck # self.stuck_pdf = expon(scale=expected_stuck_time) self.escape_pdf = expon(scale=expected_escape_time) self.time_until_stuck = self.stuck_pdf.rvs() self.time_until_escape = self.escape_pdf.rvs() self.is_stuck = False # kidnap # self.kidnap_pdf = expon(scale=expected_kidnap_time) self.time_until_kidnap = self.kidnap_pdf.rvs() rx, ry = kidnap_range_x, kidnap_range_y self.kidnap_dist = uniform(loc=(rx[0], ry[0], 0.0), scale=(rx[1]-rx[0], ry[1]-ry[0], 2*math.pi))
def test_random_search_results():
    # Make a dataset with a lot of noise to get various kind of prediction
    # errors across CV folds and parameter settings
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # scipy.stats dists now supports `seed` but we still support scipy 0.12
    # which doesn't support the seed. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_folds = 3
    n_search_iter = 30
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_folds,
                                       iid=False, param_distributions=params)
    random_search.fit(X, y)
    random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_folds,
                                           iid=True, param_distributions=params)
    random_search_iid.fit(X, y)

    param_keys = ('param_C', 'param_gamma')
    score_keys = ('test_mean_score', 'test_rank_score',
                  'test_split0_score', 'test_split1_score',
                  'test_split2_score', 'test_std_score')
    n_cand = n_search_iter

    for search, iid in zip((random_search, random_search_iid), (False, True)):
        assert_equal(iid, search.iid)
        results = search.results_
        # Check results structure
        check_results_array_types(results, param_keys, score_keys)
        check_results_keys(results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert_false(any(results['param_C'].mask) or
                     any(results['param_gamma'].mask))
        check_results_grid_scores_consistency(search)
def tpdf(x, Ns, Nb, tlb, comps=["sig", "bkg"]):
    sig = expon(trange[0], tlb)
    sigN = np.diff(sig.cdf(trange))
    bkg = uniform(trange[0], trange[1] - trange[0])
    bkgN = np.diff(bkg.cdf(trange))

    tot = 0
    if "sig" in comps:
        tot += Ns * sig.pdf(x) / sigN
    if "bkg" in comps:
        tot += Nb * bkg.pdf(x) / bkgN
    return tot
def square_error_exp(_lambda):
    from scipy.stats import expon
    distribution = expon(scale=1 / _lambda)
    square_errors = [
        np.power(mean - distribution.mean(), 2.0) * mean_error_weight,
        np.power(lejp - distribution.ppf(percentile_lower), 2.0) * lejp_error_weight,
        np.power(uejp - distribution.ppf(percentile_upper), 2.0) * uejp_error_weight
    ]
    return square_errors
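# Hedged usage sketch: the squared-error terms above can be summed and minimized to back
# out a rate. The target statistics, percentiles, and weights below are illustrative
# assumptions, and the sketch assumes square_error_exp can see these names (e.g. they are
# defined at module scope rather than captured from an enclosing function).
import numpy as np
from scipy.optimize import minimize_scalar

mean, lejp, uejp = 2.0, 0.2, 6.0                  # assumed target mean and percentile points
percentile_lower, percentile_upper = 0.1, 0.95    # assumed percentiles
mean_error_weight = lejp_error_weight = uejp_error_weight = 1.0

res = minimize_scalar(lambda lam: sum(square_error_exp(lam)),
                      bounds=(1e-6, 100.0), method="bounded")
print(res.x)  # fitted rate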
def testExponentialSampleMultiDimensional(self):
    with self.test_session():
        batch_size = 2
        lam_v = [3.0, 22.0]
        lam = tf.constant([lam_v] * batch_size)
        exponential = tf.contrib.distributions.Exponential(lam=lam)
        n = 100000
        samples = exponential.sample_n(n, seed=138)
        self.assertEqual(samples.get_shape(), (n, batch_size, 2))
        sample_values = samples.eval()
        self.assertFalse(np.any(sample_values < 0.0))
        for i in range(2):
            self.assertLess(
                stats.kstest(sample_values[:, 0, i],
                             stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
            self.assertLess(
                stats.kstest(sample_values[:, 1, i],
                             stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
def __init__(self, t, mu_s, sigma_s, mu, stot, btot, bin_edges,
             use_gaussian_appoximation=False):
    # render all params positive
    t, mu_s, sigma_s, mu, stot, btot = np.abs([t, mu_s, sigma_s, mu, stot, btot])
    stot, btot = int(stot), int(btot)
    self.t, self.mu_s, self.sigma_s, self.mu, self.stot, self.btot = t, mu_s, sigma_s, mu, stot, btot
    self.bin_edges = bin_edges
    self.n_bins = len(bin_edges) - 1
    self.params = {
        'stot': self.stot,
        'btot': self.btot,
        't': self.t,
        'mu_s': self.mu_s,
        'sigma_s': self.sigma_s,
        "mu": self.mu
    }

    # base continuous distributions
    self.b = stats.expon(scale=1 / t)
    self.s = stats.norm(loc=mu_s, scale=sigma_s)

    # compute distribution for each bin
    self.si = self.stot * pd.Series(
        [self.s.cdf(i) for i in self.bin_edges]).diff().dropna().values
    self.bi = self.btot * pd.Series(
        [self.b.cdf(i) for i in self.bin_edges]).diff().dropna().values
    if len(self.si) != len(self.bi):
        print("ERROR!!", self.t, self.mu_s, self.sigma_s, mu, btot, stot)
    self.ni = self.mu * self.si + self.bi

    if not use_gaussian_appoximation:
        self.bins_distributions = [
            stats.poisson(mu=self.ni[i]).pmf for i in range(len(self.ni))
        ]
    else:
        self.bins_distributions = [
            stats.norm(loc=self.ni[i], scale=np.sqrt(self.ni[i]) + 1e-50).pdf
            for i in range(len(self.ni))
        ]