def montecarlo_integral_M2(x, y, iteration=10**8):
    y = np.squeeze(np.asarray(y))
    result = 0
    iteration = int(iteration)
    B = STD_DEV * STD_DEV * np.matrix([[1, -0.9, -0.5],
                                       [-0.9, 1, 0.5],
                                       [-0.5, 0.5, 1]])
    W = np.random.multivariate_normal([0, 0], B[0:2, 0:2], iteration)
    W = W.T
    # W = np.random.normal(MEAN, STD_DEV, iteration * 2)
    # W.shape = (2, iteration)
    XW = np.dot(x[:, 0:2], W)
    i = 0
    while i < 9:
        XW[i, :] = y[i] * XW[i, :]
        i = i + 1
    logistic(XW, out=XW)
    XW = np.prod(XW, 0)
    result = np.sum(XW) / float(iteration)
    return result
def plot_P(self, t, F):
    P = logistic(F)
    if self.dataset.has_true_F:
        P_true = logistic(self.dataset.F)
        self._compare_F_or_P(P_true, P, f'{t}_P.png')
    else:
        fname = f'{t}_P.png'
        self._plot_F_or_P(P, fname)
def predict_proba_single(self, team_i, team_j):
    r_i = self.ratings[team_i]
    r_j = self.ratings[team_j]
    d_ij = r_i - r_j + self.h
    p1 = logistic(-self.c + d_ij)
    p3 = 1.0 - logistic(self.c + d_ij)
    p2 = 1.0 - p1 - p3
    return [p1, p2, p3]
def generate_wsbm_adj(n, pi_vector, theta_in=3, theta_out=-3):
    # TODO: check sum to 1
    c = generate_clusters(n, pi_vector)
    Adj = np.zeros((n, n))
    for i in range(n - 1):
        for j in range(i + 1, n):
            if c[i] == c[j]:
                Adj[i, j] = logistic(norm.rvs(theta_in))
                Adj[j, i] = Adj[i, j]
            else:
                Adj[i, j] = logistic(norm.rvs(theta_out))
                Adj[j, i] = Adj[i, j]
    np.fill_diagonal(Adj, 1)
    return Adj
def sampling(self, samples, sigmoids, epsilon=1e-8, shift_percent=60.0, ranking=None):
    sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)
    # Update upper bound
    D_tilde = logit(sigmoids)
    self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))
    # Compute probability
    D_delta = D_tilde - self.D_tilde_M
    F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
    if shift_percent is not None:
        gamma = np.percentile(F, shift_percent)
        F = F - gamma
    P = np.squeeze(logistic(F))
    if ranking is None:
        accept = np.random.rand(len(D_delta)) < P
        good_samples = samples[accept]
    else:
        raise NotImplementedError
    return good_samples
def p_link(prosoc_left, condition, actor, trace):
    logodds = (
        trace["a"]
        + trace["a_actor"][:, actor]
        + (trace["bp"] + trace["bpC"] * condition) * prosoc_left
    )
    return logistic(logodds)
def logistic_lambda(z, logistic_z=None):
    """
    Evaluate the $\\lambda$ function in logistic regression.
    """
    if logistic_z is None:
        logistic_z = logistic(z)
    return np.where(z == 0, .125, (logistic_z - 0.5) / (2 * z))
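# Added illustrative check (not from the original source), assuming `logistic`
# is scipy.special.expit as elsewhere in this collection: as z -> 0 the ratio
# (logistic(z) - 0.5) / (2 * z) tends to 1/8, which is why the np.where branch
# above returns 0.125 at exactly z == 0.
import numpy as np
from scipy.special import expit as logistic

for z in [1.0, 1e-1, 1e-3, 1e-6]:
    print(z, (logistic(z) - 0.5) / (2 * z))   # converges to 0.125
print(logistic_lambda(np.array([0.0, 1e-6, 1.0])))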
def likelyhood_M3(x, y, w):
    result = 1.0
    for i in range(9):
        result = result * logistic(y[0, i] * (w[0] * x[i, 0] + w[1] * x[i, 1] + w[2]))
    return result
def gen_s_curve(rng, emissions):
    """Generate synthetic data from the dataset's generating process.
    """
    N = 500
    J = 100
    D = 2

    # Generate latent manifold.
    # -------------------------
    X, t = make_s_curve(N, random_state=rng)
    X = np.delete(X, obj=1, axis=1)
    X = X / np.std(X, axis=0)
    inds = t.argsort()
    X = X[inds]
    t = t[inds]

    # Generate kernel `K` and latent GP-distributed maps `F`.
    # -------------------------------------------------------
    K = kern.RBF(input_dim=D, lengthscale=1).K(X)
    F = rng.multivariate_normal(np.zeros(N), K, size=J).T

    # Generate emissions using `F` and/or `K`.
    # ----------------------------------------
    if emissions == 'bernoulli':
        P = logistic(F)
        Y = rng.binomial(1, P).astype(np.double)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    if emissions == 'gaussian':
        Y = F + np.random.normal(0, scale=0.5, size=F.shape)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'multinomial':
        C = 100
        pi = np.exp(F - logsumexp(F, axis=1)[:, None])
        Y = np.zeros(pi.shape)
        for n in range(N):
            Y[n] = rng.multinomial(C, pi[n])
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'negbinom':
        P = logistic(F)
        R = np.arange(1, J + 1, dtype=float)
        Y = rng.negative_binomial(R, 1 - P)
        return Dataset('s-curve', False, Y, X, F, K, R, t)
    else:
        assert (emissions == 'poisson')
        theta = np.exp(F)
        Y = rng.poisson(theta)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
def likelihood(self, parameters=None, design_matrix=None, observations=None):
    """
    Evaluate the likelihood.
    """
    # Get default values if available
    parameters = parameters if parameters is not None else self.parameters
    design_matrix = design_matrix if design_matrix is not None else self.design_matrix
    observations = observations if observations is not None else self.observations
    return logistic(observations * self.predictor(parameters, design_matrix))
def p_link(prosoc_left, condition, actor_sim, trace):
    Nsim = actor_sim.shape[0] // trace.nchains
    trace = trace[:Nsim]
    logodds = (
        trace["a"]
        + np.mean(actor_sim, axis=1)
        + (trace["bp"] + trace["bpC"] * condition) * prosoc_left
    )
    return logistic(logodds)
def get_output(weight, data, regression="logistic"):
    dot_product = np.matmul(data, weight)
    if regression == "logistic":
        output = logistic(dot_product)
    elif regression == "probit":
        output = norm.cdf(dot_product)
    elif regression == "multiclass":
        output = softmax(dot_product, axis=1)
    return output, dot_product
def predict(self, X, return_latent=False):
    """Predict data `Y` given latent variable `X`.
    """
    phi_X = self.phi(X, self.W, add_bias=True)
    F = phi_X @ self.beta.T
    Y = logistic(F)
    if return_latent:
        K = phi_X @ phi_X.T
        return Y, F, K
    return Y
def decimalize_test():
    N = np.random.randint(low=1, high=3)
    M = np.random.randint(low=1, high=3)
    # Keeping it sorted makes it easier to compare before and after
    methods = sorted(np.random.choice(list(ascii_letters), N, replace=False))
    metrics = sorted(np.random.choice(list(ascii_letters), M, replace=False))
    stats = (sp.MEAN_COL, sp.ERR_COL, sp.PVAL_COL)
    cols = pd.MultiIndex.from_product([metrics, stats], names=['metric', 'stat'])
    perf_tbl = pd.DataFrame(index=methods, columns=cols, dtype=object)
    perf_tbl.index.name = 'method'

    crap_limit_max = {}
    crap_limit_min = {}
    for metric in metrics:
        mu = fp_rnd_list(N, all_finite=True)
        EB = np.abs(fp_rnd_list(N))
        pval = logistic(fp_rnd_list(N))
        perf_tbl.loc[:, (metric, sp.MEAN_COL)] = mu
        perf_tbl.loc[:, (metric, sp.ERR_COL)] = EB
        perf_tbl.loc[:, (metric, sp.PVAL_COL)] = pval

        min_clip = np.random.randint(-6, 6)
        max_clip = np.random.randint(-6, 6)
        if np.random.rand() <= 0.5:
            crap_limit_min[metric] = min_clip
        if np.random.rand() <= 0.5:
            crap_limit_max[metric] = max_clip

    print(perf_tbl)

    err_digits = np.random.randint(low=1, high=6)
    pval_digits = np.random.randint(low=1, high=6)
    default_digits = np.random.randint(low=1, high=6)

    perf_tbl_dec = sp.decimalize(perf_tbl, err_digits, pval_digits, default_digits)
    print(perf_tbl_dec)

    assert(not (perf_tbl_dec.xs(sp.PVAL_COL, axis=1, level=sp.STAT)
                < perf_tbl.xs(sp.PVAL_COL, axis=1, level=sp.STAT)).any().any())
    assert(not (perf_tbl_dec.xs(sp.ERR_COL, axis=1, level=sp.STAT)
                < perf_tbl.xs(sp.ERR_COL, axis=1, level=sp.STAT)).any().any())

    shift_mod = np.random.randint(low=0, high=6)
    shift_mod = None if shift_mod == 0 else shift_mod
    pad = True
    perf_tbl_str, shifts = sp.format_table(perf_tbl_dec, shift_mod, pad,
                                           crap_limit_max, crap_limit_min)
    print(perf_tbl_str)
    print('-' * 10)
def log_likelihood(self):
    """
    Likelihood of data, given parameters

    log prod_positions Bernoulli(logistic(legislator_ideology * vote_ideology + vote_bias))
      = sum_positions log Bernoulli(logistic(legislator_ideology * vote_ideology + vote_bias))
    """
    p = logistic(self.legislator_ideology * self.vote_ideology + self.vote_bias)
    # _(p)
    # p if position, 1 - p if not position
    actual_p = np.where(self.position, p, 1 - p)
    # _(actual_p)
    # _(np.log(actual_p).sum())
    return np.log(actual_p).sum()
def _predict_proba2(self, X, thresholds, betas, n_classes, eps):
    Xb = X.dot(betas)
    if not (np.diff(thresholds) > 0).all():
        return np.full((X.shape[0], n_classes), eps)
    preds = np.zeros((X.shape[0], n_classes))
    # Below we use the fact that the logistic distribution is symmetric
    for c in range(n_classes - 1):
        z = logistic(thresholds[c] + Xb)
        preds[:, c] = z
        if c > 0:
            # Probability of intermediate classes (draw)
            preds[:, c] -= preds[:, c - 1]
    # The last class (away team win)
    preds[:, -1] = 1 - z
    preds = np.maximum(preds, eps)
    return preds
def _sample_r(self):
    """Sample negative binomial dispersion parameter `R` based on (Zhou 2012).

    For code, see:
    https://mingyuanzhou.github.io/Softwares/LGNB_Regression_v0.zip
    """
    phi_X = self.phi(self.X, self.W, add_bias=True)
    F = phi_X @ self.beta.T
    P = logistic(F)
    for j in range(self.J):
        A = self._crt_sum(j)
        # `maximum` is element-wise, while `max` is not.
        maxes = np.maximum(1 - P[:, j], -np.inf)
        B = 1. / -np.sum(np.log(maxes))
        self.R[j] = np.random.gamma(A, B)
    # `R` cannot be zero.
    self.R[np.isclose(self.R, 0)] = 0.0000001
def forward_pass(W, batch):
    '''Propagate data through the network given by W.

    Parameters
    ----------
    W : np.ndarray
        Array of shape (1, 2) giving the network weights
    batch : np.ndarray
        Array of shape (N, 2) with the data

    Returns
    -------
    np.ndarray
        Network predictions for each sample in shape (N, 1)
    '''
    # sanity check our shapes
    assert batch.shape[1] == 2, 'Data shape is incorrect'
    assert W.shape == (1, 2), 'Weights shape is incorrect'
    return logistic(np.dot(W, batch.T)).T
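# Added usage sketch (not from the original source): calling forward_pass with
# dummy weights and a small random batch, assuming `logistic` is
# scipy.special.expit. The shapes below match the docstring above.
import numpy as np
from scipy.special import expit as logistic

W = np.array([[0.5, -1.0]])      # shape (1, 2)
batch = np.random.randn(4, 2)    # shape (N, 2) with N = 4
preds = forward_pass(W, batch)
print(preds.shape)               # (4, 1)
print(preds)                     # each prediction lies in (0, 1)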
def rejection_sample(d_score, epsilon=1e-6, shift_percent=95.0, score_max=None,
                     random=np.random):
    '''Rejection scheme from:
    https://arxiv.org/pdf/1810.06758.pdf
    '''
    assert (np.ndim(d_score) == 1 and len(d_score) > 0)
    assert (0 <= np.min(d_score) and np.max(d_score) <= 1)
    assert (np.ndim(score_max) == 0)

    # Chop off first since we assume that is real point and reject does not
    # start with real point.
    d_score = d_score[1:]

    # Make sure logit finite
    d_score = np.clip(d_score.astype(float), 1e-14, 1 - 1e-14)
    max_burnin_d_score = np.clip(score_max.astype(float), 1e-14, 1 - 1e-14)

    log_M = logit(max_burnin_d_score)

    D_tilde = logit(d_score)
    # Bump up M if found something bigger
    D_tilde_M = np.maximum(log_M, np.maximum.accumulate(D_tilde))

    D_delta = D_tilde - D_tilde_M
    F = D_delta - np.log(1 - np.exp(D_delta - epsilon))

    if shift_percent is not None:
        gamma = np.percentile(F, shift_percent)
        F = F - gamma

    P = logistic(F)
    accept = random.rand(len(d_score)) <= P
    if np.any(accept):
        idx = np.argmax(accept)   # Stop at first true, default to 0
    else:
        idx = np.argmax(d_score)  # Revert to cherry if no accept
    # Now shift idx because we took away the real init point
    return idx + 1, P[idx]
def variational_step(self, **kwargs):
    """
    Infer parameters from observations.
    """
    parameter_means = kwargs['parameter_means']

    # Get lambda_xi if not given
    if 'xi' in kwargs:
        lambda_xi = logistic_lambda(kwargs['xi'])
    elif 'lambda_xi' in kwargs:
        lambda_xi = kwargs['lambda_xi']
    else:
        lambda_xi = 0.125

    # Compute the expected prior precision
    if self.ard:
        hyper_shape = self.hyper_shape_0 + 0.5
        hyper_scale = self.hyper_scale_0 + 0.5 * parameter_means * parameter_means
    else:
        hyper_shape = self.hyper_shape_0 + 0.5 * self.p
        hyper_scale = self.hyper_scale_0 + 0.5 * parameter_means.dot(parameter_means)
    tau = hyper_shape / hyper_scale

    # Compute the parameter covariance and mean
    parameter_precision = tau * np.eye(self.p) + \
        2 * np.dot(self.design_matrix.T * lambda_xi * self.weights, self.design_matrix)
    parameter_cov = np.linalg.inv(parameter_precision)
    parameter_means = 0.5 * parameter_cov.dot(
        (self.observations * self.weights).dot(self.design_matrix))

    # Evaluate the extra variational parameter
    xi = np.sqrt(np.sum(np.dot(
        self.design_matrix,
        parameter_cov + parameter_means[:, None] * parameter_means[None, :]
    ) * self.design_matrix, axis=1))
    logistic_xi = logistic(xi)
    lambda_xi = logistic_lambda(xi, logistic_xi)

    # Evaluate the evidence lower bound
    elbo = .5 * parameter_means.dot(parameter_precision).dot(parameter_means) \
        - .5 * np.log(np.linalg.det(parameter_precision)) \
        + np.sum(np.log(logistic_xi) - .5 * xi + lambda_xi * xi ** 2) \
        + np.sum(- gammaln(self.hyper_shape_0)
                 + self.hyper_shape_0 * np.log(self.hyper_scale_0)
                 - self.hyper_scale_0 * hyper_shape / hyper_scale
                 - hyper_shape * np.log(hyper_scale)
                 + gammaln(hyper_shape) + hyper_shape)

    return elbo, dict(parameter_means=parameter_means, xi=xi, lambda_xi=lambda_xi,
                      parameter_cov=parameter_cov)
def sampling(self, samples, sigmoids, epsilon=1e-8, shift_percent=95.0, rank=None):
    sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)
    # Update upper bound
    D_tilde = logit(sigmoids)
    self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))
    # Compute probability
    D_delta = D_tilde - self.D_tilde_M
    F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
    if shift_percent is not None:
        gamma = np.percentile(F, shift_percent)
        # print("gamma", gamma)
        F = F - gamma
    P = np.squeeze(logistic(F))
    # Filter out samples
    # accept = np.random.rand(len(D_delta)) < P
    # good_samples = samples[accept]
    # print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(len(D_delta), np.sum(accept), np.sum(accept) / len(D_delta)))
    if rank is not None:
        order = np.argsort(P)[::-1]
        accept = order[:int(rank * len(D_delta))]
        good_samples = samples[accept, :]
        print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
            len(D_delta), np.size(accept, 0), np.size(accept, 0) / len(D_delta)))
    else:
        accept = np.random.rand(len(D_delta)) < P
        good_samples = samples[accept]
        print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
            len(D_delta), np.sum(accept), np.sum(accept) / len(D_delta)))
    return good_samples
def inverse_transform(Xt):
    from scipy.special import expit as logistic
    return logistic(Xt)
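# Added illustrative check (not from the original source): the logistic sigmoid
# used here is the inverse of scipy.special.logit, so a logit -> inverse_transform
# round trip recovers the original probabilities up to floating-point error.
import numpy as np
from scipy.special import logit

p = np.array([0.1, 0.5, 0.9])
assert np.allclose(inverse_transform(logit(p)), p)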
)
print(mcmc_result)
mcmc_result.plot()
plt.show()

mcmc_sample = mcmc_result.extract()
df = pandas.DataFrame(mcmc_sample)
print(df.head())

label_one = ['shade', 'sunshine']
label_two = np.arange(1, 11)

y_s = []
for i in label_two:
    y_s.append(logistic(df['Intercept'].mean()
                        + df['b_nutrition'].mean() * i
                        + df['b_solar'].mean() * 1) * 10)
y_c = []
for i in label_two:
    y_c.append(logistic(df['Intercept'].mean()
                        + df['b_nutrition'].mean() * i
                        + df['b_solar'].mean() * 0) * 10)

print(label_two.shape)
print(np.array(y_s).shape)

plt.plot(label_two, np.array(y_s), 'red', label='sunshine')
plt.scatter(germination_dat_d.query('solar_sunshine == 1')["nutrition"],
            germination_dat_d.query('solar_sunshine == 1')["germination"], c='r')
plt.plot(label_two, np.array(y_c), 'blue', label='shade')
plt.scatter(germination_dat_d.query('solar_shade == 1')["nutrition"],
ax1.grid(False)
ax2.grid(False)
plt.savefig('B11197_04_06.png', dpi=300)


# In[16]:


df = az.summary(trace_1, var_names=varnames)
df


# In[17]:


x_1 = 4.5  # sepal_length
x_2 = 3    # sepal_width

log_odds_versicolor_i = (df['mean'] * [1, x_1, x_2]).sum()
probability_versicolor_i = logistic(log_odds_versicolor_i)

log_odds_versicolor_f = (df['mean'] * [1, x_1 + 1, x_2]).sum()
probability_versicolor_f = logistic(log_odds_versicolor_f)

log_odds_versicolor_f - log_odds_versicolor_i, probability_versicolor_f - probability_versicolor_i


# ## Dealing with correlated variables

# In[18]:


corr = iris[iris['species'] != 'virginica'].corr()
mask = np.tri(*corr.shape).T
sns.heatmap(corr.abs(), mask=mask, annot=True, cmap='viridis')
plt.savefig('B11197_04_07.png', dpi=300, bbox_inches='tight')
def expected_response(theta, X):
    # compute the conditional expectation of the response as defined by logistic
    # regression, i.e. E[y_n|x_n, theta] = sigmoid(theta dot x_n), which is
    # also equal to the posterior class probability P(y_n=1|x_n, theta)
    return logistic(X.dot(theta))
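# Added sketch (not from the original source): one common use of this helper is
# forming the gradient of the Bernoulli log-likelihood in logistic regression,
# grad = X^T (y - E[y | X, theta]). The data below are dummy values.
import numpy as np
from scipy.special import expit as logistic

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
theta = np.zeros(3)
y = rng.integers(0, 2, size=20)

grad = X.T.dot(y - expected_response(theta, X))
print(grad)   # ascent direction for the log-likelihood at theta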
def sim_actor(tr, i):
    sim_a_actor = np.random.randn() * tr["sigma_actor"][i]
    P = np.array([0, 1, 0, 1])
    C = np.array([0, 0, 1, 1])
    p = logistic(tr["a"][i] + sim_a_actor + (tr["bp"][i] + tr["bpC"][i] * C) * P)
    return p
def compute_prob(r1, r2, c, h):
    d = r1 - r2 + h
    return logistic(-c + d), logistic(c + d) - logistic(-c + d), 1 - logistic(c + d)
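# Added illustrative check (not from the original source): the three returned
# values appear to be ordered-logistic outcome probabilities (presumably
# win / draw / loss with draw threshold c and home advantage h), and they
# partition the unit interval, so they always sum to one. Ratings are made up.
import numpy as np
from scipy.special import expit as logistic

p_win, p_draw, p_loss = compute_prob(r1=1.2, r2=0.8, c=0.5, h=0.25)
print(p_win, p_draw, p_loss)
assert np.isclose(p_win + p_draw + p_loss, 1.0)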
a = pm.Normal("a", 0.0, 1.0)
sigma = pm.HalfCauchy("sigma", 1.0)
a_tank = pm.Normal("a_tank", a, sigma, shape=d.shape[0])
p = pm.math.invlogit(a_tank[tank])
surv = pm.Binomial("surv", n=d.density, p=p, observed=d.surv)
trace_12_2 = pm.sample(10000, tune=10000)

# %%
comp_df = az.compare({"m12_1": trace_12_1, "m12_2": trace_12_2})
comp_df

# %%
post = pm.trace_to_dataframe(trace_12_2, varnames=["a_tank"])
d.loc[:, "propsurv_est"] = pd.Series(
    logistic(post.median(axis=0).values), index=d.index
)

# %%
_, ax = plt.subplots(1, 1, figsize=(12, 5))

# display raw proportions surviving in each tank
ax.scatter(np.arange(1, 49), d.propsurv)
ax.scatter(np.arange(1, 49), d.propsurv_est, facecolors="none", edgecolors="k", lw=1)
ax.hlines(logistic(np.median(trace_12_2["a"], axis=0)), 0, 49, linestyles="--")
ax.vlines([16.5, 32.5], -0.05, 1.05, lw=0.5)
ax.text(8, 0, "small tanks", horizontalalignment="center")
ax.text(16 + 8, 0, "medium tanks", horizontalalignment="center")
ax.text(32 + 8, 0, "large tanks", horizontalalignment="center")
ax.set_xlabel("tank", fontsize=14)
ax.set_ylabel("proportion survival", fontsize=14)
ax.set_xlim(-1, 50)
def log_lik_elo(r1, r2, res):
    r_diff = r1 - r2
    return res * np.log(logistic(-r_diff)) + (1 - res) * np.log(1 - logistic(-r_diff))
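# Added illustrative example (not from the original source): under this
# snippet's convention p = logistic(-(r1 - r2)), so res = 1 contributes log(p)
# and res = 0 contributes log(1 - p). Ratings below are made up.
import numpy as np
from scipy.special import expit as logistic

r1, r2 = 1.5, 1.0
p = logistic(-(r1 - r2))
assert np.isclose(log_lik_elo(r1, r2, 1), np.log(p))
assert np.isclose(log_lik_elo(r1, r2, 0), np.log(1 - p))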
f = gp.prior("f", X=X_1)
# logistic inverse link function and Bernoulli likelihood
y_ = pm.Bernoulli("y", p=pm.math.sigmoid(f), observed=y)
trace_iris = pm.sample(1000, chains=1, compute_convergence_checks=False)

# Posterior predictive
with model_iris:
    f_pred = gp.conditional('f_pred', X_new)
    pred_samples = pm.sample_posterior_predictive(
        trace_iris, vars=[f_pred], samples=1000)

# Plot results
_, ax = plt.subplots(figsize=(10, 6))

fp = logistic(pred_samples['f_pred'])
fp_mean = np.mean(fp, 0)

ax.plot(X_new[:, 0], fp_mean)
# plot the data (with some jitter) and the true latent function
ax.scatter(x_1, np.random.normal(y, 0.02),
           marker='.', color=[f'C{x}' for x in y])

az.plot_hpd(X_new[:, 0], fp, color='C2')

db = np.array([find_midpoint(f, X_new[:, 0], 0.5) for f in fp])
db_mean = db.mean()
db_hpd = az.hpd(db)
ax.vlines(db_mean, 0, 1, color='k')
ax.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color='k', alpha=0.5)
ax.set_xlabel('sepal_length')
def experiment(disease_no, lag):
    # For seasonal correlation, we always use no-lag data
    dir = 'data/0/'
    X_train = pd.read_csv(dir + 'D{}_X_train.csv'.format(disease_no), index_col=0)
    y_train = pd.read_csv(dir + 'D{}_y_train.csv'.format(disease_no), index_col=0)
    y_train = y_train['infection-rate']
    corr_seasonal = calc_corr_seasonal(X_train, y_train)

    dir = 'data/{}/'.format(lag)
    X_train = pd.read_csv(dir + 'D{}_X_train.csv'.format(disease_no), index_col=0)
    y_train = pd.read_csv(dir + 'D{}_y_train.csv'.format(disease_no), index_col=0)
    X_test = pd.read_csv(dir + 'D{}_X_test.csv'.format(disease_no), index_col=0)
    y_test = pd.read_csv(dir + 'D{}_y_test.csv'.format(disease_no), index_col=0)
    y_train = y_train['infection-rate']
    y_test = y_test['infection-rate']

    # FEATURE SELECTION
    print('- Ranking feature ...')
    corr_trend = calc_corr_trend(X_train, y_train)
    corr_irregular = calc_corr_irregular(X_train, y_train)
    ranking_trend = rank(corr_trend, corr_seasonal)
    ranking_irregular = rank(corr_irregular, corr_seasonal)
    display_top_terms(ranking_trend, 'for TREND')
    display_top_terms(ranking_irregular, 'for IRREGULAR')

    # Calculate components for the data frame
    X_train_trend = calc_df_trend(X_train.drop('date', axis=1), 52)
    X_train_irregular = calc_df_irregular(X_train.drop('date', axis=1), 52)
    y_train_trend, _, y_train_irregular = decompose(fix_inf(logit(y_train).values), 52)

    print('- Selecting best feature subset ... ', end='')
    subset_trend, alpha_trend = subset_select(X_train_trend, y_train_trend, ranking_trend)
    subset_irregular, alpha_irregular = subset_select(X_train_irregular, y_train_irregular, ranking_irregular)
    agg_x_train_trend = aggregate(X_train_trend, subset_trend)
    agg_x_train_irregular = aggregate(X_train_irregular, subset_irregular)
    print('selected', len(subset_trend), 'for trend,', len(subset_irregular), 'for irregular.')

    print("- Selected search terms saved at "
          "'output/selected/T_{}_{}.txt' and "
          "'output/selected/I_{}_{}.txt'."
          .format(disease_no, lag, disease_no, lag))

    # Logging selected term
    with open('output/selected/T_{}_{}.txt'.format(disease_no, lag), 'w') as f:
        for term in subset_trend:
            f.write(term + '\n')
    with open('output/selected/I_{}_{}.txt'.format(disease_no, lag), 'w') as f:
        for term in subset_irregular:
            f.write(term + '\n')

    # RELEARN AND PREDICT
    print('- Learning the final model and predicting...', end=' ')
    model_trend = train(agg_x_train_trend, y_train_trend, alpha_trend)
    model_irregular = train(agg_x_train_irregular, y_train_irregular, alpha_irregular)

    # We will calculate each component individually for each week in test period
    # We need the train data for decomposing the test time series
    # First, let's make a copy of train data
    X_agg_curr_trend = aggregate(X_train, subset_trend)
    X_agg_curr_irregular = aggregate(X_train, subset_irregular)
    X_agg_test_trend = aggregate(X_test, subset_trend)
    X_agg_test_irregular = aggregate(X_test, subset_irregular)

    # We use the seasonal component
    # from the historical data of the epidemic
    _, historical_seasonal, _ = decompose(fix_inf(logit(y_train).values), 52)
    historical_seasonal = list(historical_seasonal)

    predict_y = []
    predict_trends = []
    predict_irregulars = []

    # Now let's predict, one week at a time
    for idx in range(len(X_test.index)):
        # Add data of the new week
        X_agg_curr_trend = X_agg_curr_trend.append(X_agg_test_trend.loc[idx, :])
        X_agg_curr_irregular = X_agg_curr_irregular.append(X_agg_test_irregular.loc[idx, :])

        # Re-decompose the search time series
        X_curr_trend = calc_df_trend(X_agg_curr_trend, 52)
        X_curr_irregular = calc_df_irregular(X_agg_curr_irregular, 52)
        historical_seasonal.append(historical_seasonal[-52])

        # We need only the latest one
        curr_trend = X_curr_trend.iloc[-1:]
        curr_irregular = X_curr_irregular.iloc[-1:]
        curr_seasonal = historical_seasonal[-1]

        # Let's predict each component
        predict_trend = predict(model_trend, curr_trend).values[0]
        predict_irregular = predict(model_irregular, curr_irregular).values[0]
        predict_seasonal = curr_seasonal

        # And then add them to the result list
        predict_y.append(logistic(predict_trend * predict_irregular * predict_seasonal))
        predict_trends.append(predict_trend)
        predict_irregulars.append(predict_irregular)

    _mape = mape(y_test, predict_y)
    _coef = corr_coef(y_test, predict_y)

    # THE CODE BELOW IS JUST FOR VISUALIZATION
    predict_y_train_trend = predict(model_trend, agg_x_train_trend)
    predict_y_train_irregular = predict(model_irregular, agg_x_train_irregular)
    predict_y_train_seasonal = historical_seasonal[:len(y_train_trend)]
    predict_y_train = logistic(predict_y_train_trend * predict_y_train_irregular * predict_y_train_seasonal)

    predict_y_all_trend = np.append(predict_y_train_trend, np.array(predict_trends))
    predict_y_all_irregular = np.append(predict_y_train_irregular, np.array(predict_irregulars))
    predict_y_all = np.append(predict_y_train, predict_y)

    print('Finished.')
    return _mape, _coef, (predict_y_all_trend, predict_y_all_irregular, predict_y_all)
def vect_likelyhood_M3(x, y, w):
    y = np.squeeze(np.asarray(y))
    print(np.dot(x, w))
    print(np.multiply(y.T, np.dot(x, w)))
    return np.prod(logistic(np.multiply(y.T, np.dot(x, w))))
def fit_model(name, func):
    """Fits a model (in a Bayesian sense) to the data.

    This was written as a function so that some of the code can be re-used for
    the secondary model.

    Args:
        name (str): Descriptive name of the model. Posterior samples,
            statistics, and figures are generated and saved in a subdirectory
            with this name.
        func (:obj:`<class 'function'>`): Function for model construction.
            Should return a formatted copy of the data.

    Returns:
        data (pandas.DataFrame): The formatted copy of the data augmented with
            the results from the model fitting.

    """
    with pm.Model() as m:
        # construct model and load data
        data = func()

        if exists(f"{name}") is False:
            # sample posterior
            trace = pm.sample(10000, tune=1000, chains=2)
            pm.save_trace(trace, f"{name}")
        else:
            # load samples
            trace = pm.load_trace(f"{name}")

        if exists(f"{name}/ppc.npz") is False:
            # perform ppc
            ppc = pm.sample_posterior_predictive(trace, samples=10000)["y"]
            np.savez_compressed(f"{name}/ppc.npz", ppc)
        else:
            # load pp samples
            ppc = np.load(f"{name}/ppc.npz")["arr_0"]

        if exists(f"{name}/summary.csv") is False:
            # make a summary csv
            summary = pm.summary(trace, var_names=m.free_RVs)
            summary.to_csv(f"{name}/summary.csv")
        else:
            summary = pd.read_csv(f"{name}/summary.csv")

        if exists(f"{name}/details.txt") is False:
            details = f"Minimum Rhat = {summary.Rhat.min()}\n"
            details += f"Minimum Neff = {summary.n_eff.min()}\n"
            n = data.trials.mean()
            r2 = pm.stats.r2_score(data.num.values, trace["p"] * n, 3)
            details += f"Bayesian median R2 = {r2[0]}\n"
            try:
                details += f"BFMI = {pm.stats.bfmi(trace)}\n"
            except KeyError:
                details += "No BFMI generated!\n"
            open(f"{name}/details.txt", "w").write(details)

        if exists(f"{name}/traceplot.png") is False:
            pm.traceplot(trace, compact=True)
            plt.savefig(f"{name}/traceplot.png")

        if exists(f"{name}/data.csv") is False:
            data["a"] = logistic(trace[r"$\alpha$"].mean(axis=0))
            data["b"] = trace[r"$\beta$"].mean(axis=0)
            data["l"] = logistic(trace[r"$\lambda$"].mean(axis=0))
            data["s"] = np.exp(trace[r"$\varsigma$"].mean(axis=0))
            data["d"] = np.exp(trace[r"$\delta$"].mean(axis=0))
            ppq = pd.DataFrame(ppc).quantile([0.025, 0.975]).T
            ppq.columns = ["ppc_lo", "ppc_hi"]
            data = pd.concat([data, ppq], axis=1)
            data["ppc_lo"] /= data.trials
            data["ppc_hi"] /= data.trials
            ppe = np.abs(ppc - np.tile(data.num.values, (10000, 1))).mean(axis=0)
            data["pro_pp_errors"] = ppe / data.trials
            data.to_csv(f"{name}/data.csv", index=False)
        else:
            data = pd.read_csv(f"{name}/data.csv")

    return data