Code example #1
def montecarlo_integral_M2(x, y, iteration=10**8):
    y = np.squeeze(np.asarray(y))
    result = 0
    iteration = int(iteration)
    B = STD_DEV * STD_DEV * np.matrix([[1, -0.9, -0.5], [-0.9, 1, 0.5],
                                       [-0.5, 0.5, 1]])
    W = np.random.multivariate_normal([0, 0], B[0:2, 0:2], iteration)
    W = W.T
    #W = np.random.normal(MEAN,STD_DEV,iteration*2)
    #W.shape = (2,iteration)
    XW = np.dot(x[:, 0:2], W)

    i = 0

    while i < 9:
        XW[i, :] = y[i] * XW[i, :]
        i = i + 1

    logistic(XW, out=XW)

    XW = np.prod(XW, 0)

    result = np.sum(XW) / float(iteration)

    return result
Code example #2
File: visualizer.py  Project: jxzhangjhu/rflvm
 def plot_P(self, t, F):
     P = logistic(F)
     if self.dataset.has_true_F:
         P_true = logistic(self.dataset.F)
         self._compare_F_or_P(P_true, P, f'{t}_P.png')
     else:
         fname = f'{t}_P.png'
         self._plot_F_or_P(P, fname)
Code example #3
 def predict_proba_single(self, team_i, team_j):
     r_i = self.ratings[team_i]
     r_j = self.ratings[team_j]
     d_ij = r_i - r_j + self.h
     p1 = logistic(-self.c + d_ij)
     p3 = 1.0 - logistic(self.c + d_ij)
     p2 = 1.0 - p1 - p3
     return [p1, p2, p3]
Code example #4
def generate_wsbm_adj(n, pi_vector, theta_in=3, theta_out=-3):
    # TODO: check that pi_vector sums to 1
    c = generate_clusters(n, pi_vector)
    Adj = np.zeros((n, n))
    for i in range(n - 1):
        for j in range(i + 1, n):
            if c[i] == c[j]:
                Adj[i, j] = logistic(norm.rvs(theta_in))
                Adj[j, i] = Adj[i, j]
            else:
                Adj[i, j] = logistic(norm.rvs(theta_out))
                Adj[j, i] = Adj[i, j]
    np.fill_diagonal(Adj, 1)
    return Adj
Code example #5
    def sampling(self,
                 samples,
                 sigmoids,
                 epsilon=1e-8,
                 shift_percent=60.0,
                 ranking=None):

        sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)

        # Update upper bound
        D_tilde = logit(sigmoids)
        self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))

        # Compute probability
        D_delta = D_tilde - self.D_tilde_M
        F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
        if shift_percent is not None:
            gamma = np.percentile(F, shift_percent)
            F = F - gamma
        P = np.squeeze(logistic(F))

        if ranking is None:
            accept = np.random.rand(len(D_delta)) < P
            good_samples = samples[accept]
        else:
            raise NotImplementedError

        return good_samples
Code example #6
def p_link(prosoc_left, condition, actor, trace):
    logodds = (
        trace["a"]
        + trace["a_actor"][:, actor]
        + (trace["bp"] + trace["bpC"] * condition) * prosoc_left
    )
    return logistic(logodds)
Code example #7
def logistic_lambda(z, logistic_z=None):
    """
    Evaluate the $\\lambda$ function in logistic regression.
    """
    if logistic_z is None:
        logistic_z = logistic(z)
    return np.where(z == 0, .125, (logistic_z - 0.5) / (2 * z))
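The λ term above is the bound parameter commonly used in variational Bayesian logistic regression, λ(z) = (σ(z) − 1/2)/(2z) = tanh(z/2)/(4z), with limit 1/8 at z = 0. A minimal standalone check (re-declaring the function, and assuming, as in code example #22, that `logistic` is `scipy.special.expit`) might look like this:

import numpy as np
from scipy.special import expit as logistic

def logistic_lambda(z, logistic_z=None):
    if logistic_z is None:
        logistic_z = logistic(z)
    return np.where(z == 0, .125, (logistic_z - 0.5) / (2 * z))

z = np.array([0.0, 0.5, 2.0])
with np.errstate(invalid='ignore'):        # the z == 0 entry is handled by np.where
    vals = logistic_lambda(z)
print(vals)                                # approx [0.125, 0.1225, 0.0952]
print(np.tanh(z[1:] / 2) / (4 * z[1:]))    # closed form agrees for the nonzero entries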
Code example #8
def likelyhood_M3(x, y, w):
    result = 1.0
    for i in range(9):
        result = result * logistic(y[0, i] *
                                   (w[0] * x[i, 0] + w[1] * x[i, 1] + w[2]))

    return result
Code example #9
def gen_s_curve(rng, emissions):
    """Generate synthetic data from datasets generating process.
    """
    N = 500
    J = 100
    D = 2

    # Generate latent manifold.
    # -------------------------
    X, t = make_s_curve(N, random_state=rng)
    X = np.delete(X, obj=1, axis=1)
    X = X / np.std(X, axis=0)
    inds = t.argsort()
    X = X[inds]
    t = t[inds]

    # Generate kernel `K` and latent GP-distributed maps `F`.
    # -------------------------------------------------------
    K = kern.RBF(input_dim=D, lengthscale=1).K(X)
    F = rng.multivariate_normal(np.zeros(N), K, size=J).T

    # Generate emissions using `F` and/or `K`.
    # ----------------------------------------
    if emissions == 'bernoulli':
        P = logistic(F)
        Y = rng.binomial(1, P).astype(np.double)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    if emissions == 'gaussian':
        Y = F + np.random.normal(0, scale=0.5, size=F.shape)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'multinomial':
        C = 100
        pi = np.exp(F - logsumexp(F, axis=1)[:, None])
        Y = np.zeros(pi.shape)
        for n in range(N):
            Y[n] = rng.multinomial(C, pi[n])
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'negbinom':
        P = logistic(F)
        R = np.arange(1, J + 1, dtype=float)
        Y = rng.negative_binomial(R, 1 - P)
        return Dataset('s-curve', False, Y, X, F, K, R, t)
    else:
        assert (emissions == 'poisson')
        theta = np.exp(F)
        Y = rng.poisson(theta)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
Code example #10
 def likelihood(self, parameters=None, design_matrix=None, observations=None):
     """
     Evaluate the likelihood.
     """
     # Get default values if available
     parameters = parameters if parameters is not None else self.parameters
     design_matrix = design_matrix if design_matrix is not None else self.design_matrix
     observations = observations if observations is not None else self.observations
     return logistic(observations * self.predictor(parameters, design_matrix))
Code example #11
def p_link(prosoc_left, condition, actor_sim, trace):
    Nsim = actor_sim.shape[0] // trace.nchains
    trace = trace[:Nsim]
    logodds = (
        trace["a"]
        + np.mean(actor_sim, axis=1)
        + (trace["bp"] + trace["bpC"] * condition) * prosoc_left
    )
    return logistic(logodds)
Code example #12
def get_output(weight, data, regression="logistic"):
    dot_product = np.matmul(data, weight)
    if regression == "logistic":
        output = logistic(dot_product)
    elif regression == "probit":
        output = norm.cdf(dot_product)
    elif regression == "multiclass":
        output = softmax(dot_product, axis=1)

    return output, dot_product
Code example #13
 def predict(self, X, return_latent=False):
     """Predict data `Y` given latent variable `X`.
     """
     phi_X = self.phi(X, self.W, add_bias=True)
     F = phi_X @ self.beta.T
     Y = logistic(F)
     if return_latent:
         K = phi_X @ phi_X.T
         return Y, F, K
     return Y
Code example #14
def decimalize_test():
    N = np.random.randint(low=1, high=3)
    M = np.random.randint(low=1, high=3)
    # Keeping it sorted makes it easier to compare before and after
    methods = sorted(np.random.choice(list(ascii_letters), N, replace=False))
    metrics = sorted(np.random.choice(list(ascii_letters), M, replace=False))

    stats = (sp.MEAN_COL, sp.ERR_COL, sp.PVAL_COL)
    cols = pd.MultiIndex.from_product([metrics, stats],
                                      names=['metric', 'stat'])
    perf_tbl = pd.DataFrame(index=methods, columns=cols, dtype=object)
    perf_tbl.index.name = 'method'
    crap_limit_max = {}
    crap_limit_min = {}
    for metric in metrics:
        mu = fp_rnd_list(N, all_finite=True)
        EB = np.abs(fp_rnd_list(N))
        pval = logistic(fp_rnd_list(N))

        perf_tbl.loc[:, (metric, sp.MEAN_COL)] = mu
        perf_tbl.loc[:, (metric, sp.ERR_COL)] = EB
        perf_tbl.loc[:, (metric, sp.PVAL_COL)] = pval

        min_clip = np.random.randint(-6, 6)
        max_clip = np.random.randint(-6, 6)

        if np.random.rand() <= 0.5:
            crap_limit_min[metric] = min_clip
        if np.random.rand() <= 0.5:
            crap_limit_max[metric] = max_clip

    print(perf_tbl)
    err_digits = np.random.randint(low=1, high=6)
    pval_digits = np.random.randint(low=1, high=6)
    default_digits = np.random.randint(low=1, high=6)
    perf_tbl_dec = sp.decimalize(perf_tbl, err_digits, pval_digits,
                                 default_digits)
    print(perf_tbl_dec)

    assert(not (perf_tbl_dec.xs(sp.PVAL_COL, axis=1, level=sp.STAT) <
                perf_tbl.xs(sp.PVAL_COL, axis=1, level=sp.STAT)).any().any())
    assert(not (perf_tbl_dec.xs(sp.ERR_COL, axis=1, level=sp.STAT) <
                perf_tbl.xs(sp.ERR_COL, axis=1, level=sp.STAT)).any().any())

    shift_mod = np.random.randint(low=0, high=6)
    shift_mod = None if shift_mod == 0 else shift_mod

    pad = True
    perf_tbl_str, shifts = sp.format_table(perf_tbl_dec, shift_mod, pad,
                                           crap_limit_max, crap_limit_min)
    print(perf_tbl_str)
    print('-' * 10)
Code example #15
 def log_likelihood(self):
     """
     Likelihood of data, given parameters
     log prod_positions Bernoulli(logistic(legislator_ideology * vote_ideology + vote_bias))
     = sum_positions log Bernoulli(logistic(legislator_ideology * vote_ideology + vote_bias))
     """
     p = logistic(self.legislator_ideology * self.vote_ideology +
                  self.vote_bias)
     # _(p)
     # p if position, 1 - p if not position
     actual_p = np.where(self.position, p, 1 - p)
     # _(actual_p)
     # _(np.log(actual_p).sum())
     return np.log(actual_p).sum()
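A tiny numeric illustration of the identity in the docstring (the numbers are toy values, not the class's real state): the sum of the log Bernoulli terms equals the log of their product.

import numpy as np
from scipy.special import expit as logistic

legislator_ideology = np.array([0.5, -1.0, 2.0])    # made-up values
vote_ideology = 1.2
vote_bias = -0.3
position = np.array([True, False, True])

p = logistic(legislator_ideology * vote_ideology + vote_bias)
actual_p = np.where(position, p, 1 - p)             # p if position, 1 - p otherwise
assert np.isclose(np.log(actual_p).sum(), np.log(np.prod(actual_p)))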
Code example #16
 def _predict_proba2(self, X, thresholds, betas, n_classes, eps):
     Xb = X.dot(betas)
     if not (np.diff(thresholds) > 0).all():
         return np.full((X.shape[0], n_classes), eps)
     preds = np.zeros((X.shape[0], n_classes))
     # Below we use the fact that logistic distribution is symmetric
     for c in range(n_classes - 1):
         z = logistic(thresholds[c] + Xb)
         preds[:, c] = z
         if c > 0:  # Probability of intermediate classes (draw)
             preds[:, c] -= preds[:, c - 1]
     # The last class (away team win)
     preds[:, -1] = 1 - z
     preds = np.maximum(preds, eps)
     return preds
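A toy check of the cumulative construction above (illustrative numbers only): with strictly increasing thresholds, the class probabilities are differences of adjacent sigmoids and each row sums to 1 before the `eps` clipping.

import numpy as np
from scipy.special import expit as logistic

Xb = np.array([0.3, -1.2])                         # two example score differences
thresholds = np.array([-0.5, 0.5])                 # must be strictly increasing
cum = logistic(thresholds[None, :] + Xb[:, None])  # cumulative probabilities P(class <= c)
preds = np.column_stack([cum[:, 0],                # first class
                         cum[:, 1] - cum[:, 0],    # intermediate class (draw)
                         1 - cum[:, 1]])           # last class (away team win)
print(preds, preds.sum(axis=1))                    # each row sums to 1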
Code example #17
File: negbinom_rflvm.py  Project: jxzhangjhu/rflvm
    def _sample_r(self):
        """Sample negative binomial dispersion parameter `R` based on
        (Zhou 2012). For code, see:

        https://mingyuanzhou.github.io/Softwares/LGNB_Regression_v0.zip
        """
        phi_X = self.phi(self.X, self.W, add_bias=True)
        F = phi_X @ self.beta.T
        P = logistic(F)
        for j in range(self.J):
            A = self._crt_sum(j)
            # `maximum` is element-wise, while `max` is not.
            maxes = np.maximum(1 - P[:, j], -np.inf)
            B = 1. / -np.sum(np.log(maxes))
            self.R[j] = np.random.gamma(A, B)
        # `R` cannot be zero.
        self.R[np.isclose(self.R, 0)] = 0.0000001
Code example #18
def forward_pass(W, batch):
    '''Propagate data through the network given by W.
    Parameters
    ----------
    W   :   np.ndarray
            Array of shape (1, 2) giving the network weights
    batch   :   np.ndarray
                Array of shape (N, 2) with the data

    Returns
    -------
    np.ndarray
            Network predictions for each sample in shape (N, 1)
    '''
    # sanity check our shapes
    assert batch.shape[1] == 2, 'Data shape is incorrect'
    assert W.shape == (1, 2), 'Weights shape is incorrect'
    return logistic(np.dot(W, batch.T)).T
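An illustrative call (the numbers are made up) matching the shapes spelled out in the docstring: weights of shape (1, 2) and a batch of shape (N, 2) yield predictions of shape (N, 1).

import numpy as np
from scipy.special import expit as logistic

W = np.array([[0.5, -1.0]])                              # shape (1, 2)
batch = np.array([[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]])   # shape (3, 2)
preds = logistic(np.dot(W, batch.T)).T                   # same expression as forward_pass
print(preds.shape)                                       # (3, 1), values in (0, 1)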
Code example #19
def rejection_sample(d_score,
                     epsilon=1e-6,
                     shift_percent=95.0,
                     score_max=None,
                     random=np.random):
    '''Rejection scheme from:
    https://arxiv.org/pdf/1810.06758.pdf
    '''
    assert (np.ndim(d_score) == 1 and len(d_score) > 0)
    assert (0 <= np.min(d_score) and np.max(d_score) <= 1)
    assert (np.ndim(score_max) == 0)

    # Chop off the first score since we assume it is the real point and the
    # rejection step should not start with the real point.
    d_score = d_score[1:]

    # Make sure the logit is finite
    d_score = np.clip(d_score.astype(float), 1e-14, 1 - 1e-14)
    max_burnin_d_score = np.clip(score_max.astype(float), 1e-14, 1 - 1e-14)

    log_M = logit(max_burnin_d_score)

    D_tilde = logit(d_score)
    # Bump up M if found something bigger
    D_tilde_M = np.maximum(log_M, np.maximum.accumulate(D_tilde))

    D_delta = D_tilde - D_tilde_M
    F = D_delta - np.log(1 - np.exp(D_delta - epsilon))

    if shift_percent is not None:
        gamma = np.percentile(F, shift_percent)
        F = F - gamma

    P = logistic(F)
    accept = random.rand(len(d_score)) <= P

    if np.any(accept):
        idx = np.argmax(accept)  # Stop at first true, default to 0
    else:
        idx = np.argmax(d_score)  # Revert to cherry if no accept

    # Now shift idx because we took away the real init point
    return idx + 1, P[idx]
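A hypothetical call of `rejection_sample` as defined above (the scores are synthetic, taking `score_max` as the largest observed score is just one convenient choice, and `logit`/`logistic` are assumed to come from scipy.special as in code example #22): d_score[0] plays the role of the real point and the remaining entries are discriminator scores for generated samples.

import numpy as np

rng = np.random.RandomState(0)
d_score = np.clip(rng.beta(2, 2, size=101), 1e-6, 1 - 1e-6)  # synthetic discriminator outputs
score_max = np.max(d_score)                                  # stand-in for the burn-in maximum
idx, p_accept = rejection_sample(d_score, score_max=score_max, random=rng)
print(idx, p_accept)   # index of the chosen sample (>= 1) and its acceptance probability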
Code example #20
    def variational_step(self, **kwargs):
        """
        Infer parameters from observations.
        """
        parameter_means = kwargs['parameter_means']
        # Get lambda_xi if not given
        if 'xi' in kwargs:
            lambda_xi = logistic_lambda(kwargs['xi'])
        elif 'lambda_xi' in kwargs:
            lambda_xi = kwargs['lambda_xi']
        else:
            lambda_xi = 0.125

        # Compute the expected prior precision
        if self.ard:
            hyper_shape = self.hyper_shape_0 + 0.5
            hyper_scale = self.hyper_scale_0 + 0.5 * parameter_means * parameter_means
        else:
            hyper_shape = self.hyper_shape_0 + 0.5 * self.p
            hyper_scale = self.hyper_scale_0 + 0.5 * parameter_means.dot(parameter_means)
        tau = hyper_shape / hyper_scale

        # Compute the parameter covariance and mean
        parameter_precision = tau * np.eye(self.p) + 2 * np.dot(self.design_matrix.T * lambda_xi * self.weights,
                                                                self.design_matrix)
        parameter_cov = np.linalg.inv(parameter_precision)
        parameter_means = 0.5 * parameter_cov.dot((self.observations * self.weights).dot(self.design_matrix))

        # Evaluate the extra variational parameter
        xi = np.sqrt(np.sum(np.dot(self.design_matrix, parameter_cov + parameter_means[:, None] *
                                   parameter_means[None, :]) * self.design_matrix, axis=1))
        logistic_xi = logistic(xi)
        lambda_xi = logistic_lambda(xi, logistic_xi)

        # Evaluate the evidence lower bound
        elbo = .5 * parameter_means.dot(parameter_precision).dot(parameter_means) \
                    - .5 * np.log(np.linalg.det(parameter_precision)) \
                    + np.sum(np.log(logistic_xi) - .5 * xi + lambda_xi * xi ** 2) \
                    + np.sum(- gammaln(self.hyper_shape_0) + self.hyper_shape_0 * np.log(self.hyper_scale_0)
                             - self.hyper_scale_0 * hyper_shape / hyper_scale - hyper_shape * np.log(hyper_scale)
                             + gammaln(hyper_shape) + hyper_shape)

        return elbo, dict(parameter_means=parameter_means, xi=xi, lambda_xi=lambda_xi, parameter_cov=parameter_cov)
Code example #21
    def sampling(self,
                 samples,
                 sigmoids,
                 epsilon=1e-8,
                 shift_percent=95.0,
                 rank=None):

        sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)

        # Update upper bound
        D_tilde = logit(sigmoids)
        self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))

        # Compute probability
        D_delta = D_tilde - self.D_tilde_M
        F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
        if shift_percent is not None:
            gamma = np.percentile(F, shift_percent)
            # print("gamma", gamma)
            F = F - gamma
        P = np.squeeze(logistic(F))

        # Filter out samples
        # accept = np.random.rand(len(D_delta)) < P
        # good_samples = samples[accept]
        # print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(len(D_delta), np.sum(accept), np.sum(accept)/len(D_delta) ))

        if rank is not None:
            order = np.argsort(P)[::-1]
            accept = order[:int(rank * len(D_delta))]
            good_samples = samples[accept, :]
            print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
                len(D_delta), np.size(accept, 0),
                np.size(accept, 0) / len(D_delta)))
        else:
            accept = np.random.rand(len(D_delta)) < P
            good_samples = samples[accept]
            print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
                len(D_delta), np.sum(accept),
                np.sum(accept) / len(D_delta)))

        return good_samples
Code example #22
File: transformers.py  Project: cog-imperial/entmoot
 def inverse_transform(Xt):
     from scipy.special import expit as logistic
     return logistic(Xt)
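In this example `logistic` is explicitly `scipy.special.expit`; a quick round-trip with its inverse `logit` shows the relationship the other examples on this page rely on.

import numpy as np
from scipy.special import expit as logistic, logit

x = np.array([-3.0, 0.0, 3.0])
p = logistic(x)      # sigmoid: 1 / (1 + exp(-x)) -> approx [0.0474, 0.5, 0.9526]
print(logit(p))      # recovers x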
Code example #23
)

print(mcmc_result)
mcmc_result.plot()
plt.show()

mcmc_sample = mcmc_result.extract()
df = pandas.DataFrame(mcmc_sample)
print(df.head())

label_one = ['shade', 'sunshine']
label_two = np.arange(1,11)

y_s = []
for i in label_two:
    y_s.append(logistic(df['Intercept'].mean() + df['b_nutrition'].mean() * i + df['b_solar'].mean() * 1)*10)

y_c = []
for i in label_two:
    y_c.append(logistic(df['Intercept'].mean() + df['b_nutrition'].mean() * i + df['b_solar'].mean() * 0)*10)


print(label_two.shape)
print(np.array(y_s).shape)
plt.plot(label_two, np.array(y_s), 'red', label='sunshine')
plt.scatter(germination_dat_d.query('solar_sunshine == 1')["nutrition"],
            germination_dat_d.query('solar_sunshine == 1')["germination"],
            c='r')

plt.plot(label_two, np.array(y_c), 'blue', label='shade')
plt.scatter(germination_dat_d.query('solar_shade == 1')["nutrition"],
Code example #24
ax1.grid(False)
ax2.grid(False)
plt.savefig('B11197_04_06.png', dpi=300)

# In[16]:

df = az.summary(trace_1, var_names=varnames)
df

# In[17]:

x_1 = 4.5  # sepal_length
x_2 = 3  # sepal_width

log_odds_versicolor_i = (df['mean'] * [1, x_1, x_2]).sum()
probability_versicolor_i = logistic(log_odds_versicolor_i)

log_odds_versicolor_f = (df['mean'] * [1, x_1 + 1, x_2]).sum()
probability_versicolor_f = logistic(log_odds_versicolor_f)

log_odds_versicolor_f - log_odds_versicolor_i, probability_versicolor_f - probability_versicolor_i

# ## Dealing with correlated variables

# In[18]:

corr = iris[iris['species'] != 'virginica'].corr()
mask = np.tri(*corr.shape).T
sns.heatmap(corr.abs(), mask=mask, annot=True, cmap='viridis')
plt.savefig('B11197_04_07.png', dpi=300, bbox_inches='tight')
Code example #25
File: logistic_regression.py  Project: zxsted/PRMLPY
def expected_response(theta, X):
    # compute the conditional expectation of the response as defined by logistic
    # regression, i.e. E[y_n|x_n, theta] = sigmoid(theta dot x_n), which is
    # also equal to the posterior class probability P(y_n=1|x_n, theta)
    return logistic(X.dot(theta))
Code example #26
def sim_actor(tr, i):
    sim_a_actor = np.random.randn() * tr["sigma_actor"][i]
    P = np.array([0, 1, 0, 1])
    C = np.array([0, 0, 1, 1])
    p = logistic(tr["a"][i] + sim_a_actor + (tr["bp"][i] + tr["bpC"][i] * C) * P)
    return p
Code example #27
def compute_prob(r1, r2, c, h):
    d = r1 - r2 + h
    return logistic(-c + d), logistic(c + d) - logistic(-c + d), 1 - logistic(c + d)
Code example #28
    a = pm.Normal("a", 0.0, 1.0)
    sigma = pm.HalfCauchy("sigma", 1.0)
    a_tank = pm.Normal("a_tank", a, sigma, shape=d.shape[0])
    p = pm.math.invlogit(a_tank[tank])
    surv = pm.Binomial("surv", n=d.density, p=p, observed=d.surv)
    trace_12_2 = pm.sample(10000, tune=10000)

# %%
comp_df = az.compare({"m12_1": trace_12_1, "m12_2": trace_12_2})
comp_df

# %%
post = pm.trace_to_dataframe(trace_12_2, varnames=["a_tank"])

d.loc[:, "propsurv_est"] = pd.Series(
    logistic(post.median(axis=0).values), index=d.index
)

# %%
_, ax = plt.subplots(1, 1, figsize=(12, 5))
# display raw proportions surviving in each tank
ax.scatter(np.arange(1, 49), d.propsurv)
ax.scatter(np.arange(1, 49), d.propsurv_est, facecolors="none", edgecolors="k", lw=1)
ax.hlines(logistic(np.median(trace_12_2["a"], axis=0)), 0, 49, linestyles="--")
ax.vlines([16.5, 32.5], -0.05, 1.05, lw=0.5)
ax.text(8, 0, "small tanks", horizontalalignment="center")
ax.text(16 + 8, 0, "medium tanks", horizontalalignment="center")
ax.text(32 + 8, 0, "large tanks", horizontalalignment="center")
ax.set_xlabel("tank", fontsize=14)
ax.set_ylabel("proportion survival", fontsize=14)
ax.set_xlim(-1, 50)
Code example #29
def log_lik_elo(r1, r2, res):
    r_diff = r1 - r2
    return res * np.log(logistic(-r_diff)) + (1 - res) * np.log((1 - logistic(-r_diff)))
Code example #30
       f = gp.prior("f", X=X_1)
       # logistic inverse link function and Bernoulli likelihood
       y_ = pm.Bernoulli("y", p=pm.math.sigmoid(f), observed=y)
       trace_iris = pm.sample(1000, chains=1, compute_convergence_checks=False)
       
   # Posterior predictive
 
   with model_iris:
       f_pred = gp.conditional('f_pred', X_new)
       pred_samples = pm.sample_posterior_predictive(
           trace_iris, vars=[f_pred], samples=1000)
   
   # Plot results
   _, ax = plt.subplots(figsize=(10, 6))
   
   fp = logistic(pred_samples['f_pred'])
   fp_mean = np.mean(fp, 0)
   
   ax.plot(X_new[:, 0], fp_mean)
   # plot the data (with some jitter) and the true latent function
   ax.scatter(x_1, np.random.normal(y, 0.02),
              marker='.', color=[f'C{x}' for x in y])
   
   az.plot_hpd(X_new[:, 0], fp, color='C2')
   
   db = np.array([find_midpoint(f, X_new[:, 0], 0.5) for f in fp])
   db_mean = db.mean()
   db_hpd = az.hpd(db)
   ax.vlines(db_mean, 0, 1, color='k')
   ax.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color='k', alpha=0.5)
   ax.set_xlabel('sepal_length')
Code example #31
def experiment(disease_no, lag):
    # For seasonal correlation, we always use no-lag data
    dir = 'data/0/'
    X_train = pd.read_csv(dir + 'D{}_X_train.csv'.format(disease_no), index_col=0)
    y_train = pd.read_csv(dir + 'D{}_y_train.csv'.format(disease_no), index_col=0)
    y_train = y_train['infection-rate']
    corr_seasonal = calc_corr_seasonal(X_train, y_train)

    dir = 'data/{}/'.format(lag)
    X_train = pd.read_csv(dir + 'D{}_X_train.csv'.format(disease_no), index_col=0)
    y_train = pd.read_csv(dir + 'D{}_y_train.csv'.format(disease_no), index_col=0)
    X_test  = pd.read_csv(dir + 'D{}_X_test.csv'.format(disease_no), index_col=0)
    y_test  = pd.read_csv(dir + 'D{}_y_test.csv'.format(disease_no), index_col=0)
    y_train = y_train['infection-rate']
    y_test  = y_test['infection-rate']

    # FEATURE SELECTION
    print('- Ranking feature ...')
    corr_trend        = calc_corr_trend(X_train, y_train)
    corr_irregular    = calc_corr_irregular(X_train, y_train)
    ranking_trend     = rank(corr_trend, corr_seasonal)
    ranking_irregular = rank(corr_irregular, corr_seasonal)
    display_top_terms(ranking_trend, 'for TREND')
    display_top_terms(ranking_irregular, 'for IRREGULAR')

    # Calculate components for the data frame
    X_train_trend     = calc_df_trend(X_train.drop('date', axis=1), 52)
    X_train_irregular = calc_df_irregular(X_train.drop('date', axis=1), 52)
    y_train_trend, _, y_train_irregular = decompose(fix_inf(logit(y_train).values), 52)

    print('- Selecting best feature subset ... ', end='')
    subset_trend    , alpha_trend     = subset_select(X_train_trend    , y_train_trend    , ranking_trend)
    subset_irregular, alpha_irregular = subset_select(X_train_irregular, y_train_irregular, ranking_irregular)
    agg_x_train_trend     = aggregate(X_train_trend, subset_trend)
    agg_x_train_irregular = aggregate(X_train_irregular, subset_irregular)
    print('selected', len(subset_trend), 'for trend,', len(subset_irregular), 'for irregular.')
    print("- Selected search terms saved at "
          "'output/selected/T_{}_{}.txt' and "
          "'output/selected/T_{}_{}.txt'."
          .format(disease_no, lag, disease_no, lag))

    # Logging selected term
    with open('output/selected/T_{}_{}.txt'.format(disease_no, lag), 'w') as f:
        for term in subset_trend:
            f.write(term + '\n')

    with open('output/selected/I_{}_{}.txt'.format(disease_no, lag), 'w') as f:
        for term in subset_irregular:
            f.write(term + '\n')

    # RELEARN AND PREDICT
    print('- Learning the final model and predicting...', end=' ')
    model_trend     = train(agg_x_train_trend    , y_train_trend    , alpha_trend)
    model_irregular = train(agg_x_train_irregular, y_train_irregular, alpha_irregular)

    # We will calculate each component individually for each week in the test period
    # We need the train data for decomposing the test time series
    # First, let's make a copy of train data
    X_agg_curr_trend     = aggregate(X_train, subset_trend)
    X_agg_curr_irregular = aggregate(X_train, subset_irregular)
    X_agg_test_trend     = aggregate(X_test , subset_trend)
    X_agg_test_irregular = aggregate(X_test , subset_irregular)

    # We use the seasonal component
    # From the historical data of the epidemic
    _, historical_seasonal, _ = decompose(fix_inf(logit(y_train).values), 52)
    historical_seasonal = list(historical_seasonal)

    predict_y = []
    predict_trends = []
    predict_irregulars = []

    # Now let's predict, one week at a time
    for idx in range(len(X_test.index)):
        # Add data of the new week
        X_agg_curr_trend     = X_agg_curr_trend.append(X_agg_test_trend.loc[idx, :])
        X_agg_curr_irregular = X_agg_curr_irregular.append(X_agg_test_irregular.loc[idx, :])

        # Re-decompose the search time series
        X_curr_trend         = calc_df_trend(X_agg_curr_trend, 52)
        X_curr_irregular     = calc_df_irregular(X_agg_curr_irregular, 52)
        historical_seasonal.append(historical_seasonal[-52])

        # We need only the latest one
        curr_trend     = X_curr_trend.iloc[-1:]
        curr_irregular = X_curr_irregular.iloc[-1:]
        curr_seasonal  = historical_seasonal[-1]

        # Let's predict each component
        predict_trend = predict(model_trend, curr_trend).values[0]
        predict_irregular = predict(model_irregular, curr_irregular).values[0]
        predict_seasonal = curr_seasonal

        # And then add them to the result list
        predict_y.append(logistic(predict_trend * predict_irregular * predict_seasonal))
        predict_trends.append(predict_trend)
        predict_irregulars.append(predict_irregular)

    _mape = mape(y_test, predict_y)
    _coef = corr_coef(y_test, predict_y)

    # THE CODE BELOW IS JUST FOR VISUALIZATION
    predict_y_train_trend     = predict(model_trend, agg_x_train_trend)
    predict_y_train_irregular = predict(model_irregular, agg_x_train_irregular)
    predict_y_train_seasonal  = historical_seasonal[:len(y_train_trend)]
    predict_y_train           = logistic(predict_y_train_trend * predict_y_train_irregular * predict_y_train_seasonal)

    predict_y_all_trend       = np.append(predict_y_train_trend    , np.array(predict_trends))
    predict_y_all_irregular   = np.append(predict_y_train_irregular, np.array(predict_irregulars))
    predict_y_all             = np.append(predict_y_train          , predict_y)

    print('Finished.')
    return _mape, _coef, (predict_y_all_trend,
                          predict_y_all_irregular,
                          predict_y_all)
Code example #32
def vect_likelyhood_M3(x, y, w):
    y = np.squeeze(np.asarray(y))
    print(np.dot(x, w))
    print(np.multiply(y.T, np.dot(x, w)))
    return np.prod(logistic(np.multiply(y.T, np.dot(x, w))))
Code example #33
def fit_model(name, func):
    """Fits a model (in a Bayesian sense) to the data.

    This was written as a function so that some of the code can be re-used for the
    secondary model.

    Args:
        name (str): Descriptive name of the model. Posterior samples, statistics, and
            figures are generated and saved in a subdirectory with this name.
        func (:obj:`<class 'function'>`): Function for model construction. Should
            return a formatted copy of the data.

    Returns:
        data (pandas.DataFrame): The formatted copy of the data augmented with the
            results from the model fitting.

    """
    with pm.Model() as m:

        # construct model and load data
        data = func()

        if exists(f"{name}") is False:
            # sample posterior
            trace = pm.sample(10000, tune=1000, chains=2)
            pm.save_trace(trace, f"{name}")
        else:
            # load samples
            trace = pm.load_trace(f"{name}")

        if exists(f"{name}/ppc.npz") is False:
            # perform ppc
            ppc = pm.sample_posterior_predictive(trace, samples=10000)["y"]
            np.savez_compressed(f"{name}/ppc.npz", ppc)
        else:
            # load pp samples
            ppc = np.load(f"{name}/ppc.npz")["arr_0"]

        if exists(f"{name}/summary.csv") is False:
            # make a summary csv
            summary = pm.summary(trace, var_names=m.free_RVs)
            summary.to_csv(f"{name}/summary.csv")
        else:
            summary = pd.read_csv(f"{name}/summary.csv")

        if exists(f"{name}/details.txt") is False:

            details = f"Minimum Rhat = {summary.Rhat.min()}\n"
            details += f"Minimum Neff = {summary.n_eff.min()}\n"
            n = data.trials.mean()
            r2 = pm.stats.r2_score(data.num.values, trace["p"] * n, 3)
            details += f"Bayesian median R2 = {r2[0]}\n"
            try:
                details += f"BFMI = {pm.stats.bfmi(trace)}\n"
            except KeyError:
                details += f"No BFMI generated!\n"

            open(f"{name}/details.txt", "w").write(details)

        if exists(f"{name}/traceplot.png") is False:

            pm.traceplot(trace, compact=True)
            plt.savefig(f"{name}/traceplot.png")

        if exists(f"{name}/data.csv") is False:

            data["a"] = logistic(trace[r"$\alpha$"].mean(axis=0))
            data["b"] = trace[r"$\beta$"].mean(axis=0)
            data["l"] = logistic(trace[r"$\lambda$"].mean(axis=0))
            data["s"] = np.exp(trace[r"$\varsigma$"].mean(axis=0))
            data["d"] = np.exp(trace[r"$\delta$"].mean(axis=0))

            ppq = pd.DataFrame(ppc).quantile([0.025, 0.975]).T
            ppq.columns = ["ppc_lo", "ppc_hi"]
            data = pd.concat([data, ppq], axis=1)
            data["ppc_lo"] /= data.trials
            data["ppc_hi"] /= data.trials

            ppe = np.abs(ppc -
                         np.tile(data.num.values, (10000, 1))).mean(axis=0)
            data["pro_pp_errors"] = ppe / data.trials
            data.to_csv(f"{name}/data.csv", index=False)

        else:

            data = pd.read_csv(f"{name}/data.csv")

    return data