Example #1
0
def set_model(num_teams, home_team, away_team, observed_home_goals,
              observed_away_goals):
    """Build the hierarchical Poisson model for home and away scores.

    Every team gets an attack effect and a defence effect; each effect
    vector is centred by subtracting its own mean. The Poisson rate of a
    side is the exponential of the intercept plus that side's attack effect
    plus the opponent's defence effect, with an additional ``home`` term on
    the home side.

    Args:
        num_teams: Length of the per-team effect vectors.
        home_team: Indices used to pick the home side's entries out of the
            effect vectors.
        away_team: Indices used to pick the away side's entries out of the
            effect vectors.
        observed_home_goals: Observed values for the ``home_points``
            likelihood.
        observed_away_goals: Observed values for the ``away_points``
            likelihood.

    Returns:
        The ``pm.Model`` containing the priors and both likelihood terms.
    """
    with pm.Model() as model:
        # Global parameters: flat priors on the two offsets, half-Student-t
        # priors on the spread of the team effects.
        home = pm.Flat('home')
        sd_att = pm.HalfStudentT('sd_att', nu=3, sd=2.5)
        sd_def = pm.HalfStudentT('sd_def', nu=3, sd=2.5)
        intercept = pm.Flat('intercept')

        # Uncentred per-team effects.
        atts_star = pm.Normal("atts_star", mu=0, sd=sd_att, shape=num_teams)
        defs_star = pm.Normal("defs_star", mu=0, sd=sd_def, shape=num_teams)

        # Centred effects, recorded under their own names.
        atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
        defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))

        # Linear predictors on the log scale, one entry per index pair.
        log_home_rate = intercept + home + atts[home_team] + defs[away_team]
        log_away_rate = intercept + atts[away_team] + defs[home_team]

        # Likelihood terms; registering them on the model is all that is
        # needed, so the returned variables are not kept.
        pm.Poisson('home_points',
                   mu=tt.exp(log_home_rate),
                   observed=observed_home_goals)
        pm.Poisson('away_points',
                   mu=tt.exp(log_away_rate),
                   observed=observed_away_goals)

    return model
Example #2
0
def run():
    """Fit the hierarchical Poisson score model to the module-level ``df``.

    Reads the global ``df`` (the columns ``home_team``, ``away_team``,
    ``home_score`` and ``away_score`` are used), gives every team an integer
    index, samples the model and draws the trace and energy plots.

    Returns:
        None. The outcome is only visible through the generated plots.
    """
    # One integer index per distinct value of ``home_team``.
    teams = pd.DataFrame(df.home_team.unique(), columns=['team'])
    teams['i'] = teams.index

    # The merged frame gets its own name: assigning to ``df`` inside the
    # function would make ``df`` local and the read above would fail with
    # UnboundLocalError.
    games = pd.merge(df, teams, left_on='home_team', right_on='team',
                     how='left')
    games = games.rename(columns={'i': 'i_home'}).drop(columns='team')
    games = pd.merge(games, teams, left_on='away_team', right_on='team',
                     how='left')
    games = games.rename(columns={'i': 'i_away'}).drop(columns='team')

    observed_home_goals = games.home_score.values
    observed_away_goals = games.away_score.values

    home_team = games.i_home.values
    away_team = games.i_away.values

    num_teams = len(games.i_home.drop_duplicates())

    with pm.Model():
        # global model parameters
        home = pm.Flat('home')
        sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
        sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
        intercept = pm.Flat('intercept')

        # team-specific model parameters
        atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att,
                              shape=num_teams)
        defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def,
                              shape=num_teams)

        # centre the effects by subtracting their mean
        atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
        defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))
        home_theta = tt.exp(intercept + home + atts[home_team] +
                            defs[away_team])
        away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

        # likelihood of observed data
        pm.Poisson('home_points', mu=home_theta,
                   observed=observed_home_goals)
        pm.Poisson('away_points', mu=away_theta,
                   observed=observed_away_goals)

        # pm.sample looks the model up on the context stack, so it has to
        # be called while the ``with`` block is still open.
        trace = pm.sample(1000, tune=1000, cores=3)

    pm.traceplot(trace, var_names=['intercept', 'home', 'sd_att', 'sd_def'])

    bfmi = pm.bfmi(trace)
    max_gr = max(np.max(gr_stats)
                 for gr_stats in pm.gelman_rubin(trace).values())

    # NOTE(review): the statement that draws the energy plot was cut off
    # after the energyplot call; putting the two diagnostics in the title is
    # an assumed completion -- confirm against the intended output.
    energy_ax = pm.energyplot(trace, legend=False, figsize=(6, 4))
    energy_ax.set_title("BFMI = {}\nGelman-Rubin = {}".format(bfmi, max_gr))
Example #3
0
    def _build_gains_time(self, Bx_gains):
        """Create the time-dependent gain variables and return them.

        Registers the dimension names of the new variables in ``self.dims``,
        builds the scale hierarchy and the raw ``GPExponential`` variable,
        scales the raw values per row and maps them through ``Bx_gains``
        with ``sparse_dot``.

        If ``self.params`` contains ``'log_gains_time_sd_sd_trace_mu'``,
        that entry and ``'log_gains_time_sd_sd_trace_sd'`` are popped and
        used as mean and sd of a Normal prior on ``log_gains_time_sd_sd``;
        otherwise ``gains_time_sd_sd`` gets a half-Student-t prior.

        Args:
            Bx_gains: First operand of the ``sparse_dot`` projection.

        Returns:
            The projected gains, also recorded as the deterministic
            ``'gains_time'``.
        """
        new_dims = {
            'gains_time_sd_raw': ('algo', ),
            'gains_time_sd': ('algo', ),
            'gains_time_raw': ('algo', 'time_raw_gains'),
            'gains_time': ('algo', 'time'),
        }
        self.dims.update(new_dims)

        n_algos = self.n_algos
        n_knots = len(self.coords['time_raw_gains'])

        gains_time_alpha = pm.HalfNormal('gains_time_alpha', sd=0.1)

        if 'log_gains_time_sd_sd_trace_mu' not in self.params:
            # No stored values: sample the scale directly and record its
            # log alongside it.
            gains_time_sd_sd = pm.HalfStudentT('gains_time_sd_sd',
                                               nu=3,
                                               sd=0.1)
            pm.Deterministic('log_gains_time_sd_sd', tt.log(gains_time_sd_sd))
        else:
            # Stored values present: consume them and put the prior on the
            # log scale instead.
            prior_mu = self.params.pop('log_gains_time_sd_sd_trace_mu')
            prior_sd = self.params.pop('log_gains_time_sd_sd_trace_sd')
            log_scale = pm.Normal('log_gains_time_sd_sd',
                                  mu=prior_mu,
                                  sd=prior_sd)
            gains_time_sd_sd = pm.Deterministic('gains_time_sd_sd',
                                                tt.exp(log_scale))

        # Per-row scale: shared scale times a raw half-normal factor.
        gains_time_sd_raw = pm.HalfNormal('gains_time_sd_raw', shape=n_algos)
        gains_time_sd = pm.Deterministic(
            'gains_time_sd', gains_time_sd_sd * gains_time_sd_raw)

        gains_time_raw = GPExponential('gains_time_raw',
                                       mu=0,
                                       alpha=gains_time_alpha,
                                       sigma=1,
                                       shape=(n_algos, n_knots))

        # Scale each row, then project; the transposes put the second axis
        # of the raw values first for sparse_dot and restore it afterwards.
        scaled = gains_time_sd[:, None] * gains_time_raw
        projected = sparse_dot(Bx_gains, scaled.T).T

        pm.Deterministic('gains_time', projected)
        return projected
Example #4
0
def main(args):
    """Fit the attack/defence Poisson model, plot it and predict scores.

    Rows of the loaded frame with ``split == "train"`` are used for
    inference; rows with ``split == "predict"`` get posterior-predictive
    scores. Two score tables are printed at the end: one for the loaded
    frame and one for the training rows followed by the predicted rows.

    Args:
        args: Object providing ``num_samples``, ``num_warmup``,
            ``num_chains``, ``num_cores`` and ``rng_seed``; all five are
            forwarded to ``pm.sample``.
    """
    # Local import: only needed for the final concatenation.
    import pandas as pd

    print("Loading data...")
    teams, df = load_data()
    nt = len(teams)
    train = df[df["split"] == "train"]

    print("Starting inference...")
    with pm.Model() as model:
        # priors
        alpha = pm.Normal("alpha", mu=0, sigma=1)
        sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
        sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)

        home = pm.Normal("home", mu=0, sigma=1)  # home advantage

        # team-specific model parameters
        attack = pm.Normal("attack", mu=0, sigma=sd_att, shape=nt)
        defend = pm.Normal("defend", mu=0, sigma=sd_def, shape=nt)

        # Team ids are held in pm.Data containers so they can be swapped
        # for the prediction rows after sampling.
        home_id = pm.Data("home_data", train["Home_id"])
        away_id = pm.Data("away_data", train["Away_id"])

        # likelihood: the opponent's defence enters with a negative sign
        theta1 = tt.exp(alpha + home + attack[home_id] - defend[away_id])
        theta2 = tt.exp(alpha + attack[away_id] - defend[home_id])

        pm.Poisson("s1", mu=theta1, observed=train["score1"])
        pm.Poisson("s2", mu=theta2, observed=train["score2"])

    with model:
        fit = pm.sample(
            draws=args.num_samples,
            tune=args.num_warmup,
            chains=args.num_chains,
            cores=args.num_cores,
            random_seed=args.rng_seed,
        )

    print("Analyse posterior...")
    summary_vars = ("alpha", "home", "sd_att", "sd_def")
    az.plot_forest(fit, var_names=summary_vars, backend="bokeh")
    az.plot_trace(fit, var_names=summary_vars, backend="bokeh")

    # Attack and defence: posterior mean and standard deviation per team,
    # plus a band of one standard deviation either side of the mean.
    quality = teams.copy()
    quality = quality.assign(
        attack=fit["attack"].mean(axis=0),
        attacksd=fit["attack"].std(axis=0),
        defend=fit["defend"].mean(axis=0),
        defendsd=fit["defend"].std(axis=0),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )

    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]

    with model:
        # Swap both id containers in one call so they are updated together.
        pm.set_data({
            "home_data": predict["Home_id"],
            "away_data": predict["Away_id"],
        })

        predicted_score = pm.sample_posterior_predictive(
            fit, var_names=["s1", "s2"], random_seed=1)

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=predicted_score["s1"].mean(axis=0).round(),
        score1error=predicted_score["s1"].std(axis=0),
        score2=predicted_score["s2"].mean(axis=0).round(),
        score2error=predicted_score["s2"].std(axis=0),
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
    # row-wise stacking on every pandas version.
    predicted_full = pd.concat([
        train,
        predicted_full.drop(columns=["score1error", "score2error"]),
    ])

    print(score_table(df))
    print(score_table(predicted_full))
Example #5
0
                     g=n)
    # Symbolic matrix input named "Y" that the filter graph is built on.
    Y_tensor = tt.matrix("Y")
    # K.filter returns a pair; the first element unpacks into three outputs
    # and the second element is discarded.
    (s, P, ll), _ = K.filter(Y_tensor)
    # Compile the three filter outputs into a callable taking the data
    # matrix and the value bound to sv_tnsr.
    kf = theano.function(inputs=[Y_tensor, sv_tnsr],
                         outputs=[s, P, ll],
                         mode=theano.Mode(optimizer="unsafe"))

    # Evaluate the compiled filter on Y with a length-m vector of 2s as the
    # second input; this rebinds s, P and ll to the numeric results.
    s, P, ll = kf(Y, 2 * np.ones(m))

    import pymc3 as pm

    with pm.Model() as model:
        # The positional arguments after the name presumably follow the
        # order Phi, Q, L, c, H, Sv, d, s0, P0, n, m, g -- TODO confirm
        # against the KalmanFilter signature.

        # Free parameters, each a 1x1 matrix.
        phi = pm.Normal("phi", shape=(1, 1))
        q = pm.HalfStudentT("q", nu=1.0, sd=2.0, shape=(1, 1))
        # phi and q are the only random arguments; all the others are fixed
        # arrays or the constant 1. This rebinds K, which named the object
        # whose filter() was called above.
        K = KalmanFilter("kf",
                         phi,
                         q,
                         np.array([[1.]]),
                         np.array([0.]),
                         np.array([[1.]]),
                         np.array([[0.0]]),
                         np.array([0.]),
                         np.array([0.]),
                         np.array([[10.]]),
                         1,
                         1,
                         1,
                         observed=y)
    return df


if __name__ == '__main__':
    df = get_tidy_data()

    # Index columns and observed scores as plain arrays.
    home_team = df.i_home.values
    away_team = df.i_away.values
    obs_h_score = df.home_score.values
    obs_a_score = df.away_score.values

    # Size of the per-team vectors: one more than the largest home index.
    num_teams = max(home_team) + 1

    with pm.Model() as model:
        # Home advantage term with a flat prior.
        home = pm.Flat('home')

        # Spread of the attack and defence effects.
        sd_atk = pm.HalfStudentT('sd_atk', nu=3, sd=2.5)
        sd_def = pm.HalfStudentT('sd_def', nu=3, sd=2.5)

        # Intercept, also with a flat prior.
        intercept = pm.Flat('intercept')

        # Uncentred per-team effects; ``shape`` makes each one a vector
        # with one entry per team.
        atks_star = pm.Normal('atks_star', mu=0, sd=sd_atk, shape=num_teams)
        defs_star = pm.Normal('defs_star', mu=0, sd=sd_def, shape=num_teams)

        # Centre each vector by subtracting its own mean.
        atks = pm.Deterministic(
            'atks',
            atks_star - tt.mean(atks_star),
        )
        defs = pm.Deterministic(
            'defs',
            defs_star - tt.mean(defs_star),
        )

        # theta as a function of parameters
# Keyword arguments for the Cov19Model constructor.
params_model = {
    "new_cases_obs": new_cases_obs[:],
    "data_begin": data_begin,
    "fcast_len": num_days_forecast,
    "diff_data_sim": diff_data_sim,
    "N_population": args.population,
}

# Median of the prior on the case-reporting delay; 10 days is assumed.
pr_delay = 10

# Build the model components inside the Cov19Model context.
with cov19.model.Cov19Model(**params_model) as this_model:

    # Every change point after the first gets the same random variable as
    # its "pr_sigma_lambda" entry.
    sigma_lambda = pm.HalfStudentT(name="sigma_lambda_cps", nu=4, sigma=0.5)
    for i, cp in enumerate(change_points[1:]):
        cp["pr_sigma_lambda"] = sigma_lambda

    # Time-dependent infection rate lambda, built from the change points
    # and stored in the trace under the name "lambda_t".
    lambda_t_log = cov19.model.lambda_t_with_sigmoids(
        pr_median_lambda_0=0.4,
        pr_sigma_lambda_0=0.5,
        change_points_list=change_points,
        name_lambda_t="lambda_t",
    )

    # Prior on the recovery rate: log-normal centred on log(1/8).
    mu = pm.Lognormal(name="mu", mu=np.log(1 / 8), sigma=0.2)
Example #8
0
# Team index columns as plain arrays; they index the effect vectors below.
home_team = df.i_home.values
away_team = df.i_away.values

# Number of distinct home indices, used as the length of the effect vectors.
num_teams = len(df.i_home.drop_duplicates())
num_games = len(home_team)

# Log of the mean away score per away index, and the negated log of the
# mean away score per home index. Neither value is referenced again in the
# visible part of this snippet.
g = df.groupby('i_away')
att_starting_points = np.log(g.away_score.mean())
g = df.groupby('i_home')
def_starting_points = -np.log(g.away_score.mean())

with pm.Model() as model:
    # global model parameters
    home = pm.Flat('home')
    sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
    intercept = pm.Flat('intercept')

    # team-specific model parameters
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams)
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams)

    # Centre both effect vectors by subtracting their mean, then form the
    # Poisson rates as the exponential of the summed terms; only the home
    # rate includes the ``home`` term.
    atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))
    home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm.Poisson('home_points',
                             mu=home_theta,
Example #9
0
# Team index of each side; these arrays index the effect vectors below.
home_team = df.i_home.values
away_team = df.i_away.values

# Observed scores fed to the two likelihood terms.
observed_home_goals = df.home_score.values
observed_away_goals = df.away_score.values

num_games = len(home_team)
# One effect per distinct home index.
num_teams = len(df.i_home.unique())


# Hierarchical Poisson model of the two scores.
with pm.Model() as model:
    # Global parameters: flat priors on the offsets, half-Student-t priors
    # on the spread of the team effects.
    home = pm.Flat("home")
    sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)
    intercept = pm.Flat("intercept")

    # Uncentred per-team attack and defence effects.
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams)
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams)

    # Centre each effect vector by subtracting its own mean.
    atts = pm.Deterministic("atts", atts_star - tt.mean(atts_star))
    defs = pm.Deterministic("defs", defs_star - tt.mean(defs_star))

    # Rates: exponential of intercept + own attack + opposing defence, with
    # the extra ``home`` term on the home side only.
    home_theta = tt.exp(
        intercept + home + atts[home_team] + defs[away_team]
    )
    away_theta = tt.exp(
        intercept + atts[away_team] + defs[home_team]
    )

    # Poisson likelihood of the observed scores.
    home_goals = pm.Poisson(
        "home_goals",
        mu=home_theta,
        observed=observed_home_goals,
    )
    away_goals = pm.Poisson(
        "away_goals",
        mu=away_theta,
        observed=observed_away_goals,
    )
# Initial values: for tries and for penalties, the log of the mean away
# count per away index and the negated log of the mean away count per home
# index. None of the four values is referenced again in the visible part of
# this snippet.
g = df.groupby('i_away')
att_tries_init = np.log(g.away_tries.mean())
g = df.groupby('i_home')
def_tries_init = -np.log(g.away_tries.mean())
g = df.groupby('i_away')
att_pens_init = np.log(g.away_pens.mean())
g = df.groupby('i_home')
def_pens_init = -np.log(g.away_pens.mean())

# Model with separate parameter sets for tries and for penalties; the
# commented-out lines are the matching parameters for drops.
with pm.Model() as model:
    # global model parameters
    home_tries = pm.Flat('home_tries')  # intercept for home advantage
    home_pens = pm.Flat('home_pens')  # intercept for home advantage
    # Spread of the attack/defence effects, one pair per outcome type.
    sd_att_tries = pm.HalfStudentT('sd_att_tries', nu=3, sd=2.5)
    sd_def_tries = pm.HalfStudentT('sd_def_tries', nu=3, sd=2.5)
    sd_att_pens = pm.HalfStudentT('sd_att_pens', nu=3, sd=2.5)
    sd_def_pens = pm.HalfStudentT('sd_def_pens', nu=3, sd=2.5)
    # sd_att_drops = pm.HalfStudentT('sd_att_drops', nu=3, sd=2.5)
    # sd_def_drops = pm.HalfStudentT('sd_def_drops', nu=3, sd=2.5)

    # Flat-prior intercepts, one per outcome type.
    intercept_tries = pm.Flat('intercept_tries')
    intercept_pens = pm.Flat('intercept_pens')
    # intercept_drops = pm.Flat('intercept_drops')

    # team-specific model parameters
    # (num_teams is not assigned in this part of the snippet; it must
    # already be defined when this runs.)
    atts_tries_star = pm.Normal("atts_tries_star", mu=0, sd=sd_att_tries, shape=num_teams)
    defs_tries_star = pm.Normal("defs_tries_star", mu=0, sd=sd_def_tries, shape=num_teams)
    atts_pens_star = pm.Normal("atts_pens_star", mu=0, sd=sd_att_pens, shape=num_teams)
    defs_pens_star = pm.Normal("defs_pens_star", mu=0, sd=sd_def_pens, shape=num_teams)