Exemple #1
0
def train_strat_model(weights, data_train, data_val, data_test, lambd):
    """Fit one stratified model on a week x hour graph and report its ANLL.

    Args:
        weights: Indexable holding two edge weights; ``weights[0]`` is applied
            to the 53-node week cycle and ``weights[1]`` to the 24-node hour
            cycle.
        data_train: Data passed to ``fit`` and scored as the training split.
        data_val: Data scored as the validation split.
        data_test: Data scored as the test split.
        lambd: Forwarded unchanged to ``scaled_plus_sum_squares_reg``.

    Returns:
        Tuple ``(anll_train, anll_val, anll_test)`` as returned by ``anll``.

    Note:
        ``D`` (the ``A`` argument of the regularizer) and ``kwargs`` (extra
        ``fit`` options) are not parameters; they are looked up in the
        enclosing/module scope when the function runs.
    """
    base_model = strat_models.BaseModel(
        loss=strat_models.nonparametric_discrete_loss(),
        reg=strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd),
    )

    # Both factors are cycles, so the last node is linked back to the first.
    factor_graphs = [nx.cycle_graph(53), nx.cycle_graph(24)]
    for position, factor in enumerate(factor_graphs):
        strat_models.set_edge_weight(factor, weights[position])
    product_graph = strat_models.cartesian_product(factor_graphs)

    model = strat_models.StratifiedModel(base_model, graph=product_graph)
    fit_info = model.fit(data_train, **kwargs)

    # Score the splits in train / val / test order after fitting.
    scores = tuple(
        model.anll(split) for split in (data_train, data_val, data_test))

    print("Stratified model with (weights, lambd) =", (weights, lambd))
    print("\t", fit_info)
    print("\t", *scores)

    return scores
Exemple #2
0
def make_G(w1, w2, w3):
    """Build the product of three weighted 10-node path graphs.

    The factors are created in the order the original code named them:
    VIX, unemployment, inflation. The original comment describes the VIX
    nodes as quantiles (deciles).
    NOTE(review): the original comments labelled the other two graphs
    "volume quantiles", which looks like a copy-paste slip; presumably they
    are deciles of unemployment and inflation -- confirm.

    Args:
        w1: Edge weight for the first (VIX) path graph.
        w2: Edge weight for the second (unemployment) path graph.
        w3: Edge weight for the third (inflation) path graph.

    Returns:
        A copy of the graph produced by ``strat_models.cartesian_product``.
    """
    factors = [nx.path_graph(10) for _ in range(3)]

    for factor, edge_weight in zip(factors, (w1, w2, w3)):
        strat_models.set_edge_weight(factor, edge_weight)

    product = strat_models.cartesian_product(factors)
    return product.copy()
    # Codes listed as neighbours of state1 in the `neighbors` table. This is
    # built once here rather than re-filtering the table and re-building a
    # list for every state2, and a set gives constant-time membership tests.
    # NOTE(review): state1, states, neighbors and G_state are bound outside
    # the visible lines (state1 presumably by an enclosing loop) -- confirm.
    state1_neighbors = set(
        neighbors[neighbors.StateCode == state1]['NeighborStateCode'])
    for state2 in states:
        if state2 in state1_neighbors:
            G_state.add_edge(state1, state2)

# Time graph: a path with one node per entry of `years`, relabelled so that
# node i (0..n_years-1) becomes years[i].
n_years = len(years)
G_time = nx.path_graph(n_years)
G_time = nx.relabel_nodes(G_time, dict(zip(np.arange(n_years), years)))

# Options forwarded to every fit() call in this section.
kwargs = dict(abs_tol=1e-5, rel_tol=1e-5, maxiter=200, n_jobs=4, verbose=1)

# One base model (Bernoulli loss + clip regularizer) shared by both
# stratified models built below.
loss = strat_models.bernoulli_loss()
reg = strat_models.clip_reg(lambd=(1e-5, 1 - 1e-5))
bm = strat_models.BaseModel(loss=loss, reg=reg)

# "Separate model": every edge weight in both factor graphs is set to 0
# before taking the product.
# NOTE(review): presumably zero weights mean no coupling between nodes --
# confirm against the strat_models documentation.
strat_models.set_edge_weight(G_state, 0)
strat_models.set_edge_weight(G_time, 0)
G = strat_models.cartesian_product([G_state, G_time])
sm_fully = strat_models.StratifiedModel(bm, graph=G)

# NOTE(review): data_train / data_test must already be defined at this point;
# their definition is not visible above this line.
info = sm_fully.fit(data_train, **kwargs)
anll_train = sm_fully.anll(data_train)
anll_test = sm_fully.anll(data_test)
print("Separate model")
print("\t", info)
print("\t", anll_train, anll_test)

# Stratified model: re-weight the same factor graphs in place (state edges 1,
# time edges 4) and rebuild the product graph, rebinding G.
strat_models.set_edge_weight(G_state, 1)
strat_models.set_edge_weight(G_time, 4)
G = strat_models.cartesian_product([G_state, G_time])
sm_strat = strat_models.StratifiedModel(bm, graph=G)
# Bundle each split into the dict form passed to fit()/anll() below
# (keys X, Y, Z).
data_train = dict(X=X_train, Y=Y_train, Z=Z_train)
data_test = dict(X=X_test, Y=Y_test, Z=Z_test)

# Logistic loss, constructed with intercept=True.
loss = strat_models.logistic_loss(intercept=True)

# Fit models
print("fitting...")
# Options forwarded to fit().
kwargs = dict(rel_tol=1e-4,
              abs_tol=1e-4,
              maxiter=500,
              n_jobs=12,
              verbose=True,
              rho=2.,
              max_cg_iterations=30)

# "Separate model": both factor graphs get edge weight 0 before the product
# graph is built.
# NOTE(review): G_sex and G_age are defined outside the visible lines.
strat_models.set_edge_weight(G_sex, 0)
strat_models.set_edge_weight(G_age, 0)
G = strat_models.utils.cartesian_product([G_sex, G_age])

# No regularizer is passed to this base model, only the loss.
bm_fully = strat_models.BaseModel(loss=loss)
sm_fully = strat_models.StratifiedModel(bm_fully, graph=G)

info = sm_fully.fit(data_train, **kwargs)
anll_test = sm_fully.anll(data_test)
# prediction_error is a helper defined elsewhere; it is called with the test
# data and the fitted model.
pred_error = prediction_error(data_test, sm_fully)

print('Separate model')
print('\t', info)
print('\t', anll_test, pred_error)

# Start re-weighting the factor graphs for the next model (continues past the
# visible lines).
strat_models.set_edge_weight(G_sex, 10)
Exemple #5
0
    # Second output list; it receives one entry per graph node in the loop
    # below.
    Z = []
    # For every node of G, append that node's entry of `events` to Y and the
    # node itself to Z, so Y[i] and Z[i] refer to the same node.
    # NOTE(review): Y, events and G are bound outside the visible lines (the
    # enclosing function header is not shown) -- confirm.
    for node in G.nodes():
        Y.append(events[node])
        Z.append(node)

    return Y, Z

# Build (Y, Z) lists from the 2017 frame for training and the 2018 frame for
# testing, then print how many entries each split has.
Y_train, Z_train = df_to_data(df_2017)
Y_test, Z_test = df_to_data(df_2018)
print(len(Y_train), len(Y_test))
# Unbind the current G; it is rebuilt from the factor graphs below.
# NOTE(review): presumably done to release memory -- confirm.
del G

# Fit models and evaluate log likelihood

# Options forwarded to fit().
kwargs = dict(rel_tol=1e-6, abs_tol=1e-6, maxiter=2000)
# "Fully" model: all four factor graphs get edge weight 0 before the product
# graph is built.
# NOTE(review): G_location, G_week, G_day and G_hour are defined outside the
# visible lines.
strat_models.set_edge_weight(G_location, 0)
strat_models.set_edge_weight(G_week, 0)
strat_models.set_edge_weight(G_day, 0)
strat_models.set_edge_weight(G_hour, 0)
G = strat_models.cartesian_product([G_location, G_week, G_day, G_hour])
# Here the graph is passed to fit() as an argument; the Poisson() constructor
# takes no arguments.
fully = strat_models.Poisson()
info = fully.fit(Y_train, Z_train, G, **kwargs)
anll_train = fully.anll(Y_train, Z_train)
anll_test = fully.anll(Y_test, Z_test)
print("Fully")
print("\t", info)
print("\t", anll_train, anll_test)
# Unbind the product graph again before the next configuration is built.
del G

# Start re-weighting the factor graphs for the next model (continues past the
# visible lines).
strat_models.set_edge_weight(G_location, 100)
strat_models.set_edge_weight(G_week, 100)
Exemple #6
0
# Fit models
# Bundle each split into a dict with keys X, Y, Z.
# NOTE(review): X_*, Y_*, Z_* are defined outside the visible lines.
data_train = dict(X=X_train, Y=Y_train, Z=Z_train)
data_test = dict(X=X_test, Y=Y_test, Z=Z_test)

# Options collected for later fit() calls.
kwargs = dict(rel_tol=1e-5, abs_tol=1e-5, maxiter=1000, n_jobs=2, verbose=1)


def rms(x):
    """Return the root-mean-square of ``x``.

    ``x`` is squared elementwise, averaged over all of its elements, and the
    square root of that average is returned.
    """
    squared = np.square(x)
    mean_of_squares = np.mean(squared)
    return np.sqrt(mean_of_squares)


# One base model (sum-of-squares loss with intercept=True, sum-of-squares
# regularizer with lambd=1e-4) shared by both stratified models below.
loss = strat_models.sum_squares_loss(intercept=True)
reg = strat_models.sum_squares_reg(lambd=1e-4)
bm = strat_models.BaseModel(loss=loss, reg=reg)

# "Fully" model: every edge of G gets the tiny weight 1e-8.
# NOTE(review): G is defined outside the visible lines.
strat_models.set_edge_weight(G, 1e-8)

sm_fully = strat_models.StratifiedModel(bm, graph=G)

info = sm_fully.fit(data_train, **kwargs)
score = sm_fully.scores(data_test)
print("Fully")
print("\t", info)
print("\t", score)

# "Strat" model: the same G object is re-weighted in place to 15. sm_fully has
# already been fit and scored above, so its printed results are unaffected.
strat_models.set_edge_weight(G, 15)
sm_strat = strat_models.StratifiedModel(bm, graph=G)

# `info` and `score` are rebound here, replacing the "Fully" values.
info = sm_strat.fit(data_train, **kwargs)
score = sm_strat.scores(data_test)
print("Strat")
Exemple #7
0
## Eigen-stratified model
print("fitting eigen-stratified models...")
# Mutate the existing kwargs dict in place; any other keys it holds are kept.
# NOTE(review): kwargs is created outside the visible lines.
kwargs["maxiter"] = 600
kwargs["verbose"] = False

# 53 * 24 matches the node counts of the two cycle graphs built below.
# K is not used again in the visible lines.
K = 53 * 24

# Edge weights for the two factor graphs, the regularizer's lambd pair, and
# the value passed to fit() as num_eigen.
weight_week = .45
weight_hr = .55
lambd = (0.01, 0.001)
m = 90

# Week (53 nodes) and hour (24 nodes) cycles, weighted and then combined into
# one product graph.
G_week = nx.cycle_graph(53)
G_hr = nx.cycle_graph(24)
strat_models.set_edge_weight(G_week, weight_week)
strat_models.set_edge_weight(G_hr, weight_hr)
G_eigen = strat_models.cartesian_product([G_week, G_hr])

# NOTE(review): D (the regularizer's A argument) is defined outside the
# visible lines.
loss = strat_models.nonparametric_discrete_loss()
reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)
bm_eigen = strat_models.BaseModel(loss=loss, reg=reg)

sm_eigen = strat_models.StratifiedModel(bm_eigen, graph=G_eigen)

# num_eigen=m is passed to fit() in addition to the shared kwargs.
info = sm_eigen.fit(data_train, num_eigen=m, **kwargs)
anll_train = sm_eigen.anll(data_train)
anll_val = sm_eigen.anll(data_val)
anll_test = sm_eigen.anll(data_test)

print('Eigen-stratified model, {} eigenvectors used'.format(m))