def train_strat_model(weights, data_train, data_val, data_test, lambd):
    """Fit one stratified model on a 53 x 24 cycle-graph product and score it.

    A base model is built from ``nonparametric_discrete_loss`` and
    ``scaled_plus_sum_squares_reg``; it is stratified over the Cartesian
    product of a 53-node cycle graph and a 24-node cycle graph, fitted on
    ``data_train``, and evaluated with ``anll`` on all three data splits.
    The fit info and the three scores are printed as a side effect.

    Relies on the module-level names ``D`` (passed to the regularizer as
    ``A``) and ``kwargs`` (forwarded to ``fit``), neither of which is
    defined inside this function.

    Args:
        weights: Indexable; ``weights[0]`` is the edge weight of the
            53-node cycle and ``weights[1]`` that of the 24-node cycle
            (presumably week-of-year and hour-of-day -- TODO confirm).
        data_train: Data passed to ``fit`` and ``anll``.
        data_val: Data passed to ``anll``.
        data_test: Data passed to ``anll``.
        lambd: Forwarded unchanged to ``scaled_plus_sum_squares_reg``.

    Returns:
        Tuple ``(anll_train, anll_val, anll_test)``.
    """
    base_model = strat_models.BaseModel(
        loss=strat_models.nonparametric_discrete_loss(),
        reg=strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd),
    )

    # Factor graphs: weights[i] is applied to the i-th factor, in order.
    factor_graphs = (nx.cycle_graph(53), nx.cycle_graph(24))
    for position, factor in enumerate(factor_graphs):
        strat_models.set_edge_weight(factor, weights[position])
    product_graph = strat_models.cartesian_product(list(factor_graphs))

    model = strat_models.StratifiedModel(base_model, graph=product_graph)
    fit_info = model.fit(data_train, **kwargs)

    # Score every split before printing anything, in train/val/test order.
    scores = tuple(
        model.anll(split) for split in (data_train, data_val, data_test)
    )

    print("Stratified model with (weights, lambd) =", (weights, lambd))
    print("\t", fit_info)
    print("\t", *scores)
    return scores
def make_G(w1, w2, w3):
    """Return the Cartesian product of three weighted 10-node path graphs.

    Three ``nx.path_graph(10)`` factors are created, the i-th one gets the
    i-th edge weight via ``strat_models.set_edge_weight``, and a copy of
    their ``strat_models.cartesian_product`` is returned.

    NOTE(review): the original code named the factors after VIX,
    unemployment and inflation, in that order, and described the first as
    "vix quantiles (deciles)". The other two were commented as "volume
    quantiles", which looks like a copy-paste leftover -- presumably they
    are deciles of unemployment and inflation; confirm against the data
    preparation code.

    Args:
        w1: Edge weight for the first factor graph.
        w2: Edge weight for the second factor graph.
        w3: Edge weight for the third factor graph.

    Returns:
        A copy of the product graph.
    """
    # Create all three factors first, then weight them in argument order.
    factors = [nx.path_graph(10) for _ in range(3)]
    for factor, edge_weight in zip(factors, (w1, w2, w3)):
        strat_models.set_edge_weight(factor, edge_weight)

    return strat_models.cartesian_product(factors).copy()
# NOTE(review): this chunk opens mid-script. `state1`, `states`, `neighbors`,
# `G_state`, `years`, `data_train` and `data_test` are all bound outside the
# visible code. `state1` is presumably the variable of an enclosing loop; if
# so, the loop below belongs one indent level deeper -- TODO confirm.
#
# Add an edge (state1, state2) for every state2 that appears in the
# 'NeighborStateCode' column of the rows of `neighbors` whose StateCode
# equals state1.
for state2 in states:
    if state2 in list(
            neighbors[neighbors.StateCode == state1]['NeighborStateCode']):
        G_state.add_edge(state1, state2)

# Path graph with one node per entry of `years`, relabelled so that the nodes
# are the entries of `years` instead of the integers 0..n_years-1.
n_years = len(years)
G_time = nx.path_graph(n_years)
G_time = nx.relabel_nodes(G_time, dict(zip(np.arange(n_years), years)))

# Options forwarded to fit() below.
kwargs = dict(abs_tol=1e-5, rel_tol=1e-5, maxiter=200, n_jobs=4, verbose=1)

# Base model shared by both stratified models built in this chunk.
loss = strat_models.bernoulli_loss()
reg = strat_models.clip_reg(lambd=(1e-5, 1 - 1e-5))
bm = strat_models.BaseModel(loss=loss, reg=reg)

# First fit: edge weight 0 on both factor graphs. The script reports this
# run under the label "Separate model".
# The weights must be set before cartesian_product() is called.
strat_models.set_edge_weight(G_state, 0)
strat_models.set_edge_weight(G_time, 0)
G = strat_models.cartesian_product([G_state, G_time])
sm_fully = strat_models.StratifiedModel(bm, graph=G)
info = sm_fully.fit(data_train, **kwargs)
anll_train = sm_fully.anll(data_train)
anll_test = sm_fully.anll(data_test)
print("Separate model")
print("\t", info)
print("\t", anll_train, anll_test)

# Second model: edge weight 1 on the state graph and 4 on the time graph.
# Only the construction is visible here; G, and later info/anll_*, are
# rebound, so the values above are overwritten by what follows.
strat_models.set_edge_weight(G_state, 1)
strat_models.set_edge_weight(G_time, 4)
G = strat_models.cartesian_product([G_state, G_time])
sm_strat = strat_models.StratifiedModel(bm, graph=G)
# NOTE(review): script fragment. X_*/Y_*/Z_* arrays, G_sex, G_age and
# prediction_error are defined outside the visible code.

# Bundle each split into the dict form that fit()/anll() are called with below.
data_train = dict(X=X_train, Y=Y_train, Z=Z_train)
data_test = dict(X=X_test, Y=Y_test, Z=Z_test)

loss = strat_models.logistic_loss(intercept=True)

# Fit models
print("fitting...")
# Options forwarded to fit() below.
kwargs = dict(rel_tol=1e-4, abs_tol=1e-4, maxiter=500, n_jobs=12,
              verbose=True, rho=2., max_cg_iterations=30)

# First fit: edge weight 0 on both factor graphs. The script reports this
# run under the label 'Separate model'.
# The weights must be set before cartesian_product() is called.
strat_models.set_edge_weight(G_sex, 0)
strat_models.set_edge_weight(G_age, 0)
G = strat_models.utils.cartesian_product([G_sex, G_age])
# No regularizer is passed here; BaseModel is built from the loss alone.
bm_fully = strat_models.BaseModel(loss=loss)
sm_fully = strat_models.StratifiedModel(bm_fully, graph=G)
info = sm_fully.fit(data_train, **kwargs)
anll_test = sm_fully.anll(data_test)
pred_error = prediction_error(data_test, sm_fully)
print('Separate model')
print('\t', info)
print('\t', anll_test, pred_error)

# Start of the next configuration: G_sex gets edge weight 10. The rest of
# that setup lies beyond this chunk.
strat_models.set_edge_weight(G_sex, 10)
# NOTE(review): the next statements, up to and including `return Y, Z`, are
# the tail of a function whose `def` line lies before this chunk (presumably
# `df_to_data`, which is called right after -- TODO confirm). `Y`, `events`
# and `G` are bound in the part that is not visible. The 4-space indent below
# assumes that function sits at module level.
    Z = []
    # One entry per graph node: events[node] goes to Y, the node itself to Z,
    # so Y[i] and Z[i] always refer to the same node.
    for node in G.nodes():
        Y.append(events[node])
        Z.append(node)
    return Y, Z


Y_train, Z_train = df_to_data(df_2017)
Y_test, Z_test = df_to_data(df_2018)
print(len(Y_train), len(Y_test))

# Drop the module-level name G; it is rebound to a freshly built product
# graph below (presumably done to release the old graph -- TODO confirm).
del G

# Fit models and evaluate log likelihood
kwargs = dict(rel_tol=1e-6, abs_tol=1e-6, maxiter=2000)

# First fit: edge weight 0 on all four factor graphs. The script reports this
# run under the label "Fully".
# The weights must be set before cartesian_product() is called.
strat_models.set_edge_weight(G_location, 0)
strat_models.set_edge_weight(G_week, 0)
strat_models.set_edge_weight(G_day, 0)
strat_models.set_edge_weight(G_hour, 0)
G = strat_models.cartesian_product([G_location, G_week, G_day, G_hour])

# Unlike a BaseModel/StratifiedModel pair, this model object is given the
# graph at fit() time and takes Y and Z as separate positional arguments.
fully = strat_models.Poisson()
info = fully.fit(Y_train, Z_train, G, **kwargs)
anll_train = fully.anll(Y_train, Z_train)
anll_test = fully.anll(Y_test, Z_test)
print("Fully")
print("\t", info)
print("\t", anll_train, anll_test)

del G

# Start of the next configuration: edge weight 100 on the location and week
# graphs. The rest of that setup lies beyond this chunk.
strat_models.set_edge_weight(G_location, 100)
strat_models.set_edge_weight(G_week, 100)
# Fit models data_train = dict(X=X_train, Y=Y_train, Z=Z_train) data_test = dict(X=X_test, Y=Y_test, Z=Z_test) kwargs = dict(rel_tol=1e-5, abs_tol=1e-5, maxiter=1000, n_jobs=2, verbose=1) def rms(x): return np.sqrt(np.mean(np.square(x))) loss = strat_models.sum_squares_loss(intercept=True) reg = strat_models.sum_squares_reg(lambd=1e-4) bm = strat_models.BaseModel(loss=loss, reg=reg) strat_models.set_edge_weight(G, 1e-8) sm_fully = strat_models.StratifiedModel(bm, graph=G) info = sm_fully.fit(data_train, **kwargs) score = sm_fully.scores(data_test) print("Fully") print("\t", info) print("\t", score) strat_models.set_edge_weight(G, 15) sm_strat = strat_models.StratifiedModel(bm, graph=G) info = sm_strat.fit(data_train, **kwargs) score = sm_strat.scores(data_test) print("Strat")
## Eigen-stratified model print("fitting eigen-stratified models...") kwargs["maxiter"] = 600 kwargs["verbose"] = False K = 53 * 24 weight_week = .45 weight_hr = .55 lambd = (0.01, 0.001) m = 90 G_week = nx.cycle_graph(53) G_hr = nx.cycle_graph(24) strat_models.set_edge_weight(G_week, weight_week) strat_models.set_edge_weight(G_hr, weight_hr) G_eigen = strat_models.cartesian_product([G_week, G_hr]) loss = strat_models.nonparametric_discrete_loss() reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd) bm_eigen = strat_models.BaseModel(loss=loss, reg=reg) sm_eigen = strat_models.StratifiedModel(bm_eigen, graph=G_eigen) info = sm_eigen.fit(data_train, num_eigen=m, **kwargs) anll_train = sm_eigen.anll(data_train) anll_val = sm_eigen.anll(data_val) anll_test = sm_eigen.anll(data_test) print('Eigen-stratified model, {} eigenvectors used'.format(m))