def test_poisson():
    """Smoke-test a stratified Poisson model: fit, sample, and report ANLL."""
    print("Poisson test...")
    num_nodes = 1000
    graph = nx.cycle_graph(num_nodes)
    Z = np.random.randint(num_nodes, size=10000)
    Y = np.random.randint(1, 10, size=10000)

    base = strat_models.BaseModel(
        loss=strat_models.losses.poisson_loss(min_theta=1e-3),
        reg=strat_models.regularizers.min_threshold_reg_one_elem(lambd=1e-3),
    )
    model = strat_models.StratifiedModel(base, graph=graph)

    data = dict(Y=Y, Z=Z)
    fit_opts = dict(verbose=True, abs_tol=1e-6, maxiter=500)
    info = model.fit(data, **fit_opts)
    assert info["optimal"]

    # Draw samples from a couple of strata to exercise the sampling path.
    data_sample = dict(Z=np.random.randint(2, size=100))
    samples = model.sample(data=data_sample)
    print("ANLL is {}".format(model.anll(data)))
    print("Poisson done.")
def test_nonparametric_discrete():
    """Smoke-test a stratified non-parametric discrete distribution model."""
    # NOTE: the printed message below intentionally matches the original
    # script byte-for-byte (including its spelling).
    print("Non-parametric discrete distibution test...")
    K = 100
    num_classes = 10
    SIZE = 100

    G = nx.cycle_graph(K)
    strat_models.utils.set_edge_weight(G, 10)
    K_eye = np.eye(K)

    Z = np.random.randint(K, size=SIZE)
    Y = np.random.randint(0, num_classes, size=SIZE)
    print(Z)

    bm = strat_models.BaseModel(
        loss=strat_models.losses.nonparametric_discrete_loss(),
        reg=strat_models.regularizers.sum_squares_reg(lambd=0.4),
    )
    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z)
    info = sm.fit(data, **dict(verbose=True, abs_tol=1e-6, maxiter=500))
    assert info["optimal"]

    # Evaluate ANLL on freshly drawn held-out data.
    data_test = dict(
        Z=np.random.randint(K, size=SIZE),
        Y=np.random.randint(0, num_classes, size=SIZE),
    )
    print("ANLL is {}".format(sm.anll(data_test)))
    print("Non-parametric discrete loss done.")
def test_eigen():
    r"""Eigen-stratified ridge regression test.

    Solves, per stratum, ``||X\theta - Y||^2 + ||\theta||^2`` with the
    parameters constrained to a span of graph-Laplacian eigenvectors
    (``num_eigen=30``).

    Fix: the docstring is now a raw string — previously ``\t`` in
    ``\theta`` was interpreted as a literal tab character, corrupting the
    displayed formula.
    """
    print("ridge regression test...")
    K = 100
    G = nx.cycle_graph(K)
    n = 10   # number of features
    m = 2    # number of outputs

    X = np.random.randn(500, n)
    Z = np.random.randint(K, size=500)
    Y = np.random.randn(500, m)

    bm = strat_models.BaseModel(
        loss=strat_models.losses.sum_squares_loss(intercept=False),
        reg=strat_models.regularizers.sum_squares_reg(lambd=1),
    )
    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(X=X, Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=500)
    # num_eigen restricts theta to the top graph-Laplacian eigenvectors.
    info = sm.fit(data, num_eigen=30, **kwargs)
    assert info["optimal"]

    predictions = sm.predict(data=data)
    print("ANLL is {}".format(sm.anll(data)))
    print("eigen-stratified ridge regression done.")
def train_strat_model(weights, data_train, data_val, data_test, lambd):
    """Fit a week-by-hour stratified model and return its ANLL triple.

    Returns ``(anll_train, anll_val, anll_test)``.

    NOTE(review): this function reads two free variables that must be
    defined at module level elsewhere in the file — ``D`` (the scaling
    matrix passed to the regularizer) and ``kwargs`` (the fit options) —
    TODO confirm they are set before this is called.
    """
    loss = strat_models.nonparametric_discrete_loss()
    reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)
    bm = strat_models.BaseModel(loss=loss, reg=reg)

    # Stratification graph: cycle over weeks-of-year x cycle over hours-of-day.
    G_week = nx.cycle_graph(53)
    G_hr = nx.cycle_graph(24)
    strat_models.set_edge_weight(G_week, weights[0])
    strat_models.set_edge_weight(G_hr, weights[1])
    G = strat_models.cartesian_product([G_week, G_hr])

    sm = strat_models.StratifiedModel(bm, graph=G)
    info = sm.fit(data_train, **kwargs)

    anll_train = sm.anll(data_train)
    anll_val = sm.anll(data_val)
    anll_test = sm.anll(data_test)

    print("Stratified model with (weights, lambd) =", (weights, lambd))
    print("\t", info)
    print("\t", anll_train, anll_val, anll_test)
    return anll_train, anll_val, anll_test
def test_bernoulli():
    """Smoke-test a stratified Bernoulli model: fit, sample, and report ANLL.

    Fix: removed a block of commented-out dead code (an older
    ``Bernoulli`` API usage) that no longer matched the current library
    interface.
    """
    print("Bernoulli test...")
    K = 2
    G = nx.cycle_graph(K)
    Z = np.random.randint(K, size=1000)
    Y = np.random.randint(0, 2, size=1000)

    # Probabilities are clipped to (1e-5, 1 - 1e-5) in both the loss and
    # the regularizer to keep the log-likelihood finite.
    bm = strat_models.BaseModel(
        loss=strat_models.losses.bernoulli_loss(1e-5, 1 - 1e-5),
        reg=strat_models.regularizers.clip_reg((1e-5, 1 - 1e-5)),
    )
    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-4, maxiter=500, n_jobs=2)
    info = sm.fit(data, **kwargs)
    assert info["optimal"]

    data_sample = dict(Z=np.random.randint(2, size=100))
    samples = sm.sample(data=data_sample)
    print("ANLL is {}".format(sm.anll(data)))
    print("Bernoulli done.")
def test_trace_minus_logdet():
    """Smoke-test a stratified covariance (trace minus logdet) model."""
    print("Trace minus logdet test...")
    K = 3
    n = 10

    G = nx.cycle_graph(K)
    # Re-add every edge with an explicit Laplacian weight of 0.1.
    for u, v in G.edges():
        G.add_edge(u, v, weight=0.1)

    Z = np.array(list(G.nodes()))
    # One SPD matrix per stratum (sample covariance + identity shift).
    Y = [np.cov(np.random.randn(n, n)) + np.eye(n) for _ in range(K)]

    bm = strat_models.BaseModel(
        loss=strat_models.losses.covariance_max_likelihood_loss(),
        reg=strat_models.regularizers.L1_reg(lambd=1),
    )
    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z, n=n)
    info = sm.fit(data, **dict(verbose=True, abs_tol=1e-6, maxiter=900))
    print("ANLL is {}".format(sm.anll(data)))
    assert info["optimal"]

    data_sample = dict(Z=np.random.randint(K, size=5))
    samples = sm.sample(data=data_sample)
    print("Trace minus logdet done.")
def test_joint_mean_covariance():
    """Smoke-test a joint mean/covariance stratified model.

    Fits the model on multivariate-normal data and prints the recovered
    mean/covariance of stratum 0 next to the ground truth.

    Fix: replaced a side-effect list comprehension
    (``[print(...) for y in Y]``) with a plain ``for`` loop —
    comprehensions are for building collections, not side effects.
    """
    print("Joint mean covariance test...")
    K = 3
    G = nx.cycle_graph(K)
    G.add_edge(0, 1, weight=0.01)
    G.add_edge(1, 2, weight=0.01)
    G.add_edge(2, 0, weight=0.01)
    Z = np.array(list(G.nodes()))

    n = 10
    mus = [np.ones(n) for _ in range(K)]
    S = [np.random.randn(n, n) for _ in range(K)]
    S = [np.cov(s) + np.eye(n) for s in S]  # make each covariance SPD
    # 9 samples per stratum, stored columns-as-samples (shape n x 9).
    Y = [np.random.multivariate_normal(mus[k], S[k], 9).T for k in range(K)]
    for y in Y:
        print(np.mean(y, 1))

    bm = strat_models.BaseModel(
        loss=strat_models.losses.mean_covariance_max_likelihood_loss(),
        reg=strat_models.regularizers.sum_squares_reg(lambd=0),
    )
    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z, n=n)
    kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=20, n_jobs=2)
    info = sm.fit(data, **kwargs)

    # theta appears to store [S^{-1} | S^{-1} mu]; invert to recover
    # (S, mu) for stratum 0 — consistent with the algebra below.
    Snu = sm.G._node[0]["theta"]
    S_star = np.linalg.inv(Snu[:, :-1])
    mu_star = S_star @ Snu[:, -1]
    print(S[0], mus[0])
    print(S_star, mu_star)
    print(info)
    print("ANLL is {}".format(sm.anll(data)))

    data_sample = dict(Z=np.random.randint(K, size=5))
    samples = sm.sample(data=data_sample)
    print("Joint mean covariance done.")
def test_log_reg():
    """Smoke-test stratified logistic regression: fit, predict, report ANLL."""
    print("Logistic regression test...")
    num_strata = 30
    graph = nx.cycle_graph(num_strata)
    n_features = 10

    X = np.random.randn(1000, n_features)
    Z = np.random.randint(num_strata, size=1000)
    Y = np.random.randint(1, 10, size=1000)

    base = strat_models.BaseModel(
        loss=strat_models.losses.logistic_loss(intercept=True))
    model = strat_models.StratifiedModel(base, graph=graph)

    data = dict(X=X, Y=Y, Z=Z)
    info = model.fit(data, **dict(verbose=True, abs_tol=1e-6, maxiter=500))
    assert info["optimal"]

    # Predict on the first 20 rows only.
    data_predict = dict(X=X[:20, :], Z=Z[:20])
    predictions = model.predict(data=data_predict)
    print("ANLL is {}".format(model.anll(data)))
    print("logreg done.")
# Build the state adjacency graph from the neighbors table.
# NOTE(review): `neighbors` is presumably a DataFrame with columns
# StateCode / NeighborStateCode — confirm against the loading code above.
for state1 in states:
    for state2 in states:
        neighbor_codes = list(
            neighbors[neighbors.StateCode == state1]['NeighborStateCode'])
        if state2 in neighbor_codes:
            G_state.add_edge(state1, state2)

# Time graph: a path over the years, relabeled with the actual year values.
n_years = len(years)
G_time = nx.path_graph(n_years)
G_time = nx.relabel_nodes(G_time, dict(zip(np.arange(n_years), years)))

kwargs = dict(abs_tol=1e-5, rel_tol=1e-5, maxiter=200, n_jobs=4, verbose=1)
loss = strat_models.bernoulli_loss()
reg = strat_models.clip_reg(lambd=(1e-5, 1 - 1e-5))
bm = strat_models.BaseModel(loss=loss, reg=reg)

# Zero edge weights decouple the strata: each (state, year) fits separately.
strat_models.set_edge_weight(G_state, 0)
strat_models.set_edge_weight(G_time, 0)
G = strat_models.cartesian_product([G_state, G_time])
sm_fully = strat_models.StratifiedModel(bm, graph=G)
info = sm_fully.fit(data_train, **kwargs)
anll_train = sm_fully.anll(data_train)
anll_test = sm_fully.anll(data_test)
print("Separate model")
print("\t", info)
print("\t", anll_train, anll_test)

# Nonzero weights for the stratified fit that follows.
strat_models.set_edge_weight(G_state, 1)
strat_models.set_edge_weight(G_time, 4)
# Fit models
print("fitting...")
kwargs = dict(rel_tol=1e-4, abs_tol=1e-4, maxiter=500, n_jobs=12,
              verbose=True, rho=2., max_cg_iterations=30)

# Zero edge weights decouple strata: every (sex, age) cell fits on its own.
strat_models.set_edge_weight(G_sex, 0)
strat_models.set_edge_weight(G_age, 0)
G = strat_models.utils.cartesian_product([G_sex, G_age])
bm_fully = strat_models.BaseModel(loss=loss)
sm_fully = strat_models.StratifiedModel(bm_fully, graph=G)
info = sm_fully.fit(data_train, **kwargs)
anll_test = sm_fully.anll(data_test)
pred_error = prediction_error(data_test, sm_fully)
print('Separate model')
print('\t', info)
print('\t', anll_test, pred_error)

# Stratified model: couple neighboring sexes/ages with nonzero weights.
strat_models.set_edge_weight(G_sex, 10)
strat_models.set_edge_weight(G_age, 500)
G = strat_models.utils.cartesian_product([G_sex, G_age])
bm_strat = strat_models.BaseModel(loss=loss)
abs_tol=1e-4, maxiter=400, n_jobs=4, verbose=False, rho=3., max_cg_iterations=30) ## Separate model G_sex = create_sex_graph(weight=0) G_age = create_age_graph(weight=0) G = strat_models.utils.cartesian_product([G_sex, G_age]) loss = strat_models.logistic_loss(intercept=True) reg = strat_models.sum_squares_reg(lambd=35) bm_sep = strat_models.BaseModel(loss=loss, reg=reg) sm_sep = strat_models.StratifiedModel(bm_sep, graph=G) info = sm_sep.fit(data_train, **kwargs) anll_train_sep = sm_sep.anll(data_train) anll_val_sep = sm_sep.anll(data_val) anll_test_sep = sm_sep.anll(data_test) print('Separate model') print('\tlambda =', 35) print('\t', info) print('\t', anll_train_sep, anll_val_sep, anll_test_sep) ## Common model G = nx.empty_graph(1)
return Y, Z Y_train, Z_train = df_to_data(df_2017) Y_test, Z_test = df_to_data(df_2018) print(len(Y_train), len(Y_test)) data_train = dict(Y=Y_train, Z=Z_train) data_test = dict(Y=Y_test, Z=Z_test) del G # Fit models and evaluate log likelihood loss = strat_models.poisson_loss() bm = strat_models.BaseModel(loss=loss) kwargs = dict(rel_tol=1e-6, abs_tol=1e-6, maxiter=2000) strat_models.set_edge_weight(G_location, 0) strat_models.set_edge_weight(G_week, 0) strat_models.set_edge_weight(G_day, 0) strat_models.set_edge_weight(G_hour, 0) G = strat_models.cartesian_product([G_location, G_week, G_day, G_hour]) sm_fully = strat_models.StratifiedModel(bm, graph=G) info = sm_fully.fit(data_train, **kwargs) anll_train = sm_fully.anll(data_train) anll_test = sm_fully.anll(data_test) print("Separate model") print("\t", info)
kwargs["verbose"] = False

# Eigen-stratified model hyperparameters.
K = 53 * 24          # strata: weeks-of-year x hours-of-day
weight_week = .45
weight_hr = .55
lambd = (0.01, 0.001)
m = 90               # number of graph-Laplacian eigenvectors to keep

G_week = nx.cycle_graph(53)
G_hr = nx.cycle_graph(24)
strat_models.set_edge_weight(G_week, weight_week)
strat_models.set_edge_weight(G_hr, weight_hr)
G_eigen = strat_models.cartesian_product([G_week, G_hr])

loss = strat_models.nonparametric_discrete_loss()
# NOTE(review): ``D`` must be defined earlier in the file — confirm.
reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)
bm_eigen = strat_models.BaseModel(loss=loss, reg=reg)
sm_eigen = strat_models.StratifiedModel(bm_eigen, graph=G_eigen)
info = sm_eigen.fit(data_train, num_eigen=m, **kwargs)

anll_train = sm_eigen.anll(data_train)
anll_val = sm_eigen.anll(data_val)
anll_test = sm_eigen.anll(data_test)
print('Eigen-stratified model, {} eigenvectors used'.format(m))
print('\t(weight_week, weight_hour, lambd, m)=',
      (weight_week, weight_hr, lambd, m))
print('\t', info)
print('\t', anll_train, anll_val, anll_test)