Ejemplo n.º 1
0
def train_strat_model(weights, data_train, data_val, data_test, lambd):
    """Fit one stratified model on a 53 x 24 cycle-product graph and score it.

    The regularization graph is the Cartesian product of a 53-node cycle
    (edge weight ``weights[0]``) and a 24-node cycle (edge weight
    ``weights[1]``); going by the original variable names these are
    presumably week-of-year and hour-of-day -- confirm against the data
    preparation code.

    The base model pairs ``nonparametric_discrete_loss`` with
    ``scaled_plus_sum_squares_reg(A=D, lambd=lambd)``.

    NOTE: ``D`` and ``kwargs`` are not parameters; they are read from the
    enclosing (module) scope and must exist before this function is called.

    Args:
        weights: Indexable with at least two entries, the edge weights for
            the 53-node and 24-node cycles respectively.
        data_train: Data passed to ``fit`` and scored with ``anll``.
        data_val: Data scored with ``anll`` after fitting.
        data_test: Data scored with ``anll`` after fitting.
        lambd: Forwarded unchanged to ``scaled_plus_sum_squares_reg``.

    Returns:
        A 3-tuple ``(anll_train, anll_val, anll_test)`` of the values
        returned by the fitted model's ``anll`` on each split.
    """
    base_model = strat_models.BaseModel(
        loss=strat_models.nonparametric_discrete_loss(),
        reg=strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd),
    )

    # Build each cycle factor and weight its edges before taking the product.
    factor_graphs = []
    for n_nodes, edge_weight in ((53, weights[0]), (24, weights[1])):
        cycle = nx.cycle_graph(n_nodes)
        strat_models.set_edge_weight(cycle, edge_weight)
        factor_graphs.append(cycle)
    product_graph = strat_models.cartesian_product(factor_graphs)

    model = strat_models.StratifiedModel(base_model, graph=product_graph)
    fit_info = model.fit(data_train, **kwargs)

    # Score in the fixed order train, validation, test.
    scores = tuple(
        model.anll(split) for split in (data_train, data_val, data_test)
    )

    print("Stratified model with (weights, lambd) =", (weights, lambd))
    print("\t", fit_info)
    print("\t", *scores)

    return scores
Ejemplo n.º 2
0
def make_G(w1, w2, w3):
    """Build the three-factor regularization graph and return a copy of it.

    Three independent 10-node path graphs are created and given edge
    weights ``w1``, ``w2`` and ``w3`` in that order. The original code names
    the factors for VIX, unemployment and inflation; presumably each node is
    one decile bin of that quantity -- confirm against the binning code.

    Args:
        w1: Edge weight for the first (VIX) path graph.
        w2: Edge weight for the second (unemployment) path graph.
        w3: Edge weight for the third (inflation) path graph.

    Returns:
        A copy of the Cartesian product of the three weighted path graphs.
    """
    factors = []
    for edge_weight in (w1, w2, w3):
        chain = nx.path_graph(10)
        strat_models.set_edge_weight(chain, edge_weight)
        factors.append(chain)

    product = strat_models.cartesian_product(factors)
    return product.copy()
Ejemplo n.º 3
0
                neighbors[neighbors.StateCode == state1]['NeighborStateCode']):
            G_state.add_edge(state1, state2)

# Time factor graph: a path with one node per entry of `years`, relabeled so
# that the node ids are the year values themselves instead of 0..n_years-1.
n_years = len(years)
G_time = nx.path_graph(n_years)
G_time = nx.relabel_nodes(G_time, dict(zip(np.arange(n_years), years)))

# Solver options forwarded to every `fit` call below.
kwargs = dict(abs_tol=1e-5, rel_tol=1e-5, maxiter=200, n_jobs=4, verbose=1)

# Base model: Bernoulli loss with a clip regularizer over (1e-5, 1 - 1e-5).
# The same `bm` instance is reused for both stratified models below.
loss = strat_models.bernoulli_loss()
reg = strat_models.clip_reg(lambd=(1e-5, 1 - 1e-5))
bm = strat_models.BaseModel(loss=loss, reg=reg)

# Baseline: every edge weight is 0 on both factor graphs, so (presumably)
# no stratum is coupled to its neighbors -- matches the "Separate model"
# label printed below.
# set_edge_weight mutates G_state / G_time in place, so it must run before
# the product graph is built.
strat_models.set_edge_weight(G_state, 0)
strat_models.set_edge_weight(G_time, 0)
G = strat_models.cartesian_product([G_state, G_time])
sm_fully = strat_models.StratifiedModel(bm, graph=G)

info = sm_fully.fit(data_train, **kwargs)
anll_train = sm_fully.anll(data_train)
anll_test = sm_fully.anll(data_test)
print("Separate model")
print("\t", info)
print("\t", anll_train, anll_test)

# Stratified model: the same factor graphs are re-weighted in place (state
# edges 1, time edges 4) and a fresh product graph is built from them.
# `G` and `info` are rebound here, replacing the baseline's values.
strat_models.set_edge_weight(G_state, 1)
strat_models.set_edge_weight(G_time, 4)
G = strat_models.cartesian_product([G_state, G_time])
sm_strat = strat_models.StratifiedModel(bm, graph=G)

info = sm_strat.fit(data_train, **kwargs)
Ejemplo n.º 4
0
# Relabel G_age so its nodes are the actual age values from `list_of_ages`
# rather than the integer indices 0..num_ages-1.
index_to_age = dict(zip(np.arange(num_ages), list_of_ages))
G_age = nx.relabel_nodes(G_age, index_to_age)

# Fit models
# Solver options shared by both `fit` calls below.
kwargs = dict(rel_tol=1e-4,
              abs_tol=1e-4,
              maxiter=500,
              n_jobs=12,
              verbose=0,
              rho=2.,
              max_cg_iterations=30)

# Baseline: all edge weights are 0 on both factor graphs, so (presumably)
# each (sex, age) stratum is fit on its own -- matches the "Separate model"
# label printed below.
# set_edge_weight mutates the factor graphs in place, so it must precede
# cartesian_product.
fully = strat_models.LogisticRegression(lambd=.1)
strat_models.set_edge_weight(G_sex, 0)
strat_models.set_edge_weight(G_age, 0)
G = strat_models.cartesian_product([G_sex, G_age])
info = fully.fit(X_train, Y_train, Z_train, G, **kwargs)
anll_test = fully.anll(X_test, Y_test, Z_test)
pred_error = prediction_error(X_test, Y_test, Z_test, fully)

print('Separate model')
print('\t', info)
print('\t', anll_test, pred_error)

# Stratified model: same factor graphs re-weighted in place (sex edges 10,
# age edges 500) with an identically configured LogisticRegression.
# `G`, `info`, `anll_test` and `pred_error` are rebound, replacing the
# baseline's values.
strat = strat_models.LogisticRegression(lambd=.1)
strat_models.set_edge_weight(G_sex, 10)
strat_models.set_edge_weight(G_age, 500)
G = strat_models.cartesian_product([G_sex, G_age])
info = strat.fit(X_train, Y_train, Z_train, G, **kwargs)
anll_test = strat.anll(X_test, Y_test, Z_test)
pred_error = prediction_error(X_test, Y_test, Z_test, strat)
Ejemplo n.º 5
0
         "X Coordinate", "Y Coordinate", "Updated On", "Location", "Date",
         "Primary Type", "Location Description", "IUCR"], axis=1, inplace=True, errors='ignore')

# Split the frame by year and shuffle each split.
# NOTE(review): `shuffle` is imported elsewhere (presumably
# sklearn.utils.shuffle); no seed is passed here, so row order may differ
# between runs -- confirm whether that matters downstream.
df_2017 = shuffle(df.query('Year == 2017'))
df_2018 = shuffle(df.query('Year == 2018'))
# Bare expression: the tuple is discarded when run as a script (it would only
# be displayed as cell output in a notebook/REPL).
len(df_2017), len(df_2018)

# Drop the names of the full frames now that the per-year splits exist.
del raw_df
del df

# Create regularization graph
# Factors: a bins x bins 2-D grid, a 52-node cycle, a 7-node cycle and a
# 24-node cycle (named for location, week, day and hour respectively).
G_location = nx.grid_2d_graph(bins, bins)
G_week = nx.cycle_graph(52)
G_day = nx.cycle_graph(7)
G_hour = nx.cycle_graph(24)
G = strat_models.cartesian_product([G_location, G_week, G_day, G_hour])
# The Laplacian is built only to read its size and print a summary.
L = nx.laplacian_matrix(G)
K = L.shape[0]  # row count of the Laplacian, i.e. the number of nodes in G

print("Laplacian matrix:", repr(L))
del L

# Create dataset


def df_to_data(df):
    events = {}
    for node in G.nodes():
        events[node] = 0

    for _, r in df.iterrows():
Ejemplo n.º 6
0
print("fitting eigen-stratified models...")
# `kwargs` is defined earlier in the file; these two entries are overwritten
# in place, so the change also affects any later use of `kwargs`.
kwargs["maxiter"] = 600
kwargs["verbose"] = False

# 53 * 24 equals the node count of the product graph built below; K is not
# used again in the visible part of this script.
K = 53 * 24

# Hyperparameters for this run: edge weights for the two cycle factors, the
# regularizer's `lambd` pair, and the `num_eigen` value passed to fit().
weight_week = .45
weight_hr = .55
lambd = (0.01, 0.001)
m = 90

# Regularization graph: product of a 53-node cycle and a 24-node cycle.
# set_edge_weight mutates each factor in place, so it must run before the
# product is taken.
G_week = nx.cycle_graph(53)
G_hr = nx.cycle_graph(24)
strat_models.set_edge_weight(G_week, weight_week)
strat_models.set_edge_weight(G_hr, weight_hr)
G_eigen = strat_models.cartesian_product([G_week, G_hr])

# Base model; `D` is defined elsewhere in the file.
loss = strat_models.nonparametric_discrete_loss()
reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)
bm_eigen = strat_models.BaseModel(loss=loss, reg=reg)

sm_eigen = strat_models.StratifiedModel(bm_eigen, graph=G_eigen)

# Passing num_eigen=m selects the eigen-stratified fit (per the messages
# printed in this script); presumably m is the number of Laplacian
# eigenvectors retained -- confirm against the strat_models documentation.
info = sm_eigen.fit(data_train, num_eigen=m, **kwargs)
anll_train = sm_eigen.anll(data_train)
anll_val = sm_eigen.anll(data_val)
anll_test = sm_eigen.anll(data_test)

print('Eigen-stratified model, {} eigenvectors used'.format(m))
print('\t(weight_week, weight_hour, lambd, m)=',
      (weight_week, weight_hr, lambd, m))