Example #1
0
def test_poisson():
    """Fit a stratified Poisson model on a 1000-node cycle graph.

    Exercises fit, sample, and anll with random stratification features Z
    over the K graph nodes and random count outcomes Y in [1, 10).
    """
    print("Poisson test...")

    K = 1000
    G = nx.cycle_graph(K)
    Z = np.random.randint(K, size=10000)
    Y = np.random.randint(1, 10, size=10000)

    bm = strat_models.BaseModel(
        loss=strat_models.losses.poisson_loss(min_theta=1e-3),
        reg=strat_models.regularizers.min_threshold_reg_one_elem(lambd=1e-3))

    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=500)

    info = sm.fit(data, **kwargs)
    assert info["optimal"]

    # Fix: previously sampled Z with randint(2), touching only nodes 0 and 1
    # of the K=1000-node graph (copy-paste from the Bernoulli test where K=2).
    # Sample across all K strata, consistent with the other tests in this file.
    data_sample = dict(Z=np.random.randint(K, size=100))
    samples = sm.sample(data=data_sample)

    print("ANLL is {}".format(sm.anll(data)))

    print("Poisson done.")
Example #2
0
def test_nonparametric_discrete():
    """Fit a stratified non-parametric discrete distribution on a cycle graph.

    Each of the K strata gets its own categorical distribution over
    `num_classes` outcomes, Laplacian-regularized along the cycle edges
    (edge weight 10), plus a sum-of-squares regularizer.
    """
    # Fix: "distibution" -> "distribution" in the status message.
    print("Non-parametric discrete distribution test...")

    K = 100
    num_classes = 10
    SIZE = 100

    G = nx.cycle_graph(K)
    strat_models.utils.set_edge_weight(G, 10)

    Z = np.random.randint(K, size=SIZE)
    Y = np.random.randint(0, num_classes, size=SIZE)

    print(Z)

    bm = strat_models.BaseModel(
        loss=strat_models.losses.nonparametric_discrete_loss(),
        reg=strat_models.regularizers.sum_squares_reg(lambd=0.4))

    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=500)

    info = sm.fit(data, **kwargs)
    assert info["optimal"]

    # Held-out data drawn from the same distribution, to evaluate ANLL.
    data_test = dict(Z=np.random.randint(K, size=SIZE),
                     Y=np.random.randint(0, num_classes, size=SIZE))

    print("ANLL is {}".format(sm.anll(data_test)))

    print("Non-parametric discrete loss done.")
Example #3
0
def test_eigen():
    r"""Example: solve ||X\theta - Y||^2 + ||\theta||^2.

    Eigen-stratified ridge regression: the stratified model is constrained
    to the span of the bottom 30 Laplacian eigenvectors (num_eigen=30).

    Fix: the docstring is now a raw string — previously ``\t`` in
    ``X\theta`` was interpreted as a literal TAB character, corrupting the
    formula.
    """
    print("ridge regression test...")
    K = 100
    G = nx.cycle_graph(K)
    n = 10
    m = 2
    X = np.random.randn(500, n)
    Z = np.random.randint(K, size=500)
    Y = np.random.randn(500, m)

    bm = strat_models.BaseModel(
        loss=strat_models.losses.sum_squares_loss(intercept=False),
        reg=strat_models.regularizers.sum_squares_reg(lambd=1))

    sm = strat_models.StratifiedModel(bm, graph=G)

    data = dict(X=X, Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-6, maxiter=500)

    info = sm.fit(data, num_eigen=30, **kwargs)
    assert info["optimal"]

    # Smoke-test prediction on the training inputs.
    predictions = sm.predict(data=data)

    print("ANLL is {}".format(sm.anll(data)))

    print("eigen-stratified ridge regression done.")
Example #4
0
def train_strat_model(weights, data_train, data_val, data_test, lambd):
    """Fit a week-by-hour stratified model and report train/val/test ANLL.

    Parameters
    ----------
    weights : pair of Laplacian edge weights, (week_weight, hour_weight).
    data_train, data_val, data_test : data dicts accepted by StratifiedModel.
    lambd : regularization strength for scaled_plus_sum_squares_reg.

    Returns
    -------
    (anll_train, anll_val, anll_test) : average negative log-likelihoods.

    NOTE(review): this function reads two module-level globals that are NOT
    defined here — `D` (the scaling matrix passed to the regularizer) and
    `kwargs` (the solver options passed to fit). Confirm both exist at call
    time, or this raises NameError.
    """

    loss = strat_models.nonparametric_discrete_loss()
    reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)

    bm = strat_models.BaseModel(loss=loss, reg=reg)

    # Stratify over (week-of-year, hour-of-day): a 53-cycle and a 24-cycle,
    # combined via Cartesian product into one 53*24-node graph.
    G_week = nx.cycle_graph(53)
    G_hr = nx.cycle_graph(24)
    strat_models.set_edge_weight(G_week, weights[0])
    strat_models.set_edge_weight(G_hr, weights[1])
    G = strat_models.cartesian_product([G_week, G_hr])

    sm = strat_models.StratifiedModel(bm, graph=G)

    info = sm.fit(data_train, **kwargs)
    anll_train = sm.anll(data_train)
    anll_val = sm.anll(data_val)
    anll_test = sm.anll(data_test)

    print("Stratified model with (weights, lambd) =", (weights, lambd))
    print("\t", info)
    print("\t", anll_train, anll_val, anll_test)

    return anll_train, anll_val, anll_test
Example #5
0
def test_bernoulli():
    """Fit a stratified Bernoulli model on a 2-node cycle graph.

    The Bernoulli parameter is clipped to (1e-5, 1 - 1e-5) both in the loss
    and via clip_reg, keeping the log-likelihood finite.
    """
    print("Bernoulli test...")

    K = 2
    G = nx.cycle_graph(K)
    Z = np.random.randint(K, size=1000)
    Y = np.random.randint(0, 2, size=1000)

    # (Removed commented-out code exercising an older `Bernoulli` API.)
    bm = strat_models.BaseModel(loss=strat_models.losses.bernoulli_loss(
        1e-5, 1 - 1e-5),
                                reg=strat_models.regularizers.clip_reg(
                                    (1e-5, 1 - 1e-5)))
    sm = strat_models.StratifiedModel(bm, graph=G)
    data = dict(Y=Y, Z=Z)
    kwargs = dict(verbose=True, abs_tol=1e-4, maxiter=500, n_jobs=2)

    info = sm.fit(data, **kwargs)
    assert info["optimal"]

    # K == 2 here, so randint(2) spans all strata.
    data_sample = dict(Z=np.random.randint(2, size=100))
    samples = sm.sample(data=data_sample)

    print("ANLL is {}".format(sm.anll(data)))

    print("Bernoulli done.")
Example #6
0
def test_trace_minus_logdet():
    """Fit a stratified covariance model (trace minus log-det loss) on a 3-cycle."""
    print("Trace minus logdet test...")
    K = 3
    n = 10

    G = nx.cycle_graph(K)
    # Same weight 0.1 on every edge of the cycle.
    nx.set_edge_attributes(G, 0.1, "weight")

    Z = np.array(list(G.nodes()))
    # One well-conditioned SPD target per node.
    Y = [np.cov(np.random.randn(n, n)) + np.eye(n) for _ in range(K)]

    base = strat_models.BaseModel(
        loss=strat_models.losses.covariance_max_likelihood_loss(),
        reg=strat_models.regularizers.L1_reg(lambd=1))
    model = strat_models.StratifiedModel(base, graph=G)

    data = dict(Y=Y, Z=Z, n=n)

    solver_options = dict(verbose=True, abs_tol=1e-6, maxiter=900)

    result = model.fit(data, **solver_options)

    print("ANLL is {}".format(model.anll(data)))

    assert result["optimal"]

    sample_input = dict(Z=np.random.randint(K, size=5))
    samples = model.sample(data=sample_input)

    print("Trace minus logdet done.")
Example #7
0
def test_joint_mean_covariance():
    """Jointly fit per-node mean and covariance on a weakly coupled triangle."""
    print("Joint mean covariance test...")
    K = 3
    G = nx.cycle_graph(K)
    # Weak coupling (weight 0.01) on all three edges.
    for u, v in ((0, 1), (1, 2), (2, 0)):
        G.add_edge(u, v, weight=0.01)
    Z = np.array(list(G.nodes()))

    n = 10
    mus = [np.ones(n) for _ in range(K)]
    # SPD covariance per node: sample covariance plus identity.
    S = [np.cov(np.random.randn(n, n)) + np.eye(n) for _ in range(K)]

    # Nine samples per node, stored column-wise (n x 9).
    Y = [np.random.multivariate_normal(mus[k], S[k], 9).T for k in range(K)]

    for y in Y:
        print(np.mean(y, 1))

    base = strat_models.BaseModel(
        loss=strat_models.losses.mean_covariance_max_likelihood_loss(),
        reg=strat_models.regularizers.sum_squares_reg(lambd=0))
    model = strat_models.StratifiedModel(base, graph=G)

    data = dict(Y=Y, Z=Z, n=n)

    solver_options = dict(verbose=True, abs_tol=1e-6, maxiter=20, n_jobs=2)

    info = model.fit(data, **solver_options)

    # Recover node 0's (Sigma, mu) from the fitted natural-parameter block.
    theta0 = model.G._node[0]["theta"]
    S_star = np.linalg.inv(theta0[:, :-1])
    mu_star = S_star @ theta0[:, -1]

    print(S[0], mus[0])
    print(S_star, mu_star)

    print(info)
    print("ANLL is {}".format(model.anll(data)))

    sample_input = dict(Z=np.random.randint(K, size=5))
    samples = model.sample(data=sample_input)

    print("Joint mean covariance done.")
Example #8
0
def test_log_reg():
    """Fit a stratified logistic regression (with intercept) on a 30-cycle."""
    print("Logistic regression test...")
    K = 30
    G = nx.cycle_graph(K)
    n = 10
    X = np.random.randn(1000, n)
    Z = np.random.randint(K, size=1000)
    Y = np.random.randint(1, 10, size=1000)

    base = strat_models.BaseModel(
        loss=strat_models.losses.logistic_loss(intercept=True))
    model = strat_models.StratifiedModel(base, graph=G)
    data = dict(X=X, Y=Y, Z=Z)
    solver_options = dict(verbose=True, abs_tol=1e-6, maxiter=500)

    result = model.fit(data, **solver_options)
    assert result["optimal"]

    # Predict on the first 20 training rows as a smoke test.
    head = dict(X=X[:20, :], Z=Z[:20])
    predictions = model.predict(data=head)

    print("ANLL is {}".format(model.anll(data)))

    print("logreg done.")
            G_state.add_edge(state1, state2)

# Build a path graph over the years and relabel nodes 0..n_years-1 to the
# actual year values, so strata are indexed by (state, year).
# NOTE(review): `years` and `G_state` are defined earlier in the script, not
# visible in this chunk.
n_years = len(years)
G_time = nx.path_graph(n_years)
G_time = nx.relabel_nodes(G_time, dict(zip(np.arange(n_years), years)))

# Shared solver settings for the fits below.
kwargs = dict(abs_tol=1e-5, rel_tol=1e-5, maxiter=200, n_jobs=4, verbose=1)

# Bernoulli loss; clip_reg presumably clips the parameter into
# (1e-5, 1 - 1e-5) to keep the log-likelihood finite — confirm against the
# regularizer's definition.
loss = strat_models.bernoulli_loss()
reg = strat_models.clip_reg(lambd=(1e-5, 1 - 1e-5))
bm = strat_models.BaseModel(loss=loss, reg=reg)

# Edge weight 0: presumably no Laplacian coupling, so every (state, year)
# stratum is fit independently ("separate" baseline) — TODO confirm.
strat_models.set_edge_weight(G_state, 0)
strat_models.set_edge_weight(G_time, 0)
G = strat_models.cartesian_product([G_state, G_time])
sm_fully = strat_models.StratifiedModel(bm, graph=G)

# NOTE(review): data_train / data_test come from earlier in the script.
info = sm_fully.fit(data_train, **kwargs)
anll_train = sm_fully.anll(data_train)
anll_test = sm_fully.anll(data_test)
print("Separate model")
print("\t", info)
print("\t", anll_train, anll_test)

# Nonzero weights couple neighbouring states (1) and adjacent years (4):
# the stratified model proper.
strat_models.set_edge_weight(G_state, 1)
strat_models.set_edge_weight(G_time, 4)
G = strat_models.cartesian_product([G_state, G_time])
sm_strat = strat_models.StratifiedModel(bm, graph=G)

info = sm_strat.fit(data_train, **kwargs)
anll_train = sm_strat.anll(data_train)
              maxiter=400,
              n_jobs=4,
              verbose=False,
              rho=3.,
              max_cg_iterations=30)

## Separate model
G_sex = create_sex_graph(weight=0)
G_age = create_age_graph(weight=0)
G = strat_models.utils.cartesian_product([G_sex, G_age])

loss = strat_models.logistic_loss(intercept=True)
reg = strat_models.sum_squares_reg(lambd=35)

bm_sep = strat_models.BaseModel(loss=loss, reg=reg)
sm_sep = strat_models.StratifiedModel(bm_sep, graph=G)

info = sm_sep.fit(data_train, **kwargs)
anll_train_sep = sm_sep.anll(data_train)
anll_val_sep = sm_sep.anll(data_val)
anll_test_sep = sm_sep.anll(data_test)

print('Separate model')
print('\tlambda =', 35)
print('\t', info)
print('\t', anll_train_sep, anll_val_sep, anll_test_sep)

## Common model
G = nx.empty_graph(1)

loss = strat_models.logistic_loss(intercept=True)
Example #11
0
# NOTE(review): `kwargs` (solver options), `D` (the regularizer's scaling
# matrix), and data_train / data_val / data_test are defined earlier in the
# script — not visible in this chunk.
kwargs["verbose"] = False

# Total number of strata: 53 weeks x 24 hours.
K = 53 * 24

weight_week = .45
weight_hr = .55
lambd = (0.01, 0.001)
# Number of Laplacian eigenvectors for the eigen-stratified fit.
m = 90

# Week-of-year and hour-of-day cycles, combined via Cartesian product.
G_week = nx.cycle_graph(53)
G_hr = nx.cycle_graph(24)
strat_models.set_edge_weight(G_week, weight_week)
strat_models.set_edge_weight(G_hr, weight_hr)
G_eigen = strat_models.cartesian_product([G_week, G_hr])

loss = strat_models.nonparametric_discrete_loss()
reg = strat_models.scaled_plus_sum_squares_reg(A=D, lambd=lambd)
bm_eigen = strat_models.BaseModel(loss=loss, reg=reg)

sm_eigen = strat_models.StratifiedModel(bm_eigen, graph=G_eigen)

# Constrain the model parameters to the span of m Laplacian eigenvectors.
info = sm_eigen.fit(data_train, num_eigen=m, **kwargs)
anll_train = sm_eigen.anll(data_train)
anll_val = sm_eigen.anll(data_val)
anll_test = sm_eigen.anll(data_test)

print('Eigen-stratified model, {} eigenvectors used'.format(m))
print('\t(weight_week, weight_hour, lambd, m)=',
      (weight_week, weight_hr, lambd, m))
print('\t', info)
print('\t', anll_train, anll_val, anll_test)