def test_edge_feature_latent_node_crf_no_latent():
    """With zero latent nodes, the latent-node CRF must reproduce plain LP
    inference on a grid with direction-dependent pairwise weights."""
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1, size_x=10)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    n_total = n_states + 5  # 5 extra (hidden) states
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_total)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_total)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    # per-edge weights: horizontal edges first, then vertical ones
    edge_weights = np.vstack([
        np.repeat(pw_horz[np.newaxis, :, :], edge_list[0].shape[0], axis=0),
        np.repeat(pw_vert[np.newaxis, :, :], edge_list[1].shape[0], axis=0),
    ])

    # reference inference: pad the unaries with -100 for the hidden states
    x_padded = -100 * np.ones((x.shape[0], x.shape[1], x.shape[2] + 5))
    x_padded[:, :, :x.shape[2]] = x
    res = lp_general_graph(-x_padded.reshape(-1, n_total), edges, edge_weights)

    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, n_states), edges, edge_features, 0)
    y = y.ravel()

    for inference_method in get_installed(["lp"]):
        # same inference through the CRF interface (relaxed)
        crf = EdgeFeatureLatentNodeCRF(n_labels=3,
                                       inference_method=inference_method,
                                       n_edge_features=2, n_hidden_states=5)
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=True)
        assert_array_almost_equal(res[0], y_pred[0].reshape(-1, n_total), 4)
        assert_array_almost_equal(res[1], y_pred[1], 4)
        assert_array_equal(y, np.argmax(y_pred[0], axis=-1))

    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        # again, this time discrete predictions only
        crf = EdgeFeatureLatentNodeCRF(n_labels=3,
                                       inference_method=inference_method,
                                       n_edge_features=2, n_hidden_states=5)
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=False)
        assert_array_equal(y, y_pred)
def test_joint_feature_continuous():
    # FIXME
    # Assemble a weight vector that predicts perfectly, pairwise part
    # included, then check joint_feature accepts the relaxed prediction.
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    for inference_method in get_installed(["lp", "ad3"]):
        # create crf, assemble weight, make prediction
        crf = DirectionalGridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=True)
        # joint_feature must handle the relaxed (continuous) prediction
        joint_feature_y = crf.joint_feature(x, y_pred)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
def test_joint_feature_discrete():
    """joint_feature on discrete labels: shape and pairwise counts."""
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, 3), edges, edge_features)
    y_flat = y.ravel()
    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        crf = EdgeFeatureGraphCRF(inference_method=inference_method)
        crf.initialize([x], [y_flat])
        joint_feature_y = crf.joint_feature(x, y_flat)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
        # pairwise part: first horizontal, then vertical
        # (we trust the unaries ;)
        n = crf.n_states
        pw_horz, pw_vert = joint_feature_y[n * crf.n_features:].reshape(
            2, n, n)
        xx, yy = np.indices(y.shape)
        assert_array_equal(pw_vert, np.diag([9 * 4, 9 * 4, 9 * 4]))
        expected_horz = np.diag([10 * 3, 10 * 3, 10 * 3])
        expected_horz[0, 1] = 10
        expected_horz[1, 2] = 10
        assert_array_equal(pw_horz, expected_horz)
def test_multinomial_blocks_frankwolfe():
    # Frank-Wolfe SSVM must learn the easy multinomial blocks perfectly.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0)
    crf = GridCRF(inference_method='qpbo')
    clf = FrankWolfeSSVM(model=crf, C=1, max_iter=50, verbose=3)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_switch_to_ad3():
    # Verify that NSlackSSVM can switch from qpbo to ad3 inference.
    if not get_installed(['qpbo']) or not get_installed(['ad3']):
        return
    X, Y = generate_blocks_multinomial(n_samples=5, noise=1.5, seed=0)
    crf = GridCRF(n_states=3, inference_method='qpbo')
    ssvm = NSlackSSVM(crf, max_iter=10000)
    ssvm_with_switch = NSlackSSVM(crf, max_iter=10000, switch_to=('ad3'))
    ssvm.fit(X, Y)
    ssvm_with_switch.fit(X, Y)
    assert_equal(ssvm_with_switch.model.inference_method, 'ad3')
    # the dual should be higher with ad3 inference, as it might use the
    # relaxation — that is pretty much guaranteed
    assert_greater(ssvm_with_switch.objective_curve_[-1],
                   ssvm.objective_curve_[-1])
    # convergence alone must also trigger the switch
    ssvm_with_switch = NSlackSSVM(crf, max_iter=10000, switch_to=('ad3'),
                                  tol=10)
    ssvm_with_switch.fit(X, Y)
    assert_equal(ssvm_with_switch.model.inference_method, 'ad3')
def test_switch_to_ad3():
    # Verify that NSlackSSVM can switch from qpbo to ad3 inference.
    if not get_installed(['qpbo']) or not get_installed(['ad3']):
        return
    X, Y = generate_blocks_multinomial(n_samples=5, noise=1.5, seed=0)
    crf = GridCRF(n_states=3, inference_method='qpbo')
    ssvm = NSlackSSVM(crf, max_iter=10000)
    ssvm_with_switch = NSlackSSVM(crf, max_iter=10000, switch_to=('ad3'))
    ssvm.fit(X, Y)
    ssvm_with_switch.fit(X, Y)
    assert_equal(ssvm_with_switch.model.inference_method, 'ad3')
    # the dual should be higher with ad3 inference, as it might use the
    # relaxation — that is pretty much guaranteed
    assert_greater(ssvm_with_switch.objective_curve_[-1],
                   ssvm.objective_curve_[-1])
    print(ssvm_with_switch.objective_curve_[-1], ssvm.objective_curve_[-1])
    # convergence alone must also trigger the switch
    ssvm_with_switch = NSlackSSVM(crf, max_iter=10000, switch_to=('ad3'),
                                  tol=10)
    ssvm_with_switch.fit(X, Y)
    assert_equal(ssvm_with_switch.model.inference_method, 'ad3')
def test_multinomial_blocks():
    # Structured perceptron must learn the easy multinomial blocks.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    crf = GridCRF(n_states=X.shape[-1])
    clf = StructuredPerceptron(model=crf, max_iter=10)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_psi_continuous():
    # FIXME
    # Assemble a weight vector that predicts perfectly, pairwise part
    # included, then check psi accepts the relaxed prediction.
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, 3), edges, edge_features)
    y = y.ravel()

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    for inference_method in get_installed(["lp", "ad3"]):
        # create crf, assemble weight, make prediction
        crf = EdgeFeatureGraphCRF(inference_method=inference_method)
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        crf.initialize([x], [y])
        y_pred = crf.inference(x, w, relaxed=True)
        # psi must handle the relaxed (continuous) prediction
        psi_y = crf.psi(x, y_pred)
        assert_equal(psi_y.shape, (crf.size_psi,))
def test_initialization():
    """EdgeFeatureGraphCRF.initialize must infer sizes from the data and
    reject incompatible explicit settings."""
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, n_states), edges, edge_features)
    y = y.ravel()

    # sizes inferred from the data
    crf = EdgeFeatureGraphCRF()
    crf.initialize([x], [y])
    assert_equal(crf.n_edge_features, 2)
    assert_equal(crf.n_features, 3)
    assert_equal(crf.n_states, 3)

    # explicitly matching sizes: initialize is a no-op
    crf = EdgeFeatureGraphCRF(n_states=3, n_features=3, n_edge_features=2)
    crf.initialize([x], [y])

    # incompatible explicit sizes must raise
    crf = EdgeFeatureGraphCRF(n_states=4, n_edge_features=2)
    assert_raises(ValueError, crf.initialize, X=[x], Y=[y])
def test_multinomial_blocks_frankwolfe_batch():
    # Batch-mode Frank-Wolfe must learn the easy multinomial blocks.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    crf = GridCRF(inference_method='qpbo')
    clf = FrankWolfeSSVM(model=crf, C=1, max_iter=500, batch_mode=True)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_one_slack_constraint_caching():
    # cutting-plane ssvm on an easy multinomial dataset; checks that the
    # inference cache is built the way we expect
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0,
                                       size_x=9)
    n_labels = len(np.unique(Y))
    exact_inference = get_installed(
        [('ad3', {'branch_and_bound': True}), "lp"])[0]
    crf = GridCRF(n_states=n_labels, inference_method=exact_inference)
    clf = OneSlackSSVM(model=crf, max_iter=150, C=1, check_constraints=True,
                       break_on_bad=True, inference_cache=50,
                       inactive_window=0)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
    assert_equal(len(clf.inference_cache_), len(X))
    # there should be 13 constraints, which are less than the 94 iterations
    # that are done
    # check that we didn't change the behavior of how we construct the cache
    constraints_per_sample = [len(cache) for cache in clf.inference_cache_]
    if exact_inference == "lp":
        assert_equal(len(clf.inference_cache_[0]), 18)
        assert_equal(np.max(constraints_per_sample), 18)
        assert_equal(np.min(constraints_per_sample), 18)
    else:
        assert_equal(len(clf.inference_cache_[0]), 13)
        assert_equal(np.max(constraints_per_sample), 20)
        assert_equal(np.min(constraints_per_sample), 11)
def test_joint_feature_discrete():
    """
    Testing with a single type of nodes. Must do as well as
    EdgeFeatureGraphCRF.
    """
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = ([x.reshape(-1, 3)], [edges], [edge_features])
    y_flat = y.ravel()
    #for inference_method in get_installed(["lp", "ad3", "qpbo"]):
    if True:
        crf = NodeTypeEdgeFeatureGraphCRF(1, [3], [3], [[2]])
        joint_feature_y = crf.joint_feature(x, y_flat)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
        # pairwise part: first horizontal, then vertical
        # (we trust the unaries ;)
        n_states = crf.l_n_states[0]
        n_features = crf.l_n_features[0]
        pw_horz, pw_vert = joint_feature_y[n_states * n_features:].reshape(
            2, n_states, n_states)
        assert_array_equal(pw_vert, np.diag([9 * 4, 9 * 4, 9 * 4]))
        expected_horz = np.diag([10 * 3, 10 * 3, 10 * 3])
        expected_horz[0, 1] = 10
        expected_horz[1, 2] = 10
        assert_array_equal(pw_horz, expected_horz)
def test_blocks_multinomial_crf():
    # A hand-crafted weight vector must yield the ground truth under every
    # installed inference method.
    X, Y = generate_blocks_multinomial(n_samples=1, size_x=9, seed=0)
    x, y = X[0], Y[0]
    w = np.array([1., 0., 0.,   # unary
                  0., 1., 0.,
                  0., 0., 1.,
                  .4,           # pairwise
                  -.3, .3,
                  -.5, -.1, .3])
    for inference_method in get_installed():
        crf = GridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        assert_array_equal(y, crf.inference(x, w))
def test_one_slack_constraint_caching():
    # cutting-plane ssvm on an easy multinomial dataset, lp inference
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0,
                                       size_x=9)
    n_labels = len(np.unique(Y))
    crf = GridCRF(n_states=n_labels, inference_method='lp')
    clf = OneSlackSSVM(model=crf, max_iter=150, C=1, check_constraints=True,
                       break_on_bad=True, inference_cache=50,
                       inactive_window=0, verbose=10)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
    assert_equal(len(clf.inference_cache_), len(X))
    # there should be 11 constraints, which are less than the 94 iterations
    # that are done
    assert_equal(len(clf.inference_cache_[0]), 11)
    # check that we didn't change the behavior of how we construct the cache
    constraints_per_sample = [len(cache) for cache in clf.inference_cache_]
    assert_equal(np.max(constraints_per_sample), 19)
    assert_equal(np.min(constraints_per_sample), 11)
def test_psi_continuous():
    # FIXME
    # Assemble a weight vector that predicts perfectly, pairwise part
    # included, then check psi accepts the relaxed prediction.
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, 3), edges, edge_features)
    y = y.ravel()

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    for inference_method in get_installed(["lp", "ad3"]):
        # create crf, assemble weight, make prediction
        crf = EdgeFeatureGraphCRF(inference_method=inference_method)
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        crf.initialize([x], [y])
        y_pred = crf.inference(x, w, relaxed=True)
        # psi must handle the relaxed (continuous) prediction
        psi_y = crf.psi(x, y_pred)
        assert_equal(psi_y.shape, (crf.size_psi,))
def test_max_product_multinomial_crf():
    # Max-product inference with a hand-crafted weight vector must recover
    # the ground truth.
    X, Y = generate_blocks_multinomial(n_samples=1)
    x, y = X[0], Y[0]
    w = np.array([1.0, 0.0, 0.0,   # unary
                  0.0, 1.0, 0.0,
                  0.0, 0.0, 1.0,
                  0.4,             # pairwise
                  -0.3, 0.3,
                  -0.5, -0.1, 0.3])
    crf = GridCRF(inference_method="max-product")
    crf.initialize(X, Y)
    assert_array_equal(y, crf.inference(x, w))
def test_multinomial_blocks_subgradient_offline():
    # offline (batch) subgradient ssvm on easy multinomial dataset
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.6, seed=1)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = SubgradientSSVM(model=crf, max_iter=100, online=False)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_multinomial_blocks_frankwolfe():
    # Block-coordinate Frank-Wolfe with line search on the easy dataset.
    X, Y = generate_blocks_multinomial(n_samples=50, noise=0.5, seed=0)
    crf = GridCRF(inference_method='qpbo')
    clf = FrankWolfeSSVM(model=crf, C=1, line_search=True, batch_mode=False,
                         check_dual_every=500)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_inference():
    """Inference with different weights in horizontal/vertical directions
    must match plain LP inference on the same graph."""
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    # per-edge weights: horizontal edges first, then vertical ones
    edge_weights = np.vstack([
        np.repeat(pw_horz[np.newaxis, :, :], edge_list[0].shape[0], axis=0),
        np.repeat(pw_vert[np.newaxis, :, :], edge_list[1].shape[0], axis=0),
    ])

    # reference result from plain LP inference
    res = lp_general_graph(-x.reshape(-1, n_states), edges, edge_weights)

    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, n_states), edges, edge_features)
    y = y.ravel()

    for inference_method in get_installed(["lp", "ad3"]):
        # same inference through the CRF interface
        crf = EdgeFeatureGraphCRF(inference_method=inference_method)
        crf.initialize([x], [y])
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=True)
        if isinstance(y_pred, tuple):
            # ad3 produces an integer result if it found the exact solution
            assert_array_almost_equal(res[1], y_pred[1])
            assert_array_almost_equal(res[0],
                                      y_pred[0].reshape(-1, n_states))
            assert_array_equal(y, np.argmax(y_pred[0], axis=-1))

    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        # again, this time discrete predictions only
        crf = EdgeFeatureGraphCRF(n_states=3,
                                  inference_method=inference_method,
                                  n_edge_features=2)
        crf.initialize([x], [y])
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=False)
        assert_array_equal(y, y_pred)
def test_multinomial_blocks_subgradient():
    # subgradient ssvm on easy multinomial dataset
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.6, seed=1)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = SubgradientSSVM(model=crf, max_iter=50)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_blocks_multinomial_crf():
    # A hand-crafted weight vector must yield the ground truth under every
    # installed inference method.
    X, Y = generate_blocks_multinomial(n_samples=1, size_x=9, seed=0)
    x, y = X[0], Y[0]
    w = np.array([1.0, 0.0, 0.0,   # unary
                  0.0, 1.0, 0.0,
                  0.0, 0.0, 1.0,
                  0.4,             # pairwise
                  -0.3, 0.3,
                  -0.5, -0.1, 0.3])
    for inference_method in get_installed():
        crf = GridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        assert_array_equal(y, crf.inference(x, w))
def test_multinomial_blocks_subgradient():
    # subgradient ssvm with momentum on easy multinomial dataset
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=1)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = SubgradientSSVM(model=crf, max_iter=50, C=10, momentum=.98,
                          learning_rate=0.001)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_multinomial_blocks_cutting_plane():
    # n-slack cutting-plane ssvm on easy multinomial dataset
    X, Y = generate_blocks_multinomial(n_samples=40, noise=0.5, seed=0)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = NSlackSSVM(model=crf, max_iter=100, C=100,
                     check_constraints=False, batch_size=1)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_multinomial_blocks_one_slack():
    # one-slack cutting-plane ssvm on easy multinomial dataset
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = OneSlackSSVM(model=crf, max_iter=150, C=1, check_constraints=True,
                       break_on_bad=True, tol=.1, inference_cache=50)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_k_means_initialization():
    """kmeans_init must reproduce the labels for one state per label and
    map back to labels via floor division for multiple states per label."""
    n_samples = 10
    X, Y = generate_big_checker(n_samples=n_samples)
    edges = [make_grid_edges(x, return_lists=True) for x in X]
    # flatten the grid
    Y = Y.reshape(Y.shape[0], -1)
    X = X.reshape(X.shape[0], -1, X.shape[-1])
    n_labels = len(np.unique(Y))
    X = X.reshape(n_samples, -1, n_labels)

    # sanity check for one state
    H = kmeans_init(X, Y, edges, n_states_per_label=[1] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(Y, H)

    # check number of states
    H = kmeans_init(X, Y, edges, n_states_per_label=[3] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(np.unique(H), np.arange(6))
    # BUG FIX: `H / 3` is true division under Python 3 and yields
    # non-integer floats (e.g. 1/3) that can never equal the integer
    # labels; floor division maps states back to their label.
    assert_array_equal(Y, H // 3)

    # for dataset with more than two states
    X, Y = generate_blocks_multinomial(n_samples=10)
    edges = [make_grid_edges(x, return_lists=True) for x in X]
    Y = Y.reshape(Y.shape[0], -1)
    X = X.reshape(X.shape[0], -1, X.shape[-1])
    n_labels = len(np.unique(Y))

    # sanity check for one state
    H = kmeans_init(X, Y, edges, n_states_per_label=[1] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(Y, H)

    # check number of states
    H = kmeans_init(X, Y, edges, n_states_per_label=[2] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(np.unique(H), np.arange(6))
    # same floor-division fix as above
    assert_array_equal(Y, H // 2)
def test_multinomial_blocks_directional():
    # cutting-plane ssvm with a directional CRF on the easy multinomial
    # dataset
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = DirectionalGridCRF(n_states=n_labels,
                             inference_method=inference_method)
    clf = NSlackSSVM(model=crf, max_iter=100, C=100, verbose=0,
                     check_constraints=True, batch_size=1)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_averaged():
    # Under a lot of noise, weight averaging helps the perceptron.
    # This comparison fails with less noise.
    X, Y = generate_blocks_multinomial(n_samples=15, noise=3, seed=0)
    X_train, Y_train = X[:10], Y[:10]
    X_test, Y_test = X[10:], Y[10:]
    crf = GridCRF()
    clf = StructuredPerceptron(model=crf, max_iter=3)
    clf.fit(X_train, Y_train)
    no_avg_test = clf.score(X_test, Y_test)
    clf.set_params(average=True)
    clf.fit(X_train, Y_train)
    avg_test = clf.score(X_test, Y_test)
    assert_greater(avg_test, no_avg_test)
def test_multinomial_blocks_directional_simple():
    # cutting-plane ssvm with a directional (edge-feature) CRF on the easy
    # multinomial dataset
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(n_states=3, n_edge_features=2)
    clf = NSlackSSVM(model=crf, max_iter=10, C=1, check_constraints=False)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
def test_multinomial_blocks_directional_simple():
    # testing cutting plane ssvm with directional CRF on easy multinomial
    # dataset
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    # BUG FIX: zip() is a one-shot iterator under Python 3; fit() and
    # predict() both consume X, so without materializing it the second
    # pass would see an empty sequence. The sibling test above already
    # wraps it in list().
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(n_states=3, n_edge_features=2)
    clf = NSlackSSVM(model=crf, max_iter=10, C=1, check_constraints=False)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_blocks_multinomial_crf():
    # A hand-crafted weight vector must yield the ground truth under every
    # installed inference method.
    X, Y = generate_blocks_multinomial(n_samples=1, size_x=9, seed=0)
    x, y = X[0], Y[0]
    w = np.array([1., 0., 0.,   # unaryA
                  0., 1., 0.,
                  0., 0., 1.,
                  .4,           # pairwise
                  -.3, .3,
                  -.5, -.1, .3])
    for inference_method in get_installed():
        #NOTE: ad3+ fails because it requires a different data structure
        if inference_method == 'ad3+':
            continue
        crf = GridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        assert_array_equal(y, crf.inference(x, w))
def test_switch_to_ad3():
    # Verify that OneSlackSSVM can switch from qpbo to ad3 inference.
    if not get_installed(["qpbo"]) or not get_installed(["ad3"]):
        return
    X, Y = generate_blocks_multinomial(n_samples=5, noise=1.5, seed=0)
    crf = GridCRF(n_states=3, inference_method="qpbo")
    ssvm = OneSlackSSVM(crf, inference_cache=50, max_iter=10000)
    ssvm_with_switch = OneSlackSSVM(crf, inference_cache=50, max_iter=10000,
                                    switch_to=("ad3"))
    ssvm.fit(X, Y)
    ssvm_with_switch.fit(X, Y)
    assert_equal(ssvm_with_switch.model.inference_method, "ad3")
    # the dual should be higher with ad3 inference, as it might use the
    # relaxation — that is pretty much guaranteed
    assert_greater(ssvm_with_switch.objective_curve_[-1],
                   ssvm.objective_curve_[-1])
def test_joint_feature_discrete():
    """joint_feature on discrete labels for the directional grid CRF."""
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        crf = DirectionalGridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        joint_feature_y = crf.joint_feature(x, y)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
        # pairwise part: first horizontal, then vertical
        # (we trust the unaries ;)
        n = crf.n_states
        pw_horz, pw_vert = joint_feature_y[n * crf.n_features:].reshape(
            2, n, n)
        xx, yy = np.indices(y.shape)
        assert_array_equal(pw_vert, np.diag([9 * 4, 9 * 4, 9 * 4]))
        expected_horz = np.diag([10 * 3, 10 * 3, 10 * 3])
        expected_horz[0, 1] = 10
        expected_horz[1, 2] = 10
        assert_array_equal(pw_horz, expected_horz)
def test_k_means_initialization():
    """kmeans_init must reproduce the labels for one state per label and
    map back to labels via floor division for multiple states per label."""
    n_samples = 10
    X, Y = generate_big_checker(n_samples=n_samples)
    edges = [make_grid_edges(x, return_lists=True) for x in X]
    # flatten the grid
    Y = Y.reshape(Y.shape[0], -1)
    X = X.reshape(X.shape[0], -1, X.shape[-1])
    n_labels = len(np.unique(Y))
    X = X.reshape(n_samples, -1, n_labels)

    # sanity check for one state
    H = kmeans_init(X, Y, edges, n_states_per_label=[1] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(Y, H)

    # check number of states
    H = kmeans_init(X, Y, edges, n_states_per_label=[3] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(np.unique(H), np.arange(6))
    assert_array_equal(Y, H // 3)

    # for dataset with more than two states
    X, Y = generate_blocks_multinomial(n_samples=10)
    edges = [make_grid_edges(x, return_lists=True) for x in X]
    Y = Y.reshape(Y.shape[0], -1)
    X = X.reshape(X.shape[0], -1, X.shape[-1])
    n_labels = len(np.unique(Y))

    # sanity check for one state
    H = kmeans_init(X, Y, edges, n_states_per_label=[1] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(Y, H)

    # check number of states
    H = kmeans_init(X, Y, edges, n_states_per_label=[2] * n_labels,
                    n_labels=n_labels)
    H = np.vstack(H)
    assert_array_equal(np.unique(H), np.arange(6))
    assert_array_equal(Y, H // 2)
def test_joint_feature_continuous():
    """
    Testing with a single type of nodes. Must do as well as
    EdgeFeatureGraphCRF.
    """
    # FIXME
    # Assemble a weight vector that predicts perfectly, pairwise part
    # included, then check joint_feature accepts the relaxed prediction.
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = ([x.reshape(-1, 3)], [edges], [edge_features])
    y = y.ravel()

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    if True:
        crf = NodeTypeEdgeFeatureGraphCRF(1, [3], [3], [[2]])
        w = np.hstack([np.eye(3).ravel(), -pw_horz.ravel(), -pw_vert.ravel()])
        crf.initialize(x, y)
        y_pred = crf.inference(x, w, relaxed=True)
        # joint_feature must handle the relaxed (continuous) prediction
        joint_feature_y = crf.joint_feature(x, y_pred)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
def test_multinomial_blocks_directional_anti_symmetric():
    # cutting-plane ssvm with a directional CRF on the easy multinomial
    # dataset; edge feature 0 is constrained symmetric, feature 1
    # antisymmetric
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(symmetric_edge_features=[0],
                              antisymmetric_edge_features=[1])
    clf = NSlackSSVM(model=crf, C=100)
    clf.fit(X, Y)
    assert_array_equal(Y, clf.predict(X))
    # learned pairwise blocks must respect the symmetry constraints
    pairwise_params = clf.w[-9 * 2:].reshape(2, 3, 3)
    sym, antisym = pairwise_params
    assert_array_equal(sym, sym.T)
    assert_array_equal(antisym, -antisym.T)
def test_multinomial_blocks_directional_anti_symmetric():
    # testing cutting plane ssvm with directional CRF on easy multinomial
    # dataset
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    # BUG FIX: zip() is a one-shot iterator under Python 3; fit() and
    # predict() both consume X, so without materializing it the second
    # pass would see an empty sequence. The sibling test already wraps
    # it in list().
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(symmetric_edge_features=[0],
                              antisymmetric_edge_features=[1])
    clf = NSlackSSVM(model=crf, C=100)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
    pairwise_params = clf.w[-9 * 2:].reshape(2, 3, 3)
    sym = pairwise_params[0]
    antisym = pairwise_params[1]
    assert_array_equal(sym, sym.T)
    assert_array_equal(antisym, -antisym.T)
def test_ssvm_objectives():
    # The learners must report objective curves consistent with the
    # explicitly computed primal objective. Not a strong test, but it
    # makes sure the objective function is actually called.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=1.5, seed=0)
    n_labels = len(np.unique(Y))
    # NOTE(review): `inference_method` is not defined in this function —
    # presumably a module-level name; confirm it is set at import time.
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)

    # once for n-slack
    clf = NSlackSSVM(model=crf, max_iter=5, C=1, tol=.1)
    clf.fit(X, Y)
    primal_objective = objective_primal(clf.model, clf.w, X, Y, clf.C)
    assert_almost_equal(clf.primal_objective_curve_[-1], primal_objective)

    # once for one-slack
    clf = OneSlackSSVM(model=crf, max_iter=5, C=1, tol=.1)
    clf.fit(X, Y)
    primal_objective = objective_primal(clf.model, clf.w, X, Y, clf.C,
                                        variant='one_slack')
    assert_almost_equal(clf.primal_objective_curve_[-1], primal_objective)

    # now subgradient. Should also work in batch-mode.
    clf = SubgradientSSVM(model=crf, max_iter=5, C=1, batch_size=-1)
    clf.fit(X, Y)
    primal_objective = objective_primal(clf.model, clf.w, X, Y, clf.C)
    assert_almost_equal(clf.objective_curve_[-1], primal_objective)

    # frank wolfe
    clf = FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=True)
    clf.fit(X, Y)
    primal_objective = objective_primal(clf.model, clf.w, X, Y, clf.C)
    assert_almost_equal(clf.primal_objective_curve_[-1], primal_objective)

    # block-coordinate Frank-Wolfe
    clf = FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=False)
    clf.fit(X, Y)
    primal_objective = objective_primal(clf.model, clf.w, X, Y, clf.C)
    assert_almost_equal(clf.primal_objective_curve_[-1], primal_objective)
def test_objective():
    # SubgradientLatentSSVM must behave exactly like SubgradientSSVM —
    # same loss in particular — when there are no latent states.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=.3, seed=1)
    inference_method = get_installed(["qpbo", "ad3", "lp"])[0]
    n_labels = 3

    crfl = LatentGridCRF(n_labels=n_labels, n_states_per_label=1,
                         inference_method=inference_method)
    clfl = SubgradientLatentSSVM(model=crfl, max_iter=20, C=10.,
                                 learning_rate=0.001, momentum=0.98)
    crfl.initialize(X, Y)
    clfl.w = np.zeros(crfl.size_joint_feature)  # this disables random init
    clfl.fit(X, Y)

    crf = GridCRF(n_states=n_labels, inference_method=inference_method)
    clf = SubgradientSSVM(model=crf, max_iter=20, C=10.,
                          learning_rate=0.001, momentum=0.98)
    clf.fit(X, Y)

    assert_array_almost_equal(clf.w, clfl.w)
    assert_almost_equal(clf.objective_curve_[-1], clfl.objective_curve_[-1])
    assert_array_equal(clf.predict(X), clfl.predict(X))
    assert_array_equal(clf.predict(X), Y)
def inference_data():
    """
    Testing with a single type of nodes. Must do as well as
    EdgeFeatureGraphCRF.

    Builds a grid problem with direction-dependent pairwise weights and
    returns (x, y, pw_horz, pw_vert, lp_result, n_states) for the
    inference tests.
    """
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)

    # linear ordering constraint horizontally
    pw_horz = -1 * np.eye(n_states)
    xx, yy = np.indices(pw_horz.shape)
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states)
    pw_vert[xx != yy] = 1
    pw_vert *= 10

    # per-edge weights: horizontal edges first, then vertical ones
    edge_weights = np.vstack([
        np.repeat(pw_horz[np.newaxis, :, :], edge_list[0].shape[0], axis=0),
        np.repeat(pw_vert[np.newaxis, :, :], edge_list[1].shape[0], axis=0),
    ])

    # reference result from plain LP inference
    res = lp_general_graph(-x.reshape(-1, n_states), edges, edge_weights)

    edge_features = edge_list_to_features(edge_list)
    x = ([x.reshape(-1, n_states)], [edges], [edge_features])
    y = y.ravel()
    return x, y, pw_horz, pw_vert, res, n_states
def test_max_product_multinomial_crf():
    # Max-product inference with a hand-crafted weight vector must recover
    # the ground truth.
    X, Y = generate_blocks_multinomial(n_samples=1)
    x, y = X[0], Y[0]
    w = np.array([1., 0., 0.,   # unary
                  0., 1., 0.,
                  0., 0., 1.,
                  .4,           # pairwise
                  -.3, .3,
                  -.5, -.1, .3])
    crf = GridCRF(inference_method='max-product')
    crf.initialize(X, Y)
    assert_array_equal(y, crf.inference(x, w))
def test_joint_feature_discrete():
    """joint_feature on discrete labels accumulates pairwise label counts.

    Checks the shape of the joint feature vector and the pairwise counts
    for both edge directions (first horizontal, then vertical in the
    flattened vector); the unary part is trusted.
    """
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1)
    x, y = X[0], Y[0]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, 3), edges, edge_features)
    y_flat = y.ravel()
    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        crf = EdgeFeatureGraphCRF(inference_method=inference_method)
        crf.initialize([x], [y_flat])
        joint_feature_y = crf.joint_feature(x, y_flat)
        assert_equal(joint_feature_y.shape, (crf.size_joint_feature,))
        # pairwise part comes after the unary block:
        # first horizontal, then vertical
        # we trust the unaries ;)
        pw_joint_feature_horz, pw_joint_feature_vert = joint_feature_y[
            crf.n_states * crf.n_features:].reshape(2, crf.n_states,
                                                    crf.n_states)
        # vertical edges only ever connect equal labels here
        assert_array_equal(pw_joint_feature_vert, np.diag([9 * 4, 9 * 4,
                                                           9 * 4]))
        # horizontal edges: equal labels plus the ordered transitions
        # 0 -> 1 and 1 -> 2
        expected_horz = np.diag([10 * 3, 10 * 3, 10 * 3])
        expected_horz[0, 1] = 10
        expected_horz[1, 2] = 10
        assert_array_equal(pw_joint_feature_horz, expected_horz)
This example illustrates the role of approximate inference and caching in
exact learning of a 1-slack SSVM.

Please see plot_objective_curve.py for an interpretation of the curves.

We start learning by using an undergenerating inference method, QPBO-based
alpha expansion. Once the algorithm can not find a violated constraint any
more, we switch to a less efficient but exact inference procedure,
branch-and-bound based on AD3. The switch to AD3 can be seen in the graph
after the (approximate) primal objective and the cutting plane lower bound
touch. (zoom in)

After the switch to exact inference, the red circles show the true primal
objective.
"""
from pystruct.models import DirectionalGridCRF
import pystruct.learners as ssvm
from pystruct.datasets import generate_blocks_multinomial
from pystruct.plot_learning import plot_learning

X, Y = generate_blocks_multinomial(noise=2, n_samples=20, seed=1)
crf = DirectionalGridCRF(inference_method="qpbo", neighborhood=4)
# cache up to 100 inference results; switch to exact AD3 branch-and-bound
# once the approximate method finds no more violated constraints
clf = ssvm.OneSlackSSVM(model=crf, n_jobs=-1, inference_cache=100,
                        show_loss_every=10,
                        switch_to=("ad3", {'branch_and_bound': True}))
clf.fit(X, Y)

# plot the learning curves over iterations (not wall-clock time)
plot_learning(clf, time=False)
def test_edge_feature_latent_node_crf_no_latent():
    """Inference with zero latent nodes must match plain LP inference.

    Builds a grid problem with direction-dependent pairwise weights, pads
    the unaries with -100 for the 5 hidden states so they are never chosen,
    and compares EdgeFeatureLatentNodeCRF inference (relaxed and discrete)
    against lp_general_graph.
    """
    # no latent nodes
    # Test inference with different weights in different directions
    X, Y = generate_blocks_multinomial(noise=2, n_samples=1, seed=1,
                                       size_x=10)
    x, y = X[0], Y[0]
    n_states = x.shape[-1]
    edge_list = make_grid_edges(x, 4, return_lists=True)
    edges = np.vstack(edge_list)
    pw_horz = -1 * np.eye(n_states + 5)
    xx, yy = np.indices(pw_horz.shape)
    # linear ordering constraint horizontally
    pw_horz[xx > yy] = 1
    # high cost for unequal labels vertically
    pw_vert = -1 * np.eye(n_states + 5)
    pw_vert[xx != yy] = 1
    pw_vert *= 10
    # generate edge weights
    edge_weights_horizontal = np.repeat(pw_horz[np.newaxis, :, :],
                                        edge_list[0].shape[0], axis=0)
    edge_weights_vertical = np.repeat(pw_vert[np.newaxis, :, :],
                                      edge_list[1].shape[0], axis=0)
    edge_weights = np.vstack([edge_weights_horizontal,
                              edge_weights_vertical])

    # do inference
    # pad x for hidden states: -100 makes them strictly suboptimal
    x_padded = -100 * np.ones((x.shape[0], x.shape[1], x.shape[2] + 5))
    x_padded[:, :, :x.shape[2]] = x
    res = lp_general_graph(-x_padded.reshape(-1, n_states + 5), edges,
                           edge_weights)

    edge_features = edge_list_to_features(edge_list)
    x = (x.reshape(-1, n_states), edges, edge_features, 0)  # 0 latent nodes
    y = y.ravel()

    for inference_method in get_installed(["lp"]):
        # same inference through CRF interface
        crf = EdgeFeatureLatentNodeCRF(n_labels=3,
                                       inference_method=inference_method,
                                       n_edge_features=2, n_hidden_states=5)
        w = np.hstack([np.eye(3).ravel(),
                       -pw_horz.ravel(),
                       -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=True)
        # compare LP relaxations only to 4 decimals: solver output is
        # not exact to the default 6 decimals
        assert_array_almost_equal(res[0],
                                  y_pred[0].reshape(-1, n_states + 5), 4)
        assert_array_almost_equal(res[1], y_pred[1], 4)
        assert_array_equal(y, np.argmax(y_pred[0], axis=-1))

    for inference_method in get_installed(["lp", "ad3", "qpbo"]):
        # again, this time discrete predictions only
        crf = EdgeFeatureLatentNodeCRF(n_labels=3,
                                       inference_method=inference_method,
                                       n_edge_features=2, n_hidden_states=5)
        w = np.hstack([np.eye(3).ravel(),
                       -pw_horz.ravel(),
                       -pw_vert.ravel()])
        y_pred = crf.inference(x, w, relaxed=False)
        assert_array_equal(y, y_pred)