def test_latent_node_boxes_standard_latent():
    # learn the "easy" 2x2 boxes dataset.
    # a 2x2 box is placed randomly in a 4x4 grid
    # we add a latent variable for each 2x2 patch
    # that should make the model fairly simple
    X, Y = make_simple_2x2(seed=1, n_samples=40)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf, max_iter=100)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # make edges: grid edges plus edges to the hidden states
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        Y_flat = [y.ravel() for y in Y]

        # each sample gets 2 * 2 = 4 latent nodes
        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:20], Y_flat[:20])

        assert_array_equal(latent_svm.predict(X_[:20]), Y_flat[:20])
        assert_equal(latent_svm.score(X_[:20], Y_flat[:20]), 1)

        # test that score is not always 1
        assert_true(.98 < latent_svm.score(X_[20:], Y_flat[20:]) < 1)
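# For reference, a minimal sketch (hypothetical helper, not part of the test
# suite) of how the samples above are assembled: each x is a tuple of node
# features, the stacked grid-plus-latent edges, and the number of latent nodes
# that are appended after the visible nodes.
def _build_latent_sample(image, n_latent=2 * 2):
    # image: (4, 4) array with one feature per grid cell
    features = image.reshape(-1, 1)
    # grid edges between visible nodes, plus edges from each visible node to
    # the latent node of its 2x2 patch (as returned by make_edges_2x2)
    edges = np.vstack([make_grid_edges(image), make_edges_2x2()])
    return (features, edges, n_latent)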
def test_initialize():
    # 17 nodes, three features, 5 visible states, 2 hidden states
    rnd = np.random.RandomState(0)
    feats = rnd.normal(size=(17, 3))
    edges = np.zeros((0, 2), dtype=int)  # no edges
    x = (feats, edges, 4)  # 4 latent variables
    y = rnd.randint(5, size=17)

    crf = LatentNodeCRF(n_labels=5, n_features=3)
    # fully specified model: initialize is a no-op
    crf.initialize([x], [y])
    assert_equal(crf.n_states, 5 + 2)

    # test that initialization infers the sizes from the data
    crf = LatentNodeCRF()
    crf.initialize([x], [y])
    assert_equal(crf.n_labels, 5)
    assert_equal(crf.n_states, 5 + 2)
    assert_equal(crf.n_features, 3)

    # inconsistent n_labels raises
    crf = LatentNodeCRF(n_labels=3)
    assert_raises(ValueError, crf.initialize, X=[x], Y=[y])
def test_inference_chain():
    # like test_inference_trivial below, but with pairwise edges along the chain
    features = np.array([-1, 1, -1, 1, -1, 1])
    unary_parameters = np.array([-1, 1])
    pairwise_parameters = np.array([+1,
                                    +0, 1,
                                    +3, 0, 0,
                                    +0, 3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)
    edges = np.vstack([np.arange(5), np.arange(1, 6)]).T

    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])
    all_edges = np.vstack([edges, other_edges])

    x = (features.reshape(-1, 1), all_edges, 2)
    h, energy_lp = crf.inference(x, w, return_energy=True)
    y = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)
    assert_array_equal(y, features > 0)
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    # continuous inference and joint_feature:
    h, energy_lp = crf.inference(x, w, return_energy=True, relaxed=True)
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)
def test_binary_blocks_cutting_plane_latent_node():
    # testing cutting plane ssvm on easy binary dataset
    # we use the LatentNodeCRF without latent nodes and check that it does the
    # same as GraphCRF
    X, Y = generate_blocks(n_samples=3)
    crf = GraphCRF()
    clf = NSlackSSVM(model=crf, max_iter=20, C=100, check_constraints=True,
                     break_on_bad=False, n_jobs=1)
    x1, x2, x3 = X
    y1, y2, y3 = Y
    n_states = len(np.unique(Y))
    # delete some rows to make it more fun
    x1, y1 = x1[:, :-1], y1[:, :-1]
    x2, y2 = x2[:-1], y2[:-1]
    # generate graphs
    X_ = [x1, x2, x3]
    G = [make_grid_edges(x) for x in X_]

    # reshape / flatten x and y
    X_ = [x.reshape(-1, n_states) for x in X_]
    Y = [y.ravel() for y in [y1, y2, y3]]

    X = list(zip(X_, G))

    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    # same model expressed as a LatentNodeCRF with zero hidden states
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=0)
    latent_svm = LatentSSVM(NSlackSSVM(model=latent_crf, max_iter=20, C=100,
                                       check_constraints=True,
                                       break_on_bad=False, n_jobs=1),
                            latent_iter=3)
    X_latent = list(zip(X_, G, np.zeros(len(X_))))
    latent_svm.fit(X_latent, Y, H_init=Y)
    Y_pred = latent_svm.predict(X_latent)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    assert_array_almost_equal(latent_svm.w, clf.w)
def test_latent_node_boxes_standard_latent_features():
    # learn the "easy" 2x2 boxes dataset.
    # we make it even easier now by adding features that encode the correct
    # latent state. This basically tests that the features are actually used
    X, Y = make_simple_2x2(seed=1, n_samples=20, n_flips=6)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1,
                               latent_node_features=True)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf, max_iter=100,
                                  learning_rate=0.01, momentum=0)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # make edges: grid edges plus edges to the hidden states
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        # augment X with the features for hidden units
        X_flat = [np.vstack([x, y[::2, ::2].reshape(-1, 1)])
                  for x, y in zip(X_flat, Y)]
        Y_flat = [y.ravel() for y in Y]

        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:10], Y_flat[:10])

        assert_array_equal(latent_svm.predict(X_[:10]), Y_flat[:10])
        assert_equal(latent_svm.score(X_[:10], Y_flat[:10]), 1)

        # we actually become perfect ^^
        assert_true(.98 < latent_svm.score(X_[10:], Y_flat[10:]) <= 1)
def test_latent_node_boxes_latent_subgradient():
    # same as above, now with elementary subgradients
    X, Y = make_simple_2x2(seed=1)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    latent_svm = SubgradientLatentSSVM(model=latent_crf, max_iter=50, C=10)

    # make edges: grid edges plus edges to the hidden states
    edges = make_edges_2x2()
    G = [np.vstack([make_grid_edges(x), edges]) for x in X]

    # reshape / flatten x and y
    X_flat = [x.reshape(-1, 1) for x in X]
    Y_flat = [y.ravel() for y in Y]

    X_ = list(zip(X_flat, G, [4 * 4 for x in X_flat]))
    latent_svm.fit(X_, Y_flat)

    assert_equal(latent_svm.score(X_, Y_flat), 1)
def test_inference_trivial():
    # size 6 chain graph
    # first three and last three have a latent variable
    features = np.array([-1, 1, -1, 1, -1, 1])
    unary_parameters = np.array([-1, 1])
    pairwise_parameters = np.array([+0,
                                    +0, 0,
                                    +3, 0, 0,
                                    +0, 3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)

    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])
    all_edges = np.vstack(other_edges)

    x = (features.reshape(-1, 1), all_edges, 2)

    # test inference
    h, energy_lp = crf.inference(x, w, return_energy=True)
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    y = crf.label_from_latent(h)
    assert_array_equal(y, [0, 0, 0, 1, 1, 1])

    y_unaries = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    assert_array_equal(y_unaries, features > 0)

    # test joint_feature
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)

    # test loss
    h_unaries = crf.latent(x, y_unaries, w)
    assert_equal(crf.loss(h, h_unaries), 2)

    # continuous inference and joint_feature:
    h_continuous, energy_lp = crf.inference(x, w, return_energy=True,
                                            relaxed=True)
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)

    # test continuous loss
    assert_equal(crf.loss(h, h_continuous), 0)

    # test loss-augmented inference energy
    h_hat, energy_lp = crf.loss_augmented_inference(x, h, w,
                                                    return_energy=True)
    assert_almost_equal(-energy_lp, np.dot(w, crf.joint_feature(x, h_hat))
                        + crf.loss(h_hat, y))
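# The inference tests above repeatedly verify that the energy reported by the
# LP solver matches the negated linear score w . joint_feature(x, h). A small
# sketch of that check, factored into a helper (hypothetical, not part of
# pystruct), could look like this:
def assert_lp_energy_consistent(crf, x, w, **inference_kwargs):
    # run (possibly relaxed) inference and compare the LP energy against the
    # linear score of the returned labeling
    h, energy_lp = crf.inference(x, w, return_energy=True, **inference_kwargs)
    assert_almost_equal(np.dot(w, crf.joint_feature(x, h)), -energy_lp)
    return h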
Y_flat = [y.ravel() for y in Y]

# first, use standard graph CRF. Can't do much, high loss.
crf = GraphCRF()
svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1)

G = [make_grid_edges(x) for x in X]

asdf = list(zip(X_flat, G))
svm.fit(asdf, Y_flat)
plot_boxes(svm.predict(asdf), title="Non-latent SSVM predictions")
print("Training score binary grid CRF: %f" % svm.score(asdf, Y_flat))

# using one latent variable for each 2x2 rectangle
latent_crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2,
                           inference_method='lp')
ssvm = OneSlackSSVM(model=latent_crf, max_iter=200, C=100, verbose=1,
                    n_jobs=-1, show_loss_every=10, inference_cache=50)
latent_svm = LatentSSVM(ssvm)

# make edges for hidden states:
# connect each of the four latent nodes to its 2x2 patch of visible nodes
edges = []
node_indices = np.arange(4 * 4).reshape(4, 4)
for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
# train an SSVM on hierarchical (latent-node) segmentation data with 21 label
# classes
def svm_on_segments(C=.1, learning_rate=.001, subgradient=True):
    # load and prepare data
    lateral = True
    latent = True
    test = False
    #data_train = load_data(which="piecewise")
    #data_train = add_edges(data_train, independent=False)
    #data_train = add_kraehenbuehl_features(data_train, which="train_30px")
    #data_train = add_kraehenbuehl_features(data_train, which="train")
    #if lateral:
        #data_train = add_edge_features(data_train)
    data_train = load_data_global_probs(latent=latent)
    X_org_ = data_train.X
    #data_train = make_hierarchical_data(data_train, lateral=lateral,
                                        #latent=latent, latent_lateral=True)
    data_train = discard_void(data_train, 21, latent_features=True)
    X_, Y_ = data_train.X, data_train.Y

    # remove edges
    if not lateral:
        X_org_ = [(x[0], np.zeros((0, 2), dtype=int)) for x in X_org_]

    if test:
        data_val = load_data('val', which="piecewise")
        data_val = add_edges(data_val, independent=False)
        data_val = add_kraehenbuehl_features(data_val)
        data_val = make_hierarchical_data(data_val, lateral=lateral,
                                          latent=latent)
        data_val = discard_void(data_val, 21)
        X_.extend(data_val.X)
        Y_.extend(data_val.Y)

    n_states = 21
    class_weights = 1. / np.bincount(np.hstack(Y_))
    class_weights *= 21. / np.sum(class_weights)
    experiment_name = ("latent5_features_C%f_top_node" % C)
    logger = SaveLogger(experiment_name + ".pickle", save_every=10)
    if latent:
        model = LatentNodeCRF(n_labels=n_states,
                              n_features=data_train.X[0][0].shape[1],
                              n_hidden_states=5,
                              inference_method='qpbo' if lateral else 'dai',
                              class_weight=class_weights,
                              latent_node_features=True)
        if subgradient:
            ssvm = learners.LatentSubgradientSSVM(
                model, C=C, verbose=1, show_loss_every=10, logger=logger,
                n_jobs=-1, learning_rate=learning_rate, decay_exponent=1,
                momentum=0., max_iter=100000)
        else:
            latent_logger = SaveLogger("lssvm_" + experiment_name +
                                       "_%d.pickle", save_every=1)
            base_ssvm = learners.OneSlackSSVM(
                model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.001,
                show_loss_every=200, inference_cache=50, logger=logger,
                cache_tol='auto', inactive_threshold=1e-5,
                break_on_bad=False, switch_to_ad3=True)
            ssvm = learners.LatentSSVM(base_ssvm, logger=latent_logger)

        warm_start = False
        if warm_start:
            ssvm = logger.load()
            ssvm.logger = SaveLogger(experiment_name + "_retrain.pickle",
                                     save_every=10)
            ssvm.max_iter = 100000
            ssvm.learning_rate = 0.00001
            ssvm.momentum = 0
    else:
        #model = GraphCRF(n_states=n_states,
                         #n_features=data_train.X[0][0].shape[1],
                         #inference_method='qpbo' if lateral else 'dai',
                         #class_weight=class_weights)
        model = EdgeFeatureGraphCRF(
            n_states=n_states, n_features=data_train.X[0][0].shape[1],
            inference_method='qpbo' if lateral else 'dai',
            class_weight=class_weights, n_edge_features=4,
            symmetric_edge_features=[0, 1], antisymmetric_edge_features=[2])
        ssvm = learners.OneSlackSSVM(
            model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.0001,
            show_loss_every=200, inference_cache=50, logger=logger,
            cache_tol='auto', inactive_threshold=1e-5, break_on_bad=False)

    #ssvm = logger.load()

    X_, Y_ = shuffle(X_, Y_)
    #ssvm.fit(data_train.X, data_train.Y)
    #ssvm.fit(X_, Y_, warm_start=warm_start)
    ssvm.fit(X_, Y_)
    print("fit finished!")
# variant of svm_on_segments for the Pascal data with a CPMC superpixel
# hierarchy
def svm_on_segments(C=.1, learning_rate=.001, subgradient=False):
    data_file = "data_train_XY.pickle"
    ds = PascalSegmentation()
    if os.path.exists(data_file):
        X_, Y_ = cPickle.load(open(data_file))
    else:
        # load and prepare data
        data_train = load_pascal("train", sp_type="cpmc")
        data_train = make_cpmc_hierarchy(ds, data_train)
        data_train = discard_void(ds, data_train)
        X_, Y_ = data_train.X, data_train.Y
        cPickle.dump((X_, Y_), open(data_file, 'wb'), -1)

    class_weights = 1. / np.bincount(np.hstack(Y_))
    class_weights *= 21. / np.sum(class_weights)
    experiment_name = ("latent_25_cpmc_%f_qpbo_n_slack_blub3" % C)
    logger = SaveLogger(experiment_name + ".pickle", save_every=10)
    model = LatentNodeCRF(n_hidden_states=25, inference_method='qpbo',
                          class_weight=class_weights,
                          latent_node_features=False)
    if subgradient:
        ssvm = learners.LatentSubgradientSSVM(
            model, C=C, verbose=1, show_loss_every=10, logger=logger,
            n_jobs=-1, learning_rate=learning_rate, decay_exponent=1,
            momentum=0., max_iter=100000, decay_t0=100)
    else:
        latent_logger = SaveLogger("lssvm_" + experiment_name +
                                   "_%d.pickle", save_every=1)
        #base_ssvm = learners.OneSlackSSVM(
            #model, verbose=2, C=C, max_iter=100, n_jobs=-1, tol=0.001,
            #show_loss_every=200, inference_cache=50, logger=logger,
            #cache_tol='auto', inactive_threshold=1e-5, break_on_bad=False,
            #switch_to=('ogm', {'alg': 'dd'}))
        base_ssvm = learners.NSlackSSVM(
            model, verbose=4, C=C, n_jobs=-1, tol=0.1, show_loss_every=20,
            logger=logger, inactive_threshold=1e-8, break_on_bad=False,
            batch_size=36, inactive_window=10,
            switch_to=('ad3', {'branch_and_bound': True}))
        ssvm = learners.LatentSSVM(base_ssvm, logger=latent_logger,
                                   latent_iter=3)

    #warm_start = True
    warm_start = False
    if warm_start:
        ssvm = logger.load()
        ssvm.logger = SaveLogger(experiment_name + "_retrain.pickle",
                                 save_every=10)
        ssvm.max_iter = 10000
        ssvm.decay_exponent = 1
        #ssvm.decay_t0 = 1000
        #ssvm.learning_rate = 0.00001
        #ssvm.momentum = 0

    X_, Y_ = shuffle(X_, Y_)
    #ssvm.fit(data_train.X, data_train.Y)
    ssvm.fit(X_, Y_)
    #H_init = [np.hstack([y, np.random.randint(21, 26)]) for y in Y_]
    #ssvm.fit(X_, Y_, H_init=H_init)
    print("fit finished!")
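# A minimal invocation sketch, assuming the data-loading helpers and pickled
# training data referenced above are available; the C value is only an example.
if __name__ == "__main__":
    svm_on_segments(C=0.01, subgradient=False)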