def fresh_train(self, x, y, iterations=10):
    """Train a brand-new edge-feature CRF on ``(x, y)`` and persist it.

    A fresh ``EdgeFeatureGraphCRF`` (max-product inference) is stored on
    ``self.model`` and a ``SubgradientSSVM`` limited to ``iterations``
    iterations is stored on ``self.learner``. The learner is fitted without
    warm start and ``self.save()`` is called afterwards.
    """
    crf = EdgeFeatureGraphCRF(inference_method="max-product")
    self.model = crf
    # The learner's logger writes to a path derived from the user id.
    learner_path = model_file.format(self.userId + "-learner")
    ssvm = SubgradientSSVM(
        model=crf,
        max_iter=iterations,
        logger=SaveLogger(learner_path))
    self.learner = ssvm
    ssvm.fit(x, y, warm_start=False)
    self.save()
def test_binary_blocks():
    """Subgradient SSVM must reproduce the labels of the easy binary blocks."""
    X, Y = generate_blocks(n_samples=5)
    grid_model = GridCRF(inference_method=inference_method)
    ssvm = SubgradientSSVM(model=grid_model)
    ssvm.fit(X, Y)
    assert_array_equal(Y, ssvm.predict(X))
def test_multinomial_checker_subgradient():
    """Fit a subgradient SSVM on the multinomial checkerboard (noise=0.4)."""
    X, Y = generate_checker_multinomial(n_samples=10, noise=0.4)
    # One CRF state per distinct label value present in Y.
    grid_model = GridCRF(n_states=len(np.unique(Y)),
                         inference_method=inference_method)
    ssvm = SubgradientSSVM(model=grid_model, max_iter=50)
    ssvm.fit(X, Y)
    predicted = ssvm.predict(X)
    assert_array_equal(Y, predicted)
def test_binary_blocks_subgradient():
    """Subgradient SSVM must fit the easy binary blocks dataset exactly."""
    X, Y = toy.generate_blocks(n_samples=10)
    ssvm = SubgradientSSVM(model=GridCRF(), max_iter=200, C=100,
                           learning_rate=0.1)
    ssvm.fit(X, Y)
    predicted = ssvm.predict(X)
    assert_array_equal(Y, predicted)
def test_multinomial_checker_subgradient():
    """Fit a subgradient SSVM on the noise-free multinomial checkerboard."""
    X, Y = toy.generate_checker_multinomial(n_samples=10, noise=0.0)
    state_count = len(np.unique(Y))
    learner_options = dict(max_iter=50, C=10, momentum=.98,
                           learning_rate=0.01)
    ssvm = SubgradientSSVM(model=GridCRF(n_states=state_count),
                           **learner_options)
    ssvm.fit(X, Y)
    assert_array_equal(Y, ssvm.predict(X))
def test_multinomial_blocks_subgradient():
    """Subgradient SSVM on the multinomial blocks dataset (noise=0.6)."""
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.6, seed=1)
    grid_model = GridCRF(n_states=len(np.unique(Y)),
                         inference_method=inference_method)
    ssvm = SubgradientSSVM(model=grid_model, max_iter=50)
    ssvm.fit(X, Y)
    predicted = ssvm.predict(X)
    assert_array_equal(Y, predicted)
def test_multinomial_blocks_subgradient_offline():
    """Subgradient SSVM with ``online=False`` on the multinomial blocks data."""
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.6, seed=1)
    state_count = len(np.unique(Y))
    grid_model = GridCRF(n_states=state_count,
                         inference_method=inference_method)
    ssvm = SubgradientSSVM(model=grid_model, max_iter=100, online=False)
    ssvm.fit(X, Y)
    assert_array_equal(Y, ssvm.predict(X))
def test_binary_blocks():
    """Subgradient SSVM with an explicit step-size schedule on binary blocks."""
    X, Y = generate_blocks(n_samples=5)
    grid_model = GridCRF(inference_method=inference_method)
    # Hyper-parameters gathered in one place; values are unchanged.
    schedule = dict(C=100, learning_rate=1, decay_exponent=1, momentum=0,
                    decay_t0=10)
    ssvm = SubgradientSSVM(model=grid_model, **schedule)
    ssvm.fit(X, Y)
    assert_array_equal(Y, ssvm.predict(X))
def test_binary_checker_subgradient():
    """Subgradient SSVM on the non-submodular binary checkerboard dataset."""
    X, Y = toy.generate_checker(n_samples=10)
    grid_model = GridCRF()
    ssvm = SubgradientSSVM(model=grid_model, max_iter=100, C=100,
                           momentum=.9, learning_rate=0.1)
    ssvm.fit(X, Y)
    predicted = ssvm.predict(X)
    assert_array_equal(Y, predicted)
def fresh_train(self, x, y, iterations=10):
    """Train a brand-new chain CRF on ``(x, y)`` and persist it.

    Stores a fresh ``ChainCRF`` (max-product inference) on ``self.model`` and
    a ``SubgradientSSVM`` limited to ``iterations`` iterations on
    ``self.learner``, fits without warm start, then calls ``self.save()``.
    """
    chain = ChainCRF(inference_method="max-product")
    self.model = chain
    learner_path = MODEL_PATH_TEMPLATE.format(self.userId + "-learner")
    ssvm = SubgradientSSVM(
        model=chain,
        max_iter=iterations,
        logger=SaveLogger(learner_path),
        show_loss_every=50)
    self.learner = ssvm
    ssvm.fit(x, y, warm_start=False)
    self.save()
def test_multinomial_blocks_subgradient():
    """Subgradient SSVM with momentum on the multinomial blocks (noise=0.3)."""
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=1)
    state_count = len(np.unique(Y))
    grid_model = GridCRF(n_states=state_count,
                         inference_method=inference_method)
    learner_options = dict(max_iter=50, C=10, momentum=.98,
                           learning_rate=0.001)
    ssvm = SubgradientSSVM(model=grid_model, **learner_options)
    ssvm.fit(X, Y)
    assert_array_equal(Y, ssvm.predict(X))
def test_blobs_2d_subgradient():
    """Multi-class subgradient SSVM on three blobs; test half must be exact."""
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # The model has no intercept, so append a constant-1 column by hand.
    bias_column = np.ones((X.shape[0], 1))
    X = np.hstack([X, bias_column])
    # First 40 samples train, remaining 40 test.
    X_train, X_test = X[:40], X[40:]
    Y_train, Y_test = Y[:40], Y[40:]
    problem = MultiClassClf(n_features=3, n_classes=3)
    learner = SubgradientSSVM(problem, C=1000)
    learner.fit(X_train, Y_train)
    predicted = np.hstack(learner.predict(X_test))
    assert_array_equal(Y_test, predicted)
def test_latent_node_boxes_standard_latent():
    """Learn the "easy" 2x2 boxes dataset with each base SSVM learner.

    A 2x2 box is placed randomly in a 4x4 grid and one latent variable is
    added for each 2x2 patch, which should make the model fairly simple.
    Every base learner is wrapped in ``LatentSSVM``, fitted on the first 20
    samples and must reach a perfect training score and a test score strictly
    between .98 and 1.
    """
    X, Y = make_simple_2x2(seed=1, n_samples=40)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf, max_iter=100)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # Grid edges of each sample plus the edges to the hidden states.
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        Y_flat = [y.ravel() for y in Y]

        # Materialise the zip: on Python 3 ``zip`` returns an iterator, which
        # cannot be sliced as done below.
        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:20], Y_flat[:20])

        assert_array_equal(latent_svm.predict(X_[:20]), Y_flat[:20])
        assert_equal(latent_svm.score(X_[:20], Y_flat[:20]), 1)

        # test that score is not always 1
        assert_true(.98 < latent_svm.score(X_[20:], Y_flat[20:]) < 1)
def test_subgradient_svm_as_crf_pickling():
    """Check that a learner stored by ``SaveLogger`` scores like the live one.

    Iris samples are wrapped as single-node graphs with no edges, a
    ``GraphCRF`` with unary inference is fitted via ``SubgradientSSVM``, and
    both the fitted learner and the copy reloaded from the logger must score
    above .97 on the held-out split.
    """
    import os

    iris = load_iris()
    X, y = iris.data, iris.target

    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype on every NumPy version.
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y, random_state=1)

    # mkstemp returns an *open* descriptor; only the path is needed here, so
    # close the descriptor right away instead of leaking it.
    fd, file_name = mkstemp()
    os.close(fd)

    pbl = GraphCRF(n_features=4, n_states=3, inference_method='unary')
    logger = SaveLogger(file_name)
    svm = SubgradientSSVM(pbl, logger=logger, max_iter=100)
    svm.fit(X_train, y_train)

    assert_less(.97, svm.score(X_test, y_test))
    assert_less(.97, logger.load().score(X_test, y_test))
class ChainCRFClassifier(PystructClassifier):
    """Pystruct classifier that uses a ``ChainCRF`` model."""

    def fresh_train(self, x, y, iterations=10):
        """Build a new chain CRF and learner, fit on ``(x, y)`` and save.

        The learner is a ``SubgradientSSVM`` limited to ``iterations``
        iterations and fitted without warm start.
        """
        self.model = ChainCRF(inference_method="max-product")
        learner_path = MODEL_PATH_TEMPLATE.format(self.userId + "-learner")
        self.learner = SubgradientSSVM(
            model=self.model,
            max_iter=iterations,
            logger=SaveLogger(learner_path),
            show_loss_every=50)
        self.learner.fit(x, y, warm_start=False)
        self.save()

    def check_featurizer_set(self):
        """Install a default ``PystructChainFeaturizer`` when none is set."""
        if self.featurizer:
            return
        self.set_featurizer(PystructChainFeaturizer())
        logger.debug("WARNING! Featurizer not set, setting new default "
                     "featurizer")
def test_objective():
    """LatentSubgradientSSVM must match SubgradientSSVM without latent states.

    With one state per label the latent model has no real latent structure,
    so weights and the final objective value must agree with the plain
    learner, and the plain learner must predict the training labels.
    """
    X, Y = toy.generate_blocks_multinomial(n_samples=10)
    n_labels = 3
    # Hyper-parameters shared by both learners.
    shared = dict(max_iter=50, C=10., learning_rate=0.001, momentum=0.98,
                  decay_exponent=0)

    latent_model = LatentGridCRF(n_labels=n_labels, n_states_per_label=1)
    latent_learner = LatentSubgradientSSVM(model=latent_model, **shared)
    # Pre-setting w disables the random initialisation.
    latent_learner.w = np.zeros(latent_model.size_psi)
    latent_learner.fit(X, Y)

    plain_learner = SubgradientSSVM(model=GridCRF(n_states=n_labels),
                                    **shared)
    plain_learner.fit(X, Y)

    assert_array_almost_equal(plain_learner.w, latent_learner.w)
    assert_array_equal(plain_learner.predict(X), Y)
    assert_almost_equal(plain_learner.objective_curve_[-1],
                        latent_learner.objective_curve_[-1])
class EdgeCRFClassifier(PystructSimplificationPipeline):
    """Simplification pipeline that uses an ``EdgeFeatureGraphCRF`` model."""

    def fresh_train(self, x, y, iterations=10, decay_rate=1):
        """Build a new edge-feature CRF and learner, fit on ``(x, y)``, save.

        ``decay_rate`` is forwarded to the learner as ``decay_exponent``. The
        learner runs at most ``iterations`` iterations without warm start.
        """
        self.model = EdgeFeatureGraphCRF(inference_method="max-product")
        learner_path = MODEL_PATH_TEMPLATE.format(self.userId + "-learner")
        self.learner = SubgradientSSVM(
            model=self.model,
            max_iter=iterations,
            logger=SaveLogger(learner_path),
            show_loss_every=50,
            decay_exponent=decay_rate)
        self.learner.fit(x, y, warm_start=False)
        self.save()

    def check_featurizer_set(self):
        """Install a default ``PystructEdgeFeaturizer`` when none is set."""
        if self.featurizer:
            return
        self.set_featurizer(PystructEdgeFeaturizer())
        logger.info("WARNING! Featurizer not set, setting new default "
                    "featurizer")
def test_objective():
    """SubgradientLatentSSVM must match SubgradientSSVM without latent states.

    With one state per label, both learners must end with the same weights,
    the same final objective value and the same predictions, and those
    predictions must equal the training labels.
    """
    X, Y = generate_blocks_multinomial(n_samples=10, noise=.3, seed=1)
    # Use the first available solver among qpbo, ad3 and lp.
    inference_method = get_installed(["qpbo", "ad3", "lp"])[0]
    n_labels = 3
    shared = dict(max_iter=20, C=10., learning_rate=0.001, momentum=0.98)

    latent_model = LatentGridCRF(n_labels=n_labels, n_states_per_label=1,
                                 inference_method=inference_method)
    latent_learner = SubgradientLatentSSVM(model=latent_model, **shared)
    latent_model.initialize(X, Y)
    # Pre-setting w disables the random initialisation.
    latent_learner.w = np.zeros(latent_model.size_joint_feature)
    latent_learner.fit(X, Y)

    plain_model = GridCRF(n_states=n_labels,
                          inference_method=inference_method)
    plain_learner = SubgradientSSVM(model=plain_model, **shared)
    plain_learner.fit(X, Y)

    assert_array_almost_equal(plain_learner.w, latent_learner.w)
    assert_almost_equal(plain_learner.objective_curve_[-1],
                        latent_learner.objective_curve_[-1])
    assert_array_equal(plain_learner.predict(X), latent_learner.predict(X))
    assert_array_equal(plain_learner.predict(X), Y)
def test_binary_ssvm_attractive_potentials_edgefeaturegraph(inference_method="qpbo"):
    """Fit a constrained subgradient SSVM on blocks data as edge-feature graphs.

    The grid samples are converted to ``EdgeFeatureGraphCRF`` instances with a
    single constant edge feature. The learner is fitted with
    ``zero_constraint=[4, 7]`` and ``negativity_constraint=[5, 6]``, once on
    the data as is and once with the sign of the edge features inverted; both
    fits must reproduce the training labels.
    """
    X, Y = generate_blocks(n_samples=10)
    crf = GridCRF(inference_method=inference_method)

    # convert X, Y to EdgeFeatureGraphCRF instances
    crf_edge = EdgeFeatureGraphCRF(inference_method=inference_method,
                                   symmetric_edge_features=[0])
    X_edge = []
    Y_edge = []
    for sample, target in zip(X, Y):
        unaries = sample.reshape((-1, 2))
        edges = crf._get_edges(sample)
        edge_feats = np.ones((edges.shape[0], 1))
        X_edge.append((unaries, edges, edge_feats))
        Y_edge.append(target.reshape((-1,)))

    def constrained_learner():
        # Both fits below use an identically configured learner.
        return SubgradientSSVM(model=crf_edge, max_iter=100, C=1, verbose=1,
                               zero_constraint=[4, 7],
                               negativity_constraint=[5, 6])

    # fit the model with the constraint on the off-diagonal potential
    submodular_clf_edge = constrained_learner()
    submodular_clf_edge.fit(X_edge, Y_edge)

    # symmetry constraint on edge features
    assert submodular_clf_edge.w[5] == submodular_clf_edge.w[6]

    Y_pred = submodular_clf_edge.predict(X_edge)
    assert_array_equal(Y_edge, Y_pred)

    # same constraints again, this time with inverted sign on the edge features
    X_edge_neg = [(unaries, edges, -feats) for unaries, edges, feats in X_edge]
    submodular_clf_edge = constrained_learner()
    submodular_clf_edge.fit(X_edge_neg, Y_edge)
    Y_pred = submodular_clf_edge.predict(X_edge_neg)
    assert_array_equal(Y_edge, Y_pred)
def test_latent_node_boxes_standard_latent_features():
    """Learn the "easy" 2x2 boxes dataset with features on the latent nodes.

    The task is made even easier by adding features that encode the correct
    latent state, which basically tests that those features are actually
    used. Every base learner is wrapped in ``LatentSSVM``, fitted on the first
    10 samples and must be perfect on them and score above .98 on the rest.
    """
    X, Y = make_simple_2x2(seed=1, n_samples=20, n_flips=6)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1,
                               latent_node_features=True)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf, max_iter=100,
                                  learning_rate=0.01, momentum=0)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # Grid edges of each sample plus the edges to the hidden states.
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        # augment X with the features for hidden units
        X_flat = [np.vstack([x, y[::2, ::2].reshape(-1, 1)])
                  for x, y in zip(X_flat, Y)]
        Y_flat = [y.ravel() for y in Y]

        # Materialise the zip: on Python 3 ``zip`` returns an iterator, which
        # cannot be sliced as done below.
        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:10], Y_flat[:10])

        assert_array_equal(latent_svm.predict(X_[:10]), Y_flat[:10])
        assert_equal(latent_svm.score(X_[:10], Y_flat[:10]), 1)

        # the held-out score may even be perfect here
        assert_true(.98 < latent_svm.score(X_[10:], Y_flat[10:]) <= 1)
def test_with_crosses_base_svms():
    """Every base learner wrapped in LatentSSVM must fit the crosses data.

    The dataset is very simple; the k-means initialisation is perfect.
    """
    crf = LatentGridCRF(n_labels=2, n_states_per_label=[1, 2])
    base_learners = [
        OneSlackSSVM(crf, inference_cache=50),
        NSlackSSVM(crf),
        SubgradientSSVM(crf, max_iter=400, learning_rate=.01,
                        decay_exponent=0, decay_t0=10),
    ]
    X, Y = generate_crosses(n_samples=10, noise=5, n_crosses=1, total_size=8)

    for base_ssvm in base_learners:
        base_ssvm.C = 100.
        latent_learner = LatentSSVM(base_ssvm=base_ssvm)
        latent_learner.fit(X, Y)
        predicted = latent_learner.predict(X)
        assert_array_equal(np.array(predicted), Y)
        assert_equal(latent_learner.score(X, Y), 1)
def test_ssvm_objectives():
    """Check that each learner's recorded objective matches a recomputation.

    This is not a very strong test, but it makes sure the objective function
    is called: the last recorded value of every learner is compared with
    ``objective_primal`` evaluated on the fitted weights.
    """
    X, Y = generate_blocks_multinomial(n_samples=10, noise=1.5, seed=0)
    crf = GridCRF(n_states=len(np.unique(Y)),
                  inference_method=inference_method)

    # n-slack cutting plane
    n_slack = NSlackSSVM(model=crf, max_iter=5, C=1, tol=.1)
    n_slack.fit(X, Y)
    expected = objective_primal(n_slack.model, n_slack.w, X, Y, n_slack.C)
    assert_almost_equal(n_slack.primal_objective_curve_[-1], expected)

    # one-slack cutting plane
    one_slack = OneSlackSSVM(model=crf, max_iter=5, C=1, tol=.1)
    one_slack.fit(X, Y)
    expected = objective_primal(one_slack.model, one_slack.w, X, Y,
                                one_slack.C, variant='one_slack')
    assert_almost_equal(one_slack.primal_objective_curve_[-1], expected)

    # subgradient; should also work in batch mode
    subgradient = SubgradientSSVM(model=crf, max_iter=5, C=1, batch_size=-1)
    subgradient.fit(X, Y)
    expected = objective_primal(subgradient.model, subgradient.w, X, Y,
                                subgradient.C)
    assert_almost_equal(subgradient.objective_curve_[-1], expected)

    # batch Frank-Wolfe
    fw_batch = FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=True)
    fw_batch.fit(X, Y)
    expected = objective_primal(fw_batch.model, fw_batch.w, X, Y, fw_batch.C)
    assert_almost_equal(fw_batch.primal_objective_curve_[-1], expected)

    # block-coordinate Frank-Wolfe
    fw_block = FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=False)
    fw_block.fit(X, Y)
    expected = objective_primal(fw_block.model, fw_block.w, X, Y, fw_block.C)
    assert_almost_equal(fw_block.primal_objective_curve_[-1], expected)
X, y = digits.data, digits.target # make binary task by doing odd vs even numers y = y % 2 # code as +1 and -1 y = 2 * y - 1 X /= X.max() X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) pbl = BinaryClf() n_slack_svm = NSlackSSVM(pbl, C=10, batch_size=-1) one_slack_svm = OneSlackSSVM(pbl, C=10, tol=0.1) subgradient_svm = SubgradientSSVM(pbl, C=10, learning_rate=0.1, max_iter=100, batch_size=10) # we add a constant 1 feature for the bias X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) # n-slack cutting plane ssvm start = time() n_slack_svm.fit(X_train_bias, y_train) time_n_slack_svm = time() - start acc_n_slack = n_slack_svm.score(X_test_bias, y_test) print("Score with pystruct n-slack ssvm: %f (took %f seconds)" % (acc_n_slack, time_n_slack_svm))
# Calculate HMM transitions for each frame and gesture n_gestures = len(np.unique(gesture_labels)) frame_prior_train, frame_transition_matrix_train = calculate_hmm_params(frame_labels, n_gestures) gesture_prior_train, gesture_transition_matrix_train = calculate_hmm_params(gesture_labels, n_gestures) print "Unary (frame) score:", frame_clf_train.score(np.vstack(frame_hists_train), np.hstack(frame_labels)) print "Unary (gesture) score:", gesture_clf_train.score(np.vstack(gesture_hists_train), np.hstack(gesture_labels)) gesture_transition_matrix_train = np.ones([n_gestures,3])/3. # Markov CRF markovCRF = MarkovCRF(n_states=n_gestures, clf=frame_clf_train, prior=frame_prior_train, transition=frame_transition_matrix_train, inference_method='dai') markov_svm = SubgradientSSVM(markovCRF, verbose=1, C=1., n_jobs=1) markov_svm.fit(frame_hists_train, frame_labels) m_predict = markov_svm.predict(frame_hists_train) print 'Markov w:', markov_svm.w print 'Markov CRF score: {}%'.format(100*np.sum([np.sum(np.equal(m_predict[i],x)) for i,x in enumerate(frame_labels)]) / np.sum([np.size(x) for x in frame_labels], dtype=np.float)) # semi-Markov CRF sm_crf = SemiMarkovCRF(n_states=n_gestures,clf=gesture_clf_train, prior=gesture_prior_train, transition_matrix=gesture_transition_matrix_train) sm_svm = SubgradientSSVM(sm_crf, verbose=1, C=1., n_jobs=1) sm_svm.fit(frame_hists_train, frame_labels) sm_predict = sm_svm.predict(frame_hists_train) print 'Semi-Markov w:', sm_svm.w print 'Semi-Markov CRF score: {}%'.format(100*np.sum([np.sum(sm_predict[i]==x) for i,x in enumerate(frame_labels)]) / np.sum([np.size(x) for x in frame_labels], dtype=np.float)) # Markov semi-Markov CRF
def make_random_trees(n_samples=50, n_nodes=100, n_states=7, n_features=10):
    """Generate random tree-structured samples labelled by a random CRF.

    Random weights are drawn for a ``GraphCRF`` with max-product inference.
    For each sample, random pairwise distances and random node features are
    drawn, the minimum spanning tree of the distances supplies the edges, and
    the labelling is the CRF's inference result under the random weights.

    Returns the list of ``(features, edges)`` samples, the list of labellings
    and the weight vector.
    """
    crf = GraphCRF(inference_method='max-product', n_states=n_states,
                   n_features=n_features)
    weights = np.random.randn(crf.size_joint_feature)

    samples = []
    labellings = []
    for _ in range(n_samples):
        # Draw order (distances first, then features) is kept on purpose so
        # the random stream is consumed exactly as before.
        distances = np.random.randn(n_nodes, n_nodes)
        features = np.random.randn(n_nodes, n_features)
        tree = minimum_spanning_tree(sparse.csr_matrix(distances))
        edges = np.c_[tree.nonzero()]
        sample = (features, edges)
        samples.append(sample)
        labellings.append(crf.inference(sample, weights))

    return samples, labellings, weights


X, y, weights = make_random_trees(n_nodes=1000)

X_train, X_test, y_train, y_test = train_test_split(X, y)

#tree_model = MultiLabelClf(edges=tree, inference_method=('ogm', {'alg': 'dyn'}))
tree_model = GraphCRF(inference_method='max-product')
tree_ssvm = SubgradientSSVM(tree_model, max_iter=4, C=1, verbose=10)

print("fitting tree model...")
tree_ssvm.fit(X_train, y_train)

print("Training loss tree model: %f" % tree_ssvm.score(X_train, y_train))
print("Test loss tree model: %f" % tree_ssvm.score(X_test, y_test))
# Script fragment: fits four SSVM learners on the explicit crosses dataset.
# NOTE(review): ``np`` is used below but not imported within this fragment --
# presumably imported earlier in the file; confirm.
import matplotlib.pyplot as plt

from pystruct.models import GridCRF
from pystruct.learners import (NSlackSSVM, OneSlackSSVM, SubgradientSSVM, FrankWolfeSSVM)
from pystruct.datasets import generate_crosses_explicit

X, Y = generate_crosses_explicit(n_samples=50, noise=10, size=6, n_crosses=1)
n_labels = len(np.unique(Y))
# A single CRF instance (AD3 with branch and bound) is shared by all learners.
crf = GridCRF(n_states=n_labels, inference_method=("ad3", {'branch_and_bound': True}))

n_slack_svm = NSlackSSVM(crf, check_constraints=False, max_iter=50, batch_size=1, tol=0.001)
one_slack_svm = OneSlackSSVM(crf, check_constraints=False, max_iter=100, tol=0.001, inference_cache=50)
subgradient_svm = SubgradientSSVM(crf, learning_rate=0.001, max_iter=20, decay_exponent=0, momentum=0)
bcfw_svm = FrankWolfeSSVM(crf, max_iter=50, check_dual_every=4)

#n-slack cutting plane ssvm
n_slack_svm.fit(X, Y)

# 1-slack cutting plane ssvm
one_slack_svm.fit(X, Y)

# online subgradient ssvm
subgradient_svm.fit(X, Y)

# Block coordinate Frank-Wolfe
bcfw_svm.fit(X, Y)

# don't plot objective from cached inference for 1-slack
# Script fragment: 10-class digit classification comparing several SSVM
# learners on the same MultiClassClf model.
digits = load_digits()
X, y = digits.data, digits.target
#X = X / 255.
# Rescale the features by a fixed factor of 16.
X = X / 16.
#y = y.astype(np.int) - 1
# No random_state is passed, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# we add a constant 1 feature for the bias
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

# n_features counts the bias column added above.
model = MultiClassClf(n_features=X_train_bias.shape[1], n_classes=10)
n_slack_svm = NSlackSSVM(model, verbose=2, check_constraints=False, C=0.1, batch_size=100, tol=1e-2)
one_slack_svm = OneSlackSSVM(model, verbose=2, C=.10, tol=.001)
subgradient_svm = SubgradientSSVM(model, C=0.1, learning_rate=0.000001, max_iter=1000, verbose=0)

fw_bc_svm = FrankWolfeSSVM(model, C=.1, max_iter=50)
fw_batch_svm = FrankWolfeSSVM(model, C=.1, max_iter=50, batch_mode=True)

# n-slack cutting plane ssvm
# Only the fit call is timed; prediction happens after the clock is stopped.
start = time()
n_slack_svm.fit(X_train_bias, y_train)
time_n_slack_svm = time() - start
y_pred = np.hstack(n_slack_svm.predict(X_test_bias))
# The printed score is the fraction of test samples predicted correctly.
print("Score with pystruct n-slack ssvm: %f (took %f seconds)" % (np.mean(y_pred == y_test), time_n_slack_svm))

## 1-slack cutting plane ssvm
start = time()
one_slack_svm.fit(X_train_bias, y_train)
tol=0.01, cache_tol=0.1)
# NOTE(review): this fragment starts in the middle of a call whose opening is
# outside the view, and the source had its indentation collapsed. The nesting
# below is reconstructed; the statements up to ``if args.subgrad:`` may
# themselves belong to an enclosing ``if`` that is not visible -- confirm.
os_ssvm.fit(list(X_train_tsvd), y_train)
# Map predicted label ids back to label strings, sentence by sentence.
test_os_ssvm_preds = [[id2label[i] for i in sent] for sent in os_ssvm.predict(X_test_tsvd)]
test_conll_os_ssvm = conlleval_fmt(iob_test, test_os_ssvm_preds)
# The file is opened in binary mode, so each sentence is encoded as UTF-8.
test_conll_os_ssvm_file = open('test_conll_os_ssvm.txt', 'wb')
for sentence in test_conll_os_ssvm:
    test_conll_os_ssvm_file.write(bytes(sentence, 'UTF-8'))
test_conll_os_ssvm_file.close()
print(conlleval_results('test_conll_os_ssvm.txt'))

if args.subgrad:
    ### fit subgradient ssvm
    crf = ChainCRF()
    sg_ssvm = SubgradientSSVM(crf, max_iter=200, verbose=args.verbose, n_jobs=-1, use_memmapping_pool=0, show_loss_every=20, shuffle=True)
    sg_ssvm.fit(list(X_train_tsvd), y_train)
    # Same prediction / formatting / evaluation steps as for the learner above.
    test_sg_ssvm_preds = [[id2label[i] for i in sent] for sent in sg_ssvm.predict(X_test_tsvd)]
    test_conll_sg_ssvm = conlleval_fmt(iob_test, test_sg_ssvm_preds)
    test_conll_sg_ssvm_file = open('test_conll_sg_ssvm.txt', 'wb')
    for sentence in test_conll_sg_ssvm:
        test_conll_sg_ssvm_file.write(bytes(sentence, 'UTF-8'))
    test_conll_sg_ssvm_file.close()
    print(conlleval_results('test_conll_sg_ssvm.txt'))

# NOTE(review): presumably all three prints belong to this ``if`` -- confirm.
if args.evals:
    print(conlleval_results('test_conll_svc.txt'))
    print(conlleval_results('test_conll_crfsuite.txt'))
    print(conlleval_results('test_conll_searn.txt'))
# do a binary digit classification digits = load_digits() X, y = digits.data, digits.target # make binary task by doing odd vs even numers y = y % 2 # code as +1 and -1 y = 2 * y - 1 X /= X.max() X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) pbl = BinaryClf() n_slack_svm = NSlackSSVM(pbl, C=10, batch_size=-1) one_slack_svm = OneSlackSSVM(pbl, C=10, tol=0.1) subgradient_svm = SubgradientSSVM(pbl, C=10, learning_rate=0.1, max_iter=100, batch_size=10) # we add a constant 1 feature for the bias X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) # n-slack cutting plane ssvm start = time() n_slack_svm.fit(X_train_bias, y_train) time_n_slack_svm = time() - start acc_n_slack = n_slack_svm.score(X_test_bias, y_test) print("Score with pystruct n-slack ssvm: %f (took %f seconds)" % (acc_n_slack, time_n_slack_svm)) ## 1-slack cutting plane ssvm start = time()
# Script fragment: fits three SSVM learners on the 10-class digits task and
# plots their objective curves.
digits = load_digits()
X, y = digits.data, digits.target
# In-place division by the global maximum, so the largest value becomes 1.
X /= X.max()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# we add a constant 1 feature for the bias
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])

# n_features counts the bias column added above.
pbl = CrammerSingerSVMModel(n_features=X_train_bias.shape[1], n_classes=10)
n_slack_svm = StructuredSVM(pbl, verbose=0, check_constraints=False, C=20, max_iter=500, batch_size=10)
one_slack_svm = OneSlackSSVM(pbl, verbose=0, check_constraints=False, C=20, max_iter=1000, tol=0.001)
subgradient_svm = SubgradientSSVM(pbl, C=20, learning_rate=0.01, max_iter=300, decay_exponent=0, momentum=0, verbose=0)

# n-slack cutting plane ssvm
n_slack_svm.fit(X_train_bias, y_train)

## 1-slack cutting plane ssvm
one_slack_svm.fit(X_train_bias, y_train)

# online subgradient ssvm
subgradient_svm.fit(X_train_bias, y_train)

#plt.plot(n_slack_svm.objective_curve_, label="n-slack lower bound")
plt.plot(n_slack_svm.objective_curve_, label="n-slack lower bound")
plt.plot(one_slack_svm.objective_curve_, label="one-slack lower bound")
plt.plot(one_slack_svm.primal_objective_curve_, label="one-slack primal")
plt.plot(subgradient_svm.objective_curve_, label="subgradient")
'branch_and_bound': True }))
# NOTE(review): the line above is the tail of a call that starts outside this
# fragment; ``crf``, ``X`` and ``Y`` are defined there.

# All four learners are built on the same ``crf`` instance.
n_slack_svm = NSlackSSVM(crf, check_constraints=False, max_iter=50, batch_size=1, tol=0.001)
one_slack_svm = OneSlackSSVM(crf, check_constraints=False, max_iter=100, tol=0.001, inference_cache=50)
subgradient_svm = SubgradientSSVM(crf, learning_rate=0.001, max_iter=20, decay_exponent=0, momentum=0)
bcfw_svm = FrankWolfeSSVM(crf, max_iter=50, check_dual_every=4)

#n-slack cutting plane ssvm
n_slack_svm.fit(X, Y)

# 1-slack cutting plane ssvm
one_slack_svm.fit(X, Y)

# online subgradient ssvm
subgradient_svm.fit(X, Y)

# Block coordinate Frank-Wolfe
bcfw_svm.fit(X, Y)
class EdgeCRFClassifier:
    """Text simplifier built on an edge-feature graph CRF.

    Bundles an ``EdgeFeatureGraphCRF`` model, a ``SubgradientSSVM`` learner and
    a featurizer. The three are pickled together to a path derived from
    ``userId`` via the module-level ``model_file`` template.
    """

    def __init__(self, userId="anonymous"):
        """Create an untrained classifier for ``userId``."""
        self.model = None
        self.learner = None
        self.featurizer = None
        self.userId = userId

    def fresh_train(self, x, y, iterations=10):
        """Build a new model and learner, fit on ``(x, y)`` and save.

        The learner runs at most ``iterations`` iterations, is fitted without
        warm start, and its logger writes to the ``<userId>-learner`` path.
        """
        self.model = EdgeFeatureGraphCRF(inference_method="max-product")
        self.learner = SubgradientSSVM(
            model=self.model,
            max_iter=iterations,
            logger=SaveLogger(model_file.format(self.userId + "-learner")))
        self.learner.fit(x, y, warm_start=False)
        self.save()

    def fresh_train_default(self, iterations=10):
        """Train from scratch on the bundled default training file."""
        default_train = scriptdir + '/../../../data/compression/' \
                                    'googlecomp100.train.lbl'
        featurizer = edge_featurize.Featurizer()
        x, y = featurizer.fit_transform(default_train)
        # Keep the featurizer that produced the training features; otherwise
        # the model is saved without it and predict() cannot map labels back.
        self.set_featurizer(featurizer)
        self.fresh_train(x, y, iterations=iterations)

    def update(self, x, y):
        """
        Performs an online update of the model
        :param x: Input data
        :param y: List of Numpy array of label IDs
        :return:
        """
        # NOTE(review): ``warm_start=False`` is passed although this is meant
        # as an incremental update -- whether previously learned weights are
        # kept depends on the learner; confirm against the pystruct version.
        self.learner.fit(x, y, warm_start=False)

    def predict(self, x):
        """Predict labels for already-featurized input ``x``.

        :return: tuple ``(labels, label_ids)``; ``label_ids`` is the raw
            learner output and ``labels`` holds one array per sentence with
            the ids mapped back through ``featurizer.map_inv``.
        :raises RuntimeError: if no featurizer is set.
        """
        self.check_featurizer_set()
        label_ids = self.learner.predict(x)
        labels = [np.array(self.featurizer.map_inv(sent))
                  for sent in label_ids]
        return labels, label_ids

    def set_featurizer(self, featurizer):
        """Attach ``featurizer`` to this classifier."""
        self.featurizer = featurizer

    def featurize_train(self, train_data, iterations=10):
        """Fit the featurizer on ``train_data`` and train from scratch."""
        self.check_featurizer_set()
        x, y = self.featurizer.fit_transform(train_data)
        self.fresh_train(x, y, iterations)

    def featurize_update(self, src, y):
        """Featurize ``src`` and update the learner with labels ``y``."""
        self.check_featurizer_set()
        x, _ = self.featurizer.transform(src)
        self.update(x, y)

    def featurize_predict(self, data):
        """Featurize ``data`` and return ``predict`` on the result."""
        self.check_featurizer_set()
        x, _ = self.featurizer.transform(data)
        return self.predict(x)

    def save(self, userId=None):
        """Pickle learner, model and featurizer for ``userId``.

        Falls back to ``self.userId`` when ``userId`` is empty or None.
        """
        if not userId:
            userId = self.userId
        with open(model_file.format(userId), 'wb') as pf:
            pickle.dump((self.learner, self.model, self.featurizer), pf,
                        pickle.HIGHEST_PROTOCOL)

    def load(self, userId=None):
        """Restore learner, model and featurizer for ``userId``; return self.

        Falls back to ``self.userId`` when ``userId`` is empty or None.
        """
        if not userId:
            userId = self.userId
        # SECURITY: unpickling executes arbitrary code; only load model files
        # that come from a trusted source.
        with open(model_file.format(userId), 'rb') as pf:
            self.learner, self.model, self.featurizer = pickle.load(pf)
        return self

    def load_default_init(self):
        """Restore learner, model and featurizer from the "default" file."""
        # SECURITY: see load() -- the file must be trusted.
        with open(model_file.format("default"), 'rb') as pf:
            self.learner, self.model, self.featurizer = pickle.load(pf)

    def check_featurizer_set(self):
        """Raise ``RuntimeError`` when no featurizer has been set."""
        if not self.featurizer:
            raise RuntimeError("Featurizer not set. Use set_featurizer().")

    def text_predict(self, input_txt):
        """Simplify plain text by deleting the subtrees labelled ``'DEL'``.

        Each sentence is parsed and featurized, its edges are loaded into a
        directed graph, and for every token predicted as ``'DEL'`` that has an
        incoming edge the token and everything reachable from it is removed.

        :param input_txt: plain text handed to ``featurizer.transform_plain``
        :return: tuple ``(original, simplified)`` of detokenized sentences
        :raises RuntimeError: if no featurizer is set.
        """
        self.check_featurizer_set()
        original = []
        simplified = []
        X, parses = self.featurizer.transform_plain(input_txt)
        for x, parse in zip(X, parses):
            labels = self.predict([x])[0]
            tokens = parse['form']
            original.append(detokenizer.detokenize(list(tokens), True))

            graph = nx.DiGraph()
            for s, t in x[1]:
                graph.add_edge(s, t)

            for i, l in enumerate(labels[0]):
                if l != 'DEL':
                    continue
                # Only nodes that still exist and have a parent are pruned.
                # Querying the node directly avoids mutating the graph while
                # iterating over its edges, which fails on networkx >= 2.
                if i in graph and graph.in_degree(i) > 0:
                    doomed = list(dfs_tree(graph, i).nodes())
                    graph.remove_nodes_from(doomed)

            simplified.append(
                detokenizer.detokenize(
                    [tokens[n] for n in sorted(graph.nodes())], True))
        return original, simplified