def test_binary_blocks_cutting_plane():
    """Fit an n-slack SSVM with a GraphCRF on the binary blocks toy data.

    Runs once per installed inference method. The grid graphs are generated
    explicitly for each example, and the predictions on the training data
    must equal the training labels.
    """
    for inference_method in get_installed(["dai", "lp", "qpbo", "ad3", 'ogm']):
        print("testing %s" % inference_method)
        X, Y = generate_blocks(n_samples=3)
        crf = GraphCRF(inference_method=inference_method)
        clf = NSlackSSVM(model=crf, max_iter=20, C=100, check_constraints=True,
                         break_on_bad=False, n_jobs=1)
        x1, x2, x3 = X
        y1, y2, y3 = Y
        n_states = len(np.unique(Y))
        # trim the last column of the first grid and the last row of the
        # second one so the three examples differ in shape
        x1, y1 = x1[:, :-1], y1[:, :-1]
        x2, y2 = x2[:-1], y2[:-1]
        # generate graphs
        X_ = [x1, x2, x3]
        G = [make_grid_edges(x) for x in X_]
        # reshape / flatten x and y
        X_ = [x.reshape(-1, n_states) for x in X_]
        Y = [y.ravel() for y in [y1, y2, y3]]
        # Materialize the pairs: on Python 3 a bare zip() is a one-shot
        # iterator, so fit() would exhaust it and predict() would see nothing.
        X = list(zip(X_, G))
        clf.fit(X, Y)
        Y_pred = clf.predict(X)
        for y, y_pred in zip(Y, Y_pred):
            assert_array_equal(y, y_pred)
def test_logging():
    """Check that SaveLogger stores a fitted NSlackSSVM that scores the same.

    The learner is fitted on iris (each sample wrapped as a one-node graph
    with an empty edge list), then scored directly, through the model the
    logger saved automatically, and through a copy saved manually under a
    second file name.
    """
    import os

    iris = load_iris()
    X, y = iris.data, iris.target
    # np.int was only an alias of the builtin int and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(X_, Y, random_state=1)
    # mkstemp returns an open descriptor; only the path is needed, so close
    # it right away instead of leaking it.
    fd, file_name = mkstemp()
    os.close(fd)
    pbl = GraphCRF(n_features=4, n_states=3, inference_method=inference_method)
    logger = SaveLogger(file_name)
    svm = NSlackSSVM(pbl, C=100, n_jobs=1, logger=logger)
    svm.fit(X_train, y_train)

    score_current = svm.score(X_test, y_test)
    score_auto_saved = logger.load().score(X_test, y_test)

    # save a second copy by hand and point the logger at it
    alt_file_name = file_name + "alt"
    logger.save(svm, alt_file_name)
    logger.file_name = alt_file_name
    score_manual_saved = logger.load().score(X_test, y_test)

    assert_less(.97, score_current)
    assert_less(.97, score_auto_saved)
    assert_less(.97, score_manual_saved)
    assert_almost_equal(score_auto_saved, score_manual_saved)
def test_binary_blocks_cutting_plane():
    """Cutting-plane SSVM on the easy binary blocks data with explicit graphs.

    For every installed inference method a GraphCRF is trained on three
    grids of different shapes and must reproduce its training labels.
    """
    for inference_method in get_installed(["lp", "qpbo", "ad3", 'ogm']):
        X, Y = generate_blocks(n_samples=3)
        model = GraphCRF(inference_method=inference_method)
        learner = NSlackSSVM(model=model, max_iter=20, C=100,
                             check_constraints=True, break_on_bad=False,
                             n_jobs=1)
        x1, x2, x3 = X
        y1, y2, y3 = Y
        n_states = len(np.unique(Y))

        # drop the last column of the first grid and the last row of the
        # second one so the examples are not all the same size
        grids = [x1[:, :-1], x2[:-1], x3]
        label_grids = [y1[:, :-1], y2[:-1], y3]

        # one edge list per grid, then flatten features and labels
        edges = [make_grid_edges(grid) for grid in grids]
        features = [grid.reshape(-1, n_states) for grid in grids]
        targets = [labels.ravel() for labels in label_grids]

        samples = list(zip(features, edges))
        learner.fit(samples, targets)
        predictions = learner.predict(samples)
        for expected, predicted in zip(targets, predictions):
            assert_array_equal(expected, predicted)
def test_binary_blocks_batches_n_slack():
    """Train an n-slack SSVM with batch_size=1 on the binary blocks data.

    The fitted GridCRF must predict the training labels exactly.
    """
    samples, labels = generate_blocks(n_samples=5)
    learner = NSlackSSVM(
        model=GridCRF(inference_method=inference_method),
        max_iter=20,
        batch_size=1,
        C=100,
    )
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def test_multinomial_checker_cutting_plane():
    """Train an n-slack SSVM on the noisy multinomial checker data.

    The number of states is taken from the distinct labels present, and the
    fitted GridCRF must reproduce the training labels.
    """
    samples, labels = generate_checker_multinomial(n_samples=10, noise=.1)
    model = GridCRF(n_states=len(np.unique(labels)),
                    inference_method=inference_method)
    learner = NSlackSSVM(model=model, max_iter=20, C=100000,
                         check_constraints=True)
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def test_binary_blocks_cutting_plane():
    """Cutting-plane SSVM with a GridCRF on the easy binary blocks data.

    Constraint checking is enabled but does not abort on a bad constraint;
    training predictions must equal the training labels.
    """
    samples, labels = generate_blocks(n_samples=5)
    learner = NSlackSSVM(
        model=GridCRF(inference_method=inference_method),
        max_iter=20,
        C=100,
        check_constraints=True,
        break_on_bad=False,
    )
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def test_binary_blocks_batches_n_slack():
    """Single-job n-slack SSVM with batch_size=1 on the binary blocks data.

    Uses a default-constructed GridCRF; training predictions must equal the
    training labels.
    """
    samples, labels = toy.generate_blocks(n_samples=5)
    learner = NSlackSSVM(
        model=GridCRF(),
        max_iter=20,
        C=100,
        check_constraints=True,
        break_on_bad=False,
        n_jobs=1,
        batch_size=1,
    )
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def crammer_singer_classifier(X_train_bias, y_train, num_classes, n_jobs=2, C=1):
    """Fit a multi-class n-slack cutting-plane SSVM and return the learner.

    Parameters
    ----------
    X_train_bias : array with a ``shape`` attribute
        Training features; ``shape[1]`` is used as the feature count.
        (Judging by the name, a bias column is presumably already appended.)
    y_train : training labels passed straight to ``fit``.
    num_classes : number of classes for the ``MultiClassClf`` model.
    n_jobs, C : forwarded to ``NSlackSSVM``.

    Returns
    -------
    The fitted ``NSlackSSVM`` instance.
    """
    n_features = X_train_bias.shape[1]
    learner = NSlackSSVM(
        MultiClassClf(n_features=n_features, n_classes=num_classes),
        n_jobs=n_jobs,
        verbose=0,
        check_constraints=False,
        C=C,
        batch_size=100,
        tol=1e-2,
    )
    learner.fit(X_train_bias, y_train)
    return learner
def test_multinomial_blocks_cutting_plane():
    """Cutting-plane SSVM on the easy multinomial blocks data.

    Trains with one sample per batch and no constraint checking; training
    predictions must equal the training labels.
    """
    samples, labels = generate_blocks_multinomial(n_samples=40, noise=0.5,
                                                  seed=0)
    model = GridCRF(n_states=len(np.unique(labels)),
                    inference_method=inference_method)
    learner = NSlackSSVM(model=model, max_iter=100, C=100,
                         check_constraints=False, batch_size=1)
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def fit_ssvm(self, X, Y):
    """Learn this model's weights with an n-slack SSVM.

    Resets ``inference_calls`` to zero, sets ``size_joint_feature`` to
    ``n_parameters``, trains on ``X`` against the vectorized form of each
    label in ``Y`` with ``C = 1 / lambda_0``, and hands the learned weight
    vector to ``set_weights``.
    """
    self.inference_calls = 0
    self.size_joint_feature = self.n_parameters
    learner = NSlackSSVM(
        self,
        C=1.0 / self.lambda_0,
        max_iter=self.max_iter,
        verbose=self.verbose,
    )
    vectorized_labels = [self.vectorize_label(label) for label in Y]
    learner.fit(X, vectorized_labels)
    self.set_weights(learner.w)
def test_simple_1d_dataset_cutting_plane():
    """Fit a multi-class n-slack SSVM on a 1-d threshold problem.

    Labels are 1 where the single feature exceeds 0.5 and 0 otherwise; the
    learner must reproduce them on the training data.
    """
    # 30 1d datapoints between 0 and 1
    X = np.random.uniform(size=(30, 1))
    # np.int was only an alias of the builtin int and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    Y = (X.ravel() > 0.5).astype(int)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    pbl = MultiClassClf(n_features=2)
    svm = NSlackSSVM(pbl, check_constraints=True, C=10000)
    svm.fit(X, Y)
    assert_array_equal(Y, np.hstack(svm.predict(X)))
def test_simple_1d_dataset_cutting_plane():
    """Fit a multi-class n-slack SSVM on a 1-d threshold problem.

    Labels are 1 where the single feature exceeds 0.5 and 0 otherwise; the
    learner must reproduce them on the training data.
    """
    # 30 1d datapoints between 0 and 1
    X = np.random.uniform(size=(30, 1))
    # np.int was only an alias of the builtin int and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    Y = (X.ravel() > 0.5).astype(int)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    pbl = MultiClassClf(n_features=2)
    svm = NSlackSSVM(pbl, check_constraints=True, C=10000)
    svm.fit(X, Y)
    assert_array_equal(Y, np.hstack(svm.predict(X)))
def train_cue_learner(sentence_dicts, C_value):
    """Train a binary n-slack SSVM cue classifier.

    Builds the cue lexicons from ``sentence_dicts``, extracts training
    instances and labels, vectorizes the instances into a dense array and
    fits a ``BinaryClf`` SSVM with regularization ``C_value`` in one batch
    (``batch_size=-1``).

    Returns
    -------
    (fitted learner, fitted DictVectorizer, cue lexicon, affixal cue lexicon)
    """
    cue_lexicon, affixal_cue_lexicon = get_cue_lexicon(sentence_dicts)
    # the first returned item (the processed sentence dicts) is not needed
    _, instances, labels = extract_features_cue(
        sentence_dicts, cue_lexicon, affixal_cue_lexicon, 'training')

    vectorizer = DictVectorizer()
    feature_matrix = vectorizer.fit_transform(instances).toarray()

    learner = NSlackSSVM(BinaryClf(), C=C_value, batch_size=-1)
    learner.fit(feature_matrix, np.asarray(labels))
    return learner, vectorizer, cue_lexicon, affixal_cue_lexicon
def test_binary_ssvm_attractive_potentials():
    """A constrained SSVM can still learn the binary blocks dataset.

    The learner is built with ``positive_constraint=[5]``; besides perfect
    training predictions, weight 5 must come out negative.
    """
    samples, labels = toy.generate_blocks(n_samples=10)
    learner = NSlackSSVM(
        model=GridCRF(),
        max_iter=200,
        C=100,
        check_constraints=True,
        positive_constraint=[5],
    )
    learner.fit(samples, labels)
    predicted = learner.predict(samples)
    assert_array_equal(labels, predicted)
    # don't ask me about signs
    assert_true(learner.w[5] < 0)
def CRF_oneNode(x_train, x_test, y_train, y_test):
    """Train a 4-state GraphCRF with an n-slack SSVM and print a report.

    Fits on ``(x_train, y_train)``, predicts ``x_test`` and prints sklearn's
    classification report against ``y_test`` using the class names
    Start / Mid / End / Others. Returns nothing.
    """
    pbl = GraphCRF(n_states=4, n_features=20)
    svm = NSlackSSVM(pbl, max_iter=200, C=10, n_jobs=2)
    svm.fit(x_train, y_train)
    y_pred = svm.predict(x_test)
    target_names = ['Start', 'Mid', 'End', 'Others']
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print(classification_report(y_test, y_pred, target_names=target_names))
def test_multinomial_blocks_directional():
    """Cutting-plane SSVM with a DirectionalGridCRF on multinomial blocks.

    Trains one sample per batch with constraint checking; training
    predictions must equal the training labels.
    """
    samples, labels = toy.generate_blocks_multinomial(n_samples=10, noise=0.3,
                                                      seed=0)
    model = DirectionalGridCRF(n_states=len(np.unique(labels)))
    learner = NSlackSSVM(model=model, max_iter=100, C=100, verbose=0,
                         check_constraints=True, batch_size=1)
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def test_binary_ssvm_attractive_potentials():
    """A negativity-constrained SSVM can learn the binary blocks dataset.

    The learner is built with ``negativity_constraint=[5]``; besides perfect
    training predictions, weight 5 must come out negative.
    """
    samples, labels = generate_blocks(n_samples=10)
    learner = NSlackSSVM(
        model=GridCRF(inference_method=inference_method),
        max_iter=200,
        C=100,
        check_constraints=True,
        negativity_constraint=[5],
    )
    learner.fit(samples, labels)
    predicted = learner.predict(samples)
    assert_array_equal(labels, predicted)
    assert_true(learner.w[5] < 0)
def test_simple_1d_dataset_cutting_plane():
    """Fit a binary n-slack SSVM on a linearly separable 1-d problem.

    Thirty uniform points in [0, 1) are labelled -1 below 0.5 and +1
    otherwise; the learner must reproduce the labels on the training data.
    """
    points = np.random.uniform(size=(30, 1))
    # linearly separable labels in {-1, +1}
    labels = 1 - 2 * (points.ravel() < .5)
    # the model has no intercept, so append a constant-1 feature by hand
    bias = np.ones((points.shape[0], 1))
    features = np.hstack([points, bias])

    learner = NSlackSSVM(BinaryClf(n_features=2), check_constraints=True,
                         C=1000)
    learner.fit(features, labels)
    assert_array_equal(labels, np.hstack(learner.predict(features)))
def test_multinomial_blocks_directional():
    """Cutting-plane SSVM with a DirectionalGridCRF on multinomial blocks.

    Uses the module's ``inference_method``; trains one sample per batch with
    constraint checking and must reproduce the training labels.
    """
    samples, labels = generate_blocks_multinomial(n_samples=10, noise=0.3,
                                                  seed=0)
    model = DirectionalGridCRF(n_states=len(np.unique(labels)),
                               inference_method=inference_method)
    learner = NSlackSSVM(model=model, max_iter=100, C=100, verbose=0,
                         check_constraints=True, batch_size=1)
    learner.fit(samples, labels)
    assert_array_equal(labels, learner.predict(samples))
def test_multinomial_blocks_cutting_plane():
    """Cutting-plane SSVM on multinomial blocks for each installed solver.

    The same dataset is fitted once per installed inference method among
    lp / qpbo / ad3; each fit must reproduce the training labels.
    """
    samples, labels = toy.generate_blocks_multinomial(n_samples=10, noise=0.3,
                                                      seed=0)
    n_labels = len(np.unique(labels))
    for method in get_installed(['lp', 'qpbo', 'ad3']):
        model = GridCRF(n_states=n_labels, inference_method=method)
        learner = NSlackSSVM(model=model, max_iter=10, C=100,
                             check_constraints=False)
        learner.fit(samples, labels)
        assert_array_equal(labels, learner.predict(samples))
def test_simple_1d_dataset_cutting_plane():
    """Fit a binary n-slack SSVM on a linearly separable 1-d problem.

    Thirty uniform points in [0, 1) are labelled -1 below 0.5 and +1
    otherwise; the learner must reproduce the labels on the training data.
    """
    points = np.random.uniform(size=(30, 1))
    # linearly separable labels in {-1, +1}
    labels = 1 - 2 * (points.ravel() < .5)
    # the model has no intercept, so append a constant-1 feature by hand
    bias = np.ones((points.shape[0], 1))
    features = np.hstack([points, bias])

    learner = NSlackSSVM(BinaryClf(n_features=2), check_constraints=True,
                         C=1000)
    learner.fit(features, labels)
    assert_array_equal(labels, np.hstack(learner.predict(features)))
def test_blobs_2d_cutting_plane():
    """Binary n-slack SSVM on two Gaussian blobs, scored on held-out data.

    The first 40 samples train the learner; predictions on the remaining 40
    must equal their labels.
    """
    features, labels = make_blobs(n_samples=80, centers=2, random_state=1)
    # map the {0, 1} blob ids to {-1, +1}
    labels = 2 * labels - 1
    # the model has no intercept, so append a constant-1 feature by hand
    bias = np.ones((features.shape[0], 1))
    features = np.hstack([features, bias])

    train_X, test_X = features[:40], features[40:]
    train_y, test_y = labels[:40], labels[40:]

    learner = NSlackSSVM(BinaryClf(n_features=3), check_constraints=True,
                         C=1000)
    learner.fit(train_X, train_y)
    assert_array_equal(test_y, np.hstack(learner.predict(test_X)))
def test_blobs_2d_cutting_plane():
    """Multi-class n-slack SSVM on three Gaussian blobs, held-out scoring.

    The first 40 samples train the learner one sample per batch; predictions
    on the remaining 40 must equal their labels.
    """
    features, labels = make_blobs(n_samples=80, centers=3, random_state=42)
    # the model has no intercept, so append a constant-1 feature by hand
    bias = np.ones((features.shape[0], 1))
    features = np.hstack([features, bias])

    train_X, test_X = features[:40], features[40:]
    train_y, test_y = labels[:40], labels[40:]

    learner = NSlackSSVM(MultiClassClf(n_features=3, n_classes=3),
                         check_constraints=True, C=1000, batch_size=1)
    learner.fit(train_X, train_y)
    assert_array_equal(test_y, np.hstack(learner.predict(test_X)))
def cue_trainer(filename, corenlp): newfilename = process_data(filename, corenlp) sentence_dicts = file_to_sentence_dict(newfilename) cue_dict, affix_cue_dict = get_cue_dict(sentence_dicts) sentence_dicts, cue_instances, cue_labels = extract_features_cue( sentence_dicts, cue_dict, affix_cue_dict, 'training') cue_vec = DictVectorizer() model = cue_vec.fit_transform(cue_instances).toarray() cue_ssvm = NSlackSSVM(BinaryClf(), C=0.2, batch_size=-1) #cue_ssvm = SVC(C = 0.2) cue_ssvm.fit(model, cue_labels) return sentence_dicts, cue_ssvm, cue_vec, cue_dict, affix_cue_dict """pickle.dump(cue_ssvm, open("cue_model_%s.pkl" %filename, "wb"))
def test_multinomial_blocks_directional_simple():
    """Cutting-plane SSVM with an EdgeFeatureGraphCRF on multinomial blocks.

    Each grid is converted into node features, a stacked edge array and
    per-edge features; training predictions must equal the flattened labels.
    """
    grids, label_grids = generate_blocks_multinomial(n_samples=10, noise=0.3,
                                                     seed=0)
    edge_lists = [make_grid_edges(grid, return_lists=True) for grid in grids]
    edge_features = [edge_list_to_features(lists) for lists in edge_lists]
    edges = [np.vstack(lists) for lists in edge_lists]
    node_features = [grid.reshape(-1, 3) for grid in grids]

    samples = list(zip(node_features, edges, edge_features))
    targets = [labels.ravel() for labels in label_grids]

    model = EdgeFeatureGraphCRF(n_states=3, n_edge_features=2)
    learner = NSlackSSVM(model=model, max_iter=10, C=1,
                         check_constraints=False)
    learner.fit(samples, targets)
    assert_array_equal(targets, learner.predict(samples))
def test_multinomial_blocks_directional_simple():
    """Cutting-plane SSVM with an EdgeFeatureGraphCRF on multinomial blocks.

    Each grid is converted into node features, a stacked edge array and
    per-edge features; training predictions must equal the flattened labels.
    """
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    # Materialize the triples: on Python 3 a bare zip() is a one-shot
    # iterator, so fit() would exhaust it and predict() would see nothing.
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(n_states=3, n_edge_features=2)
    clf = NSlackSSVM(model=crf, max_iter=10, C=1, check_constraints=False)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_binary_blocks_cutting_plane_latent_node():
    """LatentNodeCRF without hidden states must match a plain GraphCRF.

    A GraphCRF is trained with an n-slack SSVM on the binary blocks data,
    then a LatentSSVM wrapping an n-slack SSVM over a LatentNodeCRF with
    zero hidden states is trained on the same graphs. Both must reproduce
    the training labels and end up with almost equal weights.
    """
    X, Y = generate_blocks(n_samples=3)
    crf = GraphCRF()
    clf = NSlackSSVM(model=crf, max_iter=20, C=100, check_constraints=True,
                     break_on_bad=False, n_jobs=1)
    x1, x2, x3 = X
    y1, y2, y3 = Y
    n_states = len(np.unique(Y))
    # trim the last column of the first grid and the last row of the second
    # one so the three examples differ in shape
    x1, y1 = x1[:, :-1], y1[:, :-1]
    x2, y2 = x2[:-1], y2[:-1]
    # generate graphs
    X_ = [x1, x2, x3]
    G = [make_grid_edges(x) for x in X_]

    # reshape / flatten x and y
    X_ = [x.reshape(-1, n_states) for x in X_]
    Y = [y.ravel() for y in [y1, y2, y3]]

    # Materialize the pairs: on Python 3 a bare zip() is a one-shot
    # iterator, so fit() would exhaust it and predict() would see nothing.
    X = list(zip(X_, G))

    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=0)
    latent_svm = LatentSSVM(NSlackSSVM(model=latent_crf, max_iter=20, C=100,
                                       check_constraints=True,
                                       break_on_bad=False, n_jobs=1),
                            latent_iter=3)
    # third tuple entry is a zero per example, matching n_hidden_states=0;
    # materialized for the same reason as X above
    X_latent = list(zip(X_, G, np.zeros(len(X_))))
    latent_svm.fit(X_latent, Y, H_init=Y)
    Y_pred = latent_svm.predict(X_latent)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    assert_array_almost_equal(latent_svm.w, clf.w)
def test_n_slack_svm_as_crf_pickling():
    """A model saved by SaveLogger scores as well as the live learner.

    Fits an n-slack SSVM over an edge-less GraphCRF on iris with a
    SaveLogger attached; both the fitted learner and the one reloaded from
    the logger's file must score above 0.97 on the test split.
    """
    import os

    iris = load_iris()
    X, y = iris.data, iris.target
    # np.int was only an alias of the builtin int and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y, random_state=1)
    # mkstemp returns an open descriptor; only the path is needed, so close
    # it right away instead of leaking it.
    fd, file_name = mkstemp()
    os.close(fd)

    pbl = GraphCRF(n_features=4, n_states=3, inference_method='lp')
    logger = SaveLogger(file_name)
    svm = NSlackSSVM(pbl, C=100, n_jobs=1, logger=logger)
    svm.fit(X_train, y_train)

    assert_less(.97, svm.score(X_test, y_test))
    assert_less(.97, logger.load().score(X_test, y_test))
def multiClf(x_train, x_test, y_train, y_test):
    """Train a 4-class n-slack SSVM and print a classification report.

    The training data is converted to NumPy arrays, a ``MultiClassClf``
    sized from ``x_train.shape[1]`` is fitted, and sklearn's classification
    report for the predictions on ``x_test`` is printed using the class
    names Start / Mid / End / Others. Returns nothing.
    """
    x_train = np.array(x_train)
    y_train = np.array(y_train)

    clf = pystruct.models.MultiClassClf(n_features=x_train.shape[1],
                                        n_classes=4)
    ssvm = NSlackSSVM(clf, C=.1, tol=0.01)
    ssvm.fit(x_train, y_train)
    # Predict through the fitted learner: `clf` is only the model
    # definition, the learned weights live on `ssvm`.
    y_pred = ssvm.predict(np.array(x_test))
    target_names = ['Start', 'Mid', 'End', 'Others']
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print(classification_report(y_test, y_pred, target_names=target_names))
def runIt(train_list):
    """Train an edge-less GraphCRF SSVM on ``train_list`` and print accuracy.

    Features and labels are extracted with ``list2features`` and
    ``list2labels_sleep``, every sample is wrapped as a one-node graph with
    an empty edge list, and the data is split 50/50. The mean accuracy on
    the test half and the fit time are printed. Returns nothing.
    """
    X_org = list2features(train_list)
    X = np.array(X_org)
    y = list2labels_sleep(train_list)
    y_org = np.array(y)
    Y = y_org.reshape(-1, 1)

    # np.int was only an alias of the builtin int and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    X_train, X_test, y_train, y_test = train_test_split(X_, Y, test_size=.5)

    pbl = GraphCRF(inference_method='unary')
    svm = NSlackSSVM(pbl, C=100)

    start = time()
    svm.fit(X_train, y_train)
    time_svm = time() - start
    y_pred = np.vstack(svm.predict(X_test))
    print("Score with pystruct crf svm: %f (took %f seconds)"
          % (np.mean(y_pred == y_test), time_svm))
def test_multinomial_blocks_directional_anti_symmetric():
    """Symmetric / antisymmetric edge features yield matching weights.

    An EdgeFeatureGraphCRF with edge feature 0 declared symmetric and edge
    feature 1 antisymmetric is trained on the multinomial blocks data. It
    must reproduce the training labels, and the last 18 weights, viewed as
    two 3x3 matrices, must be symmetric and antisymmetric respectively.
    """
    grids, label_grids = generate_blocks_multinomial(n_samples=10, noise=0.3,
                                                     seed=0)
    edge_lists = [make_grid_edges(grid, return_lists=True) for grid in grids]
    edge_features = [edge_list_to_features(lists) for lists in edge_lists]
    edges = [np.vstack(lists) for lists in edge_lists]
    node_features = [grid.reshape(-1, 3) for grid in grids]

    samples = list(zip(node_features, edges, edge_features))
    targets = [labels.ravel() for labels in label_grids]

    model = EdgeFeatureGraphCRF(symmetric_edge_features=[0],
                                antisymmetric_edge_features=[1])
    learner = NSlackSSVM(model=model, C=100)
    learner.fit(samples, targets)
    assert_array_equal(targets, learner.predict(samples))

    # the trailing 2 * 3 * 3 weights are the pairwise parameters
    symmetric, antisymmetric = learner.w[-9 * 2:].reshape(2, 3, 3)
    assert_array_equal(symmetric, symmetric.T)
    assert_array_equal(antisymmetric, -antisymmetric.T)
def test_binary_blocks_cutting_plane_latent_node():
    """LatentNodeCRF without hidden states must match a plain GraphCRF.

    A GraphCRF is trained with an n-slack SSVM on the binary blocks data,
    then a LatentSSVM wrapping an n-slack SSVM over a LatentNodeCRF with
    zero hidden states is trained on the same graphs. Both must reproduce
    the training labels and end up with almost equal weights.
    """
    X, Y = generate_blocks(n_samples=3)
    crf = GraphCRF()
    clf = NSlackSSVM(model=crf, max_iter=20, C=100, check_constraints=True,
                     break_on_bad=False, n_jobs=1)
    x1, x2, x3 = X
    y1, y2, y3 = Y
    n_states = len(np.unique(Y))
    # trim the last column of the first grid and the last row of the second
    # one so the three examples differ in shape
    x1, y1 = x1[:, :-1], y1[:, :-1]
    x2, y2 = x2[:-1], y2[:-1]
    # generate graphs
    X_ = [x1, x2, x3]
    G = [make_grid_edges(x) for x in X_]

    # reshape / flatten x and y
    X_ = [x.reshape(-1, n_states) for x in X_]
    Y = [y.ravel() for y in [y1, y2, y3]]

    # Materialize the pairs: on Python 3 a bare zip() is a one-shot
    # iterator, so fit() would exhaust it and predict() would see nothing.
    X = list(zip(X_, G))

    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=0)
    latent_svm = LatentSSVM(NSlackSSVM(model=latent_crf, max_iter=20, C=100,
                                       check_constraints=True,
                                       break_on_bad=False, n_jobs=1),
                            latent_iter=3)
    # third tuple entry is a zero per example, matching n_hidden_states=0;
    # materialized for the same reason as X above
    X_latent = list(zip(X_, G, np.zeros(len(X_))))
    latent_svm.fit(X_latent, Y, H_init=Y)
    Y_pred = latent_svm.predict(X_latent)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    assert_array_almost_equal(latent_svm.w, clf.w)
def test_multinomial_blocks_directional_anti_symmetric():
    """Symmetric / antisymmetric edge features yield matching weights.

    An EdgeFeatureGraphCRF with edge feature 0 declared symmetric and edge
    feature 1 antisymmetric is trained on the multinomial blocks data. It
    must reproduce the training labels, and the last 18 weights, viewed as
    two 3x3 matrices, must be symmetric and antisymmetric respectively.
    """
    X_, Y_ = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    G = [make_grid_edges(x, return_lists=True) for x in X_]
    edge_features = [edge_list_to_features(edge_list) for edge_list in G]
    edges = [np.vstack(g) for g in G]
    # Materialize the triples: on Python 3 a bare zip() is a one-shot
    # iterator, so fit() would exhaust it and predict() would see nothing.
    X = list(zip([x.reshape(-1, 3) for x in X_], edges, edge_features))
    Y = [y.ravel() for y in Y_]
    crf = EdgeFeatureGraphCRF(symmetric_edge_features=[0],
                              antisymmetric_edge_features=[1])
    clf = NSlackSSVM(model=crf, C=100)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
    # the trailing 2 * 3 * 3 weights are the pairwise parameters
    pairwise_params = clf.w[-9 * 2:].reshape(2, 3, 3)
    sym = pairwise_params[0]
    antisym = pairwise_params[1]
    assert_array_equal(sym, sym.T)
    assert_array_equal(antisym, -antisym.T)
def test_binary_ssvm_repellent_potentials():
    """Checkerboard data with and without a negativity constraint.

    An unconstrained GridCRF must fit the checkerboard perfectly. With
    ``negativity_constraint=[4, 5, 6]`` the learner's predictions must equal
    what the CRF infers from the fixed weight vector [1, 0, 0, 1, 0, 0, 0],
    i.e. it can not do better than the unaries.
    """
    samples, labels = generate_checker()
    model = GridCRF(inference_method=inference_method)

    # standard crf can predict perfectly
    free_learner = NSlackSSVM(model=model, max_iter=10, C=100,
                              check_constraints=True)
    free_learner.fit(samples, labels)
    assert_array_equal(labels, free_learner.predict(samples))

    constrained_learner = NSlackSSVM(model=model, max_iter=10, C=100,
                                     check_constraints=True,
                                     negativity_constraint=[4, 5, 6])
    constrained_learner.fit(samples, labels)
    constrained_pred = constrained_learner.predict(samples)

    # submodular crf can not do better than unaries
    for index, sample in enumerate(samples):
        unary_only = model.inference(sample, np.array([1, 0, 0, 1, 0, 0, 0]))
        assert_array_equal(unary_only, constrained_pred[index])
def test_switch_to_ad3():
    """Switching inference from qpbo to ad3 during training works.

    Skipped silently unless both solvers are installed. A learner with
    ``switch_to='ad3'`` must end with ad3 as the model's inference method
    and a higher final objective than the plain qpbo learner; a second run
    with a loose tolerance checks that convergence also triggers the switch.
    """
    if not (get_installed(['qpbo']) and get_installed(['ad3'])):
        return

    samples, labels = generate_blocks_multinomial(n_samples=5, noise=1.5,
                                                  seed=0)
    model = GridCRF(n_states=3, inference_method='qpbo')

    plain = NSlackSSVM(model, max_iter=10000)
    switching = NSlackSSVM(model, max_iter=10000, switch_to='ad3')
    plain.fit(samples, labels)
    switching.fit(samples, labels)
    assert_equal(switching.model.inference_method, 'ad3')

    # we check that the dual is higher with ad3 inference
    # as it might use the relaxation, that is pretty much guaranteed
    assert_greater(switching.objective_curve_[-1],
                   plain.objective_curve_[-1])

    # test that convergence also results in switch
    switching = NSlackSSVM(model, max_iter=10000, switch_to='ad3', tol=10)
    switching.fit(samples, labels)
    assert_equal(switching.model.inference_method, 'ad3')
def test_switch_to_ad3():
    """Switching inference from qpbo to ad3 during training works.

    Skipped silently unless both solvers are installed. A learner with
    ``switch_to='ad3'`` must end with ad3 as the model's inference method
    and a higher final objective than the plain qpbo learner (both values
    are printed); a second run with a loose tolerance checks that
    convergence also triggers the switch.
    """
    if not (get_installed(['qpbo']) and get_installed(['ad3'])):
        return

    samples, labels = toy.generate_blocks_multinomial(n_samples=5, noise=1.5,
                                                      seed=0)
    model = GridCRF(n_states=3, inference_method='qpbo')

    plain = NSlackSSVM(model, max_iter=10000)
    switching = NSlackSSVM(model, max_iter=10000, switch_to='ad3')
    plain.fit(samples, labels)
    switching.fit(samples, labels)
    assert_equal(switching.model.inference_method, 'ad3')

    # we check that the dual is higher with ad3 inference
    # as it might use the relaxation, that is pretty much guaranteed
    assert_greater(switching.objective_curve_[-1],
                   plain.objective_curve_[-1])
    print(switching.objective_curve_[-1], plain.objective_curve_[-1])

    # test that convergence also results in switch
    switching = NSlackSSVM(model, max_iter=10000, switch_to='ad3', tol=10)
    switching.fit(samples, labels)
    assert_equal(switching.model.inference_method, 'ad3')
def test_ssvm_objectives():
    """Every learner's recorded final objective matches ``objective_primal``.

    Not a strong test, but it makes sure the objective function is called.
    Each learner is fitted on the same noisy multinomial blocks data and the
    last entry of its objective curve is compared with the primal objective
    recomputed from its final weights.
    """
    X, Y = generate_blocks_multinomial(n_samples=10, noise=1.5, seed=0)
    n_labels = len(np.unique(Y))
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)

    def check_final_objective(learner, curve_name, **objective_kwargs):
        # fit, recompute the primal objective, compare with the curve's end
        learner.fit(X, Y)
        expected = objective_primal(learner.model, learner.w, X, Y,
                                    learner.C, **objective_kwargs)
        assert_almost_equal(getattr(learner, curve_name)[-1], expected)

    # once for n-slack
    check_final_objective(
        NSlackSSVM(model=crf, max_iter=5, C=1, tol=.1),
        'primal_objective_curve_')

    # once for one-slack
    check_final_objective(
        OneSlackSSVM(model=crf, max_iter=5, C=1, tol=.1),
        'primal_objective_curve_', variant='one_slack')

    # now subgradient. Should also work in batch-mode.
    check_final_objective(
        SubgradientSSVM(model=crf, max_iter=5, C=1, batch_size=-1),
        'objective_curve_')

    # frank wolfe
    check_final_objective(
        FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=True),
        'primal_objective_curve_')

    # block-coordinate Frank-Wolfe
    check_final_objective(
        FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=False),
        'primal_objective_curve_')
def test_ssvm_objectives():
    """Every learner's recorded final objective matches ``objective_primal``.

    Not a strong test, but it makes sure the objective function is called.
    Each learner is fitted on the same noisy multinomial blocks data and the
    last entry of its objective curve is compared with the primal objective
    recomputed from its final weights.
    """
    X, Y = generate_blocks_multinomial(n_samples=10, noise=1.5, seed=0)
    n_labels = len(np.unique(Y))
    crf = GridCRF(n_states=n_labels, inference_method=inference_method)

    def check_final_objective(learner, curve_name, **objective_kwargs):
        # fit, recompute the primal objective, compare with the curve's end
        learner.fit(X, Y)
        expected = objective_primal(learner.model, learner.w, X, Y,
                                    learner.C, **objective_kwargs)
        assert_almost_equal(getattr(learner, curve_name)[-1], expected)

    # once for n-slack
    check_final_objective(
        NSlackSSVM(model=crf, max_iter=5, C=1, tol=.1),
        'primal_objective_curve_')

    # once for one-slack
    check_final_objective(
        OneSlackSSVM(model=crf, max_iter=5, C=1, tol=.1),
        'primal_objective_curve_', variant='one_slack')

    # now subgradient. Should also work in batch-mode.
    check_final_objective(
        SubgradientSSVM(model=crf, max_iter=5, C=1, batch_size=-1),
        'objective_curve_')

    # frank wolfe
    check_final_objective(
        FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=True),
        'primal_objective_curve_')

    # block-coordinate Frank-Wolfe
    check_final_objective(
        FrankWolfeSSVM(model=crf, max_iter=5, C=1, batch_mode=False),
        'primal_objective_curve_')
Y_train = [labels[j] for j in train] X_test = [examples[j] for j in test] Y_test = [labels[j] for j in test] # if verbose: # print np.mean(map(np.mean,Y_train)), 'pm',np.var(map(np.mean,Y_train)) # print np.mean(map(np.mean,Y_test)), 'pm',np.var(map(np.mean,Y_test)) # --- Train model --- # model = EdgeFeatureGraphCRF(n_states, n_features, n_edge_features) ssvm = NSlackSSVM(model=model, C=0.1, tol=0.001, verbose=0, show_loss_every=10) # ssvm = OneSlackSSVM(model=model, C=.1, inference_cache=50, tol=0.1, verbose=0,show_loss_every=10) ssvm.fit(X_train, Y_train) # --- Test with pystruct --- # # print("Test score with graph CRF: %f" % ssvm.score(X_test, Y_test)) # --- Test manually - get contingency tables --- # prediction = ssvm.predict(X_test) contingency = np.array([0, 0, 0, 0]) for i in xrange(len(test)): pred = prediction[i] true = Y_test[i] contingency = contingency + get_contingency(pred, true) TP, FP, TN, FN = contingency[0], contingency[1], contingency[ 2], contingency[3]
# that should make the model fairly simple X, Y = make_simple_2x2(seed=1) # flatten X and Y X_flat = [x.reshape(-1, 1).astype(np.float) for x in X] Y_flat = [y.ravel() for y in Y] # first, use standard graph CRF. Can't do much, high loss. crf = GraphCRF() svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1) G = [make_grid_edges(x) for x in X] asdf = zip(X_flat, G) svm.fit(asdf, Y_flat) plot_boxes(svm.predict(asdf), title="Non-latent SSVM predictions") print("Training score binary grid CRF: %f" % svm.score(asdf, Y_flat)) # using one latent variable for each 2x2 rectangle latent_crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2, inference_method='lp') ssvm = OneSlackSSVM(model=latent_crf, max_iter=200, C=100, verbose=1, n_jobs=-1, show_loss_every=10,
# Append a constant-1 column to both splits so the linear models get a bias
# term.
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

# One 10-class model definition shared by all learners below.
model = MultiClassClf(n_features=X_train_bias.shape[1], n_classes=10)
n_slack_svm = NSlackSSVM(model, verbose=2, check_constraints=False, C=0.1,
                         batch_size=100, tol=1e-2)
one_slack_svm = OneSlackSSVM(model, verbose=2, C=.10, tol=.001)
subgradient_svm = SubgradientSSVM(model, C=0.1, learning_rate=0.000001,
                                  max_iter=1000, verbose=0)
fw_bc_svm = FrankWolfeSSVM(model, C=.1, max_iter=50)
fw_batch_svm = FrankWolfeSSVM(model, C=.1, max_iter=50, batch_mode=True)

# n-slack cutting plane ssvm: time the fit, then report test accuracy
start = time()
n_slack_svm.fit(X_train_bias, y_train)
time_n_slack_svm = time() - start
y_pred = np.hstack(n_slack_svm.predict(X_test_bias))
print("Score with pystruct n-slack ssvm: %f (took %f seconds)"
      % (np.mean(y_pred == y_test), time_n_slack_svm))

## 1-slack cutting plane ssvm: same procedure
start = time()
one_slack_svm.fit(X_train_bias, y_train)
time_one_slack_svm = time() - start
y_pred = np.hstack(one_slack_svm.predict(X_test_bias))
print("Score with pystruct 1-slack ssvm: %f (took %f seconds)"
      % (np.mean(y_pred == y_test), time_one_slack_svm))

#online subgradient ssvm
start = time()
from time import time

import numpy as np
from sklearn.datasets import load_iris
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # older scikit-learn releases only ship the since-removed
    # cross_validation module
    from sklearn.cross_validation import train_test_split

from pystruct.models import GraphCRF
from pystruct.learners import NSlackSSVM

iris = load_iris()
X, y = iris.data, iris.target

# make each example into a tuple of a single feature vector and an empty edge
# list (np.int was only an alias of the builtin int and is gone from recent
# NumPy releases; the builtin yields the same dtype)
X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
Y = y.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X_, Y)

pbl = GraphCRF(n_features=4, n_states=3, inference_method='lp')
svm = NSlackSSVM(pbl, verbose=1, check_constraints=True, C=100, n_jobs=1)

# time the fit and report the mean accuracy on the held-out split
start = time()
svm.fit(X_train, y_train)
time_svm = time() - start
y_pred = np.vstack(svm.predict(X_test))
print("Score with pystruct crf svm: %f (took %f seconds)"
      % (np.mean(y_pred == y_test), time_svm))
from pystruct.learners import NSlackSSVM, OneSlackSSVM, LatentSSVM
from sklearn.model_selection import train_test_split

# Load the EEG features and labels from the .mat files; both arrays are
# transposed after loading.
mat_content = h5py.File('feat_train_1.mat')
EEG_feature = np.array(mat_content['feat_train'])
EEG_feature = EEG_feature.transpose()

mat_content = h5py.File('LABEL_train_1.mat')
EEG_label = np.array(mat_content['LABEL_train'])
EEG_label = EEG_label.transpose()

X_train, X_test, y_train, y_test = train_test_split(
    EEG_feature, EEG_label, test_size=0.4, random_state=0)
X_train = X_train.astype(float)
X_test = X_test.astype(float)

# Variants with an extra length-1 axis inserted at position 1.
# NOTE(review): these are never used below; fit/score receive the 2-d
# X_train / X_test. Confirm which form ChainCRF is meant to get.
X_train_ = np.expand_dims(X_train, axis=1)
X_test_ = np.expand_dims(X_test, axis=1)

#latent_crf = LatentNodeCRF(n_labels=2, n_features=2140, n_hidden_states=2, inference_method='lp')
#ssvm = OneSlackSSVM(model=latent_crf, max_iter=200, C=100, n_jobs=-1, show_loss_every=10, inference_cache=50)
#latent_svm = LatentSSVM(ssvm)
# Random initialization
#H_init =
#latent_svm.fit(X_train, Y_train, H_init)
#print("Training score with latent nodes: %f" % latent_svm.score(X, Y))
#H = latent_svm.predict_latent(X)

crf = ChainCRF()
svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1)
svm.fit(X_train, y_train)
# Score the learner that was actually fitted; `ssvm` only exists in the
# commented-out code above and raised a NameError here.
svm.score(X_test, y_test)
# Make each example into a tuple of a single feature vector and an empty edge
# list (np.int was only an alias of the builtin int and is gone from recent
# NumPy releases; the builtin yields the same dtype)
X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
Y = y.reshape(-1, 1)

X_train_, X_test_, X_train, X_test, y_train, y_test, y_org_train, y_org_test =\
    train_test_split(X_, X, Y, y_org, test_size=.5)

# First, perform the equivalent of the usual SVM.  This is represented as
# a CRF problem with no edges.

pbl = GraphCRF(inference_method='unary')
# We use batch_size=-1 as a binary problem can be solved in one go.
svm = NSlackSSVM(pbl, C=1, batch_size=-1)

svm.fit(X_train_, y_train)

# Now, use a latent-variable CRF model with SVM training.
# 5 states per label is enough capacity to encode the 5 digit classes.

latent_pbl = LatentGraphCRF(n_states_per_label=5,
                            inference_method='unary')
base_ssvm = NSlackSSVM(latent_pbl, C=1, tol=.01,
                       inactive_threshold=1e-3, batch_size=10)
latent_svm = LatentSSVM(base_ssvm=base_ssvm, latent_iter=2)
latent_svm.fit(X_train_, y_train)

print("Score with binary SVM:")
print("Train: {:2.2f}".format(svm.score(X_train_, y_train)))
#%%
# For every training sample build one feature vector per edge by
# concatenating the features of the edge's two endpoint nodes.
edgeFeatures=[]
for i in range(len(X_flat)):
    feature=[]
    for j in range(len(edgeList)):
        feature.append( np.append(X_flat[i][edgeList[j][0]] , X_flat[i][edgeList[j][1]]) )
    edgeFeatures.append(feature)
edgeFeatures=np.array(edgeFeatures)
#asdf = zip(X_flat,G,edgeFeatures)
# Materialize the pairs: on Python 3 a bare zip() is a one-shot iterator
# and would be exhausted after the learner's first pass over the data.
asdf = list(zip(X_flat,G))
#%%
svm.fit(asdf,Y_flat)
#%%
# Same preparation for the first n_test test samples; every sample shares
# the same edge list.
G2 = [edgeList for x in testDirty[0:n_test]]
X_flat2 = [getNeighborhoodData(i) for i in testDirty[0:n_test]]
Y_flat2 = np.array(testLabels[0:n_test])
edgeFeatures2=[]
for i in range(len(X_flat2)):
    feature=[]
    for j in range(len(edgeList)):
        feature.append( np.append(X_flat2[i][edgeList[j][0]] , X_flat2[i][edgeList[j][1]]) )
    edgeFeatures2.append(feature)
from pystruct.datasets import generate_crosses_explicit

# Noisy crosses dataset; the number of CRF states is the number of distinct
# labels present.
X, Y = generate_crosses_explicit(n_samples=50, noise=10, size=6, n_crosses=1)
n_labels = len(np.unique(Y))
crf = GridCRF(n_states=n_labels,
              inference_method=("ad3", {'branch_and_bound': True}))

# Four learners sharing the same CRF model.
n_slack_svm = NSlackSSVM(crf, check_constraints=False,
                         max_iter=50, batch_size=1, tol=0.001)
one_slack_svm = OneSlackSSVM(crf, check_constraints=False,
                             max_iter=100, tol=0.001, inference_cache=50)
subgradient_svm = SubgradientSSVM(crf, learning_rate=0.001, max_iter=20,
                                  decay_exponent=0, momentum=0)
bcfw_svm = FrankWolfeSSVM(crf, max_iter=50, check_dual_every=4)

#n-slack cutting plane ssvm
n_slack_svm.fit(X, Y)

# 1-slack cutting plane ssvm
one_slack_svm.fit(X, Y)

# online subgradient ssvm
subgradient_svm.fit(X, Y)

# Block coordinate Frank-Wolfe
bcfw_svm.fit(X, Y)

# don't plot objective from cached inference for 1-slack:
# keep only the timestamps of iterations not flagged in cached_constraint_
inference_run = ~np.array(one_slack_svm.cached_constraint_)
time_one = np.array(one_slack_svm.timestamps_[1:])[inference_run]

# plot stuff
batch_size=1, tol=0.001) one_slack_svm = OneSlackSSVM(crf, check_constraints=False, max_iter=100, tol=0.001, inference_cache=50) subgradient_svm = SubgradientSSVM(crf, learning_rate=0.001, max_iter=20, decay_exponent=0, momentum=0) bcfw_svm = FrankWolfeSSVM(crf, max_iter=50, check_dual_every=4) #n-slack cutting plane ssvm n_slack_svm.fit(X, Y) # 1-slack cutting plane ssvm one_slack_svm.fit(X, Y) # online subgradient ssvm subgradient_svm.fit(X, Y) # Block coordinate Frank-Wolfe bcfw_svm.fit(X, Y) # don't plot objective from chached inference for 1-slack inference_run = ~np.array(one_slack_svm.cached_constraint_) time_one = np.array(one_slack_svm.timestamps_[1:])[inference_run] # plot stuff
# sure there's a more efficient way to do this # --- Test/train split --- # X_train = [examples[j] for j in train] Y_train = [labels[j] for j in train] X_test = [examples[j] for j in test] Y_test = [labels[j] for j in test] # if verbose: # print np.mean(map(np.mean,Y_train)), 'pm',np.var(map(np.mean,Y_train)) # print np.mean(map(np.mean,Y_test)), 'pm',np.var(map(np.mean,Y_test)) # --- Train model --- # model = EdgeFeatureGraphCRF(n_states,n_features,n_edge_features) ssvm = NSlackSSVM(model=model, C=0.1, tol=0.001, verbose=0,show_loss_every=10) # ssvm = OneSlackSSVM(model=model, C=.1, inference_cache=50, tol=0.1, verbose=0,show_loss_every=10) ssvm.fit(X_train, Y_train) # --- Test with pystruct --- # # print("Test score with graph CRF: %f" % ssvm.score(X_test, Y_test)) # --- Test manually - get contingency tables --- # prediction = ssvm.predict(X_test) contingency = np.array([0,0,0,0]) for i in xrange(len(test)): pred = prediction[i] true = Y_test[i] contingency = contingency+get_contingency(pred,true) TP, FP, TN, FN = contingency[0], contingency[1], contingency[2], contingency[3]
X, Y = make_simple_2x2(seed=1) # flatten X and Y X_flat = [x.reshape(-1, 1).astype(np.float) for x in X] Y_flat = [y.ravel() for y in Y] # first, use standard graph CRF. Can't do much, high loss. crf = GraphCRF() svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1) G = [make_grid_edges(x) for x in X] X_grid_edges = list(zip(X_flat, G)) svm.fit(X_grid_edges, Y_flat) plot_boxes(svm.predict(X_grid_edges), title="Non-latent SSVM predictions") print("Training score binary grid CRF: %f" % svm.score(X_grid_edges, Y_flat)) # using one latent variable for each 2x2 rectangle latent_crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2, inference_method='lp') ssvm = OneSlackSSVM(model=latent_crf, max_iter=200, C=100, n_jobs=-1, show_loss_every=10, inference_cache=50) latent_svm = LatentSSVM(ssvm) # make edges for hidden states: edges = [] node_indices = np.arange(4 * 4).reshape(4, 4) for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
def crf_postprocess(X_train, y_train, X_test, train_examples=2000):
    """Fit a multi-label n-slack SSVM and return its test predictions.

    Parameters
    ----------
    X_train, y_train : training data passed straight to ``fit``.
    X_test : samples to predict.
    train_examples : accepted for interface compatibility; not used here.

    Returns
    -------
    The predictions for ``X_test`` converted to a NumPy array.
    """
    learner = NSlackSSVM(MultiLabelClf(), verbose=1, n_jobs=-1,
                         show_loss_every=1)
    learner.fit(X_train, y_train)
    return np.array(learner.predict(X_test))