def test_binary_blocks():
    X, Y = generate_blocks(n_samples=10)
    crf = GridCRF()
    clf = StructuredPerceptron(model=crf, max_iter=40)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_multinomial_blocks():
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    crf = GridCRF(n_states=X.shape[-1])
    clf = StructuredPerceptron(model=crf, max_iter=10)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_overflow_averaged():
    X = np.array([[np.finfo('d').max]])
    Y = np.array([-1])
    pcp = StructuredPerceptron(model=BinaryClf(n_features=1), max_iter=2,
                               average=True)
    pcp.fit(X, Y)
    assert_true(np.isfinite(pcp.w[0]))
def test_multinomial_blocks():
    X, Y = toy.generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    crf = GridCRF(n_states=X.shape[-1])
    clf = StructuredPerceptron(problem=crf, max_iter=10)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_binary_blocks():
    X, Y = toy.generate_blocks(n_samples=10)
    crf = GridCRF()
    clf = StructuredPerceptron(problem=crf, max_iter=40)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_binary_blocks_perceptron_online():
    # testing the structured perceptron on an easy binary dataset
    X, Y = toy.generate_blocks(n_samples=10)
    crf = GridCRF()
    clf = StructuredPerceptron(model=crf, max_iter=20)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def test_binary_blocks_perceptron_online():
    # testing the structured perceptron on an easy binary dataset
    X, Y = generate_blocks(n_samples=10)
    inference_method = get_installed(['qpbo', 'ad3', 'lp'])[0]
    crf = GridCRF(inference_method=inference_method)
    clf = StructuredPerceptron(model=crf, max_iter=20)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(Y, Y_pred)
def createModel(data, labels, num_classes=2):
    weight_class = np.ones(2)
    model = chain_crf.ChainCRF(directed=True)
    clf = StructuredPerceptron(model=model, max_iter=1000, batch=False,
                               average=True)
    print("Structured Perceptron + Chain CRF")
    train_start = time()
    clf.fit(X=data, Y=labels)
    train_end = time()
    print("Training took " + str((train_end - train_start) / 60) + " minutes to complete\n")
    return clf
def main():
    from pystruct.learners import StructuredPerceptron
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        data = "notebooks/data/wsj_sec_2_21_gold_dependencies"
        X, Y = read_parse(data, limit=10000, length=20)
        model = FirstOrderModel(feature_hash=1000000,
                                joint_feature_format="fast")
        sp = StructuredPerceptron(model, verbose=1, max_iter=10, average=False)
        sp.fit(X, Y)
        np.save("/tmp/w", sp.w)
def test_averaged():
    # Under a lot of noise, averaging helps. This fails with less noise.
    X, Y = generate_blocks_multinomial(n_samples=15, noise=3, seed=0)
    X_train, Y_train = X[:10], Y[:10]
    X_test, Y_test = X[10:], Y[10:]
    crf = GridCRF()
    clf = StructuredPerceptron(model=crf, max_iter=3)
    clf.fit(X_train, Y_train)
    no_avg_test = clf.score(X_test, Y_test)
    clf.set_params(average=True)
    clf.fit(X_train, Y_train)
    avg_test = clf.score(X_test, Y_test)
    assert_greater(avg_test, no_avg_test)
def test_partial_averaging(): """Use XOR weight cycling to test partial averaging""" X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, -1]) pcp = StructuredPerceptron(model=BinarySVMModel(n_features=3), max_iter=5, decay_exponent=1, decay_t0=1) weight = {} for average in (0, 1, 4, -1): pcp.set_params(average=average) pcp.fit(X, Y) weight[average] = pcp.w assert_array_equal(weight[4], weight[-1]) assert_array_almost_equal(weight[0], [1.5, 3, 0]) assert_array_almost_equal(weight[1], [1.75, 3.5, 0]) assert_array_almost_equal(weight[4], [2.5, 5, 0])
def createModel(data, labels, num_classes=2):
    model = ChainCRF(n_states=num_classes, n_features=int(len(columns) - 5),
                     directed=True)
    clf = StructuredPerceptron(model=model, max_iter=200, verbose=False,
                               batch=False, average=True)
    print("Structured Perceptron + Chain CRF")
    train_start = time()
    clf.fit(X=data, Y=labels)
    train_end = time()
    print("Training took " + str((train_end - train_start) / 60) + " minutes to complete\n")
    return clf
def createModel(data, labels):
    model = GraphCRF(n_states=3, n_features=int(len(columns) - 4),
                     directed=True, inference_method='max-product')
    clf = StructuredPerceptron(model=model, max_iter=100, verbose=False,
                               batch=False, average=True)
    print("Structured Perceptron + Graph CRF")
    train_start = time()
    clf.fit(X=data, Y=labels)
    train_end = time()
    print("Training took " + str((train_end - train_start) / 60) + " minutes to complete\n")
    return clf
def test_averaging_early_stopping(): """Test averaging over final epoch when early stopping""" # we use logical OR, an easy problem solved after the second epoch X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, 1]) pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=3, average=-1) pcp.fit(X, Y) # The exact weight is used without the influence of the early iterations assert_array_equal(pcp.w, [1, 1, 1]) # If we were expecting 3 iterations, we would end up with a zero vector pcp.set_params(average=2) pcp.fit(X, Y) assert_array_equal(pcp.w, [0, 0, 0])
class PerceptronTrainer:
    def __init__(self, max_iter=25, verbose=False):
        self.dsm = DiscourseSequenceModel(True)
        self.sp = StructuredPerceptron(self.dsm, verbose=(1 if verbose else 0),
                                       max_iter=max_iter, average=True)

    def fit(self, trainX, trainY):
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.sp.fit(trainX, trainY)

    def predict(self, testX):
        predY = self.sp.predict(testX)
        return predY

    def score(self, testX, testY):
        return self.sp.score(testX, testY)

    def weights(self):
        return self.dsm._vec.inverse_transform(self.sp.w)
def test_averaging_early_stopping(): """Test averaging over final epoch when early stopping""" # we use logical OR, an easy problem solved after the second epoch X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, 1]) pcp = StructuredPerceptron(model=BinarySVMModel(n_features=3), max_iter=3, average=-1) pcp.fit(X, Y) # The exact weight is used without the influence of the early iterations assert_array_equal(pcp.w, [1, 1, 1]) # If we were expecting 3 iterations, we would end up with a zero vector pcp.set_params(average=2) pcp.fit(X, Y) assert_array_equal(pcp.w, [0, 0, 0])
def test_partial_averaging(): """Use XOR weight cycling to test partial averaging""" X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, -1]) pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=5, decay_exponent=1, decay_t0=1) weight = {} for average in (0, 1, 4, -1): pcp.set_params(average=average) pcp.fit(X, Y) weight[average] = pcp.w assert_array_equal(weight[4], weight[-1]) assert_array_almost_equal(weight[0], [1.5, 3, 0]) assert_array_almost_equal(weight[1], [1.75, 3.5, 0]) assert_array_almost_equal(weight[4], [2.5, 5, 0])
def test_xor(): """Test perceptron behaviour against hand-computed values for XOR""" X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, -1]) # Should cycle weight vectors (1, 1, -1), (0, 2, 0), (1, 1, 1), (0, 0, 0) # but this depends on how ties are settled. Maybe the test can be # made robust to this # Batch version should cycle (0, 0, -2), (0, 0, 0) expected_predictions = [ np.array([1, 1, 1, 1]), # online, no average, w = (0, 0, 0, 0) np.array([-1, 1, -1, 1]), # online, average, w ~= (0.5, 1, 0) np.array([1, 1, 1, 1]), # batch, no average, w = (0, 0, 0) np.array([-1, -1, -1, -1]) # batch, average, w ~= (0, 0, -2) ] pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=2) for pred, (batch, average) in zip(expected_predictions, product((False, True), (False, True))): pcp.set_params(batch=batch, average=average) pcp.fit(X, Y) # We don't compare w explicitly but its prediction. As the perceptron # is invariant to the scaling of w, this will allow the optimization of # the underlying implementation assert_array_equal(pcp.predict(X), pred)
def test_xor(): """Test perceptron behaviour against hand-computed values for XOR""" X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=np.float) Y = np.array([-1, 1, 1, -1]) # Should cycle weight vectors (1, 1, -1), (0, 2, 0), (1, 1, 1), (0, 0, 0) # but this depends on how ties are settled. Maybe the test can be # made robust to this # Batch version should cycle (0, 0, -2), (0, 0, 0) expected_predictions = [ np.array([1, 1, 1, 1]), # online, no average, w = (0, 0, 0, 0) np.array([-1, 1, -1, 1]), # online, average, w ~= (0.5, 1, 0) np.array([1, 1, 1, 1]), # batch, no average, w = (0, 0, 0) np.array([-1, -1, -1, -1]), # batch, average, w ~= (0, 0, -2) ] pcp = StructuredPerceptron(model=BinarySVMModel(n_features=3), max_iter=2) for pred, (batch, average) in zip(expected_predictions, product((False, True), (False, True))): pcp.set_params(batch=batch, average=average) pcp.fit(X, Y) # We don't compare w explicitly but its prediction. As the perceptron # is invariant to the scaling of w, this will allow the optimization of # the underlying implementation assert_array_equal(pcp.predict(X), pred)
import matplotlib.pyplot as plt

G = nx.DiGraph()
np.seterr(divide='ignore', invalid='ignore')

trainArticles = open('data/singleShort.txt', 'r').readlines()  # = importArticles.getData('train')
testArticles = open('data/singleShortTest.txt', 'r').readlines()  # = importArticles.getData('test')
print len(trainArticles)
print len(testArticles)

listOfYears = []
#X, Y = generate_blocks(n_samples=10)
#inference_method = get_installed(['qpbo', 'ad3', 'lp'])[0]
#crf = GridCRF(inference_method=inference_method)
clf = StructuredPerceptron(model=GridCRF(), max_iter=120)
probs = []
titles = []


#A
def getArticle(article):
    singleSets = []
    try:
        chunks = gc.getChunks(article[1])
        tags = tag.getTags(article[1], chunks)
        #if tags == []:
        #    continue  # check this is right. go to next iteration
        """The Stanford Open IE tags"""
        subject = tags['subject']
        relation = tags['relation']
def main():
    parser = argparse.ArgumentParser(description='Run parsing experiments.')
    parser.add_argument('--original_rules', type=str, help='Original rule file')
    parser.add_argument('--binarized_rules', type=str, help='Binarized rule file')
    parser.add_argument('--training_ps', type=str, help='Lexicalized phrase structure file.')
    parser.add_argument('--training_dep', type=str, help='Dependency parse file.')
    parser.add_argument('--store_hypergraph_dir', type=str, help='Directory to store/load hypergraphs.')
    parser.add_argument('--save_hypergraph', type=bool, help='Construct and save hypergraphs.')
    parser.add_argument('--limit', type=int, help='Number of sentences to use.')
    parser.add_argument('--test_file', type=str, help='Test file.')
    parser.add_argument('--gold_file', type=str, help='Gold file.')
    parser.add_argument('--model', type=str, help='Weight model.')
    parser.add_argument('--test_limit', type=int, help='Number of sentences to test on.')
    parser.add_argument('--run_eval', default=False, type=bool, help='')
    parser.add_argument('--test_load', default=False, type=bool, help='')
    parser.add_argument('--debugger', default=False, type=bool, help='')
    parser.add_argument('--oracle', default=False, type=bool, help='Run oracle experiments')
    parser.add_argument('config', type=str)
    parser.add_argument('label', type=str)

    print >>sys.stderr, open(sys.argv[1]).read()
    argparse_config.read_config_file(parser, sys.argv[1])
    args = parser.parse_args()
    print args

    if args.debugger:
        from IPython.core import ultratb
        sys.excepthook = ultratb.FormattedTB(color_scheme='Linux', call_pdb=1)

    output_dir = os.path.join("Data", args.label)
    data_out = os.path.join(output_dir, "mydata.txt")
    print >>sys.stderr, data_out

    # Set up logging.
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(open(data_out, 'w'))
    logger.addHandler(handler)

    # Load data.
    print args.training_dep
    print args.training_ps
    if args.training_dep:
        X, Y = train.read_data_set(args.training_dep, args.training_ps,
                                   args.limit)
        orules = tree.read_original_rules(open(args.original_rules))
        grammar = read_rule_set(open(args.binarized_rules))
        # for rule in grammar.unary_rules:
        #     print rule
        X, Y = zip(*[(x, y) for x, y in zip(X, Y) if len(x.words) >= 5])
        binarized_Y = [tree.binarize(orules, make_bounds(x.deps), y)[0]
                       for x, y in zip(X, Y)]
        model = train.ReconstructionModel(feature_hash=int(1e7),
                                          joint_feature_format="fast",
                                          joint_feature_cache=False,
                                          part_feature_cache=False)
        model.set_grammar(grammar)
        model.initialize(X, binarized_Y)

    if args.test_load:
        print "LOAD"
        graphs = []
        start = memory()
        for i in range(1000 - 1):
            if len(X[i].words) < 5: continue
            x = X[i]
            path = "%s/graphs%s.graph"%(args.store_hypergraph_dir, i)
            encoder = LexicalizedCFGEncoder(x.words, x.tags, grammar)
            pre = memory()
            graph = pydecode.load(path)
            print i, memory() - pre, len(graph.edges), len(X[i].words), memory() - start
            pre = memory()
            encoder.load("%s/encoder%s.pickle"%(
                args.store_hypergraph_dir, i), graph)
            print i, memory() - pre
            graphs.append((graph, encoder))
    elif args.save_hypergraph:
        print "SAVING"
        import time
        model.set_from_disk(None)
        for i in range(40000):
            if len(X[i].words) < 5: continue
            # if len(X[i].words) > 15: continue
            graph, encoder = model.dynamic_program(X[i])
            # Sanity Check
            # print binarized_Y[i]
            # print encoder.structure_path(graph, binarized_Y[i])
            if i % 100 == 0: print i
            pydecode.save("%s/graphs%s.graph"%(
                args.store_hypergraph_dir, X[i].index), graph)
            encoder.save("%s/encoder%s.pickle"%(
                args.store_hypergraph_dir, X[i].index), graph)
            del graph
            del encoder
    elif args.oracle:
        print "ORACLE"
        trees_out = open(os.path.join(output_dir, "oracle.txt"), 'w')
        model = train.ReconstructionModel(feature_hash=int(1e7),
                                          part_feature_cache=False,
                                          joint_feature_cache=False,
                                          joint_feature_format="sparse")
        model.set_grammar(grammar)
        model.initialize(X, binarized_Y)
        model.set_from_disk(None)

        X_test, Y_test = train.read_data_set(
            args.test_file, args.gold_file, args.test_limit)
        w = np.load(args.model)

        # GOLD TREES
        binarized_Y_test = []
        for x, orig_y in zip(X_test, Y_test):
            y = tree.binarize(orules, orig_y)
            try:
                graph, encoder = model.dynamic_program(x)
                label_values = np.zeros(np.max(graph.labeling) + 1)
                label_values.fill(-1)
                possible = 0
                brackets = set()
                for part in encoder.transform_structure(y):
                    X = grammar.rule_nonterms(part[5])[0]
                    brackets.add((part[0], part[2], X))
                    #print part
                    if tuple(part) in encoder.encoder:
                        label = encoder.encoder[tuple(part)]
                        label_values[label] = 10.0
                        possible += 1
                print "transform"
                label_weights = np.zeros(len(graph.labeling))
                graph_labels = graph.labeling[graph.labeling != -1]
                parts = encoder.transform_labels(graph_labels)
                weights = []
                for part in parts:
                    X = grammar.rule_nonterms(part[5])[0]
                    if part[1] != part[2] and X[0] != "Z":
                        if (part[0], part[2], X) in brackets:
                            weights.append(2.0)
                        else:
                            weights.append(-2.0)
                    else:
                        weights.append(0.0)
                label_weights = np.zeros(len(graph.labeling))
                label_weights[graph.labeling != -1] = np.array(weights)

                # graph_labels = graph.labeling[graph.labeling != -1]
                # parts = encoder.transform_labels(graph_labels)
                # parts_features = model.parts_features(x, parts)
                # feature_indices = pydecode.model.sparse_feature_indices(parts_features,
                #                                                         model.temp_shape,
                #                                                         model.offsets,
                #                                                         model.feature_hash)
                # # Sum the feature weights for the features in each label row.
                # label_weights = np.zeros(len(graph.labeling))
                # label_weights[graph.labeling != -1] = \
                #     np.sum(np.take(w, feature_indices, mode="clip"), axis=1)

                oracle_weights = pydecode.transform(graph, label_values)
                path = pydecode.best_path(graph, oracle_weights + label_weights)
                print "Match", oracle_weights.T * path.v, possible
                y_hat = encoder.transform_path(path)
                print >>trees_out, tree.remove_head(tree.unbinarize(y_hat)) \
                    .pprint(100000)
            except:
                print >>trees_out, ""
                print "error"
                continue
    elif args.test_file:
        print "TESTING"
        trees_out = open(os.path.join(output_dir, "trees.txt"), 'w')
        model = train.ReconstructionModel(feature_hash=int(1e7),
                                          part_feature_cache=False,
                                          joint_feature_cache=False,
                                          joint_feature_format="sparse")
        model.set_grammar(grammar)
        model.initialize(X, binarized_Y)
        model.set_from_disk(None)

        X_test, Y_test = train.read_data_set(
            args.test_file, args.gold_file, args.test_limit)
        w = np.load(args.model)
        # binarized_Y_test = []
        # for i, y in enumerate(Y_test):
        #     print i
        #     binarized_Y_test.append(tree.binarize(orules, y))
        # for x, y in zip(X_test, binarized_Y_test):
        for x in X_test:
            try:
                graph, encoder = model.dynamic_program(x)
                y_hat = model.inference(x, w)
                for part in encoder.transform_structure(y_hat):
                    print part, grammar.rule_nonterms(part[-1]), model.score_part(x, w, part)
                a = w.T * model.joint_feature(x, y_hat)
                # b = w.T * model.joint_feature(x, y)
                # print a, b
                # if b > a: print "FAIL"
                print
                print tree.remove_head(y_hat)
                print
                print tree.remove_head(tree.unbinarize(y_hat))\
                    .pprint()
                # print tree.remove_head(tree.unbinarize(y))\
                #     .pprint()
                # print
                #)\tree.remove_head(
                print >>trees_out, tree.remove_head(tree.unbinarize(y_hat)) \
                    .pprint(100000)
            except:
                print "error"
                print >>trees_out, ""
    elif args.run_eval:
        test_file = os.path.join(output_dir, "oracle.txt")
        gold_file = args.gold_file
        print "Evaling", test_file, gold_file
        os.system("../evalb/EVALB/evalb -p ../evalb/EVALB/COLLINS.prm %s %s"%(gold_file, test_file))
    else:
        print "TRAINING"
        model.set_from_disk(args.store_hypergraph_dir)
        sp = StructuredPerceptron(model, verbose=1, max_iter=5, average=False)
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sp.fit(X, binarized_Y)
        np.save(os.path.join(output_dir, "params"), sp.w)
        w = sp.w
def fit(self, X, Y):
    model = (self.model if isinstance(self.model, type)
             else self.model.__class__)
    self.model = model(n_states=34, n_features=X[0].shape[1])
    StructuredPerceptron.fit(self, X, Y)
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import pystruct
from pystruct.datasets import load_letters
import numpy as np
from pystruct.models import ChainCRF
from pystruct.learners import StructuredPerceptron

letters = load_letters()
X, y, folds = letters['data'], letters['labels'], letters['folds']
X, y = np.array(X), np.array(y)
X_train, X_test = X[folds == 1], X[folds != 1]
y_train, y_test = y[folds == 1], y[folds != 1]

model = ChainCRF()
ssvm = StructuredPerceptron(model=model, max_iter=10)
ssvm.fit(X_train, y_train)
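A small follow-up sketch, not part of the original snippet: pystruct learners also expose predict and score, so the perceptron fitted above can be checked on the held-out letters fold (X_test, y_test from the same script).

# Assumed evaluation step for the snippet above (hypothetical addition):
# score() returns 1 minus the normalized loss on the given data, which for
# a ChainCRF with Hamming loss corresponds to per-letter accuracy.
y_pred = ssvm.predict(X_test)
print("first predicted label sequence:", y_pred[0])
print("test accuracy:", ssvm.score(X_test, y_test))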