import numpy as np


def ner_compute_loss():
    # Sanity-check compute_loss() on a single window and on a batch of windows.
    from nerwindow import WindowMLP
    np.random.seed(10)
    wv = np.random.randn(20, 10)
    clf = WindowMLP(wv, windowsize=3, dims=[None, 15, 3], rseed=10)
    J = clf.compute_loss([1, 2, 3], 1)
    print " dummy: J = %g" % J
    J = clf.compute_loss([[1, 2, 3], [2, 3, 4]], [0, 1])
    print " dummy: J = %g" % J
def ner_predict_proba():
    from nerwindow import WindowMLP
    np.random.seed(10)
    wv = np.random.randn(20, 10)
    clf = WindowMLP(wv, windowsize=3, dims=[None, 15, 3], rseed=10)
    p = clf.predict_proba([1, 2, 3])
    assert len(p.flatten()) == 3
    p = clf.predict_proba([[1, 2, 3], [2, 3, 4]])
    assert np.ndim(p) == 2
    assert p.shape == (2, 3)
def experiment(param): pms = param["param"] sche_name = param["setting_name"] clf = WindowMLP(**pms) if sche_name == "epoch": schedule = itertools.chain(*itertools.repeat(xrange(len(y_train)), nepoch)) elif sche_name == "N": schedule = random.randint(0, len(y_train), N) elif sche_name == "mini_batch": schedule = trainig_schedule(N, len(y_train), k) cost = clf.train_sgd(X_train, y_train, idxiter=schedule) result = {"cost": cost, "name": sche_name} return result
def experiment(param): pms = param["param"] sche_name = param["setting_name"] clf = WindowMLP(**pms) if sche_name == "epoch": schedule = itertools.chain( *itertools.repeat(xrange(len(y_train)), nepoch)) elif sche_name == "N": schedule = random.randint(0, len(y_train), N) elif sche_name == "mini_batch": schedule = trainig_schedule(N, len(y_train), k) cost = clf.train_sgd(X_train, y_train, idxiter=schedule) result = {"cost": cost, "name": sche_name} return result
def setup_probing():
    num_to_word = dict(enumerate(
        ["hello", "world", "i", "am", "a", "banana", "there", "is", "no", "spoon"]))
    tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
    num_to_tag = dict(enumerate(tagnames))
    from nerwindow import WindowMLP
    np.random.seed(10)
    wv = np.random.randn(10, 50)
    clf = WindowMLP(wv, windowsize=3, dims=[None, 100, 5], rseed=10)
    return clf, num_to_word, num_to_tag
# Dummy test code
# Run this script and make sure nothing crashes
# (this is the same setup as the sanity check for part 1.1)
if __name__ == '__main__':
    num_to_word = dict(enumerate(
        ["hello", "world", "i", "am", "a", "banana", "there", "is", "no", "spoon"]))
    tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
    num_to_tag = dict(enumerate(tagnames))

    from nerwindow import WindowMLP
    np.random.seed(10)
    wv = np.random.randn(10, 50)
    clf = WindowMLP(wv, windowsize=3, dims=[None, 100, 5], rseed=10)

    print("\n=== Testing Part (a) ===\n")
    s, w = part_a(clf, num_to_word, verbose=True)
    assert len(s) == len(w)
    if type(s) == dict:
        # some students may have done this
        for k in list(s.keys()):
            assert k in w
        for k in list(w.keys()):
            assert k in s
        assert len(s) >= 5
    else:
        # list
        assert len(s[0]) == len(w[0])
        assert len(s[0]) == 10
        assert type(w[0][0]) == str
def ner_init():
    from nerwindow import WindowMLP
    np.random.seed(10)
    wv = np.random.randn(20, 10)
    clf = WindowMLP(wv, windowsize=3, dims=[None, 15, 3], rseed=10)
# - `__init__()` (initialize parameters and hyperparameters)
# - `_acc_grads()` (compute and accumulate gradients)
# - `compute_loss()` (compute loss for a training example)
# - `predict()`, `predict_proba()`, or other prediction method (for evaluation)
#
# `NNBase` provides you with a few others that will be helpful:
#
# - `grad_check()` (run a gradient check - calls `_acc_grads` and `compute_loss`)
# - `train_sgd()` (run SGD training; more on this later)
#
# Your task is to implement the window model in `nerwindow.py`; a scaffold has been provided for you with instructions on what to fill in.
#
# When ready, you can test below:

clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                reg=0.001, alpha=0.01)
clf.grad_check(X_train[0], y_train[0])  # gradient check on single point

# Now we'll train your model on some data! You can implement your own SGD method, but we recommend that you just call `clf.train_sgd`. This takes the following arguments:
#
# - `X`, `y`: training data
# - `idxiter`: iterable (list or generator) that gives the index (row of `X`) of each training example in the order it should be visited by SGD
# - `printevery`: int, prints progress after this many examples
# - `costevery`: int, computes mean loss after this many examples. This is a costly operation, so don't make it too frequent!
#
# The implementation we give you supports minibatch learning; if `idxiter` is a list-of-lists (or yields lists), then gradients will be computed for all indices in a minibatch before the parameters are modified (this is why we have you write `_acc_grads` instead of applying the updates directly!).
#
# Before training, you should generate a training schedule to pass as `idxiter`; a short sketch follows.
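# As a hedged illustration (a sketch, not the assignment's reference solution),
# one simple schedule visits every training example in order for a fixed number
# of epochs. The epoch count and the printevery/costevery values are placeholder
# choices; clf, X_train, and y_train are assumed to exist as above.
import itertools

nepoch = 5  # placeholder value
# Chain nepoch in-order passes over the training indices (plain per-example SGD).
epoch_schedule = itertools.chain(
    *itertools.repeat(xrange(len(y_train)), nepoch))

costs = clf.train_sgd(X_train, y_train, idxiter=epoch_schedule,
                      printevery=10000, costevery=10000)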
def main():
    # Load the starter word vectors
    wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                                'data/ner/wordVectors.txt')
    tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
    num_to_tag = dict(enumerate(tagnames))
    tag_to_num = du.invert_dict(num_to_tag)

    # Set window size
    windowsize = 3

    # Load the training set
    docs = du.load_dataset('data/ner/train')
    X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                          wsize=windowsize)

    # Load the dev set (for tuning hyperparameters)
    docs = du.load_dataset('data/ner/dev')
    X_dev, y_dev = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                      wsize=windowsize)

    # Load the test set (dummy labels only)
    docs = du.load_dataset('data/ner/test.masked')
    X_test, y_test = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                        wsize=windowsize)

    clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                    reg=0.001, alpha=0.01)
    train_size = X_train.shape[0]

    # Optionally reload a cached model and cost curve instead of retraining:
    """
    costs = pickle.load(open("costs.dat", "rb"))
    clf = pickle.load(open("clf.dat", "rb"))
    """

    nepoch = 5
    N = nepoch * len(y_train)
    k = 5  # minibatch size
    costs = clf.train_sgd(X_train, y_train,
                          idxiter=random_mini(k, N, train_size),
                          printevery=10000, costevery=10000)

    pickle.dump(clf, open("clf.dat", "wb"))
    pickle.dump(costs, open("costs.dat", "wb"))

    plot_learning_curve(clf, costs)

    # Predict labels on the dev set
    yp = clf.predict(X_dev)
    # Save predictions to a file, one per line
    ner.save_predictions(yp, "dev.predicted")

    full_report(y_dev, yp, tagnames)       # full report, helpful diagnostics
    eval_performance(y_dev, yp, tagnames)  # performance: optimize this F1

    # Probe the hidden layer: score every word vector against each hidden neuron.
    # L: V x 50
    # W[:, 50:100]: 100 x 50
    responses = clf.sparams.L.dot(clf.params.W[:, 50:100].T)  # V x 100
    index = np.argsort(responses, axis=0)[::-1]

    neurons = [1, 3, 4, 6, 8]  # change this to your chosen neurons
    for i in neurons:
        print "Neuron %d" % i
        top_words = [num_to_word[k] for k in index[:10, i]]
        top_scores = [responses[k, i] for k in index[:10, i]]
        print_scores(top_scores, top_words)
import data_utils.utils as du
import data_utils.ner as ner
from nerwindow import WindowMLP

# Load the starter word vectors
wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                            'data/ner/wordVectors.txt')
tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
num_to_tag = dict(enumerate(tagnames))
tag_to_num = du.invert_dict(num_to_tag)

# Set window size
windowsize = 3

# Load the training set
docs = du.load_dataset('data/ner/train')
X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                      wsize=windowsize)

# Load the dev set (for tuning hyperparameters)
docs = du.load_dataset('data/ner/dev')
X_dev, y_dev = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                  wsize=windowsize)

# Load the test set (dummy labels only)
docs = du.load_dataset('data/ner/test.masked')
X_test, y_test = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                    wsize=windowsize)

clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                reg=0.001, alpha=0.01)
clf.grad_check(X_train[0], y_train[0])

clf.train_sgd(X_train, y_train)
# - `grad_check()` (run a gradient check - calls `_acc_grads` and `compute_loss`)
# - `train_sgd()` (run SGD training; more on this later)
#
# Your task is to implement the window model in `nerwindow.py`; a scaffold has been provided for you with instructions on what to fill in.
#
# When ready, you can test below:

# In[72]:

wv.shape

# In[5]:

from nerwindow import WindowMLP

clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                reg=0.001, alpha=0.01)
# clf._acc_grads(X_train[0], y_train[0])
clf.grad_check(X_train[0], y_train[0])  # gradient check on single point

# Now we'll train your model on some data! You can implement your own SGD method, but we recommend that you just call `clf.train_sgd`. This takes the following arguments:
#
# - `X`, `y`: training data
# - `idxiter`: iterable (list or generator) that gives the index (row of `X`) of each training example in the order it should be visited by SGD
# - `printevery`: int, prints progress after this many examples
# - `costevery`: int, computes mean loss after this many examples. This is a costly operation, so don't make it too frequent!
#
# The implementation we give you supports minibatch learning; if `idxiter` is a list-of-lists (or yields lists), then gradients will be computed for all indices in a minibatch before the parameters are modified (this is why we have you write `_acc_grads` instead of applying the updates directly!).
#
# Before training, you should generate a training schedule to pass as `idxiter`. If you know how to use Python generators, we recommend those; otherwise, just make a static list. Build your schedule in the cell below; a hedged sketch of both forms follows.
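# A hedged example of the two forms mentioned above: a static list for a single
# in-order pass over the data, and a generator that yields minibatches
# (list-of-lists), so train_sgd applies one parameter update per minibatch.
# The batch size k below is an illustrative choice, not prescribed by the assignment.
single_pass = list(range(len(y_train)))  # static list: one epoch, one example at a time


def minibatch_pass(ntrain, k):
    # Contiguous minibatches of size k over one epoch; each yielded list of
    # indices triggers a single parameter update in train_sgd.
    for start in xrange(0, ntrain, k):
        yield list(range(start, min(start + k, ntrain)))

# Example usage (either form works as idxiter):
# costs = clf.train_sgd(X_train, y_train, idxiter=single_pass)
# costs = clf.train_sgd(X_train, y_train, idxiter=minibatch_pass(len(y_train), 5))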