import data_utils.utils as du
import data_utils.ner as ner

# Load the starter word vectors
wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                           'data/ner/wordVectors.txt')
tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
num_to_tag = dict(enumerate(tagnames))
tag_to_num = du.invert_dict(num_to_tag)

# Set window size
windowsize = 3

# Load the training set
docs = du.load_dataset('data/ner/train')
X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                      wsize=windowsize)

# Load the dev set (for tuning hyperparameters)
docs = du.load_dataset('data/ner/dev')
X_dev, y_dev = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                  wsize=windowsize)

# Load the test set (dummy labels only)
docs = du.load_dataset('data/ner/test.masked')
X_test, y_test = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                    wsize=windowsize)

# To implement the window model, you will need to fill in the following methods:
#
# - `_acc_grads()` (compute and accumulate gradients)
# - `compute_loss()` (compute the loss for a training example)
# - `predict()`, `predict_proba()`, or another prediction method (for evaluation)
#
# `NNBase` provides you with a few others that will be helpful:
#
# - `grad_check()` (run a gradient check - calls `_acc_grads` and `compute_loss`)
# - `train_sgd()` (run SGD training; more on this later)
#
# Your task is to implement the window model in `nerwindow.py`; a scaffold has
# been provided for you, with instructions on what to fill in.
#
# When ready, you can test it below:

clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                reg=0.001, alpha=0.01)
clf.grad_check(X_train[0], y_train[0])  # gradient check on a single point

# Now we'll train your model on some data! You can implement your own SGD
# method, but we recommend that you just call `clf.train_sgd`.
# This takes the following arguments:
#
# - `X`, `y`: training data
# - `idxiter`: iterable (list or generator) that yields the index (row of `X`)
#   of each training example in the order SGD should visit them
# - `printevery`: int; prints progress after this many examples
# - `costevery`: int; computes the mean loss after this many examples.
#   This is a costly operation, so don't make it too frequent!
#
# The implementation we give you supports minibatch learning: if `idxiter` is
# a list-of-lists (or yields lists), then gradients will be accumulated for
# all indices in a minibatch before the parameters are updated (this is why
# we have you write `_acc_grads` instead of applying the updates directly!).
#
# Before training, you should generate a training schedule to pass as
# `idxiter`. If you know how to use Python generators, we recommend those;
# otherwise, just make a static list. Build your schedule in the cell below;
# one possible sketch follows.
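# As an illustration only (the name `random_mini` and its parameters are
# hypothetical, not part of the starter code), here is a minimal sketch of
# a random minibatch schedule written as a generator:

import random

def random_mini(k, N, ntrain):
    """Yield N // k minibatches of k indices each, sampled with
    replacement from range(ntrain). Illustrative helper only."""
    for _ in range(N // k):
        yield [random.randint(0, ntrain - 1) for _ in range(k)]

# Because each yielded item is a list, `train_sgd` will treat it as a
# minibatch, accumulating gradients before each parameter update.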
# Train the model
clf.train_sgd(X_train, y_train)
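# The call above relies on whatever default visiting order `train_sgd`
# provides; to use the schedule sketched earlier, you could pass it
# explicitly, e.g.
# `clf.train_sgd(X_train, y_train,
#                idxiter=random_mini(5, len(y_train), len(y_train)))`.
#
# After training, the dev set loaded above can be used for evaluation. A
# minimal sketch, assuming `predict()` returns an integer tag index per
# window (the plain accuracy metric here is illustrative, not the
# assignment's official scoring):

import numpy as np

y_pred = clf.predict(X_dev)
acc = np.mean(np.asarray(y_pred) == np.asarray(y_dev))
print("Dev accuracy: %.4f" % acc)

# Map a few predictions back to tag names as a sanity check
print([num_to_tag[t] for t in y_pred[:10]])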