import itertools

import numpy as np


def experiment(param):
    """Train a WindowMLP under one training schedule and return its cost curve.

    Relies on X_train, y_train, nepoch, N, and k being defined in the
    enclosing scope.
    """
    pms = param["param"]
    sche_name = param["setting_name"]
    clf = WindowMLP(**pms)
    if sche_name == "epoch":
        # nepoch sequential passes over the full training set
        schedule = itertools.chain(
            *itertools.repeat(xrange(len(y_train)), nepoch))
    elif sche_name == "N":
        # N single examples drawn uniformly at random (with replacement)
        schedule = np.random.randint(0, len(y_train), N)
    elif sche_name == "mini_batch":
        # minibatches of size k, N examples in total
        schedule = training_schedule(N, len(y_train), k)
    cost = clf.train_sgd(X_train, y_train, idxiter=schedule)
    result = {"cost": cost, "name": sche_name}
    return result
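# The minibatch schedule helper used above (and the similar random_mini helper
# used later in main()) is not defined in this snippet. The following is a
# minimal sketch under the assumption that it yields roughly N/k arrays of k
# random training indices, each of which train_sgd consumes as one minibatch.
def training_schedule(N, ntrain, k):
    """Yield N // k minibatches, each an array of k random indices in [0, ntrain)."""
    for _ in xrange(N // k):
        yield np.random.randint(0, ntrain, k)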
def main():
    # Load the starter word vectors
    wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                               'data/ner/wordVectors.txt')
    tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
    num_to_tag = dict(enumerate(tagnames))
    tag_to_num = du.invert_dict(num_to_tag)

    # Set window size
    windowsize = 3

    # Load the training set
    docs = du.load_dataset('data/ner/train')
    X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                          wsize=windowsize)

    # Load the dev set (for tuning hyperparameters)
    docs = du.load_dataset('data/ner/dev')
    X_dev, y_dev = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                      wsize=windowsize)

    # Load the test set (dummy labels only)
    docs = du.load_dataset('data/ner/test.masked')
    X_test, y_test = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                        wsize=windowsize)

    clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                    reg=0.001, alpha=0.01)
    train_size = X_train.shape[0]

    # To resume from a previously saved model instead of retraining:
    # costs = pickle.load(open("costs.dat", "rb"))
    # clf = pickle.load(open("clf.dat", "rb"))

    nepoch = 5
    N = nepoch * len(y_train)
    k = 5  # minibatch size

    costs = clf.train_sgd(X_train, y_train,
                          idxiter=random_mini(k, N, train_size),
                          printevery=10000, costevery=10000)
    pickle.dump(clf, open("clf.dat", "wb"))
    pickle.dump(costs, open("costs.dat", "wb"))
    plot_learning_curve(clf, costs)

    # Predict labels on the dev set
    yp = clf.predict(X_dev)
    # Save predictions to a file, one per line
    ner.save_predictions(yp, "dev.predicted")
    full_report(y_dev, yp, tagnames)       # full report, helpful diagnostics
    eval_performance(y_dev, yp, tagnames)  # performance: optimize this F1

    # Inspect what individual hidden-layer neurons respond to
    # L: V x 50
    # W[:, 50:100]: 100 x 50
    responses = clf.sparams.L.dot(clf.params.W[:, 50:100].T)  # V x 100
    index = np.argsort(responses, axis=0)[::-1]

    neurons = [1, 3, 4, 6, 8]  # change this to your chosen neurons
    for i in neurons:
        print "Neuron %d" % i
        top_words = [num_to_word[k] for k in index[:10, i]]
        top_scores = [responses[k, i] for k in index[:10, i]]
        print_scores(top_scores, top_words)
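# plot_learning_curve is also undefined in this snippet. Below is a minimal
# sketch, assuming costs is a sequence of (iteration, cost) pairs as recorded
# at train_sgd's costevery checkpoints; the clf argument is accepted only to
# match the call in main() and is unused here.
import matplotlib.pyplot as plt

def plot_learning_curve(clf, costs):
    iters, costvals = zip(*costs)
    plt.figure()
    plt.plot(iters, costvals, marker='o')
    plt.xlabel("SGD iteration")
    plt.ylabel("training cost")
    plt.title("Learning curve")
    plt.savefig("learning_curve.png")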
import data_utils.utils as du
import data_utils.ner as ner

# Load the starter word vectors
wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                           'data/ner/wordVectors.txt')
tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
num_to_tag = dict(enumerate(tagnames))
tag_to_num = du.invert_dict(num_to_tag)

# Set window size
windowsize = 3

# Load the training set
docs = du.load_dataset('data/ner/train')
X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                      wsize=windowsize)

# Load the dev set (for tuning hyperparameters)
docs = du.load_dataset('data/ner/dev')
X_dev, y_dev = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                  wsize=windowsize)

# Load the test set (dummy labels only)
docs = du.load_dataset('data/ner/test.masked')
X_test, y_test = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                    wsize=windowsize)

clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                reg=0.001, alpha=0.01)
clf.grad_check(X_train[0], y_train[0])
clf.train_sgd(X_train, y_train)
# Feel free to create new cells and write new code here, including new
# functions (helpers and otherwise) in `nerwindow.py`. When you have a good
# model, follow the instructions below to make predictions on the test set.
#
# A strong model may require 10-20 passes (or an equivalent number of random
# samples) through the training set and could take 20 minutes or more to
# train - but it's also possible to be much, much faster!
#
# Things you may want to tune:
# - `alpha` (including using an "annealing" schedule to decrease the learning
#   rate over time)
# - training schedule and minibatch size
# - regularization strength
# - hidden layer dimension
# - width of context window

# In[120]:

#### YOUR CODE HERE ####
# Sandbox: build a good model by tuning hyperparameters
clf.train_sgd(X_train[:100000], y_train[:100000], idxiter=xrange(100000),
              printevery=5000, costevery=5000)
#### END YOUR CODE ####


# In[ ]:

#### YOUR CODE HERE ####
# Sandbox: build a good model by tuning hyperparameters
from nerwindow import full_report, eval_performance

# Compare training schedules; the last (minibatches) works best but is slow,
# so choose the second for faster turnaround.
schedules = [idxiter_epoch, idxiter_N, idxiter_batches()]
for train_idxiter in schedules:
    clf = WindowMLP(wv, windowsize=windowsize, dims=[None, 100, 5],
                    reg=0.001, alpha=0.01)
    clf.train_sgd(X_train, y_train, idxiter=train_idxiter,
                  printevery=250000, costevery=250000)
    yp = clf.predict(X_dev)
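# In[ ]:

# The tuning notes above suggest "annealing" the learning rate. Below is a
# minimal, purely illustrative sketch: annealed_alphas is a hypothetical
# helper (not part of the starter code), and how its values are consumed
# depends on your WindowMLP/train_sgd implementation - e.g. by resetting
# clf.alpha between successive training chunks.
def annealed_alphas(alpha0, tau, n):
    """Yield n learning rates decayed as alpha0 * tau / (tau + t); halves at t = tau."""
    for t in xrange(n):
        yield alpha0 * tau / float(tau + t)

# Example (illustrative numbers): start at alpha = 0.01 and anneal over 5 chunks
for chunk, alpha in enumerate(annealed_alphas(0.01, 2, 5)):
    print "chunk %d: alpha = %.4f" % (chunk, alpha)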