# Per-token feature width: lemma embedding + grammar-tag embedding + 1 extra
# feature, replicated for every token in the (2*winsize + 1) context window.
# ',' and '_PNCT' are just arbitrary keys known to exist in each model,
# used only to probe the embedding dimensionality.
lem_length = len(word2vec_model[','].tolist())
gram_length = len(word2vec_gram_model['_PNCT'].tolist())
length = (lem_length + gram_length + 1) * (2 * winsize + 1)

if __name__ == '__main__':
    files = os.listdir(CORPUS)
    random.shuffle(files)
    # Split the shuffled corpus in half: second half (fs) for training,
    # first half (fs2) for evaluation.  Floor division (//) keeps this
    # correct under Python 3 as well, and starting the second slice at
    # `half` (not `half + 1`, as before) ensures the middle file is not
    # silently dropped from both splits.
    half = len(files) // 2
    fs = files[half:]
    fs2 = files[:half]

    neurons, target, words, cls = data_utils.mklsts(
        CORPUS, fs, winsize, word2vec_model, word2vec_gram_model)
    trainDS = data_utils.DataSet(neurons, target, words, cls)

    # Single hidden layer of length//2 units, 3 output classes, softmax output.
    model = net.NetworkBuilder(length, length // 2, 3, bias=True, fast=True,
                               outclass=net.SoftmaxLayer)
    engine = data_utils.Engine(trainData=trainDS, model=model)
    engine.fit(engine.trainData, learningrate=0.05, maxEpochs=200,
               file='training.txt', verbose=True,
               modelfile='network.model', FOLDER=F)

    # Release the training data before materialising the evaluation half —
    # both halves together may not fit in memory.
    del neurons
    del target
    del words
    del cls
    del engine.trainData

    neurons1, target1, words1, cls1 = data_utils.mklsts(
        CORPUS, fs2, winsize, word2vec_model, word2vec_gram_model)
def main(model='mlp', num_epochs=500): files = os.listdir(CORPUS) random.shuffle(files) random.seed(20) print len(files) fs2 = files[5000:6000] #train fs3 = files[6000 : ] #evaluate fs1 = files[:5000] #test X_train, targets_train, words_train, y_train = data_utils.mklsts(CORPUS, fs1, winsize, word2vec_model, word2vec_gram_model) X_val, targets_val, words_val, y_val = data_utils.mklsts(CORPUS, fs2, winsize, word2vec_model, word2vec_gram_model) input_var = T.matrix('inputs') target_var = T.ivector('targets') network = build_mlp(input_var) prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params( network, lasagne.regularization.l2) params = lasagne.layers.get_all_params(network, trainable=True) updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9) eval_prediction = lasagne.layers.get_output(network, deterministic=True) eval_loss = lasagne.objectives.categorical_crossentropy(eval_prediction, target_var) eval_loss = eval_loss.mean() eval_acc = T.mean(T.eq(T.argmax(eval_prediction, axis=1), target_var), dtype=theano.config.floatX) train_fn = theano.function([input_var, target_var], loss, updates=updates) val_fn = theano.function([input_var, target_var], [eval_loss, eval_acc]) for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, 100, shuffle=True): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, 100, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 # Then we print the results for this epoch: print("Epoch {} of {} took 
{:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) del X_train, targets_train, words_train, y_train, X_val, targets_val, words_val, y_val X_test, targets_test, words_test, y_test = data_utils.mklsts(CORPUS, fs3, winsize, word2vec_model, word2vec_gram_model) np.savez('/home/anna/Documents/News Classifier/model.npz', *lasagne.layers.get_all_param_values(network)) test_prediction = lasagne.layers.get_output(network, deterministic=True) predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1)) pred = list(predict_fn(X_test))# for _ in X_test) print len(pred) print len(y_test) score = f1_score(pred, y_test, average=None) print score predannotations = setAnnotations(pred, {1: 'Location'}, exactness = 'lenient') clsannotations = setAnnotations(y_test, {1: 'Location'}, exactness = 'lenient') print Pr(predannotations, clsannotations) print Re(predannotations, clsannotations) print f1(predannotations, clsannotations) np.savez('/home/anna/Documents/News Classifier/model.npz', *lasagne.layers.get_all_param_values(network))