def main(n_epochs=1, n_train=8000, n_test=1500, embedding_dimension=300,
         dropout_parameter=0.2, bidirectional=True, rnn_type='GRU',
         rnn_units=100, model_name='model', maxPooling=True,
         averagePooling=False, save=True):
    """Build, train and evaluate a Model, then record its metrics.

    Trains for ``n_epochs`` epochs, evaluates on ``n_test`` examples, scores
    slot predictions with the conlleval script and stores the resulting
    accuracy/precision/recall/F1 both on the model and in the module-level
    ``models`` registry (keyed by ``model_name``).

    Returns the trained ``Model`` instance.
    """
    print("Working on Model: " + model_name)
    model = Model(embedding_dimension=embedding_dimension,
                  dropout_parameter=dropout_parameter,
                  bidirectional=bidirectional,
                  rnn_type=rnn_type,
                  rnn_units=rnn_units,
                  name=model_name,
                  maxPooling=maxPooling,
                  averagePooling=averagePooling)
    model.build_model()

    for i in range(n_epochs):
        print("Training epoch {}".format(i + 1))
        model.train_model(n_train=n_train)

    (words, results, predictions, misclassified_examples, predicted_labels,
     true_labels, predicted_slots, true_slots) = model.test_model(n_test=n_test)

    # Re-tokenize sentences into the shape conlleval expects; the first and
    # last tokens are dropped -- presumably BOS/EOS markers (TODO confirm).
    words = [sentence.split() for sentence in words]
    for sentence in words:
        sentence.pop(0)
        sentence.pop(-1)

    con_dict = conlleval(predicted_slots, true_slots, words, 'measure.txt')
    print('Precision = {}, Recall = {}, F1 = {}'.format(
        con_dict['p'], con_dict['r'], con_dict['f1']))

    # FIX: accuracy was computed twice with identical arguments; once is enough.
    accuracy = model.get_accuracy(predicted_labels, true_labels)
    print('Accuracy = ' + str(accuracy))

    model.precision, model.recall, model.f1 = (
        con_dict['p'], con_dict['r'], con_dict['f1'])
    model.save_results(predictions, results, misclassified_examples)

    # NOTE(review): relies on a module-level `models` dict defined elsewhere.
    models[model_name] = {'accuracy': accuracy,
                          'precision': con_dict['p'],
                          'recall': con_dict['r'],
                          'f1': con_dict['f1']}

    model.summary['accuracy'] = accuracy
    print(model.summary['accuracy'])
    model.summary['precision'], model.summary['recall'], model.summary['f1'] = (
        con_dict['p'], con_dict['r'], con_dict['f1'])

    if save:
        model.save_model()
    return model
def bioEalve(id_sents, id_tagss, id_test_tagss, flag=0, word_dict=None,
             tag_dict=None):
    """Score predicted BIO tag sequences against the gold tags with conlleval.

    ``id_sents`` (word ids), ``id_tagss`` (gold tag ids) and ``id_test_tagss``
    (predicted tag ids) are matrices of identical shape; positions equal to
    ``flag`` are padding and are skipped. ``word_dict`` / ``tag_dict`` map ids
    back to strings. Returns the dict produced by ``conlleval``.
    """
    # All three inputs are matrices and must share the same shape.
    assert id_sents.shape == id_tagss.shape
    assert id_sents.shape == id_test_tagss.shape

    def _decode(dictionary, idx):
        # Map an id to its upper-cased token; the unknown marker collapses
        # to the outside tag 'O' (applied to words as well, as the original did).
        token = dictionary[idx].upper()
        return 'O' if token == '__UNKNOWN__TAG__' else token

    predictions_test = []
    groundtruth_test = []
    words_test = []
    for i in range(len(id_sents)):
        words, groundtruth, predictions = [], [], []
        for j in range(len(id_sents[i])):
            if id_sents[i][j] != flag:  # skip padding positions
                words.append(_decode(word_dict, id_sents[i][j]))
                groundtruth.append(_decode(tag_dict, id_tagss[i][j]))
                predictions.append(_decode(tag_dict, id_test_tagss[i][j]))
        words_test.append(words)
        groundtruth_test.append(groundtruth)
        predictions_test.append(predictions)

    # conlleval computes precision/recall/F1 the CoNLL way.
    return conlleval(predictions_test, groundtruth_test, words_test)
sent = sent[np.newaxis, :] if sent.shape[1] > 1: loss = model.train_on_batch(sent, label) avgLoss += loss pred = model.predict_on_batch(sent) pred = np.argmax(pred, -1)[0] train_pred_label.append(pred) avgLoss = avgLoss / n_batch predword_train = [ list(map(lambda x: idx2la[x], y)) for y in train_pred_label ] con_dict = conlleval(predword_train, groundtruth_train, words_train, 'r.txt') train_f_scores.append(con_dict['f1']) print('Loss = {}, Precision = {}, Recall = {}, F1 = {}'.format( avgLoss, con_dict['r'], con_dict['p'], con_dict['f1'])) print("Validating =>") val_pred_label = [] avgLoss = 0 bar = progressbar.ProgressBar(max_value=len(val_x)) for n_batch, sent in bar(enumerate(val_x)): label = val_label[n_batch] label = np.eye(n_classes)[label][np.newaxis, :] sent = sent[np.newaxis, :] if sent.shape[1] > 1:
rnn.load_model_parameters(model_file) rnn.build_model() predictions_test = [ map(lambda x: idx2label[x], rnn.predict(x)) for x in test_x ] groundtruth_test = [map(lambda x: idx2label[x], y) for y in test_Y] words_test = [map(lambda x: idx2word[x], w) for w in test_X] predictions_valid = [ map(lambda x: idx2label[x], rnn.predict(x)) for x in valid_x ] groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_Y] words_valid = [map(lambda x: idx2word[x], w) for w in valid_X] # evaluation // compute the accuracy using conlleval.pl res_test = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt') res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt') print res_test, res_valid test_file.write( 'Valid: F1:\t%f Precision:\t%f Recall:\t%f, File: %s' % (res_valid['f1'], res_valid['p'], res_valid['r'], file)) if res_valid['f1'] > valid_F1['f1']: valid_F1 = res_valid print 'Best valid F1 updated', res_valid test_file.write('\tBest valid F1 updated') test_file.write('\n') test_file.write( 'Test: F1:\t%f Precision:\t%f Recall:\t%f, File: %s\n' %
# evaluation // back into the real world : idx -> words predictions_test = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\ for x in test_lex ] groundtruth_test = [ map(lambda x: idx2label[x], y) for y in test_y ] words_test = [ map(lambda x: idx2word[x], w) for w in test_lex] predictions_valid = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\ for x in valid_lex ] groundtruth_valid = [ map(lambda x: idx2label[x], y) for y in valid_y ] words_valid = [ map(lambda x: idx2word[x], w) for w in valid_lex] # evaluation // compute the accuracy using conlleval.pl res_test = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt') res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt') if res_valid['f1'] > best_f1: rnn.save(folder) best_f1 = res_valid['f1'] if s['verbose']: print 'NEW BEST: epoch', e, 'valid F1', res_valid['f1'], 'best test F1', res_test['f1'], ' '*20 s['vf1'], s['vp'], s['vr'] = res_valid['f1'], res_valid['p'], res_valid['r'] s['tf1'], s['tp'], s['tr'] = res_test['f1'], res_test['p'], res_test['r'] s['be'] = e subprocess.call(['mv', folder + '/current.test.txt', folder + '/best.test.txt']) subprocess.call(['mv', folder + '/current.valid.txt', folder + '/best.valid.txt']) else: print ''
def main(args): np.random.seed(0xC0FFEE) train, test, dicts = pkl.load(open('datas/atis.pkl', 'r')) index2words = {value: key for key, value in dicts['words2idx'].iteritems()} index2tables = { value: key for key, value in dicts['tables2idx'].iteritems() } index2labels = { value: key for key, value in dicts['labels2idx'].iteritems() } datas = [ { 'name': 'train', 'x': train[0], 'y': train[2], 'size': len(train[0]) }, { 'name': 'test', 'x': test[0], 'y': test[2], 'size': len(test[0]) }, ] vocsize = len(dicts['words2idx']) + 1 nclasses = len(dicts['labels2idx']) context_window_size = args.window_size n = Network() # word embedding layer n.layers.append(Fullconnect(vocsize, 256, Tanh.function, Tanh.derivative)) # recurrent layer n.layers.append( Recurrent(n.layers[-1].output_size, 256, ReLU.function, ReLU.derivative)) n.layers.append( Dropout(n.layers[-1].output_size, 256, 0.5, ReLU.function, ReLU.derivative)) n.layers.append(Fullconnect(n.layers[-1].output_size, nclasses)) n.activation = Softmax(is_zero_pad=True) if not os.path.isfile(args.params): logging.error('not exist params: %s' % args.params) return fname = args.params n.load_params(pkl.load(open(fname, 'rb'))) logging.info('load parameters at %s' % (fname)) # prediction setup for evaluation for l, layer in enumerate(n.layers): if 'Dropout' == type(layer).__name__: n.layers[l].is_testing = True data = datas[1] max_iteration = data['size'] results = {'p': [], 'g': [], 'w': []} for i in range(max_iteration): idx = i x = data['x'][idx] labels = data['y'][idx] cwords = contextwin(datas[1]['x'][idx], context_window_size) words = onehotvector(cwords, vocsize)[0] _ = n.predict(words) y = [np.argmax(prediction) for prediction in _] results['p'].append([index2tables[_] for _ in y]) results['g'].append([index2tables[_] for _ in labels]) results['w'].append([index2words[_] for _ in x]) rv = conlleval(results['p'], results['g'], results['w'], 'atis_test_file.tmp') logging.info('evaluation result: %s' % (str(rv))) for i in 
range(20): idx = random.randint(0, datas[1]['size'] - 1) x = datas[1]['x'][idx] labels = datas[1]['y'][idx] cwords = contextwin(datas[1]['x'][idx], context_window_size) words = onehotvector(cwords, vocsize)[0] _ = n.predict(words) y = [np.argmax(prediction) for prediction in _] print 'word: ', ' '.join([index2words[_] for _ in x]) print 'table: ', ' '.join([index2tables[_] for _ in labels]) print 'label: ', ' '.join([index2labels[_] for _ in labels]) print 'predict:', ' '.join([index2labels[_] for _ in y])
def main(args):
    """Train the ATIS slot-filling network with minibatched SGD. Python 2 code.

    Trains for ``args.epoch`` epochs, checkpoints parameters whenever the
    test-split error rate improves, then runs a final conlleval evaluation
    on the test split and dumps the last parameters.
    """
    np.random.seed(0xC0FFEE)
    # FIX: pickles are binary -- open in 'rb' and close the handle deterministically.
    with open('datas/atis.pkl', 'rb') as f:
        train, test, dicts = pkl.load(f)

    index2words = {value: key for key, value in dicts['words2idx'].iteritems()}
    index2tables = {value: key for key, value in dicts['tables2idx'].iteritems()}
    index2labels = {value: key for key, value in dicts['labels2idx'].iteritems()}

    datas = [
        {'name': 'train', 'x': train[0], 'y': train[2], 'size': len(train[0])},
        {'name': 'test', 'x': test[0], 'y': test[2], 'size': len(test[0])},
    ]
    vocsize = len(dicts['words2idx']) + 1  # +1 for the padding id
    nclasses = len(dicts['labels2idx'])
    context_window_size = args.window_size
    learning_rate = args.learning_rate
    minibatch = args.minibatch
    logging.info(
        'vocsize:%d, nclasses:%d, window-size:%d, minibatch:%d, learning-rate:%.5f'
        % (vocsize, nclasses, context_window_size, minibatch, learning_rate))

    # FIX: the model used to be built by exec()-ing a source-code string;
    # a plain local function is equivalent, safer and debuggable.
    def model(learning_rate=0.0001):
        # 573 / 127 are the hard-coded sizes carried over from the original
        # exec'd script -- presumably equal to vocsize/nclasses for ATIS; verify.
        n = Network()
        n.layers.append(Fullconnect(573, 256, Tanh.function, Tanh.derivative,
                                    updater=GradientDescent(learning_rate)))
        n.layers.append(Recurrent(n.layers[-1].output_size, 256,
                                  ReLU.function, ReLU.derivative,
                                  updater=GradientDescent(learning_rate)))
        n.layers.append(Dropout(n.layers[-1].output_size, 256, 0.5,
                                ReLU.function, ReLU.derivative,
                                updater=GradientDescent(learning_rate)))
        n.layers.append(Fullconnect(n.layers[-1].output_size, 127,
                                    updater=GradientDescent(learning_rate)))
        n.activation = Softmax(is_zero_pad=True)
        return n

    n = model(learning_rate)

    minimum_validation_error_rate = 1.0
    for epoch in xrange(args.epoch):
        for data in datas:
            # Dropout active only while training.
            for l, layer in enumerate(n.layers):
                if 'Dropout' == type(layer).__name__:
                    n.layers[l].is_testing = data['name'] == 'test'

            epoch_loss = 0
            epoch_error_rate = 0
            max_iteration = data['size'] / minibatch  # py2 integer division
            for i in xrange(max_iteration):
                # Random minibatches for train, sequential for test.
                if data['name'] == 'train':
                    idxs = [random.randint(0, data['size'] - 1)
                            for _ in range(minibatch)]
                else:
                    idxs = [i * minibatch + k for k in range(minibatch)]

                cwords = [contextwin(data['x'][idx], context_window_size)
                          for idx in idxs]
                words_labels = [onehotvector(cword, vocsize,
                                             data['y'][idx], nclasses)
                                for idx, cword in zip(idxs, cwords)]
                words = [word for word, label in words_labels]
                labels = [label for word, label in words_labels]

                # Zero-pad every sequence in the minibatch to the same length.
                max_size_of_sequence = max([w.shape[0] for w in words])
                for k, (word, label) in enumerate(zip(words, labels)):
                    pad = (0, max_size_of_sequence - word.shape[0])
                    words[k] = np.pad(word, (pad, (0, 0)), mode='constant')
                    labels[k] = np.pad(label, (pad, (0, 0)), mode='constant')
                # (time, batch, feature) layout expected by the network.
                words = np.swapaxes(np.array(words), 0, 1)
                labels = np.swapaxes(np.array(labels), 0, 1)

                if data['name'] == 'train':
                    loss = n.train(words, labels) / (max_size_of_sequence * minibatch)
                    predictions = n.y
                else:
                    predictions = n.predict(words)
                    loss = n.activation.loss(predictions, labels) / (
                        max_size_of_sequence * minibatch)
                error_rate = n.activation.error(predictions, labels) / (
                    max_size_of_sequence * minibatch)
                epoch_loss += loss
                epoch_error_rate += error_rate

                if i % (1000 / minibatch) == 0 and i != 0 and data['name'] == 'train':
                    logging.info('epoch:%04d iter:%04d loss:%.5f error-rate:%.5f'
                                 % (epoch, i, epoch_loss / (i + 1),
                                    epoch_error_rate / (i + 1)))

            logging.info('[%5s] epoch:%04d loss:%.5f error-rate:%.5f'
                         % (data['name'], epoch, epoch_loss / max_iteration,
                            epoch_error_rate / max_iteration))

            # Checkpoint on improved test error.
            if (args.params and data['name'] == 'test'
                    and minimum_validation_error_rate > epoch_error_rate / max_iteration):
                minimum_validation_error_rate = epoch_error_rate / max_iteration
                fname = args.params + '_min_error.pkl'
                with open(fname, 'wb') as pf:
                    pkl.dump(n.dump_params(), pf)
                logging.info('dump parameters at %s' % (fname))

    # Prediction setup for evaluation: disable dropout.
    for l, layer in enumerate(n.layers):
        if 'Dropout' == type(layer).__name__:
            n.layers[l].is_testing = True

    data = datas[1]  # test split
    results = {'p': [], 'g': [], 'w': []}
    for idx in range(data['size']):
        x = data['x'][idx]
        labels = data['y'][idx]
        cwords = contextwin(x, context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        y = [np.argmax(prediction) for prediction in n.predict(words)]
        results['p'].append([index2labels[t] for t in y])
        results['g'].append([index2labels[t] for t in labels])
        results['w'].append([index2words[t] for t in x])
    rv = conlleval(results['p'], results['g'], results['w'],
                   'atis_test_file.tmp')
    logging.info('evaluation result: %s' % (str(rv)))

    if args.params:
        fname = args.params + '_last.pkl'
        with open(fname, 'wb') as pf:
            pkl.dump(n.dump_params(), pf)
        logging.info('dump parameters at %s' % (fname))
def main(args): np.random.seed(0xC0FFEE) train, test, dicts = pkl.load( open('datas/atis.pkl', 'r') ) index2words = {value:key for key, value in dicts['words2idx'].iteritems()} index2tables = {value:key for key, value in dicts['tables2idx'].iteritems()} index2labels = {value:key for key, value in dicts['labels2idx'].iteritems()} datas = [ {'name':'train', 'x':train[0], 'y':train[2], 'size':len(train[0])}, {'name':'test', 'x':test[0], 'y':test[2], 'size':len(test[0])}, ] vocsize = len(dicts['words2idx']) + 1 nclasses = len(dicts['labels2idx']) context_window_size = args.window_size n = Network() # word embedding layer n.layers.append( Fullconnect(vocsize, 256, Tanh.function, Tanh.derivative) ) # recurrent layer n.layers.append( Recurrent(n.layers[-1].output_size, 256, ReLU.function, ReLU.derivative) ) n.layers.append( Dropout(n.layers[-1].output_size, 256, 0.5, ReLU.function, ReLU.derivative) ) n.layers.append( Fullconnect(n.layers[-1].output_size, nclasses) ) n.activation = Softmax(is_zero_pad=True) if not os.path.isfile( args.params ): logging.error('not exist params: %s'%args.params) return fname = args.params n.load_params( pkl.load( open(fname, 'rb') ) ) logging.info('load parameters at %s'%(fname)) # prediction setup for evaluation for l, layer in enumerate(n.layers): if 'Dropout' == type( layer ).__name__: n.layers[l].is_testing = True data = datas[1] max_iteration = data['size'] results = {'p':[], 'g':[], 'w':[]} for i in range(max_iteration): idx = i x = data['x'][idx] labels = data['y'][idx] cwords = contextwin(datas[1]['x'][idx], context_window_size) words = onehotvector(cwords, vocsize)[0] _ = n.predict(words) y = [np.argmax(prediction) for prediction in _] results['p'].append( [index2tables[_] for _ in y] ) results['g'].append( [index2tables[_] for _ in labels] ) results['w'].append( [index2words[_] for _ in x] ) rv = conlleval(results['p'], results['g'], results['w'], 'atis_test_file.tmp') logging.info('evaluation result: %s'%(str(rv))) for i in 
range(20): idx = random.randint(0, datas[1]['size']-1) x = datas[1]['x'][idx] labels = datas[1]['y'][idx] cwords = contextwin(datas[1]['x'][idx], context_window_size) words = onehotvector(cwords, vocsize)[0] _ = n.predict(words) y = [np.argmax(prediction) for prediction in _] print 'word: ', ' '.join([index2words[_] for _ in x]) print 'table: ', ' '.join([index2tables[_] for _ in labels]) print 'label: ', ' '.join([index2labels[_] for _ in labels]) print 'predict:', ' '.join([index2labels[_] for _ in y])
def main(param=None): if not param: param = { 'lr': 0.0970806646812754, 'verbose': 1, 'decay': True, # decay on the learning rate if improvement stops 'win': 7, # number of words in the context window 'nhidden': 200, # number of hidden units 'seed': 345, 'emb_dimension': 50, # dimension of word embedding 'nepochs': 100, # 60 is recommended 'savemodel': False } print param folder = "RelationExtraction" if not os.path.exists(folder): os.mkdir(folder) #load dataset pickle_file = 'semeval.pkl' with open(pickle_file, 'rb') as f: save = pickle.load(f) train_dataset = save['train_dataset'] train_labels = save['train_labels'] test_dataset = save['test_dataset'] test_labels = save['test_labels'] dic = save['dicts'] del save # hint to help gc free up memory print('Training set', train_dataset.shape, train_labels.shape) print('Test set', test_dataset.shape, test_labels.shape) # In[5]: train_dataset = [np.array(x, dtype=np.int32) for x in train_dataset] train_labels = [np.array(x, dtype=np.int32) for x in train_labels] x_test = [np.array(x, dtype=np.int32) for x in test_dataset] y_test = [np.array(x, dtype=np.int32) for x in test_labels] x_train = train_dataset[0:7200] y_train = train_labels[0:7200] x_valid = train_dataset[7201:8000] y_valid = train_labels[7201:8000] # In[6]: #Raw input encoding -''' visualize a few sentences ''' w2idx, labels2idx = dic['words2idx'], dic['labels2idx'] idx2w = dict((v, k) for k, v in w2idx.iteritems()) idx2la = dict((v, k) for k, v in labels2idx.iteritems()) # In[10]: vocsize = len(idx2w) nclasses = len(idx2la) nsentences = len(x_train) groundtruth_valid = [map(lambda x: idx2la[x], y) for y in y_valid] words_valid = [map(lambda x: idx2w[x], w) for w in x_valid] groundtruth_test = [map(lambda x: idx2la[x], y) for y in y_test] words_test = [map(lambda x: idx2w[x], w) for w in x_test] # instanciate the model np.random.seed(param['seed']) random.seed(param['seed']) rnn = GRUTheano(word_dim=param['emb_dimension'], window_context_size=param['win'], 
vocab_size=vocsize, num_labels=nclasses, hidden_dim=param['nhidden']) #rnn = RNNSLU_LSTM(hidden_dim=param['nhidden'], num_labels=nclasses, vocab_size=vocsize, word_dim=param['emb_dimension'], window_context_size=param['win']) # train with early stopping on validation set best_f1 = -np.inf param['clr'] = param['lr'] for e in xrange(param['nepochs']): # shuffle shuffle([x_train, y_train], param['seed']) param['ce'] = e tic = timeit.default_timer() for i, (x, y) in enumerate(zip(x_train, y_train)): rnn.train(x, y, param['win'], param['clr']) print '[learning] epoch %i >> %2.2f%%' % (e, (i + 1) * 100. / nsentences), print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), sys.stdout.flush() # evaluation // back into the real world : idx -> words predictions_test = [ map( lambda x: idx2la[x], rnn.classify( np.asarray(contextwin(x, param['win'])).astype('int32'))) for x in x_test ] predictions_valid = [ map( lambda x: idx2la[x], rnn.classify( np.asarray(contextwin(x, param['win'])).astype('int32'))) for x in x_valid ] # evaluation // compute the accuracy using conlleval.pl res_test = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt', folder) res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt', folder) if res_valid['f1'] > best_f1: if param['savemodel']: rnn.save(folder) best_rnn = copy.deepcopy(rnn) best_f1 = res_valid['f1'] if param['verbose']: print('NEW BEST: epoch', e, 'valid F1', res_valid['f1'], 'best test F1', res_test['f1']) param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1'] param['vp'], param['tp'] = res_valid['p'], res_test['p'] param['vr'], param['tr'] = res_valid['r'], res_test['r'] param['be'] = e subprocess.call([ 'mv', folder + '/current.test.txt', folder + '/best.test.txt' ]) subprocess.call([ 'mv', folder + '/current.valid.txt', folder + '/best.valid.txt' ]) else: if param['verbose']: print '' # learning rate decay if no improvement in 10 
epochs if param['decay'] and abs(param['be'] - param['ce']) >= 10: param['clr'] *= 0.5 rnn = best_rnn if param['clr'] < 1e-5: break print('BEST RESULT: epoch', param['be'], 'valid F1', param['vf1'], 'best test F1', param['tf1'], 'with the model', folder)
def main(param=None): if not param: param = {'lr': 0.0970806646812754, 'verbose': 1, 'decay': True, # decay on the learning rate if improvement stops 'win': 7, # number of words in the context window 'nhidden': 200, # number of hidden units 'seed': 345, 'emb_dimension': 50, # dimension of word embedding 'nepochs': 100, # 60 is recommended 'savemodel': False} print param folder = "RelationExtraction" if not os.path.exists(folder): os.mkdir(folder) #load dataset pickle_file = 'semeval.pkl' with open(pickle_file, 'rb') as f: save = pickle.load(f) train_dataset = save['train_dataset'] train_labels = save['train_labels'] test_dataset = save['test_dataset'] test_labels = save['test_labels'] dic=save['dicts'] del save # hint to help gc free up memory print('Training set', train_dataset.shape, train_labels.shape) print('Test set', test_dataset.shape, test_labels.shape) # In[5]: train_dataset=[np.array(x,dtype=np.int32) for x in train_dataset] train_labels=[np.array(x,dtype=np.int32) for x in train_labels] x_test=[np.array(x,dtype=np.int32) for x in test_dataset] y_test=[np.array(x,dtype=np.int32) for x in test_labels] x_train=train_dataset[0:7200] y_train=train_labels[0:7200] x_valid=train_dataset[7201:8000] y_valid=train_labels[7201:8000] # In[6]: #Raw input encoding -''' visualize a few sentences ''' w2idx,labels2idx = dic['words2idx'], dic['labels2idx'] idx2w = dict((v,k) for k,v in w2idx.iteritems()) idx2la = dict((v,k) for k,v in labels2idx.iteritems()) # In[10]: vocsize = len(idx2w) nclasses = len(idx2la) nsentences = len(x_train) groundtruth_valid = [map(lambda x: idx2la[x], y) for y in y_valid] words_valid = [map(lambda x: idx2w[x], w) for w in x_valid] groundtruth_test = [map(lambda x: idx2la[x], y) for y in y_test] words_test = [map(lambda x: idx2w[x], w) for w in x_test] # instanciate the model np.random.seed(param['seed']) random.seed(param['seed']) rnn = GRUTheano(word_dim=param['emb_dimension'], window_context_size=param['win'], vocab_size=vocsize, 
num_labels=nclasses, hidden_dim=param['nhidden']) #rnn = RNNSLU_LSTM(hidden_dim=param['nhidden'], num_labels=nclasses, vocab_size=vocsize, word_dim=param['emb_dimension'], window_context_size=param['win']) # train with early stopping on validation set best_f1 = -np.inf param['clr'] = param['lr'] for e in xrange(param['nepochs']): # shuffle shuffle([x_train, y_train], param['seed']) param['ce'] = e tic = timeit.default_timer() for i, (x, y) in enumerate(zip(x_train, y_train)): rnn.train(x, y, param['win'], param['clr']) print '[learning] epoch %i >> %2.2f%%' % ( e, (i + 1) * 100. / nsentences), print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), sys.stdout.flush() # evaluation // back into the real world : idx -> words predictions_test = [map(lambda x: idx2la[x], rnn.classify(np.asarray( contextwin(x, param['win'])).astype('int32'))) for x in x_test] predictions_valid = [map(lambda x: idx2la[x], rnn.classify(np.asarray( contextwin(x, param['win'])).astype('int32'))) for x in x_valid] # evaluation // compute the accuracy using conlleval.pl res_test = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt', folder) res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt', folder) if res_valid['f1'] > best_f1: if param['savemodel']: rnn.save(folder) best_rnn = copy.deepcopy(rnn) best_f1 = res_valid['f1'] if param['verbose']: print('NEW BEST: epoch', e, 'valid F1', res_valid['f1'], 'best test F1', res_test['f1']) param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1'] param['vp'], param['tp'] = res_valid['p'], res_test['p'] param['vr'], param['tr'] = res_valid['r'], res_test['r'] param['be'] = e subprocess.call(['mv', folder + '/current.test.txt', folder + '/best.test.txt']) subprocess.call(['mv', folder + '/current.valid.txt', folder + '/best.valid.txt']) else: if param['verbose']: print '' # learning rate decay if no improvement in 10 epochs if param['decay'] and 
abs(param['be']-param['ce']) >= 10: param['clr'] *= 0.5 rnn = best_rnn if param['clr'] < 1e-5: break print('BEST RESULT: epoch', param['be'], 'valid F1', param['vf1'], 'best test F1', param['tf1'], 'with the model', folder)
else: train_yp[i][j][k] = 1 np.random.seed(s['seed']) random.seed(s['seed']) rnn = model(hnum = s['hnum'], ynum = s['ynum'], wnum = s['wnum'], dnum = s['dnum'], me = s['me'], md = s['md'], mx = s['mx'], L2 = s['L2'], Wlnum = s['Wlnum'], Wrnum = s['Wrnum'], kalpha=s['kalpha']) s['cur_lr'] = s['lr'] test_pred = [] for ei, di, li, si, tli, tri, tai in zip(test_e, test_d, test_l, test_s, test_tl, test_tr, test_ta ): test_pred += [rnn.classify(ei, di, li, si, tli, tri, tai)] res_test = conlleval(test_pred, test_y) print "" for (d,x) in res_test.items(): print d + ": " + str(x) print "start train" for e in xrange(s['epoch']): #shuffle shuffle([train_e, train_d, train_l, train_s, train_tl, train_tr, train_ta, train_y, train_yp], s['seed']) s['cur_epoch'] = e tic = time.time() for i in xrange(nsentences): ei = train_e[i] di = train_d[i] li = train_l[i] si = train_s[i] tli = train_tl[i]