vocsize = len(words2idx) nclasses = len(labels2idx) nsentences = len(test_lex) numpy.random.seed(s['seed']) random.seed(s['seed']) rnn = model( nh = s['nhidden'], nc = nclasses, ne = vocsize, de = s['emb_dimension'], cs = s['win'] ) rnn.load(folder) #select a few popular words show, me, movies, what, in #w_idx = word2idx['show']; #print w_idx #w_emb = rnn.emb[w_idx] #print w_emb #word->index->emb #find n nearest embeddings #emb->index->word predictions_test = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\ for x in test_lex ] groundtruth_test = [ map(lambda x: idx2label[x], y) for y in test_y ] words_test = [ map(lambda x: idx2word[x], w) for w in test_lex] res_test = conlleval(predictions_test, groundtruth_test, words_test, sys.argv[6]) #print 'Test set performance -- F1: ', res_test['f1'], ' '*20 print res_test['a'],' ',res_test['p'],' ',res_test['r'],' ',res_test['f1'],' '
# NOTE(review): the line below is the tail of a training loop whose line
# breaks and indentation have been lost; the enclosing loop headers (which
# must define `e`, `i`, `word_batch`, `label_last_word`, `tic`, and which the
# trailing `break` exits) are not visible here.  Statement nesting therefore
# has to be recovered from the original file -- do not infer it from this
# collapsed form.
#
# In statement order, the code:
#   1. adds the value returned by rnn.train(word_batch, label_last_word,
#      s['clr']) to total_cost, increments count, and calls rnn.normalize();
#   2. when s['verbose'] is set, prints a progress line showing epoch `e`,
#      the percentage (i+1)*100./nsentences and the seconds elapsed since
#      `tic` (ending in '\r'), then flushes stdout;
#   3. prints the current learning rate s['clr'] and the average training
#      cost total_cost/count;
#   4. classifies the context windows of every sentence in valid_lex and maps
#      predictions, valid_y and valid_lex from indices back through
#      idx2label / idx2word;
#   5. passes those three lists to conlleval together with
#      folder + '/current.valid.txt' (presumably the file conlleval writes --
#      confirm against its definition);
#   6. if res_valid['f1'] exceeds best_f1: saves the model to `folder`,
#      updates best_f1, records f1/p/r in s['vf1'], s['vp'], s['vr'] and the
#      epoch in s['be'], and renames current.valid.txt to best.valid.txt via
#      `mv`; otherwise prints an empty line;
#   7. halves s['clr'] when s['decay'] is truthy and s['be'] and s['ce']
#      differ by at least 10;
#   8. breaks out of the enclosing loop once s['clr'] drops below 1e-5.
total_cost += rnn.train(word_batch, label_last_word, s['clr']) count +=1 rnn.normalize() if s['verbose']: print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic), sys.stdout.flush() print '' print 'Learning rate: %2.4f'%(s['clr']) print 'Average Training Cost: %2.4f'%(total_cost/count) predictions_valid = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\ for x in valid_lex ] groundtruth_valid = [ map(lambda x: idx2label[x], y) for y in valid_y ] words_valid = [ map(lambda x: idx2word[x], w) for w in valid_lex] res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt') if res_valid['f1'] > best_f1: rnn.save(folder) best_f1 = res_valid['f1'] if s['verbose']: print 'NEW BEST: epoch', e, 'valid F1', res_valid['f1'], ' '*20 s['vf1'], s['vp'], s['vr'] = res_valid['f1'], res_valid['p'], res_valid['r'] s['be'] = e subprocess.call(['mv', folder + '/current.valid.txt', folder + '/best.valid.txt']) else: print '' # learning rate decay if no improvement in 10 epochs if s['decay'] and abs(s['be']-s['ce']) >= 10: s['clr'] *= 0.5 if s['clr'] < 1e-5: break