try: x_fvec.append(model[term.split('-')[0]]) except KeyError: # same hack as in the training process x_fvec.append(model['some']) # map word to label_index if term in word2labelindx: # is label labels.append(word2labelindx[term]) else: # not a label labels.append(word2labelindx["XXXXX"]) termss.append(term) # add a PADDING-END word at the rightend (the last word in the sentence) labels.append(word2labelindx["</s>"]) # remove a PADDING_START word at the begining. labels.pop(0) cwords = contextwin(x_fvec, s['win'], model["<s>"], model["</s>"]) prediction_test = rnn.test(numpy.asarray(cwords).astype('float32')) prediction_test2 = [] for term_indx in range(len(labels)): prediction_test2.append(prediction_test[term_indx, labels[term_indx]]) #test_question_probs.append(prediction_test) test_question_probs.append(prediction_test2) print(','.join([str(p) for p in prediction_test2]))
word2idx = dic['words2idx'] idx2label = dict((k,v) for v,k in dic['labels2idx'].iteritems()) idx2word = dict((k,v) for v,k in dic['words2idx'].iteritems()) vocsize = len(dic['words2idx']) nclasses = len(dic['labels2idx']) print 'load model' rnn = model( nh = s['nhidden'], nc = nclasses, ne = vocsize, de = s['emb_dimension'], cs = s['win'] ) rnn.load(folder) print 'test ...' test_lex = ['NOUN', 'X', 'NOUN', 'NOUN', 'NOUN'] test_lex = [ word2idx[w] for w in test_lex] test_slex = ['NOUN', 'NOUN', 'CONJ', 'NOUN', 'NOUN', 'NOUN'] test_slex = [ word2idx[w] for w in test_slex] test_sy = ['O', 'O', 'O', 'B-T', 'I-T', 'I-T'] test_sy = [ label2idx[w] for w in test_sy] # evaluation predictions_test = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(test_lex, s['win'])).astype('int32'), test_slex, test_sy)) ] print predictions_test
x_fvec.append(model[term.split('-')[0]]) except KeyError: # same hack as in the training process x_fvec.append(model['some']) # map word to label_index if term in word2labelindx: # is label labels.append(word2labelindx[term]) else: # not a label labels.append(word2labelindx["XXXXX"]) termss.append(term) # add a PADDING-END word at the rightend (the last word in the sentence) labels.append(word2labelindx["</s>"]) # remove a PADDING_START word at the begining. labels.pop(0) cwords = contextwin(x_fvec, s['win'], model["<s>"], model["</s>"]) prediction_test = rnn.test(numpy.asarray(cwords).astype('float32')) prediction_test2 = [] for term_indx in range(len(labels)): prediction_test2.append(prediction_test[term_indx, labels[term_indx]]) #test_question_probs.append(prediction_test) test_question_probs.append(prediction_test2) print(','.join([str(p) for p in prediction_test2]))
rnn = model( nh = s['nhidden'], nc = nclasses, ne = vocsize, de = s['emb_dimension'], cs = s['win'] ) # train with early stopping on validation set best_f1 = -numpy.inf s['clr'] = s['lr'] for e in xrange(s['nepochs']): # shuffle shuffle([train_lex, train_ne, train_y], s['seed']) s['ce'] = e tic = time.time() for i in xrange(nsentences): cwords = contextwin(train_lex[i], s['win']) words = map(lambda x: numpy.asarray(x).astype('int32'),\ minibatch(cwords, s['bs'])) labels = train_y[i] for word_batch , label_last_word in zip(words, labels): rnn.train(word_batch, label_last_word, s['clr']) rnn.normalize() if s['verbose']: print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic), sys.stdout.flush() # evaluation // back into the real world : idx -> words predictions_test = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\
''' best_params = {} # train with early stopping on validation set best_f1 = -numpy.inf s['clr'] = s['lr'] training_loss = [] for e in xrange(s['nepochs']): # shuffling of data per epoch shuffle([X_train_idxs, X_train_pos_idxs, X_train_chunk_idxs, Y_train_idxs], s['seed']) s['ce'] = e tic = time.time() loss = 0.0 for i in xrange(num_train_sentences): #print X_train_idxs[i] sentence_forward = contextwin(X_train_idxs[i], s['win']) sentence_backward = list(reversed(sentence_forward)) sentence_pos_forward = contextwin(X_train_pos_idxs[i], s['win']) sentence_pos_backward = list(reversed(sentence_pos_forward)) sentence_chunk_forward = contextwin(X_train_chunk_idxs[i], s['win']) sentence_chunk_backward = list(reversed(sentence_chunk_forward)) labels = Y_train_idxs[i] #loss += rnn.sentence_train(sentence_forward, sentence_pos_forward, sentence_chunk_forward, labels, s['clr']) #rnn.normalize() loss += birnn.sentence_train(sentence_forward, sentence_backward, sentence_pos_forward, sentence_pos_backward, sentence_chunk_forward, sentence_chunk_backward, labels, s['clr']) birnn.normalize() #loss += lstm.sentence_train(sentence_forward, sentence_pos_forward, sentence_chunk_forward, labels, s['clr']) #lstm.normalize() if s['verbose']: print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./num_train_sentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
fnum0=s['f0'], fnum1=s['f1'], fnum2=s['f2'], fnum3=s['f3'], fnum4=s['f4'], kalpha=s['kalpha']) rnn.emb = load_emb('data/embeddingsall') #print 'load parameter' s['cur_lr'] = s['lr'] dev_pred = [] for words, f0i, f1i, f2i, f3i, f4i in zip(dev_word, dev_f0, dev_f1, dev_f2, dev_f3, dev_f4): dev_pred += [ rnn.classify(contextwin(words, s['wsize']), contextwin(f0i, s['fsize']), contextwin(f1i, s['fsize']), contextwin(f2i, s['fsize']), contextwin(f3i, s['fsize']), contextwin(f4i, s['fsize'])) ] res_dev = conlleval(dev_pred, dev_label, dev_word) print "" for (d, x) in res_dev.items(): print d + ": " + str(x) sys.stdout.flush() best_dev = 0 train_weight = list([1.0] for i in range(nsentences))
x_fvec.append(model[term]) else: # for instance: 'good-humoured' ==> 'good' x_fvec.append(model[term.split('-')[0]]) # map word to label_index if term in word2labelindx: # is label labels.append(word2labelindx[term]) else: # not a label labels.append(word2labelindx["XXXXX"]) # --- end modified --- #cwords = contextwin(train_lex[i], s['win']) cwords = contextwin(x_fvec[i], s['win'], model["<s>"], model["</s>"]) words = map(lambda x: numpy.asarray(x).astype('int32'), minibatch(cwords, s['bs'])) #labels = train_y[i] for word_batch, label_last_word in zip(words, labels): rnn.train(word_batch, label_last_word, s['clr']) #rnn.normalize() if s['verbose']: print('[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),) sys.stdout.flush() # evaluation // back into the real world : idx -> words """
# train with early stopping on validation set print 'train with set...' best_f1 = -numpy.inf s['clr'] = s['lr'] print time.localtime(time.time()) for e in xrange(s['nepochs']): # shuffle shuffle([train_lex, train_y], s['seed']) s['ce'] = e tic = time.time() for i in xrange(nsentences): #print 'i=', i cwords = contextwin(train_lex[i], s['win']) words = map(lambda x: numpy.asarray(x).astype('int32'), minibatch(cwords, s['bs'])) labels = train_y[i] #print 'label=', labels for word_batch, label_last_word in zip(words, labels): t = rnn.train(word_batch, label_last_word, s['clr']) rnn.normalize() if (i + 1) % 270 == 0 & s['verbose']: print '[learning] epoch %i >> %2.2f%%' % ( e, (i + 1) * 100. / nsentences ), 'completed in %.2f (sec) <<\r' % (time.time() - tic) # sys.stdout.flush() # evaluation // back into the real world : idx -> words print 'evaluation step1: back into the real world : idx -> words'
de=s['emb_dimension'], cs=s['win']) # train with early stopping on validation set best_f1 = -numpy.inf s['clr'] = s['lr'] for e in range(s['nepochs']): # shuffle training data shuffle([train_lex, train_ne, train_y], s['seed']) s['ce'] = e tic = time.time() # for each sentence for i in range(nsentences): cwords = contextwin(train_lex[i], s['win']) words = map(lambda x: numpy.asarray(x).astype('int32'), minibatch(cwords, s['bs'])) labels = train_y[i] # each sentence is a minibatch, perform one update per sentence for word_batch, label_last_word in zip(words, labels): rnn.train(word_batch, label_last_word, s['clr']) rnn.normalize() print('training epoch %i > %2.2f%%' % (e, (i + 1) * 100. / nsentences)) sys.stdout.flush() # evaluation # test_y, valid_y, train_y contain correct labels
fsize=s['fsize'], L2=s['L2'], fnum0=s['f0'], fnum1=s['f1'], fnum2=s['f2'], fnum3=s['f3'], fnum4=s['f4'], kalpha=s['kalpha']) #rnn.emb = load_emb("data/embeddingsall") s['cur_lr'] = s['lr'] dev_pred = [] for words, f0i, f1i, f2i, f3i, f4i in zip(dev_word, dev_f0, dev_f1, dev_f2, dev_f3, dev_f4): dev_pred += [ rnn.classify(contextwin(words, s['wsize']), contextwin(f0i, s['fsize']), contextwin(f1i, s['fsize']), contextwin(f2i, s['fsize']), contextwin(f3i, s['fsize']), contextwin(f4i, s['fsize'])) ] fp = open("data/semi_test_pred.txt", 'w') for i in range(len(dev_pred)): for j in range(len(dev_pred[i])): fp.write(str(dev_pred[i][j]) + " ") fp.write("\n") fp.close() res_dev = conlleval(train_label, train_word, dev_pred, dev_label, dev_word) print ""