def train(self, x, y, window_size, learning_rate):
    # build one context window per word index and cast to int32 arrays
    cwords = contextwin(x, window_size)
    words = [numpy.asarray(w).astype('int32') for w in cwords]
    labels = y
    self.sentence_train(words, labels, learning_rate)
    # optionally re-normalize the embeddings after the update
    if self.normal:
        self.normalize()
def train(self, x, y, window_size, learning_rate):
    cwords = contextwin(x, window_size)
    words = list(map(lambda w: numpy.asarray(w).astype('int32'), cwords))
    labels = y
    self.sentence_train(words, labels, learning_rate)
    if self.normal:
        self.normalize()
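# Both train() variants above rely on a two-argument contextwin helper that is
# not shown here. The sketch below is an assumption, modeled on the
# context-window helper from the Theano rnnslu tutorial (padding with the
# index -1); the repository's own implementation may differ.
def contextwin(l, win):
    # return, for each position in l, the list of `win` surrounding indices
    assert (win % 2) == 1 and win >= 1
    l = list(l)
    lpadded = (win // 2) * [-1] + l + (win // 2) * [-1]
    out = [lpadded[i:i + win] for i in range(len(l))]
    assert len(out) == len(l)
    return out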
# reader = Reader(md)
with open(os.path.join(directory_model, 'reader.pkl'), 'rb') as f:
    reader = pickle.load(f)

num_tags = len(reader.tag_dict)
num_words = len(reader.word_dict)

model = JordanRnn(args.hidden, num_tags, num_words, args.num_features, args.window)
print('... loading models')
model.load(directory_model)
print('>>> READY')

# interactive loop: read a sentence, codify it, tag it, and print the results
while True:
    sent = input()
    coded = reader.codify_string(sent)
    framed = numpy.asarray(
        utils.contextwin(coded, args.window,
                         reader.get_padding_left(), reader.get_padding_right()),
        dtype=numpy.int32)
    coded_tags = model.classify(framed)
    tags = [reader.reverse_tag_dict[t] for t in coded_tags]
    print('[INPUT] ' + str(sent))
    print('[CODED] ' + str(coded))
    print('[ TAG ] ' + str(coded_tags))
    print('[UNTAG] ' + str(tags))
    print()
def test_lstm(**kwargs):
    """
    Wrapper function for training and testing the LSTM.

    :type fold: int
    :param fold: fold index of the ATIS dataset, from 0 to 4.

    :type lr: float
    :param lr: learning rate used (factor for the stochastic gradient).

    :type nepochs: int
    :param nepochs: maximal number of epochs to run the optimizer.

    :type win: int
    :param win: number of words in the context window.

    :type nhidden: int
    :param nhidden: number of hidden units.

    :type emb_dimension: int
    :param emb_dimension: dimension of word embedding.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type decay: boolean
    :param decay: decay the learning rate if improvement stops.

    :type savemodel: boolean
    :param savemodel: save the trained model or not.

    :type normal: boolean
    :param normal: normalize word embeddings after each update or not.

    :type folder: string
    :param folder: path to the folder where results will be stored.
    """
    # process input arguments
    param = {
        'experiment': 'standard',
        'lr': 0.1,
        'verbose': True,
        'decay': True,
        'win': 3,
        'nhidden': 300,
        'nhidden2': 300,
        'seed': 345,
        'emb_dimension': 90,
        'nepochs': 40,
        'savemodel': False,
        'normal': True,
        'layer_norm': False,
        'minibatch_size': 4978,
        'folder': '../result'
    }
    param_diff = set(kwargs.keys()) - set(param.keys())
    if param_diff:
        raise KeyError("invalid arguments: " + str(tuple(param_diff)))
    param.update(kwargs)
    if param['verbose']:
        for k, v in param.items():
            print("%s: %s" % (k, v))

    # create result folder if it does not exist
    check_dir(param['folder'])

    # load the dataset
    print('... loading the dataset')
    train_set, valid_set, test_set, dic = load_data(3)
    train_set = list(train_set)
    valid_set = list(valid_set)

    # add the validation set to the train set
    for i in range(3):
        train_set[i] += valid_set[i]

    # create mapping from index to label, and index to word
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    # unpack dataset
    train_lex, train_ne, train_y = train_set
    test_lex, test_ne, test_y = test_set

    n_trainbatches = len(train_lex) // param['minibatch_size']
    print("Sentences in train: %d, Words in train: %d" %
          (count_of_words_and_sentences(train_lex)))
    print("Sentences in test: %d, Words in test: %d" %
          (count_of_words_and_sentences(test_lex)))

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    groundtruth_test = [[idx2label[x] for x in y] for y in test_y]
    words_test = [[idx2word[x] for x in w] for w in test_lex]

    # instantiate the model
    numpy.random.seed(param['seed'])
    random.seed(param['seed'])

    print('... building the model')
    lstm = LSTM(n_hidden=param['nhidden'],
                n_hidden2=param['nhidden2'],
                n_out=nclasses,
                n_emb=vocsize,
                dim_emb=param['emb_dimension'],
                cwind_size=param['win'],
                normal=param['normal'],
                layer_norm=param['layer_norm'],
                experiment=param['experiment'])

    # train with early stopping on the test F1 (the validation data was merged
    # into the train set above)
    print('... training')
    best_f1 = -numpy.inf
    param['clr'] = param['lr']
    for e in range(param['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], param['seed'])
        param['ce'] = e
        tic = timeit.default_timer()

        for minibatch_index in range(n_trainbatches):
            for i in range(minibatch_index * param['minibatch_size'],
                           (1 + minibatch_index) * param['minibatch_size']):
                x = train_lex[i]
                y = train_y[i]
                res = lstm.train(x, y, param['win'], param['clr'])

            predictions_test = [[
                idx2label[x] for x in lstm.classify(
                    numpy.asarray(contextwin(x, param['win'])).astype('int32'))
            ] for x in test_lex]

            # evaluation: compute the accuracy using conlleval.pl
            res_test = conlleval(predictions_test, groundtruth_test, words_test,
                                 param['folder'] + '/current.test.txt',
                                 param['folder'])

            if res_test['f1'] > best_f1:
                if param['savemodel']:
                    lstm.save(param['folder'])
                best_lstm = copy.deepcopy(lstm)
                best_f1 = res_test['f1']
                if param['verbose']:
                    print('NEW BEST: epoch %d, minibatch %d/%d, best test F1: %.3f'
                          % (e, minibatch_index + 1, n_trainbatches, res_test['f1']))
                param['tf1'] = res_test['f1']
                param['tp'] = res_test['p']
                param['tr'] = res_test['r']
                param['be'] = e
                os.rename(param['folder'] + '/current.test.txt',
                          param['folder'] + '/best.test.txt')
            else:
                if param['verbose']:
                    print('')

        # learning rate decay if no improvement in 10 epochs
        if param['decay'] and abs(param['be'] - param['ce']) >= 10:
            param['clr'] *= 0.5
            print("Decay happened. New learning rate:", param['clr'])
            lstm = best_lstm
        if param['clr'] < 0.00001:
            break

    print('BEST RESULT: epoch', param['be'], 'best test F1', param['tf1'],
          'with the model', param['folder'])
    return lstm, dic
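# Hypothetical invocation of the wrapper above; every keyword shown maps to an
# entry in the default `param` dict, and the values are illustrative only.
if __name__ == '__main__':
    lstm, dic = test_lstm(lr=0.1,
                          nepochs=40,
                          win=3,
                          nhidden=300,
                          emb_dimension=90,
                          verbose=True,
                          savemodel=False,
                          folder='../result')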
def test_lstm_parity(n_bit, fil):
    n_hidden = 1
    n_epochs = 1000
    # for 8 bit: learning_rate = 0.15
    # for 12 bit:
    learning_rate = 0.5
    n_win = 7
    verbose = True
    f = fil

    print('... loading the dataset')
    print('... loading the dataset', file=f)

    # generate datasets
    train_set_x, train_set_y = gen_parity_pair(n_bit, 1000)
    valid_set_x, valid_set_y = gen_parity_pair(n_bit, 500)
    test_set_x, test_set_y = gen_parity_pair(n_bit, 100)

    numpy.random.seed(100)

    # we need per-position labels instead of a single parity bit
    train_set_y = create_additioinal_label(train_set_x)
    valid_set_y = create_additioinal_label(valid_set_x)
    test_set_y = create_additioinal_label(test_set_x)

    n_out = 2

    print('... building the model')
    print('... building the model', file=f)

    lstm = LSTM(nh=n_hidden, nc=n_out, cs=n_win)

    start_time = timeit.default_timer()

    # train with early stopping on the validation set
    print('... training')
    print('... training', file=f)
    best_perform = numpy.inf
    for e in range(n_epochs):
        print('epoch:%d->' % e)
        print('epoch:%d->' % e, file=f)
        for x, y in zip(train_set_x, train_set_y):
            lstm.train(x, y, n_win, learning_rate)

        pred_train = numpy.asarray([
            lstm.classify(numpy.asarray(contextwin(x, n_win)).astype('int32'))
            for x in train_set_x
        ])
        pred_valid = numpy.asarray([
            lstm.classify(numpy.asarray(contextwin(x, n_win)).astype('int32'))
            for x in valid_set_x
        ])
        pred_test = numpy.asarray([
            lstm.classify(numpy.asarray(contextwin(x, n_win)).astype('int32'))
            for x in test_set_x
        ])

        # mean squared error on the final (full-parity) position
        res_train = numpy.mean((train_set_y[:, n_bit - 1] - pred_train[:, n_bit - 1]) ** 2)
        res_valid = numpy.mean((valid_set_y[:, n_bit - 1] - pred_valid[:, n_bit - 1]) ** 2)
        res_test = numpy.mean((test_set_y[:, n_bit - 1] - pred_test[:, n_bit - 1]) ** 2)

        print('cost(mse): %f' % res_train)
        print('cost(mse): %f' % res_train, file=f)

        if res_valid < best_perform:
            best_perform = res_valid
            if verbose:
                print('NEW BEST: epoch %i, valid error %.4f %%, best test error %.4f %%'
                      % (e, res_valid * 100., res_test * 100.))
                print('NEW BEST: epoch %i, valid error %.4f %%, best test error %.4f %%'
                      % (e, res_valid * 100., res_test * 100.), file=f)
            valid_error, test_error = res_valid, res_test
            best_epoch = e
        else:
            print('')
            print('', file=f)

        # learning rate decay if no improvement in 10 epochs
        if abs(best_epoch - e) >= 10:
            learning_rate *= 0.5
        if learning_rate < 1e-5:
            break

    print('BEST RESULT: epoch %i, valid error %.4f %%, best test error %.4f %%'
          % (best_epoch, valid_error * 100., test_error * 100.))
    print('BEST RESULT: epoch %i, valid error %.4f %%, best test error %.4f %%'
          % (best_epoch, valid_error * 100., test_error * 100.), file=f)

    end_time = timeit.default_timer()
    print(' ran for %.2fm' % ((end_time - start_time) / 60.))
    print(' ran for %.2fm' % ((end_time - start_time) / 60.), file=f)
    f.close()
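# The parity experiment above assumes two data helpers that are not shown.
# The definitions below are a minimal, hypothetical sketch consistent with how
# they are used (2-D per-position labels whose last column is the parity of the
# full sequence); the repository's own gen_parity_pair and
# create_additioinal_label may differ in details such as dtype or shape.
import numpy

def gen_parity_pair(nbit, num):
    # num random nbit binary vectors and the parity of each full vector
    x = numpy.random.randint(2, size=(num, nbit)).astype('int32')
    y = numpy.mod(numpy.sum(x, axis=1), 2).astype('int32')
    return x, y

def create_additioinal_label(x):
    # running parity after each bit, giving the network a target at every step;
    # column n_bit - 1 then equals the parity of the whole sequence
    return numpy.mod(numpy.cumsum(x, axis=1), 2).astype('int32')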
md = Metadata(args.filename)
directory_model = 'bestModel'

if args.load_reader:
    with open(os.path.join(directory_model, 'reader.pkl'), 'rb') as f:
        reader = pickle.load(f)
else:
    reader = Reader(md)
    reader.save(directory_model)

# Generate the training set
num_sentences = len(reader.sentences)
num_words = len(reader.word_dict)
codified_sentences = [numpy.asarray(
    utils.contextwin([t.codified_word for t in s], args.window,
                     reader.get_padding_left(), reader.get_padding_right()),
    dtype=numpy.int32)
    for s in reader.sentences]
# print('codified_sentences', codified_sentences)
# sentences_shared = theano.shared(codified_sentences)

num_tags = len(reader.tag_dict)
codified_tags = [numpy.asarray([t.codified_tag for t in s], dtype=numpy.int32)
                 for s in reader.sentences]
# print('codified_tags', codified_tags)
# tags_shared = theano.shared(codified_tags)

model = JordanRnn(args.hidden, num_tags, num_words, args.num_features, args.window)
print('... loading models')
model.load(directory_model)
# Generate the training set
num_sentences = len(reader.sentences)
num_words = len(reader.word_dict)
num_tags = len(reader.tag_dict)

if args.validation_filename:
    valid_md = Metadata(args, args.validation_filename,
                        args.fixed_embeddings or args.learn_embeddings)
    valid_reader = Reader(valid_md)
    valid_reader.word_dict = reader.word_dict
    valid_reader.tag_dict = reader.tag_dict
    valid_reader.codify_sentences()

if args.fixed_embeddings:
    codified_sentences = [numpy.concatenate(numpy.asarray(
        utils.contextwin([reader.get_embedding(t.codified_word) for t in s], args.window,
                         reader.get_padding_left(), reader.get_padding_right()),
        dtype=theano.config.floatX), axis=0)
        for s in reader.sentences]
    if args.validation_filename:
        codified_sentences_valid = [numpy.concatenate(numpy.asarray(
            utils.contextwin([reader.get_embedding(t.codified_word) for t in s], args.window,
                             reader.get_padding_left(), reader.get_padding_right()),
            dtype=theano.config.floatX), axis=0)
            for s in valid_reader.sentences]
else:
    codified_sentences = [numpy.asarray(
        utils.contextwin([t.codified_word for t in s], args.window,
                         reader.get_padding_left(), reader.get_padding_right()),
        dtype=numpy.int32)
        for s in reader.sentences]
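# The JordanRnn scripts above call a four-argument utils.contextwin with
# explicit left/right padding indices, unlike the two-argument variant sketched
# earlier. The helper below is a hypothetical sketch of that variant; the
# repository's actual utils module may differ.
def contextwin(l, win, pad_left, pad_right):
    # pad with the reader's dedicated padding indices instead of -1, then
    # slide a window of size `win` over the sequence, one window per position
    assert win % 2 == 1 and win >= 1
    l = list(l)
    lpadded = (win // 2) * [pad_left] + l + (win // 2) * [pad_right]
    return [lpadded[i:i + win] for i in range(len(l))]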