def main(_):
    model_path = os.path.join('model', FLAGS.name)  # build the path model/<name>
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()  # read the training text
    # build the vocabulary, keeping at most FLAGS.max_vocab words
    converter = TextConverter(text, FLAGS.max_vocab)
    # serialize the vocabulary to disk
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    data = converter.text_to_data(text)  # map the text to integer ids (word_to_int)
    g = batch_generator(data, FLAGS.n_seqs, FLAGS.n_steps)  # get the batch generator
    print(converter.vocab_size)
    # initialize the model parameters
    model = CharRNN(converter.vocab_size,
                    n_seqs=FLAGS.n_seqs,
                    n_steps=FLAGS.n_steps,
                    state_size=FLAGS.state_size,
                    n_layers=FLAGS.n_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.max_steps, model_path, FLAGS.save_every_n,
                FLAGS.log_every_n)
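# Every snippet in this file leans on a `batch_generator` helper that is not
# shown. A minimal sketch of the char-RNN variant consumed by main() above is
# given here for reference; the signature matches the call site, but the body
# (reshape into n_seqs rows, slide an n_steps window, shift-by-one targets) is
# an assumption, not the project's original implementation.
import numpy as np

def batch_generator(arr, n_seqs, n_steps):
    """Yield (x, y) batches of shape [n_seqs, n_steps]; y is x shifted by one."""
    arr = np.asarray(arr)
    batch_size = n_seqs * n_steps
    n_batches = len(arr) // batch_size
    arr = arr[:batch_size * n_batches].reshape((n_seqs, -1))
    while True:  # model.train() pulls FLAGS.max_steps batches, so loop forever
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n:n + n_steps]
            y = np.zeros_like(x)
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]  # next-token targets
            yield x, y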
def beamsearchdecode():
    modelname = ''
    if config['attn']:
        modelname = 'att' + '_' + config['attn_model']
    data_loader = data_utils.batch_generator(testX, testY,
                                             batch_size=config['batch_size'],
                                             shuffle=False)
    data_len, labels = next(data_loader)
    data, lengths = data_len
    data = torch.tensor(data, dtype=torch.long)
    lengths = torch.tensor(lengths, dtype=torch.long)
    labels = torch.tensor(labels, dtype=torch.long)
    data, labels = data.to(computing_device), labels.to(computing_device)
    encoder_outputs, encoder_hidden = encoder(data, lengths)
    decoder_hidden = encoder_hidden
    max_target_len = config['max_len']
    loss = 0
    decoder_charid = torch.zeros_like(labels)
    batch_size = labels.shape[0]
    # start every sequence with the SOS token
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]
                                      ]).to(computing_device).transpose(0, 1)
    decoder_charid[:, 0] = decoder_input.reshape(-1)
    # note: despite the function name, this loop decodes greedily (argmax at
    # each step); the actual beam search lives in evaluate_test/beam_decode
    for t in range(max_target_len - 1):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden,
                                                 encoder_outputs)
        output_id = torch.argmax(decoder_output.detach(), dim=2)
        decoder_charid[:, t + 1] = output_id.squeeze()
        decoder_input = output_id


evaluate_test(encoder, decoder)
def main(_):
    start_time = time.time()
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with open(FLAGS.input_file, 'r') as f:
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    print(converter.vocab_size)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
    print("Timing cost is --- %s --- second(s)" % (time.time() - start_time))
def train(self):
    logger.info("start train")
    self.sess = tf.Session()
    with self.sess as sess:
        sess.run(tf.global_variables_initializer())
        # build the evaluation ops once, outside the loop, so each logging
        # step does not keep adding new nodes to the graph
        correct_prediction = tf.equal(self.prob1, self.targets)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        step = 0
        new_state = sess.run(self.initial_state)
        avg_loss = 0
        for x, y in batch_generator(self.dt.data, self.dt.labels,
                                    self.batch_size, self.seq_lengths):
            feed = {
                self.inputs: x,
                self.targets: y,
                self.initial_state: new_state
            }
            batch_loss, new_state, _ = sess.run(
                [self.loss, self.final_state, self.optimizer],
                feed_dict=feed)
            step += 1
            avg_loss += batch_loss
            if step % 100 == 0:
                print("steps: %d, batch_loss: %f" % (step, avg_loss / 100))
                avg_loss = 0
                # debug output on the current batch
                eval_feed = {self.inputs: x, self.targets: y}
                print("Accuracy:", accuracy.eval(eval_feed))
                print("targets", self.targets.eval(eval_feed))
                print("logits", self.logits.eval(eval_feed))
                print("prob", self.prob.eval(eval_feed))
                print("prob1", self.prob1.eval(eval_feed))
            if step == 7810:  # hard stop after a fixed number of batches
                break
def train(word_vocab_size, tag_vocab_size, char_vocab_size, train_data,
          valid_data, epochs=20, word_embeddings=None):
    model = create_crf_on_lstm_model(word_vocab_size, tag_vocab_size,
                                     char_vocab_size, word_embeddings)
    optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                             weight_decay=WEIGHT_DECAY)
    print(model)
    print("training model...")
    for epoch in range(1, epochs + 1):
        loss_sum = 0
        timer = time.time()
        batch_count = 0
        model.train()
        for word_x, char_x, y in batch_generator(*train_data):
            model.zero_grad()
            loss = torch.mean(model(word_x, char_x, y))
            loss.backward()
            optim.step()
            loss_sum += scalar(loss)
            batch_count += 1
        timer = time.time() - timer
        loss_sum /= batch_count
        save_checkpoint('model', model, epoch, loss_sum, timer)
        loss_sum = 0
        batch_count = 0
        model.eval()
        with torch.no_grad():
            for word_x, char_x, y in batch_generator(*valid_data):
                loss_sum += scalar(torch.mean(model(word_x, char_x, y)))
                batch_count += 1
        print('validation loss: {}'.format(loss_sum / batch_count))
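# `scalar` and `save_checkpoint` above are project helpers not shown in this
# file. Plausible minimal versions, assuming `scalar` just unwraps a 0-dim
# tensor and checkpoints carry epoch metadata (the filename pattern here is
# hypothetical):
import torch

def scalar(tensor):
    """Return the Python number held by a 0-dim tensor."""
    return tensor.item()

def save_checkpoint(prefix, model, epoch, loss, elapsed):
    """Persist weights plus bookkeeping so training can be inspected later."""
    torch.save({'state_dict': model.state_dict(),
                'epoch': epoch,
                'loss': loss,
                'seconds': elapsed},
               '{}.epoch{}.pt'.format(prefix, epoch))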
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    data = read_corpus(FLAGS.input_file)
    converter = TextConverter(data, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    g = batch_generator(data, FLAGS.batch_size)
    print(converter.vocab_size)
    model = BilstmNer(converter.vocab_size,
                      converter.num_classes,
                      lstm_size=FLAGS.lstm_size,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
def main(_):
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    # read the training text
    with open(datafile, 'r', encoding='utf-8') as f:
        train_data = f.read()
    # load an existing vocabulary, or build and save a new one
    vocabulary = Vocabulary()
    if FLAGS.vocab_file:
        vocabulary.load_vocab(FLAGS.vocab_file)
    else:
        vocabulary.build_vocab(train_data)
        vocabulary.save(FLAGS.vocab_file)
    input_ids = vocabulary.encode(train_data)
    g = batch_generator(input_ids, FLAGS.batch_size, FLAGS.num_steps)
    model = LSTMModel(vocabulary.vocab_size,
                      batch_size=FLAGS.batch_size,
                      num_steps=FLAGS.num_steps,
                      lstm_size=FLAGS.lstm_size,
                      num_layers=FLAGS.num_layers,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        checkpoint_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
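# `Vocabulary` is not defined in this file. A minimal sketch consistent with
# the calls above (build_vocab / load_vocab / save / encode / vocab_size)
# might look like this; the pickle format, character-level tokenization, and
# <unk> handling are assumptions, not the original implementation.
import pickle
from collections import Counter

class Vocabulary(object):
    def __init__(self):
        self.word2id = {}
        self.id2word = []

    def build_vocab(self, text, max_size=None):
        counts = Counter(text)  # character-level counts
        words = [w for w, _ in counts.most_common(max_size)]
        self.id2word = words + ['<unk>']
        self.word2id = {w: i for i, w in enumerate(self.id2word)}

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self.id2word, f)

    def load_vocab(self, path):
        with open(path, 'rb') as f:
            self.id2word = pickle.load(f)
        self.word2id = {w: i for i, w in enumerate(self.id2word)}

    def encode(self, text):
        unk = len(self.id2word) - 1  # <unk> sits at the last index
        return [self.word2id.get(ch, unk) for ch in text]

    @property
    def vocab_size(self):
        return len(self.id2word)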
metrics = {
    'rsenses': ['accuracy', 'loss'],
}
model = Model(input=inputs, output=outputs)
model.summary()
model.compile(optimizer=c('optimizer', "adam"), loss=losses, metrics=metrics)

# load weights
log.info("previous weights ({})".format(args.model_dir))
#model.load_weights(weights_hdf5)  # weights of best training loss
model.load_weights(weights_val_hdf5)  # weights of best validation loss

# convert from dataset to numeric format
log.info("convert from dataset ({})".format(args.dataset_dir))
x, _ = next(batch_generator(dataset, indexes, indexes_size, arg1_len, arg2_len,
                            conn_len, punc_len, len(dataset['rel_ids']),
                            random_per_sample=0))

# make predictions
log.info("make predictions")
y = model.predict(x, batch_size=batch_size)

# valid outputs
TYPES = ['Explicit', 'Implicit', 'AltLex', 'EntRel', 'NoRel']
if args.lang == "en":
    SENSES = [
        'Expansion.Conjunction',  # most common
        'Temporal.Asynchronous.Precedence',
        'Temporal.Asynchronous.Succession',
        'Temporal.Synchrony',
        'Contingency.Cause.Reason',
        'Contingency.Cause.Result',
def evaluate_test(encoder, decoder):
    modelname = config['decoder']
    if config['attn']:
        modelname = 'att' + '_' + config['attn_model']
    data_loader = data_utils.batch_generator(testX, testY,
                                             batch_size=config['batch_size'],
                                             shuffle=False)
    data_len, labels = next(data_loader)
    data, lengths = data_len
    data = torch.tensor(data, dtype=torch.long)
    lengths = torch.tensor(lengths, dtype=torch.long)
    labels = torch.tensor(labels, dtype=torch.long)
    data, labels = data.to(computing_device), labels.to(computing_device)
    encoder_outputs, encoder_hidden = encoder(data, lengths)
    decoder_hidden = encoder_hidden
    max_target_len = config['max_len']
    loss = 0
    decoder_charid = torch.zeros_like(labels)
    batch_size = labels.shape[0]
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]
                                      ]).to(computing_device).transpose(0, 1)
    decoder_charid[:, 0] = decoder_input.reshape(-1)
    target = labels[:, 1:]
    # decode the whole batch with beam search (a greedy argmax loop, as in
    # beamsearchdecode(), is the simpler alternative)
    decode_batch = beam_decode(target, decoder_hidden, decoder, encoder_outputs)
    decoder_charid = torch.tensor(decode_batch).squeeze()
    data = data.cpu().numpy().tolist()
    decoder_charid = decoder_charid.cpu().numpy().tolist()
    labels = labels.cpu().numpy().tolist()
    ori_input = []
    model_output = []
    target_output = []
    # drop padding and the special start/end tokens before detokenizing
    condition = lambda t: t not in (PAD_token, EOS_token, SOS_token)
    for i, sentence_id in enumerate(decoder_charid):
        input_sentence_id = list(filter(condition, data[i]))
        input_sentence = ' '.join(
            [metadata['idx2w'][idx] for idx in input_sentence_id])
        ori_input.append(input_sentence)
        sentence_id = list(filter(condition, sentence_id))
        sentence = ' '.join([metadata['idx2w'][idx] for idx in sentence_id])
        model_output.append(sentence)
        target_sentence_id = list(filter(condition, labels[i]))
        target_sentence = ' '.join(
            [metadata['idx2w'][idx] for idx in target_sentence_id])
        target_output.append(target_sentence)
    filename = 'log/' + modelname + 'result.txt'
    # append the decoded input/response pairs to the model's log file
    with open(filename, 'a') as f:
        for i, sentence in enumerate(model_output):
            f.write("Input:" + ori_input[i] + '\n' + 'Chatbot:' + sentence +
                    '\n\n')
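# `beam_decode` is defined elsewhere in the project. A minimal sketch of a
# per-sample beam search over the same decoder interface is given below; the
# default beam width, the log-softmax scoring, and decoding one sample at a
# time are assumptions, not the original implementation.
import torch
import torch.nn.functional as F

def beam_decode(target, decoder_hidden, decoder, encoder_outputs,
                beam_width=3, max_len=None):
    """Return a [batch, max_len] list of decoded token ids."""
    batch_size = target.shape[0]
    max_len = max_len or config['max_len']
    decoded = []
    for b in range(batch_size):
        hidden = decoder_hidden[:, b:b + 1, :].contiguous()
        enc_out = encoder_outputs[b:b + 1]
        # each beam entry: (cumulative log-prob, token sequence, hidden state)
        beams = [(0.0, [SOS_token], hidden)]
        for _ in range(max_len - 1):
            candidates = []
            for score, seq, h in beams:
                if seq[-1] == EOS_token:  # finished beams are kept as-is
                    candidates.append((score, seq, h))
                    continue
                inp = torch.LongTensor([[seq[-1]]]).to(computing_device)
                out, h_new = decoder(inp, h, enc_out)
                log_probs = F.log_softmax(out.squeeze(), dim=-1)
                top_lp, top_id = torch.topk(log_probs, beam_width)
                for lp, idx in zip(top_lp.tolist(), top_id.tolist()):
                    candidates.append((score + lp, seq + [idx], h_new))
            # keep only the beam_width best partial hypotheses
            beams = sorted(candidates, key=lambda c: c[0],
                           reverse=True)[:beam_width]
        best = max(beams, key=lambda c: c[0])[1]
        best = best + [PAD_token] * (max_len - len(best))  # pad to fixed length
        decoded.append(best[:max_len])
    return decoded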
def run_epoch(encoder, decoder, feature, labels, training=False,
              encoder_optimizer=None, decoder_optimizer=None):
    batch_size = config['batch_size']
    epoch_loss = 0
    epoch_bleu = 0
    N = 1000
    N_minibatch_loss = 0.0
    beam_width = config['beam_width']
    data_loader = data_utils.batch_generator(feature, labels,
                                             batch_size=config['batch_size'])
    for minibatch_count, (data_len, labels) in enumerate(data_loader):
        if training:
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
        data, lengths = data_len
        data = torch.tensor(data, dtype=torch.long)
        lengths = torch.tensor(lengths, dtype=torch.long)
        labels = torch.tensor(labels, dtype=torch.long)
        data, labels = data.to(computing_device), labels.to(computing_device)
        encoder_outputs, encoder_hidden = encoder(data, lengths)
        decoder_hidden = encoder_hidden
        max_target_len = config['max_len']
        loss = 0
        if training:
            # sample teacher forcing per batch during training
            use_teacher_forcing = random.random() < config['teacher_forcing_ratio']
        else:
            # never use teacher forcing at validation/test time,
            # unless config['teaching'] forces it on globally
            use_teacher_forcing = False
        if config['teaching']:
            use_teacher_forcing = True
        decoder_charid = torch.zeros_like(labels)
        batch_size = labels.shape[0]  # read from labels so the last partial batch works
        if use_teacher_forcing:
            decoder_charid[:, 0] = torch.LongTensor(
                [[SOS_token for _ in range(batch_size)]
                 ]).to(computing_device).reshape(-1)
            target = labels[:, 1:]
            # feed the gold tokens as decoder inputs and score all steps at once
            decoder_input = labels[:, :-1]
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)
            decoder_charid[:, 1:] = torch.argmax(decoder_output, dim=2)
            decoder_output = decoder_output.view(-1, config['vocab_size'])
            loss = criterion(decoder_output, target.reshape(-1))
        else:
            decoder_input = torch.LongTensor(
                [[SOS_token for _ in range(batch_size)]
                 ]).to(computing_device).transpose(0, 1)
            decoder_charid[:, 0] = decoder_input.reshape(-1)
            target = labels[:, 1:]
            # decode step by step, feeding each argmax prediction back in
            for t in range(max_target_len - 1):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                output_id = torch.argmax(decoder_output.detach(), dim=2)
                decoder_charid[:, t + 1] = output_id.squeeze()
                decoder_input = output_id
                loss += criterion(decoder_output.squeeze(), target[:, t])
            loss /= lengths.float().mean()
        if training:
            loss.backward()
            # clip gradients to keep the recurrent nets stable
            nn.utils.clip_grad_norm(encoder.parameters(), 50)
            nn.utils.clip_grad_norm(decoder.parameters(), 50)
            encoder_optimizer.step()
            decoder_optimizer.step()
        epoch_bleu += batchBLEU(decoder_charid, labels)
        epoch_loss += loss.detach()
        N_minibatch_loss += loss.detach()
        if (minibatch_count % N == 0) and (minibatch_count != 0):
            train_flag = "Training" if training else "Validating/Testing"
            print(train_flag + ' Average minibatch %d loss: %.3f' %
                  (minibatch_count, N_minibatch_loss / N))
            N_minibatch_loss = 0
    return epoch_bleu / minibatch_count, epoch_loss / minibatch_count
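# `batchBLEU` above is a project helper that is not shown in this file. A
# plausible minimal version using NLTK's sentence-level BLEU, averaged over
# the batch with special tokens stripped, is sketched here as an assumption:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def batchBLEU(predictions, references):
    """Average sentence BLEU of predicted id sequences against references."""
    smooth = SmoothingFunction().method1
    special = (PAD_token, SOS_token, EOS_token)
    total = 0.0
    for pred, ref in zip(predictions.tolist(), references.tolist()):
        pred = [str(t) for t in pred if t not in special]
        ref = [str(t) for t in ref if t not in special]
        total += sentence_bleu([ref], pred, smoothing_function=smooth)
    return total / len(predictions)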
# initialize weights
if not os.path.isfile(weights_hdf5):
    log.info("initialize weights")
else:
    log.info("previous weights ({})".format(args.experiment_dir))
    model.load_weights(weights_hdf5)

# prepare for training
log.info("prepare snapshots")
#if not os.path.isdir(train_snapshot_dir):
#    train_snapshot = next(batch_generator(train, indexes, indexes_size, arg1_len, arg2_len, conn_len, punc_len, min(len(train['rel_ids']), snapshot_size), random_per_sample=0))
#    save_dict_of_np(train_snapshot_dir, train_snapshot)
#train_snapshot = load_dict_of_np(train_snapshot_dir)
#if not os.path.isdir(valid_snapshot_dir):
valid_snapshot = next(batch_generator(valid, indexes, indexes_size, arg1_len,
                                      arg2_len, conn_len, punc_len,
                                      min(len(valid['rel_ids']), snapshot_size),
                                      random_per_sample=0))
#    save_dict_of_np(valid_snapshot_dir, valid_snapshot)
#valid_snapshot = load_dict_of_np(valid_snapshot_dir)
train_iter = batch_generator(train, indexes, indexes_size, arg1_len, arg2_len,
                             conn_len, punc_len, batch_size,
                             random_per_sample=random_per_sample)

# train model
log.info("train model")
callbacks = [
    ModelCheckpoint(monitor='loss', mode='min', filepath=weights_hdf5,
                    save_best_only=True),
    ModelCheckpoint(monitor='val_loss', mode='min', filepath=weights_val_hdf5,
                    save_best_only=True),
    EarlyStopping(monitor='val_loss', mode='min', patience=epochs_patience),
]
history = model.fit_generator(train_iter, nb_epoch=epochs,
                              samples_per_epoch=epochs_len,
                              validation_data=valid_snapshot,
                              callbacks=callbacks, verbose=2)
log.info("training finished")

# return best result for hyperopt
with sess.as_default():
    # Create model
    cnn = mnistCNN(dense=FLAGS.dense_size)
    # Trainer
    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cnn.loss)
    # Saver
    saver = tf.train.Saver(max_to_keep=1)
    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    # Train process
    for epoch in range(FLAGS.num_epochs):
        for n_batch in range(int(55000 / FLAGS.batch_size)):  # 55000 MNIST training images
            batch = batch_generator(mnist_data,
                                    batch_size=FLAGS.batch_size,
                                    type='train')
            _, ce = sess.run([train_op, cnn.loss],
                             feed_dict={
                                 cnn.input_x: batch[0],
                                 cnn.input_y: batch[1]
                             })
        print(epoch, ce)  # last-batch cross-entropy for this epoch
    model_file = saver.save(sess, '/tmp/mnist_model')
    print('Model saved in', model_file)
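# The `batch_generator` used here returns a single (images, labels) pair per
# call rather than acting as a Python generator. A minimal sketch consistent
# with that calling convention, assuming `mnist_data` is the tf.contrib-style
# MNIST dataset object with train/validation/test splits:
def batch_generator(mnist_data, batch_size=64, type='train'):
    """Sample one random batch from the requested split."""
    split = getattr(mnist_data, type)  # 'train', 'validation', or 'test'
    images, labels = split.next_batch(batch_size)
    return images, labels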