def run_infer():
    vocab_dir = params['vocab_dir']
    clean_data_dir = params['clean_data_dir']
    test_dataset_dir = params['test_dataset_dir']
    data_producer = data_helper.DataSet(vocab_dir, clean_data_dir)
    sos_id = data_producer.tokenizer.subtoken_to_id_dict['<s>']
    eos_id = data_producer.tokenizer.subtoken_to_id_dict['</s>']
    params['sos_id'] = sos_id
    params['eos_id'] = eos_id
    vocab_size = len(data_producer.tokenizer.subtoken_list)
    params['vocab_size'] = vocab_size
    params['maximum_iterations'] = 30
    params["batch_size"] = 1
    decode_mode = params['decode_mode']
    model = Seq2SeqModel(params)
    while True:
        sentence = input('you say: ')
        subtoken = data_producer.tokenizer.encode(
            pre_process(sentence, keep_sep=True))
        output = model.infer(subtoken)
        if decode_mode == 'greedy':
            output = [int(i) for i in output[0]]
            res = data_producer.tokenizer.decode(output)
            print(res)
            print('\n')
        elif decode_mode == 'beam_search':
            for j in range(len(output)):
                print("output[{}]".format(j), output[j])
                res = [int(i) for i in list(output[j])]
                res = data_producer.tokenizer.decode(res)
                print('answer {}:{}'.format(j, res))
                print('\n')
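# --- Hedged example (not from the original project) ---------------------------
# run_infer() reads a module-level `params` dict. The sketch below only shows
# the keys the function looks up before the call; every path and the
# decode_mode value are placeholder assumptions.
params = {
    'vocab_dir': 'data/vocab',        # assumed location of the subtoken vocab
    'clean_data_dir': 'data/clean',   # assumed location of the cleaned corpus
    'test_dataset_dir': 'data/test',  # assumed location of the test split
    'decode_mode': 'greedy',          # or 'beam_search'
}
run_infer()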
def init_model(opt, rating_tokens_tensor):
    logging.info('====================== Model Parameters =========================')
    if opt.model_type == 'hss':
        overall_model = HSSModel(opt)
    elif opt.model_type == 'multi_task_basic':
        overall_model = MultiTaskBasicClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == 'word_attn_modulate':
        overall_model = AttnModulateClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == 'hre_max':
        overall_model = HirEncMultiTaskBasicModel(opt)
    elif opt.model_type == 'external_feed':
        overall_model = ExternalFeedClassifySeq2Seq(opt)
    elif opt.model_type == "external_soft_feed":
        overall_model = ExternalSoftFeedClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_ex_soft_feed":
        overall_model = MultiViewExternalSoftFeedClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_attn_modulate":
        overall_model = MultiViewAttnModulateClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_multi_task_basic":
        overall_model = MultiViewMultiTaskBasicClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "rnn_enc_single_classifier":
        overall_model = RnnEncSingleClassifier(opt)
    elif opt.model_type == "seq2seq":
        overall_model = Seq2SeqModel(opt)
    else:
        raise ValueError("Invalid model type")
    overall_model.to(opt.device)
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # TODO: load the saved model and override the current one
    if opt.w2v:
        # NOTE: the pretrained embedding having the same dimension
        # as args.emb_dim should already be trained
        embedding, _ = io.make_embedding(opt.idx2word, opt.w2v)
        overall_model.set_embedding(embedding)
    return overall_model
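# --- Hedged sketch (not part of the original code) ----------------------------
# The opt.train_from branch in init_model() above is left as a TODO. One common
# way to fill it in, assuming the checkpoint was written with
# torch.save(overall_model.state_dict(), path); the optional "model" wrapper key
# checked below is an assumption, not something the project guarantees.
import torch

def load_train_from_checkpoint(overall_model, opt):
    checkpoint = torch.load(opt.train_from, map_location=opt.device)
    # Some training scripts wrap the weights, e.g. {"model": state_dict, ...};
    # otherwise treat the loaded object as a bare state_dict.
    if isinstance(checkpoint, dict) and "model" in checkpoint:
        checkpoint = checkpoint["model"]
    overall_model.load_state_dict(checkpoint)
    return overall_model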
def __init__(self, opt):
    super(MultiTaskBasicModel, self).__init__()
    memory_bank_size = 2 * opt.encoder_size if opt.bidirectional else opt.encoder_size
    self.seq2seq_model = Seq2SeqModel(opt)
    if opt.classifier_type == "max":
        self.classifier_model = MaxPoolClassifier(
            memory_bank_size, opt.num_classes, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_attn":
        self.classifier_model = WordAttnClassifier(
            opt.query_hidden_size, memory_bank_size, opt.num_classes,
            opt.attn_mode, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_attn_no_query":
        self.classifier_model = WordAttnNoQueryClassifier(
            memory_bank_size, opt.num_classes, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_multi_hop_attn":
        self.classifier_model = WordMultiHopAttnClassifier(
            opt.query_hidden_size, memory_bank_size, opt.num_classes,
            opt.attn_mode, opt.classifier_dropout, opt.ordinal)
    else:
        raise ValueError("Invalid classifier type: {}".format(opt.classifier_type))
    self.model_type = opt.model_type
    self.classifier_type = opt.classifier_type
def __init__(self, args, vocab, transition_system):
    super(Reconstructor, self).__init__()

    if args.no_copy:
        self.seq2seq = Seq2SeqModel(src_vocab=vocab.code,
                                    tgt_vocab=vocab.source,
                                    embed_size=args.embed_size,
                                    hidden_size=args.hidden_size,
                                    dropout=args.dropout,
                                    label_smoothing=args.src_token_label_smoothing,
                                    cuda=args.cuda)
    else:
        self.seq2seq = Seq2SeqWithCopy(src_vocab=vocab.code,
                                       tgt_vocab=vocab.source,
                                       embed_size=args.embed_size,
                                       hidden_size=args.hidden_size,
                                       dropout=args.dropout,
                                       cuda=args.cuda)

    self.vocab = vocab
    self.args = args
    self.transition_system = transition_system
te_path = "trans_test_data.pkl"

# Preprocess the dataset (NTU here for demo) to generate data for
# Step 1: seq2seq unsupervised training
# Step 2: classification
dsamp_train, dsamp_test, \
    fea, lab, seq_len_new, \
    test_fea, test_lab, test_seq_len_new = preprocess_pipeline(base_path,
                                                               tr_path,
                                                               te_path,
                                                               mode="cross_subject_data",
                                                               dsamp_frame=50)

# Build the Seq2Seq model (fixed-state or fixed-weight strategy; modify loop_fn in
# Seq2SeqModel to switch to the fixed-weight strategy, default is fixed-state).
tf.reset_default_graph()
sess = get_session()
model = Seq2SeqModel(max_seq_len, input_size, rnn_size, batch_size, lr, train_keep_prob)
sess = get_session()
sess.run(tf.global_variables_initializer())

start_time = timeit.default_timer()
knn_score = []
train_loss_li = []
max_score = 0.0

# Training
for i in range(1, iterations + 1):
    encoder_inputs, decoder_inputs, seq_len_enc = mini_batch(dsamp_train,
                                                             seq_len=50,
                                                             input_size=75,
                                                             batch_size=256)
    _, gradient_norm, train_loss = model.step(sess, encoder_inputs,
def run_train():
    model = Seq2SeqModel(params)
    model.train(batch_iter, data_producer.tokenizer)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = []
# dataset.extend(loadCornellDataset('data/cornell movie-dialogs corpus'))
# dataset.extend(loadConvAI2Dataset('data/ConvAI2'))
dataset.extend(loadNUCCDataset('data/nucc'))

dataloader = TextDataloader(dataset, max_length=32, min_count=3,
                            batch_size=args.batch_size, shuffle=True)
voc = dataloader.getVoc()

model = Seq2SeqModel(device, SOS_token, voc.num_words).to(device)
if args.load:
    model.load_state_dict(torch.load(args.load))

if not args.eval:
    model.train()
    for epoch in range(args.iteration):
        for i, data in enumerate(dataloader):
            inputs, lengths, targets, mask, max_target_len = data
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            targets = targets.to(device)
            mask = mask.to(device)
import argparse

import tensorflow as tf
from keras import backend as K

from model.seq2seq import Seq2SeqModel
from config.config import ConfigTrain

if __name__ == '__main__':
    # Construct the argument parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-config", "--config",
        help="main configuration path, containing main parameters " +
             "except the training dataset. See ./config/config_delex.yaml " +
             "for an example")
    ap.add_argument("-train", "--train", help="training data path")
    args = vars(ap.parse_args())
    config_path = args['config']
    train_path = args['train']

    config = ConfigTrain(main_config_path=config_path, train_path=train_path)

    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

    seq2seq = Seq2SeqModel(config=config, optimizer=optimizer, loss_object=loss_object)
    seq2seq.train()

    K.clear_session()
                     batch_size=config.CONFIG.batch_size)

pretrain_feed = DataFeed('pretrain_feed',
                         dataset.pretrainset,
                         batchop=_batchop,
                         batch_size=config.CONFIG.batch_size)

loss_ = partial(loss, loss_function=nn.NLLLoss())

test_feed = DataFeed('test_feed', dataset.testset,
                     batchop=_batchop,
                     batch_size=config.CONFIG.batch_size)

model = Model(config, 'seq2seq_name_gen',
              input_vocab_size=len(dataset.input_vocab),
              output_vocab_size=len(dataset.input_vocab),
              gender_vocab_size=len(dataset.gender_vocab),
              loss_function=loss_,
              accuracy_function=accuracy,
              dataset=dataset,
              pretrain_feed=pretrain_feed,
              train_feed=train_feed,
              test_feed=test_feed,
              )

model.restore_checkpoint()

if config.CONFIG.cuda:
    model = model.cuda()

if config.CONFIG.multi_gpu and torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
class Seq2SeqChatbot(object):
    def __init__(self):
        print(" # Welcome to Seq2Seq Chatbot.")
        print(" # Tensorflow detected: v{}".format(tf.__version__))
        print()
        self.config = Config
        self.dataloader = DataLoader(
            self.config.num_utterance,
            self.config.max_length,
            self.config.split_ratio,
            self.config.batch_size)
        self.word_to_id = self.dataloader.word_to_id
        self.id_to_word = self.dataloader.id_to_word
        self.config.vocab_size = self.dataloader.vocab_size
        self.config.SOS_ID = self.dataloader.SOS_ID
        self.config.EOS_ID = self.dataloader.EOS_ID
        self.model = Model(self.config)
        print()
        print(" # Parameter Size: {}".format(self.model.get_parameter_size()))
        print()
        self.sess = tf.Session()
        self.config.checkpoint_dir = os.path.join("save", self.model.__class__.__name__)
        print(" # Save directory: {}".format(self.config.checkpoint_dir))

    def main(self):
        # self.encoder_states(self.sess)
        # self.train_model(self.sess)
        if FLAGS.mode == 'train':
            ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path) and (not FLAGS.retrain):
                print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path)
                self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
            else:
                print(" # Creating model with fresh parameters.")
                self.sess.run(self.model.init_op)
            self.train_model(self.sess)

    def encoder_states(self, sess):
        f = 0
        count = 0
        for (enc_inp, dec_inp, dec_tar) in tqdm(self.dataloader.data_generator(flag='test')):
            outputs = self.model.encoder_states_session(sess, enc_inp)
            encoder_states = outputs['encoder_states']
            encoder_outputs = outputs['encoder_outputs']
            if f <= 2:
                print('number of layer: {}'.format(len(encoder_states)))
                for state in encoder_states:
                    print('shape of encoder_states: {}'.format(state.shape))
                print('shape of encoder_outputs: {}'.format(encoder_outputs.shape))
                f += 1
            print(count)
            count += 1

    def save_session(self, sess):
        print(" # Saving checkpoints.")
        save_dir = os.path.join(self.config.checkpoint_dir)
        model_name = self.model.__class__.__name__ + '.ckpt'
        checkpoint_path = os.path.join(save_dir, model_name)
        self.model.saver.save(sess, checkpoint_path)
        print(' # Model saved.')

    def train_model(self, sess):
        best_result_loss = 1000.0
        for epoch in range(self.config.num_epoch):
            print()
            print('----epoch: {}/{} | lr: {}'.format(epoch, self.config.num_epoch, sess.run(self.model.lr)))
            tic = datetime.datetime.now()
            train_iterator = self.dataloader.data_generator(flag='train')
            test_iterator = self.dataloader.data_generator(flag='test')
            train_batch_num = self.dataloader.train_batch_num
            # test_batch_num = self.dataloader.test_batch_num
            total_loss = 0.0
            nll_loss = 0.0
            word_error_rate = 0.0
            count = 0
            for (enc_inp, dec_inp, dec_tar) in tqdm(train_iterator, desc='training'):
                train_out = self.model.train_session(sess, enc_inp, dec_inp, dec_tar)
                count += 1
                step = train_out["step"]  # number of batches trained so far
                total_loss += train_out["total_loss"]
                nll_loss += train_out["nll_loss"]
                word_error_rate += train_out["word_error_rate"]
                if step % 50 == 0:
                    cur_loss = total_loss / count
                    cur_nll_loss = nll_loss / count
                    cur_word_error_rate = word_error_rate / count
                    cur_perplexity = math.exp(float(cur_nll_loss)) if cur_nll_loss < 300 else float("inf")
                    print(" Step %4d | Batch [%3d/%3d] | Loss %.6f | PPL %.6f | WER %.6f" %
                          (step, count, train_batch_num, cur_loss, cur_perplexity, cur_word_error_rate))
            print()
            total_loss /= count
            nll_loss /= count
            word_error_rate /= count
            perplexity = math.exp(float(nll_loss)) if nll_loss < 300 else float("inf")
            print(" Train Epoch %4d | Loss %.6f | PPL %.6f | WER %.6f" %
                  (epoch, total_loss, perplexity, word_error_rate))

            # testing after every epoch
            test_loss = 0.0
            test_nll_loss = 0.0
            test_count = 0
            test_rate = 0.0
            for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"):
                test_outputs = self.model.eval_session(sess, enc_inp, dec_inp, dec_tar)
                test_loss += test_outputs["total_loss"]
                test_nll_loss += test_outputs["nll_loss"]
                test_rate += test_outputs["word_error_rate"]
                test_count += 1
            test_loss /= test_count
            test_rate /= test_count
            test_nll_loss /= test_count
            test_perp = math.exp(float(test_nll_loss)) if test_nll_loss < 300 else float("inf")
            print(" Test Epoch %d | Loss %.6f | PPL %.6f | WER %.6f" %
                  (epoch, test_loss, test_perp, test_rate))
            print()

            if test_loss < best_result_loss:
                self.save_session(sess)
                if np.abs(best_result_loss - test_loss) < 0.03:
                    cur_lr = sess.run(self.model.lr)
                    sess.run(self.model.update_lr_op, feed_dict={self.model.new_lr: cur_lr * 0.5})
                best_result_loss = test_loss

            toc = datetime.datetime.now()
            print(" # Epoch finished in {}".format(toc - tic))