Example #1
def run_infer():
    vocab_dir = params['vocab_dir']
    clean_data_dir = params['clean_data_dir']
    test_dataset_dir = params['test_dataset_dir']
    data_producer = data_helper.DataSet(vocab_dir, clean_data_dir)
    sos_id = data_producer.tokenizer.subtoken_to_id_dict['<s>']
    eos_id = data_producer.tokenizer.subtoken_to_id_dict['</s>']
    params['sos_id'] = sos_id
    params['eos_id'] = eos_id
    vocab_size = len(data_producer.tokenizer.subtoken_list)
    params['vocab_size'] = vocab_size
    params['maximum_iterations'] = 30
    params["batch_size"] = 1
    decode_mode = params['decode_mode']

    model = Seq2SeqModel(params)
    while True:
        sentence = input('you say: ')
        subtoken = data_producer.tokenizer.encode(
            pre_process(sentence, keep_sep=True))
        output = model.infer(subtoken)
        if decode_mode == 'greedy':
            output = [int(i) for i in output[0]]
            res = data_producer.tokenizer.decode(output)
            print(res)
            print('\n')
        elif decode_mode == 'beam_search':
            for j in range(len(output)):
                print("output[i]", output[j])
                res = [int(i) for i in list(output[j])]
                res = data_producer.tokenizer.decode(res)
                print('answer {}:{}'.format(j, res))
            print('\n')
Example #3
def init_model(opt, rating_tokens_tensor):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.model_type == 'hss':
        overall_model = HSSModel(opt)
    elif opt.model_type == 'multi_task_basic':
        overall_model = MultiTaskBasicClassifySeq2Seq(opt,
                                                      rating_tokens_tensor)
    elif opt.model_type == 'word_attn_modulate':
        overall_model = AttnModulateClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == 'hre_max':
        overall_model = HirEncMultiTaskBasicModel(opt)
    elif opt.model_type == 'external_feed':
        overall_model = ExternalFeedClassifySeq2Seq(opt)
    elif opt.model_type == "external_soft_feed":
        overall_model = ExternalSoftFeedClassifySeq2Seq(
            opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_ex_soft_feed":
        overall_model = MultiViewExternalSoftFeedClassifySeq2Seq(
            opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_attn_modulate":
        overall_model = MultiViewAttnModulateClassifySeq2Seq(
            opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_multi_task_basic":
        overall_model = MultiViewMultiTaskBasicClassifySeq2Seq(
            opt, rating_tokens_tensor)
    elif opt.model_type == "rnn_enc_single_classifier":
        overall_model = RnnEncSingleClassifier(opt)
    elif opt.model_type == "seq2seq":
        overall_model = Seq2SeqModel(opt)
    else:
        raise ValueError("Invalid model type")
    overall_model.to(opt.device)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # TODO: load the saved model and override the current one
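        # A minimal sketch of what this TODO might look like; torch.load and
        # load_state_dict are assumptions here, not the repository's confirmed
        # checkpoint format:
        # checkpoint = torch.load(opt.train_from, map_location=opt.device)
        # overall_model.load_state_dict(checkpoint)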

    if opt.w2v:
        # NOTE: the pretrained embedding must already be trained and must have
        #       the same dimension as args.emb_dim
        embedding, _ = io.make_embedding(opt.idx2word, opt.w2v)
        overall_model.set_embedding(embedding)

    return overall_model
Example #4
    def __init__(self, opt):
        super(MultiTaskBasicModel, self).__init__()
        memory_bank_size = 2 * opt.encoder_size if opt.bidirectional else opt.encoder_size
        self.seq2seq_model = Seq2SeqModel(opt)
        if opt.classifier_type == "max":
            self.classifier_model = MaxPoolClassifier(memory_bank_size, opt.num_classes,
                                                      opt.classifier_dropout, opt.ordinal)
        elif opt.classifier_type == "word_attn":
            self.classifier_model = WordAttnClassifier(opt.query_hidden_size, memory_bank_size,
                                                       opt.num_classes, opt.attn_mode,
                                                       opt.classifier_dropout, opt.ordinal)
        elif opt.classifier_type == "word_attn_no_query":
            self.classifier_model = WordAttnNoQueryClassifier(memory_bank_size, opt.num_classes,
                                                              opt.classifier_dropout, opt.ordinal)
        elif opt.classifier_type == "word_multi_hop_attn":
            self.classifier_model = WordMultiHopAttnClassifier(opt.query_hidden_size, memory_bank_size,
                                                               opt.num_classes, opt.attn_mode,
                                                               opt.classifier_dropout, opt.ordinal)
        else:
            raise ValueError
        self.model_type = opt.model_type
        self.classifier_type = opt.classifier_type
Example #5
    def __init__(self, args, vocab, transition_system):
        super(Reconstructor, self).__init__()
        if args.no_copy:
            self.seq2seq = Seq2SeqModel(
                src_vocab=vocab.code,
                tgt_vocab=vocab.source,
                embed_size=args.embed_size,
                hidden_size=args.hidden_size,
                dropout=args.dropout,
                label_smoothing=args.src_token_label_smoothing,
                cuda=args.cuda)
        else:
            self.seq2seq = Seq2SeqWithCopy(src_vocab=vocab.code,
                                           tgt_vocab=vocab.source,
                                           embed_size=args.embed_size,
                                           hidden_size=args.hidden_size,
                                           dropout=args.dropout,
                                           cuda=args.cuda)

        self.vocab = vocab
        self.args = args
        self.transition_system = transition_system
Example #6
te_path = "trans_test_data.pkl"

# Preprocess the dataset (NTU here, as a demo) to generate data for
# Step 1: seq2seq unsupervised training
# Step 2: classification
dsamp_train, dsamp_test, \
fea, lab, seq_len_new, \
test_fea, test_lab, test_seq_len_new = preprocess_pipeline(base_path, tr_path, te_path, mode="cross_subject_data",
                                                     dsamp_frame=50)

# Build the Seq2Seq model with either the fixed-state or the fixed-weight strategy
# (modify loop_fn in Seq2SeqModel to switch to fixed-weight; the default is fixed-state).
tf.reset_default_graph()
sess = get_session()

model = Seq2SeqModel(max_seq_len, input_size, rnn_size, batch_size, lr,
                     train_keep_prob)
sess = get_session()
sess.run(tf.global_variables_initializer())

start_time = timeit.default_timer()
knn_score = []
train_loss_li = []
max_score = 0.0

# Training
for i in range(1, iterations + 1):
    encoder_inputs, decoder_inputs, seq_len_enc = mini_batch(dsamp_train,
                                                             seq_len=50,
                                                             input_size=75,
                                                             batch_size=256)
    _, gradient_norm, train_loss = model.step(sess, encoder_inputs,
Example #7
def run_train():

    model = Seq2SeqModel(params)
    model.train(batch_iter, data_producer.tokenizer)
Example #8
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = []
#dataset.extend(loadCornellDataset('data/cornell movie-dialogs corpus'))
#dataset.extend(loadConvAI2Dataset('data/ConvAI2'))
dataset.extend(loadNUCCDataset('data/nucc'))

dataloader = TextDataloader(dataset,
                            max_length=32,
                            min_count=3,
                            batch_size=args.batch_size,
                            shuffle=True)
voc = dataloader.getVoc()

model = Seq2SeqModel(device, SOS_token, voc.num_words).to(device)

if args.load:
    model.load_state_dict(torch.load(args.load))

if not args.eval:
    model.train()

    for epoch in range(args.iteration):
        for i, data in enumerate(dataloader):
            inputs, lengths, targets, mask, max_target_len = data
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            targets = targets.to(device)
            mask = mask.to(device)
Example #9
import argparse
import tensorflow as tf
from keras import backend as K
from model.seq2seq import Seq2SeqModel
from config.config import ConfigTrain

if __name__ == '__main__':
    # Construct the argument parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-config",
        "--config",
        help="main configuration path, containing main parameters " +
        "except the training dataset. See ./config/config_delex.yaml " +
        "for an example")
    ap.add_argument("-train", "--train", help="training data path")
    args = vars(ap.parse_args())

    config_path = args['config']
    train_path = args['train']

    config = ConfigTrain(main_config_path=config_path, train_path=train_path)
    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

    seq2seq = Seq2SeqModel(config=config,
                           optimizer=optimizer,
                           loss_object=loss_object)
    seq2seq.train()
    K.clear_session()
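
A hypothetical invocation of the script above, assuming it is saved as train.py (the script name and training data path are placeholders; ./config/config_delex.yaml is the example config referenced in the --config help text):

    python train.py --config ./config/config_delex.yaml --train data/train.txt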
Example #10
                              batch_size = config.CONFIG.batch_size)

    pretrain_feed     = DataFeed('pretrain_feed',
                                 dataset.pretrainset,
                                 batchop    = _batchop,
                                 batch_size = config.CONFIG.batch_size)
    
    
    loss_ = partial(loss, loss_function=nn.NLLLoss())
    test_feed      = DataFeed('test_feed', dataset.testset, batchop=_batchop, batch_size=config.CONFIG.batch_size)
    model = Model(config, 'seq2seq_name_gen',
                   input_vocab_size = len(dataset.input_vocab),
                   output_vocab_size= len(dataset.input_vocab),
                   gender_vocab_size = len(dataset.gender_vocab),
                   loss_function = loss_,
                   accuracy_function = accuracy,
                   dataset = dataset,
                   pretrain_feed = pretrain_feed,
                   train_feed = train_feed,
                   test_feed = test_feed,
    )

    model.restore_checkpoint()
    
    if config.CONFIG.cuda:
        model = model.cuda()        
        if config.CONFIG.multi_gpu and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)

Example #11
class Seq2SeqChatbot(object):
    def __init__(self):
        print(" # Welcome to Seq2Seq Chatbot.")
        print(" # Tensorflow detected: v{}".format(tf.__version__))
        print()
        self.config = Config

        self.dataloader = DataLoader(
            self.config.num_utterance,
            self.config.max_length,
            self.config.split_ratio,
            self.config.batch_size)
        self.word_to_id = self.dataloader.word_to_id
        self.id_to_word = self.dataloader.id_to_word
        self.config.vocab_size = self.dataloader.vocab_size
        self.config.SOS_ID = self.dataloader.SOS_ID
        self.config.EOS_ID = self.dataloader.EOS_ID

        self.model = Model(self.config)
        print()
        print(" # Parameter Size: {}".format(self.model.get_parameter_size()))
        print()

        self.sess = tf.Session()
        self.config.checkpoint_dir = os.path.join("save", self.model.__class__.__name__)
        print(" # Save directory: {}".format(self.config.checkpoint_dir))

    def main(self):
        # self.encoder_states(self.sess)
        # self.train_model(self.sess)
        if FLAGS.mode == 'train':
            ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path) and (not FLAGS.retrain):
                print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path)
                self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
            else:
                print(" # Creating model with fresh parameters.")
                self.sess.run(self.model.init_op)
            self.train_model(self.sess)

    def encoder_states(self, sess):
        f = 0
        count = 0
        for (enc_inp, dec_inp, dec_tar) in tqdm(self.dataloader.data_generator(flag='test')):
            outputs = self.model.encoder_states_session(sess, enc_inp)
            encoder_states = outputs['encoder_states']
            encoder_outputs = outputs['encoder_outputs']
            if f <= 2:
                print('number of layer: {}'.format(len(encoder_states)))
                for state in encoder_states:
                    print('shape of encoder_states: {}'.format(state.shape))
                print('shape of encoder_outputs: {}'.format(encoder_outputs.shape))
                f += 1
            print(count)
            count += 1

    def save_session(self, sess):
        print(" # Saving checkpoints.")
        save_dir = os.path.join(self.config.checkpoint_dir)
        model_name = self.model.__class__.__name__ + '.ckpt'
        checkpoint_path = os.path.join(save_dir, model_name)
        self.model.saver.save(sess, checkpoint_path)
        print(' # Model saved.')

    def train_model(self, sess):
        best_result_loss = 1000.0
        for epoch in range(self.config.num_epoch):
            print()
            print('----epoch: {}/{} | lr: {}'.format(epoch, self.config.num_epoch, sess.run(self.model.lr)))

            tic = datetime.datetime.now()
            train_iterator = self.dataloader.data_generator(flag='train')
            test_iterator = self.dataloader.data_generator(flag='test')
            train_batch_num = self.dataloader.train_batch_num
            # test_batch_num = self.dataloader.test_batch_num

            total_loss = 0.0
            nll_loss = 0.0
            word_error_rate = 0.0
            count = 0

            for (enc_inp, dec_inp, dec_tar) in tqdm(train_iterator, desc='training'):
                train_out = self.model.train_session(sess, enc_inp, dec_inp, dec_tar)

                count += 1
                step = train_out["step"] # step 表示训练了多少个batch
                total_loss += train_out["total_loss"]
                nll_loss += train_out["nll_loss"]
                word_error_rate += train_out["word_error_rate"]

                if step % 50 == 0:
                    cur_loss = total_loss / count
                    cur_nll_loss = nll_loss / count
                    cur_word_error_rate = word_error_rate / count
                    cur_perplexity = math.exp(float(cur_nll_loss)) if cur_nll_loss < 300 else float("inf")
                    print(" Step %4d | Batch [%3d/%3d] | Loss %.6f | PPL %.6f | WER %.6f" %
                          (step, count, train_batch_num, cur_loss, cur_perplexity, cur_word_error_rate))
            print()
            total_loss /= count
            nll_loss /= count
            word_error_rate /= count
            perplexity = math.exp(float(nll_loss)) if nll_loss < 300 else float("inf")
            print(" Train Epoch %4d | Loss %.6f | PPL %.6f | WER %.6f" %
                  (epoch, total_loss, perplexity, word_error_rate))

            # testing after every epoch
            test_loss = 0.0
            test_nll_loss = 0.0
            test_count = 0
            test_rate = 0.0
            for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"):
                test_outputs = self.model.eval_session(sess, enc_inp, dec_inp, dec_tar)
                test_loss += test_outputs["total_loss"]
                test_nll_loss += test_outputs["nll_loss"]
                test_rate += test_outputs["word_error_rate"]
                test_count += 1
            test_loss /= test_count
            test_rate /= test_count
            test_nll_loss /= test_count
            test_perp = math.exp(float(test_nll_loss)) if test_nll_loss < 300 else float("inf")
            print(" Test Epoch %d | Loss %.6f | PPL %.6f | WER %.6f" % (epoch, test_loss, test_perp, test_rate))
            print()

            if test_loss < best_result_loss:
                self.save_session(sess)
                if np.abs(best_result_loss - test_loss) < 0.03:
                    cur_lr = sess.run(self.model.lr)
                    sess.run(self.model.update_lr_op, feed_dict={self.model.new_lr: cur_lr * 0.5})
                best_result_loss = test_loss
            toc = datetime.datetime.now()
            print(" # Epoch finished in {}".format(toc - tic))