def setup():
    global model, vocab, reverse_vocab

    sess = tf.Session()
    with open(os.path.join(PATH, 'vocab.json'), 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)
    model = seq2seq(sess, encoder_vocab_size=vocab_size, decoder_vocab_size=vocab_size, max_step=50)
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH + '/models'))
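
# A hypothetical usage sketch (not part of the original snippet): it mirrors the
# inference loop of the later examples and assumes the same sentence_to_char_index
# helper and model.inference API are available here as well.
def reply(text):
    # Encode the sentence, run the decoder, and map indices back to characters.
    speak = sentence_to_char_index([text], vocab, False)
    result = model.inference([speak])
    response = ''
    for index in result[0]:
        if index == 0:  # index 0 marks end of sequence / padding
            break
        response += reverse_vocab[index]
    return response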
Example #2
def train_seq2seq(self):
    print("Input sequence read, starting training")
    s2s = seq2seq(self.vocab_size + 3, self.maxlen + 2,
                  self.vocab_size + 3)
    model = s2s.seq2seq_plain()
    for e in range(10000):
        print("epoch %d \n" % e)
        for ind, (X, Y) in enumerate(self.proproces.gen_batch()):
            loss, acc = model.train_on_batch(X, Y)
            #print("Loss is %f, accuracy is %f " % (loss, acc), end='\r')
            # Every 100 batches, decode one sample sentence to monitor progress.
            if ind % 100 == 0:
                testX = X[0, :].reshape(1, self.maxlen + 2)
                testY = Y[0]
                pred = model.predict(testX, batch_size=1)
                self.decode(testX, pred)
Example #3
import tensorflow as tf
import model as m
import process_data
import matplotlib.pyplot as plt

num_samples = 7260  # Number of samples to train on.

encoder_input_data, decoder_input_data, decoder_target_data = process_data.process()
model = m.seq2seq()

callbacks = [
    # If 'val_loss' does not improve over 2 epochs, the training stops.
    tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    # Write logs for TensorBoard visualization
    tf.keras.callbacks.TensorBoard(log_dir='./tensor_board')
]

history = model.fit([encoder_input_data, decoder_input_data],
                    decoder_target_data,
                    callbacks=callbacks,
                    batch_size=m.batch_size,
                    epochs=m.epochs,
                    validation_split=0.2)

# Save model
model.save_weights('./pretrained_weights/t1_savedModel', save_format='tf')

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
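
# The original snippet stops after plotting; an assumed minimal completion that
# labels and displays the figure might look like this:
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()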
Example #4
from model import seq2seq
import tensorflow as tf
import re, json
from data_process import *

if __name__ == "__main__":
    PATH = "models"
    sess = tf.Session()
    with open('vocab.json', 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)
    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size,
                    max_step=50)
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH))

    while True:
        test = input("User >> ")
        if test == "exit":
            break
        speak = sentence_to_char_index([test], vocab, False)
        result = model.inference([speak])
        for sentence in result:
            response = ''
            for index in sentence:
                if index == 0:
                    break
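                # Assumed completion (the original snippet is truncated here;
                # it mirrors the near-identical loop in Example #8 below):
                response += reverse_vocab[index]
            print("Bot >> " + response)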
Example #5
        val_tokens, maxlen, error_rate=error_rate, shuffle=False)
    print(val_encoder[:10])
    print(val_decoder[:10])
    print(val_target[:10])
    print('Number of non-unique validation tokens =', len(val_tokens))
    print('Max sequence length in the validation set:', val_maxlen)

    # Define training and evaluation configuration.
    input_ctable  = CharacterTable(input_chars)
    target_ctable = CharacterTable(target_chars)

    train_steps = len(vocab) // train_batch_size
    val_steps = len(val_tokens) // val_batch_size

    # Compile the model.
    model, encoder_model, decoder_model = seq2seq(
        hidden_size, nb_input_chars, nb_target_chars)
    model.summary()

    # Train and evaluate.
    for epoch in range(nb_epochs):
        print('Main Epoch {:d}/{:d}'.format(epoch + 1, nb_epochs))
    
        train_encoder, train_decoder, train_target = transform(
            vocab, maxlen, error_rate=error_rate, shuffle=True)
        
        train_encoder_batch = batch(train_encoder, maxlen, input_ctable,
                                    train_batch_size, reverse)
        train_decoder_batch = batch(train_decoder, maxlen, target_ctable,
                                    train_batch_size)
        train_target_batch  = batch(train_target, maxlen, target_ctable,
                                    train_batch_size)    
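
        # The snippet is truncated here. A purely hypothetical continuation,
        # assuming batch() yields one encoder/decoder/target array per step
        # (the actual helper is not shown above):
        for _ in range(train_steps):
            enc_x = next(train_encoder_batch)
            dec_x = next(train_decoder_batch)
            dec_y = next(train_target_batch)
            model.train_on_batch([enc_x, dec_x], dec_y)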
Example #6
def main():
    parse = argparse.ArgumentParser()

    parse.add_argument(
        "--data_dir",
        default='./nmt/en-cn/',
        type=str,
        required=False,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parse.add_argument("--batch_size", default=16, type=int)
    parse.add_argument("--do_train",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_test",
                       default=True,
                       action="store_true",
                       help="Whether to run test.")
    parse.add_argument("--do_translate",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--learnning_rate", default=5e-4, type=float)
    parse.add_argument("--dropout", default=0.2, type=float)
    parse.add_argument("--num_epoch", default=10, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--enc_hidden_size", default=512, type=int)
    parse.add_argument("--dec_hidden_size", default=512, type=int)
    parse.add_argument("--warmup_steps",
                       default=0,
                       type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    parse.add_argument("--UNK_IDX", default=1, type=int)
    parse.add_argument("--PAD_IDX", default=0, type=int)
    parse.add_argument("--beam_size", default=5, type=int)
    parse.add_argument("--max_beam_search_length", default=100, type=int)

    args = parse.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    setseed()

    processor = DataProcessor(args)

    encoder = Encoder(processor.en_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    decoder = Decoder(processor.cn_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    model = seq2seq(encoder, decoder)
    if os.path.exists("translate-best.th"):
        model.load_state_dict(torch.load("translate-best.th"))
    model.to(device)
    loss_fn = LanguageModelCriterion().to(device)

    train_data = processor.get_train_examples(args)
    eval_data = processor.get_dev_examples(args)

    if args.do_train:
        train(args, model, train_data, loss_fn, eval_data)

    if args.do_test:
        test(args, model, processor)

    if args.do_translate:
        model.load_state_dict(torch.load("translate-best.th"))
        model.to(device)
        while True:
            title = input("请输入要翻译的英文句子:\n")
            if len(title.strip()) == 0:
                continue
            title = ['BOS'] + nltk.word_tokenize(title.lower()) + ['EOS']
            title_num = [
                processor.en_tokenizer.word2idx.get(word, 1) for word in title
            ]
            mb_x = torch.from_numpy(np.array(title_num).reshape(
                1, -1)).long().to(device)
            mb_x_len = torch.from_numpy(np.array([len(title_num)
                                                  ])).long().to(device)

            bos = torch.Tensor([[processor.cn_tokenizer.word2idx['BOS']]
                                ]).long().to(device)

            completed_hypotheses = model.beam_search(
                mb_x,
                mb_x_len,
                bos,
                processor.cn_tokenizer.word2idx['EOS'],
                topk=args.beam_size,
                max_length=args.max_beam_search_length)

            for hypothes in completed_hypotheses:
                result = "".join([
                    processor.cn_tokenizer.id2word[id] for id in hypothes.value
                ])
                score = hypothes.score
                print("翻译后的中文结果为:{},score:{}".format(result, score))
Example #7
def main():
    char2idx = load_vocab()
    train_data, test_data = load_dataset(char2idx)

    model = seq2seq(len(char2idx), EMB_SIZE, HIDDEN_SIZE, 0.1, RNN_LAYERS,
                    TEACHER_FORCING_RATIO)
    #model = torch.load('./model20.pkl')
    if USE_CUDA:
        model = model.cuda()

    loss_function = nn.CrossEntropyLoss(size_average=True, ignore_index=0)
    optimizer = optim.Adam(params=model.parameters(), lr=LR)

    losses = []
    for epoch in range(EPOCH):
        for _, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
            batch_x, batch_y = pad_to_batch(batch, char2idx[PADDING])
            batch_x, batch_y = Variable(torch.LongTensor(batch_x)), Variable(
                torch.LongTensor(batch_y))
            start_decode = Variable(
                torch.LongTensor([[char2idx[SOS]]] * batch_x.size(0)))

            if USE_CUDA:
                batch_x, batch_y, start_decode = batch_x.cuda(), batch_y.cuda(
                ), start_decode.cuda()

            preds = model(batch_x, start_decode, batch_y.size(1), batch_y)

            loss = loss_function(preds, batch_y.view(-1))
            losses.append(loss.data.item())
            model.zero_grad()
            loss.backward()
            optimizer.step()

            if len(losses) == LOSSES_NUM:
                print('loss:', np.mean(losses))
                losses = []

        # Eval
        bleu_scores = []
        for _, batch in enumerate(getBatch(BATCH_SIZE, test_data)):
            test_x, test_y = pad_to_batch(batch, char2idx[PADDING])
            test_x, test_y = Variable(torch.LongTensor(test_x)), Variable(
                torch.LongTensor(test_y))
            start_decode = Variable(
                torch.LongTensor([[char2idx[SOS]]] * test_x.size(0)))
            if USE_CUDA:
                test_x, test_y, start_decode = test_x.cuda(), test_y.cuda(
                ), start_decode.cuda()

            #output, hidden = encoder(test_x)
            #preds = decoder(start_decode, hidden, test_y.size(1), output)
            preds = model(test_x, start_decode, test_y.size(1), None, False)
            preds = torch.max(preds, 1)[1].view(test_y.size(0), test_y.size(1))
            bleu_scores.append(cal_bleu(preds, test_y, char2idx[EOS]))

        print('Epoch.', epoch, ':mean_bleu_score:', np.mean(bleu_scores))
        print(bleu_scores)

        torch.save(model, './model/model' + str(epoch + 1) + '.pkl')

    torch.save(model, './model/model.pkl')
Example #8
if __name__ == "__main__":
    PATH = "models"

    # load vocab, reverse_vocab, vocab_size
    with open('vocab.json', 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size,
                    max_step=50)  # build the seq2seq graph
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH))

    while True:
        test = input("User >> ")
        if test == "exit":
            break
        speak = sentence_to_char_index([test], vocab, False)
        result = model.inference([speak])
        response = ''
        for index in result[0]:
            if index == 0:
                break
            response += reverse_vocab[index]
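        # Assumed completion (the original is truncated here): show the reply.
        print("Bot >> " + response)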
Example #9
    # read and build dataset
    data = read_txt('./data/dialog.txt')
    vocab, reverse_vocab, vocab_size = build_character(data)

    # save vocab
    with open('vocab.json', 'w') as fp:
        json.dump(vocab, fp)

    # open session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # make model instance
    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size)

    # make train batches
    input, target = make_dataset(data)
    batches = batch_iter(list(zip(input, target)),
                         batch_size=64,
                         num_epochs=1001)

    # model saver
    saver = tf.train.Saver(max_to_keep=3, keep_checkpoint_every_n_hours=0.5)

    # train model
    print('모델 훈련을 시작합니다.')  # "Starting model training."
    avgLoss = []
    for step, batch in enumerate(batches):
Example #10
    #train_x = train_x / 255.
    train_x = normalize(train_x)
    tEnd = time.time()
    print('cost %s' % tfmt(tEnd - tStart))
    '''
    if args.valid_ratio != 0:
        valid_size = int(len(train_x) * args.valid_ratio)
        print('Split %d/%d validation data...' % (valid_size, len(train_x)))
        train_x = shuffle(train_x)
        valid_x = train_x[-valid_size:]
        train_x = train_x[:-valid_size]
    '''

    print('Select %s Model' % args.model)
    if args.model == 'seq2seq':
        model = seq2seq(train_x.shape[1], args.latent, verbose=1)

    adam = Adam(lr=1e-3)
    csvlogger = CSVLogger(logger)
    earlystopping = EarlyStopping(monitor='loss',
                                  patience=20,
                                  verbose=1,
                                  mode='min')
    checkpoint = ModelCheckpoint(params,
                                 monitor='loss',
                                 save_best_only=True,
                                 verbose=0,
                                 mode='min')
    model.compile(loss='mse', optimizer=adam)
    print('Start Training...')
    model.fit(train_x,
Example #11
def main(args):
    source_dataset, target_dataset, vocab, vocab_inv = read_data_and_vocab(
        args.source_train,
        args.target_train,
        args.source_dev,
        args.target_dev,
        args.source_test,
        args.target_test,
        reverse_source=True)

    save_vocab(args.model_dir, vocab, vocab_inv)

    source_dataset_train, source_dataset_dev, source_dataset_test = source_dataset
    target_dataset_train, target_dataset_dev, target_dataset_test = target_dataset

    vocab_source, vocab_target = vocab
    vocab_inv_source, vocab_inv_target = vocab_inv

    # split into buckets
    source_buckets_train, target_buckets_train = make_buckets(
        source_dataset_train, target_dataset_train)
    if args.buckets_slice is not None:
        source_buckets_train = source_buckets_train[:args.buckets_slice + 1]
        target_buckets_train = target_buckets_train[:args.buckets_slice + 1]

    # development dataset
    source_buckets_dev = None
    if len(source_dataset_dev) > 0:
        source_buckets_dev, target_buckets_dev = make_buckets(
            source_dataset_dev, target_dataset_dev)
        if args.buckets_slice is not None:
            source_buckets_dev = source_buckets_dev[:args.buckets_slice + 1]
            target_buckets_dev = target_buckets_dev[:args.buckets_slice + 1]

    # test dataset
    source_buckets_test = None
    if len(source_dataset_test) > 0:
        source_buckets_test, target_buckets_test = make_buckets(
            source_dataset_test, target_dataset_test)
        if args.buckets_slice is not None:
            source_buckets_test = source_buckets_test[:args.buckets_slice + 1]
            target_buckets_test = target_buckets_test[:args.buckets_slice + 1]

    # show log
    dump_dataset(
        source_dataset, vocab,
        (source_buckets_train, source_buckets_dev, source_buckets_test))

    # Sample buckets in proportion to their size: count the iterations each bucket needs.
    required_interations = []
    for data in source_buckets_train:
        itr = len(data) // args.batchsize + 1
        required_interations.append(itr)
    total_iterations = sum(required_interations)
    buckets_distribution = np.asarray(required_interations,
                                      dtype=float) / total_iterations

    # init
    model = load_model(args.model_dir)
    if model is None:
        model = seq2seq(len(vocab_source),
                        len(vocab_target),
                        args.ndim_embedding,
                        args.ndim_h,
                        args.num_layers,
                        pooling=args.pooling,
                        dropout=args.dropout,
                        zoneout=args.zoneout,
                        weightnorm=args.weightnorm,
                        wgain=args.wgain,
                        densely_connected=args.densely_connected,
                        attention=args.attention)

    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()

    # setup an optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate,
                              args.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    final_learning_rate = 1e-5
    total_time = 0

    indices_train = []
    for bucket_idx, bucket in enumerate(source_buckets_train):
        indices = np.arange(len(bucket))
        np.random.shuffle(indices)
        indices_train.append(indices)

    def mean(l):
        return sum(l) / len(l)

    # training
    for epoch in range(1, args.epoch + 1):
        print("Epoch", epoch)
        start_time = time.time()

        with chainer.using_config("train", True):

            for itr in range(total_iterations):
                bucket_idx = int(
                    np.random.choice(np.arange(len(source_buckets_train)),
                                     size=1,
                                     p=buckets_distribution))
                source_bucket = source_buckets_train[bucket_idx]
                target_bucket = target_buckets_train[bucket_idx]

                # sample minibatch
                source_batch = source_bucket[:args.batchsize]
                target_batch = target_bucket[:args.batchsize]
                skip_mask = source_batch != ID_PAD
                target_batch_input, target_batch_output = make_source_target_pair(
                    target_batch)

                # to gpu
                if args.gpu_device >= 0:
                    skip_mask = cuda.to_gpu(skip_mask)
                    source_batch = cuda.to_gpu(source_batch)
                    target_batch_input = cuda.to_gpu(target_batch_input)
                    target_batch_output = cuda.to_gpu(target_batch_output)

                # compute loss
                model.reset_state()
                if args.attention:
                    last_hidden_states, last_layer_outputs = model.encode(
                        source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states,
                                           last_layer_outputs, skip_mask)
                else:
                    last_hidden_states = model.encode(source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states)
                loss = softmax_cross_entropy(y_batch,
                                             target_batch_output,
                                             ignore_label=ID_PAD)

                # update parameters
                optimizer.update(lossfun=lambda: loss)

                # show log
                printr("iteration {}/{}".format(itr + 1, total_iterations))

                source_buckets_train[bucket_idx] = np.roll(source_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift
                target_buckets_train[bucket_idx] = np.roll(target_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift

            # shuffle
            for bucket_idx in range(len(source_buckets_train)):
                indices = indices_train[bucket_idx]
                np.random.shuffle(indices)
                source_buckets_train[bucket_idx] = source_buckets_train[
                    bucket_idx][indices]
                target_buckets_train[bucket_idx] = target_buckets_train[
                    bucket_idx][indices]

        # serialize
        save_model(args.model_dir, model)

        # clear console
        printr("")

        # show log
        with chainer.using_config("train", False):
            if epoch % args.interval == 0:
                printb("translate (train)")
                dump_random_source_target_translation(model,
                                                      source_buckets_train,
                                                      target_buckets_train,
                                                      vocab_inv_source,
                                                      vocab_inv_target,
                                                      num_translate=5,
                                                      beam_width=1)

                if source_buckets_dev is not None:
                    printb("translate (dev)")
                    dump_random_source_target_translation(model,
                                                          source_buckets_dev,
                                                          target_buckets_dev,
                                                          vocab_inv_source,
                                                          vocab_inv_target,
                                                          num_translate=5,
                                                          beam_width=1)

                if source_buckets_dev is not None:
                    printb("WER (dev)")
                    wer_dev = compute_error_rate_buckets(model,
                                                         source_buckets_dev,
                                                         target_buckets_dev,
                                                         len(vocab_inv_target),
                                                         beam_width=1)
                    print(mean(wer_dev), wer_dev)

        elapsed_time = (time.time() - start_time) / 60.
        total_time += elapsed_time
        print("done in {} min, lr = {:.4f}, total {} min".format(
            int(elapsed_time), get_current_learning_rate(optimizer),
            int(total_time)))

        # decay learning rate
        decay_learning_rate(optimizer, args.lr_decay_factor,
                            final_learning_rate)
Example #12
def main(args):
	# load textfile
	source_dataset, target_dataset, vocab, vocab_inv = read_data(args.source_filename, args.target_filename, train_split_ratio=args.train_split, dev_split_ratio=args.dev_split, seed=args.seed)
	save_vocab(args.model_dir, vocab, vocab_inv)

	source_dataset_train, source_dataset_dev, source_dataset_test = source_dataset
	target_dataset_train, target_dataset_dev, target_dataset_test = target_dataset
	print_bold("data	#")
	print("train	{}".format(len(source_dataset_train)))
	print("dev	{}".format(len(source_dataset_dev)))
	print("test	{}".format(len(source_dataset_test)))

	vocab_source, vocab_target = vocab
	vocab_inv_source, vocab_inv_target = vocab_inv
	print("vocab	{}	(source)".format(len(vocab_source)))
	print("vocab	{}	(target)".format(len(vocab_target)))

	# split into buckets
	source_buckets_train, target_buckets_train = make_buckets(source_dataset_train, target_dataset_train)
	if args.buckets_limit is not None:
		source_buckets_train = source_buckets_train[:args.buckets_limit+1]
		target_buckets_train = target_buckets_train[:args.buckets_limit+1]

	print_bold("buckets 	#data	(train)")
	for size, data in zip(bucket_sizes, source_buckets_train):
		print("{} 	{}".format(size, len(data)))

	print_bold("buckets 	#data	(dev)")
	source_buckets_dev, target_buckets_dev = make_buckets(source_dataset_dev, target_dataset_dev)
	if args.buckets_limit is not None:
		source_buckets_dev = source_buckets_dev[:args.buckets_limit+1]
		target_buckets_dev = target_buckets_dev[:args.buckets_limit+1]
	for size, data in zip(bucket_sizes, source_buckets_dev):
		print("{} 	{}".format(size, len(data)))

	print_bold("buckets		#data	(test)")
	source_buckets_test, target_buckets_test = make_buckets(source_dataset_test, target_dataset_test)
	if args.buckets_limit is not None:
		source_buckets_test = source_buckets_test[:args.buckets_limit+1]
		target_buckets_test = target_buckets_test[:args.buckets_limit+1]
	for size, data in zip(bucket_sizes, source_buckets_test):
		print("{} 	{}".format(size, len(data)))

	# to maintain equilibrium
	min_num_data = 0
	for data in source_buckets_train:
		if min_num_data == 0 or len(data) < min_num_data:
			min_num_data = len(data)
	repeats = []
	for data in source_buckets_train:
		repeats.append(len(data) // min_num_data + 1)

	num_updates_per_iteration = 0
	for repeat, data in zip(repeats, source_buckets_train):
		num_updates_per_iteration += repeat * args.batchsize
	num_iteration = len(source_dataset_train) // num_updates_per_iteration + 1

	# init
	model = load_model(args.model_dir)
	if model is None:
		model = seq2seq(len(vocab_source), len(vocab_target), args.ndim_embedding, args.num_layers, ndim_h=args.ndim_h, pooling=args.pooling, dropout=args.dropout, zoneout=args.zoneout, wgain=args.wgain, densely_connected=args.densely_connected, attention=args.attention)
	if args.gpu_device >= 0:
		cuda.get_device(args.gpu_device).use()
		model.to_gpu()

	# setup an optimizer
	if args.eve:
		optimizer = Eve(alpha=args.learning_rate, beta1=0.9)
	else:
		optimizer = optimizers.Adam(alpha=args.learning_rate, beta1=0.9)
	optimizer.setup(model)
	optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
	optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
	min_learning_rate = 1e-7
	prev_wer = None
	total_time = 0

	def mean(l):
		return sum(l) / len(l)

	# training
	for epoch in xrange(1, args.epoch + 1):
		print("Epoch", epoch)
		start_time = time.time()
		for itr in xrange(1, num_iteration + 1):
			for repeat, source_bucket, target_bucket in zip(repeats, source_buckets_train, target_buckets_train):
				for r in xrange(repeat):
					# sample minibatch
					source_batch, target_batch = sample_batch_from_bucket(source_bucket, target_bucket, args.batchsize)
					skip_mask = source_batch != ID_PAD
					target_batch_input, target_batch_output = make_source_target_pair(target_batch)

					# to gpu
					if model.xp is cuda.cupy:
						skip_mask = cuda.to_gpu(skip_mask)
						source_batch = cuda.to_gpu(source_batch)
						target_batch_input = cuda.to_gpu(target_batch_input)
						target_batch_output = cuda.to_gpu(target_batch_output)

					# compute loss
					model.reset_state()
					if args.attention:
						last_hidden_states, last_layer_outputs = model.encode(source_batch, skip_mask)
						Y = model.decode(target_batch_input, last_hidden_states, last_layer_outputs, skip_mask)
					else:
						last_hidden_states = model.encode(source_batch, skip_mask)
						Y = model.decode(target_batch_input, last_hidden_states)
					loss = softmax_cross_entropy(Y, target_batch_output, ignore_label=ID_PAD)
					optimizer.update(lossfun=lambda: loss)

				sys.stdout.write("\r{} / {}".format(itr, num_iteration))
				sys.stdout.flush()

			if itr % args.interval == 0 or itr == num_iteration:
				save_model(args.model_dir, model)

		# show log
		sys.stdout.write("\r" + stdout.CLEAR)
		sys.stdout.flush()
		print_bold("translate (train)")
		show_random_source_target_translation(model, source_buckets_train, target_buckets_train, vocab_inv_source, vocab_inv_target, num_translate=5, argmax=True)
		print_bold("translate (dev)")
		show_random_source_target_translation(model, source_buckets_dev, target_buckets_dev, vocab_inv_source, vocab_inv_target, num_translate=5, argmax=True)
		print_bold("WER (sampled train)")
		wer_train = compute_random_mean_wer(model, source_buckets_train, target_buckets_train, len(vocab_inv_target), sample_size=args.batchsize, argmax=True)
		print(mean(wer_train), wer_train)
		print_bold("WER (dev)")
		wer_dev = compute_mean_wer(model, source_buckets_dev, target_buckets_dev, len(vocab_inv_target), batchsize=args.batchsize, argmax=True)
		mean_wer_dev = mean(wer_dev)
		print(mean_wer_dev, wer_dev)
		elapsed_time = (time.time() - start_time) / 60.
		total_time += elapsed_time
		print("done in {} min, lr = {}, total {} min".format(int(elapsed_time), optimizer.alpha, int(total_time)))

		# decay learning rate
		if prev_wer is not None and mean_wer_dev >= prev_wer and optimizer.alpha > min_learning_rate:
			optimizer.alpha *= 0.5
		prev_wer = mean_wer_dev
Example #13
from config import (VOCAB_SIZE, MAXLEN, EPOCHS, SAVE_AT, LEARNING_RATE,
                    BATCH_SIZE, VERBOSE, LOSS)

tokenizer = Tokenizer()

# Parse the dataset and create conversation pairs.
encoder_input_data, decoder_input_data, decoder_output_data = create_training_data()

encoder_input_data, decoder_input_data, decoder_output_data = tokenizer.tokenize_and_pad_training_data(
    encoder_input_data, decoder_input_data,
    decoder_output_data)  # tokenizing and padding those pairs

# Save the tokenizer for later use (e.g. at inference time).
tokenizer.save_tokenizer(f'tokenizer-vocab_size-{VOCAB_SIZE}')

Seq2SeqModel = seq2seq()  # creating the seq2seq model

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE,
                                     clipnorm=1.0,
                                     clipvalue=0.5)
Seq2SeqModel.compile(optimizer=optimizer, loss=LOSS, metrics=['accuracy'])
Seq2SeqModel.summary()


def train(model, encoder_input_data, decoder_input_data, decoder_output_data,
          epochs, batch_size, verbose, save_at):
    with tf.device('/device:GPU:0' if utils.check_cuda else '/cpu:0'):
        for epoch in range(1, epochs + 1):
            print(f'Epochs {epoch}/{epochs}')
            model.fit([encoder_input_data, decoder_input_data],
                      decoder_output_data,
Example #14
def main():
    parse = argparse.ArgumentParser()

    parse.add_argument(
        "--data_dir",
        default='./nmt/en-cn/',
        type=str,
        required=False,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parse.add_argument("--batch_size", default=16, type=int)
    parse.add_argument("--do_train",
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_translate",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--learnning_rate", default=5e-4, type=float)
    parse.add_argument("--dropout", default=0.2, type=float)
    parse.add_argument("--num_epoch", default=10, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--enc_hidden_size", default=512, type=int)
    parse.add_argument("--dec_hidden_size", default=512, type=int)
    parse.add_argument("--warmup_steps",
                       default=0,
                       type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    parse.add_argument("--UNK_IDX", default=1, type=int)
    parse.add_argument("--PAD_IDX", default=0, type=int)

    args = parse.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    setseed()

    processor = DataProcessor(args)

    encoder = Encoder(processor.en_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    decoder = Decoder(processor.cn_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    model = seq2seq(encoder, decoder)
    if os.path.exists("translate-best.th"):
        model.load_state_dict(torch.load("translate-best.th"))
    model.to(device)
    loss_fn = LanguageModelCriterion().to(device)

    train_data = processor.get_train_examples(args)
    eval_data = processor.get_dev_examples(args)

    if args.do_train:
        train(args, model, train_data, loss_fn, eval_data)

    if args.do_translate:
        model.load_state_dict(torch.load("translate-best.th"))
        model.to(device)
        while True:
            title = input("请输入要翻译的英文句子:\n")
            if len(title.strip()) == 0:
                continue
            title = ['BOS'] + nltk.word_tokenize(title.lower()) + ['EOS']
            title_num = [
                processor.en_tokenizer.word2idx.get(word, 1) for word in title
            ]
            mb_x = torch.from_numpy(np.array(title_num).reshape(
                1, -1)).long().to(device)
            mb_x_len = torch.from_numpy(np.array([len(title_num)
                                                  ])).long().to(device)

            bos = torch.Tensor([[processor.cn_tokenizer.word2idx['BOS']]
                                ]).long().to(device)

            translation, attn = model.translate(mb_x, mb_x_len, bos)
            # bos is passed in here as the input for the first decoded word
            # e.g. translation = tensor([[ 8,  6, 11, 25, 22, 57, 10,  5,  6,  4]], device='cuda:0')

            translation = [
                processor.cn_tokenizer.id2word[i]
                for i in translation.data.cpu().numpy().reshape(-1)
            ]

            trans = []
            for word in translation:
                if word != "EOS":  # 把数值变成单词形式
                    trans.append(word)  #
                else:
                    break
            print("翻译后的中文结果为:{}".format("".join(trans)))
Example #15
from model import seq2seq

import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.ops import variable_scope
from tensorflow.python.framework import dtypes

X, y, predict, global_step = seq2seq()

import numpy as np
import pandas as pd

aq = pd.read_csv('/home/duanchx/KDDCup2018/beijing_201802_201803_aq.csv')
aq[['PM2.5', 'PM10', 'O3']] = aq[['PM2.5', 'PM10', 'O3']].fillna(
    aq[['PM2.5', 'PM10', 'O3']].mean())
fs = np.array(aq[aq['stationId'] == 'fangshan_aq']['PM2.5'])
x_ = np.expand_dims(fs[0:120], axis=0)
y_ = fs[120:168]

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(session, './save/iteraction1650')
    feed = {
        X[t]: x_.reshape((-1, 120))[:, t].reshape((-1, 1))
        for t in range(120)
    }
    feed.update({y[t]: np.array([0.0]).reshape((-1, 1)) for t in range(48)})
    p = session.run(predict, feed_dict=feed)
    p = [np.expand_dims(p_, axis=1) for p_ in p]
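
    # Hypothetical wrap-up (not in the original snippet): flatten the 48
    # one-step predictions and compare them with the held-out ground truth y_.
    p = np.concatenate(p, axis=1).reshape(-1)
    print('MAE: %.3f' % np.mean(np.abs(p - y_)))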
Example #16
X, y = get_pair(5, 2, 50)
print(X.shape, y.shape)
print('X=%s, y=%s' % (one_hot_decode(X[0]), one_hot_decode(y[0])))

# Baseline without attention
# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# Create different models & compare
simple_lstm = lstm(lstm_cells=150,
                   n_timesteps_in=n_timesteps_in,
                   n_features=n_features)
seq2seq_model = seq2seq(lstm_cells=150,
                        n_timesteps_in=n_timesteps_in,
                        n_features=n_features)
attention_model = attention(lstm_cells=150,
                            n_timesteps_in=n_timesteps_in,
                            n_features=n_features)

for model in simple_lstm, seq2seq_model, attention_model:

    # train
    for epoch in range(5000):
        # generate new random sequence
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        # fit model for one epoch on this sequence
        model.fit(X, y, epochs=1, verbose=0)

    # evaluate
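    # Hypothetical evaluation step (the original is truncated here): measure
    # exact-match accuracy on freshly generated sequence pairs.
    correct, n_eval = 0, 100
    for _ in range(n_eval):
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        yhat = model.predict(X, verbose=0)
        if one_hot_decode(yhat[0]) == one_hot_decode(y[0]):
            correct += 1
    print('Exact-match accuracy: %.1f%%' % (100.0 * correct / n_eval))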