Example #1
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                        iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
        
    showPlot(plot_losses)
    
    plt.savefig('base-lstm-loss')
    torch.save(encoder.state_dict(), 'encoder.pth')
    torch.save(decoder.state_dict(), 'decoder.pth')

if __name__ == "__main__":
    hidden_size = 256
    n_iters = 910000
    teacher_forcing_ratio = 0.5

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words).to(device)

    trainiters(encoder, decoder, n_iters)
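The loop above relies on two helpers that are not shown, timeSince and showPlot. A minimal sketch of what they could look like, following the conventions of the standard PyTorch seq2seq tutorial (the tick spacing of 0.2 is an assumption):

import time
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

def asMinutes(s):
    # Format a duration given in seconds as "Xm Ys".
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    # Elapsed time so far plus an estimate of the time remaining,
    # based on the fraction of iterations already completed.
    now = time.time()
    s = now - since
    es = s / percent
    return '%s (- %s)' % (asMinutes(s), asMinutes(es - s))

def showPlot(points):
    # Plot the running loss averages collected in plot_losses;
    # the caller saves the figure with plt.savefig afterwards.
    plt.figure()
    fig, ax = plt.subplots()
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    plt.plot(points)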
Example #2
    print('BLEU: {:.4}'.format(sum(scores) / n))


if __name__ == "__main__":
    '''
    Evaluation is mostly the same as training,
    but there are no targets so we simply feed the decoder’s predictions back to itself for each step.
    Every time it predicts a word we add it to the output string,
    and if it predicts the EOS token we stop there.
    '''
    hidden_size = 300

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    input_emb_matrix, output_emb_matrix = np.load(
        'input_emb_matrix.npy'), np.load('output_emb_matrix.npy')
    print('Embedding-matrix shape: {}, {}'.format(input_emb_matrix.shape,
                                                  output_emb_matrix.shape))

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size,
                              input_emb_matrix).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words,
                              output_emb_matrix).to(device)

    encoder.load_state_dict(torch.load('encoder.pth'))
    encoder.eval()
    decoder.load_state_dict(torch.load('decoder.pth'))
    decoder.eval()

    evaluateRandomly(encoder, decoder, pairs, int(len(pairs) * 0.3))
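The docstring describes greedy decoding: with no targets, the decoder's own prediction at each step becomes its input at the next step, and generation stops at the EOS token. Below is a minimal sketch of a single-sentence evaluation in that style; the helper tensorFromSentence, the constants SOS_token, EOS_token and MAX_LENGTH, and the exact encoder/decoder call signatures are all assumptions borrowed from tutorial-style code like Example #1, and the same torch imports as above are assumed.

def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    # Greedy decoding: the prediction at step t is fed back as the
    # input at step t + 1; stop as soon as EOS is produced.
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        encoder_hidden = encoder.initHidden()
        for ei in range(input_tensor.size(0)):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        decoded_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                break
            decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.detach()  # feed the prediction back in
        return decoded_words

evaluateRandomly would then call something like this on each sampled pair and compare the decoded words against the reference translation to produce the BLEU scores averaged above.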
Example #3
		with open(corpus_path, 'rb') as f:
			input_lang, output_lang, pairs = pickle.load(f)
	else:
		input_lang, output_lang, pairs = corpus.read_file(
			'ENG', 'FRA', 'data/eng-fra.txt', True)
		pairs = corpus.filter_pairs(pairs)
		for pair in pairs:
			input_lang.add_sentence(pair[0])
			output_lang.add_sentence(pair[1])
		with open(corpus_path, 'wb') as f:
			pickle.dump((input_lang, output_lang, pairs), f)
	print(f'{len(pairs)} pairs, {input_lang.n_words} source, {output_lang.n_words} target')

	# Load model
	encoder = seq2seq.Encoder(input_lang.n_words, hidden_size, embed_size, n_layers)
	decoder = seq2seq.Decoder(hidden_size, embed_size, output_lang.n_words, n_layers)
	model = seq2seq.Seq2seq(encoder, decoder).to(device)
	if load_model:
		model.load_state_dict(torch.load(model_path))
		# train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
		#						teaching_rate, clip, model_path)
	else:
		def init_weights(m):
			for name, param in m.named_parameters():
				nn.init.uniform_(param.data, -0.08, 0.08)
		model.apply(init_weights)

	# Test or train
	if mode == 'train':
		train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
								teaching_rate, clip, model_path)
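The snippet opens inside a cache check whose condition is cut off. The load-or-build pattern it implements presumably looks something like the sketch below; the os.path.exists guard and the function wrapper are assumptions, while the body mirrors the code above (the corpus module comes from that code).

import os
import pickle

def load_or_build_corpus(corpus_path):
    # Reuse the pickled (input_lang, output_lang, pairs) triple if it
    # exists, otherwise rebuild it from the raw file and cache it.
    if os.path.exists(corpus_path):
        with open(corpus_path, 'rb') as f:
            input_lang, output_lang, pairs = pickle.load(f)
    else:
        input_lang, output_lang, pairs = corpus.read_file(
            'ENG', 'FRA', 'data/eng-fra.txt', True)
        pairs = corpus.filter_pairs(pairs)
        for pair in pairs:
            input_lang.add_sentence(pair[0])
            output_lang.add_sentence(pair[1])
        with open(corpus_path, 'wb') as f:
            pickle.dump((input_lang, output_lang, pairs), f)
    return input_lang, output_lang, pairs

Deleting the cached pickle forces a rebuild, which is necessary after changing the filtering rules in corpus.filter_pairs.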
Example #4
    train = ds_train.take(BUFFER_SIZE)  # 1.5M samples
    print("Dataset sample taken")
    train_dataset = train.map(s2s.tf_encode)

    # train_dataset = train_dataset.shuffle(BUFFER_SIZE) – optional
    train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)
    print("Dataset batching done")

    steps_per_epoch = BUFFER_SIZE // BATCH_SIZE
    embedding_dim = 128
    units = 256  # from pointer generator paper
    EPOCHS = 6

    encoder = s2s.Encoder(vocab_size, embedding_dim, units, BATCH_SIZE)
    decoder = s2s.Decoder(vocab_size, embedding_dim, units, BATCH_SIZE)

    # Learning rate scheduler
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        0.001,
        decay_steps=steps_per_epoch * (EPOCHS / 2),
        decay_rate=2,
        staircase=False)

    optimizer = tf.keras.optimizers.Adam(lr_schedule)

    if args.checkpoint is None:
        dt = datetime.datetime.today().strftime("%Y-%b-%d-%H-%M-%S")
        checkpoint_dir = './training_checkpoints-' + dt
    else:
        checkpoint_dir = args.checkpoint
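The checkpoint directory is chosen above, but the snippet stops before anything is written to it. A minimal sketch of how the optimizer, encoder and decoder are commonly tied to that directory with tf.train.Checkpoint and tf.train.CheckpointManager, continuing the same module (tensorflow already imported as tf); the max_to_keep value and the restore-on-startup logic are assumptions:

# Track the optimizer and both sub-models so training can resume mid-run.
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)

# Restore the latest checkpoint if the directory already contains one.
if manager.latest_checkpoint:
    checkpoint.restore(manager.latest_checkpoint)
    print("Restored from {}".format(manager.latest_checkpoint))
else:
    print("Initializing from scratch")

# Inside the training loop, call manager.save() periodically,
# e.g. once per epoch, to write a new checkpoint.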