Example #1
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                        iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
        
    showPlot(plot_losses)
    
    plt.savefig('base-lstm-loss')
    torch.save(encoder.state_dict(), 'encoder.pth')
    torch.save(decoder.state_dict(), 'decoder.pth')

if __name__ == "__main__":
    hidden_size = 256
    n_iters = 910000
    teacher_forcing_ratio = 0.5

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words).to(device)

    trainiters(encoder, decoder, n_iters)
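The excerpt above calls timeSince and showPlot, which are defined elsewhere in the project. A minimal sketch of the two helpers, assuming they follow the standard PyTorch seq2seq-tutorial pattern (an assumption, not the repository's code):

import math
import time

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


def asMinutes(s):
    # Format a duration in seconds as "Xm Ys".
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    # Elapsed time so far plus an estimate of the time remaining,
    # given the fraction of iterations completed.
    now = time.time()
    s = now - since
    es = s / percent
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


def showPlot(points):
    # Plot the accumulated average losses; the caller saves the figure.
    fig, ax = plt.subplots()
    ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))
    ax.plot(points)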
Example #2
	if load_corpus:
		with open(corpus_path, 'rb') as f:
			input_lang, output_lang, pairs = pickle.load(f)
	else:
		input_lang, output_lang, pairs = corpus.read_file(
			'ENG', 'FRA', 'data/eng-fra.txt', True)
		pairs = corpus.filter_pairs(pairs)
		for pair in pairs:
			input_lang.add_sentence(pair[0])
			output_lang.add_sentence(pair[1])
		with open(corpus_path, 'wb') as f:
			pickle.dump((input_lang, output_lang, pairs), f)
	print(f'{len(pairs)} pairs, {input_lang.n_words} source, {output_lang.n_words} target')

	# Load model
	encoder = seq2seq.Encoder(input_lang.n_words, hidden_size, embed_size, n_layers)
	decoder = seq2seq.Decoder(hidden_size, embed_size, output_lang.n_words, n_layers)
	model = seq2seq.Seq2seq(encoder, decoder).to(device)
	if load_model:
		model.load_state_dict(torch.load(model_path))
		# train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
		#						teaching_rate, clip, model_path)
	else:
		def init_weights(m):
			for name, param in m.named_parameters():
				nn.init.uniform_(param.data, -0.08, 0.08)
		model.apply(init_weights)

	# Test or train
	if mode == 'train':
		train.train(model, (input_lang, output_lang, pairs), batch_size, n_epochs, learning_rate,
					teaching_rate, clip, model_path)
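One small hardening (an assumption on my part, not part of the repository) is to derive load_corpus from the cache file itself instead of a hand-set flag, so the pickle is rebuilt automatically whenever it is missing:

import os

# Hypothetical guard: reuse the pickled corpus only when the cache exists.
load_corpus = os.path.exists(corpus_path)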
Example #3
if __name__ == "__main__":
    config = argparser()

    embedding_size = config.embedding_size
    hidden_size = config.hidden_size
    teacher_forcing_ratio = config.teacher_forcing_ratio
    n_iters = config.n_iters

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    input_emb_matrix, output_emb_matrix = np.load(
        'input_emb_matrix.npy'), np.load('output_emb_matrix.npy')
    print('Embedding-matrix shape: {}, {}'.format(input_emb_matrix.shape,
                                                  output_emb_matrix.shape))

    encoder = seq2seq.Encoder(input_size=input_lang.n_words,
                              embedding_size=embedding_size,
                              hidden_size=hidden_size,
                              embedding_matrix=input_emb_matrix,
                              n_layers=config.n_layers,
                              dropout_p=config.dropout_p).to(device)

    decoder = seq2seq.AttnDecoder(output_size=output_lang.n_words,
                                  embedding_size=embedding_size,
                                  hidden_size=hidden_size,
                                  embedding_matrix=output_emb_matrix,
                                  n_layers=config.n_layers,
                                  dropout_p=config.dropout_p).to(device)

    trainiters(pairs, encoder, decoder, n_iters)
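How the embedding_matrix argument is consumed inside seq2seq.Encoder is not shown in this excerpt; one common pattern (a sketch under that assumption, not the repository's implementation) is to wrap the pre-trained NumPy array with nn.Embedding.from_pretrained:

import numpy as np
import torch
import torch.nn as nn


class PretrainedEmbeddingEncoder(nn.Module):
    # Illustrative only: a GRU encoder whose embedding layer is initialised
    # from a pre-trained matrix such as input_emb_matrix above.
    def __init__(self, input_size, embedding_size, hidden_size,
                 embedding_matrix, n_layers=1, dropout_p=0.1):
        super().__init__()
        weights = torch.tensor(embedding_matrix, dtype=torch.float)
        assert weights.shape == (input_size, embedding_size)
        # freeze=False keeps the pre-trained vectors trainable.
        self.embedding = nn.Embedding.from_pretrained(weights, freeze=False)
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers=n_layers,
                          dropout=dropout_p if n_layers > 1 else 0.0)

    def forward(self, x, hidden):
        # x: a single token index; shape it to (seq_len=1, batch=1, embedding_size).
        embedded = self.embedding(x).view(1, 1, -1)
        return self.gru(embedded, hidden)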
Example #4
    print('BLEU: {:.4}'.format(sum(scores) / n))


if __name__ == "__main__":
    '''
    Evaluation is mostly the same as training,
    but there are no targets, so we simply feed the decoder’s predictions back to itself at each step.
    Every time it predicts a word we add it to the output string,
    and if it predicts the EOS token we stop there.
    '''
    hidden_size = 300

    input_lang, output_lang, pairs = loader.prepareData('eng', 'fra', True)

    input_emb_matrix, output_emb_matrix = np.load(
        'input_emb_matrix.npy'), np.load('output_emb_matrix.npy')
    print('Embedding-matrix shape: {}, {}'.format(input_emb_matrix.shape,
                                                  output_emb_matrix.shape))

    encoder = seq2seq.Encoder(input_lang.n_words, hidden_size,
                              input_emb_matrix).to(device)
    decoder = seq2seq.Decoder(hidden_size, output_lang.n_words,
                              output_emb_matrix).to(device)

    encoder.load_state_dict(torch.load('encoder.pth'))
    encoder.eval()
    decoder.load_state_dict(torch.load('decoder.pth'))
    decoder.eval()

    evaluateRandomly(encoder, decoder, pairs, int(len(pairs) * 0.3))
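The docstring above describes greedy decoding; a minimal sketch of such an evaluate step, assuming the usual SOS_token / EOS_token constants, an index2word mapping on output_lang, and a tensorFromSentence helper (these names are assumptions, since they are not part of the excerpt):

def evaluate(encoder, decoder, sentence, max_length=10):
    # No targets at evaluation time: the decoder's own prediction at each
    # step is fed back in as its next input, until EOS or max_length.
    with torch.no_grad():
        input_tensor = loader.tensorFromSentence(input_lang, sentence)
        encoder_hidden = encoder.initHidden()
        for ei in range(input_tensor.size(0)):
            _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        decoded_words = []
        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            _, topi = decoder_output.topk(1)      # most probable next word
            if topi.item() == EOS_token:          # stop at end-of-sentence
                break
            decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.detach()         # feed the prediction back in
        return decoded_words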
Example #5
    BATCH_SIZE = 64  # try bigger batch for faster training

    train = ds_train.take(BUFFER_SIZE)  # 1.5M samples
    print("Dataset sample taken")
    train_dataset = train.map(s2s.tf_encode)

    # train_dataset = train_dataset.shuffle(BUFFER_SIZE) – optional
    train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)
    print("Dataset batching done")

    steps_per_epoch = BUFFER_SIZE // BATCH_SIZE
    embedding_dim = 128
    units = 256  # from pointer generator paper
    EPOCHS = 6

    encoder = s2s.Encoder(vocab_size, embedding_dim, units, BATCH_SIZE)
    decoder = s2s.Decoder(vocab_size, embedding_dim, units, BATCH_SIZE)

    # Learning rate scheduler
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        0.001,
        decay_steps=steps_per_epoch * (EPOCHS / 2),
        decay_rate=2,
        staircase=False)
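    # Note: per the tf.keras docs, InverseTimeDecay computes
    #   lr(step) = 0.001 / (1 + decay_rate * step / decay_steps),
    # so with decay_steps = 3 * steps_per_epoch and decay_rate = 2 the learning
    # rate falls to about 0.001/3 after three epochs and 0.001/5 after six.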

    optimizer = tf.keras.optimizers.Adam(lr_schedule)

    if args.checkpoint is None:
        dt = datetime.datetime.today().strftime("%Y-%b-%d-%H-%M-%S")
        checkpoint_dir = './training_checkpoints-' + dt
    else: