Example #1
def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size, seq_len, batch_size, num_layers, dp_keep_prob, vocab_size, path):
	if model_type=='RNN':
		model = RNN(emb_size=emb_size, hidden_size=hidden_size,
				seq_len=seq_len, batch_size=batch_size,
				vocab_size=vocab_size, num_layers=num_layers,
				dp_keep_prob=dp_keep_prob)
	else:
		model = GRU(emb_size=emb_size, hidden_size=hidden_size,
				seq_len=seq_len, batch_size=batch_size,
				vocab_size=vocab_size, num_layers=num_layers,
				dp_keep_prob=dp_keep_prob)

	model.load_state_dict(torch.load(path))
	model = model.to(device)
	# initial hidden state: zeros of shape (num_layers, num_samples, hidden_size)
	hidden = torch.zeros(num_layers, num_samples, hidden_size).to(device)
	# sample the first token of each sequence uniformly over the vocabulary
	# (the hard-coded 10000 assumes the PTB vocabulary size)
	input = torch.ones(10000)
	input = torch.multinomial(input, num_samples).to(device)
	output = model.generate(input, hidden, seq_len)
	with open(model_type + '_generated_sequences' + '.txt', 'w') as f:
		for i in range(num_samples):
			for j in range(seq_len):
				f.write(id_2_word.get(output[j, i].item()) + ' ')
			f.write('\n')
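Every example on this page calls a model.generate(input, hidden, generated_seq_len) method whose body is not shown. For orientation, the sketch below shows the sampling loop such a method typically implements; model.step is a hypothetical helper (embed the current tokens, run one recurrent step, return logits and the new hidden state), and the (seq_len, batch) output shape is inferred from the output[j, i] indexing above.

import torch

def sample_sequences(model, first_tokens, hidden, generated_seq_len):
    # first_tokens: LongTensor (batch,) of initial token ids
    # hidden: initial hidden state, shape (num_layers, batch, hidden_size)
    # returns: LongTensor (generated_seq_len, batch)
    samples = [first_tokens]
    for _ in range(generated_seq_len - 1):
        # model.step is hypothetical: one embedding + recurrent step
        logits, hidden = model.step(samples[-1], hidden)
        probs = torch.softmax(logits, dim=-1)
        # draw the next token of each sequence from the model's distribution
        samples.append(torch.multinomial(probs, num_samples=1).squeeze(-1))
    return torch.stack(samples)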
Example #2
hidden = model.init_hidden()
hidden = hidden.to(device)

start = [word_to_id[w]
         for w in ("the", "a", "an", "he", "she", "it",
                   "they", "why", "how", "to")]

hidden = repackage_hidden(hidden)
short_samples = model.generate(start, hidden, args.seq_len)

print("-" * 20 + "Short sentences" + "-" * 20)
for i, sample in enumerate(short_samples.to("cpu"), 1):
    sentence = " ".join([id_2_word[int(word)] for word in sample])
    print(sentence, end="\n\n")
    if i >= 10:
        break

hidden = repackage_hidden(hidden)
long_samples = model.generate(start, hidden, 2 * args.seq_len)

print("-" * 20 + "Long sentences" + "-" * 20)
for i, sample in enumerate(long_samples.to("cpu"), 1):
    sentence = " ".join([id_2_word[int(word)] for word in sample])
    print(sentence, end="\n\n")
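repackage_hidden is used here but not defined in the snippet; it is almost certainly the standard helper from PyTorch's word-language-model example, which detaches hidden states from the autograd graph so generation does not drag old history along. A likely definition (an assumption, shown for completeness; torch is imported as in the snippet):

def repackage_hidden(h):
    # detach hidden states from how they were produced
    if isinstance(h, torch.Tensor):
        return h.detach()
    # LSTM states are (h, c) tuples; recurse over them
    return tuple(repackage_hidden(v) for v in h)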
Example #3
                        p.data.add_(p.grad.data, alpha=-lr)  # SGD step: p <- p - lr * grad
            if step % (epoch_size // 10) == 10:
                print('step: '+ str(step) + '\t' \
                    + "loss (sum over all examples' seen this epoch):" + str(costs) + '\t' \
                    + 'speed (wps):' + str(iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters), losses


###############################################################################
#
# RUN MAIN LOOP (TRAIN AND VAL)
#
###############################################################################

print("\n########## Running Main Loop ##########################")
print("\n########## Generating Samples #########################")

hidden_zero = model.init_hidden()
hidden_zero = hidden_zero.to(device)
# seed generation with 10 consecutive token ids from the training set
train_slice = train_data[5355:5365]
train_slice = np.array(train_slice, dtype=np.int32)
inputs = torch.from_numpy(train_slice.astype(np.int64)).contiguous().to(device)
samples = model.generate(inputs, hidden_zero, 70)
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path, allow_pickle=True)[()]
# You will need these values for plotting learning curves (Problem 4)
Example #4
# np.save(lc_path, {'val_ts_losses':timestep_loss.numpy(),
#                   'val_ppls':val_ppls,
#                   #'train_losses':train_losses,
#                   'val_losses':val_losses,
#                   'times': times})
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path, allow_pickle=True)[()]
# You will need these values for plotting learning curves (Problem 4)
val_loss = []
val_ppl = []
timestep_loss = []
#val_ppl, val_loss, timestep_loss = run_epoch(model, valid_data)
#print(val_ppl)
hidden = model.init_hidden().to(device)
# equivalently: hidden = torch.zeros([args.num_layers, model.batch_size, args.hidden_size]).to(device)
seq_length = args.seq_len

for i in (35, 70):  # generate sequences of length 35 and of length 70
    for j in range(10):
        # random starting token id (drawn only from the first 999 ids)
        input = torch.randint(low=0, high=999, size=(1, ),
                              dtype=torch.long).to(device)
        sequence = model.generate(input, hidden, i)
        phrase = "\n" + str(input.item()) + ": "
        for token_id in sequence:
            phrase += id_2_word[token_id.item()] + " "

        print(phrase)
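The commented-out np.save above stores the learning-curve arrays as a pickled dict, and the NOTE shows how to read it back; on NumPy >= 1.16.3 the load additionally needs allow_pickle=True. A short load-and-plot sketch (assuming the lc_path and dict keys from the comment):

import numpy as np
import matplotlib.pyplot as plt

# np.save wraps the dict in a 0-d object array; indexing with [()] unwraps it
curves = np.load(lc_path, allow_pickle=True)[()]

plt.plot(curves['val_ppls'])
plt.xlabel('epoch')
plt.ylabel('validation perplexity')
plt.show()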
Example #5
print("device is = ", model)

print("Load model parameters, best_params.pt")

save_dir = args.save_dir
bp_path = os.path.join(save_dir, 'best_params.pt')
model.load_state_dict(torch.load(bp_path))
txt_path = os.path.join(save_dir, '_generate.txt')
model.eval()
nb_sentence = 20
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = model.seq_len
f = open(txt_path, "w+")
f.write("==============seq_len===============\n")
hidden = repackage_hidden(model.init_hidden())
samples = model.generate(inputs, hidden.to(device), generated_seq_len)
# import pdb; pdb.set_trace()
for i in range(nb_sentence):
    sentence = ""
    for t in range(generated_seq_len):
        sentence += id_2_word[samples[t][i].data.item()] + ' '

    f.write("---------------------This is line %d\r\n" % (i + 1))
    f.write(sentence + '\n')

f.write("==============seq_len * 2===============\n")
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = (generated_seq_len * 2)
print('generated_seq_len', generated_seq_len)
f.write("---------------------This is line %d\r\n" % (i + 1))
hidden = repackage_hidden(model.init_hidden())
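torch.LongTensor(nb_sentence).random_(0, model.vocab_size) allocates an uninitialized tensor and then fills it in place with random token ids. On current PyTorch the same seed tokens are usually drawn in a single call; a behavior-equivalent sketch:

inputs = torch.randint(0, model.vocab_size, (nb_sentence,), dtype=torch.long).to(device)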
Example #6
###############################################################################

print("\n########## Running Main Loop ##########################")
# the map_location lambda keeps checkpointed CUDA tensors on the CPU
model.load_state_dict(
    torch.load(os.path.join(args.save_dir, 'best_params.pt'),
               map_location=lambda storage, location: storage))
model = model.to(device)
print(model)

given = np.array(list(id_2_word.keys())[10:20], dtype=np.int64)
print([id_2_word[id] for id in given])
given = torch.from_numpy(given).to(device)

hidden = model.init_hidden().to(device)
model.eval()

# generate returns (gen_length, batch); transpose to (batch, gen_length)
res = model.generate(given, hidden, args.gen_length).transpose(0, 1)
print(res.shape)
res = res.to('cpu').numpy()

res = [' '.join([id_2_word[id] for id in sent]) for sent in res]

with open('{}_{}.txt'.format(args.model, args.gen_length), 'w') as writer:
    for i, sent in enumerate(res):
        # mark the special tokens so they stand out in the output file
        sent = (sent.replace('<unk>', '!unk!')
                    .replace('<eos>', '!eos!')
                    .replace('$', '\\$'))
        writer.write('{}. {}\n\n'.format(i + 1, sent))
print('\n'.join(res))
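The map_location lambda above keeps checkpointed CUDA tensors on the CPU when no GPU is available. Current PyTorch accepts a plain string for the same effect; an equivalent sketch, assuming the same checkpoint path:

state = torch.load(os.path.join(args.save_dir, 'best_params.pt'), map_location='cpu')
model.load_state_dict(state)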
Example #7
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)

model.load_state_dict(torch.load(args.load_model))  # optionally: map_location='cpu'
# model stays on the CPU; generate below is fed CPU tensors
model.eval()
print(model)
input = torch.LongTensor(args.batch_size).random_(0, vocab_size)
generated_seq_len = 70
results = model.generate(input, model.init_hidden(), generated_seq_len)
# print(results)
results = results.numpy().transpose().tolist()
for l in results:
    print(" ".join(id_2_word[w] for w in l))
Example #8
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)  # dp_keep_prob=1 disables dropout for generation
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
else:
    raise Exception("Unknown model")

model.load_state_dict(
    torch.load('{}/best_params.pt'.format(output_dir), map_location=device))

model.to(device)

# start with zeros
seed = torch.zeros(args.batch_size, dtype=torch.long)

samples = model.generate(seed, model.init_hidden(), seq_len)
samples = samples.transpose(0, 1)  # shape (batch_size, generated_seq_len)

with open(os.path.join(output_dir, "samples.{}.txt".format(seq_len)),
          "w") as of:
    for sample in samples:
        print(" ".join([id_2_word[idx] for idx in sample.numpy()]), file=of)
Example #9
elif args.model == 'TRANSFORMER':
    model = TRANSFORMER(vocab_size=vocab_size,
                        n_units=args.hidden_size,
                        n_blocks=args.num_layers,
                        dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

if args.model_path is not None and args.generate:
    model.load_state_dict(torch.load(args.model_path))
    model = model.to(device)
    gen = model.generate(
        torch.LongTensor(args.batch_size).random_(0, model.vocab_size).cuda(),
        model.init_hidden().cuda(), 10)
    gen = [[id_2_word[w] for w in seq.data.cpu().numpy()] for seq in gen]
    for i in range(10):
        print(gen[i])

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss(reduction='none')
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs
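lr_decay_base and m_flat_lr describe an exponential learning-rate decay that only starts after epoch 14: the rate stays at initial_lr for the first m_flat_lr epochs and is then multiplied by roughly 0.87 per additional epoch. The epoch loop that consumes these values is not part of the snippet; a sketch of the usual pattern (args.num_epochs is assumed):

for epoch in range(args.num_epochs):
    # no decay while epoch < m_flat_lr, exponential decay afterwards
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = args.initial_lr * lr_decay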
Example #10
for m in range(len(model_types)):
	for s in range(len(seq_len)):
		print('Processing model: '+model_types[m]+' seq_len: '+str(seq_len[s])+'\n')
		if model_types[m]=='RNN':
			model = RNN(emb_size=embSize[m], hidden_size=hiddenSize[m], 
					seq_len=seqLen[m], batch_size=batchSize[m],
					vocab_size=vocab_size, num_layers=numLayers[m], 
					dp_keep_prob=dropOut[m])
		else:
			model = GRU(emb_size=embSize[m], hidden_size=hiddenSize[m],
					seq_len=seqLen[m], batch_size=batchSize[m],
					vocab_size=vocab_size, num_layers=numLayers[m], 
					dp_keep_prob=dropOut[m])
		model.load_state_dict(torch.load(path[m]))
		model = model.to(device)
		hidden = torch.zeros(numLayers[m], samples, hiddenSize[m]).to(device)
		# sample the first token of each sequence uniformly over the vocabulary
		input = torch.ones(10000)
		input = torch.multinomial(input, samples).to(device)
		model.eval()
		output = model.generate(input, hidden, seq_len[s])
		print('Saving generated samples')
		fid = open(model_types[m] + '_' + str(seq_len[s]) + '.txt', 'w')
		for i in range(samples):
			for j in range(seq_len[s]):
				fid.write(id_2_word.get(output[j,i].item())+' ')
			fid.write('\n')
		fid.close()