def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size,
                       seq_len, batch_size, num_layers, dp_keep_prob, vocab_size,
                       path):
    """Load a trained checkpoint and write generated word sequences to disk.

    Builds a model of `model_type` ('RNN' selects RNN; any other value selects
    GRU), restores its weights from `path`, samples `num_samples` seed tokens
    uniformly from a 10000-word vocabulary, generates `seq_len` tokens per
    sample, and writes one space-separated sequence per line to
    '<model_type>_generated_sequences.txt'.

    Args:
        id_2_word: dict mapping token id -> word string.
        num_samples: number of sequences to generate.
        model_type: 'RNN' or 'GRU'.
        emb_size, hidden_size, seq_len, batch_size, num_layers, dp_keep_prob,
            vocab_size: model hyperparameters (must match the checkpoint).
        path: path to the saved state_dict.
    """
    if model_type == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    else:
        model = GRU(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    model.load_state_dict(torch.load(path))
    model = model.to(device)
    # Disable dropout during generation (was missing here; the equivalent
    # loop elsewhere in this file calls eval() before generate()).
    model.eval()
    # Zero initial hidden state. A plain tensor suffices; wrapping it in
    # nn.Parameter only added needless gradient tracking.
    hidden = torch.zeros(num_layers, num_samples, hidden_size).to(device)
    # Uniform seed-token distribution over the 10k vocabulary.
    # torch.multinomial normalizes its weights, so the original
    # `torch.ones(10000) * 1 / 1000` scaling was redundant.
    seed = torch.multinomial(torch.ones(10000), num_samples).to(device)
    output = model.generate(seed, hidden, seq_len)
    # `with` guarantees the file is closed even if a lookup below raises.
    with open(model_type + '_generated_sequences' + '.txt', 'w') as f:
        for i in range(num_samples):
            for j in range(seq_len):
                f.write(id_2_word.get(output[j, i].item()) + ' ')
            f.write('\n')
# Generate short (seq_len) and long (2 * seq_len) samples from a batch of
# common sentence-starter tokens, printing up to 10 of each.
hidden = model.init_hidden()
hidden = hidden.to(device)
# Idiomatic list construction replaces the original chain of ten .append calls.
start = [word_to_id[w]
         for w in ("the", "a", "an", "he", "she", "it",
                   "they", "why", "how", "to")]

hidden = repackage_hidden(hidden)
short_samples = model.generate(start, hidden, args.seq_len)
print("-" * 20 + "Short sentences" + "-" * 20)
for i, sample in enumerate(short_samples.to("cpu"), 1):
    sentence = " ".join([id_2_word[int(word)] for word in sample])
    print(sentence, end="\n\n")
    if i >= 10:
        break

# Detach the hidden state again before the longer generation pass.
hidden = repackage_hidden(hidden)
long_samples = model.generate(start, hidden, 2 * args.seq_len)
print("-" * 20 + "Long sentences" + "-" * 20)
for i, sample in enumerate(long_samples.to("cpu"), 1):
    sentence = " ".join([id_2_word[int(word)] for word in sample])
    print(sentence, end="\n\n")
            # SGD step: p <- p - lr * grad. NOTE(review): this is the legacy
            # two-argument add_ overload; newer torch spells it
            # p.data.add_(p.grad.data, alpha=-lr). Indentation of this fragment
            # was reconstructed -- the enclosing function's head is outside
            # this chunk; confirm nesting against the full file.
            p.data.add_(-lr, p.grad.data)
        # Periodic progress report. NOTE(review): `== 10` only ever fires when
        # epoch_size // 10 > 10; `== 0` may have been intended -- verify.
        if step % (epoch_size // 10) == 10:
            print('step: '+ str(step) + '\t' \
                + "loss (sum over all examples' seen this epoch):" + str(costs) + '\t' \
                + 'speed (wps):' + str(iters * model.batch_size / (time.time() - start_time)))
    # Perplexity = exp(mean cross-entropy over the tokens seen this epoch).
    return np.exp(costs / iters), losses


###############################################################################
#
# RUN MAIN LOOP (TRAIN AND VAL)
#
###############################################################################
print("\n########## Running Main Loop ##########################")

print("\n########## Generating Samples #########################")
# Fresh zero hidden state for sampling.
hidden_zero = model.init_hidden()
#hidden_zero.to(torch.long)
hidden_zero = hidden_zero.to(device)
#print(type(hidden_zero))
#samples = model.generate(torch.cuda.LongTensor([0+ 3*i for i in range(20)]),hidden_zero,10)
# Seed generation with 10 consecutive tokens taken from the training data.
train_slice = train_data[5355:5365]
train_slice = np.array(train_slice, dtype=np.int32)
inputs = torch.from_numpy(train_slice.astype(np.int64)).contiguous().to(device)
# Generate 70 tokens per seed.
samples = model.generate(inputs, hidden_zero, 70)

# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)
# np.save(lc_path, {'val_ts_losses':timestep_loss.numpy(),
#                   'val_ppls':val_ppls,
#                   #'train_losses':train_losses,
#                   'val_losses':val_losses,
#                   'times': times})

# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)

val_loss = []
val_ppl = []
timestep_loss = []
#val_ppl, val_loss, timestep_loss = run_epoch(model, valid_data)
#print(val_ppl)

# NOTE(review): this assignment is dead -- the loop below overwrites the seed
# before it is ever used.
input = torch.tensor([1]).to(device)
hidden = model.init_hidden().to(device)
#hidden = torch.zeros([args.num_layers, model.batch_size,args.hidden_size]).to(device)
seq_length = args.seq_len

# Generate 10 sequences each at lengths 35 and 70, seeded with a random token
# id drawn from the first 999 vocabulary entries.
for i in (35, 70):
    for j in range(10):
        seed = torch.randint(low=0, high=999, size=(1, ), dtype=torch.long).to(device)
        sequence = model.generate(seed, hidden, i)
        phrase = "\n" + str(seed.item()) + ": "
        # Fixed: the original looped over `enumerate(sequence)` and indexed
        # token_id[1] -- iterating the sequence directly is equivalent and clear.
        for token in sequence:
            phrase += id_2_word[token.item()] + " "
        print(phrase)
# Load the best checkpoint and write generated sentences to '_generate.txt'.
# NOTE(review): the label says "device" but the model is printed.
print("device is = ", model)
print("Load model parameters, best_params.pt")
# NOTE(review): `dir` shadows the builtin dir().
dir = args.save_dir
bp_path = os.path.join(dir, 'best_params.pt')
model.load_state_dict(torch.load(bp_path))
txt_path = os.path.join(dir, '_generate.txt')
model.eval()
nb_sentence = 20
# Random seed token per sentence, uniform over the vocabulary.
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = model.seq_len
# NOTE(review): file handle is never closed in this chunk; a `with` block
# would be safer -- confirm against the rest of the file.
f = open(txt_path, "w+")
f.write("==============seq_len===============\n")
hidden = repackage_hidden(model.init_hidden())
# samples presumably has shape (generated_seq_len, nb_sentence) -- indexed
# as samples[t][i] below; TODO confirm against model.generate.
samples = model.generate(inputs, hidden.to(device), generated_seq_len)
# import pdb; pdb.set_trace()
for i in range(nb_sentence):
    sentence = ""
    for t in range(generated_seq_len):
        sentence += id_2_word[samples[t][i].data.item()] + ' '
    f.write("---------------------This is line %d\r\n" % (i + 1))
    f.write(sentence + '\n')
# Second pass at double length.
f.write("==============seq_len * 2===============\n")
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = (generated_seq_len * 2)
print('generated_seq_len', generated_seq_len)
# NOTE(review): `i` here is the stale index left over from the loop above
# (always nb_sentence - 1); this looks like a copy/paste slip -- verify.
f.write("---------------------This is line %d\r\n" % (i + 1))
# Chunk is cut here; the double-length generation continues beyond this view.
hidden = repackage_hidden(model.init_hidden())
###############################################################################
print("\n########## Running Main Loop ##########################")

# Restore the best checkpoint onto CPU storage first, then move to `device`.
state = torch.load(os.path.join(args.save_dir, 'best_params.pt'),
                   map_location=lambda storage, location: storage)
model.load_state_dict(state)
model = model.to(device)
print(model)

# Seed generation with ten vocabulary ids (positions 10-19 of the id map).
seed_ids = np.array(list(id_2_word.keys())[10:20], dtype=np.int64)
print([id_2_word[token] for token in seed_ids])
seed_ids = torch.from_numpy(seed_ids).to(device)

hidden = model.init_hidden().to(device)
model.eval()

# Transpose so each row of `generated` is one full sequence.
generated = model.generate(seed_ids, hidden, args.gen_length).transpose(0, 1)
print(generated.shape)
generated = generated.to('cpu').numpy()
sentences = [' '.join(id_2_word[token] for token in row) for row in generated]

# Write numbered sentences, escaping special markers for downstream tooling.
with open('{}_{}.txt'.format(args.model, args.gen_length), 'w') as writer:
    for i, sent in enumerate(sentences):
        escaped = sent.replace('<unk>', '!unk!').replace('<eos>', '!eos!').replace('$', '\$')
        writer.write('{}. {}\n\n'.format(i + 1, escaped))
print('\n'.join(sentences))
                # Continuation of a model constructor call whose opening line
                # (presumably `model = RNN(...)` under an `if`) is outside this
                # chunk -- indentation reconstructed; verify nesting.
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)

# Restore trained weights; checkpoint is loaded onto the default device.
model.load_state_dict(torch.load(args.load_model))  #, map_location='cpu'))
# model.to(device) ???
model.eval()
print(model)

# NOTE(review): `input` shadows the builtin; random seed token per batch slot.
input = torch.LongTensor(args.batch_size).random_(0, vocab_size)
generated_seq_len = 70
results = model.generate(
    input, model.init_hidden(), generated_seq_len)
    ### input, hidden, generated_seq_len)
# print(results)
# Transpose to one list of token ids per generated sequence.
results = results.numpy().transpose().tolist()
# Map ids to words in place, then print each sequence as one line.
for l in results:
    for i in range(len(l)):
        l[i] = id_2_word[l[i]]
    print(" ".join(l))
            # Continuation of a model constructor call whose opening line is
            # outside this chunk -- indentation reconstructed; verify nesting.
            batch_size=args.batch_size,
            vocab_size=vocab_size,
            num_layers=args.num_layers,
            dp_keep_prob=1)
elif args.model == 'GRU':
    # dp_keep_prob=1 disables dropout for generation.
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
else:
    raise Exception("Unknown model")

# Load the best checkpoint directly onto the target device.
model.load_state_dict(
    torch.load('{}/best_params.pt'.format(output_dir), map_location=device))
model.to(device)

# start with zeros
# Every sequence is seeded with token id 0.
seed = torch.zeros(args.batch_size, dtype=torch.long)
samples = model.generate(seed, model.init_hidden(), seq_len)
samples = samples.transpose(0, 1)  # shape (batch_size, generated_seq_len)
# One generated sequence per output line.
with open(os.path.join(output_dir, "samples.{}.txt".format(seq_len)), "w") as of:
    for sample in samples:
        print(" ".join([id_2_word[idx] for idx in sample.numpy()]), file=of)
                        # Continuation of a constructor call (presumably the
                        # Transformer model) whose opening line is outside this
                        # chunk -- indentation reconstructed; verify nesting.
                        n_units=args.hidden_size,
                        n_blocks=args.num_layers,
                        dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    # NOTE(review): only prints -- execution continues with `model` possibly
    # undefined; raising would fail faster.
    print("Model type not recognized.")

if args.model_path is not None and args.generate:
    model.load_state_dict(torch.load(args.model_path))
    model = model.to(device)
    # NOTE(review): .cuda() hard-codes GPU here even though `device` is used
    # above -- this branch will fail on CPU-only machines; verify intent.
    gen = model.generate(
        torch.LongTensor(args.batch_size).random_(0, model.vocab_size).cuda(),
        model.init_hidden().cuda(), 10)
    # Map generated token ids back to words, one list per sequence.
    gen = [[id_2_word[w] for w in seq.data.cpu().numpy()] for seq in gen]
    for i in range(10):
        print(gen[i])

# LOSS FUNCTION
# reduction='none' keeps the per-token losses; callers aggregate themselves.
loss_fn = nn.CrossEntropyLoss(reduction='none')
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs
# For every trained model configuration (parallel arrays indexed by m) and
# every requested generation length, load the checkpoint, sample seeds, and
# write generated sequences to '<model_type>_<seq_len>.txt'.
for m in range(len(model_types)):
    for s in range(len(seq_len)):
        print('Processing model: '+model_types[m]+' seq_len: '+str(seq_len[s])+'\n')
        if model_types[m] == 'RNN':
            model = RNN(emb_size=embSize[m], hidden_size=hiddenSize[m],
                        seq_len=seqLen[m], batch_size=batchSize[m],
                        vocab_size=vocab_size, num_layers=numLayers[m],
                        dp_keep_prob=dropOut[m])
        else:
            model = GRU(emb_size=embSize[m], hidden_size=hiddenSize[m],
                        seq_len=seqLen[m], batch_size=batchSize[m],
                        vocab_size=vocab_size, num_layers=numLayers[m],
                        dp_keep_prob=dropOut[m])
        model.load_state_dict(torch.load(path[m]))
        model = model.to(device)
        # Zero initial hidden state. A plain tensor suffices; wrapping it in
        # nn.Parameter only added needless gradient tracking.
        hidden = torch.zeros(numLayers[m], samples, hiddenSize[m]).to(device)
        # Uniform seed-token distribution over the 10k vocabulary.
        # torch.multinomial normalizes its weights, so the original
        # `torch.ones(10000) * 1 / 1000` scaling was redundant. Also renamed
        # `input` -> `seed` to stop shadowing the builtin.
        seed = torch.multinomial(torch.ones(10000), samples).to(device)
        model.eval()
        output = model.generate(seed, hidden, seq_len[s])
        print('Saving generated samples')
        # `with` guarantees the file is closed even if a lookup below raises.
        with open(model_types[m] + '_' + str(seq_len[s]) + '.txt', 'w') as fid:
            for i in range(samples):
                for j in range(seq_len[s]):
                    fid.write(id_2_word.get(output[j, i].item()) + ' ')
                fid.write('\n')