def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size,
                       seq_len, batch_size, num_layers, dp_keep_prob, vocab_size, path):
    """Load a trained RNN or GRU language model and dump sampled word sequences.

    :param id_2_word: dict mapping token ids to word strings
    :param num_samples: number of independent sequences to generate
    :param model_type: 'RNN' selects the RNN class; anything else selects GRU
    :param path: checkpoint file consumed by torch.load / load_state_dict
    Writes one sequence per line to '<model_type>_generated_sequences.txt'.
    """
    if model_type == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    else:
        model = GRU(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    model.load_state_dict(torch.load(path))
    model = model.to(device)
    # FIX: a plain zero tensor is the correct initial hidden state; wrapping it
    # in nn.Parameter (as before) pointlessly registered it for gradients.
    hidden = torch.zeros(num_layers, num_samples, hidden_size).to(device)
    # Uniform weights over the 10000-word vocabulary; torch.multinomial only
    # needs relative weights, so the exact constant is irrelevant.
    seed_weights = torch.ones(10000) * 1 / 1000
    # FIX: renamed from `input`, which shadowed the builtin.
    seed_tokens = torch.multinomial(seed_weights, num_samples).to(device)
    output = model.generate(seed_tokens, hidden, seq_len)
    # FIX: use a context manager so the file is closed even on error.
    with open(model_type + '_generated_sequences' + '.txt', 'w') as f:
        for i in range(num_samples):
            for j in range(seq_len):
                # output is indexed [time_step, sample] — one word per write.
                f.write(id_2_word.get(output[j, i].item()) + ' ')
            f.write('\n')
def _load_model(model_type):
    """Build an RNN or GRU with the fixed best-run hyper-parameters and load
    its checkpoint.

    :param model_type: 'RNN' loads from RNN_ADAM_0; anything else loads the GRU
                       from GRU_SGD_LR_SCHEDULE_0
    :return: the model in eval mode (on GPU when CUDA is available)
    """
    emb_size = 200
    hidden_size = 1500
    seq_len = 35  # 70
    batch_size = 20
    vocab_size = 10000
    num_layers = 2
    dp_keep_prob = 0.35
    # Load model (Change to RNN if you want RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("RNN_ADAM_0", "best_params.pt")
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("GRU_SGD_LR_SCHEDULE_0", "best_params.pt")
    if torch.cuda.is_available():
        # BUG FIX: load_state_dict() returns a NamedTuple of missing/unexpected
        # keys, not the module, so chaining .cuda() onto it raised
        # AttributeError. Move the model itself instead.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
        model.eval()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
        model.eval()
    return model
def main(args):
    """Run the trained C3D feature extractor + RNN regressor over the test set
    and report accumulated L1 errors for the 'air' and 'bed' outputs.

    Expects args to provide: gpu, data_root, test_session_set, batch_size,
    num_workers, c3d_pth, rnn_pth.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_loader = data.DataLoader(
        DataLayer(args.data_root, args.test_session_set),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )
    c3d_model = C3D().to(device)
    c3d_model.load_state_dict(torch.load(args.c3d_pth))
    c3d_model.train(False)  # evaluation mode
    rnn_model = RNN().to(device)
    rnn_model.load_state_dict(torch.load(args.rnn_pth))
    rnn_model.train(False)  # evaluation mode
    air_criterion = nn.L1Loss().to(device)
    bed_criterion = nn.L1Loss().to(device)
    air_errors = 0.0
    bed_errors = 0.0
    start = time.time()
    with torch.set_grad_enabled(False):
        for batch_idx, (c3d_data, rnn_data, air_target, bed_target, data_path) \
                in enumerate(data_loader):
            print('Processing {}/{}, {:3.3f}%'.format(
                data_path[0], str(batch_idx).zfill(5) + '.mat',
                100.0 * batch_idx / len(data_loader)))
            c3d_data = c3d_data.to(device)
            rnn_data = rnn_data.to(device)
            air_target = air_target.to(device)
            bed_target = bed_target.to(device)
            # C3D features seed the RNN's initial state.
            air_feature, bed_feature = c3d_model.features(c3d_data)
            init = torch.cat((air_feature, bed_feature), 1)
            air_output, bed_output = rnn_model(rnn_data, init)
            # NOTE: Save these air and bed layers for visualization
            # The +1 / *412 undoes a [-1, 1] normalization — presumably 412 is
            # the half-range of the original signal; confirm against DataLayer.
            air_layer = (air_output.to('cpu').numpy() + 1) * 412
            bed_layer = (bed_output.to('cpu').numpy() + 1) * 412
            air_loss = air_criterion(air_output, air_target)
            bed_loss = bed_criterion(bed_output, bed_target)
            air_errors += air_loss.item()
            bed_errors += bed_loss.item()
    end = time.time()
    print('Finish all, errors (air): {:4.2f} (bed): {:4.2f}, | '
          'total running time: {:.2f} sec'.format(
              air_errors / len(data_loader.dataset) * 412,
              bed_errors / len(data_loader.dataset) * 412,
              end - start,
          ))
def main(args):
    """Evaluate the two-stream (spatial + temporal) classifier on the Test
    split and print overall accuracy.

    Expects args to provide: gpu, data_root, batch_size, num_workers,
    checkpoint.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_loader = data.DataLoader(
        DataLayer(
            data_root=osp.join(args.data_root, 'Test'),
            phase='Test',
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint)
    else:
        raise (RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint)))
    model = Model().to(device)
    model.load_state_dict(checkpoint)
    model.train(False)  # evaluation mode
    softmax = nn.Softmax(dim=1).to(device)
    corrects = 0.0
    with torch.set_grad_enabled(False):
        for batch_idx, (spatial, temporal, length, target) in enumerate(data_loader):
            spatial_input = torch.zeros(*spatial.shape)
            temporal_input = torch.zeros(*temporal.shape)
            target_input = []
            length_input = []
            # Sort the batch by decreasing sequence length, as required by
            # pack_padded_sequence (enforce_sorted default).
            index = utl.argsort(length)[::-1]
            for i, idx in enumerate(index):
                spatial_input[i] = spatial[idx]
                temporal_input[i] = temporal[idx]
                target_input.append(target[idx])
                length_input.append(length[idx])
            spatial_input = spatial_input.to(device)
            temporal_input = temporal_input.to(device)
            target_input = torch.LongTensor(target_input).to(device)
            pack1 = pack_padded_sequence(spatial_input, length_input,
                                         batch_first=True)
            pack2 = pack_padded_sequence(temporal_input, length_input,
                                         batch_first=True)
            score = model(pack1, pack2)
            # argmax over class probabilities; softmax does not change argmax
            # but mirrors the training setup.
            pred = torch.max(softmax(score), 1)[1].cpu()
            corrects += torch.sum(pred == target_input.cpu()).item()
    print('The accuracy is {:.4f}'.format(corrects / len(data_loader.dataset)))
def export_RNN_regressor(checkpoint_path):
    """
    :param checkpoint_path: relative path to a PyTorch .pth checkpoint
    :return: None, dumps a prediction text file in the model's training folder
    """
    checkpoint = torch.load(checkpoint_path)
    model = RNN(checkpoint['net_config'])
    model.load_state_dict(checkpoint['model'])
    model = model.eval().cuda()
    test_dataset = TweetDataset(dataset_type='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=TRAIN_CONFIG['batch_size'],
                             num_workers=TRAIN_CONFIG['workers'],
                             collate_fn=collate_function,
                             shuffle=False,  # keep order aligned with the ids list below
                             pin_memory=True)
    with open(DATASET_CONFIG['test_csv_relative_path'], newline='') as csvfile:
        test_data = list(csv.reader(csvfile))[1:]  # [1:] drops the CSV header row
    # First CSV column holds the tweet ids; order must match the loader's order.
    ids = [datum[0] for datum in test_data]
    n = len(test_loader)
    with open(
            "checkpoints/{}/predictions.txt".format(
                checkpoint['train_config']['experiment_name']), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["TweetID", "NoRetweets"])
        current_idx = 0
        for batch_index, batch in enumerate(test_loader):
            printProgressBar(batch_index, n)
            batch_size = batch['numeric'].shape[0]
            numeric = batch['numeric'].cuda()
            text = batch['embedding'].cuda()
            # If the model was trained on log(1 + count), invert that transform;
            # otherwise use the raw output.
            prediction = torch.exp(model(
                text, numeric)) - 1 if EXPORT_CONFIG['log'] else model(
                    text, numeric)
            if EXPORT_CONFIG['threshold']:
                # Clip predictions above the configured ceiling.
                prediction[
                    prediction >
                    EXPORT_CONFIG['threshold']] = EXPORT_CONFIG['threshold']
            for idx_in_batch in range(batch_size):
                writer.writerow([
                    str(ids[current_idx + idx_in_batch]),
                    str(int(prediction[idx_in_batch].item()))
                ])
            current_idx += batch_size
    print("Exportation done! :)")
class Tester:
    """Runs caption generation on the test set and scores it with COCO metrics."""

    def __init__(self, _hparams):
        self.test_loader = get_test_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.test_cap = _hparams.test_cap

    def testing(self, save_path, test_path):
        """
        Run the test loop: generate one caption per image, dump them as JSON,
        then evaluate with the COCO caption metrics.

        :param save_path: path of the saved model checkpoint to restore
        :param test_path: path where the generated sentences are written
        :return: None (metric/score pairs are printed)
        """
        print('*' * 20, 'test', '*' * 20)
        self.load_models(save_path)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.test_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                # Greedy/beam sampling from the decoder; sens[0] is the best sentence.
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})
        with open(test_path, 'w') as f:
            json.dump(sen_json, f)
        result = coco_eval(self.test_cap, test_path)
        for metric, score in result:
            print(metric, score)

    def load_models(self, save_path):
        # The map_location remaps devices because the checkpoint may have been
        # saved on a different GPU (cuda:2) than the one used for loading (cuda:0).
        ckpt = torch.load(save_path, map_location={'cuda:2': 'cuda:0'})
        encoder_state_dict = ckpt['encoder_state_dict']
        self.encoder.load_state_dict(encoder_state_dict)
        decoder_state_dict = ckpt['decoder_state_dict']
        self.decoder.load_state_dict(decoder_state_dict)

    def set_eval(self):
        # Switch both halves of the model to evaluation mode.
        self.encoder.eval()
        self.decoder.eval()
def _load_model(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                num_layers, dp_keep_prob, PATH, model_type):
    """Build an RNN or GRU with the given hyper-parameters, restore the
    checkpoint at PATH, and return the model in eval mode.

    :param model_type: 'RNN' selects the RNN class; anything else selects GRU
    :return: the model (moved to GPU when CUDA is available)
    """
    # Load model (Change to RNN if you want RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
    if torch.cuda.is_available():
        # BUG FIX: load_state_dict() returns a NamedTuple of missing/unexpected
        # keys, not the module, so chaining .cuda() onto it raised
        # AttributeError. Move the model itself instead.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
        model.eval()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
        model.eval()
    return model
def get_best_model(model_type: str) -> nn.Module:
    """Return the requested architecture with its best checkpoint restored.

    :param model_type: one of 'RNN', 'GRU', 'TRANSFORMER'; any other value
                       yields None
    """
    model: nn.Module = None
    if model_type == 'RNN':
        model = RNN(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.35)
        state = torch.load('./4_1_a/best_params.pt', map_location=device)
        model.load_state_dict(state)
    elif model_type == 'GRU':
        model = GRU(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.35)
        state = torch.load('./4_1_b/best_params.pt', map_location=device)
        model.load_state_dict(state)
    elif model_type == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size, n_units=512, n_blocks=6,
                            dropout=1. - 0.9)
        # run_epoch reads these attributes; the Transformer itself ignores them.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
        state = torch.load('./4_1_c/best_params.pt')
        model.load_state_dict(state)
    return model
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    """Build an RNN, GRU or TRANSFORMER with the best-run hyper-parameters,
    move it to `device`, and optionally restore weights from `pt`.

    Hyper-parameters mirror the best training commands:
      RNN:         --optimizer=ADAM            lr=0.0001 bs=20  hidden=1500 layers=2 keep=0.35
      GRU:         --optimizer=SGD_LR_SCHEDULE lr=10     bs=20  hidden=1500 layers=2 keep=0.35
      TRANSFORMER: --optimizer=SGD_LR_SCHEDULE lr=20     bs=128 hidden=512  layers=6 keep=0.9

    :return: the model, or None when model_name is not recognized.
    """
    if model_name not in ('RNN', 'GRU', 'TRANSFORMER'):
        print("ERROR: Model type not recognized.")
        return
    if model_name == 'RNN':
        model = RNN(emb_size=200, hidden_size=1500, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=2, dp_keep_prob=0.35)
    elif model_name == 'GRU':
        model = GRU(emb_size=200, hidden_size=1500, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=2, dp_keep_prob=0.35)
    else:
        model = TRANSFORMER(vocab_size=vocab_size, n_units=512,
                            n_blocks=6, dropout=1. - 0.9)
        # These three attributes do not affect the Transformer's computations;
        # they are only consumed by run_epoch.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    # Move to the requested device before (optionally) loading weights.
    model = model.to(device)
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
def load_model(model_info, device, vocab_size, emb_size=200, load_on_device=True):
    """Instantiate the architecture described by `model_info` and restore its
    saved parameters.

    :param model_info: object exposing get_params_path() plus the model
                       hyper-parameters (model, hidden_size, seq_len, ...)
    :param load_on_device: when True, move the model to `device` before loading
    :return: the restored model
    """
    params_path = model_info.get_params_path()
    kind = model_info.model
    if kind == 'RNN':
        model = RNN(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    elif kind == 'GRU':
        model = GRU(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        # The Transformer ignores these attributes; run_epoch reads them.
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size
    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
if args.optimizer == 'ADAM': optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr) # LEARNING RATE SCHEDULE lr = args.initial_lr lr_decay_base = 1 / 1.15 m_flat_lr = 14.0 # we will not touch lr for the first m_flat_lr epochs ############################################################################### # # Load Model dictionnary # ############################################################################### #model.load_state_dict(torch.load("./RNN.pt")) model.load_state_dict(torch.load("./GRU.pt")) #model.load_state_dict(torch.load("./Transformer.pt")) print(device) model = model.to(device) ############################################################################### # # DEFINE COMPUTATIONS FOR PROCESSING ONE EPOCH # ############################################################################### def repackage_hidden(h): """ Wraps hidden states in new Tensors, to detach them from their history. This prevents Pytorch from trying to backpropagate into previous input
def train():
    """Train the world-model RNN on latent codes produced by a frozen,
    pretrained VAE; periodically logs, saves decoded samples, and checkpoints.

    NOTE(review): this block was recovered from a whitespace-collapsed source;
    the statement nesting (e.g. the extent of the no_grad block) is the most
    plausible reconstruction — confirm against the original file.
    """
    global_step = 0
    # Loaded pretrained VAE
    vae = VAE(hp.vsize).to(DEVICE)
    # Checkpoints are named like '012k.pth.tar'; the lexicographically last one
    # is the most recent.
    ckpt = sorted(glob.glob(os.path.join(hp.ckpt_dir, 'vae', '*k.pth.tar')))[-1]
    vae_state = torch.load(ckpt)
    vae.load_state_dict(vae_state['model'])
    vae.eval()  # VAE stays frozen; only the RNN is trained
    print('Loaded vae ckpt {}'.format(ckpt))
    rnn = RNN(hp.vsize, hp.asize, hp.rnn_hunits).to(DEVICE)
    ckpts = sorted(glob.glob(os.path.join(hp.ckpt_dir, 'rnn', '*k.pth.tar')))
    if ckpts:
        # Resume RNN training from the latest checkpoint.
        ckpt = ckpts[-1]
        rnn_state = torch.load(ckpt)
        rnn.load_state_dict(rnn_state['model'])
        # Recover the step count from the 'NNNk' filename prefix.
        global_step = int(os.path.basename(ckpt).split('.')[0][:-1]) * 1000
        print('Loaded rnn ckpt {}'.format(ckpt))
    data_path = hp.data_dir if not hp.extra else hp.extra_dir
    # optimizer = torch.optim.RMSprop(rnn.parameters(), lr=1e-3)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-4)
    dataset = GameEpisodeDataset(data_path, seq_len=hp.seq_len)
    loader = DataLoader(dataset, batch_size=1, shuffle=True, drop_last=True,
                        num_workers=hp.n_workers, collate_fn=collate_fn)
    testset = GameEpisodeDataset(data_path, seq_len=hp.seq_len, training=False)
    test_loader = DataLoader(testset, batch_size=1, shuffle=False,
                             drop_last=False, collate_fn=collate_fn)
    ckpt_dir = os.path.join(hp.ckpt_dir, 'rnn')
    sample_dir = os.path.join(ckpt_dir, 'samples')
    os.makedirs(sample_dir, exist_ok=True)
    l1 = nn.L1Loss()
    while global_step < hp.max_step:
        # GO_states = torch.zeros([hp.batch_size, 1, hp.vsize+hp.asize]).to(DEVICE)
        with tqdm(enumerate(loader), total=len(loader), ncols=70,
                  leave=False) as t:
            t.set_description('Step {}'.format(global_step))
            for idx, (obs, actions) in t:
                obs, actions = obs.to(DEVICE), actions.to(DEVICE)
                with torch.no_grad():
                    # Encode frames to latents without tracking gradients.
                    latent_mu, latent_var = vae.encoder(obs)  # (B*T, vsize)
                    z = latent_mu
                    # z = vae.reparam(latent_mu, latent_var)  # (B*T, vsize)
                    z = z.view(-1, hp.seq_len, hp.vsize)  # (B*n_seq, T, vsize)
                # import pdb; pdb.set_trace()
                # Teacher forcing: predict z[t+1] from (z[t], action[t]).
                next_z = z[:, 1:, :]
                z, actions = z[:, :-1, :], actions[:, :-1, :]
                states = torch.cat([z, actions], dim=-1)  # (B, T, vsize+asize)
                # states = torch.cat([GO_states, next_states[:,:-1,:]], dim=1)
                x, _, _ = rnn(states)
                loss = l1(x, next_z)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                global_step += 1
                if global_step % hp.log_interval == 0:
                    eval_loss = evaluate(test_loader, vae, rnn, global_step)
                    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    with open(os.path.join(ckpt_dir, 'train.log'), 'a') as f:
                        log = '{} || Step: {}, train_loss: {:.4f}, loss: {:.4f}\n'.format(
                            now, global_step, loss.item(), eval_loss)
                        f.write(log)
                    # Decode sample S for a visual side-by-side: RNN prediction
                    # vs. VAE reconstruction vs. raw observations.
                    S = 2
                    y = vae.decoder(x[S, :, :])
                    v = vae.decoder(next_z[S, :, :])
                    save_image(
                        y,
                        os.path.join(sample_dir,
                                     '{:04d}-rnn.png'.format(global_step)))
                    save_image(
                        v,
                        os.path.join(sample_dir,
                                     '{:04d}-vae.png'.format(global_step)))
                    save_image(
                        obs[S:S + hp.seq_len - 1],
                        os.path.join(sample_dir,
                                     '{:04d}-obs.png'.format(global_step)))
                if global_step % hp.save_interval == 0:
                    d = {
                        'model': rnn.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }
                    torch.save(
                        d,
                        os.path.join(
                            ckpt_dir,
                            '{:03d}k.pth.tar'.format(global_step // 1000)))
if args.model == 'RNN': model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size, seq_len=args.seq_len, batch_size=args.batch_size, vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.dp_keep_prob) elif args.model == 'GRU': model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size, seq_len=args.seq_len, batch_size=args.batch_size, vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.dp_keep_prob) else: print("Model type not recognized.") model = model.to(device) model.load_state_dict(torch.load(lc_path)) #model = torch.load(map_location=torch.device('cpu')) model.eval() # MAIN LOOP for epoch in range(num_epochs): t0 = time.time() print('\nEPOCH ' + str(epoch) + ' ------------------') if args.optimizer == 'SGD_LR_SCHEDULE': lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0) lr = lr * lr_decay # decay lr if it is time # RUN MODEL ON TRAINING DATA run_epoch(model, train_data, True, lr)
# initialize accumulators. current_epoch = 1 batch_step_count = 1 time_used_global = 0.0 checkpoint = 1 # load lastest model to resume training model_list = os.listdir(model_dir) if model_list: print('Loading lastest checkpoint...') state = load_model(model_dir, model_list) encoder.load_state_dict(state['encoder']) decoder.load_state_dict(state['decoder']) optimizer.load_state_dict(state['optimizer']) current_epoch = state['epoch'] + 1 time_used_global = state['time_used_global'] batch_step_count = state['batch_step_count'] for group in optimizer.param_groups: group['lr'] = 0.0000001 group['weight_decay'] = 0.0 for param in encoder.parameters(): param.requires_grad_(requires_grad=True) BATCH_SIZE = 16 print('LR --> 0.0000001, WD = 0.0. Resume fine-tuning CNN.')
dev_dataset = torch.load(args.prepro_root + args.dev_dataset_path) elif args.mode == 'Test': dev_users_path = args.dataset_root + 'predict/test.users' dev_mask = torch.from_numpy(np.load(args.prepro_root + 'test_mask.npy')).float().cuda() dev_dataset = torch.load(args.prepro_root + args.test_dataset_path) item_list = np.load(args.prepro_root + 'item_list.npy') valid_tensor = torch.load(args.prepro_root + 'valid_writer_keywd.pkl').to(device) model = RNN(args.num_readers, args.num_writers, args.num_keywords, args.num_items, args.num_magazines, args.hid_dim, valid_tensor).to(device) print(model) model.eval() model.load_state_dict( torch.load('./models/%d_rnn_attention.pkl' % args.test_epoch)) file_w = open('./recommend.txt', 'w') file = open(dev_users_path, 'r') readers = file.read().splitlines() dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False) # [item_id, writer] + keywd + [reg_ts, meg_id] with torch.no_grad(): for i, input in enumerate(tqdm(dev_loader)): input = input[0].to(device) items = input[:, 18:].contiguous().view(-1, 5, 9) preds = model(input[:, :18], items, mode=args.mode) preds = torch.mul( preds[:, 0],
dropout=args.dropout, bidirectional=args.bidirectional, tie_weights=args.tie_weights, nonlinearity=args.nonlinearity) else: # no embedding layer (one-hot encoding) model = OneHotRNN(vocabulary=vocab, rnn_type=args.rnn_type, hidden_size=args.hidden_size, n_layers=args.n_layers, dropout=args.dropout, bidirectional=args.bidirectional, nonlinearity=args.nonlinearity) # load the best model model.load_state_dict(torch.load(args.model_file)) model.eval() ## enable evaluation modes # set up output filename if args.sample_idx is not None: output_filename = 'sampled-SMILES-{}.smi'.format(args.sample_idx) else: output_filename = 'sampled-SMILES.smi' output_file = os.path.join(args.output_dir, output_filename) # sample a set of SMILES from the final, trained model sampled_count = 0 batch_size = 512 while sampled_count < args.mols_per_file: sampled_smiles, NLLs = model.sample(batch_size,
use_embedding=args.use_embedding)[0].squeeze(1) loss = criterion(predictions, batch.label) acc = accuracy(predictions, batch.label) epoch_loss += loss.item() epoch_acc += acc.item() n += 1 return epoch_loss / n, epoch_acc / n funct = train funce = evaluate epoch_initial = 0 if args.mode == "resume": checkpoint = torch.load(model_dir + '/final.pt', map_location=device) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) epoch_initial = checkpoint['epoch'] best_valid_acc = 0 final_valid_loss = 0 patience_max = 20 patience = 0 if (args.gradients == True): patience_max = 100 for epoch in range(epoch_initial, args.epochs + epoch_initial): start_time = time.time() train_loss, train_acc = funct(model, train_iterator, optimizer, criterion, epoch, valid_iterator) valid_loss, valid_acc = funce(model, valid_iterator, criterion)
seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"], vocab_size=vocab_size, num_layers=argsdict["RNN_num_layers"], dp_keep_prob=1) gru = GRU(emb_size=argsdict["GRU_emb_size"], hidden_size=argsdict["GRU_hidden_size"], seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"], vocab_size=vocab_size, num_layers=argsdict["GRU_num_layers"], dp_keep_prob=1) # Load the model weight rnn.load_state_dict(torch.load(args.RNN_path)) gru.load_state_dict(torch.load(args.GRU_path)) rnn.eval() gru.eval() # Initialize the hidden state hidden = [rnn.init_hidden(), gru.init_hidden()] # Set the random seed manually for reproducibility. torch.manual_seed(args.seed) # Generate the word seed using random words # in the first 100 most common words. input = torch.randint(0, 100, (args.batch_size, 1)).squeeze()
tt_cur_time = torch.squeeze(targets[t, :].view(-1, model.batch_size)) loss = loss_fn(outputs.contiguous().view(-1, model.vocab_size), tt_cur_time) # here loss is the final step loss. hidden_grad = torch.autograd.grad(loss, hidden_list) # get gradient norms over concatenated multiple hidden layers. grad_norm = [grad.norm() for grad in hidden_grad] # normalized grad norm grad_norm = [(x-min(grad_norm)) / (max(grad_norm)-min(grad_norm)) for x in grad_norm] grad_norm = [grad.item() for grad in grad_norm] # first mini-batch as warmup, second mini-batch used for comparision. if step == 2: with open('5_2_{}.txt'.format(args.model), 'w') as f: f.write('{}\n'.format(args.model)) for grad in grad_norm: f.write('{}\n'.format(grad)) break ############################################################################### # # RUN MAIN LOOP # ############################################################################### print("\n########## Running Main Loop ##########################") model.load_state_dict(torch.load(os.path.join(args.save_dir, 'best_params.pt'), map_location=lambda storage, location: storage)) model = model.to(device) run_epoch(model, train_data)
best_val_so_far = np.inf times = [] if model_types[m]=='RNN': model = RNN(emb_size=embSize[m], hidden_size=hiddenSize[m], seq_len=seqLen[m], batch_size=batchSize[m], vocab_size=vocab_size, num_layers=numLayers[m], dp_keep_prob=dropOut[m]) elif model_types[m]=='GRU': model =GRU(emb_size=embSize[m], hidden_size=hiddenSize[m], seq_len=seqLen[m], batch_size=batchSize[m], vocab_size=vocab_size, num_layers=numLayers[m], dp_keep_prob=dropOut[m]) else: model=TRANSFORMER(vocab_size=vocab_size,n_units=hiddenSize[m], n_blocks=numLayers[m],dropout=1-dropOut[m]) model.load_state_dict(torch.load(path[m])) model.batch_size=batchSize[m] model.seq_len=seqLen[m] model.vocab_size=vocab_size model = model.to(device) # MAIN LOOP val_loss = run_epoch(model, valid_data,model_types[m]) total_loss[m,:]=val_loss time=np.arange(1,seqLen[m]+1) print('Plotting graph...') plt.figure() plt.plot(time, val_loss.flatten(), label='Val. Loss') plt.ylabel('Average loss') plt.xlabel('time-step (t)') plt.grid(True)
model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size, n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob) save_dir = 'Probleme_4_1/TRANSFORMER/best_params.pt' # these 3 attributes don't affect the Transformer's computations; # they are only used in run_epoch model.batch_size = args.batch_size model.seq_len = args.seq_len model.vocab_size = vocab_size else: print("Model type not recognized.") train = False if train == True: model.load_state_dict(torch.load(save_dir, map_location=device)) model = model.to(device) # LOSS FUNCTION loss_fn = nn.CrossEntropyLoss() if args.optimizer == 'ADAM': optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr) # LEARNING RATE SCHEDULE lr = args.initial_lr lr_decay_base = 1 / 1.15 m_flat_lr = 14.0 # we will not touch lr for the first m_flat_lr epochs ############################################################################### #
def main(args):
    """Train the CNN encoder + RNN decoder image-captioning model on COCO,
    with periodic validation, sampling, and checkpointing.

    NOTE(review): uses legacy PyTorch idioms (`volatile=True`, `.data[0]`)
    removed after 0.3 — this code presumably targets an old PyTorch; confirm
    before porting.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'
    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)
    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)
    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir
    encoder = CNN(embed_size)
    decoder = RNN(embed_size, num_hiddens, len(vocab), 1,
                  rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    if args.checkpoint_file:
        # Resume: restore weights, optimizer, and loss history.
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        # Only the decoder plus the encoder's final linear/batchnorm layers are
        # trained; the CNN backbone stays frozen.
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    if args.sample:
        # Sampling-only mode: no training.
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):
            for step, (images, captions,
                       lengths) in enumerate(train_loader, start=initial_step):
                # Set mini-batch dataset
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                # pack_padded_sequence(...)[0] flattens the padded captions to
                # match the decoder's flattened output.
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()
                if ngpu > 1:
                    # run on multiple GPU
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)
                train_loss = criterion(outputs, targets)
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions,
                                   lengths) in enumerate(val_loader):
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)
                        targets = pack_padded_sequence(captions, lengths,
                                                       batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])
                    losses_val.append(np.mean(batch_loss_val))

                    # predict a caption for the last validation batch's features
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)
                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)
                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step, epoch,
                                      losses_train, losses_val, checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))
    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
hidden = repackage_hidden(hidden) outputs, hidden = model(inputs, hidden) targets = torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous().to(device)#.cuda() # LOSS COMPUTATION for t in range(model.seq_len): tt = torch.squeeze(targets[t,:].view(-1, model.batch_size)) loss = loss_fn(outputs[t,:,:].contiguous().view(-1, model.vocab_size), tt) losses[0,t] += loss.data.item() iters += 1 return losses/iters # Load weights if torch.cuda.is_available(): model.load_state_dict(torch.load(args.weights)) else: model.load_state_dict(torch.load(args.weights,map_location='cpu')) # calculate the loss val_loss = run_epoch(model, valid_data) # plot the loss plt.plot(val_loss.flatten()) plt.title(f"The average loss for each timestep for {args.model}") plt.xlabel("Timestep") plt.ylabel("The average loss") # save figures plt.savefig(os.path.join(base_folder,'avg_losses.png'))
gru = GRU(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20, vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.35) trans = TRANSFORMER(vocab_size=vocab_size, n_units=512, n_blocks=6, dropout=1. - 0.9) # RNN num_epochs = args.num_epochs rnn.load_state_dict(torch.load('Question_4.1/RNN/best_params.pt')) rnn_name = rnn.__class__.__name__ rnn_avg_losses = run_epoch(rnn, valid_data, model_type='RNN') rnn_filename = rnn_name + '_avg_losses.npy' path = os.path.join('Question_5.1', rnn_filename) np.save(path, rnn_avg_losses) # # GRU # gru.load_state_dict(torch.load('Question_4.1/GRU/best_params.pt')) # gru_name = gru.__class__.__name__ # gru_avg_losses = run_epoch(gru, valid_data, model_type='GRU') # gru_filename = gru_name +'_avg_losses.npy' # path = os.path.join('Question_5.1', gru_filename) # np.save(path, gru_avg_losses) # # Transformer
shuffle=True) LOG(f"[DATA] Data is loaded. Vocabulary size is {len(word2idx)}") # Model Definition model = RNN(vocab_size=len(word2idx), embedding_dim=128, hidden_dim=256, num_layers=2, target="lstm") optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY) criterion = nn.CrossEntropyLoss() loss_meter = tnt.meter.AverageValueMeter() if MODEL_PATH is not None: model.load_state_dict(torch.load(MODEL_PATH)) model.to(device) LOG(f"[MODEL] Build model complete.") # Train if MODE == "train": for epoch in range(EPOCH): loss_meter.reset() for index, data in tqdm.tqdm(enumerate(dataloader, 0)): data = data.long().contiguous().to(device) optimizer.zero_grad() input_, target = data[:, :-1], data[:, 1:] output, _ = model(input_) loss = criterion(output, target.reshape(-1))
model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size, n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob) # these 3 attributes don't affect the Transformer's computations; # they are only used in run_epoch model.batch_size = args.batch_size model.seq_len = args.seq_len model.vocab_size = vocab_size else: print("Model type not recognized.") #loading weights state = torch.load('./best_params.pt') model.load_state_dict(state) model = model.to(device) # LOSS FUNCTION loss_fn = nn.CrossEntropyLoss() if args.optimizer == 'ADAM': optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr) # LEARNING RATE SCHEDULE lr = args.initial_lr lr_decay_base = 1 / 1.15 m_flat_lr = 14.0 # we will not touch lr for the first m_flat_lr epochs ############################################################################### # # DEFINE COMPUTATIONS FOR PROCESSING ONE EPOCH
valid_tensor = torch.load(args.prepro_root+'valid_writer_keywd.pkl').to(device) model = RNN(args.num_readers, args.num_writers, args.num_keywords, args.num_items, args.num_magazines, args.hid_dim, valid_tensor).to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr) criterion = torch.nn.CrossEntropyLoss(ignore_index=0) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5) model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print(model) print('# of params : ', params) if args.start_epoch: model.load_state_dict(torch.load(args.save_path+'%d_rnn_attention.pkl' % args.start_epoch)) best_loss = 9999999 for epoch in range(args.num_epochs): model.train() for i, data in enumerate(tqdm.tqdm(train_loader, desc='Train')): # reader readerat reader_f*8 reader_k*8 (item writer keywd*5 reg_ts maga)*N data = data[0].to(device) items = data[:,18:].contiguous().view(-1,5,9) item_logits = model(data[:,:18], items[:,:-1], mode=args.mode) loss = criterion(item_logits[:,0], items[:,-1,0].long()) model.zero_grad() loss.backward() optimizer.step()
n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob) # these 3 attributes don't affect the Transformer's computations; # they are only used in run_epoch model.batch_size = args.batch_size model.seq_len = args.seq_len model.vocab_size = vocab_size else: print("Model type not recognized.") ############################################################################### # Loading pre-trained model ############################################################################### # dir = './experiments/Exploration_of_optimizers/' trained_dir = './TRANSFORMER_ADAM_model=TRANSFORMER_optimizer=ADAM_initial_lr=0.0001_batch_size=128_seq_len=35_hidden_size=1024_num_layers=6_dp_keep_prob=0.9_save_best_0/' model.load_state_dict(torch.load(trained_dir + 'best_params.pt')) model = model.to(device) # LOSS FUNCTION loss_fn = nn.CrossEntropyLoss() if args.optimizer == 'ADAM': optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr) # LEARNING RATE SCHEDULE lr = args.initial_lr lr_decay_base = 1 / 1.15 m_flat_lr = 14.0 # we will not touch lr for the first m_flat_lr epochs ############################################################################### #
n_units=args.hidden_size, n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob) # these 3 attributes don't affect the Transformer's computations; # they are only used in run_epoch model.batch_size = args.batch_size model.seq_len = args.seq_len model.vocab_size = vocab_size else: print("Model type not recognized.") model = model.to(device) print("###Loading the model from best_params.pt###") model.load_state_dict( torch.load('{}/best_params.pt'.format(args['experiment_path']), map_location=device)) # LOSS FUNCTION loss_fn = nn.CrossEntropyLoss() def run_epoch(model, data): """ One epoch of training/validation (depending on flag is_train). """ model.eval() seq_losses = np.zeros(model.seq_len) # LOOP THROUGH MINIBATCHES for step, (x, y) in enumerate( utils.ptb_iterator(data, model.batch_size, model.seq_len)): if step % 10 == 0:
return grads_norm ############################################################################### # # RUN MAIN LOOP (TRAIN AND VAL) # ############################################################################### print("\n########## Running Main Loop ##########################") grads_norm = [] times = [] # MAIN LOOP model.load_state_dict(torch.load(args.load_model)) model.eval() print(model) for epoch in range(1): t0 = time.time() print('\nEPOCH ' + str(epoch) + ' ------------------') if args.optimizer == 'SGD_LR_SCHEDULE': lr_decay = lr_decay_base**max(epoch - m_flat_lr, 0) lr = lr * lr_decay # decay lr if it is time # RUN MODEL ON VALIDATION DATA FOR ONE MINIBATCH grads_norm = run_epoch(model, valid_data) print(grads_norm)