def train_model(model, optimizer, train, dev, x_to_ix, y_to_ix, batch_size,
                max_epochs):
    """Train ``model`` and print train/validation metrics after every epoch.

    :param model: network passed to ``apply`` together with each batch
    :param optimizer: optimizer stepped once per mini-batch
    :param train: training data handed to ``utils.create_dataset``
    :param dev: validation data handed to ``evaluate_validation_set``
    :param x_to_ix: input vocabulary mapping forwarded to the data helpers
    :param y_to_ix: label mapping forwarded to the data helpers
    :param batch_size: mini-batch size used for the training set
    :param max_epochs: number of passes over ``train``
    :return: the same ``model`` object after training
    """
    # ``reduction='sum'`` is the current spelling of the deprecated
    # ``size_average=False``; the summed loss is divided by len(train) below.
    criterion = nn.NLLLoss(reduction='sum')

    for epoch in range(max_epochs):
        print('Epoch:', epoch)
        y_true = []
        y_pred = []
        total_loss = 0

        for batch, targets, lengths, raw_data in utils.create_dataset(
                train, x_to_ix, y_to_ix, batch_size=batch_size):
            batch, targets, lengths = utils.sort_batch(batch, targets, lengths)

            model.zero_grad()
            pred, loss = apply(model, criterion, batch, targets, lengths)
            loss.backward()
            optimizer.step()

            pred_idx = torch.max(pred, 1)[1]
            # Plain Python ints: the metric helper then never has to convert
            # (possibly CUDA-resident) 0-dim tensors.
            y_true += targets.int().tolist()
            y_pred += pred_idx.data.int().tolist()
            # Detach before accumulating so the running total does not keep a
            # reference to every batch's autograd graph.
            total_loss += loss.detach()

        acc = accuracy_score(y_true, y_pred)
        val_loss, val_acc = evaluate_validation_set(model, dev, x_to_ix,
                                                    y_to_ix, criterion)
        print(
            "Train loss: {} - acc: {} \nValidation loss: {} - acc: {}".format(
                total_loss.data.float() / len(train), acc, val_loss, val_acc))
    return model
def evaluate_test_set(model, test, x_to_ix, y_to_ix):
    """Run ``model`` over ``test`` one sample at a time and report metrics.

    Prints the classification report and confusion matrix, then stores the
    weighted scores on the model as ``model.accuracy``, ``model.F1``,
    ``model.recall`` and ``model.precision``.

    :param model: trained network, called as ``model(batch, lengths)``
    :param test: test data handed to ``utils.create_dataset``
    :param x_to_ix: input vocabulary mapping forwarded to the data helper
    :param y_to_ix: label mapping forwarded to the data helper
    """
    y_true = []
    y_pred = []

    # Evaluate in inference mode (no dropout, no autograd bookkeeping) and put
    # the model back into whatever mode the caller had it in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch, targets, lengths, raw_data in utils.create_dataset(
                    test, x_to_ix, y_to_ix, batch_size=1):
                batch, targets, lengths = utils.sort_batch(
                    batch, targets, lengths)
                pred = model(batch, lengths.cpu().numpy())
                pred_idx = torch.max(pred, 1)[1]
                # Plain ints so that counting and sklearn metrics compare
                # label values rather than tensor objects.
                y_true += targets.int().tolist()
                y_pred += pred_idx.data.int().tolist()
    finally:
        model.train(was_training)

    print(len(y_true), len(y_pred))
    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))

    y_test = y_true
    y_test_pred = y_pred
    name = 'test'
    # NOTE(review): ``counter`` is not defined in this block -- confirm it is
    # not meant to be ``collections.Counter``.
    print(f'{name} set, Counter(y): {counter(y_test)}')
    print(f'cm of {name} set: ',
          metrics.confusion_matrix(y_true=y_test, y_pred=y_test_pred))

    model.accuracy = metrics.accuracy_score(y_test, y_test_pred)
    model.F1 = metrics.f1_score(y_test, y_test_pred, average='weighted')
    model.recall = metrics.recall_score(y_test, y_test_pred,
                                        average='weighted')
    model.precision = metrics.precision_score(y_test, y_test_pred,
                                              average='weighted')
    print(f'{name} set, accuracy: {model.accuracy}, F1: {model.F1}, recall: {model.recall}, '
          f'precision: {model.precision}')
def test(self):
    """Run one pass over ``self.testDataLoader`` and log the loss per batch.

    Each batch is tokenized, sorted with ``sort_batch``, moved to the GPU when
    one is available, and passed through ``self.RN``. The loss is written to
    TensorBoard under ``Loss/Test`` and ``self.stepCnt_test`` is advanced.
    """
    s = '#' * 28 + ' Test Epoch %3d / %3d ' % (self.epoch_i + 1,
                                               self.epoch) + '#' * 28
    print(s)
    bar = tqdm(self.testDataLoader)
    for objs, target_bb, instruction in bar:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # First sort the batch according to the length of instruction.
        # Encode every sentence once and concatenate in a single call instead
        # of re-copying the growing array on each iteration.
        batchSize = objs.size(0)
        instruction_idx = np.concatenate(
            [self.tokenizer.encode_sentence(instruction[i])
             for i in range(batchSize)],
            axis=0)
        seq_lengths, perm_idx = sort_batch(
            instruction_idx)  # input in numpy and return in tensor
        instruction_idx = torch.from_numpy(instruction_idx).long()
        seq_lengths = seq_lengths.long()

        # The original called ``tensor.requires_grad()`` here. That attribute
        # is a bool, so the call raised TypeError; gradients are not needed
        # for evaluation anyway, so the calls are dropped.

        # to cuda
        if torch.cuda.is_available():
            objs = objs.cuda()
            target_bb = target_bb.cuda()
            instruction_idx = instruction_idx.cuda()
            perm_idx = perm_idx.cuda()

        # sort according the length
        objs = objs[perm_idx]
        target_bb = target_bb[perm_idx]
        instruction_idx = instruction_idx[perm_idx]

        with torch.no_grad():
            # Go through the models (the test path also passes target_bb).
            output_bb = self.RN(objs, instruction_idx, seq_lengths, target_bb)
            # calculate loss
            lossValue = self.loss(input=output_bb, target=target_bb)

        # Tensorboard record
        self.writer.add_scalar('Loss/Test', lossValue.item(),
                               self.stepCnt_test)
        self.stepCnt_test += 1
        del lossValue
def evaluate_validation_set(model, devset, x_to_ix, y_to_ix, criterion):
    """Compute the average loss and accuracy of ``model`` on ``devset``.

    :param model: network passed to ``apply`` together with each sample
    :param devset: validation data handed to ``utils.create_dataset``
    :param x_to_ix: input vocabulary mapping forwarded to the data helper
    :param y_to_ix: label mapping forwarded to the data helper
    :param criterion: loss module passed to ``apply``
    :return: ``(summed loss / len(devset), accuracy)``; the loss is a tensor
    """
    y_true = []
    y_pred = []
    total_loss = 0

    # Validation needs neither dropout nor autograd graphs. Without no_grad
    # every per-sample loss added to ``total_loss`` would keep its whole
    # forward graph alive. The caller's train/eval mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch, targets, lengths, raw_data in utils.create_dataset(
                    devset, x_to_ix, y_to_ix, batch_size=1):
                batch, targets, lengths = utils.sort_batch(
                    batch, targets, lengths)
                pred, loss = apply(model, criterion, batch, targets, lengths)
                pred_idx = torch.max(pred, 1)[1]
                # Plain ints so the metric helper compares label values.
                y_true += targets.int().tolist()
                y_pred += pred_idx.data.int().tolist()
                total_loss += loss
    finally:
        model.train(was_training)

    acc = accuracy_score(y_true, y_pred)
    return total_loss.data.float() / len(devset), acc
def forward(self, x):
    """Embed and encode ``x`` as a packed sequence, then undo the sorting.

    ``sort_batch`` supplies the reordered batch, its lengths and the indices
    that ``restore_batch`` later uses on the padded encoder output.
    """
    ordered, lengths, order = sort_batch(x)
    embedded = self.Embedding(ordered)

    # Pack so the encoder is driven by the per-sequence lengths.
    packed_in = pack_padded_sequence(
        embedded, lengths.tolist(), batch_first=True)
    packed_out, _ = self.Encoder(packed_in)

    # Unpack to a padded tensor and hand it back through restore_batch.
    padded, _ = pad_packed_sequence(packed_out, batch_first=True)
    return restore_batch(padded, order)
def evaluate_test_set(model, test, x_to_ix, y_to_ix):
    """Run ``model`` over ``test`` one sample at a time and print metrics.

    Prints the number of gold/predicted labels, the classification report and
    the confusion matrix.

    :param model: trained network, called as ``model(batch, lengths)``
    :param test: test data handed to ``utils.create_dataset``
    :param x_to_ix: input vocabulary mapping forwarded to the data helper
    :param y_to_ix: label mapping forwarded to the data helper
    """
    y_true = []
    y_pred = []

    # Evaluate in inference mode (no dropout, no autograd bookkeeping) and put
    # the model back into whatever mode the caller had it in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch, targets, lengths, raw_data in utils.create_dataset(
                    test, x_to_ix, y_to_ix, batch_size=1):
                batch, targets, lengths = utils.sort_batch(
                    batch, targets, lengths)
                pred = model(batch, lengths.cpu().numpy())
                pred_idx = torch.max(pred, 1)[1]
                # Plain ints so the sklearn helpers compare label values.
                y_true += targets.int().tolist()
                y_pred += pred_idx.data.int().tolist()
    finally:
        model.train(was_training)

    print(len(y_true), len(y_pred))
    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))
def compute_validation_metrics(model, dataloader, device, size):
    """
    For the given model, computes accuracy & loss on validation/test set.

    At most ``size // batch_size`` mini-batches are evaluated. Accuracy is
    normalised by the number of samples actually seen and the loss by the
    number of mini-batches actually processed, so the metrics stay correct
    when the dataloader yields fewer batches than requested. The model is
    left in eval mode; callers switch back to training themselves.

    :param model: VQA model
    :param dataloader: validation/test set dataloader
    :param device: cuda/cpu device where the model resides
    :param size: no. of samples (subset) to use
    :return: metrics {'accuracy', 'loss'}; both are 0.0 when no batch was
        evaluated (e.g. ``size < batch_size``)
    :rtype: dict
    """
    model.eval()

    batch_size = dataloader.batch_size
    n_iters = size // batch_size

    loss = 0.0
    num_correct = 0
    num_samples = 0
    num_batches = 0

    with torch.no_grad():
        # Evaluate on mini-batches
        for i, batch in enumerate(dataloader):
            # Stop *before* processing batch number n_iters; checking after
            # the forward pass evaluated one batch more than was divided by.
            if i >= n_iters:
                break

            # Load batch data
            image = batch['image']
            question = batch['question']
            ques_len = batch['ques_len']
            label = batch['label']

            # Sort batch based on sequence length
            image, question, label, ques_len = sort_batch(
                image, question, label, ques_len)

            # Load data onto the available device
            image = image.to(device)
            question = question.to(device)
            ques_len = ques_len.to(device)
            label = label.to(device)

            # Forward Pass
            label_logits = model(image, question, ques_len)

            # Compute Accuracy
            label_predicted = torch.argmax(label_logits, dim=1)
            num_correct += (label == label_predicted).sum().item()
            num_samples += label.size(0)

            # Compute Loss
            loss += F.cross_entropy(label_logits, label, reduction='mean')
            num_batches += 1

    # Final Accuracy (percentage over the samples actually evaluated)
    accuracy = 100.0 * num_correct / num_samples if num_samples else 0.0

    # Final Loss (averaged over the mini-batches actually evaluated)
    if num_batches:
        loss = loss / num_batches

    metrics = {'accuracy': accuracy, 'loss': loss}
    return metrics
def _build_vqa_model(model_config, num_classes, device):
    """Instantiate the configured VQA network and move it to ``device``."""
    VQANet = model_config['model']
    model = VQANet(model_config['question_params'],
                   model_config['image_params'],
                   K=num_classes)
    model.to(device)
    return model


def main():
    """Command-line entry point: train a VQA model or evaluate a checkpoint.

    In ``train`` mode the function builds the datasets, trains for
    ``--num_epochs`` epochs, logs to TensorBoard and a log file under
    ``expt_dir/expt_name/run_name``, periodically validates (when
    ``--val_file`` is given) and saves checkpoints every ``--save_interval``
    steps. In ``test`` mode it loads ``--model_ckpt`` from that directory and
    prints accuracy/loss on the dataset given by ``--val_file``/``--val_img``.
    """
    parser = argparse.ArgumentParser(description='Visual Question Answering')

    # Experiment params
    parser.add_argument('--mode', type=str, help='train or test mode',
                        required=True, choices=['train', 'test'])
    parser.add_argument('--expt_dir', type=str,
                        help='root directory to save model & summaries',
                        required=True)
    parser.add_argument('--expt_name', type=str,
                        help='expt_dir/expt_name: organize experiments',
                        required=True)
    parser.add_argument(
        '--run_name', type=str,
        help='expt_dir/expt_name/run_name: organize training runs',
        required=True)
    parser.add_argument('--model', type=str, help='VQA model',
                        choices=['baseline', 'attention', 'bert'],
                        required=True)

    # Data params
    parser.add_argument('--train_img', type=str,
                        help='path to training images directory',
                        required=True)
    parser.add_argument('--train_file', type=str,
                        help='training dataset file', required=True)
    parser.add_argument('--val_img', type=str,
                        help='path to validation images directory')
    parser.add_argument('--val_file', type=str,
                        help='validation dataset file')
    parser.add_argument('--num_cls', '-K', type=int_min_two,
                        help='top K answers (labels); min=2', default=1000)

    # Vocab params
    parser.add_argument(
        '--vocab_file', type=str,
        help='vocabulary pickle file (gen. by prepare_data.py)')

    # Training params
    parser.add_argument('--batch_size', '-bs', type=int, help='batch size',
                        default=8)
    parser.add_argument('--num_epochs', '-ep', type=int,
                        help='number of epochs', default=50)
    parser.add_argument('--learning_rate', '-lr', type=float,
                        help='initial learning rate', default=1e-4)
    parser.add_argument('--log_interval', type=int,
                        help='interval size for logging training summaries',
                        default=100)
    parser.add_argument('--save_interval', type=int,
                        help='save model after `n` weight update steps',
                        default=3000)
    parser.add_argument('--val_size', type=int,
                        help='validation set size for evaluating accuracy',
                        default=10000)

    # Evaluation params
    parser.add_argument('--K_eval', type=int,
                        help='top-K labels during evaluation/inference',
                        default=1000)

    # Model params
    parser.add_argument(
        '--model_ckpt', type=str,
        help='resume training/perform inference; e.g. model_1000.pth')
    parser.add_argument('--vgg_wts_path', type=str,
                        help='VGG-11 (bn) pre-trained weights (.pth) file')
    parser.add_argument('--vgg_train', type=str2bool,
                        help='whether to train the VGG encoder',
                        default='false')

    # GPU params
    parser.add_argument('--gpu_id', type=int,
                        help='cuda:gpu_id (0,1,2,..) if num_gpus = 1',
                        default=0)
    parser.add_argument('--opt_lvl', type=int,
                        help='Automatic-Mixed Precision: opt-level (O_)',
                        default=1, choices=[0, 1, 2, 3])

    # Misc params
    parser.add_argument('--num_workers', type=int,
                        help='number of worker threads for Dataloader',
                        default=1)

    args = parser.parse_args()

    # Test mode reads the dataset from --val_file/--val_img and joins
    # --model_ckpt onto the run directory; fail early with a clear message
    # instead of an obscure error deep inside dataset/path handling.
    if args.mode == 'test' and not (args.val_file and args.model_ckpt):
        parser.error('--val_file and --model_ckpt are required in test mode')

    device = torch.device(
        'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu')
    print('Selected Device: {}'.format(device))

    # Train params
    n_epochs = args.num_epochs
    batch_size = args.batch_size
    lr = args.learning_rate

    # Load vocab (.pickle) file
    vocab = load_vocab(args.vocab_file)
    print('Vocabulary loaded from {}'.format(args.vocab_file))

    # Unpack vocab. NOTE(review): this relies on the insertion order of the
    # pickled dict; unpacking by key would be safer once the keys are known.
    word2idx, idx2word, label2idx, idx2label, max_seq_length = vocab.values()
    vocab_size = len(word2idx)

    # Model Config
    model_config = setup_model_configs(args, vocab_size)
    image_size = model_config['image_size']

    # The same preprocessing pipeline is used for every dataset split.
    transform = Compose([
        Resize(image_size),
        ToTensor(),
        Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Num of classes = K + 1 (for UNKNOWN)
    num_classes = args.num_cls + 1

    # TODO: Multi-GPU PyTorch Implementation

    # Train
    if args.mode == 'train':
        # Setup train log directory
        log_dir = os.path.join(args.expt_dir, args.expt_name, args.run_name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        print('Training Log Directory: {}\n'.format(log_dir))

        # TensorBoard summaries setup  -->  /expt_dir/expt_name/run_name/
        writer = SummaryWriter(log_dir)

        # Train log file
        log_file = setup_logs_file(parser, log_dir)

        # Dataset & Dataloader
        train_dataset = VQADataset(args.train_file, args.train_img, word2idx,
                                   label2idx, max_seq_length,
                                   transform=transform)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size, shuffle=True, drop_last=True,
            num_workers=args.num_workers)

        print('Question Vocabulary Size: {} \n\n'.format(vocab_size))
        print('Train Data Size: {}'.format(len(train_dataset)))

        if args.val_file:
            # Use the same word-index dicts as that obtained for the
            # training set
            val_dataset = VQADataset(args.val_file, args.val_img, word2idx,
                                     label2idx, max_seq_length,
                                     transform=transform)
            val_loader = torch.utils.data.DataLoader(
                val_dataset, batch_size, shuffle=True, drop_last=True,
                num_workers=args.num_workers)

            log_msg = 'Validation Data Size: {}\n'.format(len(val_dataset))
            log_msg += 'Validation Accuracy is computed using {} samples. See --val_size\n'.format(
                args.val_size)
            print_and_log(log_msg, log_file)

        # Define model & load to device
        model = _build_vqa_model(model_config, num_classes, device)

        # Load model checkpoint file (if specified)
        if args.model_ckpt:
            model_ckpt_path = args.model_ckpt
            # map_location lets a checkpoint saved on another device be
            # resumed on the currently selected one (as in test mode).
            checkpoint = torch.load(model_ckpt_path, map_location=device)
            model.load_state_dict(checkpoint)

            log_msg = 'Model successfully loaded from {}'.format(
                model_ckpt_path) + '\nResuming Training...'
            print_and_log(log_msg, log_file)

        # Loss & Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr)

        # TODO: StepLR Scheduler

        model, optimizer = amp.initialize(
            model, optimizer, opt_level="O{}".format(args.opt_lvl))

        steps_per_epoch = len(train_loader)
        start_time = time()
        curr_step = 0
        # TODO: Save model with best validation accuracy

        for epoch in range(n_epochs):
            for batch_data in train_loader:
                # Load batch data
                image = batch_data['image']
                question = batch_data['question']
                ques_len = batch_data['ques_len']
                label = batch_data['label']

                # Sort batch based on sequence length
                image, question, label, ques_len = sort_batch(
                    image, question, label, ques_len)

                # Load data onto the available device
                image = image.to(device)  # [B, C, H, W]
                question = question.to(device)  # [B, L]
                ques_len = ques_len.to(device)  # [B]
                label = label.to(device)  # [B]

                # Forward Pass
                label_predict = model(image, question, ques_len)

                # Compute Loss
                loss = criterion(label_predict, label)

                # Backward Pass
                # NOTE: amp loss scaling (amp.scale_loss) is intentionally
                # not used here; the plain backward pass is kept.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Print Results - Loss value & Validation Accuracy
                if (curr_step + 1) % args.log_interval == 0 or curr_step == 1:
                    # Validation set accuracy
                    if args.val_file:
                        validation_metrics = compute_validation_metrics(
                            model, val_loader, device, size=args.val_size)

                        # Reset the mode to training
                        model.train()

                        log_msg = 'Validation Accuracy: {:.2f} % || Validation Loss: {:.4f}'.format(
                            validation_metrics['accuracy'],
                            validation_metrics['loss'])
                        print_and_log(log_msg, log_file)

                        # Add summaries to TensorBoard
                        writer.add_scalar('Val/Accuracy',
                                          validation_metrics['accuracy'],
                                          curr_step)
                        writer.add_scalar('Val/Loss',
                                          validation_metrics['loss'],
                                          curr_step)

                    # Add summaries to TensorBoard
                    writer.add_scalar('Train/Loss', loss.item(), curr_step)

                    # Compute elapsed & remaining time for training to
                    # complete. curr_step is 0-based, so curr_step + 1 steps
                    # are done; dividing by curr_step itself raised
                    # ZeroDivisionError when logging on the very first step.
                    steps_done = curr_step + 1
                    time_elapsed = (time() - start_time) / 3600
                    # total time = time_per_step * steps_per_epoch * total_epochs
                    total_time = (time_elapsed /
                                  steps_done) * steps_per_epoch * n_epochs
                    time_left = total_time - time_elapsed

                    log_msg = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f} | time elapsed: {:.2f}h | time left: {:.2f}h'.format(
                        epoch + 1, n_epochs, curr_step + 1, steps_per_epoch,
                        loss.item(), time_elapsed, time_left)
                    print_and_log(log_msg, log_file)

                # Save the model
                if (curr_step + 1) % args.save_interval == 0:
                    print('Saving the model at the {} step to directory:{}'.
                          format(curr_step + 1, log_dir))
                    save_path = os.path.join(
                        log_dir, 'model_' + str(curr_step + 1) + '.pth')
                    torch.save(model.state_dict(), save_path)

                curr_step += 1

            # Validation set accuracy on the entire set
            if args.val_file:
                # Total validation set size
                total_validation_size = len(val_dataset)
                validation_metrics = compute_validation_metrics(
                    model, val_loader, device, total_validation_size)

                log_msg = '\nAfter {} epoch:\n'.format(epoch + 1)
                log_msg += 'Validation Accuracy: {:.2f} % || Validation Loss: {:.4f}\n'.format(
                    validation_metrics['accuracy'],
                    validation_metrics['loss'])
                print_and_log(log_msg, log_file)

                # Reset the mode to training
                model.train()

        writer.close()
        log_file.close()

    # TODO: Test/Inference
    elif args.mode == 'test':
        # Use the same word-index dicts as that obtained for the training set
        test_dataset = VQADataset(args.val_file, args.val_img, word2idx,
                                  label2idx, max_seq_length,
                                  transform=transform)
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size, shuffle=True, drop_last=True,
            num_workers=args.num_workers)

        # Define model & load to device
        model = _build_vqa_model(model_config, num_classes, device)

        # Load weights from checkpoint file
        log_dir = os.path.join(args.expt_dir, args.expt_name, args.run_name)
        model_ckpt_path = os.path.join(log_dir, args.model_ckpt)
        checkpoint = torch.load(model_ckpt_path, map_location=device)
        model.load_state_dict(checkpoint)
        print('Model successfully loaded from {}'.format(model_ckpt_path))

        test_metrics = compute_validation_metrics(model, test_loader, device,
                                                  len(test_dataset))
        # The metrics were previously computed and silently discarded.
        print('Test Accuracy: {:.2f} % || Test Loss: {:.4f}'.format(
            test_metrics['accuracy'], test_metrics['loss']))
# Top-level seq2seq training loop (script fragment). For every batch it runs
# the encoder once and then steps the decoder over the target sequence,
# summing the per-step loss. NOTE(review): no backward pass or optimizer step
# is visible in this fragment -- presumably it continues past this point.
losses = []
start = time()
for epoch in range(EPOCHS):
    print('Training............................................................')
    # Put both networks into training mode at the start of each epoch.
    encoder.train()
    decoder.train()
    total_loss = 0
    for (batch, (input, target, input_len)) in enumerate(dataset):
        loss = 0
        # Reorder the batch with sort_batch before it is fed to the encoder.
        xsorted, ysorted, x_sorted_len = sort_batch(input, target, input_len)
        enc_output, enc_hidden = encoder(xsorted.to(device), x_sorted_len, device) # Reminder: the signature is forward(self, x, lens, device)
        # The decoder starts from the encoder's hidden state.
        dec_hidden = enc_hidden
        # One '<sos>' token id per sequence as the first decoder input.
        dec_input = torch.tensor([[target_lang_train.w2i['<sos>']]] * BATCH_SIZE) # tensor([[256]])
        # Step through target positions 1..end (position 0 is skipped).
        for t in range(1, ysorted.size(1)):
            predictions, dec_hidden, _ = decoder(dec_input.to(device), dec_hidden.to(device), enc_output.to(device))
            loss += loss_function(ysorted[:,t].to(device), predictions.to(device), criterion) # Reminder: argument order is (real_value, pred_value, criterion)
            dec_input = ysorted[:,t].unsqueeze(1) # update the decoder input at every step t with the ground-truth token
def _plot_training_curves(loss_list, acc_list, val_loss_list, val_acc_list,
                          args):
    """Plot the four training/validation curves, save them, then show them."""
    fig, axs = plt.subplots(2, 2, figsize=(20, 10))
    fig.suptitle(
        "Hidden_dim: {} -embded_dim: {} - Batch Size: {} - Num Epochs: {} - learning_rate : {} - weight_decay : {}"
        .format(args.hidden_dim, args.embded_dim, args.batch_size,
                args.num_epochs, args.learning_rate, args.weight_decay),
        fontsize=16)

    panels = (
        (axs[0][0], loss_list, 'Training Loss'),
        (axs[0][1], acc_list, 'Training Accuracy'),
        (axs[1][0], val_loss_list, 'Validation Loss'),
        (axs[1][1], val_acc_list, 'Validation Accuracy'),
    )
    for ax, values, title in panels:
        ax.plot(values)
        ax.set_title(title)
        # Label the final value; skipped when no epoch was run.
        if values:
            ax.annotate('%.3f' % values[-1],
                        xy=(len(values) / 2, values[-1]))

    # Save before showing: the annotations must be on the figure, and showing
    # first can leave nothing to save once the window has been closed.
    fig.savefig('result' + str(args.hidden_dim) + str(args.embded_dim) +
                str(args.batch_size) + str(args.num_epochs) +
                str(args.learning_rate) + str(args.weight_decay) + '.png')
    plt.show()


def train_model(model, optimizer, train, dev, x_to_ix, y_to_ix, batch_size,
                max_epochs, args):
    """Train ``model``, print per-epoch metrics and plot the learning curves.

    :param model: network passed to ``apply`` together with each batch
    :param optimizer: optimizer stepped once per mini-batch
    :param train: training data handed to ``utils.create_dataset``
    :param dev: validation data handed to ``evaluate_validation_set``
    :param x_to_ix: input vocabulary mapping forwarded to the data helpers
    :param y_to_ix: label mapping forwarded to the data helpers
    :param batch_size: mini-batch size used for the training set
    :param max_epochs: number of passes over ``train``
    :param args: namespace providing ``hidden_dim``, ``embded_dim``,
        ``batch_size``, ``num_epochs``, ``learning_rate`` and
        ``weight_decay`` for the figure title and file name
    :return: the same ``model`` object after training
    """
    criterion = nn.CrossEntropyLoss()

    # Histories are stored as plain floats so they can be plotted directly.
    loss_list = []
    acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epochs):
        print('Epoch:', epoch)
        # Make sure training mode is active even if validation changed it.
        model.train()
        y_true = []
        y_pred = []
        total_loss = 0

        for batch, targets, lengths, raw_data in utils.create_dataset(
                train, x_to_ix, y_to_ix, batch_size=batch_size):
            batch, targets, lengths = utils.sort_batch(batch, targets, lengths)

            model.zero_grad()
            pred, loss = apply(model, criterion, batch, targets, lengths)
            loss.backward()
            optimizer.step()

            pred_idx = torch.max(pred, 1)[1]
            # Plain ints so the metric helper compares label values.
            y_true += targets.int().tolist()
            y_pred += pred_idx.data.int().tolist()
            # Detach so the running total does not keep a reference to every
            # batch's autograd graph.
            total_loss += loss.detach()

        acc = accuracy_score(y_true, y_pred)
        epoch_loss = total_loss.data.float() / len(train)
        loss_list.append(float(epoch_loss))
        acc_list.append(100 * acc)

        val_loss, val_acc = evaluate_validation_set(model, dev, x_to_ix,
                                                    y_to_ix, criterion)
        print(
            "Train loss: {} - acc: {} \nValidation loss: {} - acc: {}".format(
                epoch_loss, acc, val_loss, val_acc))
        val_loss_list.append(float(val_loss))
        val_acc_list.append(100 * val_acc)

    _plot_training_curves(loss_list, acc_list, val_loss_list, val_acc_list,
                          args)
    return model
# Smoke-test script: builds the encoder, pushes one batch from ``dataset``
# through it, prints the resulting tensor shapes, then builds the decoder.
from utils import device, sort_batch
from model import Encoder, Decoder, vocab_input_size, embedding_dim, units, BATCH_SIZE, vocab_target_size
from converting import dataset, target_lang_train
import torch

# Encoder tester (English for the string statement below)
""" 인코더 테스터 """
encoder = Encoder(vocab_input_size, embedding_dim, units, BATCH_SIZE)
encoder.to(device)

# Pull a single batch from the dataset and reorder it with sort_batch.
iteration = iter(dataset)
x, y, x_len = next(iteration)
xsorted, ysorted, lensorted = sort_batch(x, y, x_len)
enc_output, enc_hidden = encoder(xsorted.to(device), lensorted, device)
#print(enc_output[0][0])

# Report the shapes of the raw batch and of the encoder results.
print("------------Encoder info --------------------")
print("Input: ", x.shape)
print("Output: ", y.shape)
print("Encoder Output: ", enc_output.shape) # batch_size X max_length X enc_units
print( "Encoder Hidden: ", enc_hidden.shape) # batch_size X enc_units (corresponds to the last state)

# Decoder tester (English for the string statement below)
""" 디코더 테스터 """
decoder = Decoder(vocab_target_size, embedding_dim, units, units, BATCH_SIZE).to(device)
def main():
    """Train the encoder/decoder translation model on the "eng"/"fra" data.

    Reads the language pairs, converts sentences to padded index sequences,
    splits them 80/20 into train/validation, and trains for ``EPOCHS`` epochs
    on CPU, feeding the ground-truth target token to the decoder at every
    step. Progress is printed every 100 batches and at the end of each epoch.
    """
    # Local import: the original mixed ``time()`` and ``time.time()``, which
    # cannot both work under a single import of the ``time`` name.
    from time import perf_counter

    input_lang, output_lang, pairs, data1, data2 = read_langs(
        "eng", "fra", True)

    # Sentences -> lists of word indices, padded to the longest sentence.
    input_tensor = [[input_lang.word2index[s] for s in es.split(' ')]
                    for es in data1]
    target_tensor = [[output_lang.word2index[s] for s in es.split(' ')]
                     for es in data2]
    max_length_inp = max_length(input_tensor)
    max_length_tar = max_length(target_tensor)
    input_tensor = [pad_sequences(x, max_length_inp) for x in input_tensor]
    target_tensor = [pad_sequences(x, max_length_tar) for x in target_tensor]
    print(len(target_tensor))

    (input_tensor_train, input_tensor_val, target_tensor_train,
     target_tensor_val) = train_test_split(input_tensor, target_tensor,
                                           test_size=0.2)
    # Show length
    print(len(input_tensor_train), len(target_tensor_train),
          len(input_tensor_val), len(target_tensor_val))

    BUFFER_SIZE = len(input_tensor_train)
    BATCH_SIZE = 64
    N_BATCH = BUFFER_SIZE // BATCH_SIZE
    embedding_dim = 256
    units = 1024
    vocab_inp_size = len(input_lang.word2index)
    vocab_tar_size = len(output_lang.word2index)

    train_dataset = MyData(input_tensor_train, target_tensor_train)
    # Built but not consumed by the loop below (no validation pass yet).
    val_dataset = MyData(input_tensor_val, target_tensor_val)
    # drop_last=True keeps every batch at exactly BATCH_SIZE rows, which the
    # '<sos>' decoder input below relies on.
    dataset = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                         drop_last=True, shuffle=True)

    device = torch.device("cpu")
    encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
    decoder = Decoder(vocab_tar_size, embedding_dim, units, units, BATCH_SIZE)
    encoder.to(device)
    decoder.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()), lr=0.001)

    EPOCHS = 10
    for epoch in range(EPOCHS):
        start = perf_counter()
        encoder.train()
        decoder.train()
        total_loss = 0.0

        for (batch, (inp, targ, inp_len)) in enumerate(dataset):
            loss = 0
            xs, ys, lens = sort_batch(inp, targ, inp_len)
            enc_output, enc_hidden = encoder(xs.to(device), lens, device)
            dec_hidden = enc_hidden
            dec_input = torch.tensor(
                [[output_lang.word2index['<sos>']]] * BATCH_SIZE)

            for t in range(1, ys.size(1)):
                predictions, dec_hidden, _ = decoder(dec_input.to(device),
                                                     dec_hidden.to(device),
                                                     enc_output.to(device))
                loss += loss_function(criterion, ys[:, t].to(device),
                                      predictions.to(device))
                # Next decoder input is the ground-truth token at step t.
                dec_input = ys[:, t].unsqueeze(1)

            batch_loss = (loss / int(ys.size(1)))
            # Accumulate a Python float so the epoch total does not hold on
            # to each batch's autograd graph.
            total_loss += batch_loss.item()

            optimizer.zero_grad()
            loss.backward()
            ### UPDATE MODEL PARAMETERS
            optimizer.step()

            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    epoch + 1, batch, batch_loss.detach().item()))

        ### TODO: Save checkpoint for model
        # max(..., 1) avoids ZeroDivisionError when the training set holds
        # fewer than BATCH_SIZE samples (no batch is produced at all).
        print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                            total_loss / max(N_BATCH, 1)))
        print('Time taken for 1 epoch {} sec\n'.format(
            perf_counter() - start))
def epochTrain(self):
    """Train ``self.RN`` for one pass over ``self.trainDataLoader``.

    For every batch: tokenize and sort the instructions, mean-centre the
    object features, run the model, log the loss to TensorBoard, take an
    optimizer and scheduler step, and periodically save the model and log
    target/output boxes as text.
    """
    s = '#' * 30 + ' Epoch %3d / %3d ' % (self.epoch_i + 1,
                                          self.epoch) + '#' * 30
    print(s)
    bar = tqdm(self.trainDataLoader)
    for idx, (objs, target_bb, instruction) in enumerate(bar):
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # First sort the batch according to the length of instruction.
        # Encode every sentence once and concatenate in a single call instead
        # of re-copying the growing array on each iteration.
        batchSize = objs.size(0)
        instruction_idx = np.concatenate(
            [self.tokenizer.encode_sentence(instruction[i])
             for i in range(batchSize)],
            axis=0)
        seq_lengths, perm_idx = sort_batch(
            instruction_idx)  # input in numpy and return in tensor
        instruction_idx = torch.from_numpy(instruction_idx).long()
        seq_lengths = seq_lengths.long()

        # Norm: subtract, per sample, the mean feature value of its leading
        # non-empty regions (a region counts as empty when its features sum
        # to zero). Column 0 of each region is kept untouched as the label.
        with torch.no_grad():
            bs, _, fea = objs.size()
            objs_label = objs[:, :, :1]
            objs_feature = objs[:, :, 1:]
            for batch_iter in range(bs):
                tmpfeature = objs_feature[batch_iter]
                total_sum = 0.0
                # Default covers a sample whose regions are all non-empty.
                # The original recorded nothing in that case, which shifted
                # every later sample onto the wrong statistics or aborted.
                num_r = self.objNumMax
                for i in range(self.objNumMax):
                    tmp_sum = tmpfeature[i].sum().item()
                    if tmp_sum == 0:
                        num_r = i
                        break
                    total_sum += tmp_sum
                # num_r == 0 means the first region is already empty: there
                # is nothing to centre (and no division by zero).
                if num_r > 0:
                    region_mean = total_sum / ((fea - 1) * num_r)
                    objs_feature[batch_iter, :num_r] -= region_mean
            objs = torch.cat((objs_label, objs_feature), 2)

        # to cuda
        if torch.cuda.is_available():
            objs = objs.cuda()
            target_bb = target_bb.cuda()
            instruction_idx = instruction_idx.cuda()
            perm_idx = perm_idx.cuda()

        # sort according the length
        objs = objs[perm_idx]
        target_bb = target_bb[perm_idx]
        instruction_idx = instruction_idx[perm_idx]

        # Go through the models
        output_bb = self.RN(objs, instruction_idx, seq_lengths)

        # calculate loss
        lossValue = self.loss(input=output_bb, target=target_bb)

        # Tensorboard record
        self.writer.add_scalar('Loss/Train', lossValue.item(),
                               self.stepCnt_train)
        self.stepCnt_train += 1

        # print loss
        bar.set_description('Epoch: %d    Loss: %f' %
                            (self.epoch_i + 1, lossValue.item()))

        # Backward
        self.optimizer.zero_grad()
        lossValue.backward()
        self.optimizer.step()
        self.scheduler.step(lossValue)

        # Save model
        if (idx + 1) % self.batchModelSave == 0:
            self.save(batchIdx=(idx + 1))

        if idx % self.batchPrint == 0:
            output_bb_numpy = output_bb.detach().cpu().numpy()
            target_bb_numpy = target_bb.detach().cpu().numpy()
            # One ' ### target... & output...' segment per sample.
            parts = []
            for i in range(output_bb_numpy.shape[0]):
                parts.append(' ### ')
                parts.extend(
                    str(target_bb_numpy[i][j]) + ', ' for j in range(4))
                parts.append(' & ')
                parts.extend(
                    str(output_bb_numpy[i][j]) + ', ' for j in range(4))
            self.writer.add_text('Target & Output', ''.join(parts),
                                 self.stepCnt_train)

        del lossValue