def test(i, epoch, model, data_loader, args):
    """Evaluate submodel `i` on the test set as a one-vs-rest binary task.

    Labels are rewritten to 1 iff the sample's class equals `i`, then the
    i-th submodel is scored on that binary problem.

    :param i: index of the submodel to evaluate (also the positive class)
    :param epoch: current epoch number (logging only)
    :param model: pool model exposing `loss_function` and `forward(..., which_model=i)`
    :param data_loader: object holding `.test_loader`
    :param args: argparse namespace; only `.cuda` is read here
    :returns: (mean_loss, mean_accuracy) averaged over test batches
    """
    model.eval()
    loss = []
    correct = []
    for data, target in data_loader.test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        # FIX: the whole forward pass now runs under no_grad; previously only
        # the Variable() construction was wrapped, so eval still built a graph.
        with torch.no_grad():
            data, target = Variable(data), Variable(target)
            if len(list(target.size())) > 1:  # XXX: hax
                target = torch.squeeze(target)

            # relabel: positive iff the sample belongs to class i
            target = target == i
            target = target.type(long_type(args.cuda))

            output = model(data, which_model=i)
            loss_t = model.loss_function(output, target)
            correct_t = softmax_accuracy(output, target)

        # FIX: `.data[0]` was removed from PyTorch; `.item()` is the
        # supported way to extract a python scalar from a 0-dim tensor.
        loss.append(loss_t.detach().cpu().item())
        correct.append(correct_t)

    loss = np.mean(loss)
    acc = np.mean(correct)
    print('\n[POOL_{} | {} samples]Test Epoch: {}\tAverage loss: {:.4f}\tAverage Accuracy: {:.4f}\n'.format(
        i, num_samples_in_loader(data_loader.test_loader), epoch, loss, acc))
    return loss, acc
def train(i, epoch, model, optimizer, data_loader, args):
    """Train submodel `i` for one epoch as a one-vs-rest binary task.

    Labels are rewritten to 1 iff the sample's class equals `i`; only the
    i-th submodel of the pool is updated.

    :param i: which submodel to train (also the positive class)
    :param epoch: current epoch number (logging only)
    :param model: pool model exposing `loss_function` and `forward(..., which_model=i)`
    :param optimizer: optimizer over the submodel's parameters
    :param data_loader: object holding `.train_loader`
    :param args: argparse namespace; `.cuda` and `.log_interval` are read here
    """
    model.train()
    for batch_idx, (data, target) in enumerate(data_loader.train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        data, target = Variable(data), Variable(target)
        if len(list(target.size())) > 1:  # XXX: hax
            target = torch.squeeze(target)

        # relabel: positive iff the sample belongs to class i
        target = target == i
        target = target.type(long_type(args.cuda))

        optimizer.zero_grad()

        # project to the output dimension
        output = model(data, which_model=i)
        loss = model.loss_function(output, target)
        correct = softmax_accuracy(output, target)

        # compute loss
        loss.backward()
        optimizer.step()

        # log every nth interval
        if batch_idx % args.log_interval == 0:
            # the total number of samples is different
            # if we have filtered using the class_sampler
            if hasattr(data_loader.train_loader, "sampler") \
               and hasattr(data_loader.train_loader.sampler, "num_samples"):
                num_samples = data_loader.train_loader.sampler.num_samples
            else:
                num_samples = len(data_loader.train_loader.dataset)

            # FIX: `loss.data[0]` was removed from PyTorch; use `.item()`
            print('[POOL_{}]Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.4f}'.format(
                i, epoch, batch_idx * len(data), num_samples,
                100. * batch_idx * len(data) / num_samples,
                loss.item(), correct))
def execute_graph(epoch, model, loader, grapher, optimizer=None, prefix='test'):
    """ execute the graph; when 'train' is in the name the model runs the optimizer

    :param epoch: the current epoch number
    :param model: the torch model
    :param loader: the train or **TEST** loader
    :param grapher: the graph writing helper (eg: visdom / tf wrapper)
    :param optimizer: the optimizer (required for 'train'/'valid' prefixes)
    :param prefix: 'train', 'test' or 'valid'
    :returns: the detached mean loss (scalar), for early stopping
    :rtype: float
    """
    start_time = time.time()
    model.eval() if prefix == 'test' else model.train()
    assert optimizer is not None if 'train' in prefix or 'valid' in prefix else optimizer is None
    loss_map, num_samples = {}, 0  # FIX: dropped unused local `print_once`

    # iterate over train and valid data
    for minibatch, labels in loader:
        minibatch = minibatch.cuda() if args.cuda else minibatch
        labels = labels.cuda() if args.cuda else labels
        if args.half:
            minibatch = minibatch.half()

        if 'train' in prefix:
            optimizer.zero_grad()  # zero gradients on optimizer

        # only disable autograd for pure test evaluation
        with torch.no_grad() if prefix == 'test' else dummy_context():
            pred_logits = model(minibatch)  # get normal predictions
            loss_t = {
                'loss_mean': F.cross_entropy(
                    input=pred_logits, target=labels),  # change to F.mse_loss for regression
                'accuracy_mean': softmax_accuracy(
                    preds=F.softmax(pred_logits, -1), targets=labels)
            }
            loss_map = _add_loss_map(loss_map, loss_t)
            num_samples += minibatch.size(0)

        if 'train' in prefix:  # compute bp and optimize
            loss_t['loss_mean'].backward()
            optimizer.step()

        if args.debug_step:  # for testing purposes
            break

    # compute the mean of the map
    loss_map = _mean_map(loss_map)  # reduce the map to get actual means
    print(
        '{}[Epoch {}][{} samples][{:.2f} sec]: Loss: {:.4f}\tAccuracy: {:.4f}'.
        format(prefix, epoch, num_samples,
               time.time() - start_time,
               loss_map['loss_mean'].item(),
               loss_map['accuracy_mean'].item() * 100.0))

    # plot the test accuracy, loss and images
    register_plots({**loss_map}, grapher, epoch=epoch, prefix='linear' + prefix)
    # FIX: F.upsample is deprecated; F.interpolate is the drop-in replacement
    register_images({'input_imgs': F.interpolate(minibatch, size=(100, 100))},
                    grapher, prefix=prefix)

    # return this for early stopping
    loss_val = loss_map['loss_mean'].detach().item()
    loss_map.clear()
    return loss_val
def execute_graph(epoch, model, data_loader, grapher, optimizer=None,
                  prefix='test', plot_mem=False):
    """ execute the graph; when 'train' is in the name the model runs the optimizer

    :param epoch: the current epoch number
    :param model: the torch model
    :param data_loader: iterable yielding items for `_unpack_data_and_labels`
    :param grapher: the graph writing helper (eg: visdom / tf wrapper)
    :param optimizer: the optimizer (required iff 'train' is in prefix)
    :param prefix: 'train', 'test' or 'valid'
    :param plot_mem: if True also log CPU / CUDA memory usage scalars
    :returns: {'loss_mean': float, 'acc_mean': float} for early stopping
    :rtype: dict
    """
    start_time = time.time()
    model.eval() if not 'train' in prefix else model.train()
    assert optimizer is not None if 'train' in prefix else optimizer is None
    loss_map, num_samples = {}, 0
    x_original, x_related = None, None

    for item in data_loader:
        # first destructure the data, cuda-ize and wrap in vars
        x_original, x_related, labels = _unpack_data_and_labels(item)
        x_related, labels = cudaize(x_related, is_data_tensor=True), cudaize(labels)

        if 'train' in prefix:  # zero gradients on optimizer
            optimizer.zero_grad()

        # no_grad for eval; anomaly detection is opt-in via args
        with torch.no_grad() if 'train' not in prefix else dummy_context():
            with torch.autograd.detect_anomaly() if args.detect_anomalies else dummy_context():
                x_original, x_related = generate_related(x_related, x_original, args)
                # x_original = cudaize(x_original, is_data_tensor=True)

                # run the model and gather the loss map
                data_to_infer = x_original if args.use_full_resolution else x_related
                loss_logits_t = model(data_to_infer)
                loss_t = {
                    'loss_mean': F.cross_entropy(input=loss_logits_t, target=labels)
                }

                # compute accuracy and aggregate into map
                loss_t['accuracy_mean'] = softmax_accuracy(
                    F.softmax(loss_logits_t, -1), labels, size_average=True)
                loss_map = _add_loss_map(loss_map, loss_t)
                num_samples += x_related.size(0)

                if 'train' in prefix:  # compute bp and optimize
                    if args.half is True:
                        optimizer.backward(loss_t['loss_mean'])
                        # with amp_handle.scale_loss(loss_t['loss_mean'], optimizer,
                        #                            dynamic_loss_scale=True) as scaled_loss:
                        #     scaled_loss.backward()
                    else:
                        loss_t['loss_mean'].backward()

                    if args.clip > 0:
                        # FIX: removed the bare `torch.nn.utils.clip_grad_value_`
                        # expression (referenced, never called — a no-op);
                        # TODO: clip by value or norm?
                        if args.half is True:
                            optimizer.clip_master_grads(args.clip)
                        else:
                            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

                    optimizer.step()
                    del loss_t

    loss_map = _mean_map(loss_map)  # reduce the map to get actual means
    correct_percent = 100.0 * loss_map['accuracy_mean']
    print(
        '''{}[Epoch {}][{} samples][{:.2f} sec]:Average loss: {:.4f}\tAcc: {:.4f}'''
        .format(prefix, epoch, num_samples,
                time.time() - start_time,
                loss_map['loss_mean'].item(), correct_percent))

    # add memory tracking
    if plot_mem:
        process = psutil.Process(os.getpid())
        loss_map['cpumem_scalar'] = process.memory_info().rss * 1e-6
        loss_map['cudamem_scalar'] = torch.cuda.memory_allocated() * 1e-6

    # plot all the scalar / mean values
    register_plots(loss_map, grapher, epoch=epoch, prefix=prefix)

    # plot images, crops, inlays and all relevant images
    def resize_4d_or_5d(img):
        # helper: resize a 4d batch (or each slice of a 5d batch) to 32x32
        if len(img.shape) == 4:
            return F.interpolate(img, (32, 32), mode='bilinear', align_corners=True)
        elif len(img.shape) == 5:
            return torch.cat([
                F.interpolate(img[:, i, :, :, :], (32, 32),
                              mode='bilinear', align_corners=True)
                for i in range(img.shape[1])
            ], 0)
        else:
            raise Exception("only 4d or 5d images supported")

    input_imgs_map = {
        'related_imgs': resize_4d_or_5d(x_related),
        'original_imgs': resize_4d_or_5d(x_original)
    }
    register_images(input_imgs_map, grapher, prefix=prefix)
    grapher.show()

    # return this for early stopping
    loss_val = {
        'loss_mean': loss_map['loss_mean'].clone().detach().item(),
        'acc_mean': correct_percent
    }

    # delete the data instances, see https://tinyurl.com/ycjre67m
    loss_map.clear()
    input_imgs_map.clear()
    del loss_map
    del input_imgs_map
    del x_related
    del x_original
    del labels
    gc.collect()

    # return loss and accuracy
    return loss_val
def validate(self, epoch):
    """ Evaluate the model on the validation set.

    Each image is repeated `self.M` times (Monte-Carlo sampling over glimpse
    locations); predictions are averaged back over the M copies.

    :param epoch: current epoch number (used as the plotting x-coordinate)
    :returns: (average loss, average accuracy) over the validation set
    """
    losses = AverageMeter()
    accs = AverageMeter()
    softmax_acc = 0
    for i, (x, y) in enumerate(self.valid_loader):
        if self.use_gpu:
            x, y = x.cuda(), y.cuda()
        x, y = Variable(x), Variable(y)

        # duplicate M times for Monte-Carlo location sampling
        x = x.repeat(self.M, 1, 1, 1)

        # initialize location vector and hidden state
        self.batch_size = x.shape[0]
        h_t, l_t = self.reset()

        # extract the glimpses
        log_pi = []
        baselines = []
        for _ in range(self.num_glimpses - 1):
            # forward pass through model
            _, h_t, l_t, b_t, p = self.model(x, l_t, h_t)

            # store
            baselines.append(b_t)
            log_pi.append(p)

        # last iteration
        _, h_t, l_t, b_t, log_probas, p = self.model(
            x, l_t, h_t, last=True
        )
        log_pi.append(p)
        baselines.append(b_t)

        # convert list to tensors and reshape
        baselines = torch.stack(baselines).transpose(1, 0)
        log_pi = torch.stack(log_pi).transpose(1, 0)

        # average over the M Monte-Carlo copies
        log_probas = log_probas.view(
            self.M, -1, log_probas.shape[-1]
        )
        log_probas = torch.mean(log_probas, dim=0)

        baselines = baselines.contiguous().view(
            self.M, -1, baselines.shape[-1]
        )
        baselines = torch.mean(baselines, dim=0)

        log_pi = log_pi.contiguous().view(
            self.M, -1, log_pi.shape[-1]
        )
        log_pi = torch.mean(log_pi, dim=0)

        # NOTE(review): this might be averaged wrong over the repetition in x
        softmax_acc += softmax_accuracy(log_probas, y)

        # calculate reward
        predicted = torch.max(log_probas, 1)[1]
        R = (predicted.detach() == y).float()
        R = R.unsqueeze(1).repeat(1, self.num_glimpses)

        # compute losses for differentiable modules
        loss_action = F.nll_loss(log_probas, y)
        loss_baseline = F.mse_loss(baselines, R)

        # compute reinforce loss
        adjusted_reward = R - baselines.detach()
        loss_reinforce = torch.sum(-log_pi*adjusted_reward, dim=1)
        loss_reinforce = torch.mean(loss_reinforce, dim=0)

        # sum up into a hybrid loss
        loss = loss_action + loss_baseline + loss_reinforce

        # compute accuracy
        correct = (predicted == y).float()
        acc = 100 * (correct.sum() / len(y))

        # store
        losses.update(loss.item(), x.size()[0])
        accs.update(acc.item(), x.size()[0])

    # Average over the number of batches.
    # FIX: enumerate is 0-based, so the batch count is i + 1; dividing by i
    # raised ZeroDivisionError for a single batch and inflated the average.
    softmax_acc /= (i + 1)

    # log to tensorboard per epoch instead of per iteration
    if self.use_tensorboard:
        # iteration = epoch*len(self.valid_loader) + i
        log_value('valid_loss', losses.avg, epoch)
        log_value('valid_acc', accs.avg, epoch)

    if self.use_visdom:
        # Do visdom train acc and train loss
        register_plots({'mean': np.array(losses.avg)}, self.grapher,
                       epoch, prefix='validation loss')
        register_plots({'mean': np.array(accs.avg)}, self.grapher,
                       epoch, prefix='validation accuracy')
        register_plots({'mean': np.array(softmax_acc)}, self.grapher,
                       epoch, prefix='softmax validation accuracy')
        self.grapher.show()

    return losses.avg, accs.avg
def train_one_epoch(self, epoch):
    """ Train the model for 1 epoch of the training set.

    An epoch corresponds to one full pass through the entire
    training set in successive mini-batches.

    This is used by train() and should not be called manually.

    :param epoch: current epoch number (used for logging/plot prefixes)
    :returns: (average loss, average accuracy) over the epoch
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()
    softmax_acc = 0

    tic = time.time()
    with tqdm(total=self.num_train) as pbar:
        for i, (x, y) in enumerate(self.train_loader):
            if self.use_gpu:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)
            # FIX: removed the dead local `plot` (assigned but never read)

            # initialize location vector and hidden state
            self.batch_size = x.shape[0]
            h_t, l_t = self.reset()

            # extract the glimpses
            locs = []  # NOTE(review): only appended to below; kept for parity
            log_pi = []
            baselines = []
            glimpses = []
            for t in range(self.num_glimpses - 1):
                # forward pass through model
                phi, h_t, l_t, b_t, p = self.model(x, l_t, h_t)

                # store, to look into
                glimpses.append(phi)
                baselines.append(b_t)
                log_pi.append(p)

            # last iteration
            phi, h_t, l_t, b_t, log_probas, p = self.model(
                x, l_t, h_t, last=True
            )
            glimpses.append(phi)
            log_pi.append(p)
            baselines.append(b_t)
            locs.append(l_t[0:9])

            # convert list to tensors and reshape
            baselines = torch.stack(baselines).transpose(1, 0)
            log_pi = torch.stack(log_pi).transpose(1, 0)

            # calculate reward
            predicted = torch.max(log_probas, 1)[1]
            R = (predicted.detach() == y).float()
            R = R.unsqueeze(1).repeat(1, self.num_glimpses)

            # compute losses for differentiable modules
            loss_action = F.nll_loss(log_probas, y)
            loss_baseline = F.mse_loss(baselines, R)

            # compute reinforce loss
            # summed over timesteps and averaged across batch
            adjusted_reward = R - baselines.detach()
            loss_reinforce = torch.sum(-log_pi*adjusted_reward, dim=1)
            loss_reinforce = torch.mean(loss_reinforce, dim=0)

            # sum up into a hybrid loss
            loss = loss_action + loss_baseline + loss_reinforce

            # compute accuracy
            correct = (predicted == y).float()
            acc = 100 * (correct.sum() / len(y))

            # softmax accuracy
            softmax_acc += softmax_accuracy(log_probas, y)

            # store
            losses.update(loss.item(), x.size()[0])
            accs.update(acc.item(), x.size()[0])

            # compute gradients and update SGD
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # measure elapsed time
            toc = time.time()
            batch_time.update(toc-tic)

            pbar.set_description(
                (
                    "{:.1f}s - loss: {:.3f} - acc: {:.3f}".format(
                        (toc-tic), loss.item(), acc.item()
                    )
                )
            )
            pbar.update(self.batch_size)

    # Div by the number of batches.
    # FIX: enumerate is 0-based, so the batch count is i + 1; dividing by i
    # raised ZeroDivisionError for a single batch and inflated the average.
    softmax_acc /= (i + 1)

    # Only per epoch to tensorboard
    if self.use_tensorboard:
        # iteration = epoch*len(self.train_loader) + i
        log_value('train_loss', losses.avg, epoch)
        log_value('train_acc', accs.avg, epoch)

    # Per epoch to visdom
    if self.use_visdom:
        # Do visdom train acc and train loss
        register_plots({'mean': np.array(losses.avg)}, self.grapher,
                       epoch, prefix='train loss')
        register_plots({'mean': np.array(accs.avg)}, self.grapher,
                       epoch, prefix='train accuracy')
        register_plots({'mean': np.array(softmax_acc)}, self.grapher,
                       epoch, prefix='softmax train accuracy')
        self.grapher.show()

    # Todo: code glimse development over time, or location over image
    if self.use_visdom and self.visdom_images:
        phi_tensors = []
        for j, phi in enumerate(glimpses):
            # stack all phi images from the glimpse list
            phi_row = phi.cpu().data.detach().view(
                (-1, self.num_patches, self.patch_size, self.patch_size))
            phi_tensors.append(phi_row.squeeze())
            register_images(phi_row, 'train glimpse', self.grapher,
                            prefix='train_' + str(epoch) + '_g_' + str(j))
            self.grapher.show()

        image_grid_tensor = torch.stack(phi_tensors).view(
            self.num_glimpses * self.batch_size, 1,
            self.patch_size, self.patch_size)
        register_images(image_grid_tensor, 'train glimpse', self.grapher,
                        prefix='train_' + str(epoch))
        self.grapher.show()

    return losses.avg, accs.avg