def train(loader, model, optimizer, exp_logger, epoch, train_all,
          print_freq=100, clip_gradient=True, iter_size=1):
    """Run one training epoch over ``loader``.

    Args:
        loader: iterable of sample dicts with keys ``'visual'``,
            ``'objects'``, ``'relations'``, ``'image_info'`` and
            ``'rpn_targets'`` (per-image RPN target tuples under
            ``['rpn_targets']['object']``).
        model: network whose forward call returns a (possibly nested)
            dict of loss tensors, with ``'loss'`` and ``'rpn'['loss']``
            entries used for the backward pass.
        optimizer: torch optimizer stepping the model parameters.
        exp_logger: experiment logger providing ``reset_meters`` /
            ``log_meters`` and the per-key meters updated below.
        epoch: current epoch index (used only for logging).
        train_all: when True, also back-propagate the RPN loss,
            weighted by 0.5, in addition to the main loss.
        print_freq: print running averages every ``print_freq`` batches.
        clip_gradient: clip gradients to max-norm 10 when True;
            otherwise average them over ``iter_size``.
        iter_size: divisor passed to ``network.avg_gradient`` when not
            clipping.
    """
    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()

    def _merge_losses(losses):
        # Recursively reduce every tensor stored under a 'loss*' key to
        # its mean, so per-replica loss vectors (e.g. from DataParallel)
        # become scalars before being combined and logged.
        if isinstance(losses, dict):
            for key in losses:
                if isinstance(losses[key], (dict, list)):
                    losses[key] = _merge_losses(losses[key])
                elif key.startswith('loss'):
                    losses[key] = losses[key].mean()
        elif isinstance(losses, list):
            # The original recursed into lists with dict-style iteration
            # (`for key in losses: losses[key]...`), which raised on any
            # real list value; recurse element-wise instead.
            losses = [_merge_losses(item) for item in losses]
        return losses

    for i, sample in enumerate(loader):
        # (im_data, im_info, gt_objects, gt_relationships)
        batch_size = len(sample['visual'])
        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)

        input_visual = [item for item in sample['visual']]
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets: anchor labels become LongTensor; the three bbox
        # regression arrays keep the default float dtype. Kept on CPU
        # here (is_cuda=False) — presumably moved to GPU downstream.
        rpn_anchor_targets_obj = [
            [np_to_variable(item[0], is_cuda=False, dtype=torch.LongTensor),
             np_to_variable(item[1], is_cuda=False),
             np_to_variable(item[2], is_cuda=False),
             np_to_variable(item[3], is_cuda=False)]
            for item in sample['rpn_targets']['object']
        ]

        try:
            # compute output
            raw_losses = model(im_data=input_visual, im_info=image_info,
                               gt_objects=target_objects,
                               gt_relationships=target_relations,
                               rpn_anchor_targets_obj=rpn_anchor_targets_obj)
            # Determine the loss function
            losses = _merge_losses(raw_losses)
            if train_all:
                loss = losses['loss'] + losses['rpn']['loss'] * 0.5
            else:
                loss = losses['loss']

            # Log the loss and its intermediate components.
            meters['loss'].update(losses['loss'].cpu().item(), n=batch_size)
            meters['loss_cls_obj'].update(
                losses['loss_cls_obj'].cpu().item(), n=batch_size)
            meters['loss_reg_obj'].update(
                losses['loss_reg_obj'].cpu().item(), n=batch_size)
            meters['loss_cls_rel'].update(
                losses['loss_cls_rel'].cpu().item(), n=batch_size)
            meters['loss_rpn'].update(
                losses['rpn']['loss'].cpu().item(), n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val,
                                        n=batch_size)

            optimizer.zero_grad()
            loss.backward()
            if clip_gradient:
                network.clip_gradient(model, 10.)
            else:
                network.avg_gradient(model, iter_size)
            optimizer.step()
        except Exception:
            # Was a bare `except:` that dropped into pdb.set_trace().
            # A bare except also traps KeyboardInterrupt/SystemExit, and
            # a debugger trap hangs unattended runs; report the failing
            # batch and continue instead.
            import traceback
            traceback.print_exc()
            print("Error: [{}]".format(i))

        end = time.time()

        # Logging the training loss
        if (i + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}] '
                  'Batch_Time: {batch_time.avg: .3f}\t'
                  'FRCNN Loss: {loss.avg: .4f}\t'
                  'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                      epoch, i + 1, len(loader),
                      batch_time=meters['batch_time'],
                      loss=meters['loss'],
                      rpn_loss=meters['loss_rpn']))
            print('\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                  'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                  'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                      loss_cls_obj=meters['loss_cls_obj'],
                      loss_reg_obj=meters['loss_reg_obj'],
                      loss_cls_rel=meters['loss_cls_rel'],
                  ))

    exp_logger.log_meters('train', n=epoch)
def train(loader, model, optimizer, exp_logger, epoch, train_all,
          print_freq=100, clip_gradient=True, iter_size=1):
    """Run one training epoch over ``loader`` (legacy PyTorch variant).

    Uses the pre-0.4 API (``Variable``, ``.data.cpu().numpy()[0]``) and
    reads losses from attributes the model's forward pass sets on itself
    (``model.loss``, ``model.loss_cls_obj``, ..., ``model.rpn.loss``).

    Args:
        loader: iterable of sample dicts with keys ``'visual'`` (a
            batched tensor), ``'objects'``, ``'relations'``,
            ``'image_info'`` and ``'rpn_targets'``.
        model: network that stores its losses as attributes after the
            forward call.
        optimizer: torch optimizer stepping the model parameters.
        exp_logger: experiment logger providing ``reset_meters`` /
            ``log_meters`` and the per-key meters updated below.
        epoch: current epoch index (used only for logging).
        train_all: when True, also back-propagate the RPN loss,
            weighted by 0.5, in addition to the main loss.
        print_freq: print running averages every ``print_freq`` batches.
        clip_gradient: clip gradients to max-norm 10 when True;
            otherwise average them over ``iter_size``.
        iter_size: accumulate gradients over this many batches before
            each optimizer step.
    """
    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()
    for i, sample in enumerate(loader):
        # (im_data, im_info, gt_objects, gt_relationships)
        batch_size = sample['visual'].size(0)
        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)
        input_visual = Variable(sample['visual'].cuda())
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets: anchor labels become LongTensor; the three bbox
        # regression arrays keep the default float dtype; all on CUDA.
        rpn_targets = sample['rpn_targets']['object']
        rpn_anchor_targets_obj = [
            np_to_variable(rpn_targets[0], is_cuda=True,
                           dtype=torch.LongTensor),
            np_to_variable(rpn_targets[1], is_cuda=True),
            np_to_variable(rpn_targets[2], is_cuda=True),
            np_to_variable(rpn_targets[3], is_cuda=True),
        ]
        try:
            # compute output (losses are stored on the model itself)
            model(input_visual, image_info, target_objects,
                  target_relations, rpn_anchor_targets_obj)
            # Determine the loss function
            if train_all:
                loss = model.loss + model.rpn.loss * 0.5
            else:
                loss = model.loss
            # Log the loss and its intermediate components.
            # `.data.cpu().numpy()[0]` is the pre-0.4 scalar-extraction
            # idiom; on modern PyTorch this would be `.item()`.
            meters['loss'].update(model.loss.data.cpu().numpy()[0],
                                  n=batch_size)
            meters['loss_cls_obj'].update(
                model.loss_cls_obj.data.cpu().numpy()[0], n=batch_size)
            meters['loss_reg_obj'].update(
                model.loss_reg_obj.data.cpu().numpy()[0], n=batch_size)
            meters['loss_cls_rel'].update(
                model.loss_cls_rel.data.cpu().numpy()[0], n=batch_size)
            meters['loss_rpn'].update(model.rpn.loss.data.cpu().numpy()[0],
                                      n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val,
                                        n=batch_size)
            # add support for iter size
            # special case: last iterations — step on accumulation
            # boundaries and on the final batch so trailing gradients
            # are not dropped; otherwise only accumulate.
            if i % iter_size == 0 or i == len(loader) - 1:
                loss.backward()
                if clip_gradient:
                    network.clip_gradient(model, 10.)
                else:
                    network.avg_gradient(model, iter_size)
                optimizer.step()
                optimizer.zero_grad()
            else:
                loss.backward()
            end = time.time()

            # Logging the training loss
            if (i + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}] '
                    'Batch_Time: {batch_time.avg: .3f}\t'
                    'FRCNN Loss: {loss.avg: .4f}\t'
                    'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                        epoch, i + 1, len(loader),
                        batch_time=meters['batch_time'],
                        loss=meters['loss'],
                        rpn_loss=meters['loss_rpn']))
                print(
                    '\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                    'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                    'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                        loss_cls_obj=meters['loss_cls_obj'],
                        loss_reg_obj=meters['loss_reg_obj'],
                        loss_cls_rel=meters['loss_cls_rel'],
                    ))
        except Exception:
            # The original handler called `pdb.set_trace()` without any
            # visible import of pdb in this scope, so the handler itself
            # raised NameError and masked the real error; a debugger trap
            # also must not ship in a training loop. Report and continue.
            import traceback
            traceback.print_exc()
            print("Error: [{}]".format(i))
    exp_logger.log_meters('train', n=epoch)