def graph_construction(object_rois, gt_rois=None):
    # build the graph structure on CPU (numpy), then move the ROIs back to GPU
    if isinstance(object_rois, torch.Tensor):
        object_rois = object_rois.cpu().numpy()
    object_rois, region_rois, mat_object, mat_phrase, mat_region = graph_construction_py(
        object_rois, gt_rois)
    object_rois = network.np_to_variable(object_rois, is_cuda=True)
    region_rois = network.np_to_variable(region_rois, is_cuda=True)
    return object_rois, region_rois, mat_object, mat_phrase, mat_region
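
# -----------------------------------------------------------------------------
# For reference, a minimal sketch of the `network.np_to_variable` helper that
# the wrappers in this file rely on. Its semantics are inferred from the call
# sites (numpy array in, tensor of the requested dtype out, optionally moved to
# GPU); the repo's actual implementation may differ.
import numpy as np
import torch


def np_to_variable_sketch(x, is_cuda=True, dtype=torch.FloatTensor):
    # convert a numpy array to a torch tensor of the requested type
    v = torch.from_numpy(np.ascontiguousarray(x)).type(dtype)
    if is_cuda:
        v = v.cuda()
    return v
# -----------------------------------------------------------------------------
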
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, _feat_stride,
                   opts, anchor_scales, anchor_ratios, mappings):
    # proposals are generated in numpy on CPU, then returned as one
    # (N, 6) CUDA tensor
    rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
    rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()
    x = proposal_layer_py(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                          _feat_stride, opts, anchor_scales, anchor_ratios,
                          mappings)
    x = network.np_to_variable(x, is_cuda=True)
    return x.view(-1, 6)
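
# -----------------------------------------------------------------------------
# The wrappers in this file share one pattern: detach to CPU numpy, run a
# pure-numpy layer, and wrap the result back as a CUDA tensor. A generic sketch
# of that round trip (`cpu_layer_fn` is a placeholder, not a repo function):
import torch


def wrap_numpy_layer(tensor, cpu_layer_fn, *args):
    # detach to numpy, compute on CPU, return the result on GPU
    out = cpu_layer_fn(tensor.data.cpu().numpy(), *args)
    return torch.from_numpy(out).cuda()
# -----------------------------------------------------------------------------
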
def proposal_target_layer(object_rois, gt_objects, gt_relationships,
                          n_classes_obj):
    """
    Parameters
    ----------
    object_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    region_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    gt_objects: (G_obj, 5) [x1, y1, x2, y2, obj_class] int
    gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship)
    gt_regions: (G_region, 4+40) [x1, y1, x2, y2, word_index] (-1 for padding)
    # gt_ishard: (G_region, 4+40) {0 | 1} 1 indicates hard
    # dontcare_areas: (D, 4) [x1, y1, x2, y2]
    n_classes_obj
    n_classes_pred
    is_training: whether in the training scheme

    Returns
    ----------
    rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    labels: (1 x H x W x A, 1) {0, 1, ..., _num_classes-1}
    bbox_targets: (1 x H x W x A, K x 4) [dx1, dy1, dx2, dy2]
    bbox_inside_weights: (1 x H x W x A, K x 4) 0/1 masks for computing the loss
    bbox_outside_weights: (1 x H x W x A, K x 4) 0/1 masks for computing the loss
    """
    object_rois = object_rois.data.cpu().numpy()
    targets_object, targets_phrase, targets_region = proposal_target_layer_py(
        object_rois, gt_objects, gt_relationships, n_classes_obj)
    # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
    (object_labels, object_rois, bbox_targets, bbox_inside_weights,
     mat_object, object_fg_duplicate) = targets_object[:6]
    phrase_labels, mat_phrase = targets_phrase[:2]
    region_rois, mat_region = targets_region[:2]

    # move the sampled targets back to GPU
    object_rois = network.np_to_variable(object_rois, is_cuda=True)
    region_rois = network.np_to_variable(region_rois, is_cuda=True)
    object_labels = network.np_to_variable(object_labels, is_cuda=True,
                                           dtype=torch.LongTensor)
    bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
    bbox_inside_weights = network.np_to_variable(bbox_inside_weights,
                                                 is_cuda=True)
    phrase_labels = network.np_to_variable(phrase_labels, is_cuda=True,
                                           dtype=torch.LongTensor)
    duplicate_labels = network.np_to_variable(object_fg_duplicate, is_cuda=True)

    return (tuple([object_labels, object_rois, bbox_targets,
                   bbox_inside_weights, duplicate_labels]),
            tuple([phrase_labels]),
            tuple([None, region_rois]),
            mat_object, mat_phrase, mat_region)
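
# -----------------------------------------------------------------------------
# How bbox_targets and bbox_inside_weights are typically consumed downstream
# (standard Fast R-CNN practice; a sketch under that assumption, not this
# repo's exact loss code): the inside weights zero out background rows before
# a smooth-L1 penalty.
import torch
import torch.nn.functional as F


def bbox_regression_loss(bbox_pred, bbox_targets, bbox_inside_weights):
    # background rows have zero inside weight and contribute nothing
    diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    return F.smooth_l1_loss(diff, torch.zeros_like(diff), reduction='sum')
# -----------------------------------------------------------------------------
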
def train(loader, model, optimizer, exp_logger, epoch, train_all,
          print_freq=100, clip_gradient=True, iter_size=1):
    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()
    for i, sample in enumerate(loader):
        # sample: (im_data, im_info, gt_objects, gt_relationships)
        batch_size = len(sample['visual'])
        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)
        input_visual = [item for item in sample['visual']]
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets
        rpn_anchor_targets_obj = [[
            np_to_variable(item[0], is_cuda=False, dtype=torch.LongTensor),
            np_to_variable(item[1], is_cuda=False),
            np_to_variable(item[2], is_cuda=False),
            np_to_variable(item[3], is_cuda=False)
        ] for item in sample['rpn_targets']['object']]

        # compute output
        try:
            raw_losses = model(im_data=input_visual,
                               im_info=image_info,
                               gt_objects=target_objects,
                               gt_relationships=target_relations,
                               rpn_anchor_targets_obj=rpn_anchor_targets_obj)

            # reduce every 'loss*' entry (possibly nested) to its mean
            def merge_losses(losses):
                for key in losses:
                    if isinstance(losses[key], dict) or isinstance(
                            losses[key], list):
                        losses[key] = merge_losses(losses[key])
                    elif key.startswith('loss'):
                        losses[key] = losses[key].mean()
                return losses

            losses = merge_losses(raw_losses)
            if train_all:
                loss = losses['loss'] + losses['rpn']['loss'] * 0.5
            else:
                loss = losses['loss']

            # log the losses and intermediate values
            meters['loss'].update(losses['loss'].cpu().item(), n=batch_size)
            meters['loss_cls_obj'].update(losses['loss_cls_obj'].cpu().item(),
                                          n=batch_size)
            meters['loss_reg_obj'].update(losses['loss_reg_obj'].cpu().item(),
                                          n=batch_size)
            meters['loss_cls_rel'].update(losses['loss_cls_rel'].cpu().item(),
                                          n=batch_size)
            meters['loss_rpn'].update(losses['rpn']['loss'].cpu().item(),
                                      n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val, n=batch_size)

            optimizer.zero_grad()
            loss.backward()
            if clip_gradient:
                network.clip_gradient(model, 10.)
            else:
                network.avg_gradient(model, iter_size)
            optimizer.step()
        except Exception:
            import pdb
            pdb.set_trace()
            print("Error: [{}]".format(i))

        end = time.time()
        # log the training loss
        if (i + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}] '
                  'Batch_Time: {batch_time.avg: .3f}\t'
                  'FRCNN Loss: {loss.avg: .4f}\t'
                  'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                      epoch, i + 1, len(loader),
                      batch_time=meters['batch_time'],
                      loss=meters['loss'],
                      rpn_loss=meters['loss_rpn']))
            print('\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                  'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                  'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                      loss_cls_obj=meters['loss_cls_obj'],
                      loss_reg_obj=meters['loss_reg_obj'],
                      loss_cls_rel=meters['loss_cls_rel'],
                  ))
    exp_logger.log_meters('train', n=epoch)
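
# -----------------------------------------------------------------------------
# Standalone illustration of the merge_losses reduction used in train() above,
# on toy tensors: every entry whose key starts with 'loss' is reduced to its
# mean, recursing through nested dicts.
import torch


def _merge_losses_demo():
    raw = {'loss': torch.tensor([1.0, 3.0]),
           'rpn': {'loss': torch.tensor([0.5, 1.5])}}

    def merge_losses(losses):
        for key in losses:
            if isinstance(losses[key], dict):
                losses[key] = merge_losses(losses[key])
            elif key.startswith('loss'):
                losses[key] = losses[key].mean()
        return losses

    merged = merge_losses(raw)
    print(merged['loss'].item(), merged['rpn']['loss'].item())  # 2.0 1.0
# -----------------------------------------------------------------------------
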
def train(train_loader, target_net, optimizer, epoch):
    batch_time = network.AverageMeter()
    data_time = network.AverageMeter()
    train_loss = network.AverageMeter()
    train_loss_obj_box = network.AverageMeter()
    train_loss_obj_entropy = network.AverageMeter()
    accuracy_obj = network.AccuracyMeter()

    target_net.train()
    end = time.time()
    for i, sample in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        im_data = sample['visual'][0].cuda()
        im_info = sample['image_info']
        gt_objects = sample['objects']
        anchor_targets = [
            np_to_variable(sample['rpn_targets']['object'][0][0],
                           is_cuda=True, dtype=torch.LongTensor),
            np_to_variable(sample['rpn_targets']['object'][0][1], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][0][2], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][0][3], is_cuda=True)
        ]

        # forward pass
        target_net(im_data, im_info, rpn_data=anchor_targets)

        # record losses (.item() replaces the deprecated .data[0] indexing)
        loss = target_net.loss  # total loss
        train_loss.update(loss.item(), im_data.size(0))
        # object bbox regression
        train_loss_obj_box.update(target_net.loss_box.item(), im_data.size(0))
        # object score
        train_loss_obj_entropy.update(target_net.loss_cls.item(),
                                      im_data.size(0))
        # accuracy
        accuracy_obj.update(target_net.tp, target_net.tf,
                            target_net.fg_cnt, target_net.bg_cnt)

        # backward
        optimizer.zero_grad()
        torch.cuda.synchronize()
        loss.backward()
        if not args.disable_clip_gradient:
            network.clip_gradient(target_net, 10.)
        torch.cuda.synchronize()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if (i + 1) % args.log_interval == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch_Time: {batch_time.avg:.3f}s\t'
                  'lr: {lr: f}\t'
                  'Loss: {loss.avg:.4f}\n'
                  '\t[object]:\t'
                  'tp: {accuracy_obj.true_pos:.3f}, \t'
                  'tf: {accuracy_obj.true_neg:.3f}, \t'
                  'fg/bg=({accuracy_obj.foreground:.1f}/{accuracy_obj.background:.1f})\t'
                  'cls_loss: {cls_loss_object.avg:.3f}\t'
                  'reg_loss: {reg_loss_object.avg:.3f}'.format(
                      epoch, i + 1, len(train_loader),
                      batch_time=batch_time, lr=args.lr,
                      data_time=data_time, loss=train_loss,
                      cls_loss_object=train_loss_obj_entropy,
                      reg_loss_object=train_loss_obj_box,
                      accuracy_obj=accuracy_obj))
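
# -----------------------------------------------------------------------------
# A minimal sketch of the AverageMeter bookkeeping assumed by the loop above
# (running value, sum, count, and average); the repo's network.AverageMeter
# may differ in detail.
class AverageMeterSketch(object):
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        # record the latest value and fold it into the running average
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
# -----------------------------------------------------------------------------
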
def train(loader, model, optimizer, exp_logger, epoch, train_all,
          print_freq=100, clip_gradient=True, iter_size=1):
    model.train()
    meters = exp_logger.reset_meters('train')
    end = time.time()
    for i, sample in enumerate(loader):
        # sample: (im_data, im_info, gt_objects, gt_relationships)
        batch_size = sample['visual'].size(0)
        # measure data loading time
        meters['data_time'].update(time.time() - end, n=batch_size)
        input_visual = Variable(sample['visual'].cuda())
        target_objects = sample['objects']
        target_relations = sample['relations']
        image_info = sample['image_info']
        # RPN targets
        rpn_anchor_targets_obj = [
            np_to_variable(sample['rpn_targets']['object'][0],
                           is_cuda=True, dtype=torch.LongTensor),
            np_to_variable(sample['rpn_targets']['object'][1], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][2], is_cuda=True),
            np_to_variable(sample['rpn_targets']['object'][3], is_cuda=True)
        ]

        try:
            # compute output
            model(input_visual, image_info, target_objects, target_relations,
                  rpn_anchor_targets_obj)

            # determine the loss function
            if train_all:
                loss = model.loss + model.rpn.loss * 0.5
            else:
                loss = model.loss

            # log the losses and intermediate values
            # (.item() replaces the deprecated .data.cpu().numpy()[0] indexing)
            meters['loss'].update(model.loss.item(), n=batch_size)
            meters['loss_cls_obj'].update(model.loss_cls_obj.item(),
                                          n=batch_size)
            meters['loss_reg_obj'].update(model.loss_reg_obj.item(),
                                          n=batch_size)
            meters['loss_cls_rel'].update(model.loss_cls_rel.item(),
                                          n=batch_size)
            meters['loss_rpn'].update(model.rpn.loss.item(), n=batch_size)
            meters['batch_time'].update(time.time() - end, n=batch_size)
            meters['epoch_time'].update(meters['batch_time'].val, n=batch_size)

            # iter_size support: gradients accumulate across iter_size batches;
            # the optimizer also steps on the last batch as a special case
            if i % iter_size == 0 or i == len(loader) - 1:
                loss.backward()
                if clip_gradient:
                    network.clip_gradient(model, 10.)
                else:
                    network.avg_gradient(model, iter_size)
                optimizer.step()
                optimizer.zero_grad()
            else:
                loss.backward()

            end = time.time()
            # log the training loss
            if (i + 1) % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}] '
                      'Batch_Time: {batch_time.avg: .3f}\t'
                      'FRCNN Loss: {loss.avg: .4f}\t'
                      'RPN Loss: {rpn_loss.avg: .4f}\t'.format(
                          epoch, i + 1, len(loader),
                          batch_time=meters['batch_time'],
                          loss=meters['loss'],
                          rpn_loss=meters['loss_rpn']))
                print('\t[object] loss_cls_obj: {loss_cls_obj.avg:.4f} '
                      'loss_reg_obj: {loss_reg_obj.avg:.4f} '
                      'loss_cls_rel: {loss_cls_rel.avg:.4f} '.format(
                          loss_cls_obj=meters['loss_cls_obj'],
                          loss_reg_obj=meters['loss_reg_obj'],
                          loss_cls_rel=meters['loss_cls_rel'],
                      ))
        except Exception:
            pdb.set_trace()

    exp_logger.log_meters('train', n=epoch)
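
# -----------------------------------------------------------------------------
# Toy trace of the iter_size schedule in the train() above: backward() runs on
# every batch so gradients accumulate, and the optimizer steps every iter_size
# batches, plus once at the very last batch.
def _iter_size_schedule_demo(num_batches=7, iter_size=3):
    for i in range(num_batches):
        # loss.backward() would run here each iteration
        if i % iter_size == 0 or i == num_batches - 1:
            print('optimizer.step() at batch', i)  # batches 0, 3, 6
# -----------------------------------------------------------------------------
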
def forward(self, im_data, boxes, rel_boxes, SpatialFea, classes, ix1, ix2,
            args):
    im_data = network.np_to_variable(im_data, is_cuda=True)
    im_data = im_data.permute(0, 3, 1, 2)  # NHWC -> NCHW
    boxes = network.np_to_variable(boxes, is_cuda=True)
    rel_boxes = network.np_to_variable(rel_boxes, is_cuda=True)
    SpatialFea = network.np_to_variable(SpatialFea, is_cuda=True)
    classes = network.np_to_variable(classes, is_cuda=True,
                                     dtype=torch.LongTensor)
    ix1 = network.np_to_variable(ix1, is_cuda=True, dtype=torch.LongTensor)
    ix2 = network.np_to_variable(ix2, is_cuda=True, dtype=torch.LongTensor)

    # shared convolutional backbone
    x = self.conv1(im_data)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    x = self.conv5(x)

    # per-object branch: ROI-pooled features -> fc6/fc7 -> object scores
    x_so = self.roi_pool(x, boxes)
    x_so = x_so.view(x_so.size(0), -1)
    x_so = self.fc6(x_so)
    x_so = F.dropout(x_so, training=self.training)
    x_so = self.fc7(x_so)
    x_so = F.dropout(x_so, training=self.training)
    obj_score = self.fc_obj(x_so)
    x_so = self.fc8(x_so)

    # union-box branch for relation features
    x_u = self.roi_pool(x, rel_boxes)
    x = x_u.view(x_u.size(0), -1)
    x = self.fc6(x)
    x = F.dropout(x, training=self.training)
    x = self.fc7(x)
    x = F.dropout(x, training=self.training)
    x = self.fc8(x)

    if args.use_so:
        # concatenate subject and object appearance features per relation
        x_s = torch.index_select(x_so, 0, ix1)
        x_o = torch.index_select(x_so, 0, ix2)
        x_so = torch.cat((x_s, x_o), 1)
        x_so = self.fc_so(x_so)
        x = torch.cat((x, x_so), 1)

    if args.loc_type == 1:
        # spatial feature fed directly through a fully connected layer
        lo = self.fc_lov(SpatialFea)
        x = torch.cat((x, lo), 1)
    elif args.loc_type == 2:
        # spatial feature encoded by a small conv net first
        lo = self.conv_lo(SpatialFea)
        lo = lo.view(lo.size(0), -1)
        lo = self.fc_lov(lo)
        x = torch.cat((x, lo), 1)

    if args.use_obj:
        # class-embedding pairs for subject and object
        emb = self.emb(classes)
        emb = torch.squeeze(emb, 1)
        emb_s = torch.index_select(emb, 0, ix1)
        emb_o = torch.index_select(emb, 0, ix2)
        emb_so = torch.cat((emb_s, emb_o), 1)
        emb_so = self.fc_so_emb(emb_so)
        x = torch.cat((x, emb_so), 1)

    x = self.fc_fusion(x)
    rel_score = self.fc_rel(x)
    return obj_score, rel_score
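
# -----------------------------------------------------------------------------
# Small illustration of the ix1/ix2 gather in forward() above: index_select
# picks the subject and object feature rows for each relation pair and
# concatenates them (toy tensors, feature dim 3).
import torch


def _pair_gather_demo():
    x_so = torch.arange(12, dtype=torch.float32).view(4, 3)  # 4 object features
    ix1 = torch.tensor([0, 2])  # subject index per relation
    ix2 = torch.tensor([1, 3])  # object index per relation
    pairs = torch.cat((torch.index_select(x_so, 0, ix1),
                       torch.index_select(x_so, 0, ix2)), 1)
    print(pairs.shape)  # torch.Size([2, 6])
# -----------------------------------------------------------------------------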