def _match_priors_gt(self, priors, gt, thresh, num_boxes):
    batch_size = gt.size(0)
    num_priors = priors.size(0)
    overlaps = bbox_overlaps_batch(priors, gt)
    # [b, num_objects] best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1)
    # [b, num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(2)
    matches = torch.zeros(batch_size, num_priors, 5).type_as(priors)
    for num in range(batch_size):
        # select valid best prior idx
        best_prior_idx_valid = best_prior_idx[num][:num_boxes[num]]
        best_truth_overlap[num].index_fill_(0, best_prior_idx_valid, 2)  # ensure best prior
        # TODO refactor: index best_prior_idx with long tensor
        # ensure every gt matches with its prior of max overlap
        for j in range(best_prior_idx_valid.size(0)):
            best_truth_idx[num][best_prior_idx_valid[j]] = j
        matches[num] = gt[num][best_truth_idx[num]]
    loc = matches[:, :, :-1]   # Shape: [bs, num_priors, 4]
    conf = matches[:, :, -1]   # Shape: [bs, num_priors]
    conf[best_truth_overlap < thresh] = 0  # label as background
    encoded_loc = bbox_transform_batch(priors, loc)
    if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        encoded_loc = ((encoded_loc - self.BBOX_NORMALIZE_MEANS.expand_as(encoded_loc))
                       / self.BBOX_NORMALIZE_STDS.expand_as(encoded_loc))
    return encoded_loc, conf
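# A minimal, self-contained sketch of the bipartite matching idea used by
# _match_priors_gt above (names like `iou` and `toy_match` are illustrative,
# not from the repo): every prior takes its best-overlapping ground truth, and
# every ground truth additionally claims its single best prior, whose overlap
# is forced high so it can never be labeled background.
import torch

def toy_match(iou, thresh=0.5):
    # iou: [num_priors, num_gt] pairwise overlaps for one image
    best_truth_overlap, best_truth_idx = iou.max(dim=1)  # per prior
    best_prior_idx = iou.argmax(dim=0)                   # per ground truth
    best_truth_overlap.index_fill_(0, best_prior_idx, 2.0)
    for j, p in enumerate(best_prior_idx):
        best_truth_idx[p] = j                            # force-match best prior
    background = best_truth_overlap < thresh
    return best_truth_idx, background

iou = torch.tensor([[0.1, 0.6], [0.4, 0.2], [0.05, 0.3]])
print(toy_match(iou))  # prior 2 stays background; priors 0 and 1 are matched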
def forward(self, data_batch):
    im_data = data_batch[0]
    im_info = data_batch[1]
    gt_boxes = data_batch[2]
    gt_grasps = data_batch[3]
    num_boxes = data_batch[4]
    num_grasps = data_batch[5]
    gt_grasp_inds = data_batch[6]
    batch_size = im_data.size(0)
    if self.training:
        self.iter_counter += 1

    # For the Jacquard dataset the bounding-box labels are set to -1. For
    # training we set them to 1, which does not affect the training process.
    if self.training:
        if gt_boxes[:, :, -1].sum().item() < 0:
            gt_boxes[:, :, -1] = 1
        for i in range(batch_size):
            if torch.sum(gt_grasp_inds[i]).item() == 0:
                gt_grasp_inds[i, :num_grasps[i].item()] = 1

    # features
    base_feat = self.FeatExt(im_data)

    # generate rois of RCNN
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)
    if not self.use_objdet_branch:
        rois_scores = rois[:, :, 5:].clone()
        rois = rois[:, :, :5].clone()

    if self.training:
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = \
            self._get_header_train_data(rois, gt_boxes, num_boxes)
    else:
        rois_label, rois_target, rois_inside_ws, rois_outside_ws = None, None, None, None
    pooled_feat = self._roi_pooling(base_feat, rois)

    cls_prob, bbox_pred, RCNN_loss_bbox, RCNN_loss_cls = \
        None, None, torch.Tensor([0]).type_as(rois), torch.Tensor([0]).type_as(rois)
    if self.use_objdet_branch:
        # object detection branch
        cls_score, cls_prob, bbox_pred = self._get_obj_det_result(pooled_feat)
        if self.training:
            RCNN_loss_bbox, RCNN_loss_cls = self._obj_det_loss_comp(
                cls_score, cls_prob, bbox_pred, rois_label, rois_target,
                rois_inside_ws, rois_outside_ws)
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
    else:
        cls_prob = torch.cat([1 - rois_scores, rois_scores], dim=-1)

    # grasp detection branch
    # 1. obtain grasp features of the positive ROIs and prepare grasp training data
    if self.training:
        rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
        # bs x N_{rois}
        _, rois_inds = torch.max(rois_overlaps, dim=2)
        rois_inds += 1
        grasp_rois_mask = rois_label.view(-1) > 0

        if (grasp_rois_mask > 0).sum().item() > 0:
            grasp_feat = self._MGN_head_to_tail(pooled_feat[grasp_rois_mask])
            grasp_rois = rois.view(-1, 5)[grasp_rois_mask]
            # process grasp ground truth, return: N_{gr_rois} x N_{Gr_gt} x 5
            grasp_gt_xywhc = points2labels(gt_grasps)
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]
        else:
            # when there are no positive rois, return dummy results
            grasp_loc = torch.Tensor([]).type_as(gt_grasps)
            grasp_prob = torch.Tensor([]).type_as(gt_grasps)
            grasp_bbox_loss = torch.Tensor([0]).type_as(gt_grasps)
            grasp_cls_loss = torch.Tensor([0]).type_as(gt_grasps)
            grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
            grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, \
                grasp_conf_label, grasp_all_anchors
    else:
        grasp_feat = self._MGN_head_to_tail(pooled_feat)

    # N_{gr_rois} x W x H x A*5, N_{gr_rois} x W x H x A*2
    grasp_loc, grasp_conf = self.FCGN_classifier(grasp_feat)
    feat_height, feat_width = grasp_conf.size(1), grasp_conf.size(2)
    # reshape grasp_loc and grasp_conf
    grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
    grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
    grasp_prob = F.softmax(grasp_conf, 2)

    # 2. calculate grasp loss
    grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = 0, 0, None
    if self.training:
        # N_{gr_rois} x K*A x 5
        grasp_all_anchors = self._generate_anchors(feat_height, feat_width, grasp_rois)
        grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = self._grasp_loss_comp(
            grasp_rois, grasp_conf, grasp_loc, grasp_gt_xywhc,
            grasp_all_anchors, feat_height, feat_width)
    else:
        # bs*N x K*A x 5
        grasp_all_anchors = self._generate_anchors(feat_height, feat_width, rois.view(-1, 5))

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
        grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, \
        grasp_conf_label, grasp_all_anchors
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
    # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
    # roi_part_match[0], roi_part_match_overlap[0] and so on
    if True:
        iou_threshold = 0.8
        dis_threshold = 0.2

        # first, calculate the overlaps among rois; set weights on edges
        # between nodes with iou >= iou_threshold to 1
        overlaps = bbox_overlaps_batch(rois, rois)
        overlaps_bin = overlaps.cpu().data.numpy().copy()
        _, N_node, _ = overlaps.shape
        overlaps_bin1 = torch.unsqueeze(torch.eye(N_node, N_node).cuda(), dim=0)
        overlaps_bin1[overlaps >= iou_threshold] = 1
        overlaps_bin1[overlaps < iou_threshold] = 0
        for j in range(N_node):
            for k in range(N_node):
                if overlaps_bin[0][j][k] >= iou_threshold:
                    overlaps_bin[0][j][k] = 1
                else:
                    overlaps_bin[0][j][k] = 0
                if k == j:
                    overlaps_bin[0][j][k] = 0

        # second, calculate the distances among rois; set weights on edges
        # between nodes with iou = 0 and dis < dis_threshold to 1
        distances = bbox_distances_batch(rois, rois)
        distances_bin = distances.cpu().data.numpy().copy()
        for j in range(N_node):
            for k in range(N_node):
                if distances_bin[0][j][k] <= dis_threshold:
                    distances_bin[0][j][k] = 1
                else:
                    distances_bin[0][j][k] = 0
                if k == j:
                    distances_bin[0][j][k] = 0
        # adj_matrix_bin = overlaps_bin + distances_bin

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    dot_product_mat = torch.mm(pooled_feat, torch.transpose(pooled_feat, 0, 1))
    len_vec = torch.unsqueeze(torch.sqrt(torch.sum(pooled_feat * pooled_feat, dim=1)), dim=0)
    len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
    pooled_feat_sim_mat = dot_product_mat / len_mat

    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

    # update 20191027: build graph for rois based on index (default: batch_size = 1)
    part_size = 10
    relation_size = 5
    if True:
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        # calculate adj_matrix based on adj_matrix_bin; the weights on edges are
        # the cosine similarity between node features
        adj_matrix = np.zeros((N_node, N_node))
        for s in range(N_node):
            row_idx = [t for t in range(N_node)]
            random.shuffle(row_idx)
            part_cnt = 0
            relation_cnt = 0
            for t in row_idx:
                if part_cnt <= part_size:
                    if overlaps_bin[0, s, t] == 1:
                        node_feat_s = pooled_feat[s, :]
                        node_feat_t = pooled_feat[t, :]
                        adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                        part_cnt = part_cnt + 1
                    continue
                # if relation_cnt <= relation_size:
                #     if distances_bin[0, s, t] == 1:
                #         node_feat_s = pooled_feat[s, :]
                #         node_feat_t = pooled_feat[t, :]
                #         adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                #         relation_cnt = relation_cnt + 1
                #         continue
                # if part_cnt > part_size and relation_cnt > relation_size:
                #     break
                if part_cnt > part_size:
                    break
        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
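# A minimal, self-contained sketch of the pattern used above: build a cosine-
# similarity adjacency over RoI features, then refine the features with two
# graph-convolution layers. `SimpleGCNLayer` is a hypothetical stand-in for the
# repo's gcn1/gcn2 modules (their exact definition is not shown in this file).
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleGCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Linear(in_dim, out_dim, bias=False)

    def forward(self, x, adj):
        # propagate neighbor features weighted by the adjacency, then transform
        return self.weight(adj @ x)

feats = torch.randn(8, 32)                       # 8 RoIs, 32-d features
adj = F.cosine_similarity(feats.unsqueeze(1), feats.unsqueeze(0), dim=-1)
adj = adj * (adj > 0.5).float()                  # keep only strong edges
gcn1, gcn2 = SimpleGCNLayer(32, 32), SimpleGCNLayer(32, 32)
out = F.relu(gcn2(F.relu(gcn1(feats, adj)), adj))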
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #     generate 9 anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    scores = input[0]
    gt_boxes = input[1]
    im_info = input[2]
    num_boxes = input[3]
    feat_shapes = input[4]

    # NOTE: need to change
    # height, width = scores.size(2), scores.size(3)
    height, width = 0, 0
    batch_size = gt_boxes.size(0)

    anchors = torch.from_numpy(
        generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios,
                                      feat_shapes, self._fpn_feature_strides,
                                      self._fpn_anchor_stride)).type_as(scores)
    total_anchors = anchors.size(0)

    keep = ((anchors[:, 0] >= -self._allowed_border) &
            (anchors[:, 1] >= -self._allowed_border) &
            (anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
            (anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))
    inds_inside = torch.nonzero(keep).view(-1)

    # keep only inside anchors
    anchors = anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
    bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
    bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

    overlaps = bbox_overlaps_batch(anchors, gt_boxes)

    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
    gt_max_overlaps, _ = torch.max(overlaps, 1)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)
    if torch.sum(keep) > 0:
        labels[keep > 0] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

    sum_fg = torch.sum((labels == 1).int(), 1)
    sum_bg = torch.sum((labels == 0).int(), 1)

    for i in range(batch_size):
        # subsample positive labels if we have too many
        if sum_fg[i] > num_fg:
            fg_inds = torch.nonzero(labels[i] == 1).view(-1)
            # torch.randperm seems to have a bug in the multi-GPU setting that
            # causes a segfault; see https://github.com/pytorch/pytorch/issues/1868
            # for details. Use numpy instead.
            # rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
            rand_num = torch.from_numpy(
                np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).type(torch.long)
            disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
            labels[i][disable_inds] = -1

        num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]

        # subsample negative labels if we have too many
        if sum_bg[i] > num_bg:
            bg_inds = torch.nonzero(labels[i] == 0).view(-1)
            # rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()
            rand_num = torch.from_numpy(
                np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).type(torch.long)
            disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
            labels[i][disable_inds] = -1

    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
    bbox_targets = _compute_targets_batch(
        anchors,
        gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

    # use a single value instead of 4 values for easy indexing
    bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # NOTE: `i` here is the last batch index from the loop above
        # (inherited from the upstream implementation)
        num_examples = torch.sum(labels[i] >= 0).item()
        positive_weights = 1.0 / num_examples
        negative_weights = 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The original code left the weights undefined on this branch; the
        # py-faster-rcnn reference splits RPN_POSITIVE_WEIGHT between the
        # positive and negative examples, which is filled in here.
        positive_weights = cfg.TRAIN.RPN_POSITIVE_WEIGHT / torch.sum(labels == 1).item()
        negative_weights = (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / torch.sum(labels == 0).item()

    bbox_outside_weights[labels == 1] = positive_weights
    bbox_outside_weights[labels == 0] = negative_weights

    labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

    outputs = []

    # labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
    # labels = labels.view(batch_size, 1, A * height, width)
    outputs.append(labels)

    # bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
    outputs.append(bbox_targets)

    # anchors_count = bbox_inside_weights.size(1)
    # bbox_inside_weights = bbox_inside_weights.view(batch_size, anchors_count, 1)\
    #     .expand(batch_size, anchors_count, 4).contiguous()\
    #     .view(batch_size, height, width, 4*A).permute(0,3,1,2).contiguous()
    outputs.append(bbox_inside_weights)

    # bbox_outside_weights = bbox_outside_weights.view(batch_size, anchors_count, 1)\
    #     .expand(batch_size, anchors_count, 4).contiguous()\
    #     .view(batch_size, height, width, 4*A).permute(0,3,1,2).contiguous()
    outputs.append(bbox_outside_weights)

    return outputs
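# `_unmap` is referenced above but not defined in this file. A minimal sketch of
# its usual py-faster-rcnn semantics (an assumption, not the repo's exact code):
# scatter values computed for the inside-image anchors back onto the full anchor
# set, filling everything else with a constant.
import torch

def unmap_sketch(data, count, inds, batch_size, fill=0):
    # data: [batch_size, len(inds), ...] values for the kept anchors
    # count: total number of anchors before the border filtering
    shape = (batch_size, count) + tuple(data.shape[2:])
    out = data.new_full(shape, fill)
    out[:, inds] = data
    return out

vals = torch.ones(1, 3)
print(unmap_sketch(vals, 6, torch.tensor([0, 2, 5]), 1, fill=-1))
# tensor([[ 1., -1.,  1., -1., -1.,  1.]])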
def context_anchor(rois, features, hh, hw):
    # topleft     = [2*x1 - x2, 2*y1 - y2, x1, y1]
    # top         = [x1, 2*y1 - y2, x2, y1]
    # topright    = [x2, 2*y1 - y2, 2*x2 - x1, y1]
    # left        = [2*x1 - x2, y1, x1, y2]
    # right       = [x2, y1, 2*x2 - x1, y2]
    # bottomleft  = [2*x1 - x2, y2, x1, 2*y2 - y1]
    # bottom      = [x1, y2, x2, 2*y2 - y1]
    # bottomright = [x2, y2, 2*x2 - x1, 2*y2 - y1]
    """
    rois[:, 1], rois[:, 2], rois[:, 3], rois[:, 4] = x1, y1, x2, y2
    """
    batch_size = features.size(0)
    num_channels = features.size(1)
    H = features.size(2)
    W = features.size(3)

    # rois: [128, 5]
    x1 = rois[:, 1].cpu().numpy().reshape(rois.size(0), 1)  # [128, 1]
    y1 = rois[:, 2].cpu().numpy().reshape(rois.size(0), 1)
    x2 = rois[:, 3].cpu().numpy().reshape(rois.size(0), 1)
    y2 = rois[:, 4].cpu().numpy().reshape(rois.size(0), 1)
    _w = (x2 - x1)  # [128, 1]
    _h = (y2 - y1)  # [128, 1]

    # cell centers of the 3x3 context grid
    shift_x = (x1 - _w) + _w * np.arange(0, 3) + _w / 2  # [128, 3]
    shift_y = (y1 - _h) + _h * np.arange(0, 3) + _h / 2
    offset = torch.from_numpy(np.hstack((-_w / 4, -_h / 4, _w / 4, _h / 4)))  # [128, 4]
    offset = offset.type_as(rois).float()

    # anchors for rois[0]
    shift_xx, shift_yy = np.meshgrid(shift_x[0], shift_y[0])  # [3, 3]
    offset0 = offset[0]  # [1, 4]
    shifts0 = torch.from_numpy(
        np.vstack((shift_xx.ravel(), shift_yy.ravel(),
                   shift_xx.ravel(), shift_yy.ravel())).transpose())  # [4, 9] -> [9, 4]
    shifts0 = shifts0.contiguous().type_as(rois).float()
    gt = offset0 + shifts0  # [9, 4]

    ww = gt[:, 2] - gt[:, 0] + 1
    hhh = gt[:, 3] - gt[:, 1] + 1
    min_size = 16
    keep = ((gt[:, 0] < 0) | (gt[:, 1] < 0) | (gt[:, 2] >= hw) | (gt[:, 3] >= hh) |
            (ww < min_size) | (hhh < min_size))
    if torch.sum(keep) > 0:
        gt[keep] = rois[0][1:5]
    # drop the center cell (the roi itself)
    gt = torch.cat((gt[:4, :], gt[5:, :]), 0)
    # gt = np.delete(gt, 4, 0)  # [8, 4]
    A = rois.size(0)   # 128
    K = gt.shape[0]    # 8

    for i in range(1, rois.size(0)):
        shift_xx, shift_yy = np.meshgrid(shift_x[i], shift_y[i])  # [3, 3]
        shifts = torch.from_numpy(
            np.vstack((shift_xx.ravel(), shift_yy.ravel(),
                       shift_xx.ravel(), shift_yy.ravel())).transpose())  # [4, 9] -> [9, 4]
        shifts = shifts.contiguous().type_as(rois).float()
        gti = offset[i] + shifts  # [9, 4]
        ww = gti[:, 2] - gti[:, 0] + 1
        hhh = gti[:, 3] - gti[:, 1] + 1
        keep = ((gti[:, 0] < 0) | (gti[:, 1] < 0) | (gti[:, 2] >= hw) | (gti[:, 3] >= hh) |
                (ww < min_size) | (hhh < min_size))
        if torch.sum(keep) > 0:
            gti[keep] = rois[i][1:5]
        gti = torch.cat((gti[:4, :], gti[5:, :]), 0)
        gt = torch.cat((gt, gti), 0)  # [1024, 4]

    gt = gt.view(batch_size, -1, 4)  # [1, 1024, 4]

    # check whether the anchors surpass the bounds
    all_anchors = rois[:, 1:5]  # [128, 4]
    total_anchors = int(K * A)  # 128*8 = 1024
    overlaps = bbox_overlaps_batch(all_anchors, gt)  # [1, 128, 1024]
    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)        # [1, 128]
    gt_max_overlaps, gt_argmax_overlaps = torch.max(overlaps, 1)  # [1, 1024]

    inds_inside = torch.zeros(gt.size(1)).view(-1)  # [1024]
    labels = gt.new(batch_size, inds_inside.size(0)).fill_(0)  # [1, 1024]

    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(
        overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

    width_cell = gt[:, :, 2] - gt[:, :, 0]
    height_cell = gt[:, :, 3] - gt[:, :, 1]
    # elementwise max/min of cell width and height (torch instead of numpy so
    # this also works on CUDA tensors)
    max_cell = torch.max(width_cell, height_cell)
    min_cell = torch.min(width_cell, height_cell)

    # cells intersecting a roi with IoU >= 0.3 are candidates
    labels[gt_max_overlaps >= 0.3] = 1
    if torch.sum(labels == 1) > 0:
        width = gt.new(batch_size, inds_inside.size(0)).fill_(0)
        height = gt.new(batch_size, inds_inside.size(0)).fill_(0)
        matched = gt_argmax_overlaps[labels == 1]
        width[labels == 1] = rois[matched][:, 3] - rois[matched][:, 1]
        height[labels == 1] = rois[matched][:, 4] - rois[matched][:, 2]
        max_side = torch.max(width, height)
        min_side = torch.min(width, height)
        labels[max_side >= max_cell] = 0
        labels[min_side < 1 / 3 * min_cell] = 0
    if torch.sum(labels == 1) > 0:
        gt[labels == 1] = rois[gt_argmax_overlaps[labels == 1]][:, 1:5]
    labels[:] = 0
    # gt = np.insert(gt, 0, values=labels, axis=2).view(-1, 5)
    gt = torch.cat((labels.view(batch_size, -1, 1), gt), -1)
    return gt
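# A toy, self-contained illustration of the 3x3 context-grid construction that
# context_anchor's header comments describe: tile a box's own width/height
# around it and drop the center cell, leaving 8 surrounding context boxes
# (names here are illustrative, not from the repo).
import numpy as np

def context_cells(x1, y1, x2, y2):
    w, h = x2 - x1, y2 - y1
    cells = []
    for row in range(3):
        for col in range(3):
            if row == 1 and col == 1:
                continue  # skip the box itself
            cells.append([x1 + (col - 1) * w, y1 + (row - 1) * h,
                          x2 + (col - 1) * w, y2 + (row - 1) * h])
    return np.array(cells)

print(context_cells(10, 10, 20, 30).shape)  # (8, 4)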
def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image,
                         rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps_batch(all_rois, gt_boxes)

    max_overlaps, gt_assignment = torch.max(overlaps, 2)

    batch_size = overlaps.size(0)
    num_proposal = overlaps.size(1)
    num_boxes_per_img = overlaps.size(2)

    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

    # labels = gt_boxes[:,:,4].contiguous().view(-1).index(offset.view(-1)).view(batch_size, -1)
    labels = gt_boxes[:, :, 4].contiguous().view(-1).index(
        (offset.view(-1),)).view(batch_size, -1)

    labels_batch = labels.new(batch_size, rois_per_image).zero_()
    rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
    gt_assign_batch = all_rois.new(batch_size, rois_per_image).zero_()
    gt_rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()

    # Guard against the case when an image has fewer than max_fg_rois_per_image
    # foreground RoIs
    for i in range(batch_size):
        fg_inds = torch.nonzero(max_overlaps[i] >= cfg.TRAIN.FG_THRESH).view(-1)
        fg_num_rois = fg_inds.numel()

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = torch.nonzero((max_overlaps[i] < cfg.TRAIN.BG_THRESH_HI) &
                                (max_overlaps[i] >= cfg.TRAIN.BG_THRESH_LO)).view(-1)
        bg_num_rois = bg_inds.numel()

        if fg_num_rois > 0 and bg_num_rois > 0:
            # sampling fg
            fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)

            # torch.randperm seems to have a bug in the multi-GPU setting that
            # causes a segfault; see https://github.com/pytorch/pytorch/issues/1868
            # for details. Use numpy instead.
            # rand_num = torch.randperm(fg_num_rois).long().cuda()
            rand_num = torch.from_numpy(
                np.random.permutation(fg_num_rois)).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

            # sampling bg
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

            # torch.rand seems to have a bug: it can generate a very large
            # number and cause an error. Use numpy rand instead.
            # rand_num = (torch.rand(bg_rois_per_this_image) * bg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(bg_rois_per_this_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
        elif fg_num_rois > 0 and bg_num_rois == 0:
            # sampling fg
            # rand_num = torch.floor(torch.rand(rois_per_image) * fg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(rois_per_image) * fg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num]
            fg_rois_per_this_image = rois_per_image
            bg_rois_per_this_image = 0
        elif bg_num_rois > 0 and fg_num_rois == 0:
            # sampling bg
            # rand_num = torch.floor(torch.rand(rois_per_image) * bg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(rois_per_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
            bg_rois_per_this_image = rois_per_image
            fg_rois_per_this_image = 0
        else:
            raise ValueError(
                "bg_num_rois = 0 and fg_num_rois = 0, this should not happen!")

        # The indices that we're selecting (both fg and bg)
        keep_inds = torch.cat([fg_inds, bg_inds], 0)

        # Select sampled values from various arrays:
        labels_batch[i].copy_(labels[i][keep_inds])

        # Clamp labels for the background RoIs to 0
        labels_batch[i][fg_rois_per_this_image:] = 0

        rois_batch[i] = all_rois[i][keep_inds]
        rois_batch[i, :, 0] = i

        # TODO: check the below line when batch_size > 1, no need to add offset here
        gt_assign_batch[i] = gt_assignment[i][keep_inds]
        gt_rois_batch[i] = gt_boxes[i][gt_assignment[i][keep_inds]]

    bbox_target_data = self._compute_targets_pytorch(
        rois_batch[:, :, 1:5], gt_rois_batch[:, :, :4])

    bbox_targets, bbox_inside_weights = \
        self._get_bbox_regression_labels_pytorch(bbox_target_data, labels_batch, num_classes)

    return labels_batch, rois_batch, gt_assign_batch, bbox_targets, bbox_inside_weights
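# A compact, self-contained sketch of the fg/bg sampling policy above
# (thresholds and quotas are illustrative): RoIs with IoU >= fg_thresh are
# foreground, RoIs in [bg_lo, bg_hi) are background, and each image contributes
# a fixed quota of each.
import numpy as np

def sample_fg_bg(max_overlaps, fg_thresh=0.5, bg_lo=0.1, bg_hi=0.5,
                 fg_quota=8, total=32):
    fg = np.flatnonzero(max_overlaps >= fg_thresh)
    bg = np.flatnonzero((max_overlaps >= bg_lo) & (max_overlaps < bg_hi))
    n_fg = min(fg_quota, len(fg))
    fg = np.random.permutation(fg)[:n_fg]
    # sample background with replacement to fill the remaining quota
    bg = bg[np.floor(np.random.rand(total - n_fg) * len(bg)).astype(int)]
    return np.concatenate([fg, bg])

overlaps = np.random.rand(300)
print(sample_fg_bg(overlaps).shape)  # (32,)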
def forward(self, input):
    # [1, 256, 5]
    rois = input[1]
    batch_size = rois.size(0)
    # [1, 256, 21]
    cls_prob = input[0].view(batch_size, -1, input[0].size(1))
    # [1, 20, 5]
    gt_boxes = input[2]
    # [1, 20, 3]
    crowdsourced_classes = input[3]
    num_annotator = crowdsourced_classes.size(2)
    # [3, 21, 21]
    alpha_con = input[4]
    rois_label = input[5]

    overlaps = bbox_overlaps_batch(rois, gt_boxes)
    max_overlaps, gt_assignment = torch.max(overlaps, 2)
    alpha = get_alpha(alpha_con)

    # TODO: may be buggy when batch_size != 1
    # indices of foreground rois
    fg_rois_ix = torch.nonzero(rois_label != 0).view(-1)
    for i in range(batch_size):
        # indices of foreground gt boxes
        fg_gt_boxes_ix = torch.nonzero(gt_boxes[i, :, 4] != 0).view(-1)
        for ix in fg_gt_boxes_ix:
            # collect all foreground rois whose max-IoU gt box is this one
            rois_ix = torch.nonzero(gt_assignment[i, fg_rois_ix] == ix).view(-1)
            # none of the remaining rois matches this gt box
            if rois_ix.size(0) == 0:
                continue
            # map back to indices over all rois
            rois_ix = fg_rois_ix[rois_ix]
            # infer the class of this fg gt box; for now p is the mean class
            # probability over its rois (IoU-weighted averaging could be tried
            # later), size [21]
            mean_cls_prob = torch.mean(cls_prob[i, rois_ix, :], dim=0)
            tmp = torch.ones(mean_cls_prob.size()).type_as(mean_cls_prob)
            for c in range(tmp.size(0)):
                tmp[c] *= mean_cls_prob[c]
                for j in range(num_annotator):
                    tmp[c] *= alpha[j, c, crowdsourced_classes[i, ix, j]]
            tmp = tmp / tmp.sum()
            # the inferred class must not be background (0)
            _, mu = torch.max(tmp[1:], dim=0)
            mu += 1
            # update rois_label
            rois_label[rois_ix] = mu
            # update alpha_con
            for j in range(num_annotator):
                alpha_con[j, mu, crowdsourced_classes[i, ix, j]] += 1
    return rois_label
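# A self-contained numeric sketch of the label-inference rule above (a
# Dawid-Skene-style update): the posterior over classes is the detector's mean
# class probability multiplied by each annotator's confusion-matrix likelihood
# for the label they gave, then renormalized. All numbers below are made up.
import torch

mean_cls_prob = torch.tensor([0.1, 0.5, 0.4])   # background, class 1, class 2
alpha = torch.tensor([[[0.8, 0.1, 0.1],         # one annotator's confusion
                       [0.1, 0.7, 0.2],         # matrix: alpha[j, c, k] =
                       [0.1, 0.2, 0.7]]])       # P(annotator j says k | true c)
given_label = 2                                 # this annotator voted class 2
posterior = mean_cls_prob.clone()
for j in range(alpha.size(0)):                  # product over annotators
    posterior *= alpha[j, :, given_label]
posterior /= posterior.sum()
mu = posterior[1:].argmax() + 1                 # inferred class, never background
print(posterior, mu)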
def forward(self, im_data, gt):
    # object detection
    if self.training:
        self._train_iter_conter += 1

    self.batch_size = im_data.size(0)

    gt_boxes = gt['boxes']
    gt_grasps = gt['grasps']
    gt_grasp_inds = gt['grasp_inds']
    num_boxes = gt['num_boxes']
    num_grasps = gt['num_grasps']
    im_info = gt['im_info']
    rel_mat = gt['rel_mat']

    # feed image data to base model to obtain base feature map
    base_feat = self.VMRN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.VMRN_obj_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # rois preprocess
    if self.training:
        obj_det_rois = rois[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
        roi_data = self.VMRN_obj_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        grasp_rois = rois.clone()
        rois = torch.cat([obj_det_rois, rois], 1)
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    pooled_feat = self._roi_pooing(base_feat, rois)

    if self.training:
        pooled_feat_shape = pooled_feat.size()
        pooled_feat = pooled_feat.contiguous().view((self.batch_size, -1) + pooled_feat_shape[1:])
        grasp_feat = pooled_feat[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:].\
            contiguous().view((-1,) + pooled_feat_shape[1:])
        pooled_feat = pooled_feat.view(pooled_feat_shape)
        if self._MGN_USE_POOLED_FEATS:
            rois_overlaps = bbox_overlaps_batch(
                rois[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:], gt_boxes)
            # bs x N_{rois}
            _, rois_inds = torch.max(rois_overlaps, dim=2)
            rois_inds += 1
            grasp_rois_mask = rois_label.view(-1) > 0
        else:
            raise NotImplementedError

    ############################################
    # OBJECT DETECTION
    ############################################
    # feed pooled features to top model
    VMRN_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.VMRN_obj_bbox_pred(VMRN_feat)
    if self.training:
        if self.class_agnostic:
            bbox_pred = bbox_pred.contiguous().view(self.batch_size, -1, 4)
        else:
            bbox_pred = bbox_pred.contiguous().view(self.batch_size, -1, 4 * self.n_classes)
        obj_det_bbox_pred = bbox_pred[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
        bbox_pred = bbox_pred[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
        if self.class_agnostic:
            obj_det_bbox_pred = obj_det_bbox_pred.contiguous().view(-1, 4)
            bbox_pred = bbox_pred.contiguous().view(-1, 4)
        else:
            obj_det_bbox_pred = obj_det_bbox_pred.contiguous().view(-1, 4 * self.n_classes)
            bbox_pred = bbox_pred.contiguous().view(-1, 4 * self.n_classes)

    # compute object classification probability
    cls_score = self.VMRN_obj_cls_score(VMRN_feat)
    cls_prob = F.softmax(cls_score, 1)
    if self.training:
        cls_score = cls_score.contiguous().view(self.batch_size, -1, self.n_classes)
        obj_det_cls_score = cls_score[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
        cls_score = cls_score[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
        obj_det_cls_score = obj_det_cls_score.contiguous().view(-1, self.n_classes)
        cls_score = cls_score.contiguous().view(-1, self.n_classes)
        cls_prob = cls_prob.contiguous().view(self.batch_size, -1, self.n_classes)
        obj_det_cls_prob = cls_prob[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
        cls_prob = cls_prob[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
        obj_det_cls_prob = obj_det_cls_prob.contiguous().view(-1, self.n_classes)
        cls_prob = cls_prob.contiguous().view(-1, self.n_classes)

    VMRN_obj_loss_cls = 0
    VMRN_obj_loss_bbox = 0

    # compute object detector loss
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    if self.training:
        # classification loss
        VMRN_obj_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        VMRN_obj_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    ############################################
    # VISUAL MANIPULATION RELATIONSHIP
    ############################################
    # online data
    if self.training:
        if self._train_iter_conter > cfg.TRAIN.VMRN.ONLINEDATA_BEGIN_ITER:
            obj_rois, obj_num = self._obj_det(
                obj_det_rois,
                obj_det_cls_prob.contiguous().view(self.batch_size, -1, self.n_classes),
                obj_det_bbox_pred.contiguous().view(
                    self.batch_size, -1, 4 if self.class_agnostic else 4 * self.n_classes),
                self.batch_size, im_info)
            obj_rois = obj_rois.type_as(gt_boxes)
            obj_num = obj_num.type_as(num_boxes)
        else:
            obj_rois = torch.FloatTensor([]).type_as(gt_boxes)
            obj_num = torch.LongTensor([]).type_as(num_boxes)
        obj_labels = None
    else:
        # when testing, these are the object detection results
        # TODO: SUPPORT MULTI-IMAGE BATCH
        obj_rois, obj_num = self._obj_det(
            rois,
            cls_prob.contiguous().view(self.batch_size, -1, self.n_classes),
            bbox_pred.contiguous().view(
                self.batch_size, -1, 4 if self.class_agnostic else 4 * self.n_classes),
            self.batch_size, im_info)
        if obj_rois.numel() > 0:
            obj_labels = obj_rois[:, 5]
            obj_rois = obj_rois[:, :5]
            obj_rois = obj_rois.type_as(gt_boxes)
            obj_num = obj_num.type_as(num_boxes)
        else:
            # there is no object detected
            obj_labels = torch.Tensor([]).type_as(gt_boxes).long()
            obj_rois = obj_rois.type_as(gt_boxes)
            obj_num = obj_num.type_as(num_boxes)

    # offline data
    if self.training:
        for i in range(self.batch_size):
            obj_rois = torch.cat([
                obj_rois,
                torch.cat([(i * torch.ones(num_boxes[i].item(), 1)).type_as(gt_boxes),
                           (gt_boxes[i][:num_boxes[i]][:, 0:4])], 1)
            ])
            obj_num = torch.cat([obj_num, torch.Tensor([num_boxes[i]]).type_as(obj_num)])

    obj_rois = Variable(obj_rois)

    if obj_rois.size(0) > 1:
        # filter out detections with only one object instance
        obj_pair_feat = self.VMRN_rel_op2l(base_feat, obj_rois, self.batch_size, obj_num)
        # obj_pair_feat = obj_pair_feat.detach()
        obj_pair_feat = self._rel_head_to_tail(obj_pair_feat)
        rel_cls_score = self.VMRN_rel_cls_score(obj_pair_feat)

        rel_cls_prob = F.softmax(rel_cls_score, 1)

        VMRN_rel_loss_cls = 0
        if self.training:
            self.rel_batch_size = rel_cls_prob.size(0)

            obj_pair_rel_label = self._generate_rel_labels(obj_rois, gt_boxes, obj_num, rel_mat)
            obj_pair_rel_label = obj_pair_rel_label.type_as(gt_boxes).long()

            rel_not_keep = (obj_pair_rel_label == 0)
            rel_keep = torch.nonzero(rel_not_keep == 0).view(-1)

            rel_cls_score = rel_cls_score[rel_keep]
            obj_pair_rel_label = obj_pair_rel_label[rel_keep]
            obj_pair_rel_label -= 1
            VMRN_rel_loss_cls = F.cross_entropy(rel_cls_score, obj_pair_rel_label)
        else:
            if (not cfg.TEST.VMRN.ISEX) and cfg.TRAIN.VMRN.ISEX:
                rel_cls_prob = rel_cls_prob[::2, :]
    else:
        VMRN_rel_loss_cls = 0
        # no detected relationships
        rel_cls_prob = Variable(torch.Tensor([]).type_as(obj_labels))

    rel_result = None
    if not self.training:
        if obj_rois.numel() > 0:
            pred_boxes = obj_rois.data[:, 1:5]
            pred_boxes[:, 0::2] /= im_info[0][3].item()
            pred_boxes[:, 1::2] /= im_info[0][2].item()
            rel_result = (pred_boxes, obj_labels, rel_cls_prob.data)
        else:
            rel_result = (obj_rois.data, obj_labels, rel_cls_prob.data)

    ############################################
    # ROI-BASED GRASP DETECTION
    ############################################
    if self.training:
        if (grasp_rois_mask > 0).sum().item() > 0:
            grasp_feat = self._MGN_head_to_tail(grasp_feat[grasp_rois_mask])
        else:
            # when there are no positive rois:
            grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_bbox_loss = Variable(torch.Tensor([0]).type_as(VMRN_obj_loss_bbox))
            grasp_cls_loss = Variable(torch.Tensor([0]).type_as(VMRN_obj_loss_cls))
            grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
            grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
            return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
                VMRN_obj_loss_cls, VMRN_obj_loss_bbox, VMRN_rel_loss_cls, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, \
                grasp_conf_label, grasp_all_anchors
    else:
        grasp_feat = self._MGN_head_to_tail(pooled_feat)

    grasp_pred = self.MGN_classifier(grasp_feat)
    # bs*N x K*A x 5, bs*N x K*A x 2
    grasp_loc, grasp_conf = grasp_pred

    # generate anchors: bs*N x K*A x 5
    if self.training:
        grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), grasp_rois)
    else:
        grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), rois)
    grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)

    if self.training:
        # filter out negative samples
        grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
        # bs*N x 1 x 1
        rois_w = (grasp_rois[:, :, 3] - grasp_rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_h = (grasp_rois[:, :, 4] - grasp_rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_w = rois_w[grasp_rois_mask]
        rois_h = rois_h[grasp_rois_mask]
        # bs*N x 1 x 1
        fsx = rois_w / grasp_conf.size(1)
        fsy = rois_h / grasp_conf.size(2)
        # bs*N x 1 x 1
        xleft = grasp_rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
        ytop = grasp_rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
        xleft = xleft[grasp_rois_mask]
        ytop = ytop[grasp_rois_mask]

    # reshape grasp_loc and grasp_conf
    grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
    grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
    grasp_batch_size = grasp_loc.size(0)
    # bs*N x K*A x 2
    grasp_prob = F.softmax(grasp_conf, 2)

    grasp_bbox_loss = 0
    grasp_cls_loss = 0
    grasp_conf_label = None
    if self.training:
        # inside weights indicate which bounding boxes should be regressed;
        # outside weights indicate two things:
        #   1. which bounding boxes should contribute to the classification loss,
        #   2. the balance between cls loss and bbox loss
        grasp_gt_xywhc = points2labels(gt_grasps)
        # bs*N x N_{Gr_gt} x 5
        grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
        # filter out negative samples
        grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

        # absolute coords to relative coords
        grasp_gt_xywhc[:, :, 0:1] -= xleft
        grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1], min=0)
        grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1], rois_w)
        grasp_gt_xywhc[:, :, 1:2] -= ytop
        grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2], min=0)
        grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2], rois_h)

        # grasp training data
        grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.MGN_proposal_target(
            grasp_conf, grasp_gt_xywhc, grasp_all_anchors, xthresh=fsx / 2, ythresh=fsy / 2)
        grasp_keep = Variable(grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
        grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0, grasp_keep.data)
        grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0, grasp_keep.data)
        grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)

        grasp_iw = Variable(grasp_iw)
        grasp_ow = Variable(grasp_ow)
        grasp_loc_label = Variable(grasp_loc_label)
        grasp_bbox_loss = _smooth_l1_loss(grasp_loc, grasp_loc_label, grasp_iw, grasp_ow, dim=[2, 1])

    return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
        VMRN_obj_loss_cls, VMRN_obj_loss_bbox, VMRN_rel_loss_cls, rois_label, \
        grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, \
        grasp_conf_label, grasp_all_anchors
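# A minimal sketch of the object-pair enumeration that feeds the relationship
# head above (VMRN_rel_op2l is the repo's module; this toy version only shows
# the pairing logic, not the union-box feature extraction, and its names are
# illustrative).
import itertools
import torch

def enumerate_pairs(obj_rois):
    # obj_rois: [N, 5] (batch_idx, x1, y1, x2, y2); every ordered pair of
    # distinct objects becomes one relationship query
    pairs = [(a, b) for a, b in itertools.permutations(range(obj_rois.size(0)), 2)]
    return torch.tensor(pairs)

rois = torch.rand(3, 5)
print(enumerate_pairs(rois))  # 6 ordered pairs for 3 objects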
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
    # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
    # roi_part_match[0], roi_part_match_overlap[0] and so on
    if self.training:
        # part_threshold = 0.25
        # first, calculate the overlaps among rois, binarize them, and split
        # the graph into connected subgraphs
        overlaps = bbox_overlaps_batch(rois, rois)[0]
        N_node, _ = overlaps.shape
        node_list = [i for i in range(N_node)]
        for j in range(N_node):
            for k in range(N_node):
                if overlaps[j][k] != 0:
                    overlaps[j][k] = 1
                if k == j:
                    overlaps[j][k] = 0
        idx_subgraph, vertex_subgraph = subgraph_split(overlaps)
        # (an earlier commented-out draft additionally matched rois to gt boxes
        # via max overlap to pick node_cls rois, then thresholded roi-to-cls
        # overlaps with part_threshold to collect node_part rois into
        # roi_part_idx_list)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # update 20191027 (superseded draft, kept as a note): built adj_jud/adj_rois
    # from roi_cls_idx_list and roi_part_idx_list, linked node_cls-node_cls and
    # node_cls-node_part edges, weighted them by cosine similarity, and applied
    # the GCN only to pooled_feat[adj_rois, :]
    if self.training:
        # adj_jud = np.zeros((N_node, N_node))
        adj_matrix = np.zeros((N_node, N_node))
        # for k in range(idx_subgraph):
        #     idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
        #     for s in range(idx_k.shape[0]):
        #         for t in range(idx_k.shape[0]):
        #             adj_jud[s, t] = 0 if s == t else 1
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        for s in range(N_node):
            for t in range(N_node):
                # if adj_jud[s, t] != 0:
                if s != t:
                    node_feat_s = pooled_feat[s, :]
                    node_feat_t = pooled_feat[t, :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
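# `subgraph_split` is referenced above but not defined in this file. A minimal
# sketch of plausible semantics (an assumption, not the repo's exact code):
# label the connected components of a binary adjacency matrix, returning the
# component count and a per-node component id.
import numpy as np

def subgraph_split_sketch(adj_bin):
    n = adj_bin.shape[0]
    comp = -np.ones(n, dtype=int)
    cur = 0
    for start in range(n):
        if comp[start] >= 0:
            continue
        stack = [start]
        comp[start] = cur
        while stack:
            u = stack.pop()
            for v in np.flatnonzero(adj_bin[u]):
                if comp[v] < 0:
                    comp[v] = cur
                    stack.append(v)
        cur += 1
    return cur, comp

adj = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]])
print(subgraph_split_sketch(adj))  # (2, array([0, 0, 1]))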
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
    # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
    # roi_part_match[0], roi_part_match_overlap[0] and so on
    if self.training:
        part_threshold = 0.5
        # rois has shape [1, 300, 5], but NMS may return fewer than 300
        # proposals, in which case the tail of rois is all zeros
        rois_none_idx = 300
        for i in range(rois.shape[1]):
            if rois[:, i, :].sum() <= 0:
                rois_none_idx = i
                break

        # first, calculate the overlaps among the valid rois and binarize them
        overlaps = bbox_overlaps_batch(rois[:, :rois_none_idx, :],
                                       rois[:, :rois_none_idx, :])[0]
        N_node, _ = overlaps.shape
        overlaps_bin = overlaps.cpu().data.numpy().copy()
        for j in range(N_node):
            for k in range(N_node):
                if overlaps_bin[j][k] >= part_threshold:
                    overlaps_bin[j][k] = 1
                else:
                    overlaps_bin[j][k] = 0
                if k == j:
                    overlaps_bin[j][k] = 0
        idx_subgraph, vertex_subgraph = subgraph_split(overlaps_bin)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # update 20191105: build graph for rois based on index (default: batch_size = 1)
    if self.training:
        roi_all_idx_list = []
        roi_cls_idx_list = []
        roi_part_idx_list = []
        adj_jud = np.zeros((0))
        adj_rois = torch.zeros(0).cuda().long()
        for k in range(idx_subgraph):
            idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
            roi_all_idx_list.append(idx_k)
        overlaps = overlaps.cpu().data.numpy()

        # pick the node with the largest degree in each subgraph as node_cls
        for i in range(len(roi_all_idx_list)):
            rois_idx = roi_all_idx_list[i]
            # only consider subgraphs with at least 5 rois; such a subgraph is
            # probably an object
            if rois_idx.shape[0] < 5:
                continue
            overlaps_once = overlaps[rois_idx][:, rois_idx]
            overlaps_once_bin = overlaps_bin[rois_idx][:, rois_idx]
            N_node_once, _ = overlaps_once.shape

            # update 20191104 (superseded): re-thresholded overlaps_once with
            # part_threshold and picked node_parts by nonzero overlap with the
            # max-degree node

            # update 20191107: all proposals
            overlaps_once_bin = np.sum(overlaps_once_bin, axis=1)
            rois_once_max_idx = np.argmax(overlaps_once_bin)
            roi_cls_idx_list.append(rois_idx[rois_once_max_idx])

            roi_part_tmp = []
            roi_iou = overlaps_once[rois_once_max_idx]
            roi_part_num_threshold = 10
            if roi_iou.shape[0] >= roi_part_num_threshold:
                roi_order = np.argsort(roi_iou)[::-1]
                for ii in range(roi_part_num_threshold):
                    roi_part_tmp.append(rois_idx[roi_order[ii]])
            else:
                for k in range(rois_idx.shape[0]):
                    if overlaps[rois_idx[rois_once_max_idx]][k] == 0:
                        continue
                    roi_part_tmp.append(rois_idx[k])
            roi_part_tmp = torch.from_numpy(np.array(roi_part_tmp))
            roi_part_idx_list.append(roi_part_tmp)

        roi_cls_idx_list = torch.from_numpy(np.array(roi_cls_idx_list)).cuda()
        for i in range(roi_cls_idx_list.shape[0]):
            adj_jud = np.concatenate((adj_jud, [1]))
            adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i + 1]))
            try:
                if roi_part_idx_list[i].shape[0] != 0:
                    adj_jud = np.concatenate(
                        (adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
                    adj_rois = torch.cat((adj_rois, roi_part_idx_list[i].cuda()))
            except IndexError:
                print('IndexError happened, continuing')
                continue

        node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]
        adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))

        # link edges for node_cls to node_cls
        for k in range(len(node_cls_idx) - 1):
            idx_node_cls_1 = node_cls_idx[k]
            idx_node_cls_2 = node_cls_idx[k + 1]
            adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
            adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1

        # link edges for node_cls to its related node_parts
        for k in range(len(node_cls_idx) - 1):
            idx_start = node_cls_idx[k]
            idx_end = node_cls_idx[k + 1]
            for s in range(idx_start, idx_end):
                for t in range(idx_start, idx_end):
                    if s == t:
                        adj_matrix_bin[s, t] = 0
                    else:
                        adj_matrix_bin[s, t] = 1

        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))
        for s in range(len(adj_jud)):
            for t in range(len(adj_jud)):
                if adj_matrix_bin[s, t] == 1:
                    node_feat_s = pooled_feat[adj_rois[s], :]
                    node_feat_t = pooled_feat[adj_rois[t], :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                else:
                    adj_matrix[s, t] = 0
        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        try:
            pooled_feat[adj_rois, :] = F.relu(self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
            pooled_feat[adj_rois, :] = F.relu(self.gcn2(pooled_feat[adj_rois, :], adj_matrix))
        except RuntimeError:
            print(pooled_feat[adj_rois, :].size())
            print(adj_matrix.size())

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to the base model to obtain the base feature map
    base_feat = self.RCNN_base(im_data)

    # feed the base feature map to the RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox, num_proposal = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # update 20191026: get the indices of graph nodes for rois (default: batch_size = 1).
    # To support batch_size > 1, roi2gt_assignment[0], roi_part_match[0],
    # roi_part_match_overlap[0] and so on would need to change accordingly.
    iou_threshold = 0.7
    dis_threshold = 0.01
    iou_size = 6
    edge_size = 4
    child_size = 4
    batch = 0

    if not self.training:
        rois = rois[:, :num_proposal, :]
        pooled_feat = pooled_feat[:num_proposal, :]

    # first, calculate the overlaps among rois; nodes with iou >= iou_threshold
    # are candidates for connection
    overlaps = bbox_overlaps_batch(rois, rois)
    _, N_node, _ = overlaps.shape

    # second, calculate the distances among rois; nodes with iou = 0 but
    # distance < dis_threshold are also candidates for connection
    distances = bbox_distances_batch(rois, rois)

    # update 20191115: build the graph for rois based on indices (default: batch_size = 1)
    # feature cosine similarity, as in PGCN
    dot_product_mat = torch.mm(pooled_feat, torch.transpose(pooled_feat, 0, 1))
    len_vec = torch.unsqueeze(torch.sqrt(torch.sum(pooled_feat * pooled_feat, dim=1)), dim=0)
    len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
    pooled_feat_sim_mat = dot_product_mat / len_mat

    # build the adjacency mask from iou and distance; the weights on connected
    # edges are the cosine similarities between node features
    mask = torch.eye(N_node, N_node).cuda()
    for s in range(N_node):
        overlap_node_index = (overlaps[batch][s] >= iou_threshold).nonzero()
        overlap_node_size = iou_size if overlap_node_index.shape[0] > iou_size \
            else overlap_node_index.shape[0]
        overlap_node_random = torch.randperm(overlap_node_index.shape[0])[0:overlap_node_size]
        overlap_node_index_select = overlap_node_index[overlap_node_random]

        # TODO(junjie) remove the iou boxes from the distance boxes.
        distance_node_index = (distances[batch][s] < dis_threshold).nonzero()
        distance_node_size = iou_size if distance_node_index.shape[0] > iou_size \
            else distance_node_index.shape[0]
        distance_node_random = torch.randperm(distance_node_index.shape[0])[0:distance_node_size]
        distance_node_index_select = distance_node_index[distance_node_random]

        _node_index_select = torch.cat(
            (overlap_node_index_select, distance_node_index_select), dim=0)
        if _node_index_select.shape[0] == 0:
            continue
        _node_index_select = _node_index_select.squeeze(dim=1)
        _node_size = child_size if _node_index_select.shape[0] > child_size \
            else _node_index_select.shape[0]
        _node_index_select_random = torch.randperm(_node_index_select.shape[0])[0:_node_size]
        node_index_select = _node_index_select[_node_index_select_random]
        mask[s, node_index_select] = 1

    adj_matrix = torch.mul(mask, pooled_feat_sim_mat)

    pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
    pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
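For reference, the graph construction above boils down to three steps: cosine similarity between RoI feature vectors, a sparse 0/1 connectivity mask sampled from the IoU and distance candidates, and propagation of features through the masked similarity matrix. A minimal self-contained sketch (the helper name, weight matrix, and toy sizes are illustrative assumptions, not part of the model):

import torch
import torch.nn.functional as F

def cosine_adjacency(feats, mask):
    # feats: N x D RoI features; mask: N x N {0,1} connectivity
    sim = feats @ feats.t()                       # pairwise dot products
    norms = feats.norm(dim=1, keepdim=True)       # N x 1 feature lengths
    sim = sim / (norms @ norms.t()).clamp(min=1e-8)
    return mask * sim                             # keep similarity only on edges

N, D = 8, 16
feats = torch.randn(N, D)
mask = torch.eye(N)
mask[0, 1] = mask[1, 0] = 1.0                     # connect two sample nodes
adj = cosine_adjacency(feats, mask)
W = torch.randn(D, D) * 0.01                      # stand-in for one GCN weight matrix
out = F.relu(adj @ feats @ W)                     # one propagation step
print(out.shape)                                  # torch.Size([8, 16])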
def forward(self, im_data, gt):
    batch_size = im_data.size(0)
    gt_boxes = gt['boxes']
    # for the jacquard dataset, the bounding box labels are set to -1. For training,
    # we flip them to 1, which does not affect the training process.
    if self.training:
        if gt_boxes[:, :, -1].sum().item() < 0:
            gt_boxes[:, :, -1] = -gt_boxes[:, :, -1]
    gt_grasps = gt['grasps']
    gt_grasp_inds = gt['grasp_inds']
    num_boxes = gt['num_boxes']
    num_grasps = gt['num_grasps']
    im_info = gt['im_info']

    for i in range(batch_size):
        if torch.sum(gt_grasp_inds[i]).item() == 0:
            gt_grasp_inds[i, :num_grasps[i].item()] = 1

    # features
    base_feat = self.base(im_data)
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
    else:
        rois_label = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    if cfg.MGN.USE_FIXED_SIZE_ROI:
        _rois = rois.view(-1, 5)
        rois_cx = (_rois[:, 1:2] + _rois[:, 3:4]) / 2
        rois_cy = (_rois[:, 2:3] + _rois[:, 4:5]) / 2
        rois_xmin = torch.clamp(rois_cx - 100, min=1, max=600)
        rois_ymin = torch.clamp(rois_cy - 100, min=1, max=600)
        rois_xmax = rois_xmin + 200
        rois_ymax = rois_ymin + 200
        rois_for_grasp = torch.cat(
            [_rois[:, :1], rois_xmin, rois_ymin, rois_xmax, rois_ymax], dim=1)

        if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois_for_grasp, base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois_for_grasp)
        elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois_for_grasp)
    else:
        if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the top model
    # grasp top
    if self.training:
        if self._ROIGN_USE_POOLED_FEATS:
            rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
            # bs x N_{rois}
            _, rois_inds = torch.max(rois_overlaps, dim=2)
            rois_inds += 1
            grasp_rois_mask = rois_label.view(-1) > 0
        else:
            raise NotImplementedError

    if self.training:
        if (grasp_rois_mask > 0).sum().item() > 0:
            grasp_feat = self._ROIGN_head_to_tail(pooled_feat[grasp_rois_mask])
        else:
            # when there are no positive rois, return dummy results
            grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_bbox_loss = Variable(torch.Tensor([0]).type_as(gt_grasps))
            grasp_cls_loss = Variable(torch.Tensor([0]).type_as(gt_grasps))
            grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
            grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
            return rois, rpn_loss_cls, rpn_loss_bbox, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
    else:
        grasp_feat = self._ROIGN_head_to_tail(pooled_feat)

    grasp_pred = self.ROIGN_classifier(grasp_feat)
    # bs*N x K*A x 5, bs*N x K*A x 2
    grasp_loc, grasp_conf = grasp_pred

    # generate anchors: bs*N x K*A x 5
    grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), rois)
    # filter out negative samples
    grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
    if self.training:
        grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
        # bs*N x 1 x 1
        rois_w = (rois[:, :, 3] - rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_h = (rois[:, :, 4] - rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_w = rois_w[grasp_rois_mask]
        rois_h = rois_h[grasp_rois_mask]
        # bs*N x 1 x 1
        fsx = rois_w / grasp_conf.size(1)
        fsy = rois_h / grasp_conf.size(2)
        # bs*N x 1 x 1
        xleft = rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
        ytop = rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
        xleft = xleft[grasp_rois_mask]
        ytop = ytop[grasp_rois_mask]

    # reshape grasp_loc and grasp_conf
    grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
    grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
    grasp_batch_size = grasp_loc.size(0)
    # bs*N x K*A x 2
    grasp_prob = F.softmax(grasp_conf, 2)

    grasp_bbox_loss = 0
    grasp_cls_loss = 0
    grasp_conf_label = None
    if self.training:
        # inside weights indicate which bounding boxes should be regressed;
        # outside weights indicate two things:
        # 1. which bounding boxes should contribute to the classification loss,
        # 2. the balance between the cls loss and the bbox loss
        grasp_gt_xywhc = points2labels(gt_grasps)
        # bs*N x N_{Gr_gt} x 5
        grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
        # filter out negative samples
        grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

        # absolute coords to RoI-relative coords
        grasp_gt_xywhc[:, :, 0:1] -= xleft
        grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1], min=0)
        grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1], rois_w)
        grasp_gt_xywhc[:, :, 1:2] -= ytop
        grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2], min=0)
        grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2], rois_h)

        # grasp training data
        grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.ROIGN_proposal_target(
            grasp_conf, grasp_gt_xywhc, grasp_all_anchors, xthresh=fsx / 2, ythresh=fsy / 2)
        grasp_keep = Variable(grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
        grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0, grasp_keep.data)
        grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0, grasp_keep.data)
        grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)
        grasp_iw = Variable(grasp_iw)
        grasp_ow = Variable(grasp_ow)
        grasp_loc_label = Variable(grasp_loc_label)
        grasp_bbox_loss = _smooth_l1_loss(grasp_loc, grasp_loc_label, grasp_iw, grasp_ow, dim=[2, 1])

    return rois, rpn_loss_cls, rpn_loss_bbox, rois_label, \
        grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
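In the training branch above, ground-truth grasp centres are moved from image coordinates into RoI-local coordinates by subtracting the RoI's top-left corner and clamping into the RoI extent. That conversion in isolation (to_roi_relative is a hypothetical helper, not part of the module):

import torch

def to_roi_relative(grasp_xy, xleft, ytop, roi_w, roi_h):
    # grasp_xy: N x 2 absolute (x, y) grasp centres
    rel = grasp_xy.clone()
    rel[:, 0] = (rel[:, 0] - xleft).clamp(min=0).clamp(max=roi_w)
    rel[:, 1] = (rel[:, 1] - ytop).clamp(min=0).clamp(max=roi_h)
    return rel

grasps = torch.tensor([[120., 80.], [10., 300.]])
print(to_roi_relative(grasps, xleft=100., ytop=60., roi_w=200., roi_h=200.))
# tensor([[ 20.,  20.],
#         [  0., 200.]])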
def sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image, rois_per_image):
    """Generate a random sample of template RoIs comprising foreground and
    background examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps_batch(all_rois, gt_boxes)
    max_overlaps, gt_assignment = torch.max(overlaps, 2)

    batch_size = overlaps.size(0)
    num_proposal = overlaps.size(1)
    num_boxes_per_img = overlaps.size(2)

    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

    labels = gt_boxes[:, :, 4].contiguous().view(-1)[(offset.view(-1), )].view(batch_size, -1)
    track_id = gt_boxes[:, :, 5].contiguous().view(-1)[(offset.view(-1), )].view(batch_size, -1)

    labels_batch = labels.new(batch_size, rois_per_image).zero_()
    track_id_batch = track_id.new(batch_size, rois_per_image).zero_() - 1
    rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()

    # Guard against the case when an image has fewer than max_fg_rois_per_image
    # foreground RoIs
    for i in range(batch_size):
        fg_inds = torch.nonzero(
            max_overlaps[i] >= cfg.SIAMESE.TEMPLATE_SEL_FG_THRESH).view(-1)
        fg_num_rois = fg_inds.numel()

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = torch.nonzero(
            ((max_overlaps[i] < cfg.SIAMESE.TEMPLATE_SEL_BG_THRESH_HI) &
             (max_overlaps[i] >= cfg.SIAMESE.TEMPLATE_SEL_BG_THRESH_LO)) |
            (max_overlaps[i] < 0)).view(-1)
        bg_num_rois = bg_inds.numel()

        if fg_num_rois > 0 and bg_num_rois > 0:
            # sampling fg
            fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
            # torch.randperm seems to have a bug in the multi-gpu setting that
            # causes a segfault. See https://github.com/pytorch/pytorch/issues/1868
            # for more details. Use numpy instead.
            rand_num = torch.from_numpy(
                np.random.permutation(fg_num_rois)).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

            # sampling bg
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
            # torch.rand also seems buggy here: it can generate very large numbers
            # and cause an error. Use numpy rand instead.
            rand_num = np.floor(np.random.rand(bg_rois_per_this_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
        elif fg_num_rois > 0 and bg_num_rois == 0:
            # sampling fg only
            rand_num = np.floor(np.random.rand(rois_per_image) * fg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num]
            fg_rois_per_this_image = rois_per_image
            bg_rois_per_this_image = 0
        elif bg_num_rois > 0 and fg_num_rois == 0:
            # sampling bg only
            rand_num = np.floor(np.random.rand(rois_per_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
            bg_rois_per_this_image = rois_per_image
            fg_rois_per_this_image = 0
        else:
            print('overlaps:', overlaps.shape)
            print('max_overlaps:', max_overlaps)
            raise ValueError(
                "template proposal layer bg_num_rois = 0 and fg_num_rois = 0, this should not happen!")

        # The indices that we're selecting (both fg and bg)
        keep_inds = torch.cat([fg_inds, bg_inds], 0)

        # Select sampled values from various arrays:
        labels_batch[i].copy_(labels[i][keep_inds])
        track_id_batch[i].copy_(track_id[i][keep_inds])

        # Clamp labels for the background RoIs to 0
        if fg_rois_per_this_image < rois_per_image:
            labels_batch[i][fg_rois_per_this_image:] = 0
            track_id_batch[i][fg_rois_per_this_image:] = -1

        rois_batch[i] = all_rois[i][keep_inds]
        rois_batch[i, :, 0] = i

    return rois_batch, labels_batch, track_id_batch
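Condensed, the sampling strategy above is: take up to a fixed quota of foreground RoIs without replacement (numpy permutation as the randperm workaround), then fill the remainder with background RoIs sampled with replacement. A sketch under the assumption of scalar thresholds standing in for the cfg.SIAMESE values; subsample is a hypothetical name, and the both-empty case (which the full function treats as an error) is omitted:

import numpy as np
import torch

def subsample(max_overlaps, fg_thresh=0.5, bg_hi=0.5, bg_lo=0.1,
              rois_per_image=8, fg_fraction=0.25):
    fg_inds = torch.nonzero(max_overlaps >= fg_thresh).view(-1)
    bg_inds = torch.nonzero((max_overlaps < bg_hi) & (max_overlaps >= bg_lo)).view(-1)
    fg_num = min(int(rois_per_image * fg_fraction), fg_inds.numel())
    perm = torch.from_numpy(np.random.permutation(fg_inds.numel())).long()
    fg_keep = fg_inds[perm[:fg_num]]              # fg: without replacement
    bg_num = rois_per_image - fg_num
    rand = torch.from_numpy(np.floor(np.random.rand(bg_num) * bg_inds.numel())).long()
    bg_keep = bg_inds[rand]                       # bg: with replacement, as above
    return torch.cat([fg_keep, bg_keep])

overlaps = torch.tensor([0.9, 0.7, 0.3, 0.2, 0.15, 0.4, 0.05, 0.6])
print(subsample(overlaps))                        # 2 fg indices followed by 6 bg indices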
def forward(self, im_data, gt):
    batch_size = im_data.size(0)
    gt_boxes = gt['boxes']
    gt_grasps = gt['grasps']
    gt_grasp_inds = gt['grasp_inds']
    num_boxes = gt['num_boxes']
    num_grasps = gt['num_grasps']
    im_info = gt['im_info']

    # features
    base_feat = self.base(im_data)
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the top model
    # grasp top
    if self.training:
        if self._MGN_USE_POOLED_FEATS:
            rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
            # bs x N_{rois}
            _, rois_inds = torch.max(rois_overlaps, dim=2)
            rois_inds += 1
            grasp_rois_mask = rois_label.view(-1) > 0
        else:
            raise NotImplementedError

    # bbox top
    bbox_feat = self._RCNN_head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(bbox_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(bbox_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        if cfg.TRAIN.COMMON.USE_FOCAL_LOSS:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label, reduce=False)
            focal_loss_factor = torch.pow(
                (1 - cls_prob[range(int(cls_prob.size(0))), rois_label]),
                cfg.TRAIN.COMMON.FOCAL_LOSS_GAMMA)
            RCNN_loss_cls = torch.mean(RCNN_loss_cls * focal_loss_factor)
        else:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.training:
        if (grasp_rois_mask > 0).sum().item() > 0:
            grasp_feat = self._MGN_head_to_tail(pooled_feat[grasp_rois_mask])
        else:
            # when there are no positive rois, return dummy results
            grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
            grasp_bbox_loss = Variable(torch.Tensor([0]).type_as(RCNN_loss_bbox))
            grasp_cls_loss = Variable(torch.Tensor([0]).type_as(RCNN_loss_cls))
            grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
            grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
    else:
        grasp_feat = self._MGN_head_to_tail(pooled_feat)

    grasp_pred = self.MGN_classifier(grasp_feat)
    # bs*N x K*A x 5, bs*N x K*A x 2
    grasp_loc, grasp_conf = grasp_pred

    # generate anchors: bs*N x K*A x 5
    grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), rois)
    # filter out negative samples
    grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
    if self.training:
        grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
        # bs*N x 1 x 1
        rois_w = (rois[:, :, 3] - rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_h = (rois[:, :, 4] - rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
        rois_w = rois_w[grasp_rois_mask]
        rois_h = rois_h[grasp_rois_mask]
        # bs*N x 1 x 1
        fsx = rois_w / grasp_conf.size(1)
        fsy = rois_h / grasp_conf.size(2)
        # bs*N x 1 x 1
        xleft = rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
        ytop = rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
        xleft = xleft[grasp_rois_mask]
        ytop = ytop[grasp_rois_mask]

    # reshape grasp_loc and grasp_conf
    grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
    grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
    grasp_batch_size = grasp_loc.size(0)
    # bs*N x K*A x 2
    grasp_prob = F.softmax(grasp_conf, 2)

    grasp_bbox_loss = 0
    grasp_cls_loss = 0
    grasp_conf_label = None
    if self.training:
        # inside weights indicate which bounding boxes should be regressed;
        # outside weights indicate two things:
        # 1. which bounding boxes should contribute to the classification loss,
        # 2. the balance between the cls loss and the bbox loss
        grasp_gt_xywhc = points2labels(gt_grasps)
        # bs*N x N_{Gr_gt} x 5
        grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
        # filter out negative samples
        grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

        # absolute coords to RoI-relative coords
        grasp_gt_xywhc[:, :, 0:1] -= xleft
        grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1], min=0)
        grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1], rois_w)
        grasp_gt_xywhc[:, :, 1:2] -= ytop
        grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2], min=0)
        grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2], rois_h)

        # grasp training data
        grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.MGN_proposal_target(
            grasp_conf, grasp_gt_xywhc, grasp_all_anchors, xthresh=fsx / 2, ythresh=fsy / 2)
        grasp_keep = Variable(grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
        grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0, grasp_keep.data)
        grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0, grasp_keep.data)
        grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)
        grasp_iw = Variable(grasp_iw)
        grasp_ow = Variable(grasp_ow)
        grasp_loc_label = Variable(grasp_loc_label)
        grasp_bbox_loss = _smooth_l1_loss(grasp_loc, grasp_loc_label, grasp_iw, grasp_ow, dim=[2, 1])

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
        grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
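The USE_FOCAL_LOSS branch above reweights the per-RoI cross entropy by (1 - p_correct)^gamma so that easy, confidently-classified RoIs contribute less to the loss. The same computation stands alone as below (focal_ce is a hypothetical name; reduction='none' replaces the deprecated reduce=False, and the alpha class-balancing term of the original focal-loss formulation is omitted, matching the code above):

import torch
import torch.nn.functional as F

def focal_ce(cls_score, labels, gamma=2.0):
    ce = F.cross_entropy(cls_score, labels, reduction='none')   # per-RoI CE
    prob = F.softmax(cls_score, dim=1)
    p_correct = prob[torch.arange(cls_score.size(0)), labels]   # prob of the true class
    return torch.mean(ce * (1.0 - p_correct) ** gamma)          # down-weight easy RoIs

scores = torch.randn(4, 3)
labels = torch.tensor([0, 2, 1, 2])
print(focal_ce(scores, labels))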
def forward(self, input):
    # input: (rpn_cls_prob.data, gt_boxes, num_boxes, im_info, crowdsourced_classes, alpha_con)
    # rpn_cls_prob: e.g. torch.Size([1, 18, 50, 37])
    rpn_cls_prob = input[0]
    gt_boxes = input[1]
    num_boxes = input[2]
    im_info = input[3]
    crowdsourced_classes = input[4]
    alpha_con = input[5]
    batch_size = gt_boxes.size(0)

    # enumerate the coordinates of every anchor
    feat_height, feat_width = rpn_cls_prob.size(2), rpn_cls_prob.size(3)
    # _feat_stride (16) is the ratio between the image and the feature map
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(rpn_cls_prob).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(gt_boxes)  # move to the same gpu
    # all_anchors: e.g. torch.Size([1850, 9, 4]) before flattening
    all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    all_anchors = all_anchors.view(K * A, 4)

    # remove anchors that cross the image boundary
    keep = ((all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < int(im_info[0][1])) &
            (all_anchors[:, 3] < int(im_info[0][0])))
    # indices of the kept anchors
    inds_inside = torch.nonzero(keep).view(-1)
    anchors = all_anchors[inds_inside, :]

    # find the predicted fg probability corresponding to each gt_box from rpn_cls_prob
    overlaps = bbox_overlaps_batch(anchors, gt_boxes)
    # arggt_max_overlaps: e.g. torch.Size([1, 20])
    _, arggt_max_overlaps = torch.max(overlaps, 1)
    index = inds_inside[arggt_max_overlaps]
    reshape_rpn_cls_prob = rpn_cls_prob.view(batch_size, 2, -1)
    # the first set of _num_anchors channels are the bg probs,
    # the second set are the fg probs
    # gt_boxes_cls <=> p
    gt_boxes_cls = torch.gather(reshape_rpn_cls_prob[:, 1], 1, index)

    alpha = get_alpha(alpha_con)
    sensitivity = get_sensitivity(alpha)
    specificity = get_specificity(alpha)
    if DEBUG:
        print('sensitivity: ', sensitivity)
        print('specificity: ', specificity)
    a = get_a(sensitivity, crowdsourced_classes)
    b = get_b(specificity, crowdsourced_classes)
    if DEBUG:
        print('a: ', a)
        print('b: ', b)
        print('gt_boxes_cls: ', gt_boxes_cls)  # Size [1, 20]
    mu = binary_get_mu(a, b, gt_boxes_cls)
    if DEBUG:
        print('mu: ', mu)

    bg_index = torch.nonzero(mu < 0.5)
    # update alpha_con for the entries [i, 0, j]
    update_alpha_con_rpn(alpha_con, bg_index, crowdsourced_classes)
    if bg_index.size() != torch.Size([0]):
        bg_index = bg_index.t()
        gt_boxes[bg_index[0], bg_index[1], :] = 0

    return gt_boxes
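Reading a and b as the likelihoods of the observed crowdsourced labels under the foreground (z = 1) and background (z = 0) hypotheses, and gt_boxes_cls as the RPN prior p, binary_get_mu is presumably the Bayes posterior sketched below. binary_mu is a hypothetical stand-in written to mirror that interpretation, not the actual implementation:

import torch

def binary_mu(a, b, p):
    # posterior that a box is a true object, given annotator likelihoods and prior p
    return a * p / (a * p + b * (1.0 - p))

p = torch.tensor([0.9, 0.2])   # RPN foreground probabilities
a = torch.tensor([0.8, 0.8])   # likelihood of the crowd labels given z = 1
b = torch.tensor([0.3, 0.3])   # likelihood of the crowd labels given z = 0
print(binary_mu(a, b, p))      # tensor([0.9600, 0.4000]); mu < 0.5 is relabelled background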
def forward(self, data_batch):
    im_data = data_batch[0]
    im_info = data_batch[1]
    gt_boxes = data_batch[2]
    gt_grasps = data_batch[3]
    num_boxes = data_batch[4]
    num_grasps = data_batch[5]
    rel_mat = data_batch[6]
    gt_grasp_inds = data_batch[7]

    # object detection
    if self.training:
        self.iter_counter += 1

    self.batch_size = im_data.size(0)
    # feed image data to the base model to obtain the base feature map
    base_feat = self.FeatExt(im_data)

    ### GENERATE ROIs
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
    if self.training:
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = \
            self._get_header_train_data(rois, gt_boxes, num_boxes)
        pos_rois_labels = [(rois_label[i * rois.size(1): (i + 1) * rois.size(1)] > 0)
                           for i in range(self.batch_size)]
        od_rois = [rois[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
    else:
        rois_label, rois_target, rois_inside_ws, rois_outside_ws = None, None, None, None
        od_rois = rois.data

    pooled_feat = self._roi_pooling(base_feat, rois)

    ### OBJECT DETECTION
    cls_score, cls_prob, bbox_pred = self._get_obj_det_result(pooled_feat)
    RCNN_loss_bbox, RCNN_loss_cls = 0, 0
    if self.training:
        RCNN_loss_bbox, RCNN_loss_cls = self._obj_det_loss_comp(
            cls_score, cls_prob, bbox_pred, rois_label, rois_target,
            rois_inside_ws, rois_outside_ws)
    cls_prob = cls_prob.contiguous().view(self.batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.contiguous().view(self.batch_size, rois.size(1), -1)

    ### VISUAL MANIPULATION RELATIONSHIP DETECTION
    # object detection results used before relationship detection
    if self.training:
        od_cls_prob = [cls_prob[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
        od_bbox_pred = [bbox_pred[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
    else:
        od_cls_prob = cls_prob.data
        od_bbox_pred = bbox_pred.data

    # generate object RoIs.
    obj_rois, obj_num = torch.Tensor([]).type_as(rois), torch.Tensor([]).type_as(num_boxes)
    # online data (membership test: `== 'all' or 'online'` would always be
    # truthy, since a non-empty string is true)
    if not self.training or cfg.TRAIN.VMRN.TRAINING_DATA in ('all', 'online'):
        obj_rois, obj_num = self._object_detection(
            od_rois, od_cls_prob, od_bbox_pred, self.batch_size, im_info.data)
    # offline data
    if self.training and cfg.TRAIN.VMRN.TRAINING_DATA in ('all', 'offline'):
        for i in range(self.batch_size):
            img_ind = (i * torch.ones(num_boxes[i].item(), 1)).type_as(gt_boxes)
            obj_rois = torch.cat(
                [obj_rois, torch.cat([img_ind, (gt_boxes[i][:num_boxes[i]])], 1)])
        obj_num = torch.cat([obj_num, num_boxes])

    obj_labels = torch.Tensor([]).type_as(gt_boxes).long()
    if obj_rois.size(0) > 0:
        obj_labels = obj_rois[:, 5]
        obj_rois = obj_rois[:, :5]

    VMRN_rel_loss_cls = 0
    if (obj_num > 1).sum().item() > 0:
        rel_cls_score, rel_cls_prob = self._get_rel_det_result(base_feat, obj_rois, obj_num)
        if self.training:
            obj_pair_rel_label = self._generate_rel_labels(
                obj_rois, gt_boxes, obj_num, rel_mat, rel_cls_prob.size(0))
            VMRN_rel_loss_cls = self._rel_det_loss_comp(
                obj_pair_rel_label.type_as(gt_boxes).long(), rel_cls_score)
        else:
            rel_cls_prob = self._rel_cls_prob_post_process(rel_cls_prob)
    else:
        rel_cls_prob = torch.Tensor([]).type_as(cls_prob)

    rel_result = None
    if not self.training:
        if obj_rois.numel() > 0:
            pred_boxes = obj_rois.data[:, 1:5]
            pred_boxes[:, 0::2] /= im_info[0][3].item()
            pred_boxes[:, 1::2] /= im_info[0][2].item()
            rel_result = (pred_boxes, obj_labels, rel_cls_prob.data)
        else:
            rel_result = (obj_rois.data, obj_labels, rel_cls_prob.data)

    ### ROI-BASED GRASP DETECTION
    # 1. obtain grasp features of the positive RoIs and prepare grasp training data
    if self.training:
        rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
        # bs x N_{rois}
        _, rois_inds = torch.max(rois_overlaps, dim=2)
        rois_inds += 1
        grasp_rois_mask = rois_label.view(-1) > 0

        if (grasp_rois_mask > 0).sum().item() > 0:
            grasp_feat = self._MGN_head_to_tail(pooled_feat[grasp_rois_mask])
            grasp_rois = rois.view(-1, 5)[grasp_rois_mask]
            # process grasp ground truth, return: N_{gr_rois} x N_{Gr_gt} x 5
            grasp_gt_xywhc = points2labels(gt_grasps)
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]
        else:
            # when there are no positive rois, return dummy results
            # (keeping the same return arity as the final return below)
            grasp_loc = torch.Tensor([]).type_as(gt_grasps)
            grasp_prob = torch.Tensor([]).type_as(gt_grasps)
            grasp_bbox_loss = torch.Tensor([0]).type_as(gt_grasps)
            grasp_cls_loss = torch.Tensor([0]).type_as(gt_grasps)
            grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
            grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
            return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, VMRN_rel_loss_cls, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
    else:
        grasp_feat = self._MGN_head_to_tail(pooled_feat)

    # N_{gr_rois} x W x H x A*5, N_{gr_rois} x W x H x A*2
    grasp_loc, grasp_conf = self.FCGN_classifier(grasp_feat)
    feat_height, feat_width = grasp_conf.size(1), grasp_conf.size(2)
    # reshape grasp_loc and grasp_conf
    grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
    grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
    grasp_prob = F.softmax(grasp_conf, 2)

    # 2. calculate grasp loss
    grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = 0, 0, None
    if self.training:
        # N_{gr_rois} x K*A x 5
        grasp_all_anchors = self._generate_anchors(feat_height, feat_width, grasp_rois)
        grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = self._grasp_loss_comp(
            grasp_rois, grasp_conf, grasp_loc, grasp_gt_xywhc,
            grasp_all_anchors, feat_height, feat_width)
    else:
        # bs*N x K*A x 5
        grasp_all_anchors = self._generate_anchors(feat_height, feat_width, rois.view(-1, 5))

    return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
        RCNN_loss_cls, RCNN_loss_bbox, VMRN_rel_loss_cls, rois_label, \
        grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
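The relationship branch only fires when an image contains more than one detected object, because it classifies the relations of object pairs. A sketch of the pair enumeration presumably performed inside _get_rel_det_result (object_pairs is hypothetical, and the original may enumerate unordered rather than ordered pairs):

from itertools import permutations
import torch

def object_pairs(obj_rois):
    # obj_rois: N x 5 rows of (batch_ind, x1, y1, x2, y2)
    idx = list(permutations(range(obj_rois.size(0)), 2))
    return torch.tensor(idx)   # P x 2 index pairs into obj_rois

rois = torch.tensor([[0., 10, 10, 50, 50],
                     [0., 30, 30, 90, 90],
                     [0., 5, 60, 40, 95]])
print(object_pairs(rois))      # 6 ordered pairs for 3 objects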
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    rpn_cls_score = input[0]
    gt_boxes = input[1]
    im_info = input[2]
    num_boxes = input[3]

    # map of shape (..., H, W)
    height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)
    batch_size = gt_boxes.size(0)

    feat_height, feat_width = rpn_cls_score.size(2), rpn_cls_score.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(rpn_cls_score).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(gt_boxes)  # move to the same gpu
    all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    all_anchors = all_anchors.view(K * A, 4)

    total_anchors = int(K * A)

    keep = ((all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
            (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

    inds_inside = torch.nonzero(keep).view(-1)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
    bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
    bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

    overlaps = bbox_overlaps_batch(anchors, gt_boxes)

    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
    gt_max_overlaps, _ = torch.max(overlaps, 1)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[(max_overlaps <= cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_HI) &
               (max_overlaps >= cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_LO)] = 0

    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

    if torch.sum(keep) > 0:
        labels[keep > 0] = 1

    # fg label: above threshold IoU
    labels[max_overlaps >= cfg.SIAMESE.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[(max_overlaps <= cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_HI) &
               (max_overlaps >= cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_LO)] = 0

    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

    sum_fg = torch.sum((labels == 1).int(), 1)
    sum_bg = torch.sum((labels == 0).int(), 1)

    for i in range(batch_size):
        # subsample positive labels if we have too many
        if sum_fg[i] > num_fg:
            fg_inds = torch.nonzero(labels[i] == 1).view(-1)
            # torch.randperm seems to have a bug in the multi-gpu setting that
            # causes a segfault. See https://github.com/pytorch/pytorch/issues/1868
            # for more details. Use numpy instead.
            rand_num = torch.from_numpy(
                np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
            labels[i][disable_inds] = -1

        # recount fg after subsampling rather than reusing the stale sum_fg[i]
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum((labels == 1).int(), 1)[i]

        # subsample negative labels if we have too many
        if sum_bg[i] > num_bg:
            bg_inds = torch.nonzero(labels[i] == 0).view(-1)
            rand_num = torch.from_numpy(
                np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
            labels[i][disable_inds] = -1

    offset = torch.arange(0, batch_size) * gt_boxes.size(1)

    argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
    bbox_targets = _compute_targets_batch(
        anchors, gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

    # use a single value instead of 4 values for easy indexing
    bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

    positive_weights = None
    negative_weights = None
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples; note that `i` here is the last batch
        # index carried over from the loop above
        num_examples = torch.sum(labels[i] >= 0)
        if num_examples.item() > 0:
            positive_weights = 1.0 / num_examples.item()
            negative_weights = 1.0 / num_examples.item()
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # non-uniform weighting is not implemented here; the weights stay None
        # and the guard below leaves bbox_outside_weights at zero

    if positive_weights is not None and negative_weights is not None:
        bbox_outside_weights[labels == 1] = positive_weights
        bbox_outside_weights[labels == 0] = negative_weights

    labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

    outputs = []

    labels = labels.view(batch_size, height, width, A).permute(0, 3, 1, 2).contiguous()
    labels = labels.view(batch_size, 1, A * height, width)
    outputs.append(labels)

    bbox_targets = bbox_targets.view(batch_size, height, width, A * 4).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_targets)

    anchors_count = bbox_inside_weights.size(1)
    bbox_inside_weights = bbox_inside_weights.view(
        batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_inside_weights = bbox_inside_weights.contiguous().view(
        batch_size, height, width, 4 * A).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_inside_weights)

    bbox_outside_weights = bbox_outside_weights.view(
        batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_outside_weights = bbox_outside_weights.contiguous().view(
        batch_size, height, width, 4 * A).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_outside_weights)

    return outputs
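The anchor grid at the top of this function, base anchors broadcast over per-cell shifts, is worth seeing in isolation. shift_anchors is a hypothetical helper using the same meshgrid/vstack construction:

import numpy as np
import torch

def shift_anchors(base_anchors, feat_h, feat_w, stride):
    # base_anchors: A x 4 (x1, y1, x2, y2) boxes centred at the origin cell
    sx = np.arange(0, feat_w) * stride
    sy = np.arange(0, feat_h) * stride
    sx, sy = np.meshgrid(sx, sy)
    shifts = torch.from_numpy(
        np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).transpose()).float()
    A, K = base_anchors.size(0), shifts.size(0)   # K = feat_h * feat_w cells
    all_anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    return all_anchors.view(K * A, 4)

base = torch.tensor([[-8., -8., 8., 8.], [-16., -8., 16., 8.]])
print(shift_anchors(base, feat_h=2, feat_w=3, stride=16).shape)  # torch.Size([12, 4])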