def gt_boxes(self, fmap, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None, train_anchor_inds=None, proposals=None):
    """
    Builds ROIs straight from the ground-truth boxes (no region proposals).

    :param fmap: unused here; kept for interface compatibility.
    :param im_sizes: unused here; kept for interface compatibility.
    :param image_offset: offset onto which image we're on (0 for single GPU).
    :param gt_boxes: [num_gt, 4] GT boxes over the batch (required).
    :param gt_classes: [num_gt, 2] rows of (img_id, class).
    :param gt_rels: GT relations; when given while training, relation labels
        are assigned as well.
    :param train_anchor_inds: unused here; kept for interface compatibility.
    :param proposals: unused here; kept for interface compatibility.
    :return: (rois, labels, None, None, None, rel_labels)
    """
    assert gt_boxes is not None
    # Each ROI row is (img_ind, x1, y1, x2, y2); img_ind is local to this GPU.
    img_inds = (gt_classes[:, 0] - image_offset).float()
    gt_rois = torch.cat((img_inds.unsqueeze(1), gt_boxes), 1)

    if gt_rels is None or not self.training:
        # Eval / no-relation path: object labels come straight from gt_classes.
        return gt_rois, gt_classes[:, 1], None, None, None, None

    assigned_rois, obj_labels, rel_labels = proposal_assignments_gtbox(
        gt_rois.data, gt_boxes.data, gt_classes.data, gt_rels.data,
        image_offset, fg_thresh=0.5)
    return assigned_rois, obj_labels, None, None, None, rel_labels
def gt_boxes(self, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None):
    """
    Gets Ground-Truth boxes as ROIs.

    :param image_offset: offset onto what image we're on for MGPU training
        (if single GPU this is 0).
    :param gt_boxes: [num_gt, 4] GT boxes over the batch (required).
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class).
    :param gt_rels: [] gt relations; when given while training, relation
        labels are assigned as well.
    :return: (rois, labels, rel_labels)
    """
    assert gt_boxes is not None
    # Each ROI row is (img_ind, x1, y1, x2, y2); img_ind is local to this GPU.
    img_inds = (gt_classes[:, 0] - image_offset).float()
    gt_rois = torch.cat((img_inds.unsqueeze(1), gt_boxes), 1)

    if gt_rels is not None and self.training:
        # proposal_assignments_gtbox yields (rois, labels, rel_labels).
        return proposal_assignments_gtbox(
            gt_rois.data, gt_boxes.data, gt_classes.data, gt_rels.data,
            image_offset, fg_thresh=0.5)

    return gt_rois, gt_classes[:, 1], None
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_masks=None,
            gt_classes=None, gt_rels=None, pred_boxes=None, pred_masks=None,
            pred_fmaps=None, pred_dists=None):
    """
    Forward pass for detection (message-passing relation head).

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
        (if single GPU this is 0)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch (training only).
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_masks: GT masks; substituted for pred_masks while training.
    :param gt_rels: GT relations, consumed by proposal_assignments_gtbox
        while training.
    :param pred_boxes: at test time, rows of (img_ind, x1, y1, x2, y2) --
        NOTE(review): inferred from the column slicing below; confirm.
    :param pred_masks: predicted masks, forwarded to filter_dets_mask.
    :param pred_fmaps: per-box feature maps fed to the relation head.
    :param pred_dists: unused in this forward pass.
    :return: If train: the Result object (losses computed by the caller).
        If test: output of filter_dets_mask (prob dists, boxes, img inds,
        maxscores, classes).
    """
    result = Result()
    if self.training:
        # Build ROIs straight from GT: each row is (img_ind, x1, y1, x2, y2).
        im_inds = gt_classes[:, 0]
        rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
        # Assign relation labels on the GT boxes. `labels` is unused here
        # because rm_obj_labels is taken directly from gt_classes below.
        rois, labels, result.rel_labels = proposal_assignments_gtbox(
            rois.data, gt_boxes.data, gt_classes.data, gt_rels.data, image_offset)
        pred_boxes = gt_boxes
        pred_masks = gt_masks
        result.rm_obj_labels = gt_classes[:, 1]
    else:
        # Test time: first column of pred_boxes is the image index.
        im_inds = pred_boxes[:, 0].long()
        pred_boxes = pred_boxes[:, 1:]
    result.rel_dists = None
    # NOTE(review): at test time result.rel_labels was never assigned here, so
    # this relies on Result defaulting it (presumably to None) -- confirm.
    rel_inds = self.get_rel_inds(result.rel_labels, im_inds, pred_boxes)
    rois = torch.cat((im_inds[:, None].float(), pred_boxes), 1)
    visual_rep = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
    result.obj_fmap = self.obj_feature_map(pred_fmaps, rois)
    # Now do the approximation WHEREVER THERES A VALID RELATIONSHIP.
    result.rm_obj_dists, result.rel_dists = self.message_pass(
        F.relu(self.edge_unary(visual_rep)), self.obj_unary(result.obj_fmap),
        rel_inds[:, 1:])
    # result.box_deltas_update = box_deltas
    if self.training:
        return result

    # Inference: pick the best non-background class per box.
    # NOTE(review): F.softmax without dim= is deprecated; presumably dim=1 is
    # intended for these [num_obj, num_classes] scores -- confirm before changing.
    scores_nz = F.softmax(result.rm_obj_dists).data
    scores_nz[:, 0] = 0.0  # zero out the background column so it is never chosen
    result.obj_scores, score_ord = scores_nz[:, 1:].sort(dim=1, descending=True)
    # +1 restores the class index shifted by the [:, 1:] slice above.
    result.obj_preds = score_ord[:, 0] + 1
    result.obj_scores = result.obj_scores[:, 0]
    # # Decode here ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # if self.mode == 'predcls':
    #     # Hack to get the GT object labels
    #     result.obj_scores = result.rm_obj_dists.data.new(gt_classes.size(0)).fill_(1)
    #     result.obj_preds = gt_classes.data[:, 1]
    # elif self.mode == 'sgdet':
    #     order, obj_scores, obj_preds = filter_det(F.softmax(result.rm_obj_dists),
    #                                               pred_boxes,
    #                                               start_ind=0,
    #                                               max_per_img=100,
    #                                               thresh=0.00,
    #                                               pre_nms_topn=6000,
    #                                               post_nms_topn=300,
    #                                               nms_thresh=0.3,
    #                                               nms_filter_duplicates=True)
    #     idx, perm = torch.sort(order)
    #     result.obj_preds = rel_inds.new(result.rm_obj_dists.size(0)).fill_(1)
    #     result.obj_scores = result.rm_obj_dists.data.new(result.rm_obj_dists.size(0)).fill_(0)
    #     result.obj_scores[idx] = obj_scores.data[perm]
    #     result.obj_preds[idx] = obj_preds.data[perm]
    # else:
    #     scores_nz = F.softmax(result.rm_obj_dists).data
    #     scores_nz[:, 0] = 0.0
    #     result.obj_scores, score_ord = scores_nz[:, 1:].sort(dim=1, descending=True)
    #     result.obj_preds = score_ord[:, 0] + 1
    #     result.obj_scores = result.obj_scores[:, 0]
    result.obj_preds = Variable(result.obj_preds)
    result.obj_scores = Variable(result.obj_scores)
    # Set result's bounding boxes to be size
    # [num_boxes, topk, 4] instead of considering every single object assignment.
    # twod_inds = arange(result.obj_preds.data) * self.num_classes + result.obj_preds.data
    #
    # if self.mode == 'sgdet':
    #     bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
    # else:
    #     # Boxes will get fixed by filter_dets function.
    #     bboxes = result.rm_box_priors
    # NOTE(review): implicit-dim softmax again; presumably dim=1 -- confirm.
    rel_rep = F.softmax(result.rel_dists)
    return filter_dets_mask(pred_boxes, pred_masks, result.obj_scores,
                            result.obj_preds, rel_inds[:, 1:], rel_rep)
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_masks=None, gt_classes=None, gt_rels=None, pred_boxes=None, pred_masks=None, pred_fmaps=None, pred_dists=None): """ Forward pass for detection :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE] :param im_sizes: A numpy array of (h, w, scale) for each image. :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0) :param gt_boxes: Training parameters: :param gt_boxes: [num_gt, 4] GT boxes over the batch. :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class) :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will be used to compute the training loss. Each (img_ind, fpn_idx) :return: If train: scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels if test: prob dists, boxes, img inds, maxscores, classes pred_fmaps N*256*14*14 pred_boxes N*4 pred_masks N*28*28 pred_dists N*85 """ #print(pred_fmaps.shape, pred_boxes.shape, pred_masks.shape, pred_dists.shape) if self.training: im_inds = gt_classes[:, 0] rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1) # actually is rel_assignment for sgcls # 指定rel的gt, roi不发生变化 rois, labels, rel_labels = proposal_assignments_gtbox( rois.data, gt_boxes.data, gt_classes.data, gt_rels.data, image_offset) #boxes = rois[:, 1:] pred_boxes = rois[:, 1:] pred_masks = gt_masks pred_dists = Variable(to_onehot(labels.data, self.num_classes)) else: im_inds = pred_boxes[:, 0].long() pred_boxes = pred_boxes[:, 1:] labels = gt_classes[:, 1] rel_labels = None pred_dists = Variable( to_onehot(pred_dists.data.long(), self.num_classes)) rois = torch.cat((im_inds[:, None].float(), pred_boxes), 1) result = Result() #pred_fmaps = pred_fmaps * self.downsample(pred_masks[:, None, :, :]) #result.obj_fmap = self.roi_fmap_obj(pred_fmaps.view(len(pred_fmaps), -1)) result.obj_fmap = self.obj_feature_map(pred_fmaps, rois) result.rm_obj_dists = pred_dists result.rm_obj_labels = labels 
result.rel_labels = rel_labels #result.boxes_all = None rel_inds = self.get_rel_inds(result.rel_labels, im_inds, pred_boxes) #rois = torch.cat((im_inds[:, None].float(), boxes), 1) # result.obj_fmap = self.obj_feature_map(result.fmap, rois) # print(pred_fmaps[0][0][0]) # print(result.rm_obj_labels[0]) # print(result.rm_obj_dists[0][:10]) # print(pred_boxes.data[[0]]) # Prevent gradients from flowing back into score_fc from elsewhere result.rm_obj_dists, result.obj_preds, edge_ctx = self.context( result.obj_fmap, result.rm_obj_dists, im_inds, result.rm_obj_labels if self.training or self.mode == 'predcls' else None, pred_boxes.data, None) #print(fdsafds) if edge_ctx is None: edge_rep = self.post_emb(result.obj_preds) else: edge_rep = self.post_lstm(edge_ctx) # Split into subject and object representations edge_rep = edge_rep.view(edge_rep.size(0), 2, self.pooling_dim) subj_rep = edge_rep[:, 0] obj_rep = edge_rep[:, 1] prod_rep = subj_rep[rel_inds[:, 1]] * obj_rep[rel_inds[:, 2]] vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:]) prod_rep = prod_rep * vr # if self.use_vision: # vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:]) # if self.limit_vision: # # exact value TBD # prod_rep = torch.cat((prod_rep[:, :2048] * vr[:, :2048], prod_rep[:, 2048:]), 1) # else: # prod_rep = prod_rep * vr if self.use_tanh: prod_rep = F.tanh(prod_rep) result.rel_dists = self.rel_compress(prod_rep) if self.use_bias: result.rel_dists = result.rel_dists + self.freq_bias.index_with_labels( torch.stack(( result.obj_preds[rel_inds[:, 1]], result.obj_preds[rel_inds[:, 2]], ), 1)) if self.training: return result twod_inds = arange( result.obj_preds.data) * self.num_classes + result.obj_preds.data result.obj_scores = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds] # # Bbox regression # if self.mode == 'sgdet': # bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4) # else: # # Boxes will get fixed by filter_dets function. 
# bboxes = result.rm_box_priors rel_rep = F.softmax(result.rel_dists, dim=1) return filter_dets_mask(pred_boxes, pred_masks, result.obj_scores, result.obj_preds, rel_inds[:, 1:], rel_rep)