def forward(self, obj_fmaps, obj_logits, vr, rel_inds, obj_labels=None, boxes_per_cls=None):
    """
    Pick object classes and score relationships from concatenated pair features.

    :param obj_fmaps: per-object feature rows; indexed with columns 1 and 2 of rel_inds
    :param obj_logits: per-object class scores; returned as-is unless mode is 'predcls'
    :param vr: per-relationship feature rows, one row per row of rel_inds
    :param rel_inds: relationship index rows; columns 1 and 2 index into obj_fmaps
                     (column 0 is not read here)
    :param obj_labels: object labels; required in 'predcls' mode, and used as the
                       predictions whenever given outside sgdet evaluation
    :param boxes_per_cls: per-class boxes, only read during sgdet evaluation
    :return: (obj_dists2, obj_preds, rel_dists)
    """
    if self.mode == 'predcls':
        obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
    else:
        obj_dists2 = obj_logits

    if self.mode == 'sgdet' and not self.training:
        # NMS here for baseline
        probs = F.softmax(obj_dists2, 1)
        nms_mask = obj_dists2.data.clone()
        nms_mask.zero_()
        # Column 0 is skipped both here and in the final argmax.
        for c_i in range(1, obj_dists2.size(1)):
            scores_ci = probs.data[:, c_i]
            boxes_ci = boxes_per_cls.data[:, c_i]

            keep = apply_nms(scores_ci, boxes_ci,
                             pre_nms_topn=scores_ci.size(0),
                             post_nms_topn=scores_ci.size(0),
                             nms_thresh=0.3)
            nms_mask[:, c_i][keep] = 1

        # Suppressed (box, class) entries get probability 0 before the argmax;
        # "+ 1" maps the index in the sliced tensor back to the full class index.
        obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
    else:
        obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

    # One batched gather + concat instead of a Python loop over relationships.
    # This also yields an empty (0, D) result for zero relationships, where
    # stacking an empty list would raise.
    num_rel = len(rel_inds)
    f_obj_rel = torch.cat(
        (obj_fmaps[rel_inds[:, 1]], obj_fmaps[rel_inds[:, 2]], vr[:num_rel]), 1)
    rel_dists = self.vr_fc(f_obj_rel)

    return obj_dists2, obj_preds, rel_dists
def forward(self, obj_logits, vr, obj_labels=None, boxes_per_cls=None):
    """
    Pick object classes and score relationships directly from the relation features.

    :param obj_logits: per-object class scores; returned as-is unless mode is 'predcls'
    :param vr: relationship features fed straight into self.vr_fc
    :param obj_labels: object labels; required in 'predcls' mode, and used as the
                       predictions whenever given outside sgdet evaluation
    :param boxes_per_cls: per-class boxes, only read during sgdet evaluation
    :return: (obj_dists2, obj_preds, rel_dists)
    """
    use_gt_classes = self.mode == 'predcls'
    obj_dists2 = (
        Variable(to_onehot(obj_labels.data, self.num_obj_cls))
        if use_gt_classes
        else obj_logits
    )

    if self.training or self.mode != 'sgdet':
        # Outside sgdet evaluation: trust the given labels, else take the best
        # class among columns 1.. ("+ 1" restores the full class index).
        if obj_labels is not None:
            obj_preds = obj_labels
        else:
            _, best_fg = obj_dists2[:, 1:].max(1)
            obj_preds = best_fg + 1
    else:
        # NMS here for baseline
        probs = F.softmax(obj_dists2, 1)
        survivors = obj_dists2.data.clone()
        survivors.zero_()
        num_classes = obj_dists2.size(1)
        for cls_idx in range(1, num_classes):
            cls_scores = probs.data[:, cls_idx]
            cls_boxes = boxes_per_cls.data[:, cls_idx]
            num_boxes = cls_scores.size(0)
            kept = apply_nms(cls_scores, cls_boxes,
                             pre_nms_topn=num_boxes,
                             post_nms_topn=num_boxes,
                             nms_thresh=0.3)
            survivors[:, cls_idx][kept] = 1

        masked_probs = Variable(survivors * probs.data, volatile=True)
        _, best_fg = masked_probs[:, 1:].max(1)
        obj_preds = best_fg + 1

    rel_dists = self.vr_fc(vr)
    return obj_dists2, obj_preds, rel_dists
def forward(self, obj_fmaps, obj_logits, rel_inds, vr, obj_labels=None, boxes_per_cls=None):
    """
    Reason relationship classes using knowledge of object and relationship co-occurrence.

    :param obj_fmaps: per-object features; projected with self.obj_proj
    :param obj_logits: per-object class scores; returned as-is unless mode is 'predcls'
    :param rel_inds: (num_rel, 3) index rows; columns 1 and 2 index the two objects
                     of each relationship
    :param vr: per-relationship features; projected with self.rel_proj
    :param obj_labels: object labels; required in 'predcls' mode, and used as the
                       predictions whenever given outside sgdet evaluation
    :param boxes_per_cls: per-class boxes, only read during sgdet evaluation
    :return: (obj_dists2, obj_preds, rel_dists)
    """
    if self.mode == 'predcls':
        obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
    else:
        obj_dists2 = obj_logits

    if self.mode == 'sgdet' and not self.training:
        # NMS here for baseline
        probs = F.softmax(obj_dists2, 1)
        nms_mask = obj_dists2.data.clone()
        nms_mask.zero_()
        # Column 0 is skipped both here and in the final argmax.
        for c_i in range(1, obj_dists2.size(1)):
            scores_ci = probs.data[:, c_i]
            boxes_ci = boxes_per_cls.data[:, c_i]

            keep = apply_nms(scores_ci, boxes_ci,
                             pre_nms_topn=scores_ci.size(0),
                             post_nms_topn=scores_ci.size(0),
                             nms_thresh=0.3)
            nms_mask[:, c_i][keep] = 1

        obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
    else:
        obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

    # Predicted class of both objects of every relationship: (num_rel, 2).
    sub_obj_preds = torch.cat((obj_preds[rel_inds[:, 1]].view(-1, 1),
                               obj_preds[rel_inds[:, 2]].view(-1, 1)), 1)

    obj_fmaps = self.obj_proj(obj_fmaps)
    vr = self.rel_proj(vr)

    # Per relationship: [first object row, second object row, relation row
    # repeated num_rel_cls times] -> (num_rel, 2 + num_rel_cls, dim).
    # Built with batched ops instead of a Python loop over relationships, which
    # also handles zero relationships (stacking an empty list would raise).
    num_rel = len(rel_inds)
    first_obj = obj_fmaps[rel_inds[:, 1]].unsqueeze(1)
    second_obj = obj_fmaps[rel_inds[:, 2]].unsqueeze(1)
    rel_nodes = vr[:num_rel].unsqueeze(1).repeat(1, self.num_rel_cls, 1)
    input_ggnn = torch.cat((first_obj, second_obj, rel_nodes), 1)

    rel_dists = self.ggnn_rel(rel_inds[:, 1:], sub_obj_preds, input_ggnn)

    return obj_dists2, obj_preds, rel_dists
def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None):
    """
    Fuse class and box-position embeddings, then pick a class for every object.

    :param obj_fmaps: accepted for interface compatibility; not read by this method
    :param obj_logits: per-object class scores; their softmax weights the class embeddings
    :param im_inds: accepted for interface compatibility; not read by this method
    :param obj_labels: object labels; required in 'predcls' mode, and used as the
                       predictions whenever given outside sgdet evaluation
    :param box_priors: boxes passed through center_size before the position embedding
    :param boxes_per_cls: per-class boxes, only read during sgdet evaluation
    :return: (obj_dists2, obj_preds, obj_pre_rep)
    """
    class_probs = F.softmax(obj_logits, dim=1)
    obj_embed = class_probs @ self.obj_embed.weight
    pos_embed = self.pos_embed(center_size(box_priors))

    # Only the class and position embeddings are fused; obj_fmaps is left out.
    fused_input = torch.cat((obj_embed, pos_embed), 1)
    obj_pre_rep = self.conver_fusion_feature(fused_input)

    # UNSURE WHAT TO DO HERE
    if self.mode == 'predcls':
        obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
    else:
        obj_dists2 = self.decoder_lin(obj_pre_rep)

    if self.training or self.mode != 'sgdet':
        if obj_labels is not None:
            obj_preds = obj_labels
        else:
            _, best_fg = obj_dists2[:, 1:].max(1)
            obj_preds = best_fg + 1
    else:
        # NMS here for baseline
        probs = F.softmax(obj_dists2, 1)
        survivors = obj_dists2.data.clone()
        survivors.zero_()
        for cls_idx in range(1, obj_dists2.size(1)):
            cls_scores = probs.data[:, cls_idx]
            cls_boxes = boxes_per_cls.data[:, cls_idx]
            num_boxes = cls_scores.size(0)
            kept = apply_nms(cls_scores, cls_boxes,
                             pre_nms_topn=num_boxes,
                             post_nms_topn=num_boxes,
                             nms_thresh=0.3)
            survivors[:, cls_idx][kept] = 1

        masked_probs = Variable(survivors * probs.data, volatile=True)
        _, best_fg = masked_probs[:, 1:].max(1)
        obj_preds = best_fg + 1

    return obj_dists2, obj_preds, obj_pre_rep
def filter_roi_proposals(box_preds, class_preds, boxes_per_im, nms_thresh=0.7, pre_nms_topn=12000, post_nms_topn=2000):
    """
    Run NMS over the proposals and return the survivors as ROIs.

    :param box_preds: proposal boxes; the rows that NMS keeps are returned
    :param class_preds: proposal scores, passed to apply_nms as its first argument
    :param boxes_per_im: forwarded to apply_nms
    :param nms_thresh: forwarded to apply_nms
    :param pre_nms_topn: forwarded to apply_nms
    :param post_nms_topn: forwarded to apply_nms
    :return: tensor whose first column is the image index and whose remaining
             columns are the kept rows of box_preds
    """
    kept_inds, kept_per_im = apply_nms(
        class_preds,
        box_preds,
        pre_nms_topn=pre_nms_topn,
        post_nms_topn=post_nms_topn,
        boxes_per_im=boxes_per_im,
        nms_thresh=nms_thresh,
    )

    # Image k contributes kept_per_im[k] copies of the value k.
    per_image_ids = []
    for image_id, num_kept in enumerate(kept_per_im):
        per_image_ids.append(image_id * torch.ones(num_kept))
    img_inds = torch.cat(per_image_ids, 0).cuda(box_preds.get_device())

    kept_boxes = box_preds[kept_inds]
    return torch.cat((img_inds[:, None], kept_boxes), 1)
def forward(self, im_inds, obj_fmaps, obj_logits, rel_inds, vr, obj_labels=None, boxes_per_cls=None):
    """
    Reason relationship classes using knowledge of object and relationship co-occurrence.

    The graph network (self.ggnn) is run once per image on that image's slice
    of objects and relationships, and the per-image outputs are concatenated.

    :param im_inds: per-object image index; its .data is split by enumerate_by_image
    :param obj_fmaps: per-object features; projected with self.obj_proj
    :param obj_logits: per-object class scores (replaced by one-hot logits in 'predcls')
    :param rel_inds: (num_rel, 3) index rows; column 0 is split by enumerate_by_image,
                     columns 1 and 2 are object indices
    :param vr: per-relationship features; projected with self.rel_proj
    :param obj_labels: object labels; required in 'predcls' mode, and used as the
                       predictions whenever given outside sgdet evaluation
    :param boxes_per_cls: per-class boxes, only read during sgdet evaluation
    :return: (obj_logits, obj_preds, rel_logits)
    """
    if self.mode == 'predcls':
        obj_logits = Variable(onehot_logits(obj_labels.data, self.num_obj_cls))
    obj_probs = F.softmax(obj_logits, 1)

    obj_fmaps = self.obj_proj(obj_fmaps)
    vr = self.rel_proj(vr)

    per_image_rel_logits = []
    per_image_obj_logits = []
    obj_spans = enumerate_by_image(im_inds.data)
    rel_spans = enumerate_by_image(rel_inds[:, 0])
    # NOTE(review): the two span sequences are paired positionally; this
    # presumably relies on every image having at least one relationship.
    for obj_span, rel_span in zip(obj_spans, rel_spans):
        _, obj_s, obj_e = obj_span
        _, rel_s, rel_e = rel_span
        # Object indices are shifted so they are relative to this image's slice.
        local_pairs = rel_inds[rel_s:rel_e, 1:] - obj_s
        rl, ol = self.ggnn(local_pairs,
                           obj_probs[obj_s:obj_e],
                           obj_fmaps[obj_s:obj_e],
                           vr[rel_s:rel_e])
        per_image_rel_logits.append(rl)
        per_image_obj_logits.append(ol)

    rel_logits = torch.cat(per_image_rel_logits, 0)

    if self.ggnn.refine_obj_cls:
        obj_logits = torch.cat(per_image_obj_logits, 0)

    obj_probs = F.softmax(obj_logits, 1)

    if self.training or self.mode != 'sgdet':
        if obj_labels is not None:
            obj_preds = obj_labels
        else:
            _, best_fg = obj_probs[:, 1:].max(1)
            obj_preds = best_fg + 1
    else:
        # NMS here for baseline
        survivors = obj_probs.data.clone()
        survivors.zero_()
        for cls_idx in range(1, obj_probs.size(1)):
            cls_scores = obj_probs.data[:, cls_idx]
            cls_boxes = boxes_per_cls.data[:, cls_idx]
            num_boxes = cls_scores.size(0)
            kept = apply_nms(cls_scores, cls_boxes,
                             pre_nms_topn=num_boxes,
                             post_nms_topn=num_boxes,
                             nms_thresh=0.3)
            survivors[:, cls_idx][kept] = 1

        masked_probs = Variable(survivors * obj_probs.data, volatile=True)
        _, best_fg = masked_probs[:, 1:].max(1)
        obj_preds = best_fg + 1

    return obj_logits, obj_preds, rel_logits
def filter_roi_proposals(box_preds, class_preds, boxes_per_im, nms_thresh=0.7, pre_nms_topn=12000, post_nms_topn=2000):
    """
    Apply NMS to the proposals and prepend each survivor's image index.

    :param box_preds: proposal boxes; the rows that NMS keeps are returned
    :param class_preds: proposal scores, passed to apply_nms as its first argument
    :param boxes_per_im: forwarded to apply_nms
    :param nms_thresh: forwarded to apply_nms
    :param pre_nms_topn: forwarded to apply_nms
    :param post_nms_topn: forwarded to apply_nms
    :return: tensor whose first column is the image index and whose remaining
             columns are the kept rows of box_preds
    """
    nms_kwargs = dict(
        pre_nms_topn=pre_nms_topn,
        post_nms_topn=post_nms_topn,
        boxes_per_im=boxes_per_im,
        nms_thresh=nms_thresh,
    )
    inds, im_per = apply_nms(class_preds, box_preds, **nms_kwargs)

    # For image number k, emit im_per[k] entries all equal to k.
    id_runs = [img_no * torch.ones(count) for img_no, count in enumerate(im_per)]
    device_id = box_preds.get_device()
    img_inds = torch.cat(id_runs, 0).cuda(device_id)

    image_column = img_inds[:, None]
    rois = torch.cat((image_column, box_preds[inds]), 1)
    return rois
def filter_det(scores, boxes, start_ind=0, max_per_img=100, thresh=0.001, pre_nms_topn=6000,
               post_nms_topn=300, nms_thresh=0.3, nms_filter_duplicates=True):
    """
    Filters the detections for a single image

    :param scores: [num_rois, num_classes]
    :param boxes: [num_rois, num_classes, 4]. Assumes the boxes have been clamped
    :param start_ind: offset added to the returned row indices
    :param max_per_img: Max detections per image
    :param thresh: Threshold for calling it a good box
    :param pre_nms_topn: forwarded to apply_nms
    :param post_nms_topn: forwarded to apply_nms
    :param nms_thresh: forwarded to apply_nms
    :param nms_filter_duplicates: True if we shouldn't allow for multiple detections of the
                                  same box (with different labels)
    :return: None when no class in columns 1.. scores above thresh for any box.
             Otherwise a tuple (inds_all, scores_all, labels_all): row indices into
             scores (shifted by start_ind), the score of each kept detection, and
             its class column, sorted by descending score and capped at max_per_img.
    """
    valid_cls = (scores[:, 1:].data.max(0)[0] > thresh).nonzero() + 1
    # Test emptiness with numel(), not dim(): an empty nonzero() result is not
    # guaranteed to be 0-dimensional (it can have shape (0, 1)), which would let
    # the dim() test fall through and skip this early exit.
    if valid_cls.numel() == 0:
        return None

    nms_mask = scores.data.clone()
    nms_mask.zero_()

    # Run NMS separately for every class that has at least one confident box.
    for c_i in valid_cls.squeeze(1).cpu():
        scores_ci = scores.data[:, c_i]
        boxes_ci = boxes.data[:, c_i]

        keep = apply_nms(scores_ci, boxes_ci,
                         pre_nms_topn=pre_nms_topn, post_nms_topn=post_nms_topn,
                         nms_thresh=nms_thresh)
        nms_mask[:, c_i][keep] = 1

    # Scores of suppressed (box, class) entries become 0.
    dists_all = Variable(nms_mask * scores.data, volatile=True)

    if nms_filter_duplicates:
        # At most one label per box: the best surviving class.
        scores_pre, labels_pre = dists_all.data.max(1)
        inds_all = scores_pre.nonzero()
        assert inds_all.dim() != 0
        inds_all = inds_all.squeeze(1)

        labels_all = labels_pre[inds_all]
        scores_all = scores_pre[inds_all]
    else:
        # Every surviving (box, class) pair is its own detection.
        nz = nms_mask.nonzero()
        assert nz.dim() != 0
        inds_all = nz[:, 0]
        labels_all = nz[:, 1]
        # Flat index into the row-major score matrix.
        scores_all = scores.data.view(-1)[inds_all * scores.data.size(1) + labels_all]

    # Limit to max per image detections
    vs, idx = torch.sort(scores_all, dim=0, descending=True)
    idx = idx[vs > thresh]
    if max_per_img < idx.size(0):
        idx = idx[:max_per_img]

    inds_all = inds_all[idx] + start_ind
    scores_all = Variable(scores_all[idx], volatile=True)
    labels_all = Variable(labels_all[idx], volatile=True)

    return inds_all, scores_all, labels_all
def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None):
    """
    Forward pass through the object and edge context

    :param obj_fmaps: 4096-dim roi feature maps
    :param obj_logits: result.rm_obj_dists.detach(), [#boxes, 151], detector scores
                       before softmax
    :param im_inds: per-box image indices, forwarded to the context modules
    :param obj_labels: od_obj_labels, gt
    :param box_priors: boxes from the faster rcnn output
    :param boxes_per_cls: per-class boxes; forwarded to self.obj_ctx, or read
                          directly during sgdet evaluation when nl_obj is 0
    :return: obj_dists2: [#boxes, 151], new score for boxes
             obj_preds: [#boxes], prediction/class value
             edge_ctx: [#boxes, 512], new features for boxes (None when nl_edge is 0)
    """
    # Object State:
    # obj_embed: [#boxes, 200], class embedding weighted by the detector's softmax
    class_probs = F.softmax(obj_logits, dim=1)
    obj_embed = class_probs @ self.obj_embed.weight

    # center_size returns boxes as (center_x, center_y, width, height)
    # pos_embed: [#boxes, 128]
    box_geometry = Variable(center_size(box_priors))
    pos_embed = self.pos_embed(box_geometry)

    # obj_pre_rep: [#boxes, 4424]
    obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

    if self.nl_obj > 0:
        # obj_ctx: [#boxes, 512], new feature vector for each box
        obj_dists2, obj_preds, obj_ctx = self.obj_ctx(
            obj_pre_rep,
            obj_logits,
            im_inds,
            obj_labels,
            box_priors,
            boxes_per_cls,
        )
    else:
        # No object-context layers: the fused representation is the context.
        obj_ctx = obj_pre_rep

        # UNSURE WHAT TO DO HERE
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = self.decoder_lin(obj_pre_rep)

        if self.training or self.mode != 'sgdet':
            if obj_labels is not None:
                obj_preds = obj_labels
            else:
                _, best_fg = obj_dists2[:, 1:].max(1)
                obj_preds = best_fg + 1
        else:
            # NMS here for baseline
            probs = F.softmax(obj_dists2, 1)
            survivors = obj_dists2.data.clone()
            survivors.zero_()
            for cls_idx in range(1, obj_dists2.size(1)):
                cls_scores = probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                num_boxes = cls_scores.size(0)
                kept = apply_nms(cls_scores, cls_boxes,
                                 pre_nms_topn=num_boxes,
                                 post_nms_topn=num_boxes,
                                 nms_thresh=0.3)
                survivors[:, cls_idx][kept] = 1

            masked_probs = Variable(survivors * probs.data, volatile=True)
            _, best_fg = masked_probs[:, 1:].max(1)
            obj_preds = best_fg + 1

    # Edge State:
    if not self.nl_edge > 0:
        return obj_dists2, obj_preds, None

    if self.pass_in_obj_feats_to_edge:
        edge_input = torch.cat((obj_fmaps, obj_ctx), 1)
    else:
        edge_input = obj_ctx

    # edge_ctx: [#boxes, 512]
    edge_ctx = self.edge_ctx(
        edge_input,
        obj_dists=obj_dists2.detach(),  # Was previously obj_logits.
        im_inds=im_inds,
        obj_preds=obj_preds,
        box_priors=box_priors,
    )
    return obj_dists2, obj_preds, edge_ctx
def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None, batch_size=None, rois=None, od_box_deltas=None, im_sizes=None, image_offset=None, gt_classes=None, gt_boxes=None, ):
    """
    Classify objects with the three-stage decoder and, in sgdet mode, filter boxes.

    :param obj_fmaps: per-box features, concatenated into the decoder input
    :param obj_logits: per-box class scores; their softmax weights the class embeddings
    :param im_inds: per-box image indices; replaced by the NMS output in the sgdet branch
    :param obj_labels: object labels; required in 'predcls' mode and used as the
                       predictions in non-sgdet modes when given
    :param box_priors: boxes; replaced by nms_boxes[:, 0] in the sgdet branch
    :param boxes_per_cls: not read by this method
    :param batch_size: used to reshape the decoder input in the sgdet branch
    :param rois: passed to self.nms_boxes and then subset by the kept indices
    :param od_box_deltas: passed (detached) to self.nms_boxes
    :param im_sizes: passed to self.nms_boxes
    :param image_offset: added to the image indices returned by self.nms_boxes
    :param gt_classes: ground truth used to assign labels in training; column 0 is
                       compared against image indices, column 1 is read as the label
    :param gt_boxes: ground-truth boxes used for the overlap computation in training
    :return: in 'sgdet' mode (obj_dists2, obj_preds, im_inds, box_priors,
             rm_obj_labels, rois, nms_boxes); otherwise (obj_dists2, obj_preds)
    """
    obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
    pos_embed = self.pos_embed(Variable(center_size(box_priors)))
    obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

    if self.mode == 'predcls':
        obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
    else:
        if self.mode == 'sgcls':
            # All boxes are fed to decoder_lin2 as a single batch entry; the
            # width 1024 is hard-coded here and below.
            obj_dists2 = self.decoder_lin1(obj_pre_rep)
            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, 1, 1024), 1)
            obj_dists2 = obj_dists2[1]
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
        else:
            # this is for sgdet
            obj_dists2 = self.decoder_lin1(obj_pre_rep)
            # Reorder the boxes, pad them per image for decoder_lin2, then pack
            # again and undo the reordering with inv_perm.
            perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, None, box_priors)
            obj_dists2 = obj_dists2[perm].contiguous()
            obj_dists2 = PackedSequence(obj_dists2, torch.tensor(ls_transposed))
            obj_dists2, lengths1 = pad_packed_sequence(obj_dists2, batch_first=False)
            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, batch_size, 1024), batch_size)[1]
            # NOTE(review): unpacking into two names assumes PackedSequence has
            # exactly two fields - confirm against the installed torch version.
            obj_dists2, _ = pack_padded_sequence(obj_dists2, lengths1, batch_first=False)
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
            obj_dists2 = obj_dists2[inv_perm]

            # NOTE(review): the original indentation was lost; this box-level NMS
            # is assumed to belong to the sgdet branch - confirm.
            if (not self.training and not self.mode == 'gtbox') or self.mode in ('sgdet', 'refinerels'):
                # try: dont apply nms here, but after own obj_classifier
                nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                    obj_dists2.clone().detach(),
                    rois,
                    od_box_deltas.clone().detach(),
                    im_sizes,
                )
                im_inds = nms_imgs + image_offset
                obj_dists2 = obj_dists2[nms_inds]
                # obj_fmap and box_deltas are not read again in this method.
                obj_fmap = obj_fmaps[nms_inds]
                box_deltas = od_box_deltas[nms_inds]
                box_priors = nms_boxes[:, 0]
                rois = rois[nms_inds]

                if self.training and not self.mode == 'gtbox':
                    # NOTE: If we're doing this during training, we need to assign labels here.
                    pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                    # Zero the overlap wherever the box's image index differs
                    # from column 0 of gt_classes.
                    pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                    max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                    rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                    # Boxes whose best overlap is below 0.5 get label 0.
                    rm_obj_labels[max_overlaps < 0.5] = 0
                else:
                    rm_obj_labels = None

    if self.mode == 'sgdet' and not self.training:  # have tried in training
        # NMS here for baseline
        probs = F.softmax(obj_dists2, 1)
        nms_mask = obj_dists2.data.clone()
        nms_mask.zero_()
        for c_i in range(1, obj_dists2.size(1)):
            scores_ci = probs.data[:, c_i]
            boxes_ci = nms_boxes.data[:, c_i]

            keep = apply_nms(scores_ci, boxes_ci,
                             pre_nms_topn=scores_ci.size(0), post_nms_topn=scores_ci.size(0),
                             nms_thresh=0.5)  # nms_thresh = 0.3 was the default
            nms_mask[:, c_i][keep] = 1

        # Suppressed entries get probability 0; "+ 1" restores the full class index.
        obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1  # this for sgdet test
        # obj_preds = obj_dists2[:, 1:].max(1)[1] + 1
    else:
        if self.mode == 'sgdet':
            # use gt
            obj_preds = rm_obj_labels if rm_obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
            # use predicted label
            # obj_preds = obj_dists2[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

    if self.mode == 'sgdet':
        return obj_dists2, obj_preds, im_inds, box_priors, rm_obj_labels, rois, nms_boxes
    else:
        return obj_dists2, obj_preds