def ext_feat_gt(im_data, im_info, gt_boxes, num_boxes):
    outs = graphRCNN(im_data, im_info, gt_boxes, num_boxes, True)
    base_feat, rois, rel_pairs, bbox_pred, x_obj, x_att, x_rel, \
        obj_cls_prob, att_cls_prob, rel_cls_prob, \
        obj_cls_score, att_cls_score, rel_cls_score = outs

    ### extract and save global feature
    global_feat = graphRCNN._head_to_tail(base_feat)
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_glb_gt'),
            global_feat.data.cpu().numpy())

    ### extract graph feature
    # filter out the rois with low-confidence object/attribute predictions
    rois_pop_id = bbox_proposal_1(obj_cls_prob, att_cls_prob, rois)
    rois_pop = rois[0][rois_pop_id]
    x_obj_pop = x_obj[rois_pop_id]
    x_att_pop = x_att[rois_pop_id]
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_obj_gt'),
            x_obj_pop.cpu().numpy())
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_att_gt'),
            x_att_pop.cpu().numpy())

    # class scores without the background column
    score_obj_pop = obj_cls_score[:, 1:][rois_pop_id]
    score_att_pop = att_cls_score[:, 1:][rois_pop_id]
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_obj_sc_gt'),
            score_obj_pop.cpu().numpy())
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_att_sc_gt'),
            score_att_pop.cpu().numpy())

    # get the popped rel pairs according to rois_pop_id:
    # kept_rois marks the surviving rois; kept_rois_to_idx maps an original
    # roi index to its position in the filtered set
    rois_pop_id = rois_pop_id.cpu()
    kept_rois = torch.zeros(rois[0].size(0))
    kept_rois_to_idx = torch.zeros(rois[0].size(0))
    kept_rois[rois_pop_id] = 1
    kept_rois_to_idx[rois_pop_id] = torch.arange(0, rois_pop_id.size(0))

    # keep only the relation pairs whose subject and object rois both survived
    rel_pairs = rel_pairs.cpu()
    sobj_inds = rel_pairs[0][:, 0]
    oobj_inds = rel_pairs[0][:, 1]
    rels_pop_id = (kept_rois[sobj_inds] + kept_rois[oobj_inds]).eq(2)
    if rels_pop_id.sum() > 0:
        rels_pop_id = rels_pop_id.nonzero().squeeze()
        rels_pop = rel_pairs[0][rels_pop_id]
        rels_pop[:, 0] = kept_rois_to_idx[rels_pop[:, 0]]
        rels_pop[:, 1] = kept_rois_to_idx[rels_pop[:, 1]]
        x_rel_pop = x_rel.cpu()[rels_pop_id]
        rel_score_pop = rel_cls_score.cpu()[rels_pop_id]
        np.save(os.path.join(dir_feat, str(img_id[0]) + '_rel_gt'),
                x_rel_pop.numpy())
        np.save(os.path.join(dir_feat, str(img_id[0]) + '_rel_sc_gt'),
                rel_score_pop.numpy())

    np.savez(os.path.join(dir_meta, str(img_id[0])),
             gt_box=gt_box, ori_box=ori_box, info=info)

    if vis:
        vis_dets(im_data, im_info, rois, bbox_pred, obj_cls_prob, imdb_vg)
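# A minimal sketch of reading the per-image arrays saved by ext_feat_gt back
# from disk. The suffixes mirror the np.save calls above (np.save appends
# '.npy'); load_gt_feats itself is a hypothetical helper, not part of the
# original pipeline, and assumes the module-level numpy/os imports.
def load_gt_feats(dir_feat, img_id):
    feats = {}
    for suffix in ('glb', 'obj', 'att', 'obj_sc', 'att_sc', 'rel', 'rel_sc'):
        path = os.path.join(dir_feat, '{}_{}_gt.npy'.format(img_id, suffix))
        if os.path.exists(path):  # rel features exist only if some pairs survived
            feats[suffix] = np.load(path)
    return feats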
def ext_feat_pred_hdf5_batch(model, im_data, im_info, gt_boxes, num_boxes):
    # extract graph representations from a batch of images; the caller saves
    # the returned arrays into hdf5
    outs = model(im_data, im_info, gt_boxes, num_boxes)
    base_feat, rois, rel_pairs, bbox_pred, x_obj, x_att, x_rel, \
        obj_cls_prob, att_cls_prob, rel_cls_prob, \
        obj_cls_score, att_cls_score, rel_cls_score = outs

    batch_size = im_data.size(0)

    ### extract global feature
    global_feat_batch = model._head_to_tail(base_feat)

    obj_feat_batch = []
    att_feat_batch = []
    rel_feat_batch = []

    # x_obj / x_rel are flattened over the batch; offset is the per-image stride
    offset = int(x_obj.size(0) / batch_size)
    x_rel = x_rel.cpu()
    rel_pairs = rel_pairs.cpu()
    for i in range(batch_size):
        ### extract graph feature
        # filter out the rois with low-confidence object/attribute predictions
        rois_pop_id, rois_pop_clss, rois_pop_scores, num_boxes = bbox_proposal(
            obj_cls_prob[i], att_cls_prob[i], rois[i])
        rois_pop = rois[:, rois_pop_id, :]

        x_obj_pop = x_obj[i * offset + rois_pop_id]
        score_obj_pop = obj_cls_score[i * offset + rois_pop_id][:, 1:]
        # obj_feat = torch.cat((x_obj_pop, score_obj_pop), 1).cpu()
        # NOTE: index i (not 0) so each image pairs its features with its own boxes
        obj_feat = torch.cat((x_obj_pop, rois_pop[i, :, 1:].contiguous()), 1).cpu()
        obj_feat_batch.append(obj_feat)

        x_att_pop = x_att[i * offset + rois_pop_id]
        score_att_pop = att_cls_score[i * offset + rois_pop_id][:, 1:]
        # att_feat = torch.cat((x_att_pop, score_att_pop), 1).cpu()
        att_feat = x_att_pop.cpu()
        att_feat_batch.append(att_feat)

        # get the popped rel pairs according to rois_pop_id
        rois_pop_id_cpu = rois_pop_id.cpu()
        kept_rois = torch.zeros(rois[0].size(0))
        kept_rois_to_idx = torch.zeros(rois[0].size(0))
        kept_rois[rois_pop_id_cpu] = 1
        kept_rois_to_idx[rois_pop_id_cpu] = torch.arange(0, rois_pop_id_cpu.size(0))

        # keep only the relation pairs whose subject and object rois both survived
        sobj_inds = rel_pairs[i][:, 0] - i * offset
        oobj_inds = rel_pairs[i][:, 1] - i * offset
        rels_pop_id = (kept_rois[sobj_inds] + kept_rois[oobj_inds]).eq(2)

        # fixed-size output: truncate to MAX_REL_PAIRS, zero-pad otherwise
        rel_feat = torch.zeros(MAX_REL_PAIRS, x_rel.size(1) + rel_pairs[i].size(1))
        if rels_pop_id.sum() > 0:
            rels_pop_id = rels_pop_id.nonzero().squeeze()
            rels_pop = rel_pairs[i][rels_pop_id] - i * offset
            rels_pop[:, 0] = kept_rois_to_idx[rels_pop[:, 0]]
            rels_pop[:, 1] = kept_rois_to_idx[rels_pop[:, 1]]
            x_rel_pop = x_rel[i * offset + rels_pop_id]
            rel_score_pop = rel_cls_score.cpu()[rels_pop_id]
            all_feat = torch.cat((rels_pop.float(), x_rel_pop), 1)
            if all_feat.size(0) > MAX_REL_PAIRS:
                rel_feat = all_feat[:MAX_REL_PAIRS, :]
            else:
                # NOTE: was `elif all_feat.size(0) < MIN_REL_PAIRS`, which silently
                # dropped pair counts between MIN_REL_PAIRS and MAX_REL_PAIRS
                rel_feat[:all_feat.size(0), :] = all_feat
        rel_feat_batch.append(rel_feat)

        if vis:
            vis_dets(im_data[i].unsqueeze(0), im_info[i].unsqueeze(0),
                     rois_pop[i].unsqueeze(0),
                     bbox_pred[i, rois_pop_id, :].unsqueeze(0),
                     obj_cls_prob[i, rois_pop_id, :].unsqueeze(0), imdb_vg)

    return global_feat_batch.data.cpu().numpy(), \
        torch.stack(obj_feat_batch, 0).numpy(), \
        torch.stack(att_feat_batch, 0).numpy(), \
        torch.stack(rel_feat_batch, 0).numpy()
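# A minimal sketch of a driver that appends the batched arrays returned by
# ext_feat_pred_hdf5_batch to resizable hdf5 datasets. The `loader` iterable,
# output path, and dataset names are illustrative assumptions, not part of
# the original pipeline.
def dump_feats_hdf5(model, loader, out_path):
    import h5py
    with h5py.File(out_path, 'w') as f:
        idx = 0
        for im_data, im_info, gt_boxes, num_boxes in loader:
            glb, obj, att, rel = ext_feat_pred_hdf5_batch(
                model, im_data, im_info, gt_boxes, num_boxes)
            bsz = glb.shape[0]
            for name, arr in (('glb', glb), ('obj', obj),
                              ('att', att), ('rel', rel)):
                if name not in f:
                    # resizable along the image axis so batches can be appended
                    f.create_dataset(name, shape=(0,) + arr.shape[1:],
                                     maxshape=(None,) + arr.shape[1:],
                                     dtype='float32')
                ds = f[name]
                ds.resize(idx + bsz, axis=0)
                ds[idx:idx + bsz] = arr
            idx += bsz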
def ext_feat_pred_hdf5(model, im_data, im_info, gt_boxes, num_boxes):
    # extract graph representations from an image; the caller saves the
    # returned arrays into hdf5
    outs = model(im_data, im_info, gt_boxes, num_boxes, is_oracle)
    base_feat, rois, rel_pairs, bbox_pred, x_obj, x_att, x_rel, \
        obj_cls_prob, att_cls_prob, rel_cls_prob, \
        obj_cls_score, att_cls_score, rel_cls_score = outs

    ### extract global feature
    global_obj_feat = model._head_to_tail(base_feat)
    global_att_feat = model._head_to_tail_att(base_feat)

    ### extract graph feature
    # filter out the rois with low-confidence object/attribute predictions
    # rois_pop_id, _, _, _ = bbox_proposal(obj_cls_prob[0], att_cls_prob[0], rois[0])
    _, rois_pop_id = bbox_proposal_fast(obj_cls_prob, att_cls_prob, rois)
    rois_pop_id = rois_pop_id.view(-1)
    rois_pop = rois[:, rois_pop_id, :]

    x_obj_pop = x_obj[rois_pop_id]
    score_obj_pop = obj_cls_score[:, 1:][rois_pop_id]
    # obj_feat = torch.cat((x_obj_pop, score_obj_pop), 1).cpu().numpy()
    obj_feat = torch.cat((x_obj_pop, rois_pop[0, :, 1:].contiguous()), 1).cpu().numpy()

    x_att_pop = x_att[rois_pop_id]
    score_att_pop = att_cls_score[:, 1:][rois_pop_id]
    # att_feat = torch.cat((x_att_pop, score_att_pop), 1).cpu().numpy()
    att_feat = x_att_pop.cpu().numpy()

    # get the popped rel pairs according to rois_pop_id
    rois_pop_id_cpu = rois_pop_id.cpu()
    kept_rois = torch.zeros(rois[0].size(0))
    kept_rois_to_idx = torch.zeros(rois[0].size(0))
    kept_rois[rois_pop_id_cpu] = 1
    kept_rois_to_idx[rois_pop_id_cpu] = torch.arange(0, rois_pop_id_cpu.size(0))

    # keep only the relation pairs whose subject and object rois both survived
    rel_pairs = rel_pairs.cpu()
    sobj_inds = rel_pairs[0][:, 0]
    oobj_inds = rel_pairs[0][:, 1]
    rels_pop_id = (kept_rois[sobj_inds] + kept_rois[oobj_inds]).eq(2)

    # relation features are currently disabled: rel_feat is returned as zeros
    rel_feat = torch.zeros(MAX_REL_PAIRS, x_rel.size(1) + rel_pairs[0].size(1))
    # if rels_pop_id.sum() > 0:
    #     rels_pop_id = rels_pop_id.nonzero().squeeze()
    #     rels_pop = rel_pairs[0][rels_pop_id]
    #     rels_pop[:, 0] = kept_rois_to_idx[rels_pop[:, 0]]
    #     rels_pop[:, 1] = kept_rois_to_idx[rels_pop[:, 1]]
    #     x_rel_pop = x_rel.cpu()[rels_pop_id]
    #     rel_score_pop = rel_cls_score.cpu()[rels_pop_id]
    #     all_feat = torch.cat((rels_pop.float(), x_rel_pop), 1)
    #     if all_feat.size(0) > MAX_REL_PAIRS:
    #         rel_feat = all_feat[:MAX_REL_PAIRS, :]
    #     else:
    #         rel_feat[:all_feat.size(0), :] = all_feat

    # np.savez(os.path.join(dir_meta, str(img_id[0])),
    #          rois_pop_clss=rois_pop_clss, rois_pop_score=rois_pop_scores,
    #          num_boxes=num_boxes,
    #          num_obj_cls=obj_cls_prob.size(2) - 1,
    #          num_att_cls=att_cls_prob.size(2) - 1,
    #          num_rel_cls=rel_cls_prob.size(2) - 1,
    #          gt_box=gt_box, ori_box=ori_box, info=info)

    if vis:
        vis_dets(im_data, im_info, rois_pop, bbox_pred[:, rois_pop_id, :],
                 obj_cls_prob[:, rois_pop_id, :], imdb_vg)

    return global_obj_feat.data.cpu().numpy(), \
        global_att_feat.data.cpu().numpy(), \
        obj_feat, att_feat, rel_feat.numpy()
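# A minimal sketch of storing the five arrays returned by ext_feat_pred_hdf5
# under one hdf5 group per image; the group layout and dataset names are
# illustrative assumptions, not part of the original pipeline.
def save_image_feats(h5file, img_id, feats):
    # h5file: an open h5py.File; feats: the 5-tuple returned above
    grp = h5file.create_group(str(img_id))
    for name, arr in zip(('glb_obj', 'glb_att', 'obj', 'att', 'rel'), feats):
        grp.create_dataset(name, data=arr, compression='gzip')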
def ext_feat_pred_hdf5_v2(model, im_data, im_info, gt_boxes, num_boxes):
    # extract graph representations from an image by running the model
    # stage-by-stage, filtering rois *before* relation proposal; the caller
    # saves the returned arrays into hdf5
    batch_size = im_data.size(0)  # the roi filtering below assumes batch size 1
    base_feat = model.RCNN_base_model(im_data)

    rois, rpn_loss_cls, rpn_loss_bbox = model.RCNN_rpn(
        base_feat, im_info.data, gt_boxes.data, num_boxes.data)
    # drop all-zero (padding) rois
    valid = rois.sum(2).view(-1).nonzero().view(-1)
    rois = rois[:, valid, :]
    rois = Variable(rois)

    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:],
                                   model.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = model.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = model.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = model.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    x_obj = model._head_to_tail(pooled_feat)  # (B x N) x D
    obj_cls_score = model.RCNN_obj_cls_score(x_obj)
    obj_cls_prob = F.softmax(obj_cls_score, dim=1)
    bbox_pred = model.RCNN_bbox_pred(x_obj)

    # get attribute scores
    if cfg.SHARE_OBJ_ATT_FEATURE:
        x_att = x_obj
    else:
        x_att = model._head_to_tail_att(pooled_feat)  # (B x N) x D
    att_cls_score = model.RCNN_att_cls_score(x_att)
    att_cls_prob = F.softmax(att_cls_score, dim=1)

    # filter rois first
    _, rois_pop_id = bbox_proposal_fast(obj_cls_prob.data.unsqueeze(0),
                                        att_cls_prob.data.unsqueeze(0), rois.data)
    rois_pop_id = rois_pop_id.view(-1)
    rois_pop = rois[:, rois_pop_id, :].data
    x_obj = x_obj[rois_pop_id]
    obj_cls_score = obj_cls_score[rois_pop_id]

    # get attribute features
    x_att = x_att[rois_pop_id]

    # propose relations between the surviving rois
    rel_feats = obj_cls_score.view(rois_pop.size(0), rois_pop.size(1),
                                   obj_cls_score.size(1))
    roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls = \
        model.RELPN_rpn(rois_pop, rel_feats, im_info.data, gt_boxes.data,
                        num_boxes.data, False)
    # drop all-zero (padding) pairs
    valid = roi_rel_pairs.sum(2).view(-1).nonzero().view(-1)
    roi_rel_pairs = roi_rel_pairs[:, valid, :]
    roi_pair_proposals = roi_pair_proposals[:, valid, :]
    roi_rel_pairs_score = roi_rel_pairs_score[:, valid, :]

    size_per_batch = x_obj.size(0)
    roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
    ind_subject = roi_pair_proposals_v[:, 0]
    ind_object = roi_pair_proposals_v[:, 1]

    # union box of each subject-object pair
    rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
    rois_pred = Variable(rois_pred)

    # do roi pooling based on the predicted (union) rois
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois_pred.view(-1, 5), base_feat.size()[2:],
                                   model.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_pred_feat = model.RELPN_roi_crop(base_feat,
                                                Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_pred_feat = F.max_pool2d(pooled_pred_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_pred_feat = model.RELPN_roi_align(base_feat, rois_pred.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_pred_feat = model.RELPN_roi_pool(base_feat, rois_pred.view(-1, 5))

    # combine subject, object and relation features together
    x_pred = model._head_to_tail_rel(pooled_pred_feat)

    if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:
        if cfg.GCN_HAS_ATTENTION:
            x_sobj = obj_cls_score[ind_subject]
            x_oobj = obj_cls_score[ind_object]
            attend_score = model.GRCNN_gcn_att1(x_sobj, x_oobj)  # N_rel x 1
            attend_score = attend_score.view(1, x_pred.size(0))
        else:
            attend_score = Variable(x_pred.data.new(1, x_pred.size(0)).fill_(1))

        # compute the initial maps: map_obj_att, map_obj_obj and map_obj_rel.
        # NOTE there are two ways to compute the map among objects. One is based
        # on the overlaps among object rois: the intuition is that rois with
        # overlaps should share some common features, so we need to exclude one
        # roi feature from another.
        # NOTE the other is based on the classification scores: the intuition is
        # that objects have co-occurrence patterns, e.g., buses appear more
        # frequently on roads.
        # assert x_obj.size() == x_att.size(), "the numbers of object features and attribute features should be the same"
        size_per_batch = int(x_obj.size(0))
        map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)
        if cfg.MUTE_ATTRIBUTES:
            map_obj_att.zero_()
            x_att = x_att.detach()
        map_obj_att = Variable(map_obj_att)

        # fully connect the objects within an image, minus the self-loops
        map_obj_obj = x_obj.data.new(x_obj.size(0), x_obj.size(0)).fill_(0.0)
        eye_mat = torch.eye(size_per_batch).type_as(x_obj.data)
        for i in range(batch_size):
            map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                        i * size_per_batch:(i + 1) * size_per_batch].fill_(1.0)
            map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                        i * size_per_batch:(i + 1) * size_per_batch] -= eye_mat
        map_obj_obj = Variable(map_obj_obj)

        # sparse subject/object-to-relation maps, weighted by the attention score
        map_sobj_rel = Variable(x_obj.data.new(x_obj.size(0), x_pred.size(0)).zero_())
        map_sobj_rel.scatter_(0, Variable(ind_subject.contiguous().view(1, x_pred.size(0))), attend_score)
        map_oobj_rel = Variable(x_obj.data.new(x_obj.size(0), x_pred.size(0)).zero_())
        map_oobj_rel.scatter_(0, Variable(ind_object.contiguous().view(1, x_pred.size(0))), attend_score)
        map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)
        if cfg.MUTE_RELATIONS:
            map_obj_rel.data.zero_()
            x_pred = x_pred.detach()

        # pass the graph representations through the gcn layers
        for i in range(cfg.GCN_LAYERS):
            x_obj, x_att, x_pred = model.GRCNN_gcn_feat(
                x_obj, x_att, x_pred, map_obj_att, map_obj_obj, map_obj_rel)

    # compute object classification scores
    obj_cls_score = model.RCNN_obj_cls_score(x_obj)
    obj_cls_prob = F.softmax(obj_cls_score, dim=1)

    # compute attribute classification scores
    att_cls_score = model.RCNN_att_cls_score(x_att)
    att_cls_prob = F.softmax(att_cls_score, dim=1)
    att_cls_log_prob = F.log_softmax(att_cls_score, dim=1)

    # compute relation classification scores
    rel_cls_score = model.RCNN_rel_cls_score(x_pred)
    rel_cls_prob = F.softmax(rel_cls_score, dim=1)

    obj_feat = torch.cat((x_obj.data, rois_pop[0, :, 1:].contiguous()), 1).cpu()
    score_att_pop = att_cls_score[:, 1:]
    att_feat = x_att.data.cpu()

    # rank relations by their best non-background class probability
    rel_cls_prob.data[:, 0] = 0
    val, ind = rel_cls_prob.max(1)
    _, order_rel = torch.sort(val, 0, True)

    # fixed-size output: keep the top MAX_REL_PAIRS relations, zero-pad otherwise
    rel_feat = torch.zeros(MAX_REL_PAIRS, x_pred.size(1) + 2)
    rel_pop_id = order_rel[:MAX_REL_PAIRS].data
    all_feat = torch.cat((roi_pair_proposals_v[rel_pop_id].float().cpu(),
                          x_pred[rel_pop_id].data.cpu()), 1)
    if all_feat.size(0) < MAX_REL_PAIRS:
        rel_feat[:all_feat.size(0), :] = all_feat
    else:
        rel_feat = all_feat

    if vis:
        vis_dets(im_data, im_info, rois_pop,
                 bbox_pred.unsqueeze(0)[:, rois_pop_id, :].data,
                 obj_cls_prob.unsqueeze(0).data, imdb_vg)

    global_obj_feat = model._head_to_tail(base_feat)
    global_att_feat = model._head_to_tail_att(base_feat)

    return global_obj_feat.data.cpu().numpy(), \
        global_att_feat.data.cpu().numpy(), \
        obj_feat.numpy(), att_feat.numpy(), rel_feat.numpy()
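# A tiny worked example of the scatter_ trick used in ext_feat_pred_hdf5_v2 to
# build the object-to-relation maps: for dim=0, m[index[0, j], j] = src[0, j],
# so column j of map_sobj_rel gets the attention weight of relation j placed in
# the row of its subject roi. Standalone numbers, not from the pipeline; assumes
# the module-level torch import.
def _scatter_demo():
    ind_subject = torch.LongTensor([0, 2, 1])  # subject roi of 3 relations
    attend = torch.ones(1, 3)                  # uniform attention weights
    m = torch.zeros(3, 3).scatter_(0, ind_subject.view(1, 3), attend)
    # m[0, 0] == m[2, 1] == m[1, 2] == 1; every other entry stays 0
    return m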
def ext_feat_pred(model, im_data, im_info, gt_boxes, num_boxes):
    outs = model(im_data, im_info, gt_boxes, num_boxes, is_oracle)
    base_feat, rois, rel_pairs, bbox_pred, x_obj, x_att, x_rel, \
        obj_cls_prob, att_cls_prob, rel_cls_prob, \
        obj_cls_score, att_cls_score, rel_cls_score = outs

    ### extract and save global feature
    global_feat = model._head_to_tail(base_feat)
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_glb'),
            global_feat.data.cpu().numpy())

    ### extract graph feature
    # filter out the rois with low-confidence object/attribute predictions
    rois_pop_id, rois_pop_clss, rois_pop_scores, num_boxes = bbox_proposal(
        obj_cls_prob, att_cls_prob, rois)
    rois_pop = rois[:, rois_pop_id, :]

    x_obj_pop = x_obj[rois_pop_id]
    score_obj_pop = obj_cls_score[:, 1:][rois_pop_id]
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_obj'),
            torch.cat((x_obj_pop, score_obj_pop), 1).cpu().numpy())

    x_att_pop = x_att[rois_pop_id]
    score_att_pop = att_cls_score[:, 1:][rois_pop_id]
    np.save(os.path.join(dir_feat, str(img_id[0]) + '_att'),
            torch.cat((x_att_pop, score_att_pop), 1).cpu().numpy())

    # get the popped rel pairs according to rois_pop_id
    rois_pop_id_cpu = rois_pop_id.cpu()
    kept_rois = torch.zeros(rois[0].size(0))
    kept_rois_to_idx = torch.zeros(rois[0].size(0))
    kept_rois[rois_pop_id_cpu] = 1
    kept_rois_to_idx[rois_pop_id_cpu] = torch.arange(0, rois_pop_id_cpu.size(0))

    # keep only the relation pairs whose subject and object rois both survived
    rel_pairs = rel_pairs.cpu()
    sobj_inds = rel_pairs[0][:, 0]
    oobj_inds = rel_pairs[0][:, 1]
    rels_pop_id = (kept_rois[sobj_inds] + kept_rois[oobj_inds]).eq(2)
    if rels_pop_id.sum() > 0:
        rels_pop_id = rels_pop_id.nonzero().squeeze()
        rels_pop = rel_pairs[0][rels_pop_id]
        rels_pop[:, 0] = kept_rois_to_idx[rels_pop[:, 0]]
        rels_pop[:, 1] = kept_rois_to_idx[rels_pop[:, 1]]
        x_rel_pop = x_rel.cpu()[rels_pop_id]
        rel_score_pop = rel_cls_score.cpu()[rels_pop_id]
        np.save(os.path.join(dir_feat, str(img_id[0]) + '_rel'),
                torch.cat((x_rel_pop, rel_score_pop), 1).numpy())

    np.savez(os.path.join(dir_meta, str(img_id[0])),
             rois_pop_clss=rois_pop_clss, rois_pop_score=rois_pop_scores,
             num_boxes=num_boxes,
             num_obj_cls=obj_cls_prob.size(2) - 1,
             num_att_cls=att_cls_prob.size(2) - 1,
             num_rel_cls=rel_cls_prob.size(2) - 1,
             gt_box=gt_box, ori_box=ori_box, info=info)

    if vis:
        vis_dets(im_data, im_info, rois_pop, bbox_pred[:, rois_pop_id, :],
                 obj_cls_prob[:, rois_pop_id, :], imdb_vg)
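# A tiny worked example of the kept_rois / kept_rois_to_idx pattern repeated in
# the extractors above: a relation pair survives only when both of its rois
# survive the filter, and surviving roi indices are remapped to their positions
# in the filtered set. Standalone numbers, not from the pipeline; assumes the
# module-level torch import.
def _reindex_demo():
    rois_pop_id = torch.LongTensor([1, 3])           # rois kept out of 4
    kept_rois = torch.zeros(4)
    kept_rois_to_idx = torch.zeros(4)
    kept_rois[rois_pop_id] = 1                       # membership mask
    # new compact index for each kept roi (.float() keeps dtypes matched)
    kept_rois_to_idx[rois_pop_id] = torch.arange(0, 2).float()
    rel_pairs = torch.LongTensor([[1, 3], [0, 3]])   # (subject, object) pairs
    keep = (kept_rois[rel_pairs[:, 0]] + kept_rois[rel_pairs[:, 1]]).eq(2)
    # keep == [1, 0]: only the (1, 3) pair survives
    survivors = rel_pairs[keep.nonzero().view(-1)]
    # remap to the compact index space: (1, 3) -> (0, 1)
    return kept_rois_to_idx[survivors.view(-1)].view(-1, 2)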