def _load_subset_ids(self, subset, sampler):
    """Return the unique image ids of a V-COCO subset, optionally subsampled.

    Args:
        subset: subset identifier forwarded unchanged to ``vu.load_vcoco``.
        sampler: falsy to keep every id; otherwise a fraction, in which case
            ``int(len(ids) * sampler)`` ids are drawn with ``random.sample``.

    Returns:
        A list of integer image ids without duplicates.
    """
    annotations = vu.load_vcoco(subset)
    # The image ids are taken from the first annotation entry only; ids are
    # repeated in that column, hence the de-duplication through a set.
    id_column = annotations[0]['image_id'][:, 0]
    unique_ids = list(set(id_column.astype(int).tolist()))

    if not sampler:
        return unique_ids

    # When changing the model, a sub-dataset is used to quickly show whether
    # something is wrong.
    keep_count = int(len(unique_ids) * sampler)
    return random.sample(unique_ids, keep_count)
def __init__(self, root_dir, set='trainval', transform=None, color_prob=0):
    """Store the configuration and load COCO / V-COCO annotations.

    Args:
        root_dir: dataset root; ``coco/annotations`` and ``data`` are looked
            up beneath it.
        set: subset suffix; the V-COCO split loaded is ``"vcoco_" + set``.
            (The name shadows the builtin but is part of the public
            signature, so it is kept.)
        transform: stored as ``self.transform``; not used in this method.
        color_prob: stored as ``self.color_prob``; not used in this method.
    """
    # Plain configuration attributes.
    self.root_dir = root_dir
    self.setname = set
    self.transform = transform
    self.color_prob = color_prob

    # COCO annotations are always read from the trainval2014 instances file,
    # whatever ``set`` is.
    coco_annotation_file = os.path.join(
        self.root_dir, "coco/annotations", "instances_trainval2014.json")
    self.coco = COCO(coco_annotation_file)

    # V-COCO annotations for the requested split.
    vcoco_split = "vcoco_" + set
    vcoco_data_dir = os.path.join(self.root_dir, "data")
    self.vcoco = vu.load_vcoco(vcoco_split, vcoco_data_dir)

    # The remaining loaders run in this order; presumably they rely on the
    # attributes set above -- their bodies are not visible here.
    self.image_ids = self.load_ids()
    self.load_classes()
    self.load_vcoco_classes()
    self.load_ann_by_image()
def select(data_const):
    """Select detections per image for every V-COCO subset and store them.

    For each of ``vcoco_train``, ``vcoco_test`` and ``vcoco_val`` this reads
    ``faster_rcnn_det.hdf5`` and ``nms_keep_indices.json`` from
    ``<data_const.proc_dir>/<subset>``, runs ``select_dets`` on every distinct
    image id of the subset's annotations and writes the outcome to
    ``<data_const.proc_dir>/<subset>/selected_coco_cls_dets.hdf5``.

    Each image gets one HDF5 group (named by ``str(img_id)``) holding:
        * ``boxes_scores_rpn_ids`` -- first value returned by ``select_dets``
        * ``start_end_ids``        -- second value returned by ``select_dets``
        * ``features``             -- the ``fc7_feaet`` rows picked by
          column 5 of ``boxes_scores_rpn_ids``

    Both HDF5 files are opened with context managers so they are closed even
    when processing an image raises.

    Args:
        data_const: object providing ``proc_dir``; it is also forwarded to
            ``select_dets``.
    """
    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        # create the folder/file to save corresponding detection results
        print('Select detection results for {} dataset'.format(subset.split('_')[1]))
        subset_dir = os.path.join(data_const.proc_dir, subset)
        io.mkdir_if_not_exists(subset_dir, recursive=True)

        print(f'Creating selected_coco_cls_dets.hdf5 file for {subset}...')
        hdf5_file = os.path.join(subset_dir, 'selected_coco_cls_dets.hdf5')
        with h5py.File(hdf5_file, 'w') as f:
            # Load the VCOCO annotations for image set
            vcoco = vu.load_vcoco(subset)
            img_id_list = vcoco[0]['image_id'][:, 0].tolist()

            # Load faster-rcnn detection results
            det_file = os.path.join(subset_dir, 'faster_rcnn_det.hdf5')
            with h5py.File(det_file, 'r') as all_faster_rcnn_det_data:
                all_nms_keep_indices = io.load_json_object(
                    os.path.join(subset_dir, 'nms_keep_indices.json'))

                print('Selecting boxes ...')
                # Image ids repeat in the annotations; handle each one once.
                for img_id in tqdm(set(img_id_list)):
                    key = str(img_id)
                    det_group = all_faster_rcnn_det_data[key]
                    boxes = det_group['boxes']
                    scores = det_group['scores']
                    # 'fc7_feaet' (sic) must stay misspelled: it has to match
                    # the dataset name used when the detections were written.
                    features = det_group['fc7_feaet']
                    nms_keep_indices = all_nms_keep_indices[key]

                    selected_dets, start_end_ids = select_dets(
                        boxes, scores, nms_keep_indices, data_const)

                    # Column 5 of the selected detections indexes the rows of
                    # the feature matrix; gather them as a (num_selected, D)
                    # array.
                    selected_feat = np.concatenate(
                        [np.expand_dims(features[rpn_id, :], 0)
                         for rpn_id in selected_dets[:, 5]],
                        axis=0)

                    out_group = f.create_group(key)
                    out_group.create_dataset('boxes_scores_rpn_ids', data=selected_dets)
                    out_group.create_dataset('start_end_ids', data=start_end_ids)
                    out_group.create_dataset('features', data=selected_feat)
def parse_data(data_const, args):
    """Turn selected detections and V-COCO annotations into graph labels.

    For each of the ``vcoco_train``, ``vcoco_test`` and ``vcoco_val`` subsets
    the function reads ``selected_coco_cls_dets.hdf5`` from
    ``<data_const.proc_dir>/<subset>``, matches every annotated human and
    object/instrument box against the detected boxes with ``get_node_index``
    and marks the matching edge in ``edge_labels`` / ``edge_roles``.

    Edge layout: human node ``h`` owns ``node_num - 1`` consecutive rows, one
    per *other* node in detection order (the human itself is skipped), so
    there are ``human_num * (node_num - 1)`` rows per image.

    Output, depending on ``args.vis_result``:
        * falsy  -- one group per image is written to
          ``<proc_dir>/<subset>/vcoco_data.hdf5``;
        * truthy -- annotations and matched labels are displayed with
          matplotlib instead, and repeated image ids are skipped.
    In both modes ``<proc_dir>/<subset>/eval_det_result.json`` receives the
    ground-truth counts, the matched counts and their ratios.

    Args:
        data_const: object providing ``proc_dir`` and ``original_image_dir``.
        args: object providing the ``vis_result`` flag.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    # Only actions that involve an object are labelled; the object-less
    # actions are removed from the label space.
    action_class_num = len(vcoco_metadata.action_classes) - len(vcoco_metadata.action_no_obj)
    no_role_index = vcoco_metadata.role_index['none']
    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()

    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        subset_dir = os.path.join(data_const.proc_dir, subset)

        # ExitStack closes every HDF5 file opened for this subset, also when
        # an exception interrupts the processing.
        with ExitStack() as stack:
            # create file object to save the parsed data
            save_data = None
            if not args.vis_result:
                print('{} data will be saved into {}/vcoco_data.hdf5 file'.format(subset.split("_")[1], subset))
                hdf5_file = os.path.join(subset_dir, 'vcoco_data.hdf5')
                save_data = stack.enter_context(h5py.File(hdf5_file, 'w'))

            # evaluate detection
            eval_det_file = os.path.join(subset_dir, 'eval_det_result.json')
            gt_record = {n: 0 for n in vcoco_metadata.action_class_with_object}
            det_record = gt_record.copy()

            # load selected data
            selected_det_data = stack.enter_context(
                h5py.File(os.path.join(subset_dir, "selected_coco_cls_dets.hdf5"), 'r'))

            # Load the VCOCO annotations for the image set
            vcoco_all = vu.load_vcoco(subset)
            for x in vcoco_all:
                # NOTE(review): 'role_bbox' is read from the entries of
                # vcoco_all further down, so attach_gt_boxes presumably
                # updates x in place -- confirm.
                x = vu.attach_gt_boxes(x, coco)
                # record groundtruths
                if x['action_name'] in vcoco_metadata.action_class_with_object:
                    if len(x['role_name']) == 2:
                        # Two roles: every positive label counts once.
                        gt_record[x['action_name']] = sum(x['label'][:, 0])
                    else:
                        # More than two roles: count the instrument and the
                        # object boxes separately, ignoring NaN boxes.
                        for i in range(x['label'].shape[0]):
                            if x['label'][i, 0] == 1:
                                role_bbox = x['role_bbox'][i, :] * 1.
                                role_bbox = role_bbox.reshape((-1, 4))
                                for i_role in range(1, len(x['role_name'])):
                                    if x['role_name'][i_role] == 'instr' and (not np.isnan(role_bbox[i_role, :][0])):
                                        gt_record[x['action_name'] + '_with'] += 1
                                        continue
                                    if x['role_name'][i_role] == 'obj' and (not np.isnan(role_bbox[i_role, :][0])):
                                        gt_record[x['action_name']] += 1

            image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
            # Image ids repeat in the annotations; a set gives O(1)
            # "already seen" checks.
            seen_image_ids = set()
            for i_image, image_id in enumerate(image_ids):
                key = str(image_id)
                # Relative image path: the part of coco_url after '.org',
                # minus its first character.
                img_name = coco.loadImgs(ids=image_id)[0]['coco_url'].split('.org')[1][1:]
                # get image size
                img_gt = Image.open(os.path.join(data_const.original_image_dir, img_name)).convert('RGB')
                img_size = img_gt.size

                # load corresponding selected data for image_id
                det_group = selected_det_data[key]
                dets = det_group['boxes_scores_rpn_ids'][:]
                det_boxes = dets[:, :4]
                det_scores = dets[:, 4]
                det_classes = dets[:, -1].astype(int)
                det_features = det_group['features']

                # calculate the number of nodes
                # NOTE(review): range(human_num) is used as the set of human
                # candidates below, which presumes class-1 detections come
                # first in the selected detections -- confirm.
                human_num = int(np.count_nonzero(det_classes == 1))
                node_num = len(det_classes)
                labeled_edge_num = human_num * (node_num - 1)

                if image_id not in seen_image_ids:
                    seen_image_ids.add(image_id)
                    # construct empty edge labels; every edge starts with the
                    # 'none' role
                    edge_labels = np.zeros((labeled_edge_num, action_class_num))
                    edge_roles = np.zeros((labeled_edge_num, 3))
                    edge_roles[:, no_role_index] = 1
                elif not args.vis_result:
                    # Image already stored: keep accumulating into the saved
                    # datasets.
                    edge_labels = save_data[key]['edge_labels']
                    edge_roles = save_data[key]['edge_roles']
                else:
                    continue

                # Ground truth labels
                for x in vcoco_all:
                    if x['label'][i_image, 0] == 1:
                        if x['action_name'] in vcoco_metadata.action_no_obj:
                            continue
                        # role_bbox contains (agent, object/instr)
                        role_bbox = x['role_bbox'][i_image, :] * 1.
                        role_bbox = role_bbox.reshape((-1, 4))
                        # match human box
                        bbox = role_bbox[0, :]
                        human_index = get_node_index(bbox, det_boxes, range(human_num))
                        if human_index == -1:
                            warnings.warn('human detection missing')
                            continue
                        assert human_index < human_num
                        # match object box
                        for i_role in range(1, len(x['role_name'])):
                            action_name = x['action_name']
                            if x['role_name'][i_role] == 'instr' and (x['action_name'] == 'cut' or x['action_name'] == 'eat' or x['action_name'] == 'hit'):
                                # Instrument roles of cut/eat/hit are tracked
                                # as separate '<action>_with' classes.
                                action_index = vcoco_metadata.action_with_obj_index[x['action_name'] + '_with']
                                action_name += '_with'
                            else:
                                action_index = vcoco_metadata.action_with_obj_index[x['action_name']]
                            bbox = role_bbox[i_role, :]
                            if np.isnan(bbox[0]):
                                continue
                            if args.vis_result:
                                img_gt = vis_img_vcoco(img_gt, [role_bbox[0, :], role_bbox[i_role, :]], 1, raw_action=action_index, data_gt=True)
                            # !Note: Take the human into account
                            obj_index = get_node_index(bbox, det_boxes, range(node_num))
                            if obj_index == -1:
                                warnings.warn('object detection missing')
                                continue
                            if obj_index == human_index:
                                warnings.warn('human detection is the same to object detection')
                                continue
                            # match labels: nodes after the human shift down
                            # by one because the self-edge has no row
                            if human_index > obj_index:
                                edge_index = human_index * (node_num - 1) + obj_index
                            else:
                                edge_index = human_index * (node_num - 1) + obj_index - 1
                            det_record[action_name] += 1
                            edge_labels[edge_index, action_index] = 1
                            edge_roles[edge_index, vcoco_metadata.role_index[x['role_name'][i_role]]] = 1
                            edge_roles[edge_index, no_role_index] = 0

                # visualizing result instead of saving result
                if args.vis_result:
                    image_res = Image.open(os.path.join(data_const.original_image_dir, img_name)).convert('RGB')
                    result = vis_img_vcoco(image_res, det_boxes, det_classes, det_scores, edge_labels, score_thresh=0.4)
                    plt.figure(figsize=(100, 100))
                    plt.suptitle(img_name)
                    plt.subplot(1, 2, 1)
                    plt.imshow(np.array(img_gt))
                    plt.title('all_ground_truth' + str(i_image))
                    plt.subplot(1, 2, 2)
                    plt.imshow(np.array(result))
                    plt.title('selected_ground_truth')
                    plt.ion()
                    plt.pause(1)
                    plt.close()
                # save process data
                else:
                    if key not in save_data:
                        group = save_data.create_group(key)
                        # The file name is stored as the float64 values of
                        # its UTF-8 bytes. np.frombuffer replaces the
                        # deprecated binary mode of np.fromstring.
                        group.create_dataset('img_name', data=np.frombuffer(img_name.encode(), dtype=np.uint8).astype('float64'))
                        group.create_dataset('img_size', data=img_size)
                        group.create_dataset('boxes', data=det_boxes)
                        group.create_dataset('classes', data=det_classes)
                        group.create_dataset('scores', data=det_scores)
                        group.create_dataset('feature', data=det_features)
                        group.create_dataset('node_num', data=node_num)
                        group.create_dataset('edge_labels', data=edge_labels)
                        group.create_dataset('edge_roles', data=edge_roles)
                    else:
                        save_data[key]['edge_labels'][:] = edge_labels
                        save_data[key]['edge_roles'][:] = edge_roles

        if not args.vis_result:
            print("Finished parsing data!")

        # eval object detection; a class without ground truth gets ratio 0.0
        # instead of aborting with a division by zero after all the work
        eval_single = {
            n: (det_record[n] / gt_record[n] if gt_record[n] else 0.0)
            for n in vcoco_metadata.action_class_with_object
        }
        gt_total = sum(gt_record.values())
        eval_all = sum(det_record.values()) / gt_total if gt_total else 0.0
        eval_det_result = {
            'gt': gt_record,
            'det': det_record,
            'eval_single': eval_single,
            'eval_all': eval_all
        }
        io.dump_json_object(eval_det_result, eval_det_file)
# NOTE(review): this statement run is the interior of a function whose header
# is not visible here; `data_const` must be provided by that enclosing scope.
# For every V-COCO subset it computes features from the stored detections and
# keypoints and writes them to two HDF5 files in <proc_dir>/<subset>:
#   spatial_feat.hdf5   -- one dataset per image id (spatial features)
#   keypoints_feat.hdf5 -- one group per image id with 'pose_to_human'
# NOTE(review): none of the three HDF5 handles is closed in the visible part;
# presumably that happens in code following this fragment -- confirm.
for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
    # create the folder/file to save corresponding spatial features
    print(f"construct spatial features and pose features for {subset}")
    io.mkdir_if_not_exists(os.path.join(data_const.proc_dir, subset), recursive=True)
    save_data = h5py.File(
        os.path.join(data_const.proc_dir, subset, 'spatial_feat.hdf5'), 'w')
    norm_keypoints = h5py.File(
        os.path.join(data_const.proc_dir, subset, 'keypoints_feat.hdf5'), 'w')
    # load selected object detection result
    vcoco_data = h5py.File(
        os.path.join(data_const.proc_dir, subset, 'vcoco_data.hdf5'), 'r')
    vcoco_all = vu.load_vcoco(subset)
    # Image ids come from the first annotation entry; they repeat, so the
    # loop below iterates over the set of distinct ids.
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
    for img_id in tqdm(set(image_ids)):
        det_boxes = vcoco_data[str(img_id)]['boxes']
        img_wh = vcoco_data[str(img_id)]['img_size']
        # NOTE(review): a 'keypoints' dataset is expected in vcoco_data.hdf5;
        # verify which preprocessing step writes it.
        keypoints = vcoco_data[str(img_id)]['keypoints']
        # Only spatial_feats and pose_to_human are stored in the visible
        # part; the other three return values are unused here.
        spatial_feats, pose_to_human, pose_to_obj_offset, pose_to_obj, pose_to_human_tight = calculate_spatial_pose_feats(
            det_boxes, keypoints, img_wh)
        save_data.create_dataset(str(img_id), data=spatial_feats)
        # save feature related to pose
        norm_keypoints.create_group(str(img_id))
        norm_keypoints[str(img_id)].create_dataset('pose_to_human', data=pose_to_human)
        # norm_keypoints[str(img_id)].create_dataset('pose_to_obj', data=pose_to_obj)
# NOTE(review): this statement run is the interior of a function whose header
# is not visible here; `model` must be provided by that enclosing scope, and
# the run appears to continue past the visible part (nms_keep_indices_dict is
# initialised but never filled here, and the HDF5 file is not closed).
# It runs the detector on every distinct image of each V-COCO subset and
# stores the raw outputs in <proc_dir>/<subset>/faster_rcnn_det.hdf5.

# Use the GPU when one is available, and put the model into eval mode.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()
data_const = VcocoConstants()
# Load COCO annotations for V-COCO images
coco = vu.load_coco()
for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
    # create the folder/file to save corresponding detection results
    io.mkdir_if_not_exists(os.path.join(data_const.proc_dir, subset), recursive=True)
    faster_rcnn_det_hdf5 = os.path.join(data_const.proc_dir, subset, 'faster_rcnn_det.hdf5')
    faster_rcnn_det_data = h5py.File(faster_rcnn_det_hdf5, 'w')
    # load the VCOCO annotations for image set
    print('Construct object detection results for {} dataset'.format(subset.split('_')[1]))
    vcoco = vu.load_vcoco(subset)
    img_id_list = vcoco[0]['image_id'][:,0].tolist()
    nms_keep_indices_dict = {}
    # Image ids repeat in the annotations; each distinct id is processed once.
    for img_id in tqdm(set(img_id_list)):
        # The relative image path is the part of coco_url after '.org', minus
        # its first character. The image root is hard-coded here rather than
        # taken from data_const.
        img_path = os.path.join('datasets/vcoco/coco/images', coco.loadImgs(ids=img_id)[0]['coco_url'].split('.org')[1][1:])
        img = Image.open(img_path).convert('RGB')
        img_tensor = torchvision.transforms.functional.to_tensor(img)
        img_tensor = img_tensor.to(device)
        # The model is called with a one-image list; only outputs[0] is read
        # below. `save_feat=True` is a keyword of this project's model --
        # presumably it makes 'fc7_feat' / 'pool_feat' available in the
        # output; confirm.
        outputs = model([img_tensor], save_feat=True)
        # save object detection results
        faster_rcnn_det_data.create_group(str(img_id))
        faster_rcnn_det_data[str(img_id)].create_dataset(name='boxes', data=outputs[0]['boxes'].cpu().detach().numpy())
        faster_rcnn_det_data[str(img_id)].create_dataset(name='scores', data=outputs[0]['scores'].cpu().detach().numpy())
        # The dataset names 'fc7_feaet' / 'pool_feaet' are misspelled (sic),
        # but they are the on-disk names, so any reader of this file must use
        # the same spelling.
        faster_rcnn_det_data[str(img_id)].create_dataset(name='fc7_feaet', data=outputs[0]['fc7_feat'].cpu().detach().numpy())
        faster_rcnn_det_data[str(img_id)].create_dataset(name='pool_feaet', data=outputs[0]['pool_feat'].cpu().detach().numpy())