Esempio n. 1
0
 def _load_subset_ids(self, subset, sampler):
     """Return the unique image ids of a V-COCO subset, optionally sub-sampled.

     Args:
         subset: Subset name forwarded to ``vu.load_vcoco``.
         sampler: Multiplier applied to the number of unique ids to decide
             how many ids to keep. A falsy value (``None``, ``0``) disables
             sampling and returns every id.

     Returns:
         A list of integer image ids without duplicates.
     """
     annotations = vu.load_vcoco(subset)
     id_column = annotations[0]['image_id'][:, 0].astype(int)
     unique_ids = list(set(id_column.tolist()))
     if not sampler:
         return unique_ids
     # When changing the model, a sub-dataset quickly shows whether
     # something is wrong.
     keep_count = int(len(unique_ids) * sampler)
     return random.sample(unique_ids, keep_count)
Esempio n. 2
0
    def __init__(self, root_dir, set='trainval', transform=None, color_prob=0):
        """Store the configuration and load the COCO / V-COCO annotations.

        Args:
            root_dir: Directory containing ``coco/annotations`` and ``data``.
            set: Suffix appended to ``"vcoco_"`` to name the V-COCO split to
                load; also stored as ``self.setname``.
            transform: Stored as ``self.transform``.
            color_prob: Stored as ``self.color_prob``.
        """
        self.root_dir = root_dir
        self.setname = set
        self.transform = transform
        self.color_prob = color_prob

        # COCO instance annotations always come from the trainval2014 file.
        coco_annotation_file = os.path.join(
            self.root_dir, "coco/annotations", "instances_trainval2014.json")
        self.coco = COCO(coco_annotation_file)

        split_name = "vcoco_" + set
        vcoco_data_dir = os.path.join(self.root_dir, "data")
        self.vcoco = vu.load_vcoco(split_name, vcoco_data_dir)

        # The remaining loaders run after the annotations above are in place.
        self.image_ids = self.load_ids()
        self.load_classes()
        self.load_vcoco_classes()
        self.load_ann_by_image()
Esempio n. 3
0
def select(data_const):
    """Select per-image detections for each V-COCO subset and save them.

    For every subset (``vcoco_train``, ``vcoco_test``, ``vcoco_val``) this
    reads ``faster_rcnn_det.hdf5`` and ``nms_keep_indices.json`` from
    ``<data_const.proc_dir>/<subset>``, runs ``select_dets`` on each image and
    writes ``selected_coco_cls_dets.hdf5`` into the same directory. Every
    image id gets one group holding the datasets ``boxes_scores_rpn_ids``,
    ``start_end_ids`` and ``features``.

    Both HDF5 files are opened through context managers so they are closed
    even when processing an image raises.

    Args:
        data_const: Constants object; ``proc_dir`` is read here and the whole
            object is forwarded to ``select_dets``.
    """
    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        # create the folder/file to save corresponding detection results
        print('Select detection results for {} dataset'.format(subset.split('_')[1]))
        subset_dir = os.path.join(data_const.proc_dir, subset)
        io.mkdir_if_not_exists(subset_dir, recursive=True)

        print(f'Creating selected_coco_cls_dets.hdf5 file for {subset}...')
        hdf5_file = os.path.join(subset_dir, 'selected_coco_cls_dets.hdf5')
        with h5py.File(hdf5_file, 'w') as f:
            # Load the VCOCO annotations for image set
            vcoco = vu.load_vcoco(subset)
            img_id_list = vcoco[0]['image_id'][:, 0].tolist()

            # Load faster-rcnn detection results
            det_file = os.path.join(subset_dir, 'faster_rcnn_det.hdf5')
            with h5py.File(det_file, 'r') as all_faster_rcnn_det_data:
                all_nms_keep_indices = io.load_json_object(
                    os.path.join(subset_dir, 'nms_keep_indices.json'))
                print('Selecting boxes ...')
                # The annotation rows repeat image ids, so iterate unique ids.
                for img_id in tqdm(set(img_id_list)):
                    img_key = str(img_id)
                    det_group = all_faster_rcnn_det_data[img_key]
                    boxes = det_group['boxes']
                    scores = det_group['scores']
                    # The key is spelled 'fc7_feaet' in the detection file.
                    features = det_group['fc7_feaet']
                    nms_keep_indices = all_nms_keep_indices[img_key]

                    selected_dets, start_end_ids = select_dets(
                        boxes, scores, nms_keep_indices, data_const)

                    # Column 5 of selected_dets indexes the feature rows to keep.
                    selected_feat = np.concatenate(
                        [np.expand_dims(features[rpn_id, :], 0)
                         for rpn_id in selected_dets[:, 5]],
                        axis=0)

                    out_group = f.create_group(img_key)
                    out_group.create_dataset('boxes_scores_rpn_ids', data=selected_dets)
                    out_group.create_dataset('start_end_ids', data=start_end_ids)
                    out_group.create_dataset('features', data=selected_feat)
Esempio n. 4
0
def parse_data(data_const, args):
    """Build per-image edge labels for every V-COCO subset.

    For each subset (``vcoco_train``, ``vcoco_test``, ``vcoco_val``) the
    selected detections in ``selected_coco_cls_dets.hdf5`` are matched against
    the V-COCO ground-truth role boxes. Depending on ``args.vis_result``:

    * falsy: the parsed data is written to ``<proc_dir>/<subset>/vcoco_data.hdf5``
      and a detection summary to ``<proc_dir>/<subset>/eval_det_result.json``;
    * truthy: the ground truth and the matched labels are shown with
      matplotlib and nothing is written.

    Args:
        data_const: Constants object; ``proc_dir`` and ``original_image_dir``
            are read.
        args: Namespace-like object; only ``vis_result`` is read.
    """
    # just focus on HOI samples, remove those actions with no objects
    action_class_num = len(vcoco_metadata.action_classes) - len(vcoco_metadata.action_no_obj)
    no_role_index = vcoco_metadata.role_index['none']
    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()
    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        subset_dir = os.path.join(data_const.proc_dir, subset)
        # create file object to save the parsed data
        save_data = None
        if not args.vis_result:
            print('{} data will be saved into {}/vcoco_data.hdf5 file'.format(subset.split("_")[1], subset))
            save_data = h5py.File(os.path.join(subset_dir, 'vcoco_data.hdf5'), 'w')
        try:
            # load selected data
            selected_path = os.path.join(subset_dir, "selected_coco_cls_dets.hdf5")
            with h5py.File(selected_path, 'r') as selected_det_data:
                gt_record, det_record = _parse_subset(
                    data_const, args, subset, coco, action_class_num,
                    no_role_index, selected_det_data, save_data)
        finally:
            # Close the output file even when parsing fails part-way.
            if save_data is not None:
                save_data.close()
        if args.vis_result:
            # Visualisation skips repeated image ids, so the matched tallies
            # are incomplete; no evaluation file is written in this mode.
            continue
        print("Finished parsing data!")
        # eval object detection
        eval_single = {
            n: _detection_ratio(det_record[n], gt_record[n])
            for n in vcoco_metadata.action_class_with_object
        }
        eval_all = _detection_ratio(sum(det_record.values()), sum(gt_record.values()))
        eval_det_result = {
            'gt': gt_record,
            'det': det_record,
            'eval_single': eval_single,
            'eval_all': eval_all
        }
        io.dump_json_object(eval_det_result, os.path.join(subset_dir, 'eval_det_result.json'))


def _detection_ratio(matched, total):
    """Return ``matched / total``, or ``0.0`` when there is no ground truth."""
    return matched / total if total else 0.0


def _parse_subset(data_const, args, subset, coco, action_class_num,
                  no_role_index, selected_det_data, save_data):
    """Label the edges of every image of one subset.

    Args:
        data_const: Constants object; ``original_image_dir`` is read.
        args: Namespace-like object; only ``vis_result`` is read.
        subset: Subset name forwarded to ``vu.load_vcoco``.
        coco: COCO handle used to attach ground-truth boxes and resolve
            image names.
        action_class_num: Width of the per-edge action label vector.
        no_role_index: Column of ``edge_roles`` meaning "no role".
        selected_det_data: Open HDF5 file with the selected detections.
        save_data: Open, writable HDF5 file, or ``None`` when
            ``args.vis_result`` is truthy.

    Returns:
        ``(gt_record, det_record)``: per-action counts of annotated
        interactions and of interactions matched to detections.
    """
    gt_record = {n: 0 for n in vcoco_metadata.action_class_with_object}
    det_record = gt_record.copy()

    # Load the VCOCO annotations for the image set
    vcoco_all = vu.load_vcoco(subset)
    for x in vcoco_all:
        # NOTE(review): the matching loop below reads x['role_bbox'] from
        # vcoco_all, so attach_gt_boxes is presumably mutating x in place.
        x = vu.attach_gt_boxes(x, coco)
        # record groundtruths
        if x['action_name'] not in vcoco_metadata.action_class_with_object:
            continue
        if len(x['role_name']) == 2:
            gt_record[x['action_name']] = sum(x['label'][:, 0])
            continue
        for i in range(x['label'].shape[0]):
            if x['label'][i, 0] != 1:
                continue
            role_bbox = (x['role_bbox'][i, :] * 1.).reshape((-1, 4))
            for i_role in range(1, len(x['role_name'])):
                # A NaN box means this role is not annotated for the instance.
                if np.isnan(role_bbox[i_role, :][0]):
                    continue
                if x['role_name'][i_role] == 'instr':
                    gt_record[x['action_name'] + '_with'] += 1
                elif x['role_name'][i_role] == 'obj':
                    gt_record[x['action_name']] += 1

    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
    # Annotation rows repeat image ids; a set keeps the membership test O(1).
    seen_image_ids = set()
    for i_image, image_id in enumerate(image_ids):
        image_key = str(image_id)
        img_name = coco.loadImgs(ids=image_id)[0]['coco_url'].split('.org')[1][1:]
        # get image size
        img_gt = Image.open(os.path.join(data_const.original_image_dir, img_name)).convert('RGB')
        img_size = img_gt.size
        # load corresponding selected data for image_id
        det_group = selected_det_data[image_key]
        det_boxes = det_group['boxes_scores_rpn_ids'][:, :4]
        det_scores = det_group['boxes_scores_rpn_ids'][:, 4]
        det_classes = det_group['boxes_scores_rpn_ids'][:, -1].astype(int)
        det_features = det_group['features']
        # calculate the number of nodes
        human_num = len(np.where(det_classes == 1)[0])
        node_num = len(det_classes)
        # Every human node gets an edge to every other node.
        labeled_edge_num = human_num * (node_num - 1)
        if image_id not in seen_image_ids:
            seen_image_ids.add(image_id)
            # construct empty edge labels
            edge_labels = np.zeros((labeled_edge_num, action_class_num))
            edge_roles = np.zeros((labeled_edge_num, 3))
            edge_roles[:, no_role_index] = 1
        elif args.vis_result:
            continue
        else:
            # Re-use the labels already stored for this image; item
            # assignments below write straight into the HDF5 datasets.
            edge_labels = save_data[image_key]['edge_labels']
            edge_roles = save_data[image_key]['edge_roles']

        # Ground truth labels
        for x in vcoco_all:
            if x['label'][i_image, 0] != 1:
                continue
            if x['action_name'] in vcoco_metadata.action_no_obj:
                continue
            # role_bbox contains (agent, object/instr)
            role_bbox = (x['role_bbox'][i_image, :] * 1.).reshape((-1, 4))
            # match human box
            human_index = get_node_index(role_bbox[0, :], det_boxes, range(human_num))
            if human_index == -1:
                warnings.warn('human detection missing')
                continue
            assert human_index < human_num
            # match object box
            for i_role in range(1, len(x['role_name'])):
                action_name = x['action_name']
                if x['role_name'][i_role] == 'instr' and x['action_name'] in ('cut', 'eat', 'hit'):
                    # Instrument roles of these actions use the '<action>_with' class.
                    action_name += '_with'
                action_index = vcoco_metadata.action_with_obj_index[action_name]
                bbox = role_bbox[i_role, :]
                if np.isnan(bbox[0]):
                    continue
                if args.vis_result:
                    img_gt = vis_img_vcoco(img_gt, [role_bbox[0, :], role_bbox[i_role, :]], 1, raw_action=action_index, data_gt=True)
                # !Note: Take the human into account
                obj_index = get_node_index(bbox, det_boxes, range(node_num))
                if obj_index == -1:
                    warnings.warn('object detection missing')
                    continue
                if obj_index == human_index:
                    warnings.warn('human detection is the same to object detection')
                    continue
                # match labels: each human owns node_num-1 consecutive edge
                # slots (the self edge is omitted), so nodes after the human
                # shift down by one.
                if human_index > obj_index:
                    edge_index = human_index * (node_num - 1) + obj_index
                else:
                    edge_index = human_index * (node_num - 1) + obj_index - 1
                det_record[action_name] += 1
                edge_labels[edge_index, action_index] = 1
                edge_roles[edge_index, vcoco_metadata.role_index[x['role_name'][i_role]]] = 1
                edge_roles[edge_index, no_role_index] = 0

        # visualizing result instead of saving result
        if args.vis_result:
            image_res = Image.open(os.path.join(data_const.original_image_dir, img_name)).convert('RGB')
            result = vis_img_vcoco(image_res, det_boxes, det_classes, det_scores, edge_labels, score_thresh=0.4)
            plt.figure(figsize=(100, 100))
            plt.suptitle(img_name)
            plt.subplot(1, 2, 1)
            plt.imshow(np.array(img_gt))
            plt.title('all_ground_truth' + str(i_image))
            plt.subplot(1, 2, 2)
            plt.imshow(np.array(result))
            plt.title('selected_ground_truth')
            plt.ion()
            plt.pause(1)
            plt.close()
            continue

        # save process data
        if image_key not in save_data:
            group = save_data.create_group(image_key)
            # The image name is stored as its UTF-8 byte values cast to float64.
            name_bytes = np.frombuffer(img_name.encode(), dtype=np.uint8)
            group.create_dataset('img_name', data=name_bytes.astype('float64'))
            group.create_dataset('img_size', data=img_size)
            group.create_dataset('boxes', data=det_boxes)
            group.create_dataset('classes', data=det_classes)
            group.create_dataset('scores', data=det_scores)
            group.create_dataset('feature', data=det_features)
            group.create_dataset('node_num', data=node_num)
            group.create_dataset('edge_labels', data=edge_labels)
            group.create_dataset('edge_roles', data=edge_roles)
        else:
            save_data[image_key]['edge_labels'][:] = edge_labels
            save_data[image_key]['edge_roles'][:] = edge_roles

    return gt_record, det_record
Esempio n. 5
0
    # Compute and store spatial and pose features for each V-COCO subset.
    # NOTE(review): the enclosing function header and any closing of the HDF5
    # files opened below are outside this excerpt.
    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        # create the folder/file to save corresponding spatial features
        print(f"construct spatial features and pose features for {subset}")
        io.mkdir_if_not_exists(os.path.join(data_const.proc_dir, subset),
                               recursive=True)
        # Output file: one dataset of spatial features per image id.
        save_data = h5py.File(
            os.path.join(data_const.proc_dir, subset, 'spatial_feat.hdf5'),
            'w')
        # Output file: one group of pose features per image id.
        norm_keypoints = h5py.File(
            os.path.join(data_const.proc_dir, subset, 'keypoints_feat.hdf5'),
            'w')
        # load selected object detection result
        vcoco_data = h5py.File(
            os.path.join(data_const.proc_dir, subset, 'vcoco_data.hdf5'), 'r')
        vcoco_all = vu.load_vcoco(subset)
        image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
        # Image ids repeat across annotation rows; process each image once.
        for img_id in tqdm(set(image_ids)):
            # ipdb.set_trace()
            det_boxes = vcoco_data[str(img_id)]['boxes']
            img_wh = vcoco_data[str(img_id)]['img_size']
            keypoints = vcoco_data[str(img_id)]['keypoints']
            # spatial_feats = calculate_spatial_feats(det_boxes, img_wh)
            # Only spatial_feats and pose_to_human are stored in the lines
            # visible here; the other returned values are not used in this
            # excerpt.
            spatial_feats, pose_to_human, pose_to_obj_offset, pose_to_obj, pose_to_human_tight = calculate_spatial_pose_feats(
                det_boxes, keypoints, img_wh)
            save_data.create_dataset(str(img_id), data=spatial_feats)
            # save feature related to pose
            norm_keypoints.create_group(str(img_id))
            norm_keypoints[str(img_id)].create_dataset('pose_to_human',
                                                       data=pose_to_human)
            # norm_keypoints[str(img_id)].create_dataset('pose_to_obj', data=pose_to_obj)
Esempio n. 6
0
    # Run the detector over every image of each V-COCO subset and store the
    # raw outputs in <proc_dir>/<subset>/faster_rcnn_det.hdf5.
    # NOTE(review): the enclosing function header (where `model` comes from)
    # and any closing of the HDF5 file are outside this excerpt.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    # Switch the model to evaluation mode before running it.
    model.eval()

    data_const = VcocoConstants()
    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()
    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        # create the folder/file to save corresponding detection results
        io.mkdir_if_not_exists(os.path.join(data_const.proc_dir, subset), recursive=True)
        faster_rcnn_det_hdf5 = os.path.join(data_const.proc_dir, subset, 'faster_rcnn_det.hdf5')
        faster_rcnn_det_data = h5py.File(faster_rcnn_det_hdf5, 'w')

        # load the VCOCO annotations for image set
        print('Construct object detection results for {} dataset'.format(subset.split('_')[1]))
        vcoco = vu.load_vcoco(subset)
        img_id_list = vcoco[0]['image_id'][:,0].tolist()
        # Not filled in the lines visible here.
        nms_keep_indices_dict = {}
        # ipdb.set_trace()
        # Image ids repeat across annotation rows; process each image once.
        for img_id in tqdm(set(img_id_list)):
            # The image path is the part of 'coco_url' after '.org/', joined
            # to a hard-coded images directory.
            img_path = os.path.join('datasets/vcoco/coco/images', coco.loadImgs(ids=img_id)[0]['coco_url'].split('.org')[1][1:])
            img = Image.open(img_path).convert('RGB')
            img_tensor = torchvision.transforms.functional.to_tensor(img)
            img_tensor = img_tensor.to(device)
            # The model takes a list of image tensors; outputs[0] belongs to
            # the single image passed in.
            outputs = model([img_tensor], save_feat=True)
            # save object detection results
            faster_rcnn_det_data.create_group(str(img_id))
            faster_rcnn_det_data[str(img_id)].create_dataset(name='boxes', data=outputs[0]['boxes'].cpu().detach().numpy())
            faster_rcnn_det_data[str(img_id)].create_dataset(name='scores', data=outputs[0]['scores'].cpu().detach().numpy())
            # The dataset names below are spelled 'fc7_feaet' / 'pool_feaet'
            # while the model output keys are 'fc7_feat' / 'pool_feat'; any
            # reader of this file must use the names as written here.
            faster_rcnn_det_data[str(img_id)].create_dataset(name='fc7_feaet', data=outputs[0]['fc7_feat'].cpu().detach().numpy())
            faster_rcnn_det_data[str(img_id)].create_dataset(name='pool_feaet', data=outputs[0]['pool_feat'].cpu().detach().numpy())