def plot_set(paths, imageset):
    """Draw ground-truth V-COCO boxes on images and save the renderings.

    For every sample labelled 1 in every action of the requested split, the
    first role box (row 0) is drawn in (255, 0, 0) with the action name and
    each further role box whose first coordinate is not NaN is drawn in
    (0, 255, 0) with its role name, both through ``plot_box_with_label``.
    The figure is saved as ``<name>_gt_<NN><ext>`` under
    ``<paths.tmp_root>/results/VCOCO/detections/gt`` and the source image is
    copied into the same folder.

    Args:
        paths: object exposing ``data_root`` and ``tmp_root``.
        imageset: V-COCO split name; ``'test'`` reads images from
            ``val2014``, any other value from ``train2014``.
    """
    coco_split = 'val' if imageset == 'test' else 'train'
    vcoco_root = os.path.join(paths.data_root, '..', 'v-coco')
    image_dir = os.path.join(vcoco_root, 'coco/images',
                             '{}2014'.format(coco_split))
    out_dir = os.path.join(paths.tmp_root, 'results/VCOCO/detections/gt')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))
    all_image_ids = vcoco_all[0]['image_id']
    image_infos = coco.loadImgs(ids=all_image_ids[:, 0].tolist())

    # file name -> zero-based counter of renderings already saved for it
    saved_count = dict()

    for vcoco in vcoco_all:
        vcoco = vu.attach_gt_boxes(vcoco, coco)
        action_name = vcoco['action_name']
        for sample_idx in np.where(vcoco['label'] == 1)[0].tolist():
            img_name = image_infos[sample_idx]['file_name']
            image_path = os.path.join(image_dir, img_name)
            assert os.path.exists(image_path)
            img = scipy.misc.imread(image_path, mode='RGB')

            # One row of four coordinates per role.
            boxes = (vcoco['role_bbox'][sample_idx, :] * 1.).reshape((-1, 4))
            plot_box_with_label(img, boxes[0, :].astype(int), (255, 0, 0),
                                action_name)
            role_names = vcoco['role_name']
            for role_idx in range(1, len(role_names)):
                if np.isnan(boxes[role_idx, 0]):
                    continue
                plot_box_with_label(img, boxes[role_idx, :].astype(int),
                                    (0, 255, 0), role_names[role_idx])

            if img_name in saved_count:
                saved_count[img_name] += 1
            else:
                saved_count[img_name] = 0

            # Plot the ground-truth annotation without axes or tick labels.
            plt.imshow(img)
            plt.axis('off')
            axes = plt.gca()
            axes.set_xticklabels([])
            axes.set_yticklabels([])
            stem, ext = os.path.splitext(img_name)
            out_name = '{}_gt_{:02d}{}'.format(stem, saved_count[img_name],
                                               ext)
            plt.savefig(os.path.join(out_dir, out_name), bbox_inches='tight',
                        pad_inches=0, transparent=True)
            plt.close()

            # Keep a copy of the untouched image beside the rendering.
            shutil.copy(image_path, os.path.join(out_dir, img_name))
def get_imgid_2_vcoco_labels(vcoco_all, coco):
    """Build a dict from image id to that image's per-verb V-COCO labels.

    Args:
        vcoco_all: iterable of per-verb annotation dicts; each is passed
            through ``vu.attach_gt_boxes`` and must then provide the keys
            read below (``action_name``, ``image_id``, ``role_object_id``,
            ``role_name``, ``label``, ``role_bbox``, ``include``, ``bbox``).
        coco: object forwarded to ``vu.attach_gt_boxes``.

    Returns:
        dict: ``{img_id: {"image_id": img_id, "verbs": {action_name: {...}}}}``.
        If an image id occurs more than once for the same verb, the last
        occurrence wins.
    """
    ret = {}
    for verb_dict in vcoco_all:
        verb_dict = vu.attach_gt_boxes(verb_dict, coco)
        action_name = verb_dict["action_name"]
        # enumerate() instead of xrange(len(...)) so the function runs on
        # both Python 2 and Python 3.
        for i, image_id_row in enumerate(verb_dict["image_id"]):
            img_id = image_id_row[0]
            if img_id not in ret:
                ret[img_id] = {
                    "image_id": img_id,
                    "verbs": {},
                }
            ret[img_id]["verbs"][action_name] = {
                "role_object_id": verb_dict["role_object_id"][i],
                "role_name": verb_dict["role_name"],
                "label": verb_dict["label"][i],
                "role_bbox": verb_dict["role_bbox"][i],
                "include": verb_dict["include"],
                "bbox": verb_dict["bbox"][i],
            }
    return ret
def visualization(name="train", action='hit', show_num=1, index=-1):
    """Show the agent and role boxes of one V-COCO sample with matplotlib.

    Args:
        name: V-COCO split, one of ``'train'``, ``'trainval'``, ``'val'``,
            ``'test'``.
        action: action name whose annotations are visualised.
        show_num: upper bound on the number of samples drawn. Only the
            single sample selected by ``index`` is available, so at most one
            figure is produced.
        index: row of the sample inside the action's annotation arrays.

    Raises:
        AssertionError: if ``name`` is not a known split.
        ValueError: if ``action`` is not among the loaded action names.
    """
    assert name in ('train', 'trainval', 'val', 'test'), "illegal name "

    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()
    # Load the V-COCO annotations for the requested split
    vcoco_data = vu.load_vcoco('vcoco_' + name)
    for x in vcoco_data:
        # The return value is discarded; the code below reads 'bbox' and
        # 'role_bbox' from the list items, so this relies on the dicts being
        # updated in place.
        vu.attach_gt_boxes(x, coco)

    classes = [x['action_name'] for x in vcoco_data]
    vcoco = vcoco_data[classes.index(action)]

    positive_index = [index]
    cc = plt.get_cmap('hsv', lut=4)

    # Slice instead of indexing with range(show_num): the list holds a single
    # entry, so show_num > 1 used to raise IndexError.
    for sample in positive_index[:show_num]:
        # get image
        split_dir = 'train' if "train" in vcoco['file_name'][sample] else "val"
        file_name = 'coco/images/' + split_dir + '2014/' + str(
            vcoco['file_name'][sample])
        print(file_name)
        im = np.asarray(Image.open(file_name))

        # scale the figure so it keeps the image's aspect ratio
        sy = 4.
        sx = float(im.shape[1]) / float(im.shape[0]) * sy

        # draw image
        fig, ax = subplot(plt, (1, 1), (sy, sx))
        ax.set_axis_off()
        ax.imshow(im)
        print("label:", vcoco['label'][sample], vcoco['role_object_id'][sample])

        # draw bounding box for agent
        draw_bbox(plt, ax, vcoco['bbox'][[sample], :], edgecolor=cc(0)[:3])

        # one row of four coordinates per role; row 0 is skipped below
        role_bbox = (vcoco['role_bbox'][sample, :] * 1.).reshape((-1, 4))
        for j in range(1, len(vcoco['role_name'])):
            if not np.isnan(role_bbox[j, 0]):
                draw_bbox(plt, ax, role_bbox[[j], :], edgecolor=cc(j)[:3])
        print("draw_bbox")
    plt.show()
def main():
    """Write sample 'hit' annotations from vcoco_train to trial image files.

    Prints every action name with its role names, then picks up to five
    positive 'hit' samples (fixed random seed) and, for each, writes the raw
    image to ``trial.jpg`` and the image with its non-NaN role boxes drawn
    via ``draw_bbox`` to ``trial_boxes.jpg``. Both files are overwritten on
    every sample.

    Raises:
        IOError: if a sample's image cannot be read by ``cv2.imread``.
    """
    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()
    # Load the V-COCO annotations for the vcoco_train image set
    vcoco_all = vu.load_vcoco('vcoco_train')
    for x in vcoco_all:
        # Return value discarded; 'role_bbox' is read from the list items
        # below, so this relies on the dicts being updated in place.
        vu.attach_gt_boxes(x, coco)

    # Action classes and roles in V-COCO
    classes = [x['action_name'] for x in vcoco_all]
    for x in vcoco_all:
        print('{:>20s}'.format(x['action_name']), x['role_name'])

    # Visualize annotations for some class
    vcoco = vcoco_all[classes.index('hit')]

    np.random.seed(1)
    positive_index = np.where(vcoco['label'] == 1)[0]
    positive_index = np.random.permutation(positive_index)

    # Slicing tolerates splits with fewer than five positive samples.
    for sample in positive_index[:5]:
        # Load image
        image_path = 'coco-data/images/train2014/COCO_train2014_{}.jpg'.format(
            str(vcoco['image_id'][sample][0]).rjust(12, '0'))
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('could not read image: {}'.format(image_path))
        cv2.imwrite('trial.jpg', im)

        # Draw bounding boxes, one row of four coordinates per role
        role_bbox = (vcoco['role_bbox'][sample, :] * 1.).reshape((-1, 4))
        for j in range(len(vcoco['role_name'])):
            if not np.isnan(role_bbox[j, 0]):
                print(vcoco['role_name'][j], role_bbox[j])
                im = draw_bbox(im, role_bbox[j])
        cv2.imwrite('trial_boxes.jpg', im)
def __init__(self, root, input_imsize, transform, imageset):
    """Load V-COCO annotations and index every positive (action, sample) pair.

    Args:
        root: data root; ``features_deformable`` and ``../v-coco`` are
            resolved relative to it.
        input_imsize: stored unchanged as ``self.imsize``.
        transform: stored unchanged as ``self.transform``.
        imageset: V-COCO split name; ``'test'`` maps to COCO ``'val'`` and
            disables negative samples, any other value maps to ``'train'``
            with 200 negatives.
    """
    is_test = imageset == 'test'

    self.imageset = imageset
    self.vcoco_imageset = 'val' if is_test else 'train'
    self.vcoco_feature_path = os.path.join(root, 'features_deformable')
    self.vcoco_path = os.path.join(root, '..', 'v-coco')
    self.imsize = input_imsize
    self.transform = transform

    self.coco = vu.load_coco()
    self.vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))

    # (action index, sample index) for every sample labelled 1.
    hoi_pairs = []
    for action_idx, action_ann in enumerate(self.vcoco_all):
        action_ann = vu.attach_gt_boxes(action_ann, self.coco)
        for sample_idx in np.where(action_ann['label'] == 1)[0].tolist():
            hoi_pairs.append((action_idx, sample_idx))
    self.hoi_list = hoi_pairs
    self.positive_num = len(hoi_pairs)

    # Hard negative examples
    first_action_ids = self.vcoco_all[0]['image_id']
    self.image_info_list = self.coco.loadImgs(
        ids=first_action_ids[:, 0].tolist())
    if is_test:
        self.negative_num = 0
    else:
        self.negative_num = 200
def _load_vcoco_with_boxes(split, coco):
    """Load one V-COCO split and run attach_gt_boxes on every action dict."""
    annotations = vu.load_vcoco(split)
    for action_ann in annotations:
        # Return value discarded; 'role_bbox' is read from these dicts later,
        # so this relies on them being updated in place.
        vu.attach_gt_boxes(action_ann, coco)
    return annotations


def _mark_role_pairs(rels_matrix, role_bbox, kpts):
    """Set rels_matrix[a, b] = 1 for each detected role pair; return how many.

    A role is usable when its box has no NaN and ``obj_detected`` returns an
    index other than -1. Pairs are visited in the order (0, 1), (0, 2),
    (1, 2), ... and an unusable role only skips the pairs that involve it.
    """
    role_idx = []
    for bbox in role_bbox:
        if np.isnan(bbox).any():
            role_idx.append(-1)
        else:
            role_idx.append(obj_detected(bbox, kpts))

    marked = 0
    for a in range(len(role_idx)):
        for b in range(a + 1, len(role_idx)):
            if role_idx[a] == -1 or role_idx[b] == -1:
                continue
            rels_matrix[role_idx[a], role_idx[b]] = 1
            marked += 1
    return marked


def main():
    """Build per-image relationship matrices from V-COCO and save them.

    Merges the train, test and val V-COCO annotations, loads the detected
    boxes stored under ``--kpts_dir`` and, for every positive sample of every
    action except stand/walk/run/smile/look, marks each pair of annotated
    roles that ``obj_detected`` can match to a detected box. One
    ``<image id>.npz`` file with a ``relationships`` array is written to
    ``--rels_dir`` for every image that received at least one relationship.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--input-dir",
        default="../datasets/coco/vcoco/",
        metavar="DIR",
        help="path to vg images directory",
    )
    parser.add_argument(
        '--kpts_dir',
        type=str,
        default='../datasets/coco/vcoco/keypoints_real/',
        help='Directory holding the per-image .npz files whose "boxes" '
        'array is read',
    )
    parser.add_argument(
        '--rels_dir',
        type=str,
        default='../datasets/coco/vcoco/relationships_0_1_look_real/',
        help='Directory in which the relationship .npz files are written',
    )
    args = parser.parse_args()

    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()

    # Load the V-COCO annotations of all three splits
    vcoco_train = _load_vcoco_with_boxes('vcoco_train', coco)
    vcoco_test = _load_vcoco_with_boxes('vcoco_test', coco)
    vcoco_val = _load_vcoco_with_boxes('vcoco_val', coco)

    # Per-action metadata is taken from train; every other key is
    # concatenated along axis 0 in train, test, val order.
    shared_keys = ('action_name', 'role_name', 'include')
    vcoco_all = []
    for train_ann, test_ann, val_ann in zip(vcoco_train, vcoco_test,
                                            vcoco_val):
        merged = {}
        for key, value in train_ann.items():
            if key in shared_keys:
                merged[key] = value
            else:
                merged[key] = np.concatenate(
                    (value, test_ann[key], val_ann[key]), axis=0)
        vcoco_all.append(merged)

    # Image ids are the file names in kpts_dir up to the first dot.
    img_ids = [x.split('.')[0] for x in os.listdir(args.kpts_dir)]

    print('Loading kpts from VCOCO...')
    kpts_dic = {}
    for img_id in tqdm(img_ids):
        # os.path.join works whether or not kpts_dir ends with a separator.
        with np.load(os.path.join(args.kpts_dir, img_id + '.npz'),
                     allow_pickle=True) as detec:
            kpts_dic[img_id] = detec['boxes']

    # Set random seed
    np.random.seed(10)

    print('Loading relationships...')
    rels_dic = {}
    count = 0
    skipped_actions = ('stand', 'walk', 'run', 'smile', 'look')
    for vcoco_cls in vcoco_all:
        if vcoco_cls['action_name'] in skipped_actions:
            continue
        n_roles = len(vcoco_cls['role_name'])

        for i in tqdm(range(vcoco_cls['label'].shape[0])):
            if vcoco_cls['label'][i] == 0:
                continue
            img_id_str = str(vcoco_cls['image_id'][i][0]).rjust(12, '0')
            if img_id_str not in kpts_dic:
                continue

            kpts = kpts_dic[img_id_str]
            rels_matrix = rels_dic.get(img_id_str)
            if rels_matrix is None:
                n = kpts.shape[0]
                rels_matrix = np.zeros((n, n))

            # One row of four coordinates per role.
            role_bbox = (vcoco_cls['role_bbox'][i, :] * 1.).reshape((-1, 4))
            marked = _mark_role_pairs(rels_matrix, role_bbox[:n_roles], kpts)
            if marked:
                # Store the matrix whenever something was marked. A missing
                # later role used to skip the store, so a freshly created
                # matrix was lost although it had already been counted.
                count += marked
                rels_dic[img_id_str] = rels_matrix

    print(count)
    for img_id, rels_matrix in rels_dic.items():
        np.savez(os.path.join(args.rels_dir, '{}.npz'.format(img_id)),
                 relationships=rels_matrix)
list_file = open('%s/filelist/%s.txt' % (dataDir, dataType), 'w') # 数据集的图片list保存路径 test_pref_file = open('%s/filelist/%s.txt' % (dataDir, prefType), 'w') # 数据集的图片list_pref保存路径 # Load COCO annotations for V-COCO images coco = vu.load_coco() vcoco_anns = ['vcoco_test'] interactions = ['kick', 'read', 'skateboard', 'ski', 'snowboard', 'surf', 'talk_on_phone', 'work_on_computer'] #interactions = ['talk_on_phone'] # Load the VCOCO annotations for vcoco_train image set for anns in vcoco_anns: vcoco_all = vu.load_vcoco(anns) for x in vcoco_all: x = vu.attach_gt_boxes(x, coco) # Action classes and roles in V-COCO classes = [x['action_name'] for x in vcoco_all] for cls in interactions: cls_id = classes.index(cls) # Visualize annotations for the some class for cls_sel in interactions: vcoco = vcoco_all[classes.index(cls_sel)] np.random.seed(1) positive_index = np.where(vcoco['label'] == 1)[0] positive_index = np.random.permutation(positive_index) for id in positive_index: coco_image = coco.loadImgs(ids=[vcoco['image_id'][id][0]])[0] file_name = coco_image['file_name'] width = coco_image['width'] # 获取图片尺寸 height = coco_image['height'] # 获取图片尺寸
def parse_features(paths, imageset):
    """Assemble per-image graph features and labels, then run evaluation.

    For every row of the first action's ``image_id`` array the function
    either builds node/edge features from the ``.npy`` files under
    ``features_resnet`` and ``features_deformable`` (first time an image id
    is seen) or reloads what it saved earlier for that image (later rows of
    the same id). It then merges the ground-truth labels of that row into
    ``adj_mat``, ``node_labels`` and ``node_roles`` and saves everything
    under ``<paths.data_root>/processed/resnet``. On the last row of each
    image id the result is handed to ``append_result``; finally
    ``vcoco_evaluation`` is called on all collected results.

    Images whose feature files are missing are skipped with a warning.

    Args:
        paths: object exposing ``data_root``; also forwarded to
            ``get_vcocoeval`` and ``vcoco_evaluation``.
        imageset: V-COCO split name used to load ``vcoco_<imageset>``.
    """
    # Sizes used with other backbones: 49 (Deformable ConvNet),
    # 512 * 49 (VGG conv feature), 4096 (VGG fully connected feature).
    roi_size = 1000  # ResNet fully connected feature
    feature_size = 49
    feature_type = 'resnet'
    action_class_num = len(metadata.action_classes)
    no_action_index = metadata.action_index['none']
    no_role_index = metadata.role_index['none']

    feature_path = os.path.join(paths.data_root,
                                'features_{}'.format(feature_type))
    det_feature_path = os.path.join(paths.data_root, 'features_deformable')
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))
    for x in vcoco_all:
        # Return value discarded; 'role_bbox' is read from these dicts below,
        # so this relies on them being updated in place.
        vu.attach_gt_boxes(x, coco)

    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
    # Last row of every image id, computed once instead of reversing and
    # searching the list on every iteration.
    last_row_of_image = {img: row for row, img in enumerate(image_ids)}

    all_results = list()
    seen_image_ids = set()
    for i_image, image_id in enumerate(image_ids):
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        instance_path = os.path.join(save_data_path, '{}.p'.format(filename))
        # np.save appends '.npy' to these two paths.
        edge_features_path = os.path.join(
            save_data_path, '{}_edge_features'.format(filename))
        node_features_path = os.path.join(
            save_data_path, '{}_node_features'.format(filename))

        if image_id not in seen_image_ids:
            try:
                bbox_features = np.load(os.path.join(
                    feature_path, '{}_features.npy'.format(filename)))
                det_classes = np.load(os.path.join(
                    det_feature_path, '{}_classes.npy'.format(filename)))
                det_boxes = np.load(os.path.join(
                    det_feature_path, '{}_boxes.npy'.format(filename)))
                det_features = np.load(os.path.join(
                    det_feature_path, '{}_features.npy'.format(filename)))
            except IOError:
                warnings.warn(
                    'Features and detection results missing for {}'.format(
                        filename))
                continue

            human_num, obj_num, edge_num = parse_classes(det_classes)
            node_num = human_num + obj_num
            assert edge_num == human_num * obj_num

            seen_image_ids.add(image_id)
            edge_features = np.zeros((node_num, node_num, roi_size))
            node_features = np.zeros((node_num, roi_size + feature_size * 2))
            adj_mat = np.zeros((node_num, node_num))
            node_labels = np.zeros((node_num, action_class_num))
            node_roles = np.zeros((node_num, 3))
            node_labels[:, no_action_index] = 1
            node_roles[:, no_role_index] = 1

            # Node features: the ROI feature first, then the detection
            # feature in the first slot for humans or the second for objects.
            for i_node in range(node_num):
                node_features[i_node, :roi_size] = np.reshape(
                    bbox_features[i_node, ...], roi_size)
                if i_node < human_num:
                    node_features[i_node, roi_size:roi_size + feature_size] = \
                        np.reshape(det_features[i_node, ...], feature_size)
                else:
                    node_features[i_node, roi_size + feature_size:] = \
                        np.reshape(det_features[i_node, ...], feature_size)

            # Edge features: rows after the first node_num rows of
            # bbox_features, one per (human, object) pair, stored
            # symmetrically.
            i_edge = 0
            for i_human in range(human_num):
                for i_obj in range(obj_num):
                    edge_features[i_human, human_num + i_obj, :] = np.reshape(
                        bbox_features[node_num + i_edge, ...], roi_size)
                    edge_features[human_num + i_obj, i_human, :] = \
                        edge_features[i_human, human_num + i_obj, :]
                    i_edge += 1
        else:
            # Later row of an image already processed: continue from the
            # state saved by the previous row.
            with open(instance_path, 'rb') as instance_file:
                saved_instance = pickle.load(instance_file)
            edge_features = np.load(edge_features_path + '.npy')
            node_features = np.load(node_features_path + '.npy')
            adj_mat = saved_instance['adj_mat']
            node_labels = saved_instance['node_labels']
            node_roles = saved_instance['node_roles']
            human_num = saved_instance['human_num']
            obj_num = saved_instance['obj_num']
            det_boxes = saved_instance['boxes']
            det_classes = saved_instance['classes']

        # Ground truth labels: adj_mat, node_labels, node_roles
        for x in vcoco_all:
            if x['label'][i_image, 0] == 1:
                try:
                    action_index = metadata.action_index[x['action_name']]

                    role_bbox = x['role_bbox'][i_image, :] * 1.
                    role_bbox = role_bbox.reshape((-1, 4))
                    bbox = role_bbox[0, :]
                    human_index = get_node_index(bbox, det_boxes,
                                                 range(human_num))
                    if human_index == -1:
                        warnings.warn('human detection missing')
                        continue
                    assert human_index < human_num
                    node_labels[human_index, action_index] = 1
                    node_labels[human_index, no_action_index] = 0

                    for i_role in range(1, len(x['role_name'])):
                        bbox = role_bbox[i_role, :]
                        if np.isnan(bbox[0]):
                            continue
                        obj_index = get_node_index(
                            bbox, det_boxes,
                            range(human_num, human_num + obj_num))
                        if obj_index == -1:
                            warnings.warn('object detection missing')
                            continue
                        assert obj_index >= human_num
                        node_labels[obj_index, action_index] = 1
                        node_labels[obj_index, no_action_index] = 0
                        node_roles[obj_index, metadata.role_index[
                            x['role_name'][i_role]]] = 1
                        node_roles[obj_index, no_role_index] = 0
                        adj_mat[human_index, obj_index] = 1
                        adj_mat[obj_index, human_index] = 1
                except IndexError:
                    warnings.warn('Labels missing for {}'.format(filename))

        instance = dict()
        instance['img_id'] = image_id
        instance['human_num'] = human_num
        instance['obj_num'] = obj_num
        instance['img_name'] = filename
        instance['boxes'] = det_boxes
        instance['classes'] = det_classes
        instance['adj_mat'] = adj_mat
        instance['node_labels'] = node_labels
        instance['node_roles'] = node_roles
        np.save(edge_features_path, edge_features)
        np.save(node_features_path, node_features)
        # Close the file explicitly so the pickle is flushed before a later
        # row of the same image reads it back.
        with open(instance_path, 'wb') as instance_file:
            pickle.dump(instance, instance_file)

        if i_image == last_row_of_image[image_id]:
            append_result(all_results, node_labels, node_roles,
                          int(image_ids[i_image]), det_boxes, human_num,
                          obj_num, adj_mat)

    # A single formatted string prints identically under Python 2 and 3.
    print('total image {} total results {}'.format(len(seen_image_ids),
                                                   len(all_results)))
    vcocoeval = get_vcocoeval(paths, imageset)
    vcoco_evaluation(paths, vcocoeval, imageset, all_results)
def parse_features(paths, imageset):
    """Add a part-level adjacency matrix to every saved instance pickle.

    For each row of the first action's ``image_id`` array, the part classes
    and part-to-human ids are loaded from ``features_resnet_noisy``. Entry
    ``[i, j]`` of the matrix is 1 when parts ``i`` and ``j`` have the same
    human id and ``part_dist`` is 1 for their class pair. The matrix is
    stored under the key ``'part_adj_mat'`` in the existing
    ``<filename>.p`` pickle below ``processed/resnet_noisy``.

    Images with missing feature files or a missing pickle are skipped with a
    warning.

    Args:
        paths: object exposing ``data_root`` and ``vcoco_data_root``.
        imageset: V-COCO split name used to load ``vcoco_<imageset>``.
    """
    feature_type = 'resnet_noisy'

    feature_path = os.path.join(paths.data_root,
                                'features_{}'.format(feature_type))
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco)

    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
    for image_id in image_ids:
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        print(filename)
        try:
            part_classes = np.load(os.path.join(
                feature_path, '{}_part_classes.npy'.format(filename)))
            part_human_id = np.load(os.path.join(
                feature_path, '{}_part_human_id.npy'.format(filename)))
        except IOError:
            warnings.warn(
                'Features and detection results missing for {}'.format(
                    filename))
            continue

        # Create the part-level adjacency matrix.
        part_num = len(part_classes)
        part_adj_mat = np.zeros((part_num, part_num))
        for i_part in range(part_num):
            for j_part in range(part_num):
                if part_human_id[i_part] != part_human_id[j_part]:
                    continue
                if part_dist[part_classes[i_part],
                             part_classes[j_part]] == 1:
                    part_adj_mat[i_part, j_part] = 1

        instance_path = os.path.join(save_data_path,
                                     '{}.p'.format(filename))
        try:
            # Context managers close both handles deterministically; the
            # rewrite of the pickle must be flushed to disk.
            with open(instance_path, 'rb') as instance_file:
                instance = pickle.load(instance_file)
            instance['part_adj_mat'] = part_adj_mat
            with open(instance_path, 'wb') as instance_file:
                pickle.dump(instance, instance_file)
        except IOError:
            warnings.warn('.p file missing for {}'.format(filename))
            continue
def extract_features(paths, imageset, vcoco_imageset):
    """Extract one network feature per detected box and person-object pair.

    For every image of ``image_list`` that also occurs in the V-COCO split,
    detections with a score above 0.7 are collected, one combined box (via
    ``combine_box``) is appended for every (person, non-person) pair with
    class 0, each box is cropped, resized, normalised and fed through the
    feature network, and the classes, boxes and features are saved as
    ``<image name>_classes.npy``, ``_boxes.npy`` and ``_features.npy`` under
    the feature path returned by ``get_info``.

    Args:
        paths: project paths object, forwarded to ``get_info`` and
            ``get_model``.
        imageset: COCO image folder prefix (``<imageset>2014``).
        vcoco_imageset: V-COCO split name used to load
            ``vcoco_<vcoco_imageset>``.

    Raises:
        ValueError: if the configured feature type is not recognised.
    """
    feature_type = 'resnet'
    # NOTE(review): 244 looks like it may be meant to be 224 (the usual
    # ImageNet input size) - confirm before changing, it alters the features.
    input_h, input_w = 244, 244

    feature_dims = {'vgg': 4096, 'resnet': 1000, 'densenet': 1000}
    if feature_type not in feature_dims:
        raise ValueError('feature type not recognized')
    feature_dim = feature_dims[feature_type]

    vcoco_path, det_res_path, feature_path, classes, image_list = get_info(
        paths, imageset, feature_type)
    if not os.path.exists(feature_path):
        os.makedirs(feature_path)
    feature_network = get_model(paths, feature_type)
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225]),
    ])

    # Read detection results:
    # [class_num][img_num][detection_num][x1, y1, x2, y2, score]
    # Pickle data is binary, so the file is opened in 'rb' mode.
    with open(det_res_path, 'rb') as f:
        det_res = pickle.load(f)

    coco_from_vcoco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(vcoco_imageset))
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco_from_vcoco)
    vcoco_image_ids = vcoco_all[0]['image_id'][:, 0].astype(int)

    for i_image, img_info in enumerate(image_list):
        img_id = img_info['id']
        if not np.any(vcoco_image_ids == img_id):
            continue  # image is not part of this V-COCO split
        img_name = img_info['file_name']
        print(img_name)

        # Detected boxes and classes above the score threshold.
        kept_boxes = list()
        det_classes_all = list()
        for c in range(1, len(classes)):
            for detection in det_res[c][i_image]:
                if detection[4] > 0.7:
                    kept_boxes.append(detection[:4])
                    det_classes_all.append(c)
        if len(det_classes_all) == 0:
            continue
        det_boxes_all = np.asarray(kept_boxes,
                                   dtype=np.float64).reshape((-1, 4))

        # One combined box per (person, non-person) pair; class 1 is person.
        edge_boxes = list()
        for person_i, person_c in enumerate(det_classes_all):
            if person_c != 1:
                continue
            for obj_i, obj_c in enumerate(det_classes_all):
                if obj_c == 1:
                    continue
                edge_boxes.append(
                    combine_box(det_boxes_all[person_i, :],
                                det_boxes_all[obj_i, :]))
        if edge_boxes:
            det_boxes_all = np.vstack([det_boxes_all] + edge_boxes)
        det_classes_all.extend([0] * len(edge_boxes))

        # Read image
        image_path = os.path.join(vcoco_path, 'coco/images',
                                  '{}2014'.format(imageset), img_name)
        assert os.path.exists(image_path)
        original_img = scipy.misc.imread(image_path, mode='RGB')

        # Get the feature of every box by applying the network to its crop.
        roi_features = np.zeros((det_boxes_all.shape[0], feature_dim))
        for i_box in range(det_boxes_all.shape[0]):
            roi = det_boxes_all[i_box, :].astype(int)
            # Boxes are [x1, y1, x2, y2]; both corners are included.
            roi_image = original_img[roi[1]:roi[3] + 1, roi[0]:roi[2] + 1, :]
            roi_image = transform(
                cv2.resize(roi_image, (input_h, input_w),
                           interpolation=cv2.INTER_LINEAR))
            roi_image = torch.autograd.Variable(roi_image.unsqueeze(0)).cuda()
            feature, _ = feature_network(roi_image)
            roi_features[i_box, ...] = feature.data.cpu().numpy()

        np.save(os.path.join(feature_path, '{}_classes'.format(img_name)),
                det_classes_all)
        np.save(os.path.join(feature_path, '{}_boxes'.format(img_name)),
                det_boxes_all)
        np.save(os.path.join(feature_path, '{}_features'.format(img_name)),
                roi_features)
def parse_features(paths, imageset):
    """Build and save part/object graph features and labels per image.

    For each row of the first action's ``image_id`` array:

    1. load the object, part and edge arrays from ``features_resnet_noisy``
       and count parts, objects, humans and edges;
    2. create ``adj_mat``, ``node_labels`` and ``node_roles`` (human/object
       level) and, when features are enabled, part/object-level node and
       edge feature arrays;
    3. fill the node features (parts in the first half, objects in the
       second half of the feature axis);
    4. fill the symmetric edge features for every (part, object) pair;
    5. merge the ground-truth labels of that row and save the instance
       pickle and feature arrays under ``processed/resnet``.

    Everything is recomputed from the feature files on every row; nothing is
    read back from ``processed/resnet``. Images with missing feature files
    or without any detected part are skipped with a warning.

    Args:
        paths: object exposing ``data_root`` and ``vcoco_data_root``.
        imageset: V-COCO split name used to load ``vcoco_<imageset>``.

    Raises:
        IndexError: re-raised (after a warning) when a label lookup fails.
    """
    # Sizes used with other backbones: 49 (Deformable ConvNet),
    # 512 * 49 (VGG conv feature), 4096 (VGG fully connected feature).
    roi_size = 1000  # ResNet fully connected feature
    feature_size = 1000
    feature_type = 'resnet'
    action_class_num = len(metadata.action_classes)
    no_action_index = metadata.action_index['none']
    no_role_index = metadata.role_index['none']

    feature_path = os.path.join(paths.data_root,
                                'features_{}_noisy'.format(feature_type))
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for x in vcoco_all:
        # Return value discarded; 'role_bbox' is read from these dicts below,
        # so this relies on them being updated in place.
        vu.attach_gt_boxes(x, coco)

    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
    all_results = list()  # never filled here; only its length is printed
    unique_image_ids = list()
    for i_image, image_id in enumerate(image_ids):
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        print(filename)

        def load_feature(suffix):
            # Load '<feature_path>/<filename>_<suffix>.npy'.
            return np.load(os.path.join(
                feature_path, '{}_{}.npy'.format(filename, suffix)))

        try:
            obj_classes = load_feature('obj_classes')
            obj_boxes = load_feature('obj_boxes')
            part_classes = load_feature('part_classes')
            part_boxes = load_feature('part_boxes')
            part_human_id = load_feature('part_human_id')
            # Not used below; still loaded so that a missing file skips the
            # image exactly as before.
            load_feature('edge_human_id')
            edge_boxes = load_feature('edge_boxes')
            if feature_type != 'None':
                obj_features = load_feature('obj_features')
                part_features = load_feature('part_features')
                edge_features_in = load_feature('edge_features')
        except IOError:
            warnings.warn(
                'Features and detection results missing for {}'.format(
                    filename))
            continue

        # 1. compute number of parts, objects, humans and edges
        part_num = len(part_boxes)
        obj_num = len(obj_boxes)
        human_num = len(set(part_human_id))
        edge_num = len(edge_boxes)
        node_num = human_num + obj_num
        assert edge_num == part_num * obj_num
        if part_num == 0:
            warnings.warn('human detection missing for {}'.format(filename))
            continue

        # 2. Create placeholders for edge_feature, node_feature, adj_mat,
        #    node_labels, node_roles
        unique_image_ids.append(image_id)
        if feature_type != 'None':
            edge_features = np.zeros(
                (part_num + obj_num, part_num + obj_num, feature_size))
            node_features = np.zeros((part_num + obj_num, feature_size * 2))
        adj_mat = np.zeros((node_num, node_num))
        node_labels = np.zeros((node_num, action_class_num))
        node_roles = np.zeros((node_num, 3))
        node_labels[:, no_action_index] = 1
        node_roles[:, no_role_index] = 1

        # Group part boxes into human boxes
        human_boxes, part_human_ids, _ = group_boxes(part_boxes,
                                                     part_human_id)

        if feature_type != 'None':
            # 3. Extract node features: parts first, then objects
            for i_node in range(part_num + obj_num):
                if i_node < part_num:
                    node_features[i_node, :roi_size] = np.reshape(
                        part_features[i_node, ...], roi_size)
                else:
                    node_features[i_node, roi_size:] = np.reshape(
                        obj_features[i_node - part_num, ...], roi_size)

            # 4. Extract edge features, one per (part, object) pair
            i_edge = 0
            for i_part in range(part_num):
                for i_obj in range(obj_num):
                    # The stored edge box must equal the combination of the
                    # part and object boxes, which fixes the pair order.
                    assert np.linalg.norm(
                        combine_box(part_boxes[i_part], obj_boxes[i_obj]) -
                        edge_boxes[i_edge]) == 0
                    edge_features[i_part, part_num + i_obj, :] = np.reshape(
                        edge_features_in[i_edge, ...], roi_size)
                    edge_features[part_num + i_obj, i_part, :] = \
                        edge_features[i_part, part_num + i_obj, :]
                    i_edge += 1

        # 5. Compute ground truth adj_mat, node_label, node_role
        for x in vcoco_all:
            if x['label'][i_image, 0] == 1:
                try:
                    action_index = metadata.action_index[x['action_name']]

                    role_bbox = x['role_bbox'][i_image, :] * 1.
                    role_bbox = role_bbox.reshape((-1, 4))
                    bbox = role_bbox[0, :]
                    # node_index uses the grouped human boxes
                    human_index = get_node_index(bbox, human_boxes,
                                                 np.arange(human_num))
                    if human_index == -1:
                        warnings.warn('human detection missing')
                        continue
                    assert human_index < human_num
                    node_labels[human_index, action_index] = 1
                    node_labels[human_index, no_action_index] = 0

                    for i_role in range(1, len(x['role_name'])):
                        bbox = role_bbox[i_role, :]
                        if np.isnan(bbox[0]):
                            continue
                        # Object nodes follow the human nodes, so a missing
                        # object (-1) shows up as human_num - 1.
                        obj_index = get_node_index(
                            bbox, obj_boxes, np.arange(obj_num)) + human_num
                        if obj_index == human_num - 1:
                            warnings.warn('object detection missing')
                            continue
                        assert obj_index >= human_num and \
                            obj_index < human_num + obj_num
                        node_labels[obj_index, action_index] = 1
                        node_labels[obj_index, no_action_index] = 0
                        node_roles[obj_index, metadata.role_index[
                            x['role_name'][i_role]]] = 1
                        node_roles[obj_index, no_role_index] = 0
                        adj_mat[human_index, obj_index] = 1
                        adj_mat[obj_index, human_index] = 1
                except IndexError:
                    warnings.warn('Labels missing for {}'.format(filename))
                    raise

        instance = dict()
        instance['img_id'] = image_id
        instance['human_num'] = human_num
        instance['obj_num'] = obj_num
        instance['part_num'] = part_num
        instance['img_name'] = filename
        instance['human_boxes'] = human_boxes
        instance['part_boxes'] = part_boxes
        instance['obj_boxes'] = obj_boxes
        instance['edge_boxes'] = edge_boxes
        instance['obj_classes'] = obj_classes
        instance['part_classes'] = part_classes
        instance['adj_mat'] = adj_mat
        instance['part_human_id'] = part_human_ids
        instance['node_labels'] = node_labels
        instance['node_roles'] = node_roles
        if feature_type != 'None':
            np.save(os.path.join(save_data_path,
                                 '{}_edge_features.npy'.format(filename)),
                    edge_features)
            np.save(os.path.join(save_data_path,
                                 '{}_node_features.npy'.format(filename)),
                    node_features)
        # Close the file explicitly so the pickle is fully written.
        with open(os.path.join(save_data_path, '{}.p'.format(filename)),
                  'wb') as instance_file:
            pickle.dump(instance, instance_file)

    print('total image', len(unique_image_ids), 'total results',
          len(all_results))
def _box_geometry_features(boxes, img_w, img_h):
    """Return an (N, 6) array of image-relative geometry descriptors.

    ``boxes`` is indexed per row as ``[x1, y1, x2, y2]`` (columns 0/2 are
    treated as horizontal extents and 1/3 as vertical ones).

    Columns of the result: relative width, relative height, relative
    center x, relative center y, relative area, aspect ratio (w / h).
    Degenerate boxes may produce NaN/inf here; the caller zeroes those.
    """
    box_w = boxes[:, 2] - boxes[:, 0]
    box_h = boxes[:, 3] - boxes[:, 1]
    geometry = np.zeros([boxes.shape[0], 6])
    # Zero-sized boxes are expected; silence the resulting numpy warnings
    # because the non-finite values are replaced by the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        geometry[:, 0] = box_w / img_w  # relative w
        geometry[:, 1] = box_h / img_h  # relative h
        geometry[:, 2] = ((boxes[:, 2] + boxes[:, 0]) / 2) / img_w  # relative cx
        geometry[:, 3] = ((boxes[:, 3] + boxes[:, 1]) / 2) / img_h  # relative cy
        geometry[:, 4] = box_w * box_h / (img_w * img_h)  # relative area
        geometry[:, 5] = box_w / box_h  # aspect ratio
    return geometry


def parse_features(paths, imageset):
    """Write appended node features (box geometry + one-hot class) per image.

    For every image id listed in the first V-COCO verb entry of
    ``vcoco_<imageset>``, this loads the previously saved ``<file_name>.p``
    instance and ``<file_name>_node_features.npy`` from
    ``<paths.data_root>/processed/resnet``. Images for which those files
    cannot be loaded are skipped silently.

    For each remaining image an array of shape
    ``(num_nodes, 6 + 14 + 81)`` is built and saved next to the inputs as
    ``<file_name>_node_features_appd.npy``:

    * columns ``0:6``   - geometry descriptors (see
      ``_box_geometry_features``),
    * columns ``6:20``  - one-hot part class (rows ``[:part_num]`` only),
    * columns ``20:``   - one-hot object class (rows ``[part_num:]`` only).

    Args:
        paths: object providing ``data_root`` and ``vcoco_data_root``.
        imageset: V-COCO split name; also used as key into ``vcoco_mapping``
            to locate the COCO image folder.

    Raises:
        IOError: if processed data exists for an image but the image file
            itself cannot be read.
    """
    feature_type = 'resnet'
    geometry_dim = 6
    part_class_num = 14
    obj_class_num = 81

    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for verb_data in vcoco_all:
        # The return value was only ever rebound to the loop variable, so
        # calling for its effect on ``verb_data`` is equivalent.
        vu.attach_gt_boxes(verb_data, coco)
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    part_eye = np.eye(part_class_num)
    obj_eye = np.eye(obj_class_num)
    image_folder = os.path.join(
        paths.data_root, 'coco', vcoco_mapping[imageset] + '2014')

    for image_id in image_ids:
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']

        # Load the processed instance first: images without processed data
        # are skipped, so there is no point decoding them.
        # NOTE: pickle must only be used on trusted, locally generated files.
        try:
            with open(os.path.join(save_data_path,
                                   '{}.p'.format(filename)), 'rb') as f:
                data = pickle.load(f)
            node_features = np.load(
                os.path.join(save_data_path,
                             '{}_node_features.npy'.format(filename)))
        except (IOError, OSError, EOFError, ValueError,
                pickle.UnpicklingError):
            continue

        image_path = os.path.join(image_folder, filename)
        img = cv2.imread(image_path)
        if img is None:
            raise IOError('Failed to read image: {}'.format(image_path))
        # cv2 images are (rows, cols, channels) == (height, width, channels).
        img_h, img_w = img.shape[:2]

        obj_boxes = data['obj_boxes']
        part_boxes = data['part_boxes']
        part_num = data['part_num']
        obj_classes = data['obj_classes']
        part_classes = data['part_classes']

        # append bbox and class to node features
        class_offset = geometry_dim + part_class_num
        node_features_appd = np.zeros(
            [node_features.shape[0],
             geometry_dim + part_class_num + obj_class_num])

        node_features_appd[:part_num, :geometry_dim] = _box_geometry_features(
            part_boxes, img_w, img_h)
        node_features_appd[:part_num, geometry_dim:class_offset] = \
            part_eye[part_classes]

        node_features_appd[part_num:, :geometry_dim] = _box_geometry_features(
            obj_boxes, img_w, img_h)
        node_features_appd[part_num:, class_offset:] = obj_eye[obj_classes]

        # Degenerate boxes yield NaN/inf ratios; store them as 0 instead.
        node_features_appd[np.isnan(node_features_appd)] = 0
        node_features_appd[np.isinf(node_features_appd)] = 0

        np.save(
            os.path.join(save_data_path,
                         '{}_node_features_appd.npy'.format(filename)),
            node_features_appd)
def populateTrainDict():
    """Build the per-image subject/role training list from ``vcoco_trainval``.

    Returns:
        A tuple ``(data_list, classes, coco)``:

        * ``data_list`` holds, for every image with at least one positive
          label, one entry ``[image_id, [subject_box, ...], 0]`` listing the
          distinct subject boxes (first four role-box values), followed by
          one entry ``[image_id, subject_box, roles, 1]`` per distinct
          subject. Each item of ``roles`` is the remaining role-box values
          followed by the action label.
        * ``classes`` is the list of action names with ``eat``/``cut``/``hit``
          renamed to ``*_obj`` and the matching ``*_instr`` names appended.
        * ``coco`` is the object returned by ``vu.load_coco()``.

    For ``cut``/``hit``/``eat`` annotations each role item is split in two:
    the first four values keep the first role's label and the remaining
    values are appended to ``roles`` as a new item with the second role's
    label (``_instr`` then ``_obj`` for cut/hit, ``_obj`` then ``_instr``
    for eat).
    """
    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_trainval')

    classes = [x['action_name'] for x in vcoco_all]
    ## Modifying classes according to new criteria of obj-instr
    split_verbs = ("eat", "cut", "hit")
    for verb in split_verbs:
        classes[classes.index(verb)] = verb + "_obj"
    for verb in split_verbs:
        classes.append(verb + "_instr")
    print("# of VCOCO classes:", len(classes))

    for act_dict in vcoco_all:
        # The return value was only ever rebound to the loop variable, so
        # calling for its effect on ``act_dict`` is equivalent.
        vu.attach_gt_boxes(act_dict, coco)

    # image id -> list of [role_bbox values..., action_name] for positives.
    data_dict = {}
    for act_dict in vcoco_all:
        action_name = act_dict['action_name']
        for i, id_row in enumerate(act_dict['image_id']):
            img_id = int(id_row[0])
            if act_dict['label'][i][0] != 1:
                continue
            annotation = list(act_dict['role_bbox'][i])
            annotation.append(action_name)
            data_dict.setdefault(img_id, []).append(annotation)

    data_list = []
    for img_id, img_anno in data_dict.items():
        subject_set = set(tuple(elem[:4]) for elem in img_anno)
        data_list.append([img_id, [list(subj) for subj in subject_set], 0])

        # Group the non-subject part of every annotation by subject box.
        roles_by_subject = {}
        for subj in subject_set:
            roles_by_subject[subj] = []
        for elem in img_anno:
            roles_by_subject[tuple(elem[:4])].append(elem[4:])
        for subj in roles_by_subject:
            data_list.append([img_id, list(subj), roles_by_subject[subj], 1])

    # verb -> (suffix for the first role box, suffix for the second one).
    role_suffixes = {
        'cut': ("_instr", "_obj"),
        'hit': ("_instr", "_obj"),
        'eat': ("_obj", "_instr"),
    }
    for elem in data_list:
        if elem[-1] != 1:
            continue
        roles = elem[2]
        # Snapshot the length: items appended below must not be revisited.
        original_count = len(roles)
        for i in range(original_count):
            verb = roles[i][-1]
            if verb not in role_suffixes:
                continue
            first_suffix, second_suffix = role_suffixes[verb]
            second_role = roles[i][4:]
            second_role[-1] += second_suffix
            roles.append(second_role)
            roles[i] = roles[i][:4]
            roles[i].append(verb + first_suffix)

    return data_list, classes, coco
def _merge_annotations_by_image(anno_list):
    """Merge annotation dicts that share a ``global_id`` into one per image.

    The first dict seen for an image is kept and the ``hois`` of every later
    dict for that image are added to it in full.
    """
    merged = {}
    for item in anno_list:
        assert item['hois'] is not None
        existing = merged.get(item['global_id'])
        if existing is None:
            merged[item['global_id']] = item
        else:
            # extend, not append(hois[0]): an instance may carry two HOIs
            # (two roles) and both must survive the merge.
            existing['hois'].extend(item['hois'])
    return list(merged.values())


def _numbered_name_list(names):
    """Return sorted ``names`` as dicts with 1-based, 3-digit string ids."""
    return [{"id": str(index).zfill(3), "name": name}
            for index, name in enumerate(sorted(names), 1)]


def generate_json_data_for_vcoco_total_with_gt():
    """Dump ground-truth HOI annotations for the V-COCO train/val/test splits.

    Reads ``data_process/hoi_list.json`` (relative to the current working
    directory) and the COCO ``instances_trainval2014.json`` annotations,
    walks every positively labelled instance of ``vcoco_train``,
    ``vcoco_val`` and ``vcoco_test`` and collects, per role with a non-zero
    object annotation id, a record holding the human box, object box,
    object name, action name and the matching id from ``hoi_list``.

    Three JSON files are written through ``io.dump_json_object``:

    * ``data_process/anno_list_gt.json``   - one entry per image with all
      of its HOI records,
    * ``data_process/object_list_gt.json`` - sorted object names with ids,
    * ``data_process/verb_list_gt.json``   - sorted action names with ids.

    Raises:
        AssertionError: if a split does not have 26 verb entries, an
            instance has unexpected box/size/role shapes, or an
            (action, object) pair has no positive id in ``hoi_list``.
    """
    hoi_list_path = os.path.join(os.getcwd(), 'data_process', 'hoi_list.json')
    with open(hoi_list_path, 'r') as f:
        hoi_list = json.load(f)
    # (action, object) -> id; setdefault keeps the first match, like the
    # original linear scan with ``break`` did.
    hoi_id_by_pair = {}
    for hoi in hoi_list:
        hoi_id_by_pair.setdefault((hoi['action'], hoi['object']), hoi['id'])

    coco = COCO(
        os.path.join('..', 'coco/annotations/', 'instances_trainval2014.json'))

    anno_list = []
    verb_set = set()
    object_set = set()
    for dataset_name in ["vcoco_train", "vcoco_val", "vcoco_test"]:
        vcoco_data = vu.load_vcoco(dataset_name)
        # train_data add bbox and role_bbox
        for verb_data in vcoco_data:
            vu.attach_gt_boxes(verb_data, coco)
        assert len(vcoco_data) == 26

        for data in vcoco_data:
            action_name = data['action_name']
            verb_set.add(action_name)
            for i, image_id_row in enumerate(data['image_id']):
                assert data['role_object_id'][i][0] == data['ann_id'][i]
                if data['label'][i] == 0:
                    continue

                global_id = str(image_id_row[0])
                split = 'train' if 'train' in data['file_name'][i] else 'val'
                image_path_postfix = os.path.join(
                    split + '2014',
                    "COCO_" + split + "2014_" + global_id.zfill(12) + '.jpg')

                human_bboxes = data['bbox'][i]
                assert len(human_bboxes) == 4
                # get image size
                image_size = data['image_size'][i]
                assert len(image_size) == 2

                role_count = len(data['role_name'])
                assert role_count <= 3
                hois = []
                # Role 0 is the human; the remaining roles are objects.
                for j in range(1, role_count):
                    object_anno_id = data['role_object_id'][i][j]
                    if object_anno_id == 0:
                        continue
                    object_anno = coco.loadAnns(int(object_anno_id))[0]
                    cat_id = object_anno['category_id']
                    object_name = coco.loadCats(int(cat_id))[0]['name']
                    object_set.add(object_name)

                    hoi_id = hoi_id_by_pair.get((action_name, object_name), -1)
                    assert hoi_id > 0

                    hois.append({
                        'object': object_name,
                        'human_bboxes': human_bboxes,
                        'id': hoi_id,
                        'invis': 0,
                        'action': action_name,
                        'object_bboxes':
                            data['role_bbox'][i][4 * j:4 * (j + 1)],
                    })

                if not hois:
                    continue
                anno_list.append({
                    'global_id': global_id,
                    'hois': hois,
                    'image_path_postfix': image_path_postfix,
                    'image_size': image_size,
                })
        print(dataset_name, len(anno_list))

    print(len(anno_list))
    anno_list = _merge_annotations_by_image(anno_list)
    print(len(anno_list))
    io.dump_json_object(anno_list, 'data_process/' + 'anno_list_gt.json')

    io.dump_json_object(_numbered_name_list(object_set),
                        'data_process/' + 'object_list_gt.json')
    io.dump_json_object(_numbered_name_list(verb_set),
                        'data_process/' + 'verb_list_gt.json')