def setUpClass(cls):
    """Create the dataset, translator, model, loss and trainer shared by the tests.

    Everything is stored on ``cls``. A temporary directory is created with
    ``tempfile.mkdtemp()`` and kept in ``cls.test_dir``; nothing in this
    method removes it again.
    """
    # This might include non v-coco classes, but is ok for testing.
    cls.roi_dataset, cls.classes = ld.get_roi_test_loader()
    cls.logger = []
    vcoco_all = vu.load_vcoco("vcoco_train")

    # give the fake dataloader stuff that the RoiDataset needs.
    cls.roi_dataset.vcoco_all = vcoco_all
    cls.roi_dataset.get_classes = lambda: cls.classes

    # values() is available on both Python 2 and Python 3 mappings, whereas
    # itervalues() raises AttributeError on Python 3.
    # NOTE(review): presumes `cats` is a plain dict -- confirm.
    categories = [x["name"] for x in vu.load_coco().cats.values()]
    cls.translator = ld.VCocoTranslator(vcoco_all, categories)
    n_action_classes = cls.translator.num_actions
    n_action_nonagent_roles = cls.translator.num_action_nonagent_roles

    cls.test_dir = tempfile.mkdtemp()
    cls.model = fhoi.HoiModel(
        cls.classes,
        n_action_classes,
        n_action_nonagent_roles,
        faster_rcnn_command_line=["NCLASSES", len(cls.classes)],
        cuda=[0],
        save_dir=cls.test_dir)
    cls.loss = thoi.HoiLoss(cls.model, cls.translator, logger_output=cls.logger)
    cls.trainer = thoi.HoiTrainer(cls.model, cls.roi_dataset, cuda=[0])
def plot_set(paths, imageset):
    """Draw ground-truth V-COCO boxes onto images and save one figure per annotation.

    For every action and every sample whose label equals 1, the first role
    box is drawn in (255, 0, 0) with the action name and every further
    non-NaN role box in (0, 255, 0) with its role name. The figure is saved
    under ``<paths.tmp_root>/results/VCOCO/detections/gt`` as
    ``<stem>_gt_<NN><ext>``, where NN counts the annotations already drawn
    for that image, and the untouched image file is copied next to it.

    Args:
        paths: object providing ``data_root`` and ``tmp_root``.
        imageset: V-COCO split name; ``'test'`` reads images from the
            ``val2014`` folder, anything else from ``train2014``.
    """
    if imageset == 'test':
        vcoco_imageset = 'val'
    else:
        vcoco_imageset = 'train'

    vcoco_path = os.path.join(paths.data_root, '..', 'v-coco')
    image_folder = os.path.join(vcoco_path, 'coco/images',
                                '{}2014'.format(vcoco_imageset))
    result_folder = os.path.join(paths.tmp_root,
                                 'results/VCOCO/detections/gt')
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)

    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))
    image_ids = vcoco_all[0]['image_id']
    image_info_list = coco.loadImgs(ids=image_ids[:, 0].tolist())

    # Number of annotations already rendered per image name, minus one.
    image_ann_count = dict()

    for i_action, vcoco in enumerate(vcoco_all):
        vcoco = vu.attach_gt_boxes(vcoco, coco)
        action_name = vcoco['action_name']
        role_names = vcoco['role_name']

        for sample in np.where(vcoco['label'] == 1)[0].tolist():
            img_name = image_info_list[sample]['file_name']
            image_path = os.path.join(image_folder, img_name)
            assert os.path.exists(image_path)
            img = scipy.misc.imread(image_path, mode='RGB')

            # One row of four coordinates per role; "* 1." yields a copy.
            boxes = (vcoco['role_bbox'][sample, :] * 1.).reshape((-1, 4))
            plot_box_with_label(img, boxes[0, :].astype(int), (255, 0, 0),
                                action_name)
            for j, role in enumerate(role_names[1:], 1):
                if np.isnan(boxes[j, 0]):
                    continue
                plot_box_with_label(img, boxes[j, :].astype(int),
                                    (0, 255, 0), role)

            # First annotation of an image gets 0, each later one the next int.
            image_ann_count[img_name] = image_ann_count.get(img_name, -1) + 1

            # plot ground truth annotation
            plt.imshow(img)
            plt.axis('off')
            ax = plt.gca()
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            stem, ext = os.path.splitext(img_name)
            out_name = '{}_gt_{:02d}{}'.format(stem,
                                               image_ann_count[img_name], ext)
            plt.savefig(os.path.join(result_folder, out_name),
                        bbox_inches='tight', pad_inches=0, transparent=True)
            plt.close()

            # copy original image file
            shutil.copy(image_path, os.path.join(result_folder, img_name))
def __init__(self, root, imageset, node_feature_appd=False):
    """Load the COCO and V-COCO annotations found under ``root``.

    Args:
        root: directory handed to both ``vu.load_coco`` and ``vu.load_vcoco``.
        imageset: split suffix; the set ``'vcoco_<imageset>'`` is loaded.
        node_feature_appd: stored unchanged on the instance.
    """
    self.root = root
    self.node_feature_appd = node_feature_appd
    self.coco = vu.load_coco(root)

    vcoco_name = 'vcoco_{}'.format(imageset)
    vcoco_all = vu.load_vcoco(vcoco_name, root)

    # Column 0 of the first action's image_id array, one entry per sample.
    id_column = vcoco_all[0]['image_id'][:, 0]
    self.image_ids = id_column.astype(int).tolist()
    self.unique_image_ids = list(set(self.image_ids))
def __init__(self,
             imageset,
             batchsize,
             node_num,
             datadir=os.path.join(os.path.dirname(__file__),
                                  '../../data/feature_resnet_tengyu'),
             with_name=False,
             negative_suppression=False):
    """Store the loader settings and list the feature files present on disk.

    ``self.filenames`` holds ``<datadir>/<file_name>.data`` for every image
    of the ``'vcoco_<imageset>'`` set whose file exists, in the order the
    images are returned by ``coco.loadImgs``; ``self.filenames_backup`` is
    a deep copy of that list.
    """
    self.imageset = imageset
    self.batchsize = batchsize
    self.node_num = node_num
    self.datadir = datadir
    self.with_name = with_name
    self.negative_suppression = negative_suppression
    self.thread = None

    self.coco = vu.load_coco('/mnt/hdd-12t/share/v-coco/data')
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset),
                              '/mnt/hdd-12t/share/v-coco/data')
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    existing = []
    for info in self.coco.loadImgs(ids=image_ids):
        data_file = os.path.join(self.datadir, info['file_name'] + '.data')
        if os.path.exists(data_file):
            existing.append(data_file)
    self.filenames = existing
    self.filenames_backup = copy.deepcopy(self.filenames)
def visualization(name="train", action='hit', show_num=1, index=-1):
    """Show one V-COCO annotation of ``action`` with its agent and role boxes.

    Args:
        name: split to load, one of 'train', 'trainval', 'val' or 'test'.
        action: action name to look up among the loaded actions.
        show_num: upper bound on the number of figures. Only the single
            sample ``index`` is selected, so at most one figure is drawn;
            larger values no longer raise IndexError.
        index: row of the annotation to display within the action's arrays.

    Raises:
        AssertionError: if ``name`` is not one of the supported splits.
        ValueError: from ``list.index`` if ``action`` is not a loaded action.
    """
    assert name in ('train', 'trainval', 'val', 'test'), "illegal name "

    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()
    # Load the VCOCO annotations for the requested image set
    vcoco_data = vu.load_vcoco('vcoco_' + name)
    # Add bbox and role_bbox. The return value used to be bound to the loop
    # variable only, so the call matters for what it does to `x` itself.
    for x in vcoco_data:
        vu.attach_gt_boxes(x, coco)

    classes = [x['action_name'] for x in vcoco_data]
    vcoco = vcoco_data[classes.index(action)]

    positive_index = [index]
    cc = plt.get_cmap('hsv', lut=4)

    # Never step past the selected samples, whatever show_num says.
    for i in range(min(show_num, len(positive_index))):
        sample = positive_index[i]  # was named `id`, shadowing the builtin

        # get image
        path = 'train' if "train" in vcoco['file_name'][sample] else "val"
        file_name = 'coco/images/' + path + '2014/' + str(
            vcoco['file_name'][sample])
        print(file_name)
        im = np.asarray(Image.open(file_name))

        # scale: fixed figure height, width from im.shape[1] / im.shape[0]
        sy = 4.
        sx = float(im.shape[1]) / float(im.shape[0]) * sy

        # draw image
        fig, ax = subplot(plt, (1, 1), (sy, sx))
        ax.set_axis_off()
        ax.imshow(im)
        print("label:", vcoco['label'][sample],
              vcoco['role_object_id'][sample])

        # draw bounding box for agent
        draw_bbox(plt, ax, vcoco['bbox'][[sample], :], edgecolor=cc(0)[:3])

        # Remaining roles: one row of four coordinates each, NaN when absent.
        role_bbox = (vcoco['role_bbox'][sample, :] * 1.).reshape((-1, 4))
        for j in range(1, len(vcoco['role_name'])):
            if not np.isnan(role_bbox[j, 0]):
                draw_bbox(plt, ax, role_bbox[[j], :], edgecolor=cc(j)[:3])
                print("draw_bbox")
    plt.show()
def main():
    """Print the V-COCO actions and dump five 'hit' samples with boxes drawn.

    Each of the five samples overwrites ``trial.jpg`` (raw image) and
    ``trial_boxes.jpg`` (image with every non-NaN role box drawn).
    """
    # COCO annotations restricted to the V-COCO images, plus the train split.
    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_train')
    for entry in vcoco_all:
        vu.attach_gt_boxes(entry, coco)

    # Action classes and roles in V-COCO
    classes = []
    for entry in vcoco_all:
        classes.append(entry['action_name'])
    for entry in vcoco_all:
        print('{:>20s}'.format(entry['action_name']), entry['role_name'])

    # Visualize annotations for some class
    vcoco = vcoco_all[classes.index('hit')]
    np.random.seed(1)
    positives = np.where(vcoco['label'] == 1)[0]
    positives = np.random.permutation(positives)
    cc = plt.get_cmap('hsv', lut=4)

    image_pattern = 'coco-data/images/train2014/COCO_train2014_{}.jpg'
    role_count = len(vcoco['role_name'])
    for rank in range(5):
        sample = positives[rank]

        # Load image; the file name embeds the id left-padded to 12 digits.
        padded_id = str(vcoco['image_id'][sample][0]).rjust(12, '0')
        im = cv2.imread(image_pattern.format(padded_id))
        cv2.imwrite('trial.jpg', im)

        # Draw bounding box of every role that has one
        boxes = (vcoco['role_bbox'][sample, :] * 1.).reshape((-1, 4))
        for j in range(role_count):
            if np.isnan(boxes[j, 0]):
                continue
            print(vcoco['role_name'][j], boxes[j])
            im = draw_bbox(im, boxes[j])
        cv2.imwrite('trial_boxes.jpg', im)
def _collect_role_categories(role, role_name, role_ids, coco, role_cats):
    """Return the distinct object classes annotated for ``role`` of one action.

    Classes keep first-seen order. Every class that is new for this role is
    also appended to ``role_cats`` (mutated in place), so a class used by two
    different roles appears there twice. Returns an empty list when the
    action has no such role.
    """
    if role not in role_name:
        return []
    column = role_name.index(role)
    cats = []
    for obj_id in role_ids[:, column].tolist():
        if obj_id == 0:  # id 0 is skipped (presumably "no object annotated")
            continue
        cat = vu.coco_obj_id_to_obj_class(int(obj_id), coco)
        if cat not in cats:
            cats.append(cat)
            role_cats.append(cat)
    return cats


def relevant_vcoco_sets():
    """List, per V-COCO training action, the object classes used in each role.

    Returns:
        A tuple ``(role_cats_all, obj_cats_all, instr_cats_all, role_names)``
        of lists with one entry per action, in the order returned by
        ``vu.load_vcoco``: the classes seen in the 'obj' or 'instr' role
        (obj classes first), the classes of the 'obj' role, the classes of
        the 'instr' role, and the action's role names.
    """
    coco = vu.load_coco(vcocoroot + 'data/')
    vsrl_data = vu.load_vcoco('vcoco_train', vcocoroot + 'data/')

    obj_cats_all = []
    instr_cats_all = []
    role_cats_all = []
    role_names = []

    # 26 actions, same order as in meta.h5 and action.mat
    for action_data in vsrl_data:
        role_ids = action_data['role_object_id']
        role_name = action_data['role_name']
        role_names.append(role_name)

        role_cats = []
        obj_cats = _collect_role_categories('obj', role_name, role_ids, coco,
                                            role_cats)
        instr_cats = _collect_role_categories('instr', role_name, role_ids,
                                              coco, role_cats)

        obj_cats_all.append(obj_cats)
        instr_cats_all.append(instr_cats)
        role_cats_all.append(role_cats)

    return role_cats_all, obj_cats_all, instr_cats_all, role_names
def __init__(self,
             vcoco_set,
             coco_root,
             transform=None,
             coco_transform=None,
             combined_transform=None):
    """Load one V-COCO set and index its labels by image id.

    Args:
        vcoco_set: set name passed to ``vu.load_vcoco``.
        coco_root: root passed to ``PathManager``; its ``coco_imgs``
            attribute becomes ``self.root``.
        transform: stored as ``self.transform``.
        coco_transform: stored as ``self.target_transform``.
        combined_transform: stored as ``self.combined_transform``.
    """
    # Don't call the superconstructor (we don't have an annFile)
    self.root = PathManager(coco_root=coco_root).coco_imgs
    self.transform = transform
    self.target_transform = coco_transform
    self.combined_transform = combined_transform

    self.coco = vu.load_coco()
    self.vcoco_all = vu.load_vcoco(vcoco_set)

    # If we don't convert to int, COCO library index lookup fails :(
    raw_ids = self.vcoco_all[0]["image_id"].ravel()
    self.ids = list(map(int, raw_ids))

    # Get per-image vcoco labels, indexed by image id.
    self.imgid_2_vcoco = get_imgid_2_vcoco_labels(self.vcoco_all, self.coco)
def __init__(self,
             imageset,
             node_num,
             datadir=os.path.join(os.path.dirname(__file__),
                                  '../../data/feature_resnet_tengyu2'),
             negative_suppression=False,
             n_jobs=16,
             part_weight='central',
             debug=None):
    """Store the loader settings and collect the distinct feature files on disk.

    ``self.filenames`` is the de-duplicated list of
    ``<datadir>/<file_name>.data`` paths that exist for the images of
    ``'vcoco_<imageset>'``. When ``debug`` is not None, only paths containing
    ``debug`` formatted as a zero-padded 12-digit number are kept.
    """
    self.imageset = imageset
    self.node_num = node_num
    self.datadir = datadir
    self.negative_suppression = negative_suppression
    self.n_jobs = n_jobs
    self.part_weight = part_weight
    self.thread = None

    self.coco = vu.load_coco('/home/tengyu/dataset/v-coco/data')
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset),
                              '/home/tengyu/dataset/v-coco/data')
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    candidates = []
    for info in self.coco.loadImgs(ids=image_ids):
        data_file = os.path.join(self.datadir, info['file_name'] + '.data')
        if os.path.exists(data_file):
            candidates.append(data_file)
    # An image can occur once per annotation; keep each path only once.
    self.filenames = list(set(candidates))

    if debug is not None:
        wanted = '%012d' % debug
        self.filenames = [path for path in self.filenames if wanted in path]
def __init__(self, root, input_imsize, transform, imageset):
    """Index every positive (action, sample) pair of one V-COCO split.

    Args:
        root: data root; feature and v-coco paths are derived from it.
        input_imsize: stored as ``self.imsize``.
        transform: stored as ``self.transform``.
        imageset: split name; ``'test'`` maps to the 'val' image set and
            gets zero negatives, every other value maps to 'train' and
            gets 200.
    """
    is_test = imageset == 'test'

    self.imageset = imageset
    if is_test:
        self.vcoco_imageset = 'val'
    else:
        self.vcoco_imageset = 'train'
    self.vcoco_feature_path = os.path.join(root, 'features_deformable')
    self.vcoco_path = os.path.join(root, '..', 'v-coco')
    self.imsize = input_imsize
    self.transform = transform

    self.coco = vu.load_coco()
    self.vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))

    # (action index, sample index) for every sample labelled 1.
    self.hoi_list = list()
    for i_action, vcoco in enumerate(self.vcoco_all):
        vcoco = vu.attach_gt_boxes(vcoco, self.coco)
        for image_index in np.where(vcoco['label'] == 1)[0].tolist():
            self.hoi_list.append((i_action, image_index))
    self.positive_num = len(self.hoi_list)

    # Hard negative examples
    id_column = self.vcoco_all[0]['image_id'][:, 0]
    self.image_info_list = self.coco.loadImgs(ids=id_column.tolist())
    if is_test:
        self.negative_num = 0
    else:
        self.negative_num = 200
def main(args):
    """Convert V-COCO annotations and a verb prior into JSON / NPY files.

    Reads COCO boxes from ``args.load_path``, builds the trainval and test
    annotations with ``set_hoi`` and writes ``trainval_vcoco.json``,
    ``test_vcoco.json`` and ``corre_vcoco.npy`` into ``args.save_path``. The
    prior is unpickled from ``args.prior_path`` and its rows are reordered
    to follow the verb classes derived here.
    """
    # Row index of every verb class in the stored prior.
    vsgnet_verb_order = [
        'carry_obj', 'catch_obj', 'cut_instr', 'cut_obj', 'drink_instr',
        'eat_instr', 'eat_obj', 'hit_instr', 'hit_obj', 'hold_obj',
        'jump_instr', 'kick_obj', 'lay_instr', 'look_obj', 'point_instr',
        'read_obj', 'ride_instr', 'run', 'sit_instr', 'skateboard_instr',
        'ski_instr', 'smile', 'snowboard_instr', 'stand', 'surf_instr',
        'talk_on_phone_instr', 'throw_obj', 'walk', 'work_on_computer_instr',
    ]
    vsgnet_verbs_classes = {
        verb: position for position, verb in enumerate(vsgnet_verb_order)
    }

    box_annotations = defaultdict(lambda: {
        'annotations': [],
        'annotation_ids': []
    })

    coco = vu.load_coco(args.load_path)
    img_ids = coco.getImgIds()
    for img_info in coco.loadImgs(img_ids):
        box_annotations[img_info['id']]['file_name'] = img_info['file_name']

    for annotation in coco.loadAnns(coco.getAnnIds(imgIds=img_ids)):
        # The last two box entries become first-two plus last-two.
        box = np.array(annotation['bbox'])
        box[2:] += box[:2]
        entry = box_annotations[annotation['image_id']]
        entry['annotations'].append({
            'category_id': annotation['category_id'],
            'bbox': box.tolist()
        })
        entry['annotation_ids'].append(annotation['id'])

    hoi_trainval = vu.load_vcoco('vcoco_trainval')
    hoi_test = vu.load_vcoco('vcoco_test')
    action_classes = [x['action_name'] for x in hoi_trainval]

    # One verb per non-agent role; an action with a single role keeps its name.
    verb_classes = []
    for action in hoi_trainval:
        if len(action['role_name']) == 1:
            verb_classes.append(action['action_name'])
            continue
        for role in action['role_name'][1:]:
            verb_classes.append('{}_{}'.format(action['action_name'], role))

    print('Verb class')
    for i, verb_class in enumerate(verb_classes):
        print('{:02d}: {}'.format(i, verb_class))

    hoia_trainval_annotations = set_hoi(box_annotations, hoi_trainval,
                                        verb_classes)
    hoia_test_annotations = set_hoi(box_annotations, hoi_test, verb_classes)
    print('#Training images: {}, #Test images: {}'.format(
        len(hoia_trainval_annotations), len(hoia_test_annotations)))

    trainval_file = os.path.join(args.save_path, 'trainval_vcoco.json')
    with open(trainval_file, 'w') as f:
        json.dump(hoia_trainval_annotations, f)
    test_file = os.path.join(args.save_path, 'test_vcoco.json')
    with open(test_file, 'w') as f:
        json.dump(hoia_test_annotations, f)

    with open(args.prior_path, 'rb') as f:
        prior = pickle.load(f)
    # Concatenate the values in sorted-key order, transpose, then pick rows.
    prior = np.concatenate([prior[k] for k in sorted(prior.keys())]).T
    row_order = [vsgnet_verbs_classes[verb] for verb in verb_classes]
    prior = prior[row_order]
    np.save(os.path.join(args.save_path, 'corre_vcoco.npy'), prior)
def parse_features(paths, imageset):
    """Add a part-level adjacency matrix to every processed instance pickle.

    For each image of ``'vcoco_<imageset>'`` the part classes and part human
    ids are read from ``<data_root>/features_resnet_noisy``. Two parts are
    connected when they carry the same human id and ``part_dist`` equals 1
    for their classes. The matrix is stored under the key ``'part_adj_mat'``
    in ``<data_root>/processed/resnet_noisy/<file_name>.p``. Images whose
    feature files or pickle are missing are skipped with a warning.

    Args:
        paths: object providing ``data_root`` and ``vcoco_data_root``.
        imageset: V-COCO split suffix.
    """
    feature_type = 'resnet_noisy'
    feature_path = os.path.join(paths.data_root,
                                'features_{}'.format(feature_type))
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco)
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    for image_id in image_ids:
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        print(filename)

        try:
            part_classes = np.load(
                os.path.join(feature_path,
                             '{}_part_classes.npy'.format(filename)))
            part_human_id = np.load(
                os.path.join(feature_path,
                             '{}_part_human_id.npy'.format(filename)))
        except IOError:
            warnings.warn(
                'Features and detection results missing for {}'.format(
                    filename))
            continue

        # Create part-level adj mats
        # NOTE(review): part_dist is defined outside this function and is
        # indexed by a pair of part classes -- confirm its layout.
        part_num = len(part_classes)
        part_adj_mat = np.zeros((part_num, part_num))
        for i_part in range(part_num):
            for j_part in range(part_num):
                if part_human_id[i_part] != part_human_id[j_part]:
                    continue
                if part_dist[part_classes[i_part],
                             part_classes[j_part]] == 1:
                    part_adj_mat[i_part, j_part] = 1

        instance_path = os.path.join(save_data_path, '{}.p'.format(filename))
        try:
            # Context managers close the file even when (un)pickling fails;
            # the bare open() calls used before leaked the handles.
            with open(instance_path, 'rb') as f:
                instance = pickle.load(f)
            instance['part_adj_mat'] = part_adj_mat
            with open(instance_path, 'wb') as f:
                pickle.dump(instance, f)
        except IOError:
            warnings.warn('.p file missing for {}'.format(filename))
            continue
import scipy.misc
import matplotlib.pyplot as plt
import vsrl_utils as vu

# Script fragment: loads the V-COCO 'train' annotations and walks the
# processed-instance directory, skipping every entry that does not belong
# to that split.
# NOTE(review): `os` is used below but is not imported in the visible part
# of the file -- confirm it is imported elsewhere.
imageset = 'train'
base_dir = '/home/tengyu/Data/mscoco/v-coco/processed/resnet'
# base_dir = '/home/tengyu/Documents/PartGPNN/gpnn/tmp/vcoco/vcoco_features'
img_dir = '/home/tengyu/Data/mscoco/coco/'
colors = [
    'red', 'blue', 'green', 'yellow', 'cyan', 'magenta', 'black', 'white'
]
coco = vu.load_coco('/home/tengyu/Data/mscoco/v-coco/data')
vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset),
                          '/home/tengyu/Data/mscoco/v-coco/data')
# The result is only rebound to the loop variable, so this loop relies on
# whatever attach_gt_boxes does to `x` itself.
for x in vcoco_all:
    x = vu.attach_gt_boxes(x, coco)
# Column 0 of the first action's image_id array as a list of Python ints.
image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()
for fn in os.listdir(base_dir):
    # Substring test: any name containing '.p' passes, not only '*.p' files.
    if '.p' not in fn:
        continue
    # Drop the last two characters (the '.p' suffix when the name ends in it).
    base_fn = fn[:-2]
    if imageset not in base_fn:
        continue
    # Drops the last four characters and parses the final '_'-separated
    # field as the image id; presumably strips an extension such as
    # '.jpg' -- TODO confirm.
    if int(base_fn[:-4].split('_')[-1]) not in image_ids:
        continue
def extract_features(paths, imageset, vcoco_imageset):
    """Run the feature network on every detected box and human-object pair.

    For each image of ``image_list`` that also occurs in the
    ``'vcoco_<vcoco_imageset>'`` annotations, detections scoring above 0.7
    are collected, one combined box is added per (class-1 detection, other
    detection) pair, every box is cropped, resized and passed through the
    network, and three arrays are saved to ``feature_path``:
    ``<img>_classes``, ``<img>_boxes`` and ``<img>_features``.

    Args:
        paths: project path container passed on to ``get_info``/``get_model``.
        imageset: COCO image folder prefix (``'<imageset>2014'``).
        vcoco_imageset: V-COCO split suffix used to filter the images.

    Raises:
        AssertionError: if an image file is missing on disk.
    """
    feature_type = 'resnet'
    # NOTE(review): 244 may be a typo for 224. cv2.resize expects
    # (width, height); passing (input_h, input_w) is harmless only because
    # both are equal.
    input_h, input_w = 244, 244
    feature_size = (7, 7)
    # NOTE(review): never used below; kept because constructing it is the
    # only reference to roi_pooling in this function.
    adaptive_max_pool = roi_pooling.AdaptiveMaxPool2d(*feature_size)

    # Width of one feature vector per supported network.
    feature_dims = {'vgg': 4096, 'resnet': 1000, 'densenet': 1000}
    if feature_type not in feature_dims:
        raise ValueError('feature type not recognized')
    feature_dim = feature_dims[feature_type]

    vcoco_path, det_res_path, feature_path, classes, image_list = get_info(
        paths, imageset, feature_type)
    if not os.path.exists(feature_path):
        os.makedirs(feature_path)
    feature_network = get_model(paths, feature_type)
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225]),
    ])

    # Read detection results. Pickle data is binary: text mode fails on
    # Python 3 and can corrupt the stream on platforms that translate
    # newlines, so the file is opened with 'rb'.
    with open(det_res_path, 'rb') as f:
        # detection results: [class_num][img_num][detection_num][x1, y1, x2, y2, score]
        det_res = pickle.load(f)

    coco_from_vcoco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(vcoco_imageset))
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco_from_vcoco)
    vcoco_image_ids = vcoco_all[0]['image_id'][:, 0].astype(int)

    for i_image, img_info in enumerate(image_list):
        img_id = img_info['id']
        indices_in_vcoco = np.where(vcoco_image_ids == img_id)[0].tolist()
        if len(indices_in_vcoco) == 0:
            continue
        img_name = img_info['file_name']
        print(img_name)

        # Extracted bounding boxes and classes (class 0 is skipped)
        det_boxes_all = np.empty((0, 4))
        det_classes_all = list()
        for c in range(1, len(classes)):
            for detection in det_res[c][i_image]:
                if detection[4] > 0.7:
                    det_boxes_all = np.vstack(
                        (det_boxes_all,
                         np.array(detection[:4])[np.newaxis, ...]))
                    det_classes_all.append(c)
        if len(det_classes_all) == 0:
            continue

        # One combined box (class 0) per class-1 detection paired with each
        # detection of any other class. det_classes_all is extended only
        # after the loops, so the iteration itself sees real detections only.
        edge_classes = list()
        for person_i, person_c in enumerate(det_classes_all):
            if person_c != 1:
                continue
            for obj_i, obj_c in enumerate(det_classes_all):
                if obj_c == 1:
                    continue
                combined_box = combine_box(det_boxes_all[person_i, :],
                                           det_boxes_all[obj_i, :])
                det_boxes_all = np.vstack((det_boxes_all, combined_box))
                edge_classes.append(0)
        det_classes_all.extend(edge_classes)

        # Read image
        image_path = os.path.join(vcoco_path, 'coco/images',
                                  '{}2014'.format(imageset), img_name)
        assert os.path.exists(image_path)
        original_img = scipy.misc.imread(image_path, mode='RGB')

        # Get image feature by applying network to ROI
        roi_features = np.zeros((det_boxes_all.shape[0], feature_dim))
        for i_box in range(det_boxes_all.shape[0]):
            roi = det_boxes_all[i_box, :].astype(int)
            # Box corners are treated as inclusive pixel coordinates.
            roi_image = original_img[roi[1]:roi[3] + 1, roi[0]:roi[2] + 1, :]
            roi_image = transform(
                cv2.resize(roi_image, (input_h, input_w),
                           interpolation=cv2.INTER_LINEAR))
            roi_image = torch.autograd.Variable(roi_image.unsqueeze(0)).cuda()
            feature, _ = feature_network(roi_image)
            roi_features[i_box, ...] = feature.data.cpu().numpy()

        np.save(os.path.join(feature_path, '{}_classes'.format(img_name)),
                det_classes_all)
        np.save(os.path.join(feature_path, '{}_boxes'.format(img_name)),
                det_boxes_all)
        np.save(os.path.join(feature_path, '{}_features'.format(img_name)),
                roi_features)
import __init__ import vsrl_utils as vu import os from pycocotools.coco import COCO import save_hios import numpy as np coco = COCO(os.path.join('coco/annotations/', 'instances_trainval2014.json')) # return COCO all data in train_val_test v_coco_all = vu.load_coco() def get_data(name, data_dict, is_positive=False): vcoco = vu.load_vcoco('vcoco_' + name) posivive_labels = [np.where(i['label'] != 0)[0] for i in vcoco] print(len(vcoco)) for k in range(len(vcoco)): # len=26 x = vcoco[k] action_name = x['action_name'] for i in range(1, len(x['role_name'])): if not is_positive: anno_num = vcoco[0]['ann_id'].shape[0] anno_list = range(anno_num) else: anno_list = posivive_labels[k] for j in anno_list: if x['role_object_id'][j][i] > 0:
def _load_feature(feature_path, filename, suffix):
    """Load ``<feature_path>/<filename>_<suffix>.npy`` with ``np.load``."""
    return np.load(
        os.path.join(feature_path, '{}_{}.npy'.format(filename, suffix)))


def parse_features(paths, imageset):
    """Build graph features and ground-truth labels for every V-COCO sample.

    For each sample of ``'vcoco_<imageset>'``:
      1. compute the number of humans, objects, nodes and edges,
      2. create placeholders,
      3. extract node features (parts first, then objects),
      4. extract edge features for every (part, object) pair,
      5. compute the ground-truth adj_mat, node_labels and node_roles,
    then write ``<file_name>_edge_features.npy``,
    ``<file_name>_node_features.npy`` and ``<file_name>.p`` into
    ``<data_root>/processed/resnet``. Samples whose feature files are
    missing, or that have no detected part, are skipped with a warning.

    Args:
        paths: object providing ``data_root`` and ``vcoco_data_root``.
        imageset: V-COCO split suffix.

    Raises:
        IndexError: re-raised (after a warning) when labelling a sample fails.
        AssertionError: if the edge count is not parts x objects, an edge
            box differs from the combined part/object box, or a matched
            node index is out of range.
    """
    roi_size = 1000  # ResNet fully connected feature
    feature_size = 1000
    feature_type = 'resnet'
    action_class_num = len(metadata.action_classes)
    no_action_index = metadata.action_index['none']
    no_role_index = metadata.role_index['none']

    feature_path = os.path.join(paths.data_root,
                                'features_{}_noisy'.format(feature_type))
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco)
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    all_results = list()
    unique_image_ids = list()
    for i_image, image_id in enumerate(image_ids):
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        print(filename)

        try:
            obj_classes = _load_feature(feature_path, filename, 'obj_classes')
            obj_boxes = _load_feature(feature_path, filename, 'obj_boxes')
            part_classes = _load_feature(feature_path, filename,
                                         'part_classes')
            part_boxes = _load_feature(feature_path, filename, 'part_boxes')
            part_human_id = _load_feature(feature_path, filename,
                                          'part_human_id')
            # Not used below, but a missing file still skips the sample.
            _load_feature(feature_path, filename, 'edge_human_id')
            edge_boxes = _load_feature(feature_path, filename, 'edge_boxes')
            if feature_type != 'None':
                obj_features = _load_feature(feature_path, filename,
                                             'obj_features')
                part_features = _load_feature(feature_path, filename,
                                              'part_features')
                edge_features_in = _load_feature(feature_path, filename,
                                                 'edge_features')
        except IOError:
            warnings.warn(
                'Features and detection results missing for {}'.format(
                    filename))
            continue

        # 1. compute number of human, objects, and edges
        part_num = len(part_boxes)
        obj_num = len(obj_boxes)
        human_num = len(list(set(part_human_id)))
        edge_num = len(edge_boxes)
        node_num = human_num + obj_num
        assert edge_num == part_num * obj_num
        if part_num == 0:
            warnings.warn('human detection missing for {}'.format(filename))
            continue

        # 2. Create placeholders for edge_feature, node_feature, adj_mat,
        #    node_labels, node_roles
        unique_image_ids.append(image_id)
        if feature_type != 'None':
            edge_features = np.zeros(
                (part_num + obj_num, part_num + obj_num, feature_size))
            node_features = np.zeros((part_num + obj_num, feature_size * 2))
        adj_mat = np.zeros((human_num + obj_num, human_num + obj_num))
        node_labels = np.zeros((node_num, action_class_num))
        node_roles = np.zeros((node_num, 3))
        # Every node starts out as "no action" / "no role".
        node_labels[:, no_action_index] = 1
        node_roles[:, no_role_index] = 1

        # Group human boxes
        human_boxes, part_human_ids, _ = group_boxes(part_boxes,
                                                     part_human_id)

        if feature_type != 'None':
            # 3. Extract node features: part features fill the first half of
            #    a row, object features the second half.
            for i_node in range(part_num + obj_num):
                if i_node < part_num:
                    node_features[i_node, :roi_size] = np.reshape(
                        part_features[i_node, ...], roi_size)
                else:
                    node_features[i_node, roi_size:] = np.reshape(
                        obj_features[i_node - part_num, ...], roi_size)

            # 4. Extract edge features; edges are stored part-major and the
            #    feature tensor is filled symmetrically.
            i_edge = 0
            for i_part in range(part_num):
                for i_obj in range(obj_num):
                    edge_box = edge_boxes[i_edge]
                    part_box = part_boxes[i_part]
                    obj_box = obj_boxes[i_obj]
                    assert np.linalg.norm(
                        combine_box(part_box, obj_box) - edge_box) == 0
                    edge_features[i_part, part_num + i_obj, :] = np.reshape(
                        edge_features_in[i_edge, ...], roi_size)
                    edge_features[part_num + i_obj, i_part, :] = \
                        edge_features[i_part, part_num + i_obj, :]
                    i_edge += 1

        # 5. Compute ground truth adj_mat, node_label, node_role
        for x in vcoco_all:
            if x['label'][i_image, 0] != 1:
                continue
            try:
                action_index = metadata.action_index[x['action_name']]
                role_bbox = (x['role_bbox'][i_image, :] * 1.).reshape((-1, 4))

                # node_index uses human box
                human_index = get_node_index(role_bbox[0, :], human_boxes,
                                             np.arange(human_num))
                if human_index == -1:
                    warnings.warn('human detection missing')
                    continue
                assert human_index < human_num
                node_labels[human_index, action_index] = 1
                node_labels[human_index, no_action_index] = 0

                for i_role in range(1, len(x['role_name'])):
                    bbox = role_bbox[i_role, :]
                    if np.isnan(bbox[0]):
                        continue
                    obj_index = get_node_index(
                        bbox, obj_boxes, np.arange(obj_num)) + human_num
                    # A lookup result of -1 shows up here as human_num - 1.
                    if obj_index == human_num - 1:
                        warnings.warn('object detection missing')
                        continue
                    assert obj_index >= human_num and obj_index < human_num + obj_num
                    node_labels[obj_index, action_index] = 1
                    node_labels[obj_index, no_action_index] = 0
                    node_roles[
                        obj_index,
                        metadata.role_index[x['role_name'][i_role]]] = 1
                    node_roles[obj_index, no_role_index] = 0
                    adj_mat[human_index, obj_index] = 1
                    adj_mat[obj_index, human_index] = 1
            except IndexError:
                warnings.warn('Labels missing for {}'.format(filename))
                raise

        instance = dict()
        instance['img_id'] = image_id
        instance['human_num'] = human_num
        instance['obj_num'] = obj_num
        instance['part_num'] = part_num
        instance['img_name'] = filename
        instance['human_boxes'] = human_boxes
        instance['part_boxes'] = part_boxes
        instance['obj_boxes'] = obj_boxes
        instance['edge_boxes'] = edge_boxes
        instance['obj_classes'] = obj_classes
        instance['part_classes'] = part_classes
        instance['adj_mat'] = adj_mat
        instance['part_human_id'] = part_human_ids
        instance['node_labels'] = node_labels
        instance['node_roles'] = node_roles
        if feature_type != 'None':
            np.save(
                os.path.join(save_data_path,
                             '{}_edge_features.npy'.format(filename)),
                edge_features)
            np.save(
                os.path.join(save_data_path,
                             '{}_node_features.npy'.format(filename)),
                node_features)
        # The context manager closes the pickle file once it is written.
        with open(os.path.join(save_data_path, '{}.p'.format(filename)),
                  'wb') as f:
            pickle.dump(instance, f)

    print('total image', len(unique_image_ids), 'total results',
          len(all_results))
transform = torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) input_h, input_w = 224, 224 part_eye = np.eye(21) obj_eye = np.eye(81) vcoco_mapping = {'train': 'train', 'test': 'val', 'val': 'train'} for imageset in ['train', 'test', 'val']: coco = vu.load_coco(vcoco_root) vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_root) for x in vcoco_all: x = vu.attach_gt_boxes(x, coco) image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist() for imageset in ['train', 'test', 'val']: coco = vu.load_coco(vcoco_root) vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_root) for x in vcoco_all: x = vu.attach_gt_boxes(x, coco) image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist() for i_image, image_id in enumerate(image_ids):
def __init__(self, root, imageset):
    """Load the default COCO annotations and the ids of one V-COCO split.

    Args:
        root: stored as ``self.root``; the loaders are called without it.
        imageset: split suffix; the set ``'vcoco_<imageset>'`` is loaded.
    """
    self.root = root
    self.coco = vu.load_coco()

    vcoco_name = 'vcoco_{}'.format(imageset)
    first_action = vu.load_vcoco(vcoco_name)[0]

    # Column 0 of the first action's image_id array, one entry per sample.
    id_column = first_action['image_id'][:, 0]
    self.image_ids = id_column.astype(int).tolist()
    self.unique_image_ids = list(set(self.image_ids))
def parse_features(paths, imageset):
    """Build graph features and labels per V-COCO sample, then evaluate.

    The first sample of an image creates fresh feature and label arrays
    from the files in ``features_resnet`` and ``features_deformable``;
    later samples of the same image reload what was saved for it, so labels
    accumulate across samples. After each sample the arrays and the instance
    dict are written to ``<data_root>/processed/resnet``. At the last sample
    of an image its result is appended via ``append_result``, and the
    collected results are finally passed to ``vcoco_evaluation``.

    Args:
        paths: project path container (``data_root`` is read here).
        imageset: V-COCO split suffix.
    """
    roi_size = 1000  # ResNet fully connected feature
    feature_size = 49  # width of one deformable-ConvNet feature
    feature_type = 'resnet'
    action_class_num = len(metadata.action_classes)
    no_action_index = metadata.action_index['none']
    no_role_index = metadata.role_index['none']

    feature_path = os.path.join(paths.data_root,
                                'features_{}'.format(feature_type))
    det_feature_path = os.path.join(paths.data_root, 'features_deformable')
    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset))
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco)
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    # Index of the last sample of every image (later entries overwrite
    # earlier ones); replaces a reversed linear search per sample.
    last_index = {image_id: i for i, image_id in enumerate(image_ids)}

    all_results = list()
    seen_image_ids = set()
    for i_image, image_id in enumerate(image_ids):
        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        instance_path = os.path.join(save_data_path, '{}.p'.format(filename))
        # Format only the file name, so braces in the directory are harmless.
        edge_path = os.path.join(save_data_path,
                                 '{}_edge_features.npy'.format(filename))
        node_path = os.path.join(save_data_path,
                                 '{}_node_features.npy'.format(filename))

        if image_id not in seen_image_ids:
            try:
                bbox_features = np.load(
                    os.path.join(feature_path,
                                 '{}_features.npy'.format(filename)))
                det_classes = np.load(
                    os.path.join(det_feature_path,
                                 '{}_classes.npy'.format(filename)))
                det_boxes = np.load(
                    os.path.join(det_feature_path,
                                 '{}_boxes.npy'.format(filename)))
                det_features = np.load(
                    os.path.join(det_feature_path,
                                 '{}_features.npy'.format(filename)))
            except IOError:
                warnings.warn(
                    'Features and detection results missing for {}'.format(
                        filename))
                continue

            human_num, obj_num, edge_num = parse_classes(det_classes)
            node_num = human_num + obj_num
            assert edge_num == human_num * obj_num
            seen_image_ids.add(image_id)

            edge_features = np.zeros((node_num, node_num, roi_size))
            node_features = np.zeros((node_num, roi_size + feature_size * 2))
            adj_mat = np.zeros((node_num, node_num))
            node_labels = np.zeros((node_num, action_class_num))
            node_roles = np.zeros((node_num, 3))
            # Every node starts out as "no action" / "no role".
            node_labels[:, no_action_index] = 1
            node_roles[:, no_role_index] = 1

            # Node features: the box feature first, then the detector
            # feature in the human slot or in the object slot.
            for i_node in range(node_num):
                node_features[i_node, :roi_size] = np.reshape(
                    bbox_features[i_node, ...], roi_size)
                if i_node < human_num:
                    node_features[i_node,
                                  roi_size:roi_size + feature_size] = \
                        np.reshape(det_features[i_node, ...], feature_size)
                else:
                    node_features[i_node, roi_size + feature_size:] = \
                        np.reshape(det_features[i_node, ...], feature_size)

            # Edge features follow the node features in bbox_features,
            # human-major; the tensor is filled symmetrically.
            i_edge = 0
            for i_human in range(human_num):
                for i_obj in range(obj_num):
                    edge_features[i_human, human_num + i_obj, :] = \
                        np.reshape(bbox_features[node_num + i_edge, ...],
                                   roi_size)
                    edge_features[human_num + i_obj, i_human, :] = \
                        edge_features[i_human, human_num + i_obj, :]
                    i_edge += 1
        else:
            # Reload what an earlier sample of the same image saved.
            with open(instance_path, 'rb') as f:
                saved_instance = pickle.load(f)
            edge_features = np.load(edge_path)
            node_features = np.load(node_path)
            adj_mat = saved_instance['adj_mat']
            node_labels = saved_instance['node_labels']
            node_roles = saved_instance['node_roles']
            human_num = saved_instance['human_num']
            obj_num = saved_instance['obj_num']
            det_boxes = saved_instance['boxes']
            det_classes = saved_instance['classes']

        # Ground truth labels: adj_mat, node_labels, node_roles
        for x in vcoco_all:
            if x['label'][i_image, 0] != 1:
                continue
            try:
                action_index = metadata.action_index[x['action_name']]
                role_bbox = (x['role_bbox'][i_image, :] * 1.).reshape((-1, 4))

                human_index = get_node_index(role_bbox[0, :], det_boxes,
                                             range(human_num))
                if human_index == -1:
                    warnings.warn('human detection missing')
                    continue
                assert human_index < human_num
                node_labels[human_index, action_index] = 1
                node_labels[human_index, no_action_index] = 0

                for i_role in range(1, len(x['role_name'])):
                    bbox = role_bbox[i_role, :]
                    if np.isnan(bbox[0]):
                        continue
                    obj_index = get_node_index(
                        bbox, det_boxes,
                        range(human_num, human_num + obj_num))
                    if obj_index == -1:
                        warnings.warn('object detection missing')
                        continue
                    assert obj_index >= human_num
                    node_labels[obj_index, action_index] = 1
                    node_labels[obj_index, no_action_index] = 0
                    node_roles[
                        obj_index,
                        metadata.role_index[x['role_name'][i_role]]] = 1
                    node_roles[obj_index, no_role_index] = 0
                    adj_mat[human_index, obj_index] = 1
                    adj_mat[obj_index, human_index] = 1
            except IndexError:
                # Deliberately best-effort: warn and go on with the next
                # action.
                warnings.warn('Labels missing for {}'.format(filename))

        instance = dict()
        instance['img_id'] = image_id
        instance['human_num'] = human_num
        instance['obj_num'] = obj_num
        instance['img_name'] = filename
        instance['boxes'] = det_boxes
        instance['classes'] = det_classes
        instance['adj_mat'] = adj_mat
        instance['node_labels'] = node_labels
        instance['node_roles'] = node_roles
        np.save(edge_path, edge_features)
        np.save(node_path, node_features)
        with open(instance_path, 'wb') as f:
            pickle.dump(instance, f)

        # Report an image once, after its last sample has been merged in.
        if i_image == last_index[image_id]:
            append_result(all_results, node_labels, node_roles,
                          int(image_ids[i_image]), det_boxes, human_num,
                          obj_num, adj_mat)

    # A single formatted string prints identically on Python 2 and 3; the
    # old print statement is a syntax error on Python 3.
    print('total image {} total results {}'.format(len(seen_image_ids),
                                                   len(all_results)))
    vcocoeval = get_vcocoeval(paths, imageset)
    vcoco_evaluation(paths, vcocoeval, imageset, all_results)
def populateTrainDict():
    """Build the flat training list from the V-COCO ``vcoco_trainval`` split.

    Returns a tuple ``(data_list, classes, coco)``:

    * ``data_list`` holds, for every image with at least one positive label,
      one ``[image_id, [subject_box, ...], 0]`` record followed by one
      ``[image_id, subject_box, role_entries, 1]`` record per distinct
      subject box (the first four ``role_bbox`` values of an annotation).
      Each role entry is the remaining ``role_bbox`` values followed by the
      action name.  Entries of the actions ``cut``, ``hit`` and ``eat`` are
      split in two: one entry for the first remaining box and one for the
      box after it, labelled ``<action>_instr`` / ``<action>_obj``.
    * ``classes`` is the list of action names with ``eat``/``cut``/``hit``
      renamed to ``*_obj`` and the three ``*_instr`` names appended.
    * ``coco`` is the object returned by ``vu.load_coco()``.
    """
    coco = vu.load_coco()
    vcoco_all = vu.load_vcoco('vcoco_trainval')

    # Class names follow the split object/instrument naming scheme.
    classes = [action['action_name'] for action in vcoco_all]
    for verb in ("eat", "cut", "hit"):
        classes[classes.index(verb)] = verb + "_obj"
    classes.extend(["eat_instr", "cut_instr", "hit_instr"])
    print("# of VCOCO classes:", len(classes))

    # The return value is not needed: the later code reads 'role_bbox' from
    # the very same dictionaries.
    for action in vcoco_all:
        vu.attach_gt_boxes(action, coco)

    # image id -> list of [role_bbox values..., action_name], positives only.
    per_image = {}
    for action in vcoco_all:
        for row, id_row in enumerate(action['image_id']):
            image_key = int(id_row[0])
            if action['label'][row][0] != 1:
                continue
            annotation = list(action['role_bbox'][row])
            annotation.append(action['action_name'])
            per_image.setdefault(image_key, []).append(annotation)

    data_list = []
    for image_key, annotations in per_image.items():
        subjects = set(tuple(ann[:4]) for ann in annotations)
        data_list.append([image_key, [list(box) for box in subjects], 0])

        # Group the non-subject part of every annotation by its subject box.
        by_subject = dict((box, []) for box in subjects)
        for ann in annotations:
            by_subject[tuple(ann[:4])].append(ann[4:])
        for box, role_entries in by_subject.items():
            data_list.append([image_key, list(box), role_entries, 1])

    # Split the two-role actions into separate *_obj / *_instr entries.
    for record in data_list:
        if record[-1] != 1:
            continue
        role_entries = record[2]
        # The range is fixed up front, so entries appended below are not
        # visited again.
        for pos in range(len(role_entries)):
            verb = role_entries[pos][-1]
            if verb in ('cut', 'hit'):
                first_suffix, second_suffix = "_instr", "_obj"
            elif verb in ('eat',):
                first_suffix, second_suffix = "_obj", "_instr"
            else:
                continue
            second_entry = role_entries[pos][4:]
            second_entry[-1] += second_suffix
            role_entries.append(second_entry)
            role_entries[pos] = role_entries[pos][:4] + [verb + first_suffix]

    return data_list, classes, coco
# labels 目录若不存在,创建labels目录。若存在,则清空目录 if not os.path.exists('%s/labels/test/' % dataDir): os.makedirs('%s/labels/test/' % dataDir) else: shutil.rmtree('%s/labels/test/' % dataDir) os.makedirs('%s/labels/test/' % dataDir) # filelist 目录若不存在,创建filelist目录。 if not os.path.exists('%s/filelist/' % dataDir): os.makedirs('%s/filelist/' % dataDir) list_file = open('%s/filelist/%s.txt' % (dataDir, dataType), 'w') # 数据集的图片list保存路径 test_pref_file = open('%s/filelist/%s.txt' % (dataDir, prefType), 'w') # 数据集的图片list_pref保存路径 # Load COCO annotations for V-COCO images coco = vu.load_coco() vcoco_anns = ['vcoco_test'] interactions = ['kick', 'read', 'skateboard', 'ski', 'snowboard', 'surf', 'talk_on_phone', 'work_on_computer'] #interactions = ['talk_on_phone'] # Load the VCOCO annotations for vcoco_train image set for anns in vcoco_anns: vcoco_all = vu.load_vcoco(anns) for x in vcoco_all: x = vu.attach_gt_boxes(x, coco) # Action classes and roles in V-COCO classes = [x['action_name'] for x in vcoco_all] for cls in interactions: cls_id = classes.index(cls) # Visualize annotations for the some class
def _box_geometry_features(boxes, img_w, img_h):
    """Return the six geometry features of every box as an (N, 6) array.

    Columns, in order: relative width, relative height, relative centre x,
    relative centre y, relative area and aspect ratio (width / height).
    Rows of ``boxes`` are read as ``[x1, y1, x2, y2]`` (columns 0/2 are
    normalised by the image width, columns 1/3 by the image height).
    Degenerate boxes may produce NaN/inf here; the caller zeroes them.
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    geometry = np.zeros([len(widths), 6])
    geometry[:, 0] = widths / img_w
    geometry[:, 1] = heights / img_h
    geometry[:, 2] = ((boxes[:, 2] + boxes[:, 0]) / 2) / img_w
    geometry[:, 3] = ((boxes[:, 3] + boxes[:, 1]) / 2) / img_h
    geometry[:, 4] = widths * heights / (img_w * img_h)
    geometry[:, 5] = widths / heights
    return geometry


def parse_features(paths, imageset):
    """Write ``<file>_node_features_appd.npy`` for every image of a split.

    For each image id of ``vcoco_<imageset>`` the function loads the
    previously saved ``<file>.p`` pickle and ``<file>_node_features.npy``
    from ``<data_root>/processed/resnet`` and saves an array with one row
    per node and ``6 + 14 + 81`` columns: six box-geometry features, a
    14-way one-hot of the part class (part rows) and an 81-way one-hot of
    the object class (object rows).  Part nodes occupy the first
    ``part_num`` rows, object nodes the rest.  NaN and inf values are
    replaced by 0.  Images whose saved inputs cannot be loaded are skipped.

    Fix over the previous version: OpenCV image arrays are shaped
    (rows, cols, channels), i.e. (height, width, channels); width and
    height used to be swapped in the normalisation, so files produced
    earlier for non-square images need to be regenerated.

    Args:
        paths: object providing ``data_root`` and ``vcoco_data_root``.
        imageset: split name, used in ``'vcoco_<imageset>'`` and as key of
            ``vcoco_mapping``.

    Raises:
        IOError: if the image file of a processed entry cannot be read.
    """
    part_class_num = 14
    obj_class_num = 81
    feature_type = 'resnet'

    save_data_path = os.path.join(paths.data_root, 'processed', feature_type)
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)

    vcoco_data_dir = os.path.join(paths.vcoco_data_root, 'data')
    coco = vu.load_coco(vcoco_data_dir)
    vcoco_all = vu.load_vcoco('vcoco_{}'.format(imageset), vcoco_data_dir)
    for x in vcoco_all:
        vu.attach_gt_boxes(x, coco)
    image_ids = vcoco_all[0]['image_id'][:, 0].astype(int).tolist()

    image_dir = os.path.join(paths.data_root, 'coco',
                             vcoco_mapping[imageset] + '2014')
    part_eye = np.eye(part_class_num)
    obj_eye = np.eye(obj_class_num)

    # The id list may repeat an image; the output depends only on the image,
    # so each one is processed once.
    processed_ids = set()
    for image_id in image_ids:
        if image_id in processed_ids:
            continue
        processed_ids.add(image_id)

        filename = coco.loadImgs(ids=[image_id])[0]['file_name']
        try:
            # NOTE: pickle must only be used on trusted, locally produced files.
            with open(os.path.join(save_data_path,
                                   '{}.p'.format(filename)), 'rb') as handle:
                data = pickle.load(handle)
            node_features = np.load(
                os.path.join(save_data_path,
                             '{}_node_features.npy'.format(filename)))
        except Exception:
            # Best effort, as before: images without usable saved features
            # are skipped.  Unlike a bare ``except`` this lets
            # KeyboardInterrupt/SystemExit through.
            continue

        image_path = os.path.join(image_dir, filename)
        img = cv2.imread(image_path)
        if img is None:
            raise IOError('Could not read image {}'.format(image_path))
        img_h, img_w = img.shape[:2]  # OpenCV: (rows, cols) = (height, width)

        obj_boxes = data['obj_boxes']
        part_boxes = data['part_boxes']
        part_num = data['part_num']
        obj_classes = data['obj_classes']
        part_classes = data['part_classes']

        node_features_appd = np.zeros(
            [node_features.shape[0], 6 + part_class_num + obj_class_num])
        # Zero-sized boxes divide by zero; the NaN/inf results are zeroed
        # below, so the numpy warnings carry no information.
        with np.errstate(divide='ignore', invalid='ignore'):
            node_features_appd[:part_num, :6] = _box_geometry_features(
                part_boxes, img_w, img_h)
            node_features_appd[part_num:, :6] = _box_geometry_features(
                obj_boxes, img_w, img_h)
        node_features_appd[:part_num, 6:6 + part_class_num] = part_eye[part_classes]
        node_features_appd[part_num:, 6 + part_class_num:] = obj_eye[obj_classes]

        node_features_appd[np.isnan(node_features_appd)] = 0
        node_features_appd[np.isinf(node_features_appd)] = 0

        np.save(
            os.path.join(save_data_path,
                         '{}_node_features_appd.npy'.format(filename)),
            node_features_appd)
def main():
    """Build per-image relationship matrices from the V-COCO annotations.

    The train, test and val splits are loaded and concatenated per action.
    For every positive annotation of an action (except ``stand``, ``walk``,
    ``run``, ``smile`` and ``look``) each pair of annotated role boxes is
    looked up among the boxes stored in ``<kpts_dir>/<image id>.npz``
    (array ``'boxes'``) with ``obj_detected``; when both boxes of a pair
    are found, ``matrix[idx_a, idx_b]`` is set to 1.  Pairs are
    agent-role1 for two-role actions and additionally agent-role2 and
    role1-role2 for three-role actions.  The matrices are written to
    ``<rels_dir>/<image id>.npz`` under the key ``'relationships'`` and the
    number of recorded pairs is printed.

    Fixes over the previous version: every role pair is evaluated
    independently (a missing role used to suppress the remaining pairs and
    could drop a pair that had already been recorded for a new image),
    paths no longer depend on a trailing slash, non-``.npz`` files in the
    keypoint directory are ignored, and the output directory is created
    when missing.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--input-dir",
        default="../datasets/coco/vcoco/",
        metavar="DIR",
        help="path to vg images directory",
    )
    parser.add_argument(
        '--kpts_dir',
        type=str,
        default='../datasets/coco/vcoco/keypoints_real/',
        help='Directory containing the per-image keypoint .npz files',
    )
    parser.add_argument(
        '--rels_dir',
        type=str,
        default='../datasets/coco/vcoco/relationships_0_1_look_real/',
        help='Directory in which the relationship .npz files are written',
    )
    args = parser.parse_args()

    # Load COCO annotations for V-COCO images
    coco = vu.load_coco()

    # Load the three V-COCO splits.  As before, the return value of
    # attach_gt_boxes is not used: the entries themselves are read
    # afterwards ('role_bbox').
    splits = []
    for split_name in ('vcoco_train', 'vcoco_test', 'vcoco_val'):
        split = vu.load_vcoco(split_name)
        for action in split:
            vu.attach_gt_boxes(action, coco)
        splits.append(split)
    vcoco_train, vcoco_test, vcoco_val = splits

    # Merge the splits per action: per-annotation arrays are concatenated,
    # per-action metadata is taken from the train split.
    shared_keys = ('action_name', 'role_name', 'include')
    vcoco_all = []
    for i, train_cls in enumerate(vcoco_train):
        merged = {}
        for key, value in train_cls.items():
            if key in shared_keys:
                merged[key] = value
            else:
                merged[key] = np.concatenate(
                    (value, vcoco_test[i][key], vcoco_val[i][key]), axis=0)
        vcoco_all.append(merged)

    # Boxes available for every image, keyed by the zero-padded image id
    # (the .npz file name without extension).
    img_ids = []
    for entry in os.listdir(args.kpts_dir):
        stem, ext = os.path.splitext(entry)
        if ext == '.npz':
            img_ids.append(stem)

    print('Loading kpts from VCOCO...')
    kpts_dic = {}
    for img_id in tqdm(img_ids):
        with np.load(os.path.join(args.kpts_dir, img_id + '.npz'),
                     allow_pickle=True) as detec:
            kpts_dic[img_id] = detec['boxes']

    # Kept from the previous version so the global numpy RNG state after
    # this point is unchanged; nothing below draws random numbers.
    np.random.seed(10)

    print('Loading relationships...')
    skipped_actions = ('stand', 'walk', 'run', 'smile', 'look')
    rels_dic = {}
    count = 0
    for vcoco_cls in vcoco_all:
        if vcoco_cls['action_name'] in skipped_actions:
            continue

        n_roles = len(vcoco_cls['role_name'])
        if n_roles == 2:
            role_pairs = [(0, 1)]
        elif n_roles == 3:
            role_pairs = [(0, 1), (0, 2), (1, 2)]
        else:
            role_pairs = []

        for i in tqdm(range(vcoco_cls['label'].shape[0])):
            if vcoco_cls['label'][i] == 0:
                continue
            img_id_str = str(vcoco_cls['image_id'][i][0]).rjust(12, '0')
            if img_id_str not in kpts_dic:
                continue
            kpts = kpts_dic[img_id_str]

            rels_matrix = rels_dic.get(img_id_str)
            if rels_matrix is None:
                n = kpts.shape[0]
                rels_matrix = np.zeros((n, n))

            # One row of four values per role.
            role_bbox = (vcoco_cls['role_bbox'][i, :] * 1.).reshape((-1, 4))

            # Index of every role box among the stored boxes; -1 when the
            # role is not annotated (NaN) or obj_detected reports no match.
            role_idx = []
            if role_pairs:
                for box in role_bbox[:n_roles]:
                    if np.isnan(box).any():
                        role_idx.append(-1)
                    else:
                        role_idx.append(obj_detected(box, kpts))

            recorded = 0
            for first, second in role_pairs:
                if role_idx[first] == -1 or role_idx[second] == -1:
                    continue
                rels_matrix[role_idx[first], role_idx[second]] = 1
                recorded += 1
            count += recorded

            # Register the matrix as soon as one pair was recorded.  Actions
            # with a role count other than 2 or 3 register it
            # unconditionally, as the previous version did.
            if recorded or not role_pairs:
                rels_dic[img_id_str] = rels_matrix

    print(count)

    # Save relationship matrices
    if not os.path.isdir(args.rels_dir):
        os.makedirs(args.rels_dir)
    for img_id, rels_matrix in rels_dic.items():
        np.savez(os.path.join(args.rels_dir, '{}.npz'.format(img_id)),
                 relationships=rels_matrix)
def main(args):
    """Parse the command line and run the model in the requested mode.

    Only the ``train`` mode is implemented: it builds the V-COCO
    translator, the ROI data loader, the HOI model and its trainer, then
    trains for ``--epochs`` epochs.  Any other mode logs an error and
    exits with status 1.

    Args:
        args: list of command-line argument strings, handed to
            ``ArgumentParser.parse_args``.
    """
    parser = argparse.ArgumentParser(
        description=("Run deep models for visual semantic role segmentation "
                     "(or detection)"))
    parser.add_argument("mode", help="Mode to run model in (e.g. 'train')")
    parser.add_argument(
        "-s", "--save_dir", help="directory for saving the model",
        default="saved_models/%s" % dt.datetime.now().strftime(
            "%Y_%m_%d_%H_%M_%S"))
    parser.add_argument(
        "-e", "--epochs", help="number of epochs for training",
        type=int, default=50)
    parser.add_argument(
        "-p", "--save_per", help="epochs to wait before saving",
        type=int, default=5)
    parser.add_argument(
        "-l", "--learn_rate", help="learning rate",
        type=float, default=0.001)
    parser.add_argument(
        "-c", "--cuda", type=int, nargs="+",
        help="ids of gpus to use during training", default=[])
    parser.add_argument(
        "--net", help="file in which model is stored. Used in test mode.",
        default=None)
    # The adjacent literals below end with a space so the concatenated help
    # text keeps its word boundaries.
    parser.add_argument(
        "--coco_root",
        help=("path to coco directory, which must have an 'images' "
              "subfolder with images inside"),
        default=ld.COCO_ROOT)
    parser.add_argument(
        "--vcoco_root",
        help=("path to v-coco directory, which must have a 'coco' "
              "subfolder with another subfolder containing "
              "images/train2014 and images/val2014"),
        default=ld.VCOCO_ROOT)

    cfg = parser.parse_args(args)
    pathman = ld.PathManager(coco_root=cfg.coco_root,
                             vcoco_root=cfg.vcoco_root)

    if cfg.mode == 'train':
        vcoco_all = vu.load_vcoco("vcoco_train")
        # .values() works on both Python 2 and 3; .itervalues() exists only
        # on Python 2 dictionaries.
        categories = [x["name"] for x in vu.load_coco().cats.values()]
        translator = ld.VCocoTranslator(vcoco_all, categories)
        n_action_classes = translator.num_actions
        n_action_nonagent_roles = translator.num_action_nonagent_roles
        dataloader = ld.RoiVCocoBoxes(
            "vcoco_train", pathman.coco_root, pathman.vcoco_root)
        classes = dataloader.get_classes()
        model = fhoi.HoiModel(
            classes, n_action_classes, n_action_nonagent_roles,
            faster_rcnn_command_line=["NCLASSES", len(classes)],
            cuda=cfg.cuda)
        trainer = thoi.HoiTrainer(model, dataloader, **vars(cfg))
        logging.getLogger(__name__).info("Beginning Training...")
        trainer.train(cfg.epochs)
    # TODO build the test mode. Sketch from the previous version:
    #   elif cfg.mode == 'test':
    #       checkpoint = torch.load(cfg.net)
    #       model = checkpoint["model"]
    #       evaluator = ev.Evaluator(**vars(cfg))
    #       ev.do_eval(evaluator, model, "vcoco_val", cfg.save_dir)
    else:
        logging.getLogger(__name__).error("Invalid mode '%s'", cfg.mode)
        sys.exit(1)