def __init__(self, data_const=None, subset='vcoco_train', data_aug=False, sampler=None):
    """Set up the dataset for one V-COCO subset.

    Args:
        data_const: constants object providing at least the ``word2vec``
            file path. When ``None`` (the default) a fresh
            ``VcocoConstants()`` is created for this instance, instead of
            one instance built at definition time and shared by every
            dataset object.
        subset: name of the subset to load; forwarded to the ``_load_*``
            helpers.
        data_aug: stored on the instance as ``self.data_aug``.
        sampler: forwarded unchanged to ``_load_subset_ids``.
    """
    super(VcocoDataset, self).__init__()
    if data_const is None:
        data_const = VcocoConstants()
    self.data_aug = data_aug
    self.data_const = data_const
    self.subset_ids = self._load_subset_ids(subset, sampler)
    self.sub_app_data = self._load_subset_app_data(subset)
    self.sub_spatial_data = self._load_subset_spatial_data(subset)
    # NOTE(review): this HDF5 handle stays open for the lifetime of the
    # dataset object; nothing visible here closes it.
    self.word2vec = h5py.File(self.data_const.word2vec, 'r')
# Tail of a command-line argument definition whose opening lies outside this
# view, followed by the script entry point.
# Options registered in the visible part: --diff_edge (str2bool), --sampler
# (float, default 0) and --hico (str, default None). After parsing, the
# __main__ guard builds VcocoConstants(feat_type=args.feat_type) and calls
# run_model(args, data_const).
# NOTE(review): `required=True` is combined with `default=...` on the first two
# visible options, so those defaults can never take effect - confirm intent.
# NOTE(review): the help text of --sampler is identical to that of --diff_edge;
# looks like a copy-paste - confirm the intended description.
# NOTE(review): parser.parse_args() appears before the __main__ guard, so it
# presumably runs whenever this module is imported - confirm.
type=str, default='adam', choices=['sgd', 'adam'], required=True, help='which optimizer to be use: adam ') parser.add_argument( '--diff_edge', type=str2bool, default='false', required=True, help='h_h edge, h_o edge, o_o edge are different with each other') parser.add_argument( '--sampler', type=float, default=0, help='h_h edge, h_o edge, o_o edge are different with each other') parser.add_argument( '--hico', type=str, default=None, help='location of the pretrained model of HICO_DET dataset: None') args = parser.parse_args() if __name__ == "__main__": data_const = VcocoConstants(feat_type=args.feat_type) run_model(args, data_const)
# Tail of a data-parsing routine whose beginning lies outside this view,
# followed by the script entry point.
# Visible steps: create the per-image datasets 'feature', 'node_num',
# 'edge_labels' and 'edge_roles' in save_data, or (else branch) overwrite the
# existing 'edge_labels' / 'edge_roles' in place; close save_data when
# args.vis_result is false; then compute per-action and overall det/gt ratios
# and write them with io.dump_json_object to eval_det_file.
# The __main__ guard parses --vis_result / --v_r and calls
# parse_data(VcocoConstants(), args).
# NOTE(review): det_record[n] / gt_record[n] will fail (or yield inf/nan,
# depending on the value types) if a ground-truth count is 0 - confirm the
# counts are always positive.
# NOTE(review): save_data is only closed when args.vis_result is false -
# confirm it is not left open in the other case.
save_data[str(image_id)].create_dataset('feature', data=det_features) save_data[str(image_id)].create_dataset('node_num', data=node_num) save_data[str(image_id)].create_dataset('edge_labels', data=edge_labels) save_data[str(image_id)].create_dataset('edge_roles', data=edge_roles) else: save_data[str(image_id)]['edge_labels'][:] = edge_labels save_data[str(image_id)]['edge_roles'][:] = edge_roles if not args.vis_result: save_data.close() print("Finished parsing data!") # eval object detection eval_single = {n:det_record[n]/gt_record[n] for n in vcoco_metadata.action_class_with_object} eval_all = sum(det_record.values()) / sum(gt_record.values()) eval_det_result = { 'gt': gt_record, 'det': det_record, 'eval_single': eval_single, 'eval_all': eval_all } io.dump_json_object(eval_det_result, eval_det_file) if __name__ == "__main__": parse = argparse.ArgumentParser("Parse the VCOCO annotion data!!!") parse.add_argument('--vis_result', '--v_r', action="store_true", default=False, help='visualize the result or not') args = parse.parse_args() data_const = VcocoConstants() parse_data(data_const, args)
def main(args):
    """Run VS-GATs + PGception inference on a folder of images and save the renderings.

    Args:
        args: parsed command-line namespace. Reads ``gpu``, ``dataset``
            (``'hico'`` or ``'vcoco'``) and the matching checkpoint paths:
            ``main_pretrained_hico`` / ``pretrained_hico`` or
            ``main_pretrained_vcoco`` / ``pretrained_vcoco``.

    For every file name found in ``<infer_dir>/original_imgs/<dataset>`` the
    stored sample is fetched from the test dataset, both models are run, and
    the image with the predicted interactions drawn on it is written to
    ``<infer_dir>/processed_imgs/<dataset>``.

    The process exits with status 1 when the checkpoints cannot be loaded or
    the models cannot be constructed.
    """
    # Load checkpoint and set up model
    try:
        # use GPU if available else revert to CPU
        device = torch.device(
            'cuda:0' if torch.cuda.is_available() and args.gpu else 'cpu')
        print("Testing on", device)

        # set up model and initialize it with uploaded checkpoint
        if args.dataset == 'hico':
            checkpoint = torch.load(args.main_pretrained_hico, map_location=device)
            print('vsgats Checkpoint loaded!')
            pg_checkpoint = torch.load(args.pretrained_hico, map_location=device)
            data_const = HicoConstants(feat_type=checkpoint['feat_type'])
            graph_net_cls = vsgat_hico
        elif args.dataset == 'vcoco':
            checkpoint = torch.load(args.main_pretrained_vcoco, map_location=device)
            print('vsgats Checkpoint loaded!')
            pg_checkpoint = torch.load(args.pretrained_vcoco, map_location=device)
            data_const = VcocoConstants()
            graph_net_cls = vsgat_vcoco
        else:
            # Previously an unknown dataset surfaced as an opaque NameError
            # further down; report the real cause instead.
            raise ValueError(
                "unsupported dataset {!r}; expected 'hico' or 'vcoco'".format(args.dataset))

        vs_gats = graph_net_cls(feat_type=checkpoint['feat_type'],
                                bias=checkpoint['bias'],
                                bn=checkpoint['bn'],
                                dropout=checkpoint['dropout'],
                                multi_attn=checkpoint['multi_head'],
                                layer=checkpoint['layers'],
                                diff_edge=checkpoint['diff_edge'])
        vs_gats.load_state_dict(checkpoint['state_dict'])
        vs_gats.to(device)
        vs_gats.eval()

        print(pg_checkpoint['o_c_l'], pg_checkpoint['b_l'], pg_checkpoint['attn'],
              pg_checkpoint['lr'], pg_checkpoint['dropout'])
        pgception = PGception(action_num=pg_checkpoint['a_n'],
                              layers=1,
                              classifier_mod=pg_checkpoint['classifier_mod'],
                              o_c_l=pg_checkpoint['o_c_l'],
                              last_h_c=pg_checkpoint['last_h_c'],
                              bias=pg_checkpoint['bias'],
                              drop=pg_checkpoint['dropout'],
                              bn=pg_checkpoint['bn'],
                              agg_first=pg_checkpoint['agg_first'],
                              attn=pg_checkpoint['attn'],
                              b_l=pg_checkpoint['b_l'])
        pgception.load_state_dict(pg_checkpoint['state_dict'])
        pgception.to(device)
        pgception.eval()
        print('Constructed model successfully!')
    except Exception as e:
        print('Failed to load checkpoint or construct model!', e)
        sys.exit(1)

    # prepare for data
    if args.dataset == 'hico':
        original_imgs_dir = os.path.join(data_const.infer_dir, 'original_imgs/hico')
        save_path = os.path.join(data_const.infer_dir, 'processed_imgs/hico')
        test_dataset = HicoDataset(data_const=data_const, subset='test')
    else:
        original_imgs_dir = os.path.join(data_const.infer_dir, 'original_imgs/vcoco')
        save_path = os.path.join(data_const.infer_dir, 'processed_imgs/vcoco')
        test_dataset = VcocoDataset(data_const=data_const,
                                    subset='vcoco_test',
                                    pg_only=False)

    # Create the directories BEFORE listing the input folder: listing a
    # missing directory raises, which made the later existence check useless.
    os.makedirs(original_imgs_dir, exist_ok=True)
    os.makedirs(save_path, exist_ok=True)
    # Process every image in the input folder for both datasets (a hard-coded
    # single-image debug list used to override this listing).
    image_names = sorted(os.listdir(original_imgs_dir))
    print('result images will be kept here{}'.format(save_path))

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for img in tqdm(image_names):
            # load corresponding data
            if args.dataset == 'hico':
                global_id = img.split('.')[0]
                test_data = collate_fn([test_dataset.sample_date(global_id)])
            else:
                # the id is the numeric suffix of the file name, without
                # leading zeros
                global_id = str(int(img.split('.')[0].split('_')[-1]))
                test_data = vcoco_collate_fn([test_dataset.sample_date(global_id)])

            det_boxes = test_data['det_boxes'][0]
            roi_scores = test_data['roi_scores'][0]
            roi_labels = test_data['roi_labels'][0]
            edge_labels = test_data['edge_labels']
            node_num = test_data['node_num']
            features = test_data['features'].to(device)
            spatial_feat = test_data['spatial_feat'].to(device)
            word2vec = test_data['word2vec'].to(device)
            pose_normalized = test_data["pose_to_human"].to(device)
            pose_to_obj_offset = test_data["pose_to_obj_offset"].to(device)

            # inference
            # !NOTE: it is important to set [roi_labels]
            outputs, attn, attn_lang = vs_gats(
                node_num, features, spatial_feat, word2vec, [roi_labels])
            pg_outputs = pgception(pose_normalized, pose_to_obj_offset)
            det_outputs = torch.sigmoid(outputs + pg_outputs)
            det_outputs = det_outputs.cpu().detach().numpy()

            # show result
            if args.dataset == 'hico':
                image = Image.open(
                    os.path.join('datasets/hico/images/test2015', img)).convert('RGB')
                draw = vis_img
                gt_thresh = 0.5
            else:
                image = Image.open(
                    os.path.join(data_const.original_image_dir, 'val2014',
                                 img)).convert('RGB')
                draw = vis_img_vcoco
                gt_thresh = 0.1
            # copy first so the detection rendering starts from a clean image
            image_temp = image.copy()
            # NOTE(review): the ground-truth rendering is produced but never
            # saved anywhere.
            gt_img = draw(image, det_boxes, roi_labels, roi_scores,
                          edge_labels.cpu().numpy(), score_thresh=gt_thresh)
            det_img = draw(image_temp, det_boxes, roi_labels, roi_scores,
                           det_outputs, score_thresh=0.5)
            det_img.save(os.path.join(save_path, os.path.basename(img)))
def main(args):
    """Evaluate VS-GATs + PGception on the V-COCO test split.

    Args:
        args: parsed command-line namespace. Reads ``gpu``,
            ``main_pretrained`` (VS-GATs checkpoint path), ``pretrained``
            (PGception checkpoint path), ``exp_ver`` and ``rewrite``.
            ``exp_ver`` is filled in from the PGception checkpoint path when
            it is empty.

    Detections are written to ``<result_dir>/detection_results.pkl`` unless
    that file already exists and ``args.rewrite`` is false; the V-COCO
    evaluation is then run on that file.

    The process exits with status 1 when the checkpoints cannot be loaded or
    the models cannot be constructed.
    """
    # use GPU if available else revert to CPU
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.gpu else 'cpu')
    print("Testing on", device)

    # Load checkpoint and set up model
    try:
        checkpoint = torch.load(args.main_pretrained, map_location=device)
        print('vsgats Checkpoint loaded!')
        pg_checkpoint = torch.load(args.pretrained, map_location=device)

        # set up model and initialize it with uploaded checkpoint
        if not args.exp_ver:
            args.exp_ver = args.pretrained.split(
                "/")[-2] + "_" + args.pretrained.split("/")[-1].split("_")[-2]
        data_const = VcocoConstants(feat_type=checkpoint['feat_type'],
                                    exp_ver=args.exp_ver)
        vs_gats = AGRNN(feat_type=checkpoint['feat_type'],
                        bias=checkpoint['bias'],
                        bn=checkpoint['bn'],
                        dropout=checkpoint['dropout'],
                        multi_attn=checkpoint['multi_head'],
                        layer=checkpoint['layers'],
                        diff_edge=checkpoint['diff_edge'])
        vs_gats.load_state_dict(checkpoint['state_dict'])
        vs_gats.to(device)
        vs_gats.eval()

        print(pg_checkpoint['o_c_l'], pg_checkpoint['lr'], pg_checkpoint['dropout'])
        pg_kwargs = dict(action_num=pg_checkpoint['a_n'],
                         layers=1,
                         classifier_mod=pg_checkpoint['classifier_mod'],
                         o_c_l=pg_checkpoint['o_c_l'],
                         last_h_c=pg_checkpoint['last_h_c'],
                         bias=pg_checkpoint['bias'],
                         drop=pg_checkpoint['dropout'],
                         bn=pg_checkpoint['bn'],
                         agg_first=pg_checkpoint['agg_first'],
                         attn=pg_checkpoint['attn'])
        # checkpoints without 'b_l' fall back to the PGception default
        if 'b_l' in pg_checkpoint:
            print(pg_checkpoint['b_l'])
            pg_kwargs['b_l'] = pg_checkpoint['b_l']
        pgception = PGception(**pg_kwargs)
        pgception.load_state_dict(pg_checkpoint['state_dict'])
        pgception.to(device)
        pgception.eval()
        print('Constructed model successfully!')
    except Exception as e:
        print('Failed to load checkpoint or construct model!', e)
        sys.exit(1)

    io.mkdir_if_not_exists(data_const.result_dir, recursive=True)
    det_save_file = os.path.join(data_const.result_dir, 'detection_results.pkl')
    if not os.path.isfile(det_save_file) or args.rewrite:
        test_dataset = VcocoDataset(data_const=data_const,
                                    subset='vcoco_test',
                                    pg_only=False)
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     collate_fn=collate_fn)
        instrument_actions = ('cut_with', 'eat_with', 'hit_with')
        # save detection result
        det_data_list = []
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for data in tqdm(test_dataloader):
                global_id = data['global_id'][0]
                det_boxes = data['det_boxes'][0]
                roi_scores = data['roi_scores'][0]
                roi_labels = data['roi_labels'][0]
                node_num = data['node_num']
                features = data['features'].to(device)
                spatial_feat = data['spatial_feat'].to(device)
                word2vec = data['word2vec'].to(device)
                pose_normalized = data["pose_to_human"].to(device)
                pose_to_obj_offset = data["pose_to_obj_offset"].to(device)

                # referencing
                # !NOTE: it is important to set [roi_labels]
                outputs, attn, attn_lang = vs_gats(
                    node_num, features, spatial_feat, word2vec, [roi_labels])
                # PGception may return one tensor or several (one per output
                # branch). Decide from what it actually returned: the old
                # test looked for 'b_l' in the VS-GATs checkpoint, although
                # PGception is configured from pg_checkpoint['b_l'].
                pg_outputs = pgception(pose_normalized, pose_to_obj_offset)
                if not isinstance(pg_outputs, (tuple, list)):
                    pg_outputs = (pg_outputs,)
                logits = outputs
                for branch_out in pg_outputs:
                    logits = logits + branch_out
                action_scores = torch.sigmoid(logits).cpu().detach().numpy()

                n_nodes = node_num[0]
                # human nodes carry label 1
                h_idxs = np.where(roi_labels == 1)[0]
                for h_idx in h_idxs:
                    for i_idx in range(n_nodes):
                        if i_idx == h_idx:
                            continue
                        # save hoi results in single image
                        single_result = {
                            'image_id': global_id,
                            'person_box': det_boxes[h_idx, :],
                        }
                        # each node has (n_nodes - 1) outgoing edges; the
                        # missing self edge shifts later targets by one
                        if h_idx > i_idx:
                            edge_idx = h_idx * (n_nodes - 1) + i_idx
                        else:
                            edge_idx = h_idx * (n_nodes - 1) + i_idx - 1
                        # Errors propagate instead of dropping into a
                        # debugger and continuing with a stale score.
                        score = (roi_scores[h_idx] * roi_scores[i_idx]
                                 * action_scores[edge_idx])
                        for action in vcoco_metadata.action_class_with_object:
                            if action == 'none':
                                continue
                            action_idx = vcoco_metadata.action_with_obj_index[action]
                            single_action_score = score[action_idx]
                            if action in instrument_actions:
                                action_name = action.split('_')[0]
                                role_name = 'instr'
                            else:
                                action_name = action
                                role_name = vcoco_metadata.action_roles[action][1]
                            action_role_key = '{}_{}'.format(action_name, role_name)
                            single_result[action_role_key] = np.append(
                                det_boxes[i_idx, :], single_action_score)
                        det_data_list.append(single_result)
        # save all detected results (the context manager closes the file)
        with open(det_save_file, 'wb') as det_file:
            pickle.dump(det_data_list, det_file)

    # evaluate
    vcocoeval = VCOCOeval(
        os.path.join(data_const.original_data_dir, 'data/vcoco/vcoco_test.json'),
        os.path.join(data_const.original_data_dir,
                     'data/instances_vcoco_all_2014.json'),
        os.path.join(data_const.original_data_dir,
                     'data/splits/vcoco_test.ids'))
    vcocoeval._do_eval(data_const, det_save_file, ovr_thresh=0.5)
def main(args):
    """Evaluate a VS-GATs (AGRNN) checkpoint on the V-COCO test split.

    Args:
        args: parsed command-line namespace. Reads ``gpu``, ``pretrained``
            (checkpoint path), ``exp_ver`` and ``rewrite``. ``exp_ver`` is
            filled in from the checkpoint path when it is empty.

    Detections are written to ``<result_dir>/detection_results.pkl`` unless
    that file already exists and ``args.rewrite`` is false; the V-COCO
    evaluation is then run on that file.

    The process exits with status 1 when the checkpoint cannot be loaded or
    the model cannot be constructed.
    """
    # use GPU if available else revert to CPU
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.gpu else 'cpu')
    print("Testing on", device)

    # Load checkpoint and set up model
    try:
        checkpoint = torch.load(args.pretrained, map_location=device)
        print('Checkpoint loaded!')

        # set up model and initialize it with uploaded checkpoint
        if not args.exp_ver:
            args.exp_ver = args.pretrained.split(
                "/")[-3] + "_" + args.pretrained.split("/")[-1].split("_")[-2]
        data_const = VcocoConstants(feat_type=checkpoint['feat_type'],
                                    exp_ver=args.exp_ver)
        model = AGRNN(feat_type=checkpoint['feat_type'],
                      bias=checkpoint['bias'],
                      bn=checkpoint['bn'],
                      dropout=checkpoint['dropout'],
                      multi_attn=checkpoint['multi_head'],
                      layer=checkpoint['layers'],
                      diff_edge=checkpoint['diff_edge'])
        model.load_state_dict(checkpoint['state_dict'])
        model.to(device)
        model.eval()
        print('Constructed model successfully!')
    except Exception as e:
        print('Failed to load checkpoint or construct model!', e)
        sys.exit(1)

    io.mkdir_if_not_exists(data_const.result_dir)
    det_save_file = os.path.join(data_const.result_dir, 'detection_results.pkl')
    if not os.path.isfile(det_save_file) or args.rewrite:
        test_dataset = VcocoDataset(data_const=data_const, subset='vcoco_test')
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     collate_fn=collate_fn)
        instrument_actions = ('cut_with', 'eat_with', 'hit_with')
        # save detection result
        det_data_list = []
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for data in tqdm(test_dataloader):
                global_id = data['global_id'][0]
                det_boxes = data['det_boxes'][0]
                roi_scores = data['roi_scores'][0]
                roi_labels = data['roi_labels'][0]
                node_num = data['node_num']
                features = data['features'].to(device)
                spatial_feat = data['spatial_feat'].to(device)
                word2vec = data['word2vec'].to(device)

                # referencing
                # !NOTE: it is important to set [roi_labels]
                # the two attention outputs are not used for scoring
                outputs, _attn, _attn_lang = model(
                    node_num, features, spatial_feat, word2vec, [roi_labels])
                action_scores = torch.sigmoid(outputs).cpu().detach().numpy()

                n_nodes = node_num[0]
                # human nodes carry label 1
                h_idxs = np.where(roi_labels == 1)[0]
                for h_idx in h_idxs:
                    for i_idx in range(n_nodes):
                        if i_idx == h_idx:
                            continue
                        # save hoi results in single image
                        single_result = {
                            'image_id': global_id,
                            'person_box': det_boxes[h_idx, :],
                        }
                        # each node has (n_nodes - 1) outgoing edges; the
                        # missing self edge shifts later targets by one
                        if h_idx > i_idx:
                            edge_idx = h_idx * (n_nodes - 1) + i_idx
                        else:
                            edge_idx = h_idx * (n_nodes - 1) + i_idx - 1
                        # Errors propagate instead of dropping into a
                        # debugger and continuing with a stale score.
                        score = (roi_scores[h_idx] * roi_scores[i_idx]
                                 * action_scores[edge_idx])
                        for action in vcoco_metadata.action_class_with_object:
                            if action == 'none':
                                continue
                            action_idx = vcoco_metadata.action_with_obj_index[action]
                            single_action_score = score[action_idx]
                            if action in instrument_actions:
                                action_name = action.split('_')[0]
                                role_name = 'instr'
                            else:
                                action_name = action
                                role_name = vcoco_metadata.action_roles[action][1]
                            action_role_key = '{}_{}'.format(action_name, role_name)
                            single_result[action_role_key] = np.append(
                                det_boxes[i_idx, :], single_action_score)
                        det_data_list.append(single_result)
        # save all detected results (the context manager closes the file)
        with open(det_save_file, 'wb') as det_file:
            pickle.dump(det_data_list, det_file)

    # evaluate
    vcocoeval = VCOCOeval(
        os.path.join(data_const.original_data_dir, 'data/vcoco/vcoco_test.json'),
        os.path.join(data_const.original_data_dir,
                     'data/instances_vcoco_all_2014.json'),
        os.path.join(data_const.original_data_dir,
                     'data/splits/vcoco_test.ids'))
    vcocoeval._do_eval(data_const, det_save_file, ovr_thresh=0.5)