def main():
    const = VisualGenomeConstants()
    io.mkdir_if_not_exists(const.proc_dir, recursive=True)

    print('Loading objects.json ...')
    objects = io.load_json_object(const.objects_json)

    print('Loading object_synsets.json ...')
    object_synsets = io.load_json_object(const.object_synsets_json)

    print('Creating image_id_to_object_id.json ...')
    image_id_to_object_id = get_image_id_to_object_id(objects)
    io.dump_json_object(
        image_id_to_object_id,
        os.path.join(const.proc_dir, 'image_id_to_object_id.json'))

    print('Loading attributes.json ...')
    attributes = io.load_json_object(const.attributes_json)

    print('Loading attribute_synsets.json ...')
    attribute_synsets = io.load_json_object(const.attribute_synsets_json)

    print('Creating object_annos.json ...')
    object_annos = get_object_annos(objects, attributes, attribute_synsets)
    io.dump_json_object(
        object_annos,
        os.path.join(const.proc_dir, 'object_annos.json'))
def main(**kwargs):
    subset = kwargs['subset']
    const = FlickrConstants()
    io.mkdir_if_not_exists(const.flickr_paths['proc_dir'])

    image_ids = io.read(const.subset_ids[subset])
    image_ids = [idx.decode() for idx in image_ids.split()]

    # Write boxes to json
    boxes = {}
    for image_id in tqdm(image_ids):
        box_xml = os.path.join(
            const.flickr_paths['anno_dir'], f'{image_id}.xml')
        boxes[image_id] = get_annotations(box_xml)

    io.dump_json_object(boxes, const.box_json[subset])

    # Write sentence annos to json
    sent = {}
    for image_id in tqdm(image_ids):
        sent_txt = os.path.join(
            const.flickr_paths['sent_dir'], f'{image_id}.txt')
        sent[image_id] = get_sentence_data(sent_txt)

    io.dump_json_object(sent, const.sent_json[subset])
def main(exp_const, data_const):
    print(f'Creating directory {exp_const.exp_dir} ...')
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Loading data ...')
    img_id_to_obj_id = io.load_json_object(
        data_const.image_id_to_object_id_json)
    object_annos = io.load_json_object(data_const.object_annos_json)

    cooccur = {}
    for img_id, obj_ids in tqdm(img_id_to_obj_id.items()):
        synset_list = create_synset_list(object_annos, obj_ids)
        for synset1 in synset_list:
            for synset2 in synset_list:
                if synset1 not in cooccur:
                    cooccur[synset1] = {}
                if synset2 not in cooccur[synset1]:
                    cooccur[synset1][synset2] = 0
                cooccur[synset1][synset2] += 1

    synset_cooccur_json = os.path.join(
        exp_const.exp_dir, 'synset_cooccur.json')
    io.dump_json_object(cooccur, synset_cooccur_json)
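# Editor's note (illustrative sketch, not part of the original pipeline): the
# nested dict-of-dicts counting above can be written more compactly with
# collections.defaultdict. count_cooccur and the toy synset lists below are
# hypothetical, for illustration only.
from collections import defaultdict

def count_cooccur(synset_lists):
    # cooccur[s1][s2] = number of lists in which s1 and s2 both appear
    cooccur = defaultdict(lambda: defaultdict(int))
    for synset_list in synset_lists:
        for synset1 in synset_list:
            for synset2 in synset_list:
                cooccur[synset1][synset2] += 1
    return cooccur

# Example: two "images" with overlapping synsets
counts = count_cooccur([['dog.n.01', 'ball.n.01'], ['dog.n.01', 'person.n.01']])
assert counts['dog.n.01']['dog.n.01'] == 2
assert counts['dog.n.01']['ball.n.01'] == 1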
def prepare_data(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    print('Writing constants to exp dir ...')
    data_const_json = os.path.join(exp_const.exp_dir, 'data_const.json')
    data_const.to_json(data_const_json)

    exp_const_json = os.path.join(exp_const.exp_dir, 'exp_const.json')
    exp_const.to_json(exp_const_json)

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating input json for faster rcnn ...')
    images_in_out = [None] * len(anno_list)
    for i, anno in enumerate(anno_list):
        global_id = anno['global_id']
        images_in_out[i] = {
            'in_path': os.path.join(
                data_const.images_dir, anno['image_path_postfix']),
            'out_dir': os.path.join(
                data_const.proc_dir, 'faster_rcnn_boxes'),
            'prefix': f'{global_id}_',
        }

    images_in_out_json = os.path.join(
        exp_const.exp_dir, 'faster_rcnn_im_in_out.json')
    io.dump_json_object(images_in_out, images_in_out_json)
def main():
    args = parser.parse_args()
    data_const = HicoConstants(exp_ver=args.exp_ver)

    print('Creating output dir ...')
    io.mkdir_if_not_exists(data_const.result_dir + '/map', recursive=True)

    # Load hoi_list
    hoi_list_json = os.path.join(data_const.proc_dir, 'hoi_list.json')
    hoi_list = io.load_json_object(hoi_list_json)

    # Load subset ids to eval on
    split_ids_json = os.path.join(data_const.proc_dir, 'split_ids.json')
    split_ids = io.load_json_object(split_ids_json)
    global_ids = split_ids[args.subset]
    global_ids_set = set(global_ids)

    # Create gt_dets
    print('Creating GT dets ...')
    gt_dets = load_gt_dets(data_const.proc_dir, global_ids_set)

    eval_inputs = []
    for hoi in hoi_list:
        eval_inputs.append(
            (hoi['id'], global_ids, gt_dets,
             data_const.result_dir + '/pred_hoi_dets.hdf5',
             data_const.result_dir + '/map'))

    print(f'Starting a pool of {args.num_processes} workers ...')
    p = Pool(args.num_processes)

    print('Begin mAP computation ...')
    output = p.starmap(eval_hoi, eval_inputs)
    p.close()
    p.join()

    # Average APs over HOI classes, skipping NaN APs (classes with no
    # valid result); these are reported separately as 'invalid'
    mAP = {
        'AP': {},
        'mAP': 0,
        'invalid': 0,
    }
    map_ = 0
    count = 0
    for ap, hoi_id in output:
        mAP['AP'][hoi_id] = ap
        if not np.isnan(ap):
            count += 1
            map_ += ap

    mAP['mAP'] = map_ / count
    mAP['invalid'] = len(output) - count

    mAP_json = os.path.join(data_const.result_dir + '/map', 'mAP.json')
    io.dump_json_object(mAP, mAP_json)
    print(f'APs have been saved to {data_const.result_dir}/map')
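# Editor's note: the NaN-skipping average above is equivalent to np.nanmean,
# and the 'invalid' count to the number of NaN entries. A minimal sketch with
# made-up AP values:
import numpy as np

aps = np.array([0.5, np.nan, 0.7])
assert np.isclose(np.nanmean(aps), 0.6)   # the 'mAP' value
assert int(np.isnan(aps).sum()) == 1      # the 'invalid' count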
def main():
    const = ImagenetConstants()
    io.mkdir_if_not_exists(const.img_dir)

    print('Loading urls ...')
    wnid_to_urls = io.load_json_object(const.wnid_to_urls_json)

    print('Starting pool ...')
    with Pool(40) as p:
        p.starmap(downloader, product([const.img_dir], wnid_to_urls.items()))
def main(**kwargs):
    print('Creating Caption Encoder (tokenizer) ...')
    cap_encoder = CapEncoder(CapEncoderConstants())

    nltk.download('punkt')

    data_const = FlickrDatasetConstants(kwargs['subset'])
    data_const.read_noun_token_ids = False
    dataset = FlickrDataset(data_const)

    noun_token_ids = [None] * len(dataset)
    noun_vocab = set()
    num_human_captions = 0
    num_noun_captions = 0
    for i, data in enumerate(tqdm(dataset)):
        image_id = data['image_id']
        cap_id = data['cap_id']
        caption = data['caption']
        token_ids, tokens = cap_encoder.tokenize(caption)

        # POS-tag the caption and align the nltk tags to the encoder's tokens
        nltk_tokens = nltk.word_tokenize(caption.lower())
        pos_tags = nltk.pos_tag(nltk_tokens)
        pos_tags = ignore_words_from_pos(
            pos_tags, ['is', 'has', 'have', 'had', 'be'])

        alignment = align_pos_tokens(pos_tags, tokens)
        noun_token_ids_, noun_words = get_noun_token_ids(pos_tags, alignment)
        noun_token_ids_ = group_token_ids(noun_token_ids_, tokens)
        if len(noun_token_ids_) > 0:
            num_noun_captions += 1

        noun_token_ids[i] = {
            'image_id': image_id,
            'cap_id': cap_id,
            'token_ids': noun_token_ids_,
            'words': list(noun_words),
        }

        noun_vocab.update(noun_words)

        for human_word in ['man', 'person', 'human', 'woman', 'boy', 'girl',
                           'men', 'women', 'boys', 'girls', 'child',
                           'children']:
            if human_word in tokens:
                num_human_captions += 1
                break

    # NOTE: the original referenced a bare `flickr_paths` here; it is assumed
    # to be exposed on the dataset constants (as in FlickrConstants above)
    io.mkdir_if_not_exists(
        os.path.join(data_const.flickr_paths['proc_dir'], 'annotations'))
    io.dump_json_object(noun_token_ids, data_const.noun_tokens_json)
    io.dump_json_object(sorted(list(noun_vocab)), data_const.noun_vocab_json)

    print('Number of human captions:', num_human_captions)
    print('Number of noun captions:', num_noun_captions)
    print('Total number of captions:', len(dataset))
    print('Size of noun vocabulary:', len(noun_vocab))
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.vis_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.net = ResnetModel(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()
    if not exp_const.feedforward:
        model.AttributeEmbeddings = AttributeEmbeddings(
            model.const.AttributeEmbeddings)
        if model.const.model_num is not None:
            model.AttributeEmbeddings.load_state_dict(
                torch.load(model.const.AttributeEmbeddings_path))
        model.AttributeEmbeddings.cuda()
    model.img_mean = np.array([0.485, 0.456, 0.406])
    model.img_std = np.array([0.229, 0.224, 0.225])

    print('Creating dataloader ...')
    dataset = Cifar100Dataset(data_const)
    dataloader = DataLoader(
        dataset,
        batch_size=exp_const.batch_size,
        shuffle=True,
        num_workers=exp_const.num_workers)

    eval_results = eval_model(model, dataloader, exp_const)

    confmat_npy = os.path.join(exp_const.exp_dir, 'confmat.npy')
    np.save(confmat_npy, eval_results['Conf Mat'])

    results = {
        'Avg Loss': eval_results['Avg Loss'],
        'Acc': eval_results['Acc'],
    }
    print(results)

    results_json = os.path.join(exp_const.exp_dir, 'results.json')
    io.dump_json_object(results, results_json)

    embeddings_npy = os.path.join(exp_const.exp_dir, 'embeddings.npy')
    if exp_const.feedforward:
        np.save(
            embeddings_npy,
            model.net.resnet_layers.fc.weight.data.cpu().numpy())
    else:
        np.save(
            embeddings_npy,
            model.AttributeEmbeddings.embed.weight.data.cpu().numpy())

    labels_npy = os.path.join(exp_const.exp_dir, 'labels.npy')
    np.save(labels_npy, dataset.labels)
def main():
    url = 'https://gist.githubusercontent.com/yrevar/6135f1bd8dcf2e0cc683/' + \
        'raw/d133d61a09d7e5a3b36b8c111a8dd5c4b5d560ee/' + \
        'imagenet1000_clsid_to_human.pkl'

    outdir = os.path.join(os.getcwd(), 'symlinks/data/imagenet/proc')
    io.mkdir_if_not_exists(outdir, recursive=True)
    labels_json = os.path.join(outdir, 'labels.json')

    labels_dict = pickle.load(urlrequest.urlopen(url))
    labels = [labels_dict[i] for i in range(len(labels_dict))]
    io.dump_json_object(labels, labels_json)
def main(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Loading glove embeddings ...')
    glove_idx = io.load_json_object(data_const.glove_idx)
    glove_h5py = h5py.File(data_const.glove_h5py, 'r')
    glove_embeddings = glove_h5py['embeddings'][()]
    num_glove_words, glove_dim = glove_embeddings.shape
    print('-' * 80)
    print(f'number of glove words: {num_glove_words}')
    print(f'glove dim: {glove_dim}')
    print('-' * 80)

    print('Loading visual features ...')
    visual_features_idx = io.load_json_object(data_const.visual_features_idx)
    visual_features_h5py = h5py.File(data_const.visual_features_h5py, 'r')
    visual_features = visual_features_h5py['features'][()]
    num_visual_features, visual_features_dim = visual_features.shape
    print('-' * 80)
    print(f'number of visual features: {num_visual_features}')
    print(f'visual feature dim: {visual_features_dim}')
    print('-' * 80)

    print('Combining glove with visual features ...')
    visual_word_vecs_idx_json = os.path.join(
        exp_const.exp_dir, 'visual_word_vecs_idx.json')
    io.dump_json_object(glove_idx, visual_word_vecs_idx_json)
    visual_word_vecs_h5py = h5py.File(
        os.path.join(exp_const.exp_dir, 'visual_word_vecs.h5py'), 'w')
    visual_word_vec_dim = glove_dim + visual_features_dim
    visual_word_vecs = np.zeros([num_glove_words, visual_word_vec_dim])
    mean_visual_feature = visual_features_h5py['mean'][()]
    for word in tqdm(glove_idx.keys()):
        glove_id = glove_idx[word]
        glove_vec = glove_embeddings[glove_id]
        # Words without a visual feature fall back to the mean feature,
        # which becomes a zero vector after mean subtraction below
        if word in visual_features_idx:
            feature_id = visual_features_idx[word]
            feature = visual_features[feature_id]
        else:
            feature = mean_visual_feature
        visual_word_vec = np.concatenate(
            (glove_vec, feature - mean_visual_feature))
        visual_word_vecs[glove_id] = visual_word_vec

    visual_word_vecs_h5py.create_dataset(
        'embeddings',
        data=visual_word_vecs,
        chunks=(1, visual_word_vec_dim))
    visual_word_vecs_h5py.close()
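# Editor's sketch of the combination rule above, on toy arrays: each word
# vector is [glove_vec ; visual_feature - mean_visual_feature], so words
# without a visual feature (which fall back to the mean) get zeros in the
# visual half. The dimensions and values here are made up for illustration.
import numpy as np

glove_vec = np.array([0.1, -0.2])            # hypothetical 2-d GloVe vector
feature = np.array([1.0, 3.0, 5.0])          # hypothetical 3-d visual feature
mean_visual_feature = np.array([1.0, 2.0, 4.0])

in_vocab = np.concatenate((glove_vec, feature - mean_visual_feature))
out_of_vocab = np.concatenate(
    (glove_vec, mean_visual_feature - mean_visual_feature))

print(in_vocab)       # [ 0.1 -0.2  0.   1.   1. ]
print(out_of_vocab)   # [ 0.1 -0.2  0.   0.   0. ]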
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    io.mkdir_if_not_exists(exp_const.vis_dir)
    configure(exp_const.log_dir)
    save_constants(
        {'exp': exp_const, 'data': data_const, 'model': model_const},
        exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.net = NET(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()
    model.img_mean = np.array([0.485, 0.456, 0.406])
    model.img_std = np.array([0.229, 0.224, 0.225])
    model.to_file(os.path.join(exp_const.exp_dir, 'model.txt'))

    print('Creating dataloader ...')
    dataloaders = {}
    for mode, subset in exp_const.subset.items():
        data_const = copy.deepcopy(data_const)
        data_const.subset = subset
        dataset = DATASET(data_const)
        dataloaders[mode] = DataLoader(
            dataset,
            batch_size=exp_const.batch_size,
            shuffle=True,
            num_workers=exp_const.num_workers)

    train_model(model, dataloaders, exp_const)
def generate(exp_const, data_const, data_sign):
    print(f'Creating exp_dir: {exp_const.exp_dir}')
    io.mkdir_if_not_exists(exp_const.exp_dir)
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Reading split_ids.json ...')
    split_ids = io.load_json_object(data_const.split_ids_json)

    print('Creating an object-detector-only HOI detector ...')
    hoi_cand_gen = HoiCandidatesGenerator(data_const, data_sign)

    print(f'Creating a hoi_candidates_{exp_const.subset}.hdf5 file ...')
    hoi_cand_hdf5 = os.path.join(
        exp_const.exp_dir, f'hoi_candidates_{exp_const.subset}.hdf5')
    f = h5py.File(hoi_cand_hdf5, 'w')

    # High-scoring predictions selected from all Faster R-CNN outputs
    print('Reading selected dets from hdf5 file ...')
    all_selected_dets = h5py.File(data_const.selected_dets_hdf5, 'r')

    for global_id in tqdm(split_ids[exp_const.subset]):
        selected_dets = {
            'boxes': {},
            'scores': {},
            'rpn_ids': {},
            'obj_cls': {},
        }
        start_end_ids = all_selected_dets[global_id]['start_end_ids'][()]
        boxes_scores_rpn_ids = \
            all_selected_dets[global_id]['boxes_scores_rpn_ids'][()]
        for cls_ind, cls_name in enumerate(COCO_CLASSES):
            start_id, end_id = start_end_ids[cls_ind]
            boxes = boxes_scores_rpn_ids[start_id:end_id, :4]
            scores = boxes_scores_rpn_ids[start_id:end_id, 4]
            rpn_ids = boxes_scores_rpn_ids[start_id:end_id, 5]
            object_cls = np.full((end_id - start_id,), cls_ind)
            selected_dets['boxes'][cls_name] = boxes
            selected_dets['scores'][cls_name] = scores
            selected_dets['rpn_ids'][cls_name] = rpn_ids
            selected_dets['obj_cls'][cls_name] = object_cls

        pred_dets, start_end_ids = hoi_cand_gen.predict(selected_dets)
        f.create_group(global_id)
        f[global_id].create_dataset(
            'boxes_scores_rpn_ids_hoi_idx', data=pred_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)

    f.close()
def main(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    print('Reading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)
    anno_dict = {anno['global_id']: anno for anno in anno_list}

    print('Reading box and pose features ...')
    human_pose_feats = h5py.File(data_const.human_pose_feats_h5py, 'r')
    hoi_cand = h5py.File(data_const.hoi_cand_h5py, 'r')

    for count, global_id in enumerate(tqdm(human_pose_feats.keys())):
        if count >= exp_const.max_count:
            break

        human_boxes = \
            hoi_cand[global_id]['boxes_scores_rpn_ids_hoi_idx'][:, :4]
        human_rpn_ids = \
            hoi_cand[global_id]['boxes_scores_rpn_ids_hoi_idx'][:, 10]

        B = human_boxes.shape[0]
        absolute_pose = human_pose_feats[global_id]['absolute_pose'][()]
        absolute_pose = np.reshape(
            absolute_pose, (B, data_const.num_keypts, 3))

        # Keypoints are stored normalized to the human box; convert back to
        # image coordinates: xy = norm_xy * (w, h) + (x1, y1)
        x1y1 = human_boxes[:, :2]  # Bx2
        wh = 0 * x1y1  # Bx2
        wh[:, 0] = human_boxes[:, 2] - human_boxes[:, 0]
        wh[:, 1] = human_boxes[:, 3] - human_boxes[:, 1]
        x1y1 = np.tile(
            x1y1[:, np.newaxis, :], (1, data_const.num_keypts, 1))  # Bx18x2
        wh = np.tile(
            wh[:, np.newaxis, :], (1, data_const.num_keypts, 1))  # Bx18x2
        keypts = 0 * absolute_pose
        keypts[:, :, :2] = absolute_pose[:, :, :2] * wh + x1y1
        keypts[:, :, 2] = absolute_pose[:, :, 2]

        img_path = os.path.join(
            data_const.images_dir,
            anno_dict[global_id]['image_path_postfix'])
        img = skio.imread(img_path)
        if len(img.shape) == 2:
            img = np.tile(img[:, :, np.newaxis], (1, 1, 3))

        # Draw each detected human only once (candidates share rpn ids)
        seen_rpn_ids = set()
        for i in range(B):
            rpn_id = human_rpn_ids[i]
            if rpn_id in seen_rpn_ids:
                continue
            seen_rpn_ids.add(rpn_id)
            img = bbox_utils.vis_human_keypts(img, keypts[i], modify=True)

        img_out_path = os.path.join(exp_const.exp_dir, f'{global_id}.png')
        skio.imsave(img_out_path, img)
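# Editor's sketch of the keypoint denormalization used above: absolute_pose
# stores (x, y) normalized to the human box, so pixel coordinates are
# recovered as norm_xy * (w, h) + (x1, y1). Toy numbers for illustration.
import numpy as np

human_box = np.array([10.0, 20.0, 110.0, 220.0])   # x1, y1, x2, y2
norm_keypt = np.array([0.5, 0.25])                 # normalized (x, y)

wh = human_box[2:4] - human_box[0:2]               # box width/height: [100, 200]
pixel_keypt = norm_keypt * wh + human_box[0:2]
print(pixel_keypt)                                 # [60. 70.]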
def __init__(self, const):
    super(BaseDataset, self).__init__()
    self.const = copy.deepcopy(const)
    if self.const.download:
        io.mkdir_if_not_exists(self.const.root)
    self.transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0., 0., 0.), (1., 1., 1.)),
    ])
    self.transforms_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0., 0., 0.), (1., 1., 1.)),
    ])
def select(data_const):
    io.mkdir_if_not_exists(data_const.proc_dir)
    select_boxes_dir = data_const.proc_dir

    # Print where the output is written
    print(f'Boxes will be written to: {select_boxes_dir}')

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    hdf5_file = os.path.join(select_boxes_dir, 'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file, 'w')

    # Load faster-rcnn detection results
    all_faster_rcnn_det_data = h5py.File(data_const.faster_det_fc7_feat, 'r')
    all_nms_keep_indices = io.load_json_object(
        os.path.join(data_const.proc_dir, 'nms_keep_indices.json'))

    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']
        boxes = all_faster_rcnn_det_data[global_id]['boxes']
        scores = all_faster_rcnn_det_data[global_id]['scores']
        features = all_faster_rcnn_det_data[global_id]['fc7_feat']
        nms_keep_indices = all_nms_keep_indices[global_id]
        selected_dets, start_end_ids = select_dets(
            boxes, scores, nms_keep_indices, data_const)

        # Gather fc7 features for the selected detections; rpn ids live in a
        # float array, so cast before indexing
        selected_feat = []
        for rpn_id in selected_dets[:, 5]:
            selected_feat.append(np.expand_dims(features[int(rpn_id), :], 0))
        selected_feat = np.concatenate(selected_feat, axis=0)

        f.create_group(global_id)
        f[global_id].create_dataset(
            'boxes_scores_rpn_ids', data=selected_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)
        f[global_id].create_dataset('features', data=selected_feat)

    f.close()
def select(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)
    select_boxes_dir = exp_const.exp_dir

    # Print where the boxes are coming from and where the output is written
    print(f'Boxes will be read from: {data_const.faster_rcnn_boxes}')
    print(f'Boxes will be written to: {select_boxes_dir}')

    print('Writing constants to exp dir ...')
    data_const_json = os.path.join(exp_const.exp_dir, 'data_const.json')
    data_const.to_json(data_const_json)

    exp_const_json = os.path.join(exp_const.exp_dir, 'exp_const.json')
    exp_const.to_json(exp_const_json)

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    hdf5_file = os.path.join(select_boxes_dir, 'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file, 'w')

    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']

        boxes_npy = os.path.join(
            data_const.faster_rcnn_boxes, f'{global_id}_boxes.npy')
        boxes = np.load(boxes_npy)

        scores_npy = os.path.join(
            data_const.faster_rcnn_boxes, f'{global_id}_scores.npy')
        scores = np.load(scores_npy)

        nms_keep_indices_json = os.path.join(
            data_const.faster_rcnn_boxes,
            f'{global_id}_nms_keep_indices.json')
        nms_keep_indices = io.load_json_object(nms_keep_indices_json)

        selected_dets, start_end_ids = select_dets(
            boxes, scores, nms_keep_indices, exp_const)
        f.create_group(global_id)
        f[global_id].create_dataset(
            'boxes_scores_rpn_ids', data=selected_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)

    f.close()
def select(data_const):
    io.mkdir_if_not_exists(data_const.proc_dir)
    select_boxes_dir = data_const.proc_dir

    # Print where the boxes are coming from and where the output is written
    print(f'Boxes will be read from: {data_const.faster_rcnn_boxes}')
    print(f'Boxes will be written to: {select_boxes_dir}')

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    hdf5_file = os.path.join(select_boxes_dir, 'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file, 'w')

    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']

        boxes_npy = os.path.join(
            data_const.faster_rcnn_boxes, f'{global_id}_boxes.npy')
        boxes = np.load(boxes_npy)

        scores_npy = os.path.join(
            data_const.faster_rcnn_boxes, f'{global_id}_scores.npy')
        scores = np.load(scores_npy)

        nms_keep_indices_json = os.path.join(
            data_const.faster_rcnn_boxes,
            f'{global_id}_nms_keep_indices.json')
        nms_keep_indices = io.load_json_object(nms_keep_indices_json)

        selected_dets, start_end_ids = select_dets(
            boxes, scores, nms_keep_indices, data_const)
        f.create_group(global_id)
        f[global_id].create_dataset(
            'boxes_scores_rpn_ids', data=selected_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)

    f.close()
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    results = {}
    for embed_name, exp_prefix in exp_const.prefix.items():
        results[embed_name] = []
        for run in exp_const.runs:
            results_json = (
                f'{exp_const.runs_prefix}{run}/'
                f'{exp_prefix}{exp_const.held_out_classes}/'
                'selected_model_results.json')
            results[embed_name].append(
                io.load_json_object(results_json)['Unseen Acc'])

    print_header(exp_const.runs)
    for embed_name, exp_prefix in exp_const.prefix.items():
        print_row(embed_name, results[embed_name])
def main():
    const = SemEval201810Constants()
    io.mkdir_if_not_exists(const.proc_dir)

    subset_txt_file = {
        'train': const.train_txt,
        'val': const.val_txt,
        'test': const.test_txt,
        'truth': const.truth_txt,
    }

    for subset, txt_file in subset_txt_file.items():
        print(f'Converting {subset}.txt file to json ...')
        data = read_txt(txt_file)
        print(f'Number of samples: {len(data)}')
        io.dump_json_object(
            data, os.path.join(const.proc_dir, f'{subset}.json'))
def assign(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print(f'Reading hoi_candidates_{exp_const.subset}.hdf5 ...')
    hoi_cand_hdf5 = h5py.File(data_const.hoi_cand_hdf5, 'r')

    print(f'Creating hoi_candidate_labels_{exp_const.subset}.hdf5 ...')
    filename = os.path.join(
        exp_const.exp_dir, f'hoi_candidate_labels_{exp_const.subset}.hdf5')
    hoi_cand_label_hdf5 = h5py.File(filename, 'w')

    print('Loading gt hoi detections ...')
    split_ids = io.load_json_object(data_const.split_ids_json)
    global_ids = split_ids[exp_const.subset]
    gt_dets = load_gt_dets(data_const.anno_list_json, global_ids)

    print('Loading hoi_list.json ...')
    hoi_list = io.load_json_object(data_const.hoi_list_json)
    hoi_ids = [hoi['id'] for hoi in hoi_list]

    for global_id in tqdm(global_ids):
        boxes_scores_rpn_ids_hoi_idx = \
            hoi_cand_hdf5[global_id]['boxes_scores_rpn_ids_hoi_idx']
        start_end_ids = hoi_cand_hdf5[global_id]['start_end_ids']
        num_cand = boxes_scores_rpn_ids_hoi_idx.shape[0]
        labels = np.zeros([num_cand])
        for hoi_id in gt_dets[global_id]:
            start_id, end_id = start_end_ids[int(hoi_id) - 1]
            for i in range(start_id, end_id):
                cand_det = {
                    'human_box': boxes_scores_rpn_ids_hoi_idx[i, :4],
                    'object_box': boxes_scores_rpn_ids_hoi_idx[i, 4:8],
                }
                # Check whether the candidate human-object pair matches any
                # GT pair for this HOI; if matched (IoU > 0.5), set label to 1
                is_match = match_hoi(cand_det, gt_dets[global_id][hoi_id])
                if is_match:
                    labels[i] = 1.0

        hoi_cand_label_hdf5.create_dataset(global_id, data=labels)

    hoi_cand_label_hdf5.close()
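# Editor's sketch of the IoU test referenced in the comment above. match_hoi
# is defined elsewhere in the repo, so this compute_iou is a hypothetical
# stand-in showing the standard box-IoU computation it presumably relies on.
def compute_iou(box1, box2):
    # Boxes are (x1, y1, x2, y2)
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter
    return inter / union if union > 0 else 0.0

# A candidate matches a GT pair when both its boxes overlap GT with IoU > 0.5
assert compute_iou((0, 0, 10, 10), (0, 0, 10, 10)) == 1.0
assert compute_iou((0, 0, 10, 10), (20, 20, 30, 30)) == 0.0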
def main(exp_const, data_const):
    print(f'Creating directory {exp_const.exp_dir} ...')
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Creating dataloader ...')
    data_const = copy.deepcopy(data_const)
    dataset = ImagenetNoImgsDataset(data_const)
    collate_fn = dataset.create_collate_fn()
    dataloader = DataLoader(
        dataset,
        batch_size=exp_const.batch_size,
        shuffle=False,
        num_workers=exp_const.num_workers,
        collate_fn=collate_fn)

    create_gt_synset_cooccur(exp_const, dataloader)
def main(embedPath, outdir, vocab_json, embed_type):
    io.mkdir_if_not_exists(outdir)
    vocab = io.load_json_object(vocab_json)

    # Split each line into the word and the rest of the line (its vector)
    with open(embedPath, 'r', encoding='latin') as fileId:
        lines = []
        for line in tqdm(fileId.readlines()):
            lines.append(line.split(' ', 1))

    # First line of a word2vec-format file is the header: '<vocab_size> <dim>'
    vocab_size = len(vocab)
    dim = int(lines[0][1][:-1])
    print(vocab_size, dim)

    embed = np.zeros([vocab_size, dim])
    word_to_idx = {}
    for line in tqdm(lines[1:]):
        word = str(line[0])
        if word not in vocab:
            continue
        # Space separated string of numbers with '\n' at the end
        vec = line[1]
        if embed_type in ('word2vec_wiki', 'visual_word2vec_wiki'):
            vec = vec[:-1].split(' ')
        else:
            vec = vec.split(' ')[:-1]  # get rid of the '\n'
        idx = vocab[word]
        word_to_idx[word] = idx
        embed[idx] = [float(s) for s in vec]

    embed_npy = os.path.join(outdir, 'visual_embeddings.npy')
    np.save(embed_npy, embed)

    word_to_idx_json = os.path.join(outdir, 'word_to_idx.json')
    io.dump_json_object(word_to_idx, word_to_idx_json)
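# Editor's sketch of the word2vec text format parsed above: the first line is
# '<vocab_size> <dim>' and each following line is '<word> <v1> ... <vdim>\n'.
# The example line is made up. Note that str.split() with no argument would
# handle the trailing newline more robustly than the slicing used above.
line = 'dog 0.1 0.2 0.3\n'
word, vec_str = line.split(' ', 1)
vec = [float(s) for s in vec_str.split()]
assert word == 'dog' and vec == [0.1, 0.2, 0.3]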
def main():
    const = VisualGenomeConstants()
    io.mkdir_if_not_exists(const.proc_dir, recursive=True)

    print('Loading object_annos.json ...')
    object_annos = io.load_json_object(const.object_annos_json)

    print('Computing object frequencies ...')
    object_freqs = compute_object_freqs(object_annos)
    print(f'Number of objects: {len(object_freqs)}')
    io.dump_json_object(
        object_freqs, os.path.join(const.proc_dir, 'object_freqs.json'))

    print('Computing object synset frequencies ...')
    object_synset_freqs = compute_object_synset_freqs(object_annos)
    print(f'Number of object_synsets: {len(object_synset_freqs)}')
    io.dump_json_object(
        object_synset_freqs,
        os.path.join(const.proc_dir, 'object_synset_freqs.json'))
def select(data_const):
    for subset in ['vcoco_train', 'vcoco_test', 'vcoco_val']:
        # Create the folder/file to save the corresponding detection results
        print('Selecting detection results for the {} dataset ...'.format(
            subset.split('_')[1]))
        subset_dir = os.path.join(data_const.proc_dir, subset)
        io.mkdir_if_not_exists(subset_dir, recursive=True)

        print(f'Creating selected_coco_cls_dets.hdf5 file for {subset} ...')
        hdf5_file = os.path.join(subset_dir, 'selected_coco_cls_dets.hdf5')
        f = h5py.File(hdf5_file, 'w')

        # Load the V-COCO annotations for the image set
        vcoco = vu.load_vcoco(subset)
        img_id_list = vcoco[0]['image_id'][:, 0].tolist()

        # Load faster-rcnn detection results
        all_faster_rcnn_det_data = h5py.File(
            os.path.join(subset_dir, 'faster_rcnn_det.hdf5'), 'r')
        all_nms_keep_indices = io.load_json_object(
            os.path.join(subset_dir, 'nms_keep_indices.json'))

        print('Selecting boxes ...')
        for img_id in tqdm(set(img_id_list)):
            boxes = all_faster_rcnn_det_data[str(img_id)]['boxes']
            scores = all_faster_rcnn_det_data[str(img_id)]['scores']
            # NOTE: key spelling ('fc7_feaet') kept from the original code;
            # it presumably matches the key used when the hdf5 was written
            features = all_faster_rcnn_det_data[str(img_id)]['fc7_feaet']
            nms_keep_indices = all_nms_keep_indices[str(img_id)]
            selected_dets, start_end_ids = select_dets(
                boxes, scores, nms_keep_indices, data_const)

            # Gather fc7 features for the selected detections; rpn ids live
            # in a float array, so cast before indexing
            selected_feat = []
            for rpn_id in selected_dets[:, 5]:
                selected_feat.append(
                    np.expand_dims(features[int(rpn_id), :], 0))
            selected_feat = np.concatenate(selected_feat, axis=0)

            f.create_group(str(img_id))
            f[str(img_id)].create_dataset(
                'boxes_scores_rpn_ids', data=selected_dets)
            f[str(img_id)].create_dataset(
                'start_end_ids', data=start_end_ids)
            f[str(img_id)].create_dataset('features', data=selected_feat)

        f.close()
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    io.mkdir_if_not_exists(exp_const.vis_dir)
    configure(exp_const.log_dir)

    if model_const.model_num is None:
        const_dict = {
            'exp': exp_const,
            'data': data_const,
            'model': model_const,
        }
    else:
        const_dict = {
            f'exp_finetune_{model_const.model_num}': exp_const,
            f'data_finetune_{model_const.model_num}': data_const,
            f'model_finetune_{model_const.model_num}': model_const,
        }
    save_constants(const_dict, exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.net = LogBilinear(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()
    model.to_file(os.path.join(exp_const.exp_dir, 'model.txt'))

    print('Creating positive dataloader ...')
    dataset = MultiSenseCooccurDataset(data_const)
    collate_fn = dataset.create_collate_fn()
    dataloader = DataLoader(
        dataset,
        batch_size=exp_const.batch_size,
        shuffle=True,
        num_workers=exp_const.num_workers,
        collate_fn=collate_fn)

    print('Creating negative dataloader ...')
    neg_dataset = NegMultiSenseCooccurDataset(data_const)
    collate_fn = neg_dataset.create_collate_fn()
    neg_dataloader = DataLoader(
        neg_dataset,
        batch_size=exp_const.batch_size,
        shuffle=True,
        num_workers=exp_const.num_workers,
        collate_fn=collate_fn)

    err_msg = f'Num words mismatch (try {len(dataset.words)})'
    assert len(dataset.words) == model.const.net.num_words, err_msg

    train_model(model, dataloader, neg_dataloader, exp_const)
def __init__(self, const):
    super(Cifar100Dataset, self).__init__()
    self.const = copy.deepcopy(const)
    if self.const.download:
        io.mkdir_if_not_exists(self.const.root)
    self.dataset = torchvision.datasets.CIFAR100(
        self.const.root,
        self.const.train,
        download=self.const.download)
    self.labels = self.load_labels()
    TEST_LABELS_LG, SUPER_TO_IDX, FINE_TO_SUPER = self.get_test_labels()
    self.held_out_labels = copy.deepcopy(TEST_LABELS_LG)
    for l in self.held_out_labels:
        assert l in self.labels, 'held out label not in labels'
    self.held_out_idx = self.get_held_out_idx()
    self.fine_to_super = copy.deepcopy(FINE_TO_SUPER)
    self.super_to_idx = copy.deepcopy(SUPER_TO_IDX)
    self.fine_idx_to_super_idx = self.get_fine_idx_to_super_idx()
    self.transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ])
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    cooccur = {}
    nltk.download('wordnet')
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
    for synset in wn.all_synsets():
        words = synset_to_words(synset, stop_words)
        for word1 in words:
            for word2 in words:
                if word1 not in cooccur:
                    cooccur[word1] = {}
                if word2 not in cooccur[word1]:
                    cooccur[word1][word2] = 0
                cooccur[word1][word2] += 1

    cooccur_json = os.path.join(exp_const.exp_dir, 'word_cooccur.json')
    io.dump_json_object(cooccur, cooccur_json)
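# Editor's sketch: synset_to_words is defined elsewhere in the repo. A
# plausible implementation (an assumption, not the repo's actual code) would
# collect lemma names and definition words from a WordNet synset, minus
# stopwords; usable with nltk's wordnet as downloaded above.
def synset_to_words_sketch(synset, stop_words):
    words = set()
    for lemma in synset.lemmas():
        # Multi-word lemmas are stored with underscores, e.g. 'hot_dog'
        words.update(lemma.name().lower().split('_'))
    words.update(
        w for w in synset.definition().lower().split() if w.isalpha())
    return [w for w in words if w not in stop_words]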
def main(exp_const, data_const_train, data_const_val, model_const,
         data_sign='hico'):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    configure(exp_const.log_dir)
    save_constants(
        {
            'exp': exp_const,
            'data_train': data_const_train,
            'data_val': data_const_val,
            'model': model_const,
        },
        exp_const.exp_dir)

    print('Creating model ...')
    model = Model()
    model.const = model_const
    model.hoi_classifier = HoiClassifier(
        model.const.hoi_classifier, data_sign).cuda()
    model.to_txt(exp_const.exp_dir, single_file=True)

    print('Creating data loaders ...')
    dataset_train = Features(data_const_train)
    dataset_val = Features(data_const_val)

    train_model(model, dataset_train, dataset_val, exp_const)
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    configure(exp_const.log_dir)
    save_constants(
        {'exp': exp_const, 'data': data_const, 'model': model_const},
        exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.encoder = Encoder(model.const.encoder).cuda()
    model.decoder = Decoder(model.const.decoder).cuda()

    # Save the randomly initialized weights as checkpoint -1
    encoder_path = os.path.join(exp_const.model_dir, f'encoder_{-1}')
    torch.save(model.encoder.state_dict(), encoder_path)
    decoder_path = os.path.join(exp_const.model_dir, f'decoder_{-1}')
    torch.save(model.decoder.state_dict(), decoder_path)

    print('Creating dataloader ...')
    dataset = VisualFeaturesDataset(data_const)
    dataloader = DataLoader(
        dataset,
        batch_size=exp_const.batch_size,
        shuffle=True)

    train_model(model, dataloader, exp_const)
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Loading results ...')
    results = {
        # 'chance': {},
        'random(100)': {},
        'GloVe+random(200)': {},
        'GloVe+random(100)': {},
        'GloVe': {},
        'GloVe+ViCo(linear,100)': {},
        'GloVe+ViCo(linear,200)': {},
        'GloVe+ViCo(select,200)': {},
    }
    for num_held_out_classes in exp_const.held_out_classes:
        print('Num held classes: ', num_held_out_classes)
        for embed_type in results.keys():
            print('embed type: ', embed_type)
            if embed_type == 'chance':
                continue
            results[embed_type][num_held_out_classes] = {}
            num_runs = len(exp_const.runs)
            for run_dir in exp_const.runs:
                exp_dir = os.path.join(
                    run_dir,
                    exp_const.prefix[embed_type] + str(num_held_out_classes))
                results_json = os.path.join(
                    exp_dir, 'selected_model_results.json')
                results_ = io.load_json_object(results_json)
                for k, v in results_.items():
                    if k not in results[embed_type][num_held_out_classes]:
                        results[embed_type][num_held_out_classes][k] = []
                    results[embed_type][num_held_out_classes][k].append(v)

    for metric_name in results['GloVe'][exp_const.held_out_classes[0]].keys():
        filename = os.path.join(exp_const.exp_dir, f'{metric_name}.html')
        plot_acc_vs_classes(results, metric_name, filename)