Example #1
def main():
    const = VisualGenomeConstants()
    io.mkdir_if_not_exists(const.proc_dir,recursive=True)
    
    print('Loading objects.json ...')
    objects = io.load_json_object(const.objects_json)
    
    print('Loading object_synsets.json ...')
    object_synsets = io.load_json_object(const.object_synsets_json)
    
    print('Creating image_id_to_object_id.json ...')
    image_id_to_object_id = get_image_id_to_object_id(objects)
    io.dump_json_object(
        image_id_to_object_id,
        os.path.join(const.proc_dir,'image_id_to_object_id.json'))

    print('Loading attributes.json ...')
    attributes = io.load_json_object(const.attributes_json)
    
    print('Loading attribute_synsets.json ...')
    attribute_synsets = io.load_json_object(const.attribute_synsets_json)

    print('Creating object_annos.json ...')
    object_annos = get_object_annos(objects,attributes,attribute_synsets)
    io.dump_json_object(
        object_annos,
        os.path.join(const.proc_dir,'object_annos.json'))
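All of the snippets on this page call into a small `io` utility module (`mkdir_if_not_exists`, `load_json_object`, `dump_json_object`). The real module is not shown here; a minimal sketch with the semantics implied by these calls might look like the following (an assumption, not the original implementation):

import json
import os


def mkdir_if_not_exists(dir_path, recursive=False):
    # Create dir_path if it does not exist; recursive=True also creates parents.
    if os.path.exists(dir_path):
        return
    if recursive:
        os.makedirs(dir_path, exist_ok=True)
    else:
        os.mkdir(dir_path)


def load_json_object(json_path):
    # Read a JSON file into a Python object.
    with open(json_path, 'r') as f:
        return json.load(f)


def dump_json_object(obj, json_path):
    # Write a Python object out as JSON.
    with open(json_path, 'w') as f:
        json.dump(obj, f, indent=4)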
Example #2
def main(**kwargs):
    subset = kwargs['subset']
    const = FlickrConstants()

    io.mkdir_if_not_exists(const.flickr_paths['proc_dir'])

    image_ids = io.read(const.subset_ids[subset])
    image_ids = [idx.decode() for idx in image_ids.split()]

    # Write boxes to json
    boxes = {}
    for image_id in tqdm(image_ids):
        box_xml = os.path.join(const.flickr_paths['anno_dir'],
                               f'{image_id}.xml')
        boxes[image_id] = get_annotations(box_xml)

    io.dump_json_object(boxes, const.box_json[subset])

    # Write sentence annos to json
    sent = {}
    for image_id in tqdm(image_ids):
        sent_txt = os.path.join(const.flickr_paths['sent_dir'],
                                f'{image_id}.txt')
        sent[image_id] = get_sentence_data(sent_txt)

    io.dump_json_object(sent, const.sent_json[subset])
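`get_annotations` and `get_sentence_data` come from the Flickr30K Entities toolkit and are not reproduced here. As a rough sketch, parsing the boxes from one annotation XML (assuming the usual `object`/`name`/`bndbox` layout) could be done as follows; the real helper returns a richer structure:

import xml.etree.ElementTree as ET


def parse_boxes(box_xml):
    # Returns {entity_id: [[xmin, ymin, xmax, ymax], ...]} for one image.
    root = ET.parse(box_xml).getroot()
    boxes = {}
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        if bndbox is None:
            # Some entities (e.g. scenes) have no box.
            continue
        box = [int(bndbox.find(tag).text)
               for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        # An object element may list several entity ids.
        for name in obj.findall('name'):
            boxes.setdefault(name.text, []).append(box)
    return boxes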
Example #3
def main(exp_const, data_const):
    print(f'Creating directory {exp_const.exp_dir} ...')
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Loading data ...')
    img_id_to_obj_id = io.load_json_object(
        data_const.image_id_to_object_id_json)
    object_annos = io.load_json_object(data_const.object_annos_json)

    cooccur = {}
    for img_id, obj_ids in tqdm(img_id_to_obj_id.items()):
        synset_list = create_synset_list(object_annos, obj_ids)
        for synset1 in synset_list:
            for synset2 in synset_list:
                if synset1 not in cooccur:
                    cooccur[synset1] = {}

                if synset2 not in cooccur[synset1]:
                    cooccur[synset1][synset2] = 0

                cooccur[synset1][synset2] += 1

    synset_cooccur_json = os.path.join(exp_const.exp_dir,
                                       'synset_cooccur.json')
    io.dump_json_object(cooccur, synset_cooccur_json)
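The nested-dict counting above (initialize missing keys, then increment) is a recurring pattern in these examples; the same symmetric co-occurrence count can be written more compactly with `collections`, as in this equivalent sketch:

from collections import Counter, defaultdict


def count_cooccur(per_image_synsets):
    # per_image_synsets: iterable of synset lists, one list per image.
    # Returns {synset1: {synset2: count}}, counting self-pairs exactly like
    # the double loop above.
    cooccur = defaultdict(Counter)
    for synset_list in per_image_synsets:
        for synset1 in synset_list:
            cooccur[synset1].update(synset_list)
    return {s1: dict(counts) for s1, counts in cooccur.items()}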
Example #4
def prepare_data(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    print('Writing constants to exp dir ...')
    data_const_json = os.path.join(exp_const.exp_dir, 'data_const.json')
    data_const.to_json(data_const_json)

    exp_const_json = os.path.join(exp_const.exp_dir, 'exp_const.json')
    exp_const.to_json(exp_const_json)

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating input json for faster rcnn ...')
    images_in_out = [None] * len(anno_list)
    for i, anno in enumerate(anno_list):
        global_id = anno['global_id']
        image_in_out = dict()
        image_in_out['in_path'] = os.path.join(data_const.images_dir,
                                               anno['image_path_postfix'])
        image_in_out['out_dir'] = os.path.join(data_const.proc_dir,
                                               'faster_rcnn_boxes')
        image_in_out['prefix'] = f'{global_id}_'
        images_in_out[i] = image_in_out

    images_in_out_json = os.path.join(exp_const.exp_dir,
                                      'faster_rcnn_im_in_out.json')
    io.dump_json_object(images_in_out, images_in_out_json)
Example #5
def main():
    args = parser.parse_args()

    data_const = HicoConstants(exp_ver=args.exp_ver)
    print('Creating output dir ...')
    io.mkdir_if_not_exists(data_const.result_dir + '/map', recursive=True)

    # Load hoi_list
    hoi_list_json = os.path.join(data_const.proc_dir, 'hoi_list.json')
    hoi_list = io.load_json_object(hoi_list_json)

    # Load subset ids to eval on
    split_ids_json = os.path.join(data_const.proc_dir, 'split_ids.json')
    split_ids = io.load_json_object(split_ids_json)
    global_ids = split_ids[args.subset]
    global_ids_set = set(global_ids)

    # Create gt_dets
    print('Creating GT dets ...')
    gt_dets = load_gt_dets(data_const.proc_dir, global_ids_set)

    eval_inputs = []
    for hoi in hoi_list:
        eval_inputs.append((hoi['id'], global_ids, gt_dets,
                            data_const.result_dir + '/pred_hoi_dets.hdf5',
                            data_const.result_dir + '/map'))

    # import ipdb; ipdb.set_trace()
    # eval_hoi(*eval_inputs[0])

    print(f'Starting a pool of {args.num_processes} workers ...')
    p = Pool(args.num_processes)

    print(f'Begin mAP computation ...')
    output = p.starmap(eval_hoi, eval_inputs)
    #output = eval_hoi('003',global_ids,gt_dets,args.pred_hoi_dets_hdf5,args.out_dir)

    p.close()
    p.join()

    mAP = {
        'AP': {},
        'mAP': 0,
        'invalid': 0,
    }
    map_ = 0
    count = 0
    for ap, hoi_id in output:
        mAP['AP'][hoi_id] = ap
        if not np.isnan(ap):
            count += 1
            map_ += ap

    mAP['mAP'] = map_ / count
    mAP['invalid'] = len(output) - count

    mAP_json = os.path.join(data_const.result_dir + '/map', 'mAP.json')
    io.dump_json_object(mAP, mAP_json)

    print(f'APs have been saved to {data_const.result_dir}/map')
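The aggregation loop above skips classes whose AP is NaN (no ground-truth positives in the chosen subset) before averaging. A numpy-based sketch of the same bookkeeping, useful as a sanity check on the saved mAP.json:

import numpy as np


def aggregate_map(output):
    # output: list of (ap, hoi_id) pairs, as returned by eval_hoi via starmap.
    aps = np.array([ap for ap, _ in output], dtype=float)
    valid = ~np.isnan(aps)
    return {
        'AP': {hoi_id: ap for ap, hoi_id in output},
        'mAP': float(aps[valid].mean()),  # equivalent to np.nanmean(aps)
        'invalid': int((~valid).sum()),
    }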
Example #6
def main():
    const = ImagenetConstants()
    io.mkdir_if_not_exists(const.img_dir)

    print('Loading urls ...')
    wnid_to_urls = io.load_json_object(const.wnid_to_urls_json)

    print('Starting pool ...')
    with Pool(40) as p:
        p.starmap(downloader,product([const.img_dir],wnid_to_urls.items()))
Example #7
def main(**kwargs):
    print('Creating Caption Encoder (tokenizer) ...')
    cap_encoder = CapEncoder(CapEncoderConstants())

    nltk.download('punkt')

    data_const = FlickrDatasetConstants(kwargs['subset'])
    data_const.read_noun_token_ids = False
    dataset = FlickrDataset(data_const)
    noun_token_ids = [None] * len(dataset)
    noun_vocab = set()
    num_human_captions = 0
    num_noun_captions = 0
    for i, data in enumerate(tqdm(dataset)):
        image_id = data['image_id']
        cap_id = data['cap_id']
        caption = data['caption']
        token_ids, tokens = cap_encoder.tokenize(caption)

        nltk_tokens = nltk.word_tokenize(caption.lower())
        pos_tags = nltk.pos_tag(nltk_tokens)
        pos_tags = ignore_words_from_pos(pos_tags,
                                         ['is', 'has', 'have', 'had', 'be'])

        alignment = align_pos_tokens(pos_tags, tokens)
        noun_token_ids_, noun_words = get_noun_token_ids(pos_tags, alignment)
        noun_token_ids_ = group_token_ids(noun_token_ids_, tokens)
        if len(noun_token_ids_) > 0:
            num_noun_captions += 1

        noun_token_ids[i] = {
            'image_id': image_id,
            'cap_id': cap_id,
            'token_ids': noun_token_ids_,
            'words': list(noun_words)
        }

        noun_vocab.update(noun_words)

        for human_word in [
                'man', 'person', 'human', 'woman', 'boy', 'girl', 'men',
                'women', 'boys', 'girls', 'child', 'children'
        ]:
            if human_word in tokens:
                num_human_captions += 1
                break

    io.mkdir_if_not_exists(
        os.path.join(flickr_paths['proc_dir'], 'annotations'))
    io.dump_json_object(noun_token_ids, data_const.noun_tokens_json)
    io.dump_json_object(sorted(list(noun_vocab)), data_const.noun_vocab_json)
    print('Number of human captions:', num_human_captions)
    print('Number of noun captions:', num_noun_captions)
    print('Total number of captions:', len(dataset))
    print('Size of noun vocabulary:', len(noun_vocab))
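The noun mining above relies on NLTK's tokenizer and POS tagger before aligning tags with the caption encoder's subword tokens. A self-contained sketch of just the tag-and-filter step (the alignment helpers are repo-specific and omitted):

# Requires the NLTK resources 'punkt' and 'averaged_perceptron_tagger'.
import nltk


def caption_nouns(caption, ignore=('is', 'has', 'have', 'had', 'be')):
    tokens = nltk.word_tokenize(caption.lower())
    pos_tags = nltk.pos_tag(tokens)
    return [word for word, tag in pos_tags
            if tag.startswith('NN') and word not in ignore]


# caption_nouns('A man has a red ball') -> ['man', 'ball']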
Example #8
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.vis_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const

    model.net = ResnetModel(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()

    if not exp_const.feedforward:
        model.AttributeEmbeddings = AttributeEmbeddings(
            model.const.AttributeEmbeddings)
        if model.const.model_num is not None:
            model.AttributeEmbeddings.load_state_dict(
                torch.load(model.const.AttributeEmbeddings_path))
        model.AttributeEmbeddings.cuda()

    model.img_mean = np.array([0.485, 0.456, 0.406])
    model.img_std = np.array([0.229, 0.224, 0.225])

    print('Creating dataloader ...')
    dataset = Cifar100Dataset(data_const)
    dataloader = DataLoader(dataset,
                            batch_size=exp_const.batch_size,
                            shuffle=True,
                            num_workers=exp_const.num_workers)

    eval_results = eval_model(model, dataloader, exp_const)

    confmat_npy = os.path.join(exp_const.exp_dir, 'confmat.npy')
    np.save(confmat_npy, eval_results['Conf Mat'])

    results = {
        'Avg Loss': eval_results['Avg Loss'],
        'Acc': eval_results['Acc']
    }

    print(results)
    results_json = os.path.join(exp_const.exp_dir, 'results.json')
    io.dump_json_object(results, results_json)

    embeddings_npy = os.path.join(exp_const.exp_dir, 'embeddings.npy')
    if exp_const.feedforward:
        np.save(embeddings_npy,
                model.net.resnet_layers.fc.weight.data.cpu().numpy())
    else:
        np.save(embeddings_npy,
                model.AttributeEmbeddings.embed.weight.data.cpu().numpy())

    labels_npy = os.path.join(exp_const.exp_dir, 'labels.npy')
    np.save(labels_npy, dataset.labels)
Example #9
def main():
    url = 'https://gist.githubusercontent.com/yrevar/6135f1bd8dcf2e0cc683/' + \
        'raw/d133d61a09d7e5a3b36b8c111a8dd5c4b5d560ee/' + \
        'imagenet1000_clsid_to_human.pkl'
    outdir = os.path.join(os.getcwd(), 'symlinks/data/imagenet/proc')
    io.mkdir_if_not_exists(outdir, recursive=True)
    labels_json = os.path.join(outdir, 'labels.json')
    labels_dict = pickle.load(urlrequest.urlopen(url))
    labels = []
    for i in range(len(labels_dict)):
        labels.append(labels_dict[i])
    io.dump_json_object(labels, labels_json)
Example #10
def main(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Loading glove embeddings ...')
    glove_idx = io.load_json_object(data_const.glove_idx)
    glove_h5py = h5py.File(data_const.glove_h5py, 'r')
    glove_embeddings = glove_h5py['embeddings'][()]
    num_glove_words, glove_dim = glove_embeddings.shape
    print('-' * 80)
    print(f'number of glove words: {num_glove_words}')
    print(f'glove dim: {glove_dim}')
    print('-' * 80)

    print('Loading visual features ...')
    visual_features_idx = io.load_json_object(data_const.visual_features_idx)
    visual_features_h5py = h5py.File(data_const.visual_features_h5py, 'r')
    visual_features = visual_features_h5py['features'][()]
    num_visual_features, visual_features_dim = visual_features.shape
    print('-' * 80)
    print(f'number of visual features: {num_visual_features}')
    print(f'visual feature dim: {visual_features_dim}')
    print('-' * 80)

    print('Combining glove with visual features ...')
    visual_word_vecs_idx_json = os.path.join(exp_const.exp_dir,
                                             'visual_word_vecs_idx.json')
    io.dump_json_object(glove_idx, visual_word_vecs_idx_json)
    visual_word_vecs_h5py = h5py.File(
        os.path.join(exp_const.exp_dir, 'visual_word_vecs.h5py'), 'w')
    visual_word_vec_dim = glove_dim + visual_features_dim
    visual_word_vecs = np.zeros([num_glove_words, visual_word_vec_dim])
    mean_visual_feature = visual_features_h5py['mean'][()]
    for word in tqdm(glove_idx.keys()):
        glove_id = glove_idx[word]
        glove_vec = glove_embeddings[glove_id]
        if word in visual_features_idx:
            feature_id = visual_features_idx[word]
            feature = visual_features[feature_id]
        else:
            feature = mean_visual_feature
        visual_word_vec = np.concatenate(
            (glove_vec, (feature - mean_visual_feature)))
        # visual_word_vec = np.concatenate((
        #     normalize(glove_vec),
        #     normalize(feature)))
        visual_word_vecs[glove_id] = visual_word_vec

    visual_word_vecs_h5py.create_dataset('embeddings',
                                         data=visual_word_vecs,
                                         chunks=(1, visual_word_vec_dim))
    visual_word_vecs_h5py.close()
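Once visual_word_vecs.h5py and visual_word_vecs_idx.json are written, a single combined vector can be read back as in this small usage sketch (paths follow the names used above):

import json
import os

import h5py
import numpy as np


def load_visual_word_vec(exp_dir, word):
    # Look up the concatenated GloVe+visual vector for one word.
    with open(os.path.join(exp_dir, 'visual_word_vecs_idx.json')) as f:
        word_to_idx = json.load(f)
    with h5py.File(os.path.join(exp_dir, 'visual_word_vecs.h5py'), 'r') as f:
        return np.asarray(f['embeddings'][word_to_idx[word]])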
Example #11
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    io.mkdir_if_not_exists(exp_const.vis_dir)
    configure(exp_const.log_dir)
    save_constants({
        'exp': exp_const,
        'data': data_const,
        'model': model_const
    }, exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.net = NET(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()
    model.img_mean = np.array([0.485, 0.456, 0.406])
    model.img_std = np.array([0.229, 0.224, 0.225])
    model.to_file(os.path.join(exp_const.exp_dir, 'model.txt'))

    print('Creating dataloader ...')
    dataloaders = {}
    for mode, subset in exp_const.subset.items():
        data_const = copy.deepcopy(data_const)
        data_const.subset = subset
        dataset = DATASET(data_const)
        dataloaders[mode] = DataLoader(dataset,
                                       batch_size=exp_const.batch_size,
                                       shuffle=True,
                                       num_workers=exp_const.num_workers)

    train_model(model, dataloaders, exp_const)
Example #12
def generate(exp_const, data_const, data_sign):
    print(f'Creating exp_dir: {exp_const.exp_dir}')
    io.mkdir_if_not_exists(exp_const.exp_dir)

    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print(f'Reading split_ids.json ...')
    split_ids = io.load_json_object(data_const.split_ids_json)

    print('Creating an object-detector-only HOI detector ...')
    hoi_cand_gen = HoiCandidatesGenerator(data_const, data_sign)

    print(f'Creating a hoi_candidates_{exp_const.subset}.hdf5 file ...')
    hoi_cand_hdf5 = os.path.join(exp_const.exp_dir,
                                 f'hoi_candidates_{exp_const.subset}.hdf5')
    f = h5py.File(hoi_cand_hdf5, 'w')

    # High-scoring predictions selected from all of the Faster R-CNN detections
    print('Reading selected dets from hdf5 file ...')
    all_selected_dets = h5py.File(data_const.selected_dets_hdf5, 'r')

    for global_id in tqdm(split_ids[exp_const.subset]):
        selected_dets = {
            'boxes': {},
            'scores': {},
            'rpn_ids': {},
            'obj_cls': {}
        }
        start_end_ids = all_selected_dets[global_id]['start_end_ids'][()]
        boxes_scores_rpn_ids = \
            all_selected_dets[global_id]['boxes_scores_rpn_ids'][()]

        for cls_ind, cls_name in enumerate(COCO_CLASSES):
            start_id, end_id = start_end_ids[cls_ind]
            boxes = boxes_scores_rpn_ids[start_id:end_id, :4]
            scores = boxes_scores_rpn_ids[start_id:end_id, 4]
            rpn_ids = boxes_scores_rpn_ids[start_id:end_id, 5]
            object_cls = np.full((end_id - start_id, ), cls_ind)
            selected_dets['boxes'][cls_name] = boxes
            selected_dets['scores'][cls_name] = scores
            selected_dets['rpn_ids'][cls_name] = rpn_ids
            selected_dets['obj_cls'][cls_name] = object_cls

        pred_dets, start_end_ids = hoi_cand_gen.predict(selected_dets)
        f.create_group(global_id)
        f[global_id].create_dataset('boxes_scores_rpn_ids_hoi_idx',
                                    data=pred_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)

    f.close()
Example #13
def main(exp_const,data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)
    
    print('Reading anno_list.json ...')
    anno_list  = io.load_json_object(data_const.anno_list_json)
    anno_dict = {anno['global_id']:anno for anno in anno_list}

    print('Reading box and pose features ...')
    human_pose_feats = h5py.File(data_const.human_pose_feats_h5py,'r')
    hoi_cand = h5py.File(data_const.hoi_cand_h5py,'r')

    for count,global_id in enumerate(tqdm(human_pose_feats.keys())):
        if count>=exp_const.max_count:
            break
        human_boxes = hoi_cand[global_id]['boxes_scores_rpn_ids_hoi_idx'][:,:4]
        human_rpn_ids = hoi_cand[global_id]['boxes_scores_rpn_ids_hoi_idx'][:,10]
        B = human_boxes.shape[0]
        absolute_pose = human_pose_feats[global_id]['absolute_pose'][()]
        absolute_pose = np.reshape(absolute_pose,(B,data_const.num_keypts,3))
        x1y1 = human_boxes[:,:2]    # Bx2
        wh = 0*x1y1 # Bx2
        wh[:,0] = (human_boxes[:,2] - human_boxes[:,0])
        wh[:,1] = (human_boxes[:,3] - human_boxes[:,1])
        x1y1 = np.tile(x1y1[:,np.newaxis,:],(1,data_const.num_keypts,1)) # Bx18x2
        wh = np.tile(wh[:,np.newaxis,:],(1,data_const.num_keypts,1))    # Bx18x2
        keypts = 0*absolute_pose
        keypts[:,:,:2] = absolute_pose[:,:,:2]*wh + x1y1
        keypts[:,:,2] = absolute_pose[:,:,2]
        img_path = os.path.join(
            data_const.images_dir,
            anno_dict[global_id]['image_path_postfix'])
        img = skio.imread(img_path)
        if len(img.shape)==2:
            img = np.tile(img[:,:,np.newaxis],(1,1,3))

        seen_rpn_ids = set()
        for i in range(B):
            rpn_id = human_rpn_ids[i]
            if rpn_id in seen_rpn_ids:
                continue
            else:
                seen_rpn_ids.add(rpn_id)
        
            img = bbox_utils.vis_human_keypts(img,keypts[i],modify=True)

            img_out_path = os.path.join(
                exp_const.exp_dir,
                f'{global_id}.png')
            skio.imsave(img_out_path,img)
Example #14
 def __init__(self,const):
     super(BaseDataset,self).__init__()
     self.const = copy.deepcopy(const)
     if self.const.download:
         io.mkdir_if_not_exists(self.const.root)
     
     self.transforms = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0., 0., 0.), (1., 1., 1.))
     ])
     
     self.transforms_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0., 0., 0.), (1., 1., 1.))
     ])
Example #15
def select(data_const):
    io.mkdir_if_not_exists(data_const.proc_dir)

    select_boxes_dir = data_const.proc_dir

    # Print where the boxes are coming from and where the output is written
    print(f'Boxes will be written to: {select_boxes_dir}')

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    # hdf5_file = os.path.join(select_boxes_dir,'selected_coco_cls_dets_0.1eval.hdf5')
    hdf5_file = os.path.join(select_boxes_dir, 'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file, 'w')

    # Load faster-rcnn detection results
    all_faster_rcnn_det_data = h5py.File(data_const.faster_det_fc7_feat, 'r')
    all_nms_keep_indices = io.load_json_object(
        os.path.join(data_const.proc_dir, 'nms_keep_indices.json'))
    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']

        # # get more detection for evaluation
        # if 'test' in global_id:
        #     data_const.human_score_thresh = 0.1
        #     data_const.object_score_thresh = 0.1

        boxes = all_faster_rcnn_det_data[global_id]['boxes']
        scores = all_faster_rcnn_det_data[global_id]['scores']
        features = all_faster_rcnn_det_data[global_id]['fc7_feat']
        nms_keep_indices = all_nms_keep_indices[global_id]

        # import ipdb; ipdb.set_trace()
        selected_dets, start_end_ids = select_dets(boxes, scores,
                                                   nms_keep_indices,
                                                   data_const)
        selected_feat = []
        for rpn_id in selected_dets[:, 5]:
            selected_feat.append(np.expand_dims(features[rpn_id, :], 0))
        selected_feat = np.concatenate(selected_feat, axis=0)
        f.create_group(global_id)
        f[global_id].create_dataset('boxes_scores_rpn_ids', data=selected_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)
        f[global_id].create_dataset('features', data=selected_feat)

    f.close()
Example #16
def select(exp_const,data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)
    
    select_boxes_dir = exp_const.exp_dir

    # Print where the boxes are coming from and where the output is written
    print(f'Boxes will be read from: {data_const.faster_rcnn_boxes}')
    print(f'Boxes will be written to: {select_boxes_dir}')
    
    print('Writing constants to exp dir ...')
    data_const_json = os.path.join(exp_const.exp_dir,'data_const.json')
    data_const.to_json(data_const_json)

    exp_const_json = os.path.join(exp_const.exp_dir,'exp_const.json')
    exp_const.to_json(exp_const_json)

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    hdf5_file = os.path.join(select_boxes_dir,'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file,'w')

    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']

        boxes_npy = os.path.join(
            data_const.faster_rcnn_boxes,
            f'{global_id}_boxes.npy')
        boxes = np.load(boxes_npy)
        
        scores_npy = os.path.join(
            data_const.faster_rcnn_boxes,
            f'{global_id}_scores.npy')
        scores = np.load(scores_npy)
        
        nms_keep_indices_json = os.path.join(
            data_const.faster_rcnn_boxes,
            f'{global_id}_nms_keep_indices.json')
        nms_keep_indices = io.load_json_object(nms_keep_indices_json)

        selected_dets, start_end_ids = select_dets(boxes,scores,nms_keep_indices,exp_const)
        f.create_group(global_id)
        f[global_id].create_dataset('boxes_scores_rpn_ids',data=selected_dets)
        f[global_id].create_dataset('start_end_ids',data=start_end_ids)
        
    f.close()
Example #17
def select(data_const):
    io.mkdir_if_not_exists(data_const.proc_dir)

    select_boxes_dir = data_const.proc_dir

    # Print where the boxes are coming from and where the output is written
    print(f'Boxes will be read from: {data_const.faster_rcnn_boxes}')
    print(f'Boxes will be written to: {select_boxes_dir}')

    print('Loading anno_list.json ...')
    anno_list = io.load_json_object(data_const.anno_list_json)

    print('Creating selected_coco_cls_dets.hdf5 file ...')
    # hdf5_file = os.path.join(select_boxes_dir,'selected_coco_cls_dets_0.1eval.hdf5')
    hdf5_file = os.path.join(select_boxes_dir, 'selected_coco_cls_dets.hdf5')
    f = h5py.File(hdf5_file, 'w')

    print('Selecting boxes ...')
    for anno in tqdm(anno_list):
        global_id = anno['global_id']

        # # get more detection for evaluation
        # if 'test' in global_id:
        #     data_const.human_score_thresh = 0.1
        #     data_const.object_score_thresh = 0.1

        boxes_npy = os.path.join(data_const.faster_rcnn_boxes,
                                 f'{global_id}_boxes.npy')
        boxes = np.load(boxes_npy)

        scores_npy = os.path.join(data_const.faster_rcnn_boxes,
                                  f'{global_id}_scores.npy')
        scores = np.load(scores_npy)

        nms_keep_indices_json = os.path.join(
            data_const.faster_rcnn_boxes, f'{global_id}_nms_keep_indices.json')
        nms_keep_indices = io.load_json_object(nms_keep_indices_json)

        # import ipdb; ipdb.set_trace()
        selected_dets, start_end_ids = select_dets(boxes, scores,
                                                   nms_keep_indices,
                                                   data_const)
        f.create_group(global_id)
        f[global_id].create_dataset('boxes_scores_rpn_ids', data=selected_dets)
        f[global_id].create_dataset('start_end_ids', data=start_end_ids)

    f.close()
Example #18
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    results = {}
    for embed_name, exp_prefix in exp_const.prefix.items():
        results[embed_name] = []
        for run in exp_const.runs:
            results_json = f'{exp_const.runs_prefix}{run}/' + \
                f'{exp_prefix}{exp_const.held_out_classes}/' + \
                'selected_model_results.json'

            results[embed_name].append(
                io.load_json_object(results_json)['Unseen Acc'])

    print_header(exp_const.runs)
    for embed_name, exp_prefix in exp_const.prefix.items():
        print_row(embed_name, results[embed_name])
Example #19
def main():
    const = SemEval201810Constants()
    io.mkdir_if_not_exists(const.proc_dir)

    subset_txt_file = {
        'train': const.train_txt,
        'val': const.val_txt,
        'test': const.test_txt,
        'truth': const.truth_txt
    }

    for subset, txt_file in subset_txt_file.items():
        print(f'Converting {subset}.txt file to json ...')
        data = read_txt(txt_file)
        print(f'Number of samples: {len(data)}')
        io.dump_json_object(data, os.path.join(const.proc_dir,
                                               f'{subset}.json'))
Example #20
def assign(exp_const, data_const):
    io.mkdir_if_not_exists(exp_const.exp_dir)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data':data_const}, exp_const.exp_dir)

    print(f'Reading hoi_candidates_{exp_const.subset}.hdf5 ...')
    hoi_cand_hdf5 = h5py.File(data_const.hoi_cand_hdf5, 'r')

    print(f'Creating hoi_candidate_labels_{exp_const.subset}.hdf5 ...')
    filename = os.path.join(
        exp_const.exp_dir,
        f'hoi_candidate_labels_{exp_const.subset}.hdf5')
    hoi_cand_label_hdf5 = h5py.File(filename, 'w')

    print('Loading gt hoi detections ...')
    split_ids = io.load_json_object(data_const.split_ids_json)
    global_ids = split_ids[exp_const.subset]
    gt_dets = load_gt_dets(data_const.anno_list_json, global_ids)

    print('Loading hoi_list.json ...')
    hoi_list = io.load_json_object(data_const.hoi_list_json)
    hoi_ids = [hoi['id'] for hoi in hoi_list]

    for global_id in tqdm(global_ids):
        boxes_scores_rpn_ids_hoi_idx = \
            hoi_cand_hdf5[global_id]['boxes_scores_rpn_ids_hoi_idx']
        start_end_ids = hoi_cand_hdf5[global_id]['start_end_ids']
        num_cand = boxes_scores_rpn_ids_hoi_idx.shape[0]
        labels = np.zeros([num_cand])
        for hoi_id in gt_dets[global_id]:
            start_id, end_id = start_end_ids[int(hoi_id)-1]
            for i in range(start_id, end_id):
                cand_det = {
                    'human_box': boxes_scores_rpn_ids_hoi_idx[i, :4],
                    'object_box': boxes_scores_rpn_ids_hoi_idx[i, 4:8],
                }
                # Check whether the candidate human-object pair matches any GT pair for this HOI; if matched (IoU > 0.5), set its label to 1
                is_match = match_hoi(cand_det, gt_dets[global_id][hoi_id])
                if is_match:
                    labels[i] = 1.0

        hoi_cand_label_hdf5.create_dataset(global_id, data=labels)

    hoi_cand_label_hdf5.close()
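match_hoi labels a candidate as positive when both its human and object boxes overlap some ground-truth pair with IoU > 0.5. A hedged sketch of that check, assuming [x1, y1, x2, y2] boxes and GT entries exposing 'human_box'/'object_box' (the repo's actual helper may differ in details):

def box_iou(a, b):
    # Intersection-over-union of two [x1, y1, x2, y2] boxes.
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / max(area_a + area_b - inter, 1e-10)


def match_hoi_sketch(cand_det, gt_dets, iou_thresh=0.5):
    return any(
        box_iou(cand_det['human_box'], gt['human_box']) > iou_thresh and
        box_iou(cand_det['object_box'], gt['object_box']) > iou_thresh
        for gt in gt_dets)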
Example #21
def main(exp_const, data_const):
    print(f'Creating directory {exp_const.exp_dir} ...')
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Saving constants ...')
    save_constants({'exp': exp_const, 'data': data_const}, exp_const.exp_dir)

    print('Creating dataloader ...')
    data_const = copy.deepcopy(data_const)
    dataset = ImagenetNoImgsDataset(data_const)
    collate_fn = dataset.create_collate_fn()
    dataloader = DataLoader(dataset,
                            batch_size=exp_const.batch_size,
                            shuffle=False,
                            num_workers=exp_const.num_workers,
                            collate_fn=collate_fn)

    create_gt_synset_cooccur(exp_const, dataloader)
Example #22
def main(embedPath, outdir, vocab_json, embed_type):
    io.mkdir_if_not_exists(outdir)

    vocab = io.load_json_object(vocab_json)

    with open(embedPath, 'r', encoding='latin') as fileId:
        # Read only the word, ignore feature vector
        lines = []
        for line in tqdm(fileId.readlines()):
            lines.append(line.split(' ', 1))
            #import pdb; pdb.set_trace()
        #lines = [line.split(' ', 1) for line in fileId.readlines()]; #[0]

    #vocab_size = int(lines[0][0])
    vocab_size = len(vocab)
    dim = int(lines[0][1][:-1])
    print(vocab_size, dim)
    embed = np.zeros([vocab_size, dim])
    word_to_idx = {}
    count = 0
    for line in tqdm(lines[1:]):
        word = str(line[0])  #.lower()
        if word not in vocab:
            continue

        vec = line[1]  # space separated string of numbers with '\n' at the end
        if embed_type == 'word2vec_wiki' or embed_type == 'visual_word2vec_wiki':
            vec = vec[:-1]
            vec = vec.split(' ')
        else:
            vec = vec.split(' ')[:-1]  # get rid of the '\n'

        count = vocab[word]
        word_to_idx[word] = count
        embed[count] = [float(s) for s in vec]
        #count+=1

    embed_npy = os.path.join(outdir, 'visual_embeddings.npy')
    np.save(embed_npy, embed)

    word_to_idx_json = os.path.join(outdir, 'word_to_idx.json')
    io.dump_json_object(word_to_idx, word_to_idx_json)
Example #23
def main():
    const = VisualGenomeConstants()
    io.mkdir_if_not_exists(const.proc_dir, recursive=True)

    print('Loading object_annos.json ...')
    object_annos = io.load_json_object(const.object_annos_json)

    print('Computing object frequencies ...')
    object_freqs = compute_object_freqs(object_annos)
    print(f'Number of objects: {len(object_freqs)}')
    io.dump_json_object(object_freqs,
                        os.path.join(const.proc_dir, 'object_freqs.json'))

    print('Computing object synset frequencies ...')
    object_synset_freqs = compute_object_synset_freqs(object_annos)
    print(f'Number of object_synsets: {len(object_synset_freqs)}')
    io.dump_json_object(
        object_synset_freqs,
        os.path.join(const.proc_dir, 'object_synset_freqs.json'))
Example #24
def select(data_const):

    for subset in ["vcoco_train", "vcoco_test", "vcoco_val"]:
        # create the folder/file to save corresponding detection results
        print('Selecting detection results for the {} subset ...'.format(subset.split('_')[1]))
        subset_dir = os.path.join(data_const.proc_dir, subset)
        io.mkdir_if_not_exists(subset_dir, recursive=True)

        print(f'Creating selected_coco_cls_dets.hdf5 file for {subset}...')
        hdf5_file = os.path.join(subset_dir,'selected_coco_cls_dets.hdf5')
        f = h5py.File(hdf5_file,'w')

        # Load the VCOCO annotations for image set
        vcoco = vu.load_vcoco(subset)
        img_id_list = vcoco[0]['image_id'][:,0].tolist()

        # Load faster-rcnn detection results
        all_faster_rcnn_det_data = h5py.File(os.path.join(subset_dir, 'faster_rcnn_det.hdf5'), 'r')
        all_nms_keep_indices = io.load_json_object(os.path.join(subset_dir, 'nms_keep_indices.json'))
        print('Selecting boxes ...')
        for img_id in tqdm(set(img_id_list)):

            boxes = all_faster_rcnn_det_data[str(img_id)]['boxes']
            scores = all_faster_rcnn_det_data[str(img_id)]['scores']
            features = all_faster_rcnn_det_data[str(img_id)]['fc7_feat']
            nms_keep_indices = all_nms_keep_indices[str(img_id)]

            # import ipdb; ipdb.set_trace()
            selected_dets, start_end_ids = select_dets(boxes,scores,nms_keep_indices,data_const)

            selected_feat = []
            for rpn_id in selected_dets[:, 5]:
                selected_feat.append(np.expand_dims(features[rpn_id, :], 0))
            selected_feat = np.concatenate(selected_feat, axis=0)
            f.create_group(str(img_id))
            f[str(img_id)].create_dataset('boxes_scores_rpn_ids',data=selected_dets)
            f[str(img_id)].create_dataset('start_end_ids',data=start_end_ids)
            f[str(img_id)].create_dataset('features',data=selected_feat)
            
        f.close()
Example #25
def main(exp_const, data_const, model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    io.mkdir_if_not_exists(exp_const.vis_dir)
    configure(exp_const.log_dir)
    if model_const.model_num is None:
        const_dict = {
            'exp': exp_const,
            'data': data_const,
            'model': model_const
        }
    else:
        const_dict = {
            f'exp_finetune_{model_const.model_num}': exp_const,
            f'data_finetune_{model_const.model_num}': data_const,
            f'model_finetune_{model_const.model_num}': model_const
        }
    save_constants(const_dict, exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.net = LogBilinear(model.const.net)
    if model.const.model_num is not None:
        model.net.load_state_dict(torch.load(model.const.net_path))
    model.net.cuda()
    model.to_file(os.path.join(exp_const.exp_dir, 'model.txt'))

    print('Creating positive dataloader ...')
    dataset = MultiSenseCooccurDataset(data_const)
    collate_fn = dataset.create_collate_fn()
    dataloader = DataLoader(dataset,
                            batch_size=exp_const.batch_size,
                            shuffle=True,
                            num_workers=exp_const.num_workers,
                            collate_fn=collate_fn)

    print('Creating negative dataloader ...')
    neg_dataset = NegMultiSenseCooccurDataset(data_const)
    collate_fn = neg_dataset.create_collate_fn()
    neg_dataloader = DataLoader(neg_dataset,
                                batch_size=exp_const.batch_size,
                                shuffle=True,
                                num_workers=exp_const.num_workers,
                                collate_fn=collate_fn)

    err_msg = f'Num words mismatch (try {len(dataset.words)})'
    assert (len(dataset.words) == model.const.net.num_words), err_msg

    train_model(model, dataloader, neg_dataloader, exp_const)
Example #26
 def __init__(self, const):
     super(Cifar100Dataset, self).__init__()
     self.const = copy.deepcopy(const)
     if self.const.download:
         io.mkdir_if_not_exists(self.const.root)
     self.dataset = torchvision.datasets.CIFAR100(
         self.const.root, self.const.train, download=self.const.download)
     self.labels = self.load_labels()
     TEST_LABELS_LG, SUPER_TO_IDX, FINE_TO_SUPER = \
         self.get_test_labels()
     self.held_out_labels = copy.deepcopy(TEST_LABELS_LG)
     for l in self.held_out_labels:
         assert (l in self.labels), 'held out label not in labels'
     self.held_out_idx = self.get_held_out_idx()
     self.fine_to_super = copy.deepcopy(FINE_TO_SUPER)
     self.super_to_idx = copy.deepcopy(SUPER_TO_IDX)
     self.fine_idx_to_super_idx = self.get_fine_idx_to_super_idx()
     self.transforms = transforms.Compose([
         transforms.RandomCrop(32, padding=4),
         transforms.RandomHorizontalFlip(),
     ])
Example #27
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    cooccur = {}
    nltk.download('wordnet')
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
    for synset in wn.all_synsets():
        words = synset_to_words(synset, stop_words)
        for word1 in words:
            for word2 in words:
                if word1 not in cooccur:
                    cooccur[word1] = {}

                if word2 not in cooccur[word1]:
                    cooccur[word1][word2] = 0

                cooccur[word1][word2] += 1

    cooccur_json = os.path.join(exp_const.exp_dir, 'word_cooccur.json')
    io.dump_json_object(cooccur, cooccur_json)
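synset_to_words is not shown above; one plausible implementation, assuming it simply gathers the synset's lemma names (split on underscores) and drops stop words, is sketched below. The original helper may also pull words from the definition:

def synset_to_words_sketch(synset, stop_words):
    # Hypothetical stand-in for the synset_to_words helper used above.
    words = set()
    for lemma_name in synset.lemma_names():
        for word in lemma_name.lower().split('_'):
            if word and word not in stop_words:
                words.add(word)
    return list(words)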
Example #28
def main(exp_const,
         data_const_train,
         data_const_val,
         model_const,
         data_sign='hico'):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    configure(exp_const.log_dir)
    save_constants(
        {
            'exp': exp_const,
            'data_train': data_const_train,
            'data_val': data_const_val,
            'model': model_const
        }, exp_const.exp_dir)

    print('Creating model ...')
    model = Model()
    model.const = model_const
    model.hoi_classifier = HoiClassifier(model.const.hoi_classifier,
                                         data_sign).cuda()
    model.to_txt(exp_const.exp_dir, single_file=True)

    print('Creating data loaders ...')
    dataset_train = Features(data_const_train)
    dataset_val = Features(data_const_val)

    train_model(model, dataset_train, dataset_val, exp_const)
Example #29
def main(exp_const,data_const,model_const):
    io.mkdir_if_not_exists(exp_const.exp_dir,recursive=True)
    io.mkdir_if_not_exists(exp_const.log_dir)
    io.mkdir_if_not_exists(exp_const.model_dir)
    configure(exp_const.log_dir)
    save_constants({
        'exp': exp_const,
        'data': data_const,
        'model': model_const},
        exp_const.exp_dir)

    print('Creating network ...')
    model = Model()
    model.const = model_const
    model.encoder = Encoder(model.const.encoder).cuda()
    model.decoder = Decoder(model.const.decoder).cuda()

    encoder_path = os.path.join(
        exp_const.model_dir,
        f'encoder_{-1}')
    torch.save(model.encoder.state_dict(),encoder_path)

    decoder_path = os.path.join(
        exp_const.model_dir,
        f'decoder_{-1}')
    torch.save(model.decoder.state_dict(),decoder_path)

    print('Creating dataloader ...')
    dataset = VisualFeaturesDataset(data_const)
    dataloader = DataLoader(
        dataset,
        batch_size=exp_const.batch_size,
        shuffle=True)

    train_model(model,dataloader,exp_const)
Example #30
def main(exp_const):
    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)

    print('Loading results ...')
    results = {
        #'chance': {},
        'random(100)': {},
        'GloVe+random(200)': {},
        'GloVe+random(100)': {},
        'GloVe': {},
        'GloVe+ViCo(linear,100)': {},
        'GloVe+ViCo(linear,200)': {},
        'GloVe+ViCo(select,200)': {}
    }
    for num_held_out_classes in exp_const.held_out_classes:
        print('Num held classes: ', num_held_out_classes)
        for embed_type in results.keys():
            print('embed type: ', embed_type)
            if embed_type == 'chance':
                continue

            results[embed_type][num_held_out_classes] = {}
            num_runs = len(exp_const.runs)
            for run_dir in exp_const.runs:
                exp_dir = os.path.join(
                    run_dir,
                    exp_const.prefix[embed_type] + str(num_held_out_classes))
                results_json = os.path.join(exp_dir,
                                            'selected_model_results.json')
                results_ = io.load_json_object(results_json)
                for k, v in results_.items():
                    if k not in results[embed_type][num_held_out_classes]:
                        results[embed_type][num_held_out_classes][k] = []

                    results[embed_type][num_held_out_classes][k].append(v)

    for metric_name in results['GloVe'][exp_const.held_out_classes[0]].keys():
        filename = os.path.join(exp_const.exp_dir, f'{metric_name}.html')
        plot_acc_vs_classes(results, metric_name, filename)