Code example #1
    def __getitem__(self, idx):
        # get query id and corresponding video id
        qid = str(self.qids[idx])
        vid = self.anns[qid]["video_id"]
        timestamp = self.anns[qid]["timestamps"]
        duration = self.anns[qid]["duration"]

        # get query labels
        if self.in_memory:
            q_label = self.query_labels[qid]
        else:
            query_labels = h5py.File(self.paths["query_labels"], "r")
            q_label = query_labels[qid][:]
        q_leng = self.query_lengths[qid]

        # get grounding label
        if self.in_memory:
            start_pos = self.s_pos[qid]
            end_pos = self.e_pos[qid]
        else:
            grd_info = io_utils.load_hdf5(self.paths["grounding_info"], False)
            start_pos = grd_info["start_pos/"+qid][()]
            end_pos = grd_info["end_pos/"+qid][()]

        # get video features
        if self.in_memory:
            vid_feat_all = self.feats[vid]
        else:
            vid_feat_all = io_utils.load_hdf5(self.feat_hdf5, verbose=False)[vid]["c3d_features"]
        vid_feat, nfeats, start_index, end_index = self.get_fixed_length_feat(
                vid_feat_all, self.S, start_pos, end_pos)

        # get video masks
        vid_mask = np.zeros((self.S, 1))
        vid_mask[:nfeats] = 1

        # get attention mask
        if self.in_memory:
            att_mask = self.att_mask[qid]
        else:
            att_mask = grd_info["att_mask/"+qid][:]
        instance = {
            "vids": vid,
            "qids": qid,
            "timestamps": timestamp, # GT location [s, e] (seconds)
            "duration": duration, # video span (seconds)
            "query_lengths": q_leng,
            "query_labels": torch.LongTensor(q_label).unsqueeze(0),     # [1,L_q_max]
            "query_masks": (torch.FloatTensor(q_label)>0).unsqueeze(0), # [1,L_q_max]
            "grounding_start_pos": torch.FloatTensor([start_pos]), # [1]; normalized
            "grounding_end_pos": torch.FloatTensor([end_pos]),     # [1]; normalized
            "grounding_att_masks": torch.FloatTensor(att_mask),  # [L_v]
            "nfeats": torch.FloatTensor([nfeats]),
            "video_feats": torch.FloatTensor(vid_feat), # [L_v,D_v]
            "video_masks": torch.ByteTensor(vid_mask), # [L_v,1]
        }

        return instance
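The helper get_fixed_length_feat is called above but not shown. A minimal sketch of what such a helper might do, assuming uniform subsampling and zero-padding to S segments (the sampling scheme is my assumption, not necessarily the project's):

import numpy as np

def get_fixed_length_feat(feats, S, start_pos, end_pos):
    # keep at most S features: subsample long videos, zero-pad short ones
    nfeats_all = feats.shape[0]
    if nfeats_all <= S:
        keep_idx = np.arange(nfeats_all)
    else:
        keep_idx = np.round(np.linspace(0, nfeats_all - 1, S)).astype(int)
    nfeats = len(keep_idx)
    vid_feat = np.zeros((S, feats.shape[1]), dtype=feats.dtype)
    vid_feat[:nfeats] = feats[keep_idx]
    # map the normalized start/end positions to indices among the kept features
    start_index = min(int(start_pos * nfeats), nfeats - 1)
    end_index = min(int(end_pos * nfeats), nfeats - 1)
    return vid_feat, nfeats, start_index, end_index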
Code example #2
    def __init__(self, config):
        super(self.__class__, self).__init__(config)

        # get options
        self.S = config.get("num_segment", 128)
        self.split = config.get("split", "train")
        self.data_dir = config.get("data_dir", "")
        self.feature_type = config.get("feature_type", "C3D")
        self.in_memory = config.get("in_memory", False)
        self.feat_hdf5 = config.get(
            "video_feature_path",
            "data/ActivityNet/feats/sub_activitynet_v1-3.c3d.hdf5")

        # cropping augmentation settings
        self.cropping_augmentation = config.get("cropping_augmentation", False)
        self.cropping_prob = config.get("cropping_prob", 0.5)
        self.cropping_factor = config.get("cropping_factor", 0.5)
        self.no_aug = False

        # get paths for proposals and captions
        paths = self._get_data_path(config)

        # create labels (or load existing one)
        ann_path = config.get(
            "annotation_path",
            "data/ActivityNet/captions/annotations/train.json")
        self.anns, self.qids, self.vids = self._load_annotation(ann_path)
        if not self._exist_data(paths):
            self.generate_labels(config)

        # load features if use in_memory
        if self.in_memory:
            self.feats = {}
            h = io_utils.load_hdf5(self.feat_hdf5, verbose=False)
            for k in tqdm(self.vids, desc="In-Memory: vid_feat"):
                self.feats[k] = h[k]["c3d_features"][:]

            self.s_pos, self.e_pos, self.att_mask = {}, {}, {}
            grd_info = io_utils.load_hdf5(self.paths["grounding_info"], False)
            for k in tqdm(self.qids, desc="In-Memory: grounding"):
                self.s_pos[k] = grd_info["start_pos/" + k][()]
                self.e_pos[k] = grd_info["end_pos/" + k][()]
                self.att_mask[k] = grd_info["att_mask/" + k][()]

            self.query_labels = {}
            query_labels = h5py.File(self.paths["query_labels"], "r")
            for k in tqdm(self.qids, desc="In-Memory: query"):
                self.query_labels[k] = query_labels[k][:]

        # load and prepare json files
        query_info = io_utils.load_json(self.paths["query_info"])
        self.wtoi = query_info["wtoi"]
        self.itow = query_info["itow"]
        self.query_lengths = query_info["query_lengths"]

        self.batch_size = config.get("batch_size", 64)
        self.num_instances = len(self.qids)
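For reference, an illustrative config for this constructor, limited to the keys it reads; the default values come from the config.get(...) calls above, and any non-default choices (e.g. in_memory, data_dir) are assumptions for the sketch:

config = {
    "num_segment": 128,
    "split": "train",
    "data_dir": "data/ActivityNet",  # assumption; the snippet's default is ""
    "feature_type": "C3D",
    "in_memory": True,               # cache features and labels in RAM
    "video_feature_path": "data/ActivityNet/feats/sub_activitynet_v1-3.c3d.hdf5",
    "annotation_path": "data/ActivityNet/captions/annotations/train.json",
    "cropping_augmentation": False,
    "cropping_prob": 0.5,
    "cropping_factor": 0.5,
    "batch_size": 64,
}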
Code example #3
    def __init__(self, config):
        super(self.__class__, self).__init__(config)

        # get options
        self.S = config.get("num_segment", 128)
        self.split = config.get("split", "train")
        self.data_dir = config.get("data_dir", "data/charades")
        self.feature_type = config.get("feature_type", "I3D")
        self.in_memory = config.get("in_memory", False)
        self.feat_hdf5 = config.get(
            "video_feature_path",
            "data/charades/features/i3d_finetuned/i3d_finetuned.h5")
        self.feat_path = config.get("video_feature_path",
                                    "data/charades/features/i3d_finetuned.h5")

        # get paths for proposals and captions
        paths = self._get_data_path(config)

        # create labels (or load existing one)
        ann_path = "data/charades/annotations/charades_sta_{}.txt".format(
            self.split)
        aux_ann_path = "data/charades/annotations/Charades_v1_{}.csv".format(
            self.split)
        self.anns, self.qids, self.vids = self._load_annotation(
            ann_path, aux_ann_path)
        if not self._exist_data(paths):
            self.generate_labels(config)

        # load features if use in_memory
        if self.in_memory:
            self.feats = {}
            h = io_utils.load_hdf5(self.feat_hdf5, verbose=False)
            for vid in tqdm(self.vids, desc="In-Memory: vid_feat"):
                self.feats[vid] = h[vid][()]
            h.close()

            self.s_pos, self.e_pos, self.att_mask = {}, {}, {}
            grd_info = io_utils.load_hdf5(self.paths["grounding_info"], False)
            for k in tqdm(self.qids, desc="In-Memory: grounding"):
                self.s_pos[k] = grd_info["start_pos/" + k][()]
                self.e_pos[k] = grd_info["end_pos/" + k][()]
                self.att_mask[k] = grd_info["att_mask/" + k][()]

            self.query_labels = {}
            query_labels = h5py.File(self.paths["query_labels"], "r")
            for k in tqdm(self.qids, desc="In-Memory: query"):
                self.query_labels[k] = query_labels[k][:]

        # load query information
        query_info = io_utils.load_json(self.paths["query_info"])
        self.wtoi = query_info["wtoi"]
        self.itow = query_info["itow"]
        self.query_lengths = query_info["query_lengths"]

        self.batch_size = config.get("batch_size", 64)
        self.num_instances = len(self.qids)
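io_utils.load_hdf5 appears throughout these loaders next to plain h5py.File calls. A minimal sketch of what such a utility presumably does, i.e. open the file read-only and optionally print its top-level keys (the project's actual helper may differ):

import h5py

def load_hdf5(path, verbose=True):
    f = h5py.File(path, "r")
    if verbose:
        print("Loaded {} (top-level keys: {})".format(path, list(f.keys())))
    return f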
Code example #4
    def __getitem__(self, idx):
        """ Retrun a data (images, question_label, question length and answers)
        Returns:
            img (or feat): image (or feature)
            qst_label: question label
            qst_length: question length
            answer: answer for questions
        """

        # obtain image (as raw or feature)
        img_filename = self.json_file["image_filenames"][idx]
        if self.use_img:
            img_path = os.path.join(self.img_dir, img_filename)
            img = Image.open(img_path).convert("RGB")
            img = self.prepro(img)

        else:
            feat_path = os.path.join(self.feat_dir,
                                     img_filename.replace(".png", ".npy"))
            img = np.load(feat_path)
            img = torch.Tensor(img)

        # obtain question label and its length
        hdf5_file = io_utils.load_hdf5(self.hdf5_path, verbose=False)
        qst_label = torch.from_numpy(hdf5_file["question_labels"][idx])
        qst_length = hdf5_file["question_length"][idx]

        # obtain answer label
        answer = hdf5_file["answer_labels"][idx]
        answer = torch.from_numpy(np.asarray([answer])).long()
        hdf5_file.close()

        # obtain img info (question id)
        qst_id = self.json_file["question_ids"][idx]

        # prepare batch output
        out = [img, qst_label, qst_length]
        if self.assignment_path != "":
            # NOTE: DEPRECATED
            # obtain assignment label
            assignment_file = io_utils.load_hdf5(self.assignment_path,
                                                 verbose=False)
            assignments = torch.from_numpy(assignment_file["assignments"][idx])
            out.append(assignments)
        if self.base_logits_path != "":
            # obtain assignment label
            base_logits = io_utils.load_hdf5(self.base_logits_path,
                                             verbose=False)
            base_logits = torch.from_numpy(base_logits["base_logits"][idx])
            out.append(base_logits)
        out.append(answer)
        if self.vis_mode:
            out.append(img_filename)
        else:
            out.append(qst_id)
        return out
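As a toy illustration of the HDF5 layout this __getitem__ indexes per item, the snippet below writes a file with the three dataset names used above; the sizes (10 questions, max length 45) are made up for the sketch:

import h5py
import numpy as np

with h5py.File("qa_train_tiny.h5", "w") as f:
    f.create_dataset("question_labels", data=np.zeros((10, 45), dtype=np.int64))
    f.create_dataset("question_length", data=np.full((10,), 45, dtype=np.int64))
    f.create_dataset("answer_labels", data=np.zeros((10,), dtype=np.int64))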
Code example #5
    def __init__(self, config):

        # get config options
        print(json.dumps(config, indent=4))
        self.hdf5_path = utils.get_value_from_dict(config, "encoded_hdf5_path", \
                "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/" \
                + "all_questions_use_zero_token/qa_train.h5")
        self.json_path = utils.get_value_from_dict(config, "encoded_json_path", \
                "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/" \
                + "all_questions_use_zero_token/qa_train.json")
        self.img_size = utils.get_value_from_dict(config, "img_size", 224)
        self.batch_size = utils.get_value_from_dict(config, "batch_size", 32)
        self.use_img = utils.get_value_from_dict(config, "use_img", False)
        self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)
        if self.use_img:
            self.img_dir = utils.get_value_from_dict(config, "img_dir",
                                                     "data/CLEVR_v1.0/images")
            self.prepro = trn.Compose([
                trn.Resize(self.img_size),
                trn.CenterCrop(self.img_size),
                trn.ToTensor(),
                trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        else:
            self.feat_dir = utils.get_value_from_dict(config, "feat_dir",
                                                      "data/CLEVR_v1.0/feats")

        # load hdf5 file including question_labels, question_length,
        # answer_labels
        hdf5_file = io_utils.load_hdf5(self.hdf5_path)
        self.max_time_steps = hdf5_file["question_labels"].shape[1]

        # load json file including wtoi, itow, atoi, itoa, splits, vocab_info,
        # question_ids, image_filenames
        self.json_file = io_utils.load_json(self.json_path)

        # set path of pre-computed assignments
        # NOTE: DEPRECATED
        self.assignment_path = utils.get_value_from_dict(
            config, "assignment_path", "")

        # set path of pre-computed logits of base models
        self.base_logits_path = utils.get_value_from_dict(
            config, "base_logits_path", "")

        self.fetching_answer_option = "simple"

        self.vis_mode = config.get("vis_mode", False)
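Assuming trn is torchvision.transforms, the preprocessing pipeline defined above can be sanity-checked in isolation on a dummy RGB image:

from PIL import Image
import torchvision.transforms as trn

prepro = trn.Compose([
    trn.Resize(224),
    trn.CenterCrop(224),
    trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
print(prepro(Image.new("RGB", (320, 240))).shape)  # torch.Size([3, 224, 224])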
Code example #6
File: ensemble.py  Project: JonghwanMun/MCL-KD
def ensemble(config):

    """ Build data loader """
    dset = dataset.DataSet(config["test_loader"])
    L = data.DataLoader( \
            dset, batch_size=config["test_loader"]["batch_size"], \
            num_workers=config["num_workers"], \
            shuffle=False, collate_fn=dataset.collate_fn)

    """ Load assignments if exists """
    with_assignment = False
    if config["assignment_path"] != "None":
        with_assignment = True
        assignment_file = io_utils.load_hdf5(config["assignment_path"], verbose=False)
        assignments = assignment_file["assignments"][:]
        cnt_mapping = np.zeros((3,3))

    """ Build network """
    nets = []
    net_configs = []
    for i in range(len(config["checkpoint_paths"])):
        net_configs.append(io_utils.load_yaml(config["config_paths"][i]))
        net_configs[i] = M.override_config_from_loader(net_configs[i], dset)
        nets.append(M(net_configs[i]))
        nets[i].bring_loader_info(dset)
        apply_cc_after = utils.get_value_from_dict(
                net_configs[i]["model"], "apply_curriculum_learning_after", -1)
        # load checkpoint if exists
        nets[i].load_checkpoint(config["checkpoint_paths"][i])
        start_epoch = int(utils.get_filename_from_path(
                config["checkpoint_paths"][i]).split("_")[-1])
        # if the checkpoint was trained with curriculum learning, apply it
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            nets[i].apply_curriculum_learning()

    # ship network to use gpu
    if config["use_gpu"]:
        for i in range(len(nets)):
            nets[i].gpu_mode()
    for i in range(len(nets)):
        nets[i].eval_mode()

    # initialize counters for different tau
    metrics = ["top1-avg", "top1-max", "oracle"]
    for i in range(len(nets)):
        modelname = "M{}".format(i)
        metrics.append(modelname)
    tau = [1.0, 1.2, 1.5, 2.0, 5.0, 10.0, 50.0, 100.0]
    counters = OrderedDict()
    for T in tau:
        tau_name = "tau-"+str(T)
        counters[tau_name] = OrderedDict()
        for mt in metrics:
            counters[tau_name][mt] = accumulator.Accumulator(mt)

    """ Run training network """
    ii = 0
    itoa = dset.get_itoa()
    predictions = []
    for batch in tqdm(L):
        # Forward networks
        probs = 0
        B = batch[0][0].size(0)
        if isinstance(batch[0][-1], list):
            gt = batch[0][-1][0]
        else:
            gt = batch[0][-1]

        correct = 0
        probs = {}
        for T in tau:
            tau_name = "tau-"+str(T)
            probs[tau_name] = 0

        prob_list = []
        for i in range(len(nets)):
            outputs = nets[i].evaluate(batch)
            prob_list.append(outputs[1]) # m*[B,A]

        if config["save_logits"]:
            TODO = True

        for T in tau:
            tau_name = "tau-"+str(T)
            probs = [net_utils.get_data(F.softmax(logits/T, dim=1)) \
                     for logits in prob_list] # m*[B,A]

            # count correct numbers for each model
            for i in range(len(nets)):
                val, idx = probs[i].max(dim=1)
                correct = torch.eq(idx, gt)
                num_correct = torch.sum(correct)
                modelname = "M{}".format(i)
                counters[tau_name][modelname].add(num_correct, B)

                # add prob of each model
                if i == 0:
                    oracle_correct = correct
                else:
                    oracle_correct = oracle_correct + correct


            # top1-max accuracy for ensemble
            ens_probs, ens_idx = torch.stack(probs,0).max(0) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-max"].add(num_correct, B)

            # top1-avg accuracy for ensemble
            ens_probs = sum(probs) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-avg"].add(num_correct, B)

            # oracle accuracy for ensemble
            num_oracle_correct = torch.sum(torch.ge(oracle_correct, 1))
            counters[tau_name]["oracle"].add(num_oracle_correct, B)

            # attach predictions
            for i in range(len(batch[1])):
                qid = batch[1][i]
                predictions.append({
                    "question_id": qid,
                    "answer": utils.label2string(itoa, max_idx[i])
                })

        # epoch done

    # print accuracy
    for cnt_k,cnt_v in counters.items():
        txt = cnt_k + " "
        for k,v in cnt_v.items():
            txt += ", {} = {:.5f}".format(v.get_name(), v.get_average())
        print(txt)

    save_dir = os.path.join("results", "ensemble_predictions")
    io_utils.check_and_create_dir(save_dir)
    io_utils.write_json(os.path.join(save_dir, config["out"]+".json"), predictions)
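The three counters above amount to three ways of combining per-model probabilities. A self-contained toy version of the same arithmetic for two models and a single temperature:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
B, A, T = 4, 5, 1.0                      # batch size, answer vocabulary, temperature
gt = torch.randint(A, (B,))
logits = [torch.randn(B, A) for _ in range(2)]
probs = [F.softmax(l / T, dim=1) for l in logits]

# top1-avg: sum (or average) the probabilities, then take the argmax
avg_idx = sum(probs).max(dim=1)[1]
# top1-max: per-answer maximum over models, then the argmax
max_idx = torch.stack(probs, 0).max(0)[0].max(dim=1)[1]
# oracle: correct if any single model predicts the ground truth
oracle = torch.stack([p.max(dim=1)[1].eq(gt) for p in probs], 0).any(0)

print(avg_idx.eq(gt).sum().item(), max_idx.eq(gt).sum().item(), oracle.sum().item())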
Code example #7
    def __getitem__(self, idx):
        # get query id and corresponding video id
        qid = str(self.qids[idx])
        vid = self.anns[qid]["video_id"]
        timestamp = self.anns[qid]["timestamps"]
        duration = self.anns[qid]["duration"]

        # get query labels
        if self.in_memory:
            q_label = self.query_labels[qid]
        else:
            query_labels = h5py.File(self.paths["query_labels"], "r")
            q_label = query_labels[qid][:]
        q_leng = self.query_lengths[qid]

        # get grounding label
        if self.in_memory:
            start_pos = self.s_pos[qid]
            end_pos = self.e_pos[qid]
        else:
            grd_info = io_utils.load_hdf5(self.paths["grounding_info"], False)
            start_pos = grd_info["start_pos/" + qid][()]
            end_pos = grd_info["end_pos/" + qid][()]

        # get video features
        if self.in_memory:
            vid_feat_all = self.feats[vid]
        else:
            vid_feat_all = np.load(self.feat_path.format(vid)).squeeze()

        # treat defective case
        if timestamp[1] > duration:
            duration = timestamp[1]
        if timestamp[0] > timestamp[1]:
            timestamp = [timestamp[1], timestamp[0]]

        # Cropping Augmentation, part 1
        cropping = random()
        do_crop = self.cropping_augmentation and self.split == "train" and (
            not self.no_aug) and (cropping < self.cropping_prob)
        if do_crop:
            # sample how much to cut from before the start and after the end of the GT segment
            cut_start = random() * timestamp[0] * self.cropping_factor
            cut_end = random() * (duration -
                                  timestamp[1]) * self.cropping_factor
            # modify vid_all
            nfeats_all = vid_feat_all.shape[0]
            keep = utils.timestamp_to_featstamp(
                [cut_start, duration - cut_end], nfeats_all, duration)
            vid_feat_all = vid_feat_all[keep[0]:keep[1] + 1]
            # modify duration, timestamp, grounding label
            duration = duration - cut_start - cut_end
            timestamp = [timestamp[0] - cut_start, timestamp[1] - cut_start]
            start_pos = timestamp[0] / duration
            end_pos = timestamp[1] / duration

        # Adjust video feats
        vid_feat, nfeats, start_index, end_index = self.get_fixed_length_feat(
            vid_feat_all, self.S, start_pos, end_pos)

        # Cropping augmentation, part 2
        if do_crop:
            # when cropping, rebuild the attention mask for the shifted timestamps
            fs = utils.timestamp_to_featstamp(timestamp, nfeats, duration)
            att_mask = np.zeros((self.S))
            att_mask[fs[0]:fs[1] + 1] = 1
        else:
            # otherwise, use the precomputed attention mask
            if self.in_memory:
                att_mask = self.att_mask[qid]
            else:
                att_mask = grd_info["att_mask/" + qid][:]

        # get video masks
        vid_mask = np.zeros((self.S, 1))
        vid_mask[:nfeats] = 1

        instance = {
            "vids": vid,
            "qids": qid,
            "timestamps": timestamp,  # GT location [s, e] (second)
            "duration": duration,  # video span (second)
            "query_lengths": q_leng,
            "query_labels":
            torch.LongTensor(q_label).unsqueeze(0),  # [1,L_q_max]
            "query_masks":
            (torch.FloatTensor(q_label) > 0).unsqueeze(0),  # [1,L_q_max]
            "grounding_start_pos":
            torch.FloatTensor([start_pos]),  # [1]; normalized
            "grounding_end_pos":
            torch.FloatTensor([end_pos]),  # [1]; normalized
            "grounding_att_masks": torch.FloatTensor(att_mask),  # [L_v]
            "nfeats": torch.FloatTensor([nfeats]),
            "video_feats": torch.FloatTensor(vid_feat),  # [L_v,D_v]
            "video_masks": torch.ByteTensor(vid_mask),  # [L_v,1]
        }

        return instance
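utils.timestamp_to_featstamp is used above (and in generate_labels further down) but not shown. A common form of such a helper, sketched here as an assumption, maps a [start, end] timestamp in seconds to inclusive feature indices:

def timestamp_to_featstamp(timestamp, nfeats, duration):
    # convert seconds to feature indices, clamped to the valid range
    start = min(int(round(timestamp[0] / duration * nfeats)), nfeats - 1)
    end = max(min(int(round(timestamp[1] / duration * nfeats)), nfeats - 1), start)
    return start, end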
Code example #8
    def __init__(self, config):
        super(self.__class__, self).__init__(config)

        # get options
        self.S = config.get("num_segment", 128)
        self.split = config.get("split", "train")
        self.data_dir = config.get("data_dir", "data/charades")
        self.feature_type = config.get("feature_type", "I3D")
        self.in_memory = config.get("in_memory", False)
        if self.feature_type == "I3D":
            self.feat_path = config.get(
                "video_feature_path",
                "data/charades/features/i3d_finetuned/{}.npy")
        else:
            raise ValueError("Wrong feature_type")
        self.num_captions_per_segment = 5  # captions per segment; e.g., with 4 back-translated captions per original sentence, this should be 5.

        # cropping augmentation settings
        self.cropping_augmentation = config.get("cropping_augmentation", False)
        self.cropping_prob = config.get("cropping_prob", 0.5)
        self.cropping_factor = config.get("cropping_factor", 0.5)
        self.no_aug = False
        # CR settings
        self.cr_compare_all = config.get("cr_compare_all", True)
        # get paths for proposals and captions
        paths = self._get_data_path(config)

        # create labels (or load existing one)
        ann_path = "data/charades/annotations/charades_sta_{}.txt".format(
            self.split)
        aux_ann_path = "data/charades/annotations/Charades_v1_{}.csv".format(
            self.split)
        self.anns, self.qids, self.vids = self._load_annotation(
            ann_path, aux_ann_path)
        if not self._exist_data(paths):
            self.generate_labels(config)

        # load features if use in_memory
        if self.in_memory:
            self.feats = {}
            for vid in tqdm(self.vids, desc="In-Memory: vid_feat"):
                self.feats[vid] = np.load(self.feat_path.format(vid)).squeeze()

            self.s_pos, self.e_pos, self.att_mask = {}, {}, {}
            grd_info = io_utils.load_hdf5(self.paths["grounding_info"], False)
            for k in tqdm(self.qids, desc="In-Memory: grounding"):
                self.s_pos[k] = grd_info["start_pos/" + k][()]
                self.e_pos[k] = grd_info["end_pos/" + k][()]
                self.att_mask[k] = grd_info["att_mask/" + k][()]

            self.query_labels = {}
            query_labels = h5py.File(self.paths["query_labels"], "r")
            for k in tqdm(self.qids, desc="In-Memory: query"):
                self.query_labels[k] = query_labels[k][:]

        # load query information
        query_info = io_utils.load_json(self.paths["query_info"])
        self.wtoi = query_info["wtoi"]
        self.itow = query_info["itow"]
        self.query_lengths = query_info["query_lengths"]

        self.batch_size = config.get("batch_size", 64)
        if self.split == 'train':
            self.num_instances = len(
                self.qids) // self.num_captions_per_segment
        else:
            self.num_instances = len(self.qids)
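An illustrative config for this Charades loader, restricted to the keys read above; the defaults are taken from the snippet, and enabling cropping_augmentation is an arbitrary choice for the sketch:

config = {
    "num_segment": 128,
    "split": "train",
    "data_dir": "data/charades",
    "feature_type": "I3D",
    "in_memory": False,
    "video_feature_path": "data/charades/features/i3d_finetuned/{}.npy",
    "cropping_augmentation": True,
    "cropping_prob": 0.5,
    "cropping_factor": 0.5,
    "cr_compare_all": True,
    "batch_size": 64,
}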
Code example #9
    def generate_labels(self, config):
        """ Generate and save labels for temporal language grouding
            1)query_info (.json) with
                - wtoi: word to index dictionary (vocabulary)
                - itow: index to word dictionary (vocabulary)
                - query_lengths: lengths for queries
            2)query_labels (.h5): qid -> label
            3)grounding_labels (.h5): qid -> label
        """

        """ Query information """
        if not os.path.exists(self.paths["query_labels"]):
            # build vocabulary from training data
            train_ann_path = "data/ActivityNet/captions/annotations/train.json"
            train_anns, _, _ = self._load_annotation(train_ann_path)
            wtoi = self._build_vocab(train_anns)
            itow = {v:k for k,v in wtoi.items()}

            # encode queries and save labels (+lengths)
            L = config.get("max_length", 25)
            encoded = self._encode_query(self.anns, wtoi, L)
            query_labels = io_utils.open_hdf5(self.paths["query_labels"], "w")
            for qid in tqdm(encoded["query_lengths"].keys(), desc="Saving query"):
                _ = query_labels.create_dataset(str(qid), data=encoded["query_labels"][qid])
            query_labels.close()

            # save vocabulary and query length
            query_info = {
                "wtoi": wtoi,
                "itow": itow,
                "query_lengths": encoded["query_lengths"],
            }
            io_utils.write_json(self.paths["query_info"], query_info)

        """ Grounding information """
        if not os.path.exists(self.paths["grounding_info"]):
            if self.feature_type == "C3D":
                features = io_utils.load_hdf5(self.feat_hdf5)
            grd_dataset = io_utils.open_hdf5(self.paths["grounding_info"], "w")
            start_pos = grd_dataset.create_group("start_pos")
            end_pos = grd_dataset.create_group("end_pos")
            att_masks = grd_dataset.create_group("att_mask")

            for qid,ann in tqdm(self.anns.items(), desc="Gen. Grd. Labels"):
                # get starting/ending positions
                ts = ann["timestamps"]
                vid_d = ann["duration"]
                start = ts[0] / vid_d
                end = ts[1] / vid_d

                # get attention calibration mask
                vid = ann["video_id"]
                nfeats = features[vid]["c3d_features"][:].shape[0]
                nfeats = min(nfeats, self.S)

                fs = utils.timestamp_to_featstamp(ts, nfeats, vid_d)
                att_mask = np.zeros((self.S))
                att_mask[fs[0]:fs[1]+1] = 1

                _ = start_pos.create_dataset(qid, data=start, dtype="float")
                _ = end_pos.create_dataset(qid, data=end, dtype="float")
                _ = att_masks.create_dataset(qid, data=att_mask, dtype="float")

            # save the encoded proposal labels and video ids
            grd_dataset.close()
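As a worked example of the attention mask built in the loop above: for a 30-60 s annotation in a 120 s video with 64 valid features and S = 128 (toy numbers, using the same rounding as the timestamp_to_featstamp sketch earlier), the mask covers features 16 through 32:

import numpy as np

S, nfeats, duration, ts = 128, 64, 120.0, (30.0, 60.0)
fs = (round(ts[0] / duration * nfeats), round(ts[1] / duration * nfeats))  # (16, 32)
att_mask = np.zeros(S)
att_mask[fs[0]:fs[1] + 1] = 1
print(int(att_mask.sum()))  # 17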