Esempio n. 1
0
    def apply(self, games):
        """Build a guesser batch and select one target object per game.

        For each game the spatial features and category ids of all its objects
        are collected. When ``self.train`` is truthy the target is drawn
        uniformly at random among the game's objects; otherwise the game's
        current ``object`` is used. The game is updated in place with the
        selected target (``game.object`` and ``game.object_id``).

        Returns:
            The batch dictionary, with ``obj_spats`` / ``obj_cats`` padded and
            a float32 ``obj_mask`` set to 1.0 on the real (non-padded) objects.
        """
        batch = collections.defaultdict(list)
        batch_size = len(games)

        for row, game in enumerate(games):
            batch['raw'].append(game)
            objects = game.objects

            # Guesser inputs: one spatial vector and one category id per object
            spatial_feats = []
            for candidate in objects:
                spatial_feats.append(
                    get_spatial_feat(candidate.bbox, game.picture.width,
                                     game.picture.height))
            category_ids = [candidate.category_id for candidate in objects]

            batch['obj_spats'].append(spatial_feats)
            batch['obj_cats'].append(category_ids)

            # Choose the target: random while training, ground truth otherwise
            if not self.train:
                target_index = objects.index(game.object)
            else:
                target_index = random.randint(0, len(objects) - 1)
            target = objects[target_index]

            # The game itself is rewritten so that it points to the target
            game.object = target
            game.object_id = target.id

            batch['targets_index'].append(target_index)
            batch['targets_spatial'].append(spatial_feats[target_index])
            batch['targets_category'].append(category_ids[target_index])

            target_center = (target.bbox.x_center, target.bbox.y_center)
            batch['debug'].append(
                (target.category, target_center, game.picture.url))

            # Image: the array is allocated lazily, on the first available image
            img = game.picture.get_image()
            if img is None:
                continue
            if "images" not in batch:
                batch["images"] = np.zeros((batch_size, ) + img.shape)
            batch["images"][row] = img

        # Pad the per-game object lists
        batch['obj_spats'], obj_length = padder_3d(batch['obj_spats'])
        batch['obj_cats'], obj_length = padder(batch['obj_cats'])

        # Object mask: 1.0 for real objects, 0.0 for padding
        max_objects = max(obj_length)
        obj_mask = np.zeros((batch_size, max_objects), dtype=np.float32)
        for row in range(batch_size):
            obj_mask[row, :obj_length[row]] = 1.0
        batch['obj_mask'] = obj_mask

        return batch
Esempio n. 2
0
    def apply(self, games):
        """Build a VQA batch: encoded questions, answer counts and images.

        Args:
            games: non-empty sequence of games exposing ``question``,
                ``answers`` and ``image``.

        Returns:
            The batch dictionary. Questions are padded with the tokenizer's
            padding token, ``seq_length`` holds the lengths returned by
            ``padder`` and ``seq_mask`` is 1.0 on the real tokens.

        Raises:
            AssertionError: if ``games`` is empty.
        """
        batch = collections.defaultdict(list)
        batch_size = len(games)

        assert batch_size > 0

        tokenizer = self.tokenizer
        with_glove = 'glove' in self.sources

        for row, game in enumerate(games):
            batch["raw"].append(game)

            # Encoded question
            batch['question'].append(tokenizer.encode_question(game.question))

            if with_glove:
                # NB: even <unk> may have a specific glove vector.
                # Slow path: the glove vectors are copied in process.
                words = tokenizer.tokenize_question(game.question)
                batch['glove'].append(self.glove.get_embeddings(words))

            # Answer histogram, allocated once for the whole batch
            if "answer_count" not in batch:
                batch["answer_count"] = np.zeros(
                    (batch_size, tokenizer.no_answers))
            answer_row = batch["answer_count"][row]
            for answer in game.answers:
                answer_id = tokenizer.encode_answer(answer)
                if answer_id == tokenizer.unknown_answer and self.remove_unknown:
                    continue
                answer_row[answer_id] += 1

            # Image, stored in an array allocated on the first game
            img = game.image.get_image()
            if "image" not in batch:
                batch["image"] = np.zeros((batch_size, ) + img.shape)
            batch["image"][row] = img

        # Pad the questions (and the glove vectors when requested)
        padded_questions, lengths = padder(
            batch['question'], padding_symbol=tokenizer.padding_token)
        batch['question'] = padded_questions
        batch['seq_length'] = lengths
        if with_glove:
            batch['glove'], _ = padder_3d(batch['glove'])

        # Token mask
        longest = batch['seq_length'].max()
        seq_mask = np.zeros((batch_size, longest))
        for row, length in enumerate(batch['seq_length']):
            seq_mask[row, :length] = 1.0
        batch['seq_mask'] = seq_mask

        return batch
    def apply(self, games):
        """Build a guesser batch from full dialogues and candidate objects.

        Each game's questions and answers are tokenized and flattened into one
        sequence framed by the tokenizer's start token and stop-dialogue token.
        The index recorded for each answer is the running index plus the
        question length plus one. All objects of the game are encoded as
        spatial features plus category id, and the object whose id equals
        ``game.object_id`` is recorded as the target.

        Returns:
            The batch dictionary with padded dialogues and objects, plus the
            ``padding_mask``, ``answer_mask`` and ``obj_mask`` float32 arrays.
        """
        batch = collections.defaultdict(list)
        batch_size = len(games)
        tokenizer = self.tokenizer

        all_answer_indices = []
        for row, game in enumerate(games):
            batch['raw'].append(game)

            # Tokenize every question and every answer
            q_tokens = [tokenizer.apply(q) for q in game.questions]
            a_tokens = [tokenizer.apply(a, is_answer=True)
                        for a in game.answers]

            # Flatten into one sequence: <start> q1 a1 q2 a2 ... <stop_dialogue>
            dialogue = [tokenizer.start_token]
            answer_positions = []
            cursor = 0
            for q_tok, a_tok in zip(q_tokens, a_tokens):
                dialogue.extend(q_tok)
                dialogue.extend(a_tok)

                # Index of the answer inside the flattened dialogue
                cursor += len(q_tok) + 1
                answer_positions.append(cursor)
            dialogue.append(tokenizer.stop_dialogue)

            batch["dialogues"].append(dialogue)
            all_answer_indices.append(answer_positions)

            # Candidate objects; the target is recorded when encountered
            spatial_feats, category_ids = [], []
            for position, obj in enumerate(game.objects):
                spatial = get_spatial_feat(obj.bbox, game.image.width,
                                           game.image.height)
                category = obj.category_id

                if obj.id == game.object_id:
                    batch['targets_category'].append(category)
                    batch['targets_spatial'].append(spatial)
                    batch['targets_index'].append(position)

                spatial_feats.append(spatial)
                category_ids.append(category)
            batch['obj_spats'].append(spatial_feats)
            batch['obj_cats'].append(category_ids)

            # Image: the array is allocated lazily, on the first available image
            img = game.image.get_image()
            if img is None:
                continue
            if "images" not in batch:
                batch["images"] = np.zeros((batch_size, ) + img.shape)
            batch["images"][row] = img

        # Pad the dialogue tokens
        batch['dialogues'], batch['seq_length'] = padder(
            batch['dialogues'], padding_symbol=tokenizer.padding_token)
        seq_length = batch['seq_length']
        max_length = max(seq_length)

        # Token mask: zero strictly after position seq_length[i]
        padding_mask = np.ones((batch_size, max_length), dtype=np.float32)
        for row in range(batch_size):
            padding_mask[row, (seq_length[row] + 1):] = 0.
        batch['padding_mask'] = padding_mask

        # Answer mask: zero at every recorded answer index
        answer_mask = np.ones((batch_size, max_length), dtype=np.float32)
        for row, positions in enumerate(all_answer_indices):
            answer_mask[row, positions] = 0.
        batch['answer_mask'] = answer_mask

        # Pad the objects
        batch['obj_spats'], obj_length = padder_3d(batch['obj_spats'])
        batch['obj_cats'], obj_length = padder(batch['obj_cats'])

        # Object mask: 1.0 for real objects, 0.0 for padding
        max_objects = max(obj_length)
        obj_mask = np.zeros((batch_size, max_objects), dtype=np.float32)
        for row in range(batch_size):
            obj_mask[row, :obj_length[row]] = 1.0
        batch['obj_mask'] = obj_mask

        return batch
    def apply(self, games):
        """Build a ReferIt batch for the inputs enabled in ``self.sources``.

        The encoded sentence is always produced. Glove vectors are added when
        ``self.glove`` is set. The answer, image, crop, image/crop masks,
        category and spatial features are added only when the matching name
        is present in ``self.sources``.

        Returns:
            The batch dictionary with the sentence padded under ``question``
            and its lengths under ``seq_length``.

        Raises:
            AssertionError: if a mask source is requested before the matching
                ``image`` / ``crop`` entry exists in the batch.
        """
        batch = collections.defaultdict(list)
        batch_size = len(games)

        sources = self.sources
        tokenizer = self.tokenizer
        use_glove = self.glove is not None

        for row, game in enumerate(games):
            batch["raw"].append(game)

            # ReferIt sentence
            batch['question'].append(tokenizer.encode_question(game.sentence))

            # Glove vectors
            if use_glove:
                words = tokenizer.tokenize_question(game.sentence)
                batch['glove'].append(self.glove.get_embeddings(words))

            if 'answer' in sources:
                # Two-way one-hot indexed by int(game.correct_object)
                one_hot = [0, 0]
                one_hot[int(game.correct_object)] = 1
                batch['answer'].append(one_hot)

            if "image" in sources:
                img = game.image.get_image()
                if "image" not in batch:  # allocate once for the whole batch
                    batch["image"] = np.zeros((batch_size, ) + img.shape)
                batch["image"][row] = img

            if "crop" in sources:
                crop = game.object.get_crop()
                if "crop" not in batch:  # allocate once for the whole batch
                    batch["crop"] = np.zeros((batch_size, ) + crop.shape)
                batch["crop"][row] = crop

            if 'image_mask' in sources:
                assert "image" in batch, "mask input require the image source"
                mask = game.object.get_mask()

                # Use the image feature size (not the original img size)
                feat_shape = batch['image'][-1].shape
                ft_width, ft_height = feat_shape[1], feat_shape[0]

                resized = resize_image(PImage.fromarray(mask),
                                       height=ft_height,
                                       width=ft_width)
                batch['image_mask'].append(np.array(resized))

            if 'crop_mask' in sources:
                assert "crop" in batch, "mask input require the crop source"
                cmask = game.object.get_mask()

                # Use the crop feature size (not the original img size)
                feat_shape = batch['crop'][-1].shape
                ft_width, ft_height = feat_shape[1], feat_shape[0]

                cropped = scaled_crop_and_pad(raw_img=PImage.fromarray(cmask),
                                              bbox=game.object.bbox,
                                              scale=game.object.crop_scale)
                resized = resize_image(cropped,
                                       height=ft_height,
                                       width=ft_width)
                batch['crop_mask'].append(np.array(resized))

            if 'category' in sources:
                batch['category'].append(game.object.category_id)

            if 'spatial' in sources:
                batch['spatial'].append(
                    get_spatial_feat(game.object.bbox, game.image.width,
                                     game.image.height))

        # Pad the ReferIt sentences
        padded, lengths = padder(batch['question'],
                                 padding_symbol=tokenizer.padding_token)
        batch['question'] = padded
        batch['seq_length'] = lengths

        if use_glove:
            batch['glove'], _ = padder_3d(batch['glove'])

        return batch
    def apply(self, games, skip_targets=False):
        """Build a batch of per-turn question/answer histories.

        For each game, every question is encoded with start and stop tokens
        and every answer is encoded as an answer. Questions are padded per
        game (``max_seq_length=13``) and the per-game results are padded
        again across the batch once all games have been processed.

        Args:
            games: sequence of games; stored as-is under ``batch["raw"]``.
            skip_targets: when true, the ``cum_reward`` target is not built.

        Returns:
            The batch dictionary with ``q_his``, ``q_his_lengths``, ``q_turn``,
            ``a_his``, ``q_his_mask`` and, depending on ``self.sources``,
            ``image`` and ``cum_reward``.
        """
        batch = collections.defaultdict(list)
        batch["raw"] = games

        batch_size = len(games)
        padding_token = self.tokenizer.padding_token

        for i, game in enumerate(games):

            # Encode question answers
            q_tokens = [self.tokenizer.encode(q, add_start_token=True, add_stop_token=True) for q in game.questions]
            a_tokens = [self.tokenizer.encode(a, is_answer=True) for a in game.answers]

            # Reward: the game outcome (1 on success, else 0) repeated over
            # every token of every question.
            if "cum_reward" in self.sources and not skip_targets and not self.supervised:
                reward = int(game.status == "success")
                cum_reward = [[reward] * len(q) for q in q_tokens]
                if self.generate:
                    # Keep the reward aligned with the dummy question below
                    cum_reward.append([])
                cum_reward_pad, _, _ = padder(cum_reward, padding_symbol=padding_token,
                                              max_seq_length=13)

                batch["cum_reward"].append(cum_reward_pad)

            if self.generate:  # Add a dummy question at eval time to not ignore the last question
                q_tokens.append([])
                a_tokens.append([])

                # NOTE(review): answers are only padded in generate mode;
                # otherwise the raw token lists are appended below. Confirm
                # that padder_3d accepts both forms.
                a_tokens, _, _ = padder(a_tokens, padding_symbol=padding_token, max_seq_length=1)

            # Pad the questions of this game
            q_tokens_pad, q_lengths, _ = padder(q_tokens, padding_symbol=padding_token, max_seq_length=13)
            batch["q_his"].append(q_tokens_pad)
            batch["q_his_lengths"].append(q_lengths)
            batch["a_his"].append(a_tokens)

            # image
            if 'image' in self.sources:
                img = game.image.get_image()
                if "image" not in batch:  # initialize an empty array for better memory consumption
                    batch["image"] = np.zeros((batch_size,) + img.shape)
                batch["image"][i] = img

        # Pad dialogue tokens across the batch
        batch["q_his"], _ = padder_3d(batch["q_his"])
        # The padded lengths are computed once and reused for the mask; the
        # previous code called padder twice on the same input.
        batch["q_his_lengths"], _, batch["q_turn"] = padder(batch["q_his_lengths"], padding_symbol=1)
        batch["a_his"], _ = padder_3d(batch["a_his"], feature_size=1)
        batch["q_his_mask"] = mask_generate(lengths=batch["q_his_lengths"] - 1, feature_size=10)

        if 'cum_reward' in batch:
            batch['cum_reward'], _ = padder_3d(batch['cum_reward'])

        return batch
    def apply(self, games):
        """Build an oracle batch for the inputs enabled in ``self.sources``.

        Each name in ``self.sources`` switches on one family of inputs:
        question tokens, question/description embeddings (optionally with
        part-of-speech embeddings), raw description tokens, question history,
        answer, category, all-category indicator vector, spatial features,
        crop, image and mask. Variable-length inputs are padded after all
        games have been processed.

        Args:
            games: non-empty sequence of game objects.

        Returns:
            A ``collections.defaultdict(list)`` keyed by input name;
            ``batch['raw']`` holds the games themselves.

        Raises:
            AssertionError: if ``games`` is empty, if a game does not carry
                exactly one question/answer where one is required, or if the
                ``mask`` source is requested without the ``image`` source.
        """
        sources = self.sources

        batch = collections.defaultdict(list)
        batch_size = len(games)
        assert batch_size > 0

        for i, game in enumerate(games):
            batch['raw'].append(game)
            image = game.image

            if 'question' in sources:
                question = self.tokenizer_question.apply(game.questions[0],
                                                         use_dict_ques=False)

                # Fixed-width (14) zero-padded token vector
                sp_zeros = np.zeros((14))
                sp_zeros[0:len(question)] = question

                batch["question"].append(sp_zeros)
                batch["seq_length_question"].append(len(question))

            if 'embedding_vector_ques' in sources:
                assert len(game.questions) == 1
                # NB: even <unk> may have a specific embedding vector
                words = self.tokenizer_question.apply(game.questions[0],
                                                      tokent_int=False)

                if "question_pos" in sources:
                    embedding_vectors, embedding_pos = get_embeddings(
                        words,
                        pos=self.config["model"]["question"]["pos"],
                        lemme=self.config["model"]["question"]["lemme"],
                        model_wordd=self.model_wordd,
                        model_worddl=self.model_worddl,
                        model_word=self.model_word,
                        model_wordl=self.model_wordl,
                        model_posd=self.model_posd,
                        model_pos=self.model_pos
                    )  # slow (copy gloves in process)
                    batch['embedding_vector_ques'].append(embedding_vectors)
                    batch['embedding_vector_ques_pos'].append(embedding_pos)
                    # NOTE(review): `question` is only bound by the 'question'
                    # source above; this line relies on that source being
                    # enabled as well.
                    batch['question_pos'].append(question)

                else:
                    embedding_vectors = self.embedding.get_embedding(words)
                    batch['embedding_vector_ques'].append(embedding_vectors)

            if 'description' in sources:
                description = self.tokenizer_question.apply(
                    game.image.description, use_dict_ques=False)

                batch["description"].append(description)

            if 'ques_hist_H0' in sources:
                assert len(game.questions) == 1

                # NOTE(review): all six history slots are filled from
                # game.all_last_question[0]; indexing by `j` may have been
                # intended. Behavior is kept as-is, confirm with the data
                # layout.
                for j in range(6):
                    question_answer = game.all_last_question[0]

                    if len(question_answer) > 1:
                        words = self.tokenizer_question.apply(
                            question_answer[1][0])
                    else:
                        words = self.tokenizer_question.apply(
                            question_answer[0])

                    # Fixed-width (14) zero-padded token vector
                    sp_zeros = np.zeros((14))
                    sp_zeros[0:len(words)] = words

                    batch['ques_hist_H{}'.format(j)].append(sp_zeros)
                    batch['seq_length_question_history_H{}'.format(j)].append(
                        len(words))

            if 'embedding_vector_des' in sources:
                description = self.tokenizer_description.apply(
                    game.image.description, tokent_int=False)

                if "des_pos" in sources:
                    embedding_vectors, embedding_pos = get_embeddings(
                        description,
                        pos=self.config["model"]["question"]["pos"],
                        lemme=self.config["model"]["question"]["lemme"],
                        model_wordd=self.model_wordd,
                        model_worddl=self.model_worddl,
                        model_word=self.model_word,
                        model_wordl=self.model_wordl,
                        model_posd=self.model_posd,
                        model_pos=self.model_pos
                    )  # slow (copy gloves in process)
                    batch['embedding_vector_des'].append(embedding_vectors)
                    batch['embedding_vector_des_pos'].append(embedding_pos)

                else:
                    # NOTE(review): when neither the fasttext nor the glove
                    # option is set, `embedding_vectors` is not assigned in
                    # this branch (stale value or NameError).
                    if self.config["model"]["fasttext"]:
                        embedding_vectors, _ = get_embeddings(
                            description,
                            pos=self.config["model"]["question"]["pos"],
                            lemme=self.config["model"]["question"]["lemme"],
                            model_wordd=self.model_wordd,
                            model_worddl=self.model_worddl,
                            model_word=self.model_word,
                            model_wordl=self.model_wordl,
                            model_posd=self.model_posd,
                            model_pos=self.model_pos
                        )  # slow (copy gloves in process)
                    elif self.config["model"]["glove"]:
                        embedding_vectors = self.glove.get_embeddings(
                            description)

                    batch['embedding_vector_des'].append(embedding_vectors)

            if 'answer' in sources:

                if "answer" not in batch:
                    batch["answer"] = np.zeros((batch_size, 3))

                assert len(game.answers) == 1
                batch['answer'][i] = answer_dict[game.answers[0]]

            if 'category' in sources:
                use_embedding_cat = self.config["model"]["category"][
                    "use_embedding"]

                if "category" not in batch:
                    if use_embedding_cat:
                        batch['category'] = np.zeros((batch_size, 100))
                    else:
                        batch['category'] = np.zeros((batch_size))

                if use_embedding_cat:
                    embc = np.asarray(
                        self.embedding.get_embedding([game.object.category]))
                    category_input = embc.reshape((100))
                else:
                    category_input = game.object.category_id

                batch['category'][i] = category_input

            if 'allcategory' in sources:
                # Indicator vector (length 90) of the categories in the game,
                # indexed by category_id - 1
                allcategory = []
                allcategory_hot = np.zeros(shape=(90), dtype=int)

                for obj in game.objects:
                    allcategory.append(obj.category_id - 1)

                allcategory_hot[allcategory] = 1
                batch['allcategory'].append(allcategory_hot)

            if 'spatial' in sources:
                if 'spatial' not in batch:
                    batch['spatial'] = np.zeros((batch_size, 8), dtype=float)
                spat_feat = get_spatial_feat(game.object.bbox, image.width,
                                             image.height)
                batch['spatial'][i] = spat_feat

            # NOTE(review): 'crop' and 'image' both append to
            # batch['image_id'], so enabling both yields two ids per game.
            if 'crop' in sources:
                batch['crop'].append(game.object.get_crop())
                batch['image_id'].append(image.get_idimage())

            if 'image' in sources:
                features_image = image.get_image()
                batch['image'].append(features_image)
                batch['image_id'].append(image.get_idimage())

            if 'mask' in sources:
                # Key-presence test. The previous check looked for the string
                # "image" inside the list of image features, which also
                # created the key through the defaultdict and could never
                # succeed.
                assert "image" in batch, "mask input require the image source"
                mask = game.object.get_mask()
                # Use the image feature size (not the original img size)
                ft_width, ft_height = batch['image'][-1].shape[1],\
                                     batch['image'][-1].shape[2]
                mask = resize_image(Image.fromarray(mask),
                                    height=ft_height,
                                    width=ft_width)
                batch['mask'].append(mask)

        # Pad the variable-length inputs

        if "question_pos" in sources:
            batch['question_pos'], batch['seq_length_ques_pos'] = padder(
                batch['question_pos'],
                padding_symbol=self.tokenizer_question.padding_token)

        if "description" in sources:
            batch['description'], batch['seq_length_description'] = padder(
                batch['description'])

        if 'embedding_vector_ques' in sources:
            batch['embedding_vector_ques'], _ = padder_3d(
                batch['embedding_vector_ques'], max_seq_length=12)

        if 'embedding_vector_ques_hist' in sources:
            batch_hist, size_sentences, _ = padder_4d(
                batch['embedding_vector_ques_hist'], max_seq_length=14)
            batch_hist = np.asarray(batch_hist)
            size_sentences = np.asarray(size_sentences)

            batch['embedding_vector_ques_hist'] = batch_hist

            # Expose each of the six history slots under its own key
            for hist_idx in range(6):
                batch['ques_hist_H{}'.format(hist_idx)] = batch_hist[:, hist_idx, :]
                batch['seq_length_question_history_H{}'.format(
                    hist_idx)] = size_sentences[:, hist_idx]

        if 'embedding_vector_ques_pos' in sources:
            batch['embedding_vector_ques_pos'], _ = padder_3d(
                batch['embedding_vector_ques_pos'])

        if 'embedding_vector_des' in sources:
            batch['embedding_vector_des'], batch[
                'seq_length_description'] = padder_3d(
                    batch['embedding_vector_des'])

        if 'embedding_vector_des_pos' in sources:
            batch['embedding_vector_des_pos'], _ = padder_3d(
                batch['embedding_vector_des_pos'])

        return batch
    def apply(self, games, skip_targets=False):

        batch = collections.defaultdict(list)
        batch["raw"] = games
        batch_size = len(games)

        for i, game in enumerate(games):

            if 'question' in self.sources:
                assert len(game.questions) == 1
                batch['question'].append(
                    self.tokenizer.encode(game.questions[0]))
                # questions = []
                # for q, a in zip(game.questions[:-1], game.answers[:-1]):
                #     questions.append(self.tokenizer.encode(q, add_stop_token=True))
                #     questions.append(self.tokenizer.encode(a, is_answer=True))
                # questions.append(self.tokenizer.encode(game.questions[-1], add_stop_token=True))
                # batch['question'].append(list(chain.from_iterable(questions)))

            if 'glove' in self.sources:
                words = self.tokenizer.decode(batch['question'][i])
                glove_vectors = self.glove.get_embeddings(words)
                batch['glove'].append(glove_vectors)

            if 'answer' in self.sources and not skip_targets:
                batch['answer'].append(
                    self.tokenizer.encode_oracle_answer(game.answers[-1],
                                                        sparse=False))

            if 'category' in self.sources:
                batch['category'].append(game.object.category_id)

            if 'spatial' in self.sources:
                spat_feat = get_spatial_feat(game.object.bbox,
                                             game.image.width,
                                             game.image.height)
                batch['spatial'].append(spat_feat)

            if 'crop' in self.sources:
                crop = game.object.get_crop()
                if "crop" not in batch:  # initialize an empty array for better memory consumption
                    batch["crop"] = np.zeros((batch_size, ) + crop.shape)
                batch["crop"][i] = crop

            if 'image' in self.sources:
                img = game.image.get_image()
                if "image" not in batch:  # initialize an empty array for better memory consumption
                    batch["image"] = np.zeros((batch_size, ) + img.shape)
                batch["image"][i] = img

            if 'image_mask' in self.sources:
                assert "image" in batch, "mask input require the image source"
                mask = game.object.get_mask()

                ft_width, ft_height = img.shape[1], img.shape[0]
                # ft_width, ft_height = batch['image'][-1].shape[1], \
                #                       batch['image'][-1].shape[0]  # Use the image feature size (not the original img size)

                mask = resize_image(Image.fromarray(mask),
                                    height=ft_height,
                                    width=ft_width)
                batch['image_mask'].append(np.array(mask))

            if 'crop_mask' in self.sources:
                assert "crop" in batch, "mask input require the crop source"
                cmask = game.object.get_mask()

                ft_width, ft_height = batch['crop'][-1].shape[1], \
                                      batch['crop'][-1].shape[0]  # Use the crop feature size (not the original img size)

                cmask = scaled_crop_and_pad(raw_img=Image.fromarray(cmask),
                                            bbox=game.object.bbox,
                                            scale=game.object.crop_scale)
                cmask = resize_image(cmask, height=ft_height, width=ft_width)
                batch['crop_mask'].append(np.array(cmask))

        # Pad the questions
        if 'question' in self.sources:
            batch['question'], batch['seq_length'], _ = padder(
                batch['question'], padding_symbol=self.tokenizer.padding_token)

        if 'glove' in self.sources:
            # (?, 16, 300)   (batch, max num word, glove emb size)
            batch['glove'], _ = padder_3d(batch['glove'])

        return batch
    def apply(self, games, skip_targets=False):
        """Build a guesser batch from per-turn histories and candidate objects.

        For each game, questions are encoded with a stop token and padded
        (``max_seq_length=12``), and answers are encoded and padded
        (``max_seq_length=1``). Every object is described by its spatial
        features and category id. The object whose id equals
        ``game.object.id`` is recorded as the target unless ``skip_targets``
        is true.

        Args:
            games: sequence of games; stored as-is under ``batch["raw"]``.
            skip_targets: when true, no ``target_*`` entry is produced.

        Returns:
            The batch dictionary with padded histories (``q_his``,
            ``q_his_lengths``, ``q_turn``, ``max_turn``, ``a_his``), padded
            objects (``obj_spat``, ``obj_cat``, ``obj_seq_length``) and,
            when enabled in ``self.sources``, ``image``.
        """
        tokenizer = self.tokenizer
        pad_token = tokenizer.padding_token

        batch = collections.defaultdict(list)
        batch["raw"] = games
        batch_size = len(games)

        for row, game in enumerate(games):

            # Encode the questions and answers of the dialogue
            q_tokens = [tokenizer.encode(q, add_stop_token=True)
                        for q in game.questions]
            a_tokens = [tokenizer.encode(a, is_answer=True)
                        for a in game.answers]

            # Pad the answers, then the questions
            a_padded, _, _ = padder(a_tokens, padding_symbol=pad_token,
                                    max_seq_length=1)
            q_padded, q_lengths, _ = padder(q_tokens, padding_symbol=pad_token,
                                            max_seq_length=12)

            batch["q_his"].append(q_padded)
            batch["q_his_lengths"].append(q_lengths)
            batch["a_his"].append(a_padded)

            # Candidate objects; the target is recorded when encountered
            spatial_feats, category_ids = [], []
            for position, obj in enumerate(game.objects):
                box = obj.bbox
                spatial = get_spatial_feat(box, game.image.width,
                                           game.image.height)
                category = obj.category_id

                # One corner point followed by width and height
                box_coord = [box.x_left, box.y_upper,
                             box.x_width, box.y_height]

                if obj.id == game.object.id and not skip_targets:
                    batch['target_category'].append(category)
                    batch['target_spatial'].append(spatial)
                    batch['target_index'].append(position)
                    batch['target_bbox'].append(box_coord)

                spatial_feats.append(spatial)
                category_ids.append(category)
            batch['obj_spat'].append(spatial_feats)
            batch['obj_cat'].append(category_ids)

            # Image, stored in an array allocated on the first game
            if 'image' in self.sources:
                img = game.image.get_image()
                if "image" not in batch:
                    batch["image"] = np.zeros((batch_size,) + img.shape)
                batch["image"][row] = img

        # Pad the histories across the batch
        batch["q_his"], _ = padder_3d(batch["q_his"])
        padded_lengths, turns, longest_turn = padder(batch["q_his_lengths"],
                                                     padding_symbol=1)
        batch["q_his_lengths"] = padded_lengths
        batch["q_turn"] = turns
        batch["max_turn"] = longest_turn
        batch["a_his"], _ = padder_3d(batch["a_his"], feature_size=1)

        # Pad the objects
        batch['obj_spat'], _ = padder_3d(batch['obj_spat'])
        batch['obj_cat'], obj_length, _ = padder(batch['obj_cat'])
        batch['obj_seq_length'] = obj_length
        return batch