def apply(self, games):
    """Collate a non-empty list of games into one batch dictionary.

    Keys written to the returned ``defaultdict(list)``:
      * ``raw``        - the games, in input order;
      * ``question``   - first value returned by ``padder`` over the
                         encoded questions;
      * ``seq_length`` - second value returned by ``padder``;
      * ``answer``     - one encoded answer per game;
      * ``image``      - float32 array with one slot per game, sized from
                         the first image's shape.

    Raises:
        AssertionError: if ``games`` is empty.
    """
    n_games = len(games)
    batch = collections.defaultdict(list)
    assert n_games > 0

    encoder = self.tokenizer
    for slot, sample in enumerate(games):
        batch["raw"].append(sample)

        # Question, then answer, both encoded by the tokenizer
        batch['question'].append(encoder.encode_question(sample.question))
        batch['answer'].append(encoder.encode_answer(sample.answer))

        # Image: allocate the full array once, then fill slot by slot
        picture = sample.image.get_image()
        if "image" not in batch:
            batch["image"] = np.zeros((n_games,) + picture.shape,
                                      dtype=np.float32)
        batch["image"][slot] = picture

    # Pad the questions to a common length
    padded, lengths = padder(batch['question'],
                             padding_symbol=encoder.padding_token)
    batch['question'] = padded
    batch['seq_length'] = lengths
    return batch
def apply(self, games):
    """Collate games into a batch of candidate objects with one target each.

    For every game, the spatial feature and category id of each object are
    collected. One object is then chosen as the target: a random index when
    ``self.train`` is truthy, otherwise the index of ``game.object``.
    The game is updated in place (``object`` and ``object_id``) to point at
    that target.

    Keys written: ``raw``, ``obj_spats``, ``obj_cats``, ``targets_index``,
    ``targets_spatial``, ``targets_category``, ``debug``, ``obj_mask`` and,
    when at least one game yields an image, ``images``.
    """
    n_games = len(games)
    batch = collections.defaultdict(list)

    for row, game in enumerate(games):
        batch['raw'].append(game)

        # Candidate objects: spatial features + categories (Guesser)
        spatials = [
            get_spatial_feat(candidate.bbox, game.picture.width,
                             game.picture.height)
            for candidate in game.objects
        ]
        categories = [candidate.category_id for candidate in game.objects]
        batch['obj_spats'].append(spatials)
        batch['obj_cats'].append(categories)

        # Select the target object
        picked = (random.randint(0, len(game.objects) - 1)
                  if self.train
                  else game.objects.index(game.object))
        target = game.objects[picked]

        # The input game is mutated so that it refers to the chosen target
        game.object = target
        game.object_id = target.id

        batch['targets_index'].append(picked)
        batch['targets_spatial'].append(spatials[picked])
        batch['targets_category'].append(categories[picked])

        debug_entry = (target.category,
                       (target.bbox.x_center, target.bbox.y_center),
                       game.picture.url)
        batch['debug'].append(debug_entry)

        # Image (optional): nothing is stored for games without one
        picture = game.picture.get_image()
        if picture is None:
            continue
        if "images" not in batch:
            # Allocate once for the whole batch, sized from the first image
            batch["images"] = np.zeros((n_games,) + picture.shape)
        batch["images"][row] = picture

    # Pad objects; the lengths returned by the second call are the ones used
    padded_spats, obj_length = padder_3d(batch['obj_spats'])
    batch['obj_spats'] = padded_spats
    padded_cats, obj_length = padder(batch['obj_cats'])
    batch['obj_cats'] = padded_cats

    # Object mask: 1.0 on the first obj_length[row] columns of each row
    obj_mask = np.zeros((n_games, max(obj_length)), dtype=np.float32)
    batch['obj_mask'] = obj_mask
    for row in range(n_games):
        obj_mask[row, :obj_length[row]] = 1.0

    return batch
def apply(self, games):
    """Collate a non-empty list of games into a batch with answer counts.

    Keys written: ``raw``, ``question`` (padded), ``seq_length``,
    ``seq_mask``, ``answer_count``, ``image`` and, when ``'glove'`` is in
    ``self.sources``, ``glove`` (padded with ``padder_3d``).

    ``answer_count[row][answer_id]`` is incremented once per answer of the
    game. Answers encoded as ``self.tokenizer.unknown_answer`` are skipped
    when ``self.remove_unknown`` is truthy.

    Raises:
        AssertionError: if ``games`` is empty.
    """
    batch = collections.defaultdict(list)
    n_games = len(games)
    assert n_games > 0

    with_glove = 'glove' in self.sources

    for row, game in enumerate(games):
        batch["raw"].append(game)

        # Encoded question
        batch['question'].append(self.tokenizer.encode_question(game.question))

        if with_glove:
            # Glove vectors of the tokenized question
            # (NB even <unk> may have a specific glove)
            words = self.tokenizer.tokenize_question(game.question)
            batch['glove'].append(self.glove.get_embeddings(words))

        # Answer histogram, allocated on the first game
        if "answer_count" not in batch:
            batch["answer_count"] = np.zeros(
                (n_games, self.tokenizer.no_answers))
        counts = batch["answer_count"][row]
        for answer in game.answers:
            answer_id = self.tokenizer.encode_answer(answer)
            dropped = (answer_id == self.tokenizer.unknown_answer
                       and self.remove_unknown)
            if not dropped:
                counts[answer_id] += 1

        # Image, allocated on the first game
        picture = game.image.get_image()
        if "image" not in batch:
            batch["image"] = np.zeros((n_games,) + picture.shape)
        batch["image"][row] = picture

    # Pad the questions
    padded, lengths = padder(batch['question'],
                             padding_symbol=self.tokenizer.padding_token)
    batch['question'] = padded
    batch['seq_length'] = lengths

    if with_glove:
        batch['glove'], _ = padder_3d(batch['glove'])

    # Sequence mask: 1.0 on the first `length` positions of each row
    longest = batch['seq_length'].max()
    seq_mask = np.zeros((n_games, longest))
    batch['seq_mask'] = seq_mask
    for row, length in enumerate(batch['seq_length']):
        seq_mask[row, :length] = 1.0

    return batch
def apply(self, games):
    """Collate oracle games into a batch, one entry per enabled source.

    ``self.sources`` selects which fields are produced. Recognised names are
    ``question``, ``answer``, ``category``, ``spatial``, ``crop``, ``image``
    and ``mask``. ``raw`` (the games themselves) is always filled.

    Args:
        games: iterable of game objects. Each must expose the attributes
            read by the enabled sources.

    Returns:
        A ``defaultdict(list)`` keyed by source name. When ``question`` is
        enabled, ``question`` holds the first value returned by ``padder``
        and ``seq_length`` the second.

    Raises:
        AssertionError: if a game does not have exactly one question or
            answer (for those sources), or if ``mask`` is requested without
            the ``image`` source.
    """
    sources = self.sources
    tokenizer = self.tokenizer
    batch = collections.defaultdict(list)

    for game in games:
        batch['raw'].append(game)
        image = game.image

        if 'question' in sources:
            assert len(game.questions) == 1
            batch['question'].append(tokenizer.apply(game.questions[0]))

        if 'answer' in sources:
            assert len(game.answers) == 1
            batch['answer'].append(answer_dict[game.answers[0]])

        if 'category' in sources:
            batch['category'].append(game.object.category_id)

        if 'spatial' in sources:
            spat_feat = get_spatial_feat(game.object.bbox,
                                         image.width, image.height)
            batch['spatial'].append(spat_feat)

        if 'crop' in sources:
            batch['crop'].append(game.object.get_crop())

        if 'image' in sources:
            batch['image'].append(image.get_image())

        if 'mask' in sources:
            # Membership must be tested on the batch keys. The entries of
            # batch['image'] are image features, never the string "image",
            # so testing inside that list could not succeed.
            assert "image" in batch, "mask input require the image source"
            mask = game.object.get_mask()

            # Use the image feature size (not the original img size).
            # NOTE(review): width/height are taken from axes 1 and 2 of the
            # last stored image - confirm this matches the feature layout.
            last_image = batch['image'][-1]
            ft_width, ft_height = last_image.shape[1], last_image.shape[2]

            mask = resize_image(Image.fromarray(mask),
                                height=ft_height, width=ft_width)
            batch['mask'].append(mask)

    # Pad the questions
    if 'question' in sources:
        batch['question'], batch['seq_length'] = padder(
            batch['question'],
            padding_symbol=tokenizer.word2i['<padding>'])

    return batch
def apply(self, games):
    """Collate oracle games into a batch, one entry per enabled source.

    ``self.sources`` selects the produced fields among ``question``,
    ``answer``, ``category``, ``spatial``, ``crop`` and ``image``.
    ``raw`` (the games themselves) is always filled.

    Returns a ``defaultdict(list)``. When ``question`` is enabled, its value
    is the first result of ``padder`` and ``seq_length`` the second.

    Raises:
        AssertionError: if a game does not have exactly one question or
            answer (for those sources).
    """
    enabled = self.sources
    tok = self.tokenizer
    out = collections.defaultdict(list)

    for sample in games:
        out['raw'].append(sample)
        pic = sample.picture

        if 'question' in enabled:
            assert len(sample.questions) == 1
            encoded = tok.apply(sample.questions[0])
            out['question'].append(encoded)

        if 'answer' in enabled:
            assert len(sample.answers) == 1
            out['answer'].append(answer_dict[sample.answers[0]])

        if 'category' in enabled:
            out['category'].append(sample.object.category_id)

        if 'spatial' in enabled:
            out['spatial'].append(
                get_spatial_feat(sample.object.bbox, pic.width, pic.height))

        if 'crop' in enabled:
            crop = sample.object.get_crop(bbox=sample.object.bbox,
                                          image_id=pic.id)
            out['crop'].append(crop)

        if 'image' in enabled:
            out['image'].append(pic.get_image())

    # Pad the questions
    if 'question' in enabled:
        padded, lengths = padder(out['question'],
                                 padding_symbol=tok.word2i['<padding>'])
        out['question'] = padded
        out['seq_length'] = lengths

    return out
def apply(self, games):
    """Collate games into a batch with one flattened dialogue per game.

    Each dialogue is ``start_token``, then every question's tokens followed
    by its answer's tokens, then ``stop_dialogue``. For every
    question/answer pair an index is recorded as
    ``previous_index + len(question_tokens) + 1`` (starting from 0); those
    indices are the positions zeroed in ``answer_mask``.

    Keys written: ``raw``, ``dialogues`` (padded), ``seq_length``,
    ``padding_mask``, ``answer_mask``, ``obj_spats``, ``obj_cats``,
    ``obj_mask``, ``targets_category``, ``targets_spatial``,
    ``targets_index`` and, when an image is available, ``images``.
    """
    n_games = len(games)
    batch = collections.defaultdict(list)
    answer_positions = []

    for row, game in enumerate(games):
        batch['raw'].append(game)

        # Flattened question/answer dialogue
        encoded_questions = [self.tokenizer.apply(q) for q in game.questions]
        encoded_answers = [self.tokenizer.apply(a, is_answer=True)
                           for a in game.answers]

        dialogue = [self.tokenizer.start_token]  # leading start token
        positions = []
        cursor = 0
        for question, answer in zip(encoded_questions, encoded_answers):
            dialogue.extend(question)
            dialogue.extend(answer)
            # Index of the answer in the full dialogue
            cursor = cursor + len(question) + 1
            positions.append(cursor)
        dialogue.append(self.tokenizer.stop_dialogue)  # trailing STOP token

        batch["dialogues"].append(dialogue)
        answer_positions.append(positions)

        # Object embedding; the object whose id equals game.object_id
        # also feeds the targets_* entries
        spatials = []
        categories = []
        for position, candidate in enumerate(game.objects):
            spatial = get_spatial_feat(candidate.bbox, game.image.width,
                                       game.image.height)
            category = candidate.category_id
            if candidate.id == game.object_id:
                batch['targets_category'].append(category)
                batch['targets_spatial'].append(spatial)
                batch['targets_index'].append(position)
            spatials.append(spatial)
            categories.append(category)
        batch['obj_spats'].append(spatials)
        batch['obj_cats'].append(categories)

        # Image (optional)
        picture = game.image.get_image()
        if picture is not None:
            if "images" not in batch:
                # Allocate once for the whole batch
                batch["images"] = np.zeros((n_games,) + picture.shape)
            batch["images"][row] = picture

    # Pad dialogue tokens
    padded_dialogues, seq_length = padder(
        batch['dialogues'], padding_symbol=self.tokenizer.padding_token)
    batch['dialogues'] = padded_dialogues
    batch['seq_length'] = seq_length
    max_length = max(seq_length)

    # Token mask: zero from position seq_length + 1 onwards
    padding_mask = np.ones((n_games, max_length), dtype=np.float32)
    batch['padding_mask'] = padding_mask
    for row in range(n_games):
        padding_mask[row, (seq_length[row] + 1):] = 0.

    # Answer mask: zero at every recorded answer index
    answer_mask = np.ones((n_games, max_length), dtype=np.float32)
    batch['answer_mask'] = answer_mask
    for row in range(n_games):
        answer_mask[row, answer_positions[row]] = 0.

    # Pad objects; the lengths returned by the second call are the ones used
    padded_spats, obj_length = padder_3d(batch['obj_spats'])
    batch['obj_spats'] = padded_spats
    padded_cats, obj_length = padder(batch['obj_cats'])
    batch['obj_cats'] = padded_cats

    # Object mask: 1.0 on the first obj_length[row] columns of each row
    obj_mask = np.zeros((n_games, max(obj_length)), dtype=np.float32)
    batch['obj_mask'] = obj_mask
    for row in range(n_games):
        obj_mask[row, :obj_length[row]] = 1.0

    return batch
def apply(self, games):
    """Collate referit games into a batch.

    The encoded sentence is always stored under ``question`` (padded, with
    ``seq_length``). Glove vectors are added when ``self.glove`` is not
    ``None``. ``self.sources`` enables the optional fields ``answer``,
    ``image``, ``crop``, ``image_mask``, ``crop_mask``, ``category`` and
    ``spatial``.

    Raises:
        AssertionError: if ``image_mask`` is requested before any image was
            stored, or ``crop_mask`` before any crop was stored.
    """
    n_games = len(games)
    batch = collections.defaultdict(list)

    for row, game in enumerate(games):
        batch["raw"].append(game)

        # Referit sentence
        batch['question'].append(self.tokenizer.encode_question(game.sentence))

        # Gloves
        if self.glove is not None:
            words = self.tokenizer.tokenize_question(game.sentence)
            batch['glove'].append(self.glove.get_embeddings(words))

        if 'answer' in self.sources:
            # Two-slot indicator, set at index int(game.correct_object)
            indicator = [0, 0]
            indicator[int(game.correct_object)] = 1
            batch['answer'].append(indicator)

        if "image" in self.sources:
            picture = game.image.get_image()
            if "image" not in batch:
                # Allocate once for the whole batch
                batch["image"] = np.zeros((n_games,) + picture.shape)
            batch["image"][row] = picture

        if "crop" in self.sources:
            cropped = game.object.get_crop()
            if "crop" not in batch:
                # Allocate once for the whole batch
                batch["crop"] = np.zeros((n_games,) + cropped.shape)
            batch["crop"][row] = cropped

        if 'image_mask' in self.sources:
            assert "image" in batch, "mask input require the image source"
            image_mask = game.object.get_mask()
            # Use the image feature size (not the original img size)
            reference = batch['image'][-1]
            target_w, target_h = reference.shape[1], reference.shape[0]
            image_mask = resize_image(PImage.fromarray(image_mask),
                                      height=target_h, width=target_w)
            batch['image_mask'].append(np.array(image_mask))

        if 'crop_mask' in self.sources:
            assert "crop" in batch, "mask input require the crop source"
            crop_mask = game.object.get_mask()
            # Use the crop feature size (not the original img size)
            reference = batch['crop'][-1]
            target_w, target_h = reference.shape[1], reference.shape[0]
            crop_mask = scaled_crop_and_pad(
                raw_img=PImage.fromarray(crop_mask),
                bbox=game.object.bbox,
                scale=game.object.crop_scale)
            crop_mask = resize_image(crop_mask,
                                     height=target_h, width=target_w)
            batch['crop_mask'].append(np.array(crop_mask))

        if 'category' in self.sources:
            batch['category'].append(game.object.category_id)

        if 'spatial' in self.sources:
            batch['spatial'].append(
                get_spatial_feat(game.object.bbox, game.image.width,
                                 game.image.height))

    # Pad referit sentence
    padded, lengths = padder(batch['question'],
                             padding_symbol=self.tokenizer.padding_token)
    batch['question'] = padded
    batch['seq_length'] = lengths

    if self.glove is not None:
        batch['glove'], _ = padder_3d(batch['glove'])

    return batch
def apply(self, games, skip_targets=False):
    """Collate games into per-turn question/answer histories.

    For each game, every question is encoded with start and stop tokens and
    every answer as an answer token. Both lists are padded per game, then
    the per-game results are padded again across the batch.

    Keys written: ``raw`` (the ``games`` argument itself), ``q_his``,
    ``q_his_lengths``, ``q_turn``, ``a_his``, ``q_his_mask`` and,
    depending on ``self.sources``, ``image`` and ``cum_reward``.

    ``cum_reward`` is produced only when it is in ``self.sources``,
    ``skip_targets`` is false and ``self.supervised`` is falsy. The reward
    is 1 when ``game.status == "success"`` and 0 otherwise, repeated once
    per token of each question.

    When ``self.generate`` is truthy, an empty question and an empty answer
    are appended to the game's history before padding.
    """
    batch = collections.defaultdict(list)
    batch["raw"] = games
    n_games = len(games)
    pad = None  # placeholder, bound per use below

    for row, game in enumerate(games):
        # Encode question answers
        questions = [
            self.tokenizer.encode(q, add_start_token=True, add_stop_token=True)
            for q in game.questions
        ]
        answers = [self.tokenizer.encode(a, is_answer=True)
                   for a in game.answers]

        # Reward
        wants_reward = ("cum_reward" in self.sources
                        and not skip_targets
                        and not self.supervised)
        if wants_reward:
            reward = int(game.status == "success")
            per_question = [[reward] * len(q) for q in questions]
            if self.generate:
                per_question.append([])
            padded_reward, _, _ = padder(
                per_question,
                padding_symbol=self.tokenizer.padding_token,
                max_seq_length=13)
            batch["cum_reward"].append(padded_reward)

        if self.generate:
            # Add a dummy question at eval time to not ignore the last question
            questions.append([])
            answers.append([])

        # Pad the answers, then the questions
        answers, _answer_lengths, _ = padder(
            answers,
            padding_symbol=self.tokenizer.padding_token,
            max_seq_length=1)
        padded_questions, question_lengths, _ = padder(
            questions,
            padding_symbol=self.tokenizer.padding_token,
            max_seq_length=13)

        batch["q_his"].append(padded_questions)
        batch["q_his_lengths"].append(question_lengths)
        batch["a_his"].append(answers)

        # Image
        if 'image' in self.sources:
            picture = game.image.get_image()
            if "image" not in batch:
                # Allocate once for the whole batch
                batch["image"] = np.zeros((n_games,) + picture.shape)
            batch["image"][row] = picture

    # Pad dialogue tokens
    batch["q_his"], _max_turn = padder_3d(batch["q_his"])

    # The lengths are padded twice: the first result feeds the mask, the
    # second one is stored in the batch together with its third output
    true_lengths, _, _ = padder(batch["q_his_lengths"], padding_symbol=1)
    padded_lengths, _, turns = padder(batch["q_his_lengths"],
                                      padding_symbol=1)
    batch["q_his_lengths"] = padded_lengths
    batch["q_turn"] = turns

    batch["a_his"], _ = padder_3d(batch["a_his"], feature_size=1)
    batch["q_his_mask"] = mask_generate(lengths=true_lengths - 1,
                                        feature_size=10)

    if 'cum_reward' in batch:
        batch['cum_reward'], _ = padder_3d(batch['cum_reward'])

    return batch
def apply(self, games):
    """Collate a non-empty list of oracle games, one entry per enabled source.

    ``self.sources`` selects the produced fields. ``raw`` (the games
    themselves) is always filled. Sources handled inside the per-game loop:
    ``question``, ``embedding_vector_ques`` (with optional
    ``question_pos``), ``description``, ``ques_hist_H0``,
    ``embedding_vector_des`` (with optional ``des_pos``), ``answer``,
    ``category``, ``allcategory``, ``spatial``, ``crop``, ``image`` and
    ``mask``. After the loop the variable-length fields are padded with
    ``padder`` / ``padder_3d`` / ``padder_4d``.

    Args:
        games: sequence of game objects exposing the attributes read by
            the enabled sources.

    Returns:
        A ``defaultdict(list)`` keyed by field name.

    Raises:
        AssertionError: if ``games`` is empty, if a game does not hold
            exactly one question/answer where that is required, or if
            ``mask`` is requested without the ``image`` source.
    """
    sources = self.sources
    batch = collections.defaultdict(list)
    batch_size = len(games)
    assert batch_size > 0

    def pos_embeddings(tokens):
        # Single place for the get_embeddings call shared by the question
        # and description branches; all options are read at call time.
        question_cfg = self.config["model"]["question"]
        return get_embeddings(
            tokens,
            pos=question_cfg["pos"],
            lemme=question_cfg["lemme"],
            model_wordd=self.model_wordd,
            model_worddl=self.model_worddl,
            model_word=self.model_word,
            model_wordl=self.model_wordl,
            model_posd=self.model_posd,
            model_pos=self.model_pos)  # slow (copy gloves in process)

    for i, game in enumerate(games):
        batch['raw'].append(game)
        image = game.image

        if 'question' in sources:
            question = self.tokenizer_question.apply(game.questions[0],
                                                     use_dict_ques=False)
            # Fixed-width (14) zero-padded copy of the token ids; a longer
            # question makes the slice assignment fail.
            sp_zeros = np.zeros((14))
            sp_zeros[0:len(question)] = question
            batch["question"].append(sp_zeros)
            batch["seq_length_question"].append(len(question))

        if 'embedding_vector_ques' in sources:
            assert len(game.questions) == 1
            # Add embedding vectors
            # (NB even <unk> may have a specific glove)
            words = self.tokenizer_question.apply(game.questions[0],
                                                  tokent_int=False)
            if "question_pos" in sources:
                embedding_vectors, embedding_pos = pos_embeddings(words)
                batch['embedding_vector_ques'].append(embedding_vectors)
                batch['embedding_vector_ques_pos'].append(embedding_pos)
                # NOTE(review): `question` is only bound by the 'question'
                # source above - confirm both are always enabled together.
                batch['question_pos'].append(question)
            else:
                embedding_vectors = self.embedding.get_embedding(words)
                batch['embedding_vector_ques'].append(embedding_vectors)

        if 'description' in sources:
            description = self.tokenizer_question.apply(
                game.image.description, use_dict_ques=False)
            batch["description"].append(description)

        if 'ques_hist_H0' in sources:
            assert len(game.questions) == 1
            for j in range(6):
                # NOTE(review): entry 0 is read on every iteration, so all
                # six history slots receive the same tokens - confirm
                # whether index j was intended.
                question_answer = game.all_last_question[0]
                if len(question_answer) > 1:
                    words = self.tokenizer_question.apply(
                        game.all_last_question[0][1][0])
                else:
                    words = self.tokenizer_question.apply(
                        game.all_last_question[0][0])
                sp_zeros = np.zeros((14))
                sp_zeros[0:len(words)] = words
                batch['ques_hist_H{}'.format(j)].append(sp_zeros)
                batch['seq_length_question_history_H{}'.format(j)].append(
                    len(words))

        if 'embedding_vector_des' in sources:
            description = self.tokenizer_description.apply(
                game.image.description, tokent_int=False)
            if "des_pos" in sources:
                embedding_vectors, embedding_pos = pos_embeddings(description)
                batch['embedding_vector_des'].append(embedding_vectors)
                batch['embedding_vector_des_pos'].append(embedding_pos)
            else:
                # NOTE(review): when neither config flag is set,
                # `embedding_vectors` is not assigned in this branch.
                if self.config["model"]["fasttext"]:
                    embedding_vectors, _ = pos_embeddings(description)
                elif self.config["model"]["glove"]:
                    embedding_vectors = self.glove.get_embeddings(description)
                batch['embedding_vector_des'].append(embedding_vectors)

        if 'answer' in sources:
            if "answer" not in batch:
                batch["answer"] = np.zeros((batch_size, 3))
            assert len(game.answers) == 1
            batch['answer'][i] = answer_dict[game.answers[0]]

        if 'category' in sources:
            use_embedding_cat = self.config["model"]["category"][
                "use_embedding"]
            if "category" not in batch:
                if use_embedding_cat:
                    batch['category'] = np.zeros((batch_size, 100))
                else:
                    batch['category'] = np.zeros((batch_size))
            if use_embedding_cat:
                embc = np.asarray(
                    self.embedding.get_embedding([game.object.category]))
                category_input = embc.reshape((100))
            else:
                category_input = game.object.category_id
            batch['category'][i] = category_input

        if 'allcategory' in sources:
            # Indicator vector of length 90, set at (category_id - 1) for
            # every object of the game
            allcategory = [obj.category_id - 1 for obj in game.objects]
            allcategory_hot = np.zeros(shape=(90), dtype=int)
            allcategory_hot[allcategory] = 1
            batch['allcategory'].append(allcategory_hot)

        if 'spatial' in sources:
            if 'spatial' not in batch:
                batch['spatial'] = np.zeros((batch_size, 8), dtype=float)
            spat_feat = get_spatial_feat(game.object.bbox,
                                         image.width, image.height)
            batch['spatial'][i] = spat_feat

        if 'crop' in sources:
            batch['crop'].append(game.object.get_crop())
            batch['image_id'].append(image.get_idimage())

        if 'image' in sources:
            features_image = image.get_image()
            batch['image'].append(features_image)
            batch['image_id'].append(image.get_idimage())

        if 'mask' in sources:
            # Membership must be tested on the batch keys. The entries of
            # batch['image'] are image features, never the string "image",
            # so testing inside that list could not succeed.
            assert "image" in batch, "mask input require the image source"
            mask = game.object.get_mask()
            # Use the image feature size (not the original img size).
            # NOTE(review): width/height are taken from axes 1 and 2 of the
            # last stored image - confirm this matches the feature layout.
            last_image = batch['image'][-1]
            ft_width, ft_height = last_image.shape[1], last_image.shape[2]
            mask = resize_image(Image.fromarray(mask),
                                height=ft_height, width=ft_width)
            batch['mask'].append(mask)

    # ---- padding of the variable-length fields ----
    if "question_pos" in sources:
        batch['question_pos'], batch['seq_length_ques_pos'] = padder(
            batch['question_pos'],
            padding_symbol=self.tokenizer_question.padding_token)

    if "description" in sources:
        batch['description'], batch['seq_length_description'] = padder(
            batch['description'])

    if 'embedding_vector_ques' in sources:
        batch['embedding_vector_ques'], _ = padder_3d(
            batch['embedding_vector_ques'], max_seq_length=12)

    if 'embedding_vector_ques_hist' in sources:
        batch_hist, size_sentences, _ = padder_4d(
            batch['embedding_vector_ques_hist'], max_seq_length=14)
        batch_hist = np.asarray(batch_hist)
        size_sentences = np.asarray(size_sentences)
        batch['embedding_vector_ques_hist'] = batch_hist
        # Split the padded history into one entry per history slot
        for slot in range(6):
            batch['ques_hist_H{}'.format(slot)] = batch_hist[:, slot, :]
            batch['seq_length_question_history_H{}'.format(slot)] = \
                size_sentences[:, slot]

    if 'embedding_vector_ques_pos' in sources:
        batch['embedding_vector_ques_pos'], _ = padder_3d(
            batch['embedding_vector_ques_pos'])

    if 'embedding_vector_des' in sources:
        batch['embedding_vector_des'], batch['seq_length_description'] = \
            padder_3d(batch['embedding_vector_des'])

    if 'embedding_vector_des_pos' in sources:
        batch['embedding_vector_des_pos'], _ = padder_3d(
            batch['embedding_vector_des_pos'])

    return batch
def apply(self, games, skip_targets=False):
    """Collate oracle games into a batch, one entry per enabled source.

    ``raw`` is set to the ``games`` argument itself. ``self.sources``
    enables ``question``, ``glove``, ``answer``, ``category``, ``spatial``,
    ``crop``, ``image``, ``image_mask`` and ``crop_mask``. The ``answer``
    field is skipped when ``skip_targets`` is true.

    After the loop, ``question`` is padded (also filling ``seq_length``)
    and ``glove`` is padded with ``padder_3d``.

    Raises:
        AssertionError: if a game does not have exactly one question (for
            the ``question`` source), or if a mask source is requested
            before its image/crop counterpart was stored.
    """
    out = collections.defaultdict(list)
    out["raw"] = games
    n_games = len(games)

    for row, game in enumerate(games):

        if 'question' in self.sources:
            assert len(game.questions) == 1
            encoded = self.tokenizer.encode(game.questions[0])
            out['question'].append(encoded)

        if 'glove' in self.sources:
            # Glove vectors of the decoded question stored for this game
            words = self.tokenizer.decode(out['question'][row])
            out['glove'].append(self.glove.get_embeddings(words))

        if 'answer' in self.sources and not skip_targets:
            encoded_answer = self.tokenizer.encode_oracle_answer(
                game.answers[-1], sparse=False)
            out['answer'].append(encoded_answer)

        if 'category' in self.sources:
            out['category'].append(game.object.category_id)

        if 'spatial' in self.sources:
            out['spatial'].append(
                get_spatial_feat(game.object.bbox, game.image.width,
                                 game.image.height))

        if 'crop' in self.sources:
            crop = game.object.get_crop()
            if "crop" not in out:
                # Allocate once for the whole batch
                out["crop"] = np.zeros((n_games,) + crop.shape)
            out["crop"][row] = crop

        if 'image' in self.sources:
            img = game.image.get_image()
            if "image" not in out:
                # Allocate once for the whole batch
                out["image"] = np.zeros((n_games,) + img.shape)
            out["image"][row] = img

        if 'image_mask' in self.sources:
            assert "image" in out, "mask input require the image source"
            image_mask = game.object.get_mask()
            # Use the image feature size (not the original img size);
            # `img` is the image fetched by the 'image' source just above
            target_w = img.shape[1]
            target_h = img.shape[0]
            image_mask = resize_image(Image.fromarray(image_mask),
                                      height=target_h, width=target_w)
            out['image_mask'].append(np.array(image_mask))

        if 'crop_mask' in self.sources:
            assert "crop" in out, "mask input require the crop source"
            crop_mask = game.object.get_mask()
            # Use the crop feature size (not the original img size)
            reference = out['crop'][-1]
            target_w, target_h = reference.shape[1], reference.shape[0]
            crop_mask = scaled_crop_and_pad(
                raw_img=Image.fromarray(crop_mask),
                bbox=game.object.bbox,
                scale=game.object.crop_scale)
            crop_mask = resize_image(crop_mask,
                                     height=target_h, width=target_w)
            out['crop_mask'].append(np.array(crop_mask))

    # Pad the questions
    if 'question' in self.sources:
        padded, lengths, _ = padder(
            out['question'], padding_symbol=self.tokenizer.padding_token)
        out['question'] = padded
        out['seq_length'] = lengths

    if 'glove' in self.sources:
        # (?, 16, 300) (batch, max num word, glove emb size)
        out['glove'], _ = padder_3d(out['glove'])

    return out
def apply(self, games, skip_targets=False):
    """Collate games into question/answer histories plus candidate objects.

    For each game, the questions are encoded with a stop token and padded;
    the answers are encoded as answer tokens. Every object contributes a
    spatial feature and a category id. The object whose id equals
    ``game.object.id`` additionally fills the ``target_*`` entries, unless
    ``skip_targets`` is true.

    Keys written: ``raw`` (the ``games`` argument itself), ``q_his``,
    ``q_his_lengths``, ``q_turn``, ``max_turn``, ``a_his``, ``obj_spat``,
    ``obj_cat``, ``obj_seq_length``, the ``target_*`` entries and, when
    ``'image'`` is in ``self.sources``, ``image``.
    """
    out = collections.defaultdict(list)
    out["raw"] = games
    n_games = len(games)

    for row, game in enumerate(games):
        # Encode question answers
        questions = [self.tokenizer.encode(q, add_stop_token=True)
                     for q in game.questions]
        answers = [self.tokenizer.encode(a, is_answer=True)
                   for a in game.answers]

        # Pad the questions of this game
        padded_questions, question_lengths, _ = padder(
            questions,
            padding_symbol=self.tokenizer.padding_token,
            max_seq_length=12)

        out["q_his"].append(padded_questions)
        out["q_his_lengths"].append(question_lengths)
        out["a_his"].append(answers)

        # Object embedding
        spatials = []
        categories = []
        for position, candidate in enumerate(game.objects):
            box = candidate.bbox
            spatial = get_spatial_feat(box, game.image.width,
                                       game.image.height)
            category = candidate.category_id
            # 1 point width height
            box_coord = [box.x_left, box.y_upper, box.x_width, box.y_height]

            if candidate.id == game.object.id and not skip_targets:
                out['target_category'].append(category)
                out['target_spatial'].append(spatial)
                out['target_index'].append(position)
                out['target_bbox'].append(box_coord)

            spatials.append(spatial)
            categories.append(category)

        out['obj_spat'].append(spatials)
        out['obj_cat'].append(categories)

        # Image
        if 'image' in self.sources:
            picture = game.image.get_image()
            if "image" not in out:
                # Allocate once for the whole batch
                out["image"] = np.zeros((n_games,) + picture.shape)
            out["image"][row] = picture

    # Pad dialogue tokens
    out["q_his"], _max_turn = padder_3d(out["q_his"])
    padded_lengths, turns, longest_turn = padder(out["q_his_lengths"],
                                                 padding_symbol=1)
    out["q_his_lengths"] = padded_lengths
    out["q_turn"] = turns
    out["max_turn"] = longest_turn
    out["a_his"], _ = padder_3d(out["a_his"], feature_size=1)

    # Pad objects
    out['obj_spat'], _ = padder_3d(out['obj_spat'])
    padded_cats, obj_length, _ = padder(out['obj_cat'])
    out['obj_cat'] = padded_cats
    out['obj_seq_length'] = obj_length

    return out