    def __getitem__(self, idx):
        info = self.qa.iloc[idx]
        answers = info['answer']
        answer_idxs = [self.answer_vocabulary.get(ans, -1) for ans in answers]
        # Pad to a fixed length of 10 answer indices per question.
        if len(answer_idxs) < 10:
            answer_idxs = answer_idxs + [-1] * (10 - len(answer_idxs))
        image = self.preprocessed_imgs[self.image_id_to_index[info['image_id']]]
        question = self.text_to_instance(info["preprocessed_question"])
        return question, image, numpy.array(answer_idxs)


def my_collate(batch, vocab):
    # Index and pad the question instances with AllenNLP; collate images and
    # answers with the default PyTorch collate.
    questions = Batch([x[0] for x in batch])
    questions.index_instances(vocab)
    rest = [x[1:] for x in batch]
    question_batch = questions.as_tensor_dict()["question"]["tokens"]
    image_batch, answer_batch = default_collate(rest)
    return [(question_batch, image_batch), answer_batch]


if __name__ == "__main__":
    data = VisualQATrainDataset(
        **init_config("data", VisualQATrainDataset.__init__))
    dl = DataLoader(data,
                    batch_size=12,
                    collate_fn=partial(my_collate, vocab=data.vocab))
    elem = next(iter(dl))
    print(elem)
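# A minimal sketch (not part of the original file) of what text_to_instance is assumed
# to do: wrap the preprocessed question into an AllenNLP Instance so that my_collate can
# index and pad it through Batch.as_tensor_dict(). The field key "question" and indexer
# key "tokens" match the keys used above; the exact tokenization is an assumption.
from allennlp.data import Instance
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data.tokenizers import Token


def text_to_instance_sketch(question: str) -> Instance:
    tokens = [Token(word) for word in question.split()]
    question_field = TextField(tokens, {"tokens": SingleIdTokenIndexer()})
    return Instance({"question": question_field})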
    idxs = np.random.choice(len(qa), n_examples)
    if isinstance(qa, DataFrame):
        qs = [elem[1] for elem in qa.iloc[idxs].iterrows()]
        imgs = [read_image(img_path, id_['image_id'], True) for id_ in qs]
    else:
        qs = [qa[idx] for idx in idxs]
        imgs = [read_image(img_path, q['image_id'], True) for q in qs]
    return imgs, qs


def preprocess_questions_answers(train_annotations, val_annotations,
                                 train_questions, val_questions,
                                 train_qa_result_file, val_qa_result_file,
                                 max_answers):
    train_data = preprocess_part_questions_answers(
        *read_questions_answers(train_questions, train_annotations),
        max_answers=max_answers,
        only_one_word_answers=False)
    val_data = preprocess_part_questions_answers(
        *read_questions_answers(val_questions, val_annotations),
        max_answers=None,
        only_one_word_answers=False,
        flatten=True)
    save_qa_data(train_data, train_qa_result_file)
    save_qa_data(val_data, val_qa_result_file)


if __name__ == "__main__":
    preprocess_questions_answers(
        **init_config("data", preprocess_questions_answers))
        return self.zero_vector
        # Alternatives kept for reference:
        # return numpy.random.uniform(-1., 1., self.emb_size)
        # return self.mean_vec

    def __getitem__(self, word):
        return self.get(word)


def create_embeddings(pretrained_embeddings, vocab_result_file,
                      embeddings_result_file):
    # Keep only the pretrained vectors for words that occur in the dataset vocabulary.
    kv = KeyedVectors.load_word2vec_format(pretrained_embeddings, binary=True)
    with open(vocab_result_file) as f:
        vocab = set(f.read().split("\n"))
    word_to_vec = {}
    for word in vocab:
        if word in kv:
            word_to_vec[word] = kv[word]
    with open(embeddings_result_file, "wb") as f:
        pickle.dump(word_to_vec, f)


def read_embeddings(embeddings_result_file):
    with open(embeddings_result_file, "rb") as f:
        return SavedEmbeddings(pickle.load(f))


if __name__ == '__main__':
    create_embeddings(**init_config("data", create_embeddings))
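# A minimal sketch (not part of the original file) of how the saved embeddings could be
# turned into an embedding weight matrix for the question encoder. The helper name, the
# emb_size argument, and the use of the vocabulary token order are assumptions.
import numpy as np
import torch


def build_embedding_matrix_sketch(saved_embeddings, tokens, emb_size):
    # One row per vocabulary token; out-of-vocabulary tokens fall back to
    # SavedEmbeddings.get, which returns the zero vector as implemented above.
    matrix = np.zeros((len(tokens), emb_size), dtype=np.float32)
    for i, token in enumerate(tokens):
        matrix[i] = saved_embeddings[token]
    return torch.from_numpy(matrix)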
logging.basicConfig(level=logging.INFO,
                    format=LOG_FORMAT,
                    datefmt='%H:%M:%S')

DEBUGGING_MODE = int(environ.get("DEBUG_MODE", 0)) == 1
if DEBUGGING_MODE:
    logging.info(
        "Run was started in debugging mode: no info will be stored in mlflow or tensorboard"
    )
else:
    logging.info(
        "Run was started in normal mode: info will be stored in mlflow and tensorboard"
    )

device = "cuda" if torch.cuda.is_available() else "cpu"
logging.info(f"Using device: {device}")

experiment_config = init_config()
data_config = experiment_config.pop("data")
training_config = experiment_config.pop("training")

train_dataset = VisualQATrainDataset(
    **filter_config(data_config, VisualQATrainDataset.__init__))
vocab = train_dataset.vocab
val_dataset = VisualQAValDataset(
    **filter_config(data_config, VisualQAValDataset.__init__),
    vocab=vocab,
    answer_vocabulary=train_dataset.answer_vocabulary)

train_loader = DataLoader(train_dataset,
                          batch_size=training_config.pop("train_batch_size"),
                          shuffle=True,
                          collate_fn=partial(my_collate, vocab=vocab),
        image_emb = self.lrelu(self.image_to_hidden(image_emb))
        # Fuse the question and image representations by element-wise multiplication.
        combined = question_features * image_emb
        combined = self.dropout(combined)
        combined = self.lrelu(self.hidden_to_hidden(combined))
        combined = self.dropout(combined)
        logits = self.scores_layer(combined)
        return logits

    @property
    def device(self):
        return next(self.parameters()).device


if __name__ == "__main__":
    config = init_config()
    data_config = config.pop("data")
    data = VisualQATrainDataset(
        **filter_config(data_config, VisualQATrainDataset.__init__))
    dl = DataLoader(data,
                    batch_size=12,
                    collate_fn=partial(my_collate, vocab=data.vocab))
    x, y = next(iter(dl))
    model = BaselineModel(
        config=config["model"],
        vocab=data.vocab,
        embeddings_result_file=data_config["embeddings_result_file"])
    model(x)
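# A minimal sketch (assumption, not in the original file) of turning the model's logits
# back into answer strings, given that answer_vocabulary maps answer -> index as used in
# the dataset's __getitem__. The helper name is hypothetical.
def decode_answers_sketch(logits, answer_vocabulary):
    index_to_answer = {idx: ans for ans, idx in answer_vocabulary.items()}
    predicted_idxs = logits.argmax(dim=-1).tolist()
    return [index_to_answer.get(idx) for idx in predicted_idxs]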
        all_idxs = []
        for idxs, batch in tqdm(dataloader):
            all_idxs.extend(idxs)
            process_batch(model, batch.cuda(), preprocessed)
        # The dataloader must yield items in file order, otherwise the saved
        # filenames would not line up with the rows of the image array.
        assert sorted(all_idxs) == all_idxs

        filenames = [x.stem for x in files]
        images = np.concatenate(preprocessed)

        filenames_saving_path = Path(filenames_saving_path)
        create_parent_dir_if_not_exists(filenames_saving_path)
        with filenames_saving_path.open("w") as f:
            json.dump(filenames, f)

        images_saving_path = Path(images_saving_path)
        create_parent_dir_if_not_exists(images_saving_path)
        with h5py.File(images_saving_path, "w") as f:
            f.create_dataset("images", data=images, dtype='float32')
        return filenames, images

    preprocess_dataset(train_images,
                       filenames_saving_path=train_filenames_result_file,
                       images_saving_path=train_images_result_file)
    preprocess_dataset(val_images,
                       filenames_saving_path=val_filenames_result_file,
                       images_saving_path=val_images_result_file)
    info("Processed raw images!")


if __name__ == "__main__":
    preprocess_images(**init_config("data", preprocess_images))
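# A minimal sketch (assumption, not in the original file) of reading the artifacts written
# above back into memory, roughly what a dataset might do to populate preprocessed_imgs
# and a lookup from saved filename to row index. The helper and argument names are
# hypothetical; how image ids are derived from filenames is not shown here.
import json

import h5py


def load_preprocessed_images_sketch(filenames_path, images_path):
    with open(filenames_path) as f:
        filenames = json.load(f)
    with h5py.File(images_path, "r") as f:
        images = f["images"][:]
    # Each saved filename corresponds to one row of the images array.
    name_to_index = {name: i for i, name in enumerate(filenames)}
    return images, name_to_index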