Example #1
    def __getitem__(self, idx):
        info = self.qa.iloc[idx]
        answers = info['answer']
        # Map each human answer to its vocabulary index; -1 marks
        # out-of-vocabulary answers and, below, padding.
        answer_idxs = [self.answer_vocabulary.get(ans, -1) for ans in answers]
        # Pad to exactly 10 answer slots per question.
        if len(answer_idxs) < 10:
            answer_idxs = answer_idxs + [-1] * (10 - len(answer_idxs))
        image = self.preprocessed_imgs[self.image_id_to_index[
            info['image_id']]]
        question = self.text_to_instance(info["preprocessed_question"])
        return question, image, numpy.array(answer_idxs)


def my_collate(batch, vocab):
    # Questions are AllenNLP Instances: batch, index and pad them with
    # the vocabulary; images and answer arrays go through PyTorch's
    # default_collate.
    questions = Batch([x[0] for x in batch])
    questions.index_instances(vocab)
    rest = [x[1:] for x in batch]
    question_batch = questions.as_tensor_dict()["question"]["tokens"]
    image_batch, answer_batch = default_collate(rest)
    return [(question_batch, image_batch), answer_batch]


if __name__ == "__main__":
    data = VisualQATrainDataset(
        **init_config("data", VisualQATrainDataset.__init__))
    dl = DataLoader(data,
                    batch_size=12,
                    collate_fn=partial(my_collate, vocab=data.vocab))
    elem = next(iter(dl))
    print(elem)
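A quick way to sanity-check what the loader yields; a sketch, and the shape comments are assumptions about this particular dataset rather than guarantees:

(question_batch, image_batch), answer_batch = next(iter(dl))
print(question_batch.shape)  # padded question token ids, (batch, max_len)
print(image_batch.shape)     # preprocessed image features
print(answer_batch.shape)    # (batch, 10) answer indices, -1-padded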
Example #2
    idxs = np.random.choice(len(qa), n_examples)
    if isinstance(qa, DataFrame):
        qs = [row for _, row in qa.iloc[idxs].iterrows()]
    else:
        qs = [qa[idx] for idx in idxs]
    imgs = [read_image(img_path, q['image_id'], True) for q in qs]
    return imgs, qs


def preprocess_questions_answers(train_annotations, val_annotations,
                                 train_questions, val_questions,
                                 train_qa_result_file, val_qa_result_file,
                                 max_answers):
    train_data = preprocess_part_questions_answers(*read_questions_answers(
        train_questions, train_annotations),
                                                   max_answers=max_answers,
                                                   only_one_word_answers=False)
    val_data = preprocess_part_questions_answers(*read_questions_answers(
        val_questions, val_annotations),
                                                 max_answers=None,
                                                 only_one_word_answers=False,
                                                 flatten=True)
    save_qa_data(train_data, train_qa_result_file)
    save_qa_data(val_data, val_qa_result_file)


if __name__ == "__main__":
    preprocess_questions_answers(
        **init_config("data", preprocess_questions_answers))
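The max_answers argument suggests the training answers are capped at the most frequent ones. A minimal, hypothetical sketch of such a filtering step (this is not the author's preprocess_part_questions_answers):

from collections import Counter

def top_answer_vocabulary(all_answers, max_answers):
    # Keep the `max_answers` most frequent answers; anything rarer maps
    # to -1 via answer_vocabulary.get(ans, -1) as in Example #1.
    counts = Counter(all_answers)
    return {ans: idx
            for idx, (ans, _) in enumerate(counts.most_common(max_answers))}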
Example #3
                return self.zero_vector
            # return numpy.random.uniform(-1., 1., self.emb_size)
            return self.mean_vec

    def __getitem__(self, word):
        return self.get(word)


def create_embeddings(pretrained_embeddings, vocab_result_file,
                      embeddings_result_file):
    kv = KeyedVectors.load_word2vec_format(pretrained_embeddings, binary=True)
    with open(vocab_result_file) as f:
        vocab = set(f.read().split("\n"))

    # Keep only vocabulary words that have a pretrained vector; misses
    # fall back to SavedEmbeddings' zero/mean vectors at lookup time.
    word_to_vec = {}
    for word in vocab:
        if word in kv:
            word_to_vec[word] = kv[word]

    with open(embeddings_result_file, "wb") as f:
        pickle.dump(word_to_vec, f)


def read_embeddings(embeddings_result_file):
    with open(embeddings_result_file, "rb") as f:
        return SavedEmbeddings(pickle.load(f))


if __name__ == '__main__':
    create_embeddings(**init_config("data", create_embeddings))
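To use these vectors in a model, one would typically assemble them into a matrix aligned with the token vocabulary. A sketch, where the path and the index_to_word mapping are placeholders; SavedEmbeddings.__getitem__ supplies the zero/mean fallbacks for missing words:

import numpy as np

# index_to_word: hypothetical {index: token} mapping from the vocab.
embeddings = read_embeddings("data/embeddings.pkl")  # placeholder path
matrix = np.stack([embeddings[index_to_word[i]]
                   for i in range(len(index_to_word))])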
Example #4
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt='%H:%M:%S')

DEBUGGING_MODE = int(environ.get("DEBUG_MODE", 0)) == 1
if DEBUGGING_MODE:
    logging.info(
        "Run was started in debugging mode: no info will be stored in mlflow or tensorboard"
    )
else:
    logging.info(
        "Run was started in normal mode: info will be stored in mlflow and tensorboard"
    )

device = "cuda" if torch.cuda.is_available() else "cpu"
logging.info(f"Using device: {device}")

experiment_config = init_config()
data_config = experiment_config.pop("data")
training_config = experiment_config.pop("training")
train_dataset = VisualQATrainDataset(
    **filter_config(data_config, VisualQATrainDataset.__init__))
vocab = train_dataset.vocab

val_dataset = VisualQAValDataset(
    **filter_config(data_config, VisualQAValDataset.__init__),
    vocab=vocab,
    answer_vocabulary=train_dataset.answer_vocabulary)

train_loader = DataLoader(train_dataset,
                          batch_size=training_config.pop("train_batch_size"),
                          shuffle=True,
                          collate_fn=partial(my_collate, vocab=vocab))
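The snippet ends here; a validation loader would presumably mirror the training one. A sketch, with val_batch_size as an assumed config key:

val_loader = DataLoader(val_dataset,
                        batch_size=training_config.pop("val_batch_size"),
                        shuffle=False,
                        collate_fn=partial(my_collate, vocab=vocab))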
Example #5
        image_emb = self.lrelu(self.image_to_hidden(image_emb))

        # Fuse question and image features by element-wise multiplication.
        combined = question_features * image_emb
        combined = self.dropout(combined)

        combined = self.lrelu(self.hidden_to_hidden(combined))
        combined = self.dropout(combined)

        # Unnormalised scores over the answer vocabulary.
        logits = self.scores_layer(combined)
        return logits

    @property
    def device(self):
        return next(self.parameters()).device


if __name__ == "__main__":
    config = init_config()
    data_config = config.pop("data")
    data = VisualQATrainDataset(
        **filter_config(data_config, VisualQATrainDataset.__init__))
    dl = DataLoader(data,
                    batch_size=12,
                    collate_fn=partial(my_collate, vocab=data.vocab))
    x, y = next(iter(dl))
    model = BaselineModel(
        config=config["model"],
        vocab=data.vocab,
        embeddings_result_file=data_config["embeddings_result_file"])
    model(x)
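The forward pass in this example presupposes a handful of layers. A minimal sketch of definitions consistent with it; all sizes are placeholders, not the author's configuration:

import torch.nn as nn

hidden_size = 1024            # placeholder
image_feature_size = 2048     # placeholder
num_answers = 3000            # placeholder

image_to_hidden = nn.Linear(image_feature_size, hidden_size)
hidden_to_hidden = nn.Linear(hidden_size, hidden_size)
scores_layer = nn.Linear(hidden_size, num_answers)
lrelu = nn.LeakyReLU()
dropout = nn.Dropout(p=0.5)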
Example #6
        all_idxs = []
        for idxs, batch in tqdm(dataloader):
            all_idxs.extend(idxs)
            process_batch(model, batch.cuda(), preprocessed)
        # The loader must yield items in order so features align with files.
        assert sorted(all_idxs) == all_idxs
        filenames = [x.stem for x in files]
        images = np.concatenate(preprocessed)

        filenames_saving_path = Path(filenames_saving_path)
        create_parent_dir_if_not_exists(filenames_saving_path)
        with filenames_saving_path.open("w") as f:
            json.dump(filenames, f)

        images_saving_path = Path(images_saving_path)
        create_parent_dir_if_not_exists(images_saving_path)
        with h5py.File(images_saving_path, "w") as f:
            f.create_dataset("images", data=images, dtype='float32')
        return filenames, images

    preprocess_dataset(train_images,
                       filenames_saving_path=train_filenames_result_file,
                       images_saving_path=train_images_result_file)
    preprocess_dataset(val_images,
                       filenames_saving_path=val_filenames_result_file,
                       images_saving_path=val_images_result_file)
    info("Processed raw images!")


if __name__ == "__main__":
    preprocess_images(**init_config("data", preprocess_images))
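Reading the saved artifacts back is straightforward; a sketch with placeholder paths:

import json
import h5py

with open("data/train_filenames.json") as f:       # placeholder path
    filenames = json.load(f)
with h5py.File("data/train_images.h5", "r") as f:  # placeholder path
    images = f["images"][:]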