Esempio n. 1
0
    def forward(self, image_feat, embedding):
        """Advance the top-down attention LSTM and fuse its hidden state
        with the per-region image features.

        Returns the dropout-regularized joint feature map.
        """
        # Mean-pool the per-region features into one vector per sample.
        pooled_image = image_feat.mean(1)

        # Recover the per-device LSTM state stashed in the registry.
        lstm_state = registry.get("{}_lstm_state".format(image_feat.device))
        td_hidden, td_cell = lstm_state["td_hidden"]
        lm_hidden, _lm_cell = lstm_state["lm_hidden"]

        # Top-down LSTM input: [language hidden, pooled image, embedding].
        td_input = torch.cat([lm_hidden, pooled_image, embedding], dim=1)
        td_hidden, td_cell = self.top_down_lstm(td_input, (td_hidden, td_cell))
        lstm_state["td_hidden"] = (td_hidden, td_cell)

        # Project both modalities into the attention space and combine.
        projected_image = self.fa_image(image_feat)
        projected_hidden = self.fa_hidden(td_hidden)
        fused = self.relu(projected_image + projected_hidden.unsqueeze(1))
        return self.dropout(fused)
Esempio n. 2
0
    def __init__(self, params=None):
        """Instantiate the loss registered under ``params["type"]``.

        Parameters
        ----------
        params : dict, optional
            Must contain a "type" key naming a registered loss; extra
            constructor arguments may be supplied under "params".

        Raises
        ------
        ValueError
            If "type" is missing or no loss with that name is registered.
        """
        super().__init__()
        # Fix: avoid the mutable-default-argument pitfall of ``params={}``.
        if params is None:
            params = {}
        self.writer = registry.get("writer")
        if "type" not in params:
            # Fix: the original concatenation was missing a space
            # ("...field tospecify...").
            raise ValueError("Parameters to loss must have 'type' field to "
                             "specify type of loss to instantiate")

        loss_name = params["type"]
        self.name = loss_name

        loss_class = registry.get_loss_class(loss_name)

        if loss_class is None:
            raise ValueError(
                "No loss named {} is registered to registry".format(loss_name))
        # Special case of multi as it requires an array of sub-loss
        # configs, so it gets the full params dict rather than kwargs.
        if loss_name == "multi":
            self.loss_criterion = loss_class(params)
        else:
            loss_params = params.get("params", {})
            self.loss_criterion = loss_class(**loss_params)
Esempio n. 3
0
    def __init__(self, trainer):
        """
        Generates a path for saving model which can also be used for resuming
        from a checkpoint.

        Builds the checkpoint folder hierarchy under
        ``training_parameters.save_dir``, registers the log folder name on
        the config, and opens the surrounding git repository.
        """
        self.trainer = trainer

        self.config = self.trainer.config

        self.save_dir = self.config.training_parameters.save_dir

        # personalize model name
        self.model_name = self.config.training_parameters.model_file
        self.exp_name = self.config.training_parameters.experiment_name

        # Folder name encodes core run arguments plus any CLI overrides.
        self.ckpt_foldername = ckpt_name_from_core_args(self.config)
        self.ckpt_foldername += foldername_from_config_override(
            self.trainer.args)

        self.device = registry.get("current_device")

        self.ckpt_prefix = ""

        # Models may contribute their own filename prefix.
        if hasattr(self.trainer.model, "get_ckpt_name"):
            self.ckpt_prefix = self.trainer.model.get_ckpt_name() + "_"

        self.config["log_foldername"] = self.ckpt_foldername
        self.ckpt_foldername = os.path.join(self.save_dir,
                                            self.ckpt_foldername)
        # Final weights are written to this path at the end of training.
        self.pth_filepath = os.path.join(
            self.ckpt_foldername,
            self.ckpt_prefix + self.model_name + "_final.pth")

        # Intermediate checkpoints live in a "models" subfolder.
        self.models_foldername = os.path.join(self.ckpt_foldername, "models")
        if not os.path.exists(self.models_foldername):
            os.makedirs(self.models_foldername)

        self.save_config()
        # Repo root is assumed to be three directories above this file —
        # NOTE(review): verify if the package layout ever changes.
        self.repo_path = updir(os.path.abspath(__file__), n=3)
        self.repo = git.Repo(self.repo_path)
Esempio n. 4
0
    def __init__(self, config, *args, **kwargs):
        """Wrap the processor class registered under ``config.type``.

        Parameters
        ----------
        config : object
            Must expose a ``type`` attribute naming a registered processor;
            an optional ``params`` attribute is forwarded to its constructor.

        Raises
        ------
        AttributeError
            If ``config`` has no ``type`` attribute.
        """
        self.writer = registry.get("writer")

        if not hasattr(config, "type"):
            raise AttributeError(
                "Config must have 'type' attribute to specify type of processor"
            )

        processor_class = registry.get_processor_class(config.type)

        params = {}
        if not hasattr(config, "params"):
            # Fix: "{{}}" renders literal braces; the original "\{\}" was an
            # invalid escape sequence that printed stray backslashes.
            self.writer.write("Config doesn't have 'params' attribute to "
                              "specify parameters of the processor "
                              "of type {}. Setting to default {{}}".format(
                                  config.type))
        else:
            params = config.params

        self.processor = processor_class(params, *args, **kwargs)

        # Snapshot of dir() taken after construction, used elsewhere for
        # attribute forwarding.
        self._dir_representation = dir(self)
Esempio n. 5
0
    def change_dataloader(self):
        """Randomly pick the next dataset to draw a batch from.

        The main process samples a dataset index according to the configured
        probabilities (skipping exhausted iterators) and broadcasts the
        choice so every process switches to the same loader.
        """
        if self._num_datasets <= 1:
            return
        choice = 0

        if self._is_main_process:
            choice = np.random.choice(self._num_datasets,
                                      1,
                                      p=self._dataset_probablities)[0]

            # Resample until we land on a dataset with batches left.
            while choice in self._finished_iterators:
                choice = np.random.choice(self._num_datasets,
                                          1,
                                          p=self._dataset_probablities)[0]

        # Non-main processes contribute a dummy 0; all ranks receive the
        # choice broadcast from rank 0.
        choice = broadcast_scalar(choice,
                                  0,
                                  device=registry.get("current_device"))
        self._loader_index = choice
        self._chosen_dataset = self._datasets[self._loader_index]
        self._chosen_loader = self._loaders[self._loader_index]
        self._chosen_iterator = self._iterators[self._loader_index]
Esempio n. 6
0
    def calculate(self, sample_list, model_output, *args, **kwargs):
        """Compute a TextVQA-style accuracy over a decoded batch.

        Decodes each predicted token id either into a copied context/OCR
        token (ids beyond the fixed vocab size) or a vocabulary word, joins
        them into an answer string, and scores all predictions with
        ``self.evaluator``. Returns the accuracy as a tensor on the input's
        device.
        """
        answer_processor = registry.get(sample_list.dataset_name +
                                        "_answer_processor")

        batch_size = sample_list.context_tokens_enc.size(0)
        pred_answers = model_output["scores"].argmax(dim=-1)
        context_tokens_enc = sample_list.context_tokens_enc.cpu().numpy()
        gt_answers_enc = sample_list.gt_answers_enc.cpu().numpy()
        answer_space_size = answer_processor.get_true_vocab_size()

        predictions = []
        # Imported inside the method — presumably to dodge a circular
        # import at module load time; NOTE(review): confirm.
        from pythia.utils.objects_to_byte_tensor import dec_bytes2obj
        from pythia.utils.text_utils import word_tokenize
        for idx in range(batch_size):
            context_tokens = dec_bytes2obj(context_tokens_enc[idx])
            answer_words = []
            for answer_id in pred_answers[idx].tolist():
                if answer_id >= answer_space_size:
                    # Ids past the vocab size index into the copied context
                    # tokens (pointer/copy mechanism).
                    answer_id -= answer_space_size
                    answer_words.append(
                        word_tokenize(context_tokens[answer_id]))
                else:
                    if answer_id == answer_processor.EOS_IDX:
                        break
                    answer_words.append(
                        answer_processor.answer_vocab.idx2word(answer_id))

            # Re-attach possessive suffixes split off by tokenization.
            pred_answer = ' '.join(answer_words).replace(" 's", "'s")
            gt_answers = dec_bytes2obj(gt_answers_enc[idx])
            predictions.append({
                "pred_answer": pred_answer,
                "gt_answers": gt_answers,
            })

        accuracy = self.evaluator.eval_pred_list(predictions)
        # Move the scalar onto the same device as the batch.
        accuracy = torch.tensor(accuracy).to(
            sample_list.context_tokens_enc.device)
        return accuracy
Esempio n. 7
0
    def load(self):
        """Bootstrap the trainer: process group, datasets, logging, model,
        optimizer and extras — in dependency order."""
        self._init_process_group()

        self.run_type = self.config.training_parameters.get(
            "run_type", "train")
        self.dataset_loader = DatasetLoader(self.config)
        self._datasets = self.config.datasets

        # Register the writer so other components can fetch it via registry.
        self.writer = Logger(self.config)
        registry.register("writer", self.writer)

        self.configuration = registry.get("configuration")
        self.configuration.pretty_print()

        self.config_based_setup()

        self.load_task()
        self.load_model()
        self.load_optimizer()
        self.load_extras()
        # NOTE(review): ``visualization_flag`` is not defined in this method
        # — presumably a module-level global; confirm it is always in scope.
        if visualization_flag:
            self.generator = GenerateWord(
                'data/m4c_captioner_vocabs/textcaps/vocab_textcap_threshold_10.txt'
            )
Esempio n. 8
0
    def __init__(self, config, *args, **kwargs):
        """Build an answer vocabulary (and optional preprocessor) from config.

        Parameters
        ----------
        config : object
            Must expose ``vocab_file``; may expose ``preprocessor`` and
            ``num_answers``.

        Raises
        ------
        AttributeError
            If ``vocab_file`` is missing from the config.
        """
        self.writer = registry.get("writer")
        if not hasattr(config, "vocab_file"):
            raise AttributeError("'vocab_file' argument required, but not "
                                 "present in AnswerProcessor's config")

        self.answer_vocab = VocabDict(config.vocab_file, *args, **kwargs)

        self.preprocessor = None

        if hasattr(config, "preprocessor"):
            # Processor() either returns an instance or raises; the original
            # post-hoc ``is None`` check was unreachable and has been removed.
            self.preprocessor = Processor(config.preprocessor)

        if hasattr(config, "num_answers"):
            self.num_answers = config.num_answers
        else:
            self.num_answers = self.DEFAULT_NUM_ANSWERS
            warnings.warn("'num_answers' not defined in the config. "
                          "Setting to default of {}".format(
                              self.DEFAULT_NUM_ANSWERS))
Esempio n. 9
0
    def _load_state_dict_mapping(self, ckpt_model):
        """Load a legacy checkpoint whose top-level attribute names differ
        from the current model's, using a fixed new-name -> old-name mapping.

        Parameters
        ----------
        ckpt_model : dict
            Checkpoint state dict keyed by the legacy attribute names.
        """
        model = self.trainer.model
        attr_mapping = {
            "image_feature_encoders": "img_feat_encoders",
            "image_feature_embeddings_list": "img_embeddings_list",
            "image_text_multi_modal_combine_layer":
            "multi_modal_combine_layer",
            "text_embeddings": "text_embeddings",
            "classifier": "classifier",
        }

        data_parallel = registry.get("data_parallel")

        if not data_parallel:
            # Strip any DataParallel "module." prefix from the mapping keys.
            # Fix: rebuild the dict instead of popping while iterating — the
            # original raised "dictionary changed size during iteration" and,
            # as written, would have emptied the mapping entirely. Leftover
            # debug prints were also removed.
            attr_mapping = {
                key.replace("module.", ""): value
                for key, value in attr_mapping.items()
            }

        for key, value in attr_mapping.items():
            getattr(model, key).load_state_dict(ckpt_model[value])
Esempio n. 10
0
    def _summarize_report(self, meter, prefix="", should_print=True,
                          extra=None):
        """Push meter scalars to the logger and optionally print a one-line
        progress summary (main process only).

        Parameters
        ----------
        meter : object
            Provides ``get_scalar_dict()``, ``__str__`` and ``delimiter``.
        prefix : str
            Optional label prepended to the printed line.
        should_print : bool
            When False, only the scalars are logged.
        extra : dict, optional
            Additional key/value pairs appended to the printed line.
        """
        if not is_main_process():
            return

        scalar_dict = meter.get_scalar_dict()
        self.writer.add_scalars(scalar_dict, registry.get("current_iteration"))

        if not should_print:
            return

        # Fix: the original used the mutable default argument ``extra={}``.
        extra = {} if extra is None else extra

        print_str = []

        if len(prefix):
            print_str += [prefix + ":"]

        print_str += [
            "{}/{}".format(self.current_iteration, self.max_iterations)
        ]
        print_str += [str(meter)]
        print_str += [
            "{}: {}".format(key, value) for key, value in extra.items()
        ]

        self.writer.write(meter.delimiter.join(print_str))
Esempio n. 11
0
    def _load(self, file, force=False):
        """Load a checkpoint into the trainer's model (and optimizer).

        Handles three concerns: (1) normalizing "module." prefixes between
        DataParallel/distributed and plain models, (2) renaming legacy
        "fa_history" weights to "fa_context", and (3) optionally copying only
        the weight subset described by ``pretrained_mapping`` instead of a
        full ``load_state_dict``.
        """
        self.trainer.writer.write("Loading checkpoint")
        ckpt = self._torch_load(file)

        data_parallel = registry.get("data_parallel") or registry.get(
            "distributed")

        if "model" in ckpt:
            ckpt_model = ckpt["model"]
        else:
            # Bare state-dict checkpoint: wrap it so the metadata lookups
            # below ("optimizer", "best_iteration", ...) still work.
            ckpt_model = ckpt
            ckpt = {"model": ckpt}

        pretrained_mapping = self.config.training_parameters.pretrained_mapping

        # ``force`` (or load_pretrained=False) disables the partial
        # pretrained copy and triggers a full load below.
        if not self.config.training_parameters.load_pretrained or force is True:
            pretrained_mapping = {}

        new_dict = {}

        # TODO: Move to separate function
        for attr in ckpt_model:
            if "fa_history" in attr:
                # Legacy naming: "fa_history" modules were renamed.
                new_dict[attr.replace("fa_history",
                                      "fa_context")] = ckpt_model[attr]
            elif data_parallel is False and attr.startswith("module."):
                # In case the ckpt was actually a data parallel model
                # replace first module. from dataparallel with empty string
                new_dict[attr.replace("module.", "", 1)] = ckpt_model[attr]
            elif data_parallel is not False and not attr.startswith("module."):
                # Plain checkpoint being loaded into a DataParallel model.
                new_dict["module." + attr] = ckpt_model[attr]
            else:
                new_dict[attr] = ckpt_model[attr]

        if len(pretrained_mapping.items()) == 0:
            # Full load: weights, optimizer state and bookkeeping counters.
            final_dict = new_dict

            self.trainer.model.load_state_dict(final_dict)

            if "optimizer" in ckpt:
                self.trainer.optimizer.load_state_dict(ckpt["optimizer"])
            else:
                warnings.warn("'optimizer' key is not present in the "
                              "checkpoint asked to be loaded. Skipping.")

            self.trainer.early_stopping.init_from_checkpoint(ckpt)

            self.trainer.writer.write("Checkpoint loaded")

            if "best_iteration" in ckpt:
                self.trainer.current_iteration = ckpt["best_iteration"]
                registry.register("current_iteration",
                                  self.trainer.current_iteration)

            if "best_epoch" in ckpt:
                self.trainer.current_epoch = ckpt["best_epoch"]
                registry.register("current_epoch", self.trainer.current_epoch)
        else:
            # Partial load: copy checkpoint tensors whose names match a
            # (ckpt-prefix -> model-prefix) pair from pretrained_mapping.
            final_dict = {}
            model = self.trainer.model
            own_state = model.state_dict()

            for key, value in pretrained_mapping.items():
                # Trailing dot keeps matches on whole module-path segments.
                key += "."
                value += "."
                for attr in new_dict:
                    for own_attr in own_state:
                        if (key in attr and value in own_attr and attr.replace(
                                key, "") == own_attr.replace(value, "")):
                            self.trainer.writer.write("Copying " + attr + " " +
                                                      own_attr)
                            own_state[own_attr].copy_(new_dict[attr])
            self.trainer.writer.write("Pretrained model loaded")
Esempio n. 12
0
 def __init__(self):
     """CLEVR dataset builder: registers its dataset class and writer."""
     super().__init__("clevr")
     self.dataset_class = CLEVRDataset
     self.writer = registry.get("writer")
Esempio n. 13
0
 def __init__(self, task_name):
     """Base task: store the task name and grab the global writer."""
     super(BaseTask, self).__init__()
     self.writer = registry.get("writer")
     self.task_name = task_name
Esempio n. 14
0
 def _init_classifier(self):
     """Build the weight-normalized answer classifier from config sizes."""
     hidden_size = self.config["text_embedding"]["num_hidden"]
     # Output size is the dataset's registered final answer-space size.
     num_outputs = registry.get(self._datasets[0] + "_num_final_outputs")
     dropout_rate = self.config["classifier"]["dropout"]
     self.classifier = WeightNormClassifier(
         hidden_size, num_outputs, hidden_size * 2, dropout_rate)
Esempio n. 15
0
 def _build_word_embedding(self):
     """Create a 300-d embedding layer from the text processor's vocab."""
     processor = registry.get(self._datasets[0] + "_text_processor")
     self.word_embedding = processor.vocab.get_embedding(
         torch.nn.Embedding, embedding_dim=300)
Esempio n. 16
0
 def __init__(self, config):
     """BAN model: keep the config and the comma-separated dataset list."""
     super(BAN, self).__init__(config)
     self.config = config
     global_config = registry.get("config")
     self._global_config = global_config
     self._datasets = global_config.datasets.split(",")
Esempio n. 17
0
 def __init__(self, config):
     """Record the dataset list and build the word-embedding layer."""
     super().__init__(config)
     global_config = registry.get("config")
     self._global_config = global_config
     self._datasets = global_config.datasets.split(",")
     self._build_word_embedding()
Esempio n. 18
0
    def __init__(self, vocab_file, embedding_name, *args, **kwargs):
        """Use this vocab class when you have a custom vocabulary class but you
        want to use pretrained embedding vectors for it. This will only load
        the vectors which intersect with your vocabulary. Use the
        embedding_name specified in torchtext's pretrained aliases:
        ['charngram.100d', 'fasttext.en.300d', 'fasttext.simple.300d',
         'glove.42B.300d', 'glove.840B.300d', 'glove.twitter.27B.25d',
         'glove.twitter.27B.50d', 'glove.twitter.27B.100d',
         'glove.twitter.27B.200d', 'glove.6B.50d', 'glove.6B.100d',
         'glove.6B.200d', 'glove.6B.300d']

        Parameters
        ----------
        vocab_file : str
            Vocabulary file containing list of words with one word per line
            which will be used to collect vectors
        embedding_name : str
            Embedding name picked up from the list of the pretrained aliases
            mentioned above
        """
        super(IntersectedVocab, self).__init__(vocab_file, *args, **kwargs)

        self.type = "intersected"

        # Alias format is "<name>.<middle>.<dim>d", e.g. "glove.6B.300d";
        # split once instead of three times.
        name_parts = embedding_name.split(".")
        name = name_parts[0]
        middle = name_parts[1]
        dim = name_parts[2][:-1]

        class_name = EMBEDDING_NAME_CLASS_MAPPING[name]

        if not hasattr(vocab, class_name):
            from pythia.common.registry import registry

            writer = registry.get("writer")
            # Fix: the original had a trailing ", 'error'" on this
            # assignment, which made ``error`` a tuple and garbled both the
            # log line and the RuntimeError message.
            error = "Unknown embedding type: %s" % name
            if writer is not None:
                writer.write(error, "error")
            raise RuntimeError(error)

        params = [middle]

        if name == "glove":
            # GloVe classes additionally take the dimensionality.
            params.append(int(dim))

        vector_cache = os.path.join(get_pythia_root(), ".vector_cache")
        embedding = getattr(vocab, class_name)(*params, cache=vector_cache)

        self.vectors = torch.empty(
            (self.get_size(), len(embedding.vectors[0])), dtype=torch.float
        )

        self.embedding_dim = len(embedding.vectors[0])

        # The first four slots are special tokens; give each a distinct
        # constant vector (all-0.0, all-0.1, all-0.2, all-0.3).
        for i in range(0, 4):
            self.vectors[i] = torch.ones_like(self.vectors[i]) * 0.1 * i

        for i in range(4, self.get_size()):
            word = self.itos[i]
            embedding_index = embedding.stoi.get(word, None)

            if embedding_index is None:
                # Words absent from the embedding fall back to UNK's vector.
                self.vectors[i] = self.vectors[self.UNK_INDEX].clone()
            else:
                self.vectors[i] = embedding.vectors[embedding_index]
Esempio n. 19
0
 def __init__(self):
     """Caption BLEU-4 metric; uses the registered COCO caption processor."""
     super().__init__("caption_bleu4")
     processor = registry.get("coco_caption_processor")
     self.caption_processor = processor
Esempio n. 20
0
 def __init__(self):
     """Caption BLEU-4 metric backed by NLTK's BLEU implementation."""
     super().__init__("caption_bleu4")
     # Imported lazily so nltk is only needed when this metric is used.
     import nltk.translate.bleu_score as bleu_score
     self._bleu_score = bleu_score
     self.caption_processor = registry.get("coco_caption_processor")
Esempio n. 21
0
 def __init__(self, dataset_type="train"):
     """Remember the split type and cache commonly used globals."""
     self._dataset_type = dataset_type
     self._is_main_process = is_main_process()
     self.writer = registry.get("writer")
     self._global_config = registry.get("config")
Esempio n. 22
0
    def __init__(self):
        """Builder for the verbal-spatial dataset."""
        super().__init__("verbal_spatial")
        self.dataset_class = Verbal_SpatialDataset
        self.writer = registry.get("writer")
Esempio n. 23
0
 def __init__(self):
     """Caption BLEU-4 metric."""
     super().__init__("caption_bleu4")
     # TODO: fix this so it can set coco_caption_processor or
     # youcookII_caption_processor
     self.caption_processor = registry.get("coco_caption_processor")
Esempio n. 24
0
 def __init__(self):
     """Builder for the Visual Genome dataset."""
     super().__init__()
     self.writer = registry.get("writer")
     self.dataset_class = VisualGenomeDataset
     self.dataset_name = "visual_genome"
     self.dataset_proper_name = "Visual Genome"
Esempio n. 25
0
    def __init__(self, metric_list):
        """Normalize ``metric_list`` to a list and instantiate each metric."""
        # Allow a single metric config to be passed bare.
        metrics = metric_list if isinstance(metric_list, list) else [metric_list]

        self.writer = registry.get("writer")
        self.metrics = self._init_metrics(metrics)
Esempio n. 26
0
	def __init__(self):
		"""Builder for the object-part dataset."""
		super().__init__("objpart")
		self.dataset_class = ObjPartDataset
		self.writer = registry.get("writer")
Esempio n. 27
0
 def __init__(self, config):
     """Store the config and grab the global writer."""
     super().__init__()
     self.writer = registry.get("writer")
     self.config = config
Esempio n. 28
0
    def __init__(self):
        """Builder for the VQAmb dataset."""
        super().__init__("vqamb")
        self.dataset_class = VQAmbDataset
        self.writer = registry.get("writer")
Esempio n. 29
0
    def forward(self, sample_list, model_output):
        """Masked BCE decoding loss augmented with an ANLS-based reward term.

        Computes token-level BCE against the targets, decodes the argmax
        predictions into answer strings, scores them with ANLS against the
        ground-truth answers, and adds a second BCE term on the argmax
        one-hots weighted by (reward - 0.5) — a self-critical-style
        bonus/penalty.
        """
        scores = model_output["scores"]
        targets = sample_list["targets"]
        loss_mask = sample_list["train_loss_mask"]
        assert scores.dim() == 3 and loss_mask.dim() == 2

        losses = F.binary_cross_entropy_with_logits(scores,
                                                    targets,
                                                    reduction="none")
        # Zero out the loss on padded decoding steps.
        losses *= loss_mask.unsqueeze(-1)

        # add the anls as additional rewards
        # rewards calculation
        batch_size = sample_list.context_tokens_enc.size(0)
        pred_answers = scores.argmax(dim=-1)
        context_tokens_enc = sample_list.context_tokens_enc.cpu().numpy()
        gt_answers_enc = sample_list.gt_answers_enc.cpu().numpy()
        answer_processor = registry.get(sample_list.dataset_name +
                                        "_answer_processor")
        answer_space_size = answer_processor.get_true_vocab_size()

        predictions = []
        from pythia.utils.objects_to_byte_tensor import dec_bytes2obj
        from pythia.utils.text_utils import word_tokenize
        for idx in range(batch_size):
            context_tokens = dec_bytes2obj(context_tokens_enc[idx])
            answer_words = []
            for answer_id in pred_answers[idx].tolist():
                if answer_id >= answer_space_size:
                    # Ids past the fixed vocab copy a token from the context.
                    answer_id -= answer_space_size
                    answer_words.append(
                        word_tokenize(context_tokens[answer_id]))
                else:
                    if answer_id == answer_processor.EOS_IDX:
                        break
                    answer_words.append(
                        answer_processor.answer_vocab.idx2word(answer_id))

            # Re-attach possessive suffixes split off by tokenization.
            pred_answer = ' '.join(answer_words).replace(" 's", "'s")
            gt_answers = dec_bytes2obj(gt_answers_enc[idx])
            predictions.append({
                "pred_answer": pred_answer,
                "gt_answers": gt_answers,
            })

        # Per-sample reward = best ANLS over the ground-truth answer set.
        pred_scores = []
        for entry in predictions:
            anls = max(
                self.get_anls(entry['pred_answer'], gt)
                for gt in entry['gt_answers'])
            pred_scores.append(anls)

        rewards = torch.tensor(pred_scores).reshape(-1, 1).to(scores.device)

        # One-hot targets built from the argmax predictions; the extra BCE
        # term pushes toward/away from the decoded answer depending on
        # whether its reward is above or below 0.5.
        max_element = torch.argmax(F.softmax(scores, -1), -1)
        fake_targets = torch.zeros_like(targets)
        for i in range(fake_targets.shape[0]):
            fake_targets[i][range(fake_targets.shape[1]), max_element[i]] = 1
        fake_bce = F.binary_cross_entropy_with_logits(scores,
                                                      fake_targets,
                                                      reduction="none")
        fake_bce *= loss_mask.unsqueeze(-1)
        fake_bce *= (rewards - 0.5 + 1e-8).unsqueeze(-1)

        # Normalize by the number of unmasked steps (at least one).
        count = torch.max(torch.sum(loss_mask), self.one.to(losses.device))
        loss = torch.sum(losses) / count

        # NOTE(review): alpha heavily weights the supervised BCE over the
        # reward term — presumably tuned empirically; confirm.
        alpha = 1000

        loss = alpha * loss + torch.sum(fake_bce) / count
        return loss
Esempio n. 30
0
 def __init__(self, config):
     """Build BERT configs for the multimodal transformer and global graph."""
     super().__init__(config)
     self._datasets = registry.get("config").datasets.split(",")
     self.mmt_config = BertConfig(**self.config.mmt)
     self.graph_config = BertConfig(**self.config.global_graph)