def build(self):
    assert len(self._datasets) > 0
    num_question_choices = registry.get(
        _TEMPLATES["question_vocab_size"].format(self._datasets[0]))
    num_answer_choices = registry.get(
        _TEMPLATES["number_of_answers"].format(self._datasets[0]))

    self.text_embedding = nn.Embedding(
        num_question_choices, self.config.text_embedding.embedding_dim)
    self.lstm = nn.LSTM(**self.config.lstm)

    layers_config = self.config.cnn.layers
    conv_layers = []
    for i in range(len(layers_config.input_dims)):
        conv_layers.append(
            ConvNet(
                layers_config.input_dims[i],
                layers_config.output_dims[i],
                kernel_size=layers_config.kernel_sizes[i],
            ))
    conv_layers.append(Flatten())
    self.cnn = nn.Sequential(*conv_layers)

    # As we generate the output dim dynamically, we need to copy the config
    # to update it
    classifier_config = deepcopy(self.config.classifier)
    classifier_config.params.out_dim = num_answer_choices
    self.classifier = ClassifierLayer(classifier_config.type,
                                      **classifier_config.params)
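# A hedged sketch of the cnn.layers config that build() above iterates over:
# three parallel lists indexed together, one entry per ConvNet. The values
# below are illustrative placeholders, not MMF defaults.
from omegaconf import OmegaConf

cnn_layers = OmegaConf.create({
    "input_dims": [3, 64, 128],
    "output_dims": [64, 128, 256],
    "kernel_sizes": [7, 5, 5],
})
# -> ConvNet(3, 64, kernel_size=7), ConvNet(64, 128, kernel_size=5),
#    ConvNet(128, 256, kernel_size=5), followed by Flatten()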
def upgrade_state_dict(self, state_dict):
    data_parallel = registry.get("data_parallel") or registry.get(
        "distributed")
    data_parallel = data_parallel or isinstance(
        self.trainer.model,
        (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel),
    )
    if data_parallel:
        model = self.trainer.model.module
    else:
        model = self.trainer.model

    new_dict = {}
    for attr in state_dict:
        new_attr = model.format_state_key(attr)
        if not data_parallel and attr.startswith("module."):
            # In case the checkpoint was saved from a data parallel model,
            # replace the first "module." prefix with an empty string
            new_attr = new_attr.replace("module.", "", 1)
        elif data_parallel and not attr.startswith("module."):
            new_attr = "module." + new_attr

        # Log if the key has changed, but not when the difference
        # is only due to data parallel's "module." prefix
        if new_attr != attr and ("module." + new_attr != attr):
            logger.info(f"Will load key {new_attr} from {attr}")
        new_dict[new_attr] = state_dict[attr]
    return new_dict
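# A minimal, standalone sketch of the "module." prefix handling above,
# assuming a checkpoint saved from nn.DataParallel is being loaded into a
# plain (non-parallel) model; the helper name is illustrative only.
def _strip_module_prefix(state_dict):
    # Drop the first "module." that nn.DataParallel prepends to every key
    return {
        (k.replace("module.", "", 1) if k.startswith("module.") else k): v
        for k, v in state_dict.items()
    }

# e.g. {"module.fc.weight": w} -> {"fc.weight": w}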
def setup_imports():
    from VisualBERT.mmf.common.registry import registry

    # First, check if imports are already setup
    has_already_setup = registry.get("imports_setup", no_warning=True)
    if has_already_setup:
        return

    # Automatically load all of the modules, so that
    # they register with registry
    root_folder = registry.get("mmf_root", no_warning=True)
    if root_folder is None:
        root_folder = os.path.dirname(os.path.abspath(__file__))
        root_folder = os.path.join(root_folder, "..")

        environment_mmf_path = os.environ.get("MMF_PATH",
                                              os.environ.get("PYTHIA_PATH"))

        if environment_mmf_path is not None:
            root_folder = environment_mmf_path

        registry.register("pythia_path", root_folder)
        registry.register("mmf_path", root_folder)

    trainer_folder = os.path.join(root_folder, "trainers")
    trainer_pattern = os.path.join(trainer_folder, "**", "*.py")
    datasets_folder = os.path.join(root_folder, "datasets")
    datasets_pattern = os.path.join(datasets_folder, "**", "*.py")
    model_folder = os.path.join(root_folder, "models")
    model_pattern = os.path.join(model_folder, "**", "*.py")

    importlib.import_module("VisualBERT.mmf.common.meter")

    files = (glob.glob(datasets_pattern, recursive=True) +
             glob.glob(model_pattern, recursive=True) +
             glob.glob(trainer_pattern, recursive=True))

    for f in files:
        f = os.path.realpath(f)
        if f.endswith(".py") and not f.endswith("__init__.py"):
            splits = f.split(os.sep)
            import_prefix_index = 0
            for idx, split in enumerate(splits):
                if split == "mmf":
                    import_prefix_index = idx + 1
            file_name = splits[-1]
            module_name = file_name[:file_name.find(".py")]
            module = ".".join(["VisualBERT", "mmf"] +
                              splits[import_prefix_index:-1] + [module_name])
            importlib.import_module(module)

    registry.register("imports_setup", True)
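# Hedged usage sketch: setup_imports() is meant to run once at startup so
# that every dataset, model, and trainer module gets imported and thereby
# registered with the registry; "visual_bert" below is an example key.
from VisualBERT.mmf.common.registry import registry

setup_imports()
model_class = registry.get_model_class("visual_bert")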
def log_progress(info: Union[Dict, Any], log_format="simple"):
    """Useful for logging progress dict.

    Args:
        info (dict|any): If dict, will be logged as key value pairs.
            Otherwise, it will be logged directly.
        log_format (str, optional): json|simple. Defaults to "simple".
    """
    caller, key = _find_caller()
    logger = logging.getLogger(caller)

    if not isinstance(info, collections.abc.Mapping):
        logger.info(info)
        return

    if log_format == "simple":
        config = registry.get("config")
        if config:
            log_format = config.training.log_format

    if log_format == "simple":
        output = ", ".join([f"{key}: {value}" for key, value in info.items()])
    elif log_format == "json":
        output = json.dumps(info)
    else:
        output = str(info)

    logger.info(output)
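# Hypothetical usage of log_progress: with the default "simple" format a
# dict is flattened to "key: value" pairs; with "json" it is dumped via
# json.dumps. The metric values below are made up for illustration.
log_progress({"epoch": 1, "loss": 0.482, "lr": 5e-5})
# -> epoch: 1, loss: 0.482, lr: 5e-05
log_progress({"epoch": 1, "loss": 0.482}, log_format="json")
# -> {"epoch": 1, "loss": 0.482}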
def __init__(self):
    import nltk.translate.bleu_score as bleu_score

    self._bleu_score = bleu_score
    super().__init__("caption_bleu4")
    self.caption_processor = registry.get("coco_caption_processor")
    self.required_params = ["scores", "answers", "captions"]
def __init__(self, optimizer, *args, **kwargs):
    from VisualBERT.mmf.utils.general import lr_lambda_update

    self._lambda_func = lr_lambda_update
    self._global_config = registry.get("config")
    super().__init__(optimizer, self.lr_lambda, *args, **kwargs)
def __init__(self, multi_task_instance):
    self.test_task = multi_task_instance
    self.task_type = multi_task_instance.dataset_type
    self.config = registry.get("config")
    self.report = []
    self.timer = Timer()
    self.training_config = self.config.training
    self.num_workers = self.training_config.num_workers
    self.batch_size = self.training_config.batch_size
    self.report_folder_arg = get_mmf_env(key="report_dir")
    self.experiment_name = self.training_config.experiment_name

    self.datasets = []
    for dataset in self.test_task.get_datasets():
        self.datasets.append(dataset)

    self.current_dataset_idx = -1
    self.current_dataset = self.datasets[self.current_dataset_idx]

    self.save_dir = get_mmf_env(key="save_dir")
    self.report_folder = ckpt_name_from_core_args(self.config)
    self.report_folder += foldername_from_config_override(self.config)

    self.report_folder = os.path.join(self.save_dir, self.report_folder)
    self.report_folder = os.path.join(self.report_folder, "reports")

    if self.report_folder_arg:
        self.report_folder = self.report_folder_arg

    PathManager.mkdirs(self.report_folder)
def _build_word_embedding(self):
    self.text_processor = registry.get(self._datasets[0] + "_text_processor")
    self.vocab = self.text_processor.vocab
    self.vocab_size = self.vocab.get_size()
    self.word_embedding = self.vocab.get_embedding(
        torch.nn.Embedding, embedding_dim=self.config.embedding_dim)
    self.text_embeddings_out_dim = self.config.embedding_dim
def get_mmf_root():
    from VisualBERT.mmf.common.registry import registry

    mmf_root = registry.get("mmf_root", no_warning=True)
    if mmf_root is None:
        mmf_root = os.path.dirname(os.path.abspath(__file__))
        mmf_root = os.path.abspath(os.path.join(mmf_root, ".."))
        registry.register("mmf_root", mmf_root)
    return mmf_root
def _build_output(self):
    # dynamic OCR-copying scores with pointer network
    self.ocr_ptr_net = OcrPtrNet(**self.config.classifier.ocr_ptr_net)

    # fixed answer vocabulary scores
    num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
    # remove the OCR copying dimensions in LoRRA's classifier output
    # (OCR copying will be handled separately)
    num_choices -= self.config.classifier.ocr_max_num
    self.classifier = ClassifierLayer(
        self.config.classifier.type,
        in_dim=self.mmt_config.hidden_size,
        out_dim=num_choices,
        **self.config.classifier.params,
    )

    self.answer_processor = registry.get(self._datasets[0] +
                                         "_answer_processor")
def _init_classifier(self, combined_embedding_dim):
    # TODO: Later support multihead
    num_choices = registry.get(self._datasets[0] + "_num_final_outputs")

    self.classifier = ClassifierLayer(
        self.config.classifier.type,
        in_dim=combined_embedding_dim,
        out_dim=num_choices,
        **self.config.classifier.params,
    )
def __init__(self, loss_list):
    super().__init__()
    self.losses = nn.ModuleList()
    config = registry.get("config")
    self._evaluation_predict = False
    if config:
        self._evaluation_predict = config.get("evaluation",
                                              {}).get("predict", False)

    for loss in loss_list:
        self.losses.append(MMFLoss(loss))
def get_global_config(key=None):
    config = registry.get("config")
    if config is None:
        configuration = Configuration()
        config = configuration.get_config()
        registry.register("config", config)

    if key:
        config = OmegaConf.select(config, key)

    return config
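# Hedged usage sketch for get_global_config: without a key it returns the
# full registered config (building and registering one on first use); with
# a dotted key it delegates to OmegaConf.select. The key below assumes a
# standard MMF training config and is only an example.
config = get_global_config()
batch_size = get_global_config("training.batch_size")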
def calculate(self, sample_list, model_output, *args, **kwargs):
    """Calculate vqa accuracy and return it back.

    Args:
        sample_list (SampleList): SampleList provided by DataLoader for
            current iteration
        model_output (Dict): Dict returned by model.

    Returns:
        torch.FloatTensor: VQA Accuracy
    """
    output = model_output["scores"]
    expected = sample_list["answers"]

    answer_processor = registry.get(sample_list.dataset_name +
                                    "_answer_processor")
    answer_space_size = answer_processor.get_true_vocab_size()

    output = self._masked_unk_softmax(output, 1, 0)
    output = output.argmax(dim=1).clone().tolist()
    accuracy = []

    for idx, answer_id in enumerate(output):
        if answer_id >= answer_space_size:
            answer_id -= answer_space_size
            answer = sample_list["context_tokens"][idx][answer_id]
        else:
            answer = answer_processor.idx2word(answer_id)

        answer = self.evalai_answer_processor(answer)

        gt_answers = [self.evalai_answer_processor(x) for x in expected[idx]]
        gt_answers = list(enumerate(gt_answers))

        gt_acc = []
        for gt_answer in gt_answers:
            other_answers = [item for item in gt_answers if item != gt_answer]
            matching_answers = [
                item for item in other_answers if item[1] == answer
            ]
            acc = min(1, float(len(matching_answers)) / 3)
            gt_acc.append(acc)
        avgGTAcc = float(sum(gt_acc)) / len(gt_acc)
        accuracy.append(avgGTAcc)

    accuracy = float(sum(accuracy)) / len(accuracy)

    return model_output["scores"].new_tensor(accuracy, dtype=torch.float)
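# Worked example of the VQA accuracy rule used above: for each annotator,
# accuracy is min(1, matches among the other annotators / 3), then the
# per-annotator scores are averaged. The answers below are made up.
gt = ["cat", "cat", "cat", "dog", "cat", "cat", "dog", "cat", "cat", "cat"]
pred = "cat"

gt_pairs = list(enumerate(gt))
gt_acc = []
for item in gt_pairs:
    others = [o for o in gt_pairs if o != item]
    matches = [o for o in others if o[1] == pred]
    gt_acc.append(min(1, len(matches) / 3))
print(sum(gt_acc) / len(gt_acc))  # 1.0: "cat" matches >= 3 other annotators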
def _init_classifier(self, combined_embedding_dim: int):
    # TODO: Later support multihead
    num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
    params = self.config["classifier"].get("params")
    if params is None:
        params = {}

    self.classifier = ClassifierLayer(
        self.config.classifier.type,
        in_dim=combined_embedding_dim,
        out_dim=num_choices,
        **params,
    )
def test_init_processors(self):
    path = os.path.join(
        os.path.abspath(__file__),
        "../../../mmf/configs/datasets/vqa2/defaults.yaml",
    )
    args = dummy_args()
    args.opts.append(f"config={path}")
    configuration = Configuration(args)
    answer_processor = (
        configuration.get_config().dataset_config.vqa2.processors.answer_processor
    )
    vocab_path = os.path.join(
        os.path.abspath(__file__), "..", "..", "data", "vocab.txt"
    )
    answer_processor.params.vocab_file = os.path.abspath(vocab_path)
    self._fix_configuration(configuration)
    configuration.freeze()

    base_dataset = BaseDataset(
        "vqa2",
        configuration.get_config().dataset_config.vqa2,
        "train",
    )
    expected_processors = [
        "answer_processor",
        "ocr_token_processor",
        "bbox_processor",
    ]

    # Check no processors are initialized before init_processors call
    self.assertFalse(
        any(hasattr(base_dataset, key) for key in expected_processors))

    for processor in expected_processors:
        self.assertIsNone(registry.get("{}_{}".format("vqa2", processor)))

    # Check processors are initialized after init_processors
    base_dataset.init_processors()
    self.assertTrue(
        all(hasattr(base_dataset, key) for key in expected_processors))

    for processor in expected_processors:
        self.assertIsNotNone(registry.get("{}_{}".format("vqa2", processor)))
def __init__(self, dataset_name, config, dataset_type="train", *args, **kwargs):
    super().__init__()
    if config is None:
        config = {}
    self.config = config
    self._dataset_name = dataset_name
    self._dataset_type = dataset_type
    self._global_config = registry.get("config")
    self._device = get_current_device()
    self.use_cuda = "cuda" in str(self._device)
def forward(self, weighted_attn):
    # Get LSTM state
    state = registry.get(f"{weighted_attn.device}_lstm_state")
    h1, c1 = state["td_hidden"]
    h2, c2 = state["lm_hidden"]

    # Language LSTM
    h2, c2 = self.language_lstm(torch.cat([weighted_attn, h1], dim=1),
                                (h2, c2))
    predictions = self.fc(self.dropout(h2))

    # Update hidden state for t+1
    state["lm_hidden"] = (h2, c2)

    return predictions
def forward(self, image_feat, embedding):
    image_feat_mean = image_feat.mean(1)

    # Get LSTM state
    state = registry.get(f"{image_feat.device}_lstm_state")
    h1, c1 = state["td_hidden"]
    h2, c2 = state["lm_hidden"]

    h1, c1 = self.top_down_lstm(
        torch.cat([h2, image_feat_mean, embedding], dim=1), (h1, c1))

    state["td_hidden"] = (h1, c1)

    image_fa = self.fa_image(image_feat)
    hidden_fa = self.fa_hidden(h1)

    joint_feature = self.relu(image_fa + hidden_fa.unsqueeze(1))
    joint_feature = self.dropout(joint_feature)

    return joint_feature
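# The two decoder forward passes above read a per-device dict of LSTM
# states from the registry under the key "<device>_lstm_state". A hedged
# sketch of how such a state could be registered before decoding starts;
# the helper name, batch size, and hidden size are placeholders, not MMF
# defaults.
import torch
from VisualBERT.mmf.common.registry import registry

def init_lstm_state(device, batch_size=32, hidden_size=1024):
    def zero_pair():
        return (torch.zeros(batch_size, hidden_size, device=device),
                torch.zeros(batch_size, hidden_size, device=device))

    registry.register(f"{device}_lstm_state",
                      {"td_hidden": zero_pair(), "lm_hidden": zero_pair()})

init_lstm_state(torch.device("cpu"))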
def build_processors(
    processors_config: mmf_typings.DictConfig,
    registry_key: str = None,
    *args,
    **kwargs
) -> ProcessorDict:
    """Given a processor config, builds the processors present and returns back
    a dict containing processors mapped to keys as per the config

    Args:
        processors_config (mmf_typings.DictConfig): OmegaConf DictConfig
            describing the parameters and type of each processor passed here
        registry_key (str, optional): If passed, function would look into
            registry for this particular key and return it back. .format with
            processor_key will be called on this string. Defaults to None.

    Returns:
        ProcessorDict: Dictionary containing key to processor mapping
    """
    from VisualBERT.mmf.datasets.processors.processors import Processor

    processor_dict = {}

    for processor_key, processor_params in processors_config.items():
        if not processor_params:
            continue

        processor_instance = None
        if registry_key is not None:
            full_key = registry_key.format(processor_key)
            processor_instance = registry.get(full_key, no_warning=True)

        if processor_instance is None:
            processor_instance = Processor(processor_params, *args, **kwargs)
            # We don't register back here because, in case of the hub
            # interface, we want the processors to be instantiated every
            # time. BaseDataset can register at its own end
        processor_dict[processor_key] = processor_instance

    return processor_dict
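# Hypothetical call to build_processors, assuming a processors section in
# the style of MMF dataset configs; the processor types and params below
# are illustrative only. Per the docstring, registry_key is a format
# string, so "vqa2_{}" would look up e.g. "vqa2_text_processor" before
# instantiating a new Processor.
from omegaconf import OmegaConf

processors_config = OmegaConf.create({
    "text_processor": {"type": "vocab", "params": {"max_length": 14}},
    "answer_processor": {"type": "vqa_answer", "params": {}},
})
processors = build_processors(processors_config, registry_key="vqa2_{}")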
def calculate(self, sample_list, model_output, *args, **kwargs):
    answer_processor = registry.get(sample_list.dataset_name +
                                    "_answer_processor")

    batch_size = sample_list.context_tokens.size(0)
    pred_answers = model_output["scores"].argmax(dim=-1)
    context_tokens = sample_list.context_tokens.cpu().numpy()
    answers = sample_list.get(self.gt_key).cpu().numpy()
    answer_space_size = answer_processor.get_true_vocab_size()

    predictions = []
    from VisualBERT.mmf.utils.distributed import byte_tensor_to_object
    from VisualBERT.mmf.utils.text import word_tokenize

    for idx in range(batch_size):
        tokens = byte_tensor_to_object(context_tokens[idx])
        answer_words = []
        for answer_id in pred_answers[idx].tolist():
            if answer_id >= answer_space_size:
                answer_id -= answer_space_size
                answer_words.append(word_tokenize(tokens[answer_id]))
            else:
                if answer_id == answer_processor.EOS_IDX:
                    break
                answer_words.append(
                    answer_processor.answer_vocab.idx2word(answer_id))

        pred_answer = " ".join(answer_words).replace(" 's", "'s")
        gt_answers = byte_tensor_to_object(answers[idx])
        predictions.append({
            "pred_answer": pred_answer,
            "gt_answers": gt_answers,
        })

    accuracy = self.evaluator.eval_pred_list(predictions)
    accuracy = torch.tensor(accuracy).to(sample_list.context_tokens.device)

    return accuracy
def __init__(self, config):
    super().__init__(config)
    self.config = config
    self._global_config = registry.get("config")
    self._datasets = self._global_config.datasets.split(",")
def save(self, update, iteration=None, update_best=False):
    # Only save in main process
    if not is_master():
        return

    if not iteration:
        iteration = update

    ckpt_filepath = os.path.join(self.models_foldername,
                                 "model_%d.ckpt" % update)
    best_ckpt_filepath = os.path.join(self.ckpt_foldername,
                                      self.ckpt_prefix + "best.ckpt")
    current_ckpt_filepath = os.path.join(self.ckpt_foldername,
                                         self.ckpt_prefix + "current.ckpt")

    best_iteration = (self.trainer.early_stop_callback.early_stopping.
                      best_monitored_iteration)
    best_update = (self.trainer.early_stop_callback.early_stopping.
                   best_monitored_update)
    best_metric = (self.trainer.early_stop_callback.early_stopping.
                   best_monitored_value)
    model = self.trainer.model
    data_parallel = registry.get("data_parallel") or registry.get(
        "distributed")
    fp16_scaler = getattr(self.trainer, "scaler", None)
    fp16_scaler_dict = None

    if fp16_scaler is not None:
        fp16_scaler_dict = fp16_scaler.state_dict()

    if data_parallel is True:
        model = model.module

    ckpt = {
        "model": model.state_dict(),
        "optimizer": self.trainer.optimizer.state_dict(),
        "best_iteration": best_iteration,
        "current_iteration": iteration,
        "current_epoch": self.trainer.current_epoch,
        "num_updates": update,
        "best_update": best_update,
        "best_metric_value": best_metric,
        "fp16_scaler": fp16_scaler_dict,
        # Convert to container to avoid any dependencies
        "config": OmegaConf.to_container(self.config, resolve=True),
    }

    lr_scheduler = self.trainer.lr_scheduler_callback._scheduler
    if lr_scheduler is not None:
        ckpt["lr_scheduler"] = lr_scheduler.state_dict()

    if self.git_repo:
        git_metadata_dict = self._get_vcs_fields()
        ckpt.update(git_metadata_dict)

    with PathManager.open(ckpt_filepath, "wb") as f:
        torch.save(ckpt, f)

    if update_best:
        with PathManager.open(best_ckpt_filepath, "wb") as f:
            torch.save(ckpt, f)

    # Save current always
    with PathManager.open(current_ckpt_filepath, "wb") as f:
        torch.save(ckpt, f)

    # Remove old checkpoints if max_to_keep is set
    if self.max_to_keep > 0:
        if len(self.saved_iterations) == self.max_to_keep:
            self.remove(self.saved_iterations.pop(0))
        self.saved_iterations.append(update)
def __init__(self, config):
    super().__init__(config)
    self.mmt_config = BertConfig(**self.config.mmt)
    self._datasets = registry.get("config").datasets.split(",")
def _init_classifier(self):
    num_hidden = self.config.text_embedding.num_hidden
    num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
    dropout = self.config.classifier.dropout
    self.classifier = WeightNormClassifier(num_hidden, num_choices,
                                           num_hidden * 2, dropout)
def _build_word_embedding(self):
    assert len(self._datasets) > 0
    text_processor = registry.get(self._datasets[0] + "_text_processor")
    vocab = text_processor.vocab
    self.word_embedding = vocab.get_embedding(torch.nn.Embedding,
                                              embedding_dim=300)