def forward(self, image_feat, embedding):
    """Advance the top-down attention LSTM one step and build the joint
    attended feature.

    The per-device LSTM state (top-down and language hidden states) is
    shared through the global registry. The mean-pooled image feature,
    the language-LSTM hidden state and the word embedding drive the
    top-down LSTM, whose new hidden state is fused with the projected
    image features.
    """
    # Mean-pool image features over the region/spatial dimension (dim 1).
    pooled_feat = image_feat.mean(1)

    # State is keyed by device so data-parallel replicas don't collide.
    lstm_state = registry.get("{}_lstm_state".format(image_feat.device))
    td_h, td_c = lstm_state["td_hidden"]
    lm_h, _ = lstm_state["lm_hidden"]

    td_h, td_c = self.top_down_lstm(
        torch.cat([lm_h, pooled_feat, embedding], dim=1), (td_h, td_c)
    )
    lstm_state["td_hidden"] = (td_h, td_c)

    # Fuse: project image features and hidden state, broadcast-add the
    # hidden projection across regions, then ReLU + dropout.
    projected_image = self.fa_image(image_feat)
    projected_hidden = self.fa_hidden(td_h)
    joint = self.relu(projected_image + projected_hidden.unsqueeze(1))
    return self.dropout(joint)
def __init__(self, params=None):
    """Instantiate a loss criterion from its registered name.

    Parameters
    ----------
    params : dict, optional
        Must contain a ``type`` key naming a registered loss. Extra
        constructor arguments go under a ``params`` key, except for the
        ``multi`` loss which receives the whole dict.

    Raises
    ------
    ValueError
        If ``type`` is missing or names an unregistered loss.
    """
    super().__init__()
    # Avoid a mutable default argument; None stands in for "no params".
    if params is None:
        params = {}
    self.writer = registry.get("writer")
    if "type" not in params:
        # FIX: original implicit string concatenation produced
        # "...'type' field tospecify..." (missing space).
        raise ValueError(
            "Parameters to loss must have 'type' field to "
            "specify type of loss to instantiate"
        )
    loss_name = params["type"]
    self.name = loss_name
    loss_class = registry.get_loss_class(loss_name)
    if loss_class is None:
        raise ValueError(
            "No loss named {} is registered to registry".format(loss_name)
        )
    # Special case of multi as it requires the full params dict.
    if loss_name == "multi":
        self.loss_criterion = loss_class(params)
    else:
        loss_params = params.get("params", {})
        self.loss_criterion = loss_class(**loss_params)
def __init__(self, trainer):
    """
    Generates a path for saving model which can also be used for resuming
    from a checkpoint.
    """
    self.trainer = trainer
    self.config = self.trainer.config
    self.save_dir = self.config.training_parameters.save_dir
    # personalize model name
    self.model_name = self.config.training_parameters.model_file
    self.exp_name = self.config.training_parameters.experiment_name
    # Folder name encodes core args plus any CLI config overrides so
    # each experiment gets its own checkpoint directory.
    self.ckpt_foldername = ckpt_name_from_core_args(self.config)
    self.ckpt_foldername += foldername_from_config_override(
        self.trainer.args)
    self.device = registry.get("current_device")
    self.ckpt_prefix = ""
    # Models may supply their own checkpoint-name prefix.
    if hasattr(self.trainer.model, "get_ckpt_name"):
        self.ckpt_prefix = self.trainer.model.get_ckpt_name() + "_"
    self.config["log_foldername"] = self.ckpt_foldername
    self.ckpt_foldername = os.path.join(self.save_dir, self.ckpt_foldername)
    self.pth_filepath = os.path.join(
        self.ckpt_foldername, self.ckpt_prefix + self.model_name
        + "_final.pth")
    self.models_foldername = os.path.join(self.ckpt_foldername, "models")
    if not os.path.exists(self.models_foldername):
        os.makedirs(self.models_foldername)
    self.save_config()
    # Record the enclosing git repo (three directories up) so the commit
    # can be tracked alongside checkpoints.
    self.repo_path = updir(os.path.abspath(__file__), n=3)
    self.repo = git.Repo(self.repo_path)
def __init__(self, config, *args, **kwargs):
    """Wrap a registered processor chosen by ``config.type``.

    Parameters
    ----------
    config : object
        Must have a ``type`` attribute; ``params`` is optional and
        defaults to an empty dict (with a warning).

    Raises
    ------
    AttributeError
        If ``config`` has no ``type`` attribute.
    """
    self.writer = registry.get("writer")
    if not hasattr(config, "type"):
        raise AttributeError(
            "Config must have 'type' attribute to specify type of processor"
        )
    processor_class = registry.get_processor_class(config.type)
    params = {}
    if not hasattr(config, "params"):
        # FIX: the original used "\{\}" which str.format parses as a
        # stray replacement field and raises at runtime; a literal {}
        # is written as {{}} in a format string.
        self.writer.write(
            "Config doesn't have 'params' attribute to "
            "specify parameters of the processor "
            "of type {}. Setting to default {{}}".format(config.type)
        )
    else:
        params = config.params
    self.processor = processor_class(params, *args, **kwargs)
    # Snapshot of own attributes, presumably used later to distinguish
    # wrapper attributes from the wrapped processor's.
    self._dir_representation = dir(self)
def change_dataloader(self):
    """Switch to a randomly chosen, not-yet-exhausted dataset.

    The main process samples a dataset index according to the configured
    probabilities; the choice is broadcast so every process iterates the
    same dataset. No-op when there is at most one dataset.
    """
    if self._num_datasets <= 1:
        return

    def _sample():
        # Draw one dataset index with the configured probabilities.
        return np.random.choice(
            self._num_datasets, 1, p=self._dataset_probablities
        )[0]

    choice = 0
    if self._is_main_process:
        choice = _sample()
        # Resample until the chosen dataset still has batches left.
        while choice in self._finished_iterators:
            choice = _sample()

    # Keep all processes in lockstep with the main process's pick.
    choice = broadcast_scalar(choice, 0, device=registry.get("current_device"))
    self._loader_index = choice
    self._chosen_dataset = self._datasets[choice]
    self._chosen_loader = self._loaders[choice]
    self._chosen_iterator = self._iterators[choice]
def calculate(self, sample_list, model_output, *args, **kwargs):
    """Decode predicted answer ids into strings and score them with the
    dataset's evaluator.

    Ids below the fixed answer vocabulary size map to vocabulary words;
    ids at or above it point into the sample's context (OCR) tokens.
    Decoding stops at the processor's EOS index. Returns the accuracy as
    a tensor on the same device as the inputs.
    """
    answer_processor = registry.get(sample_list.dataset_name
                                    + "_answer_processor")
    batch_size = sample_list.context_tokens_enc.size(0)
    pred_answers = model_output["scores"].argmax(dim=-1)
    context_tokens_enc = sample_list.context_tokens_enc.cpu().numpy()
    gt_answers_enc = sample_list.gt_answers_enc.cpu().numpy()
    answer_space_size = answer_processor.get_true_vocab_size()
    predictions = []
    from pythia.utils.objects_to_byte_tensor import dec_bytes2obj
    from pythia.utils.text_utils import word_tokenize
    for idx in range(batch_size):
        # Context tokens are transported as byte tensors; decode back.
        context_tokens = dec_bytes2obj(context_tokens_enc[idx])
        answer_words = []
        for answer_id in pred_answers[idx].tolist():
            if answer_id >= answer_space_size:
                # Pointer into this sample's context/OCR tokens.
                answer_id -= answer_space_size
                answer_words.append(
                    word_tokenize(context_tokens[answer_id]))
            else:
                if answer_id == answer_processor.EOS_IDX:
                    break
                answer_words.append(
                    answer_processor.answer_vocab.idx2word(answer_id))
        # Re-attach possessive 's split off by tokenization.
        pred_answer = ' '.join(answer_words).replace(" 's", "'s")
        gt_answers = dec_bytes2obj(gt_answers_enc[idx])
        predictions.append({
            "pred_answer": pred_answer,
            "gt_answers": gt_answers,
        })
    accuracy = self.evaluator.eval_pred_list(predictions)
    # Keep the metric on the batch's device (works for CPU and GPU).
    accuracy = torch.tensor(accuracy).to(
        sample_list.context_tokens_enc.device)
    return accuracy
def load(self):
    """Bootstrap the whole training pipeline: process group, datasets,
    logger, model, optimizer and extras, in that order."""
    self._init_process_group()
    self.run_type = self.config.training_parameters.get(
        "run_type", "train")
    self.dataset_loader = DatasetLoader(self.config)
    self._datasets = self.config.datasets
    # Register the logger so other components can fetch it globally.
    self.writer = Logger(self.config)
    registry.register("writer", self.writer)
    self.configuration = registry.get("configuration")
    self.configuration.pretty_print()
    self.config_based_setup()
    self.load_task()
    self.load_model()
    self.load_optimizer()
    self.load_extras()
    # NOTE(review): 'visualization_flag' is not defined in this method —
    # presumably a module-level flag; confirm it exists at import time,
    # otherwise this raises NameError. The vocab path is hard-coded.
    if visualization_flag:
        self.generator = GenerateWord(
            'data/m4c_captioner_vocabs/textcaps/vocab_textcap_threshold_10.txt'
        )
def __init__(self, config, *args, **kwargs):
    """Build the answer vocabulary and optional preprocessor.

    Parameters
    ----------
    config : object
        Must have ``vocab_file``; may have ``preprocessor`` and
        ``num_answers``.

    Raises
    ------
    AttributeError
        If ``vocab_file`` is missing.
    ValueError
        If a requested preprocessor cannot be constructed.
    """
    self.writer = registry.get("writer")
    if not hasattr(config, "vocab_file"):
        raise AttributeError("'vocab_file' argument required, but not "
                             "present in AnswerProcessor's config")
    self.answer_vocab = VocabDict(config.vocab_file, *args, **kwargs)

    self.preprocessor = None
    if hasattr(config, "preprocessor"):
        self.preprocessor = Processor(config.preprocessor)
        # FIX: this check must live inside the hasattr branch. The
        # original raised unconditionally, so any config WITHOUT a
        # preprocessor crashed (with an AttributeError while formatting
        # config.preprocessor) even though the attribute is optional.
        if self.preprocessor is None:
            raise ValueError("No processor named {} is defined.".format(
                config.preprocessor))

    if hasattr(config, "num_answers"):
        self.num_answers = config.num_answers
    else:
        self.num_answers = self.DEFAULT_NUM_ANSWERS
        warnings.warn("'num_answers' not defined in the config. "
                      "Setting to default of {}".format(
                          self.DEFAULT_NUM_ANSWERS))
def _load_state_dict_mapping(self, ckpt_model):
    """Load selected submodules of the trainer's model from a checkpoint
    whose top-level keys use legacy names.

    Parameters
    ----------
    ckpt_model : dict
        Checkpoint state with legacy section names (values of
        ``attr_mapping``) mapping to state dicts.
    """
    model = self.trainer.model
    attr_mapping = {
        "image_feature_encoders": "img_feat_encoders",
        "image_feature_embeddings_list": "img_embeddings_list",
        "image_text_multi_modal_combine_layer": "multi_modal_combine_layer",
        "text_embeddings": "text_embeddings",
        "classifier": "classifier",
    }
    data_parallel = registry.get("data_parallel")
    if not data_parallel:
        # Strip any DataParallel "module." prefix from the model-side
        # attribute names. FIX: rebuild the dict instead of popping
        # while iterating it (RuntimeError in Python 3); debug prints
        # removed.
        attr_mapping = {
            key.replace("module.", ""): value
            for key, value in attr_mapping.items()
        }
    for key, value in attr_mapping.items():
        getattr(model, key).load_state_dict(ckpt_model[value])
def _summarize_report(self, meter, prefix="", should_print=True, extra=None):
    """Log the meter's scalars and optionally print a one-line summary.

    Parameters
    ----------
    meter : object
        Provides ``get_scalar_dict()``, ``delimiter`` and ``__str__``.
    prefix : str
        Optional label prepended to the printed line.
    should_print : bool
        When False, only the scalar logging happens.
    extra : dict, optional
        Extra key/value pairs appended to the printed line.

    Only the main process emits anything.
    """
    # FIX: avoid a mutable default argument ({}).
    if extra is None:
        extra = {}
    if not is_main_process():
        return

    scalar_dict = meter.get_scalar_dict()
    self.writer.add_scalars(scalar_dict, registry.get("current_iteration"))

    if not should_print:
        return
    print_str = []
    if len(prefix):
        print_str += [prefix + ":"]
    print_str += [
        "{}/{}".format(self.current_iteration, self.max_iterations)
    ]
    print_str += [str(meter)]
    print_str += [
        "{}: {}".format(key, value) for key, value in extra.items()
    ]
    self.writer.write(meter.delimiter.join(print_str))
def _load(self, file, force=False):
    """Load a checkpoint file into the trainer.

    Two modes:
    * full resume (empty pretrained mapping, or ``force=True``): the
      whole state dict, optimizer state and bookkeeping counters are
      restored;
    * pretrained load: only parameters covered by
      ``training_parameters.pretrained_mapping`` are copied into the
      current model, matched by prefix rename.

    Parameters
    ----------
    file : str
        Path of the checkpoint to load.
    force : bool
        When True, ignore the pretrained mapping and do a full load.
    """
    self.trainer.writer.write("Loading checkpoint")
    ckpt = self._torch_load(file)
    data_parallel = registry.get("data_parallel") or registry.get(
        "distributed")
    if "model" in ckpt:
        ckpt_model = ckpt["model"]
    else:
        # Bare state dict: wrap it so the metadata lookups below work.
        ckpt_model = ckpt
        ckpt = {"model": ckpt}
    pretrained_mapping = self.config.training_parameters.pretrained_mapping
    if not self.config.training_parameters.load_pretrained or force is True:
        pretrained_mapping = {}
    new_dict = {}
    # TODO: Move to separate function
    for attr in ckpt_model:
        if "fa_history" in attr:
            # Legacy key rename: 'fa_history' became 'fa_context'.
            new_dict[attr.replace("fa_history",
                                  "fa_context")] = ckpt_model[attr]
        elif data_parallel is False and attr.startswith("module."):
            # In case the ckpt was actually a data parallel model
            # replace first module. from dataparallel with empty string
            new_dict[attr.replace("module.", "", 1)] = ckpt_model[attr]
        elif data_parallel is not False and not attr.startswith("module."):
            # Opposite direction: running data-parallel now, checkpoint
            # was saved without the "module." prefix.
            new_dict["module." + attr] = ckpt_model[attr]
        else:
            new_dict[attr] = ckpt_model[attr]
    if len(pretrained_mapping.items()) == 0:
        # Full restore: model weights, optimizer and training counters.
        final_dict = new_dict
        self.trainer.model.load_state_dict(final_dict)
        if "optimizer" in ckpt:
            self.trainer.optimizer.load_state_dict(ckpt["optimizer"])
        else:
            warnings.warn("'optimizer' key is not present in the "
                          "checkpoint asked to be loaded. Skipping.")
        self.trainer.early_stopping.init_from_checkpoint(ckpt)
        self.trainer.writer.write("Checkpoint loaded")
        if "best_iteration" in ckpt:
            self.trainer.current_iteration = ckpt["best_iteration"]
            registry.register("current_iteration",
                              self.trainer.current_iteration)
        if "best_epoch" in ckpt:
            self.trainer.current_epoch = ckpt["best_epoch"]
            registry.register("current_epoch", self.trainer.current_epoch)
    else:
        # Pretrained load: copy tensors whose names match after mapping
        # the checkpoint prefix (key) to the model's prefix (value).
        final_dict = {}
        model = self.trainer.model
        own_state = model.state_dict()
        for key, value in pretrained_mapping.items():
            key += "."
            value += "."
            for attr in new_dict:
                for own_attr in own_state:
                    if (key in attr and value in own_attr and attr.replace(
                            key, "") == own_attr.replace(value, "")):
                        self.trainer.writer.write("Copying " + attr + " "
                                                  + own_attr)
                        own_state[own_attr].copy_(new_dict[attr])
        self.trainer.writer.write("Pretrained model loaded")
def __init__(self):
    """Builder for the CLEVR dataset."""
    super().__init__("clevr")
    self.dataset_class = CLEVRDataset
    self.writer = registry.get("writer")
def __init__(self, task_name):
    """Base task constructor: record the name, grab the global writer."""
    super(BaseTask, self).__init__()
    self.writer = registry.get("writer")
    self.task_name = task_name
def _init_classifier(self):
    """Build the weight-normalized classifier head from config values."""
    hidden_size = self.config["text_embedding"]["num_hidden"]
    dropout_rate = self.config["classifier"]["dropout"]
    # Output size is dataset-specific and published in the registry.
    num_outputs = registry.get(self._datasets[0] + "_num_final_outputs")
    self.classifier = WeightNormClassifier(
        hidden_size,
        num_outputs,
        hidden_size * 2,
        dropout_rate,
    )
def _build_word_embedding(self):
    """Create a 300-dim word embedding from the dataset's text-processor
    vocabulary."""
    processor = registry.get(self._datasets[0] + "_text_processor")
    self.word_embedding = processor.vocab.get_embedding(
        torch.nn.Embedding, embedding_dim=300
    )
def __init__(self, config):
    """BAN model constructor: keep the config and resolve the active
    datasets from the global configuration."""
    super(BAN, self).__init__(config)
    self.config = config
    global_config = registry.get("config")
    self._global_config = global_config
    # Comma-separated list, e.g. "vqa2,textvqa".
    self._datasets = global_config.datasets.split(",")
def __init__(self, config):
    """Constructor: resolve active datasets, then build word embeddings."""
    super().__init__(config)
    global_config = registry.get("config")
    self._global_config = global_config
    # Comma-separated dataset list from the global configuration.
    self._datasets = global_config.datasets.split(",")
    self._build_word_embedding()
def __init__(self, vocab_file, embedding_name, *args, **kwargs):
    """Use this vocab class when you have a custom vocabulary class but
    you want to use pretrained embedding vectors for it. This will only
    load the vectors which intersect with your vocabulary. Use the
    embedding_name specified in torchtext's pretrained aliases:
    ['charngram.100d', 'fasttext.en.300d', 'fasttext.simple.300d',
     'glove.42B.300d', 'glove.840B.300d', 'glove.twitter.27B.25d',
     'glove.twitter.27B.50d', 'glove.twitter.27B.100d',
     'glove.twitter.27B.200d', 'glove.6B.50d', 'glove.6B.100d',
     'glove.6B.200d', 'glove.6B.300d']

    Parameters
    ----------
    vocab_file : str
        Vocabulary file containing list of words with one word per line
        which will be used to collect vectors
    embedding_name : str
        Embedding name picked up from the list of the pretrained aliases
        mentioned above
    """
    super(IntersectedVocab, self).__init__(vocab_file, *args, **kwargs)

    self.type = "intersected"

    # Alias format is "<name>.<middle>.<dim>d", e.g. "glove.6B.300d".
    name = embedding_name.split(".")[0]
    dim = embedding_name.split(".")[2][:-1]
    middle = embedding_name.split(".")[1]

    class_name = EMBEDDING_NAME_CLASS_MAPPING[name]

    if not hasattr(vocab, class_name):
        from pythia.common.registry import registry

        writer = registry.get("writer")
        # FIX: the original had `error = "..." % name, "error"` — the
        # trailing ', "error"' made `error` a TUPLE, so both the log
        # line and the RuntimeError carried a tuple instead of a string.
        error = "Unknown embedding type: %s" % name
        if writer is not None:
            writer.write(error, "error")
        raise RuntimeError(error)

    params = [middle]
    if name == "glove":
        params.append(int(dim))

    vector_cache = os.path.join(get_pythia_root(), ".vector_cache")
    embedding = getattr(vocab, class_name)(*params, cache=vector_cache)

    self.vectors = torch.empty(
        (self.get_size(), len(embedding.vectors[0])), dtype=torch.float
    )
    self.embedding_dim = len(embedding.vectors[0])

    # First four indices get small distinct constant vectors —
    # presumably the special tokens (pad/sos/eos/unk); confirm against
    # the base vocab class.
    for i in range(0, 4):
        self.vectors[i] = torch.ones_like(self.vectors[i]) * 0.1 * i

    for i in range(4, self.get_size()):
        word = self.itos[i]
        embedding_index = embedding.stoi.get(word, None)

        if embedding_index is None:
            # Out-of-vocabulary word: reuse the UNK vector.
            self.vectors[i] = self.vectors[self.UNK_INDEX].clone()
        else:
            self.vectors[i] = embedding.vectors[embedding_index]
def __init__(self):
    """Caption BLEU-4 metric; relies on the registered COCO caption
    processor."""
    metric_name = "caption_bleu4"
    super().__init__(metric_name)
    self.caption_processor = registry.get("coco_caption_processor")
def __init__(self):
    """Caption BLEU-4 metric; keeps a handle on nltk's bleu_score module
    and the registered COCO caption processor."""
    # Imported lazily so nltk is only required when this metric is used.
    import nltk.translate.bleu_score as bleu_score

    super().__init__("caption_bleu4")
    self._bleu_score = bleu_score
    self.caption_processor = registry.get("coco_caption_processor")
def __init__(self, dataset_type="train"):
    """Record the dataset split and fetch shared globals.

    Parameters
    ----------
    dataset_type : str
        Split name, e.g. "train", "val" or "test".
    """
    self._dataset_type = dataset_type
    self._is_main_process = is_main_process()
    self.writer = registry.get("writer")
    self._global_config = registry.get("config")
def __init__(self):
    """Builder for the verbal-spatial dataset."""
    super().__init__("verbal_spatial")
    self.dataset_class = Verbal_SpatialDataset
    self.writer = registry.get("writer")
def __init__(self):
    """Caption BLEU-4 metric (COCO caption processor hard-coded)."""
    super().__init__("caption_bleu4")
    # TODO: fix this so it can set coco_caption_processor or
    # youcookII_caption_processor
    self.caption_processor = registry.get("coco_caption_processor")
def __init__(self):
    """Builder for the Visual Genome dataset."""
    super().__init__()
    self.writer = registry.get("writer")
    self.dataset_name = "visual_genome"
    self.dataset_proper_name = "Visual Genome"
    self.dataset_class = VisualGenomeDataset
def __init__(self, metric_list):
    """Wrap one or more metric configs.

    A single (non-list) config is promoted to a one-element list before
    the metrics are instantiated.
    """
    metrics = metric_list if isinstance(metric_list, list) else [metric_list]
    self.writer = registry.get("writer")
    self.metrics = self._init_metrics(metrics)
def __init__(self):
    """Builder for the obj-part dataset."""
    super().__init__("objpart")
    self.dataset_class = ObjPartDataset
    self.writer = registry.get("writer")
def __init__(self, config):
    """Keep the config and the globally registered writer."""
    super().__init__()
    self.writer = registry.get("writer")
    self.config = config
def __init__(self):
    """Builder for the VQAmb dataset."""
    super().__init__("vqamb")
    self.dataset_class = VQAmbDataset
    self.writer = registry.get("writer")
def forward(self, sample_list, model_output):
    """Masked BCE decoding loss plus an ANLS-weighted reward term.

    Computes the standard masked binary cross-entropy on the decoding
    scores, then decodes the argmax predictions, scores them against the
    ground-truth answers with ANLS, and adds a second BCE term against
    the argmax ("fake") one-hot targets weighted by (reward - 0.5) —
    appears to act as a policy-gradient-style correction.
    """
    scores = model_output["scores"]
    targets = sample_list["targets"]
    loss_mask = sample_list["train_loss_mask"]
    assert scores.dim() == 3 and loss_mask.dim() == 2
    losses = F.binary_cross_entropy_with_logits(scores, targets,
                                                reduction="none")
    # Zero out loss on padded decoding steps.
    losses *= loss_mask.unsqueeze(-1)

    # add the anls as additional rewards
    # rewards calculation
    batch_size = sample_list.context_tokens_enc.size(0)
    pred_answers = scores.argmax(dim=-1)
    context_tokens_enc = sample_list.context_tokens_enc.cpu().numpy()
    gt_answers_enc = sample_list.gt_answers_enc.cpu().numpy()
    answer_processor = registry.get(sample_list.dataset_name
                                    + "_answer_processor")
    answer_space_size = answer_processor.get_true_vocab_size()
    predictions = []
    from pythia.utils.objects_to_byte_tensor import dec_bytes2obj
    from pythia.utils.text_utils import word_tokenize
    for idx in range(batch_size):
        # Context tokens travel as byte tensors; decode them back.
        context_tokens = dec_bytes2obj(context_tokens_enc[idx])
        answer_words = []
        for answer_id in pred_answers[idx].tolist():
            if answer_id >= answer_space_size:
                # Index beyond the fixed vocab points into this
                # sample's context/OCR tokens.
                answer_id -= answer_space_size
                answer_words.append(
                    word_tokenize(context_tokens[answer_id]))
            else:
                if answer_id == answer_processor.EOS_IDX:
                    break
                answer_words.append(
                    answer_processor.answer_vocab.idx2word(answer_id))
        # Re-attach possessive 's split off by tokenization.
        pred_answer = ' '.join(answer_words).replace(" 's", "'s")
        gt_answers = dec_bytes2obj(gt_answers_enc[idx])
        predictions.append({
            "pred_answer": pred_answer,
            "gt_answers": gt_answers,
        })

    # Per-sample reward: best ANLS of the prediction over all gt answers.
    pred_scores = []
    for entry in predictions:
        anls = max(
            self.get_anls(entry['pred_answer'], gt)
            for gt in entry['gt_answers'])
        pred_scores.append(anls)
    rewards = torch.tensor(pred_scores).reshape(-1, 1).to(scores.device)

    max_element = torch.argmax(F.softmax(scores, -1), -1)  # 21*12
    fake_targets = torch.zeros_like(targets)  # 21*12*5050
    # Build one-hot targets at the argmax positions, per batch element.
    for i in range(fake_targets.shape[0]):
        fake_targets[i][range(fake_targets.shape[1]), max_element[i]] = 1
    fake_bce = F.binary_cross_entropy_with_logits(scores, fake_targets,
                                                  reduction="none")
    fake_bce *= loss_mask.unsqueeze(-1)
    # Center rewards at 0.5 so poor predictions contribute negatively;
    # 1e-8 presumably avoids an exactly-zero weight — confirm intent.
    fake_bce *= (rewards - 0.5 + 1e-8).unsqueeze(-1)
    # Normalize by the number of unmasked steps (at least one).
    count = torch.max(torch.sum(loss_mask), self.one.to(losses.device))
    loss = torch.sum(losses) / count
    # Fixed weighting between the supervised BCE and the reward term.
    alpha = 1000
    loss = alpha * loss + torch.sum(fake_bce) / count
    return loss
def __init__(self, config):
    """Build BERT configs for the multimodal transformer and the global
    graph, and resolve the active datasets."""
    super().__init__(config)
    dataset_string = registry.get("config").datasets
    self._datasets = dataset_string.split(",")
    self.mmt_config = BertConfig(**self.config.mmt)
    self.graph_config = BertConfig(**self.config.global_graph)