def _init_processors(self):
    with open(os.path.join(BASE_VQA_DIR_PATH, "model_data/pythia.yaml")) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config = ConfigNode(config)
    # Remove warning
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
    text_processor_config = vqa_config.processors.text_processor
    answer_processor_config = vqa_config.processors.answer_processor

    text_processor_config.params.vocab.vocab_file = os.path.join(
        BASE_VQA_DIR_PATH, "model_data/vocabulary_100k.txt"
    )
    answer_processor_config.params.vocab_file = os.path.join(
        BASE_VQA_DIR_PATH, "model_data/answers_vqa.txt"
    )
    # Build the processors, as they will be needed to preprocess questions
    # coming from the user
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.answer_processor = VQAAnswerProcessor(answer_processor_config.params)

    registry.register("vqa2_text_processor", self.text_processor)
    registry.register("vqa2_answer_processor", self.answer_processor)
    registry.register(
        "vqa2_num_final_outputs", self.answer_processor.get_vocab_size()
    )
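# Usage sketch (an assumption, not part of the original snippet): given the
# text processor built above, a raw question can be converted into the padded
# index tensor the model consumes. The "text" key follows Pythia's
# VocabProcessor convention.
def encode_question(text_processor, question):
    # VocabProcessor expects a dict with a "text" key and returns the
    # padded LongTensor of vocabulary indices under the same key.
    processed = text_processor({"text": question})
    return processed["text"]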
def test_caption_processor(self):
    path = os.path.join(
        os.path.abspath(__file__),
        "../../../pythia/common/defaults/configs/tasks/captioning/coco.yml",
    )
    with open(os.path.abspath(path)) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    caption_processor_config = captioning_config.processors.caption_processor

    vocab_path = os.path.join(os.path.abspath(__file__), "../../modules/vocab.txt")
    caption_processor_config.params.vocab.vocab_file = os.path.abspath(vocab_path)
    caption_processor = CaptionProcessor(caption_processor_config.params)

    tokens = [1, 4, 5, 6, 4, 7, 8, 2, 0, 0, 0]
    caption = caption_processor(tokens)

    # Test that start, stop, and pad tokens are removed
    self.assertNotIn('<s>', caption["tokens"])
    self.assertNotIn('</s>', caption["tokens"])
    self.assertNotIn('<pad>', caption["tokens"])

    # Test that the caption is correct
    self.assertEqual(caption["caption"], "a man with a red helmet")
def build_processors(self):
    print('Tiki : Initializing : Building - Text Processors')
    with open('/final/data/pythia.yaml') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    config.training_parameters.evalai_inference = True  # Remove warning
    registry.register('config', config)
    self.config = config

    vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
    text_processor_config = vqa_config.processors.text_processor
    answer_processor_config = vqa_config.processors.answer_processor

    text_processor_config.params.vocab.vocab_file = '/final/data/vocabulary_100k.txt'
    answer_processor_config.params.vocab_file = '/final/data/answers_vqa.txt'

    self.text_processor = VocabProcessor(text_processor_config.params)
    self.answer_processor = VQAAnswerProcessor(answer_processor_config.params)

    registry.register('vqa2_text_processor', self.text_processor)
    registry.register('vqa2_answer_processor', self.answer_processor)
    registry.register('vqa2_num_final_outputs',
                      self.answer_processor.get_vocab_size())
def __init__(self, use_constrained=False):
    super(PythiaCaptioner, self).__init__()
    # Load the configuration file (config_file is expected to be defined at
    # module level)
    with open(config_file) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)

    self.use_constrained = use_constrained

    # The following block reads some configuration parameters in Pythia
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    # text_processor and caption_processor are used to pre-process the text
    text_processor_config.params.vocab.vocab_file = vocab_file
    caption_processor_config.params.vocab.vocab_file = vocab_file
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)

    self.model = self._build_model()
def __init__(self, max_pred, mask_prob, vocab_words, indexer, max_len=512,
             block_mask=False, truncate_config={}, mask_image_regions=False,
             mode="s2s", len_vis_input=49, vis_mask_prob=0.25,
             region_bbox_prefix='', region_bbox_file=None,
             region_det_file_prefix='', local_rank=-1, load_vqa_ann=False,
             id_digits=3):
    super().__init__()
    self.max_pred = max_pred  # max tokens of prediction
    self.mask_prob = mask_prob  # masking probability
    self.vocab_words = vocab_words  # vocabulary (sub)words
    self.indexer = indexer  # function from token to token index
    self.max_len = max_len
    self._tril_matrix = torch.tril(
        torch.ones((max_len, max_len), dtype=torch.long))
    self.always_truncate_tail = truncate_config.get(
        'always_truncate_tail', False)
    self.max_len_b = truncate_config.get('max_len_b', None)
    self.trunc_seg = truncate_config.get('trunc_seg', None)
    self.mask_image_regions = mask_image_regions
    assert mode in ("s2s", "bi")
    self.mode = mode
    self.region_bbox_prefix = region_bbox_prefix
    self.region_bbox_file = region_bbox_file
    self.region_det_file_prefix = region_det_file_prefix
    self.id_digits = id_digits
    self.len_vis_input = len_vis_input
    self.vis_mask_prob = vis_mask_prob
    self.task_idx = 0

    # for images
    if load_vqa_ann:
        # import packages from pythia
        import pythia.tasks.processors as pythia_proc  # VQAAnswerProcessor
        from pythia.utils.configuration import ConfigNode
        args = {
            'vocab_file': '/home/jupyter/VLP/pythia/data/vocabs/answers_vqa.txt',
            'num_answers': 10,
            'preprocessor': {'type': 'simple_word', 'params': {}}
        }
        args = ConfigNode(args)
        self.ans_proc = pythia_proc.registry.get_processor_class(
            'vqa_answer')(args)
    else:
        self.ans_proc = None
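# Usage sketch (an assumption based on Pythia's VQAAnswerProcessor; the
# "answers_scores" key follows Pythia's convention for the soft target vector
# and may differ across versions):
# targets = self.ans_proc({"answers": ["2", "2", "two"] + ["2"] * 7})["answers_scores"]
# targets would be a FloatTensor over the answer vocabulary, usable as VQA
# soft labels.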
def __init__(self, use_constrained=False):
    super(PythiaCaptioner, self).__init__()
    # Load the configuration file (config_file is expected to be defined at
    # module level)
    with open(config_file) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)

    self.use_constrained = use_constrained

    # TODO: not sure what these two lines really mean
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    text_processor_config.params.vocab.vocab_file = vocab_file
    caption_processor_config.params.vocab.vocab_file = vocab_file
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)

    self.model = self._build_model()
def _init_processors(self):
    with open(model_yaml) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config = ConfigNode(config)
    # Remove warning
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    # captioning_config = config.task_attributes.captioning.dataset_attributes.youcookII
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    text_processor_config.params.vocab.vocab_file = "content/model_data/vocabulary_captioning_thresh5.txt"
    caption_processor_config.params.vocab.vocab_file = "content/model_data/vocabulary_captioning_thresh5.txt"
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)
def __init__(self, max_pred, mask_prob, vocab_words, indexer, max_len=512,
             block_mask=False, new_segment_ids=False, truncate_config={},
             mask_image_regions=False, mode="s2s", len_vis_input=49,
             vis_mask_prob=0.25, enable_butd=False, region_bbox_file='',
             region_det_file_prefix='', local_rank=-1, load_vqa_ann=False):
    super().__init__()
    self.max_pred = max_pred  # max tokens of prediction
    self.mask_prob = mask_prob  # masking probability
    self.vocab_words = vocab_words  # vocabulary (sub)words
    self.indexer = indexer  # function from token to token index
    self.max_len = max_len
    self._tril_matrix = torch.tril(torch.ones(
        (max_len, max_len), dtype=torch.long))
    self.new_segment_ids = new_segment_ids
    self.always_truncate_tail = truncate_config.get(
        'always_truncate_tail', False)
    self.max_len_a = truncate_config.get('max_len_a', None)
    self.max_len_b = truncate_config.get('max_len_b', None)
    self.trunc_seg = truncate_config.get('trunc_seg', None)
    self.mask_image_regions = mask_image_regions
    assert mode in ("s2s", "l2r", "bi")
    self.mode = mode
    self.region_bbox_file = region_bbox_file
    self.region_det_file_prefix = region_det_file_prefix

    with open(self.region_bbox_file, 'rb') as region_bbox_f:
        self.bbox_dict = pickle.load(region_bbox_f, encoding="bytes")

    if mode == 's2s':
        self.task_idx = 3  # relax projection layer for different tasks
    elif mode == 'bi':
        self.task_idx = 0
    elif mode == 'l2r':
        self.task_idx = 1

    self.len_vis_input = len_vis_input
    self.vis_mask_prob = vis_mask_prob

    # for images
    self.enable_butd = enable_butd
    if not enable_butd:
        self.Resize = transforms.Resize((255, 255))
        self.RandomCrop = transforms.RandomCrop((224, 224))
        self.ToTensor = transforms.ToTensor()
        self.res_Normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                                  [0.229, 0.224, 0.225])
    else:
        if load_vqa_ann:
            # import packages from pythia
            import pythia.tasks.processors as pythia_proc  # VQAAnswerProcessor
            from pythia.utils.configuration import ConfigNode
            args = {'vocab_file': 'pythia/data/vocabs/answers_vqa.txt',
                    'num_answers': 10,
                    'preprocessor': {'type': 'simple_word', 'params': {}}}
            args = ConfigNode(args)
            self.ans_proc = pythia_proc.registry.get_processor_class(
                'vqa_answer')(args)
        else:
            self.ans_proc = None
def __init__(self):
    config_file = 'model_data/butd.yaml'
    vocab_file = 'model_data/vocabulary_captioning_thresh5.txt'
    with open(config_file) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    text_processor_config.params.vocab.vocab_file = vocab_file
    text_processor = VocabProcessor(text_processor_config.params)
    self.vocab = text_processor.vocab
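# Usage sketch (an assumption, not from the original): text_processor.vocab is
# a Pythia Vocab object, so the extracted self.vocab can report its size.
def get_vocab_size(wrapper):
    # wrapper is an instance of the class whose __init__ is defined above
    return wrapper.vocab.get_size()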
def __init__(self, config, *args, **kwargs):
    if not hasattr(config, "vocab"):
        raise AttributeError(
            "Config passed to the processor has no attribute vocab")
    vocab_processor_config = ConfigNode(config)
    # GloVeProcessor needs vocab type to be "intersected"
    vocab_processor_config.vocab.type = "intersected"

    if "vocab_file" not in vocab_processor_config.vocab:
        warnings.warn("'vocab_file' key is not present in the config."
                      " Switching to pretrained vocab.")
        vocab_processor_config.vocab.type = "pretrained"

    super().__init__(vocab_processor_config, *args, **kwargs)
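# Instantiation sketch (assumptions: this __init__ belongs to Pythia's
# GloVeProcessor, as the inline comment suggests; the embedding name and
# vocab file path below are illustrative, not from the original).
from pythia.utils.configuration import ConfigNode
from pythia.tasks.processors import GloVeProcessor

glove_config = ConfigNode({
    "max_length": 14,  # VocabProcessor-style padding length (assumed)
    "vocab": {
        "type": "intersected",  # overridden to "intersected" by __init__ anyway
        "embedding_name": "glove.6B.300d",
        "vocab_file": "model_data/vocabulary_100k.txt",
    },
})
glove_processor = GloVeProcessor(glove_config)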
def build_caption_model(caption_config: Dict, cuda_device: torch.device):
    """
    Parameters
    ----------
    caption_config : Dict
        Dict of BUTD and Detectron model configuration.
    cuda_device : torch.device
        Torch device to load the model to.

    Returns
    -------
    (model, caption_processor, text_processor) : Tuple[object, object, object]
        The model, the caption processor, and the text processor.
    """
    with open(caption_config["butd_model"]["config_yaml"]) as f:
        butd_config = yaml.load(f, Loader=yaml.FullLoader)
    butd_config = ConfigNode(butd_config)
    butd_config.training_parameters.evalai_inference = True
    registry.register("config", butd_config)

    caption_processor, text_processor = init_processors(
        caption_config, butd_config)

    if cuda_device == torch.device('cpu'):
        state_dict = torch.load(caption_config["butd_model"]["model_pth"],
                                map_location='cpu')
    else:
        state_dict = torch.load(caption_config["butd_model"]["model_pth"])

    model_config = butd_config.model_attributes.butd
    model_config.model_data_dir = caption_config["model_data_dir"]
    model = BUTD(model_config)
    model.build()
    model.init_losses_and_metrics()

    if list(state_dict.keys())[0].startswith('module') and \
            not hasattr(model, 'module'):
        state_dict = multi_gpu_state_to_single(state_dict)

    model.load_state_dict(state_dict)
    model.to(cuda_device)
    model.eval()

    return model, caption_processor, text_processor
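# Usage sketch: the caption_config dict below mirrors exactly the keys the
# function reads; the file paths are illustrative, not from the original.
caption_config = {
    "butd_model": {
        "config_yaml": "model_data/butd.yaml",
        "model_pth": "model_data/butd.pth",
    },
    "model_data_dir": "model_data/",
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, caption_processor, text_processor = build_caption_model(
    caption_config, device)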
def _init_processors(self): with open("model_data/butd.yaml") as f: config = yaml.load(f) config = ConfigNode(config) config.training_parameters.evalai_inference = True registry.register("config", config) self.config = config captioning_config = config.task_attributes.captioning.dataset_attributes.coco text_processor_config = captioning_config.processors.text_processor caption_processor_config = captioning_config.processors.caption_processor text_processor_config.params.vocab.vocab_file = "model_data/vocabulary_captioning_thresh5.txt" caption_processor_config.params.vocab.vocab_file = "model_data/vocabulary_captioning_thresh5.txt" self.text_processor = VocabProcessor(text_processor_config.params) self.caption_processor = CaptionProcessor(caption_processor_config.params) registry.register("coco_text_processor", self.text_processor) registry.register("coco_caption_processor", self.caption_processor)
def _init_text_embeddings(self, attr="text"): if "embeddings" not in attr: attr += "_embeddings" text_embeddings = [] text_embeddings_list_config = self.config[attr] embeddings_out_dim = 0 for text_embedding in text_embeddings_list_config: embedding_type = text_embedding.type embedding_kwargs = ConfigNode(text_embedding.params) self._update_text_embedding_args(embedding_kwargs) embedding = TextEmbedding(embedding_type, **embedding_kwargs) text_embeddings.append(embedding) embeddings_out_dim += embedding.text_out_dim setattr(self, attr + "_out_dim", embeddings_out_dim) setattr(self, attr, nn.ModuleList(text_embeddings))
def test_caption_bleu4(self):
    path = os.path.join(
        os.path.abspath(__file__),
        "../../../pythia/common/defaults/configs/datasets/captioning/coco.yml",
    )
    with open(os.path.abspath(path)) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    captioning_config = config.dataset_attributes.coco
    caption_processor_config = captioning_config.processors.caption_processor

    vocab_path = os.path.join(os.path.abspath(__file__), "..", "..",
                              "data", "vocab.txt")
    caption_processor_config.params.vocab.vocab_file = os.path.abspath(
        vocab_path)
    caption_processor = CaptionProcessor(caption_processor_config.params)
    registry.register("coco_caption_processor", caption_processor)

    caption_bleu4 = metrics.CaptionBleu4Metric()
    expected = Sample()
    predicted = dict()

    # Test complete match
    expected.answers = torch.empty((5, 5, 10))
    expected.answers.fill_(4)
    predicted["scores"] = torch.zeros((5, 10, 19))
    predicted["scores"][:, :, 4] = 1.0

    self.assertEqual(
        caption_bleu4.calculate(expected, predicted).item(), 1.0)

    # Test partial match
    expected.answers = torch.empty((5, 5, 10))
    expected.answers.fill_(4)
    predicted["scores"] = torch.zeros((5, 10, 19))
    predicted["scores"][:, 0:5, 4] = 1.0

    self.assertAlmostEqual(
        caption_bleu4.calculate(expected, predicted).item(), 0.3928, 4)
def _get_config(self, path):
    path = os.path.join(os.path.abspath(__file__), path)
    with open(os.path.abspath(path)) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
        config = ConfigNode(config)

    return config
#!/usr/bin/env python3
import yaml
from pythia.utils.configuration import ConfigNode
from pythia.tasks.processors import VocabProcessor, VQAAnswerProcessor
from torchvision import models

if __name__ == '__main__':
    resnet152 = models.resnet152(pretrained=True)

    with open('/final/data/pythia.yaml') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)

    vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
    text_processor_config = vqa_config.processors.text_processor
    answer_processor_config = vqa_config.processors.answer_processor

    text_processor_config.params.vocab.vocab_file = '/final/data/vocabulary_100k.txt'
    answer_processor_config.params.vocab_file = '/final/data/answers_vqa.txt'

    text_processor = VocabProcessor(text_processor_config.params)
    answer_processor = VQAAnswerProcessor(answer_processor_config.params)
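    # Usage sketch (an assumption, not part of the original script): the
    # processors can now encode a question and report the answer space size.
    processed = text_processor({"text": "what color is the cat?"})
    print("question tensor shape:", processed["text"].shape)
    print("number of candidate answers:", answer_processor.get_vocab_size())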