def __init__(self, use_constrained=False):
    """Build the captioner.

    Loads the Pythia YAML config, registers it (plus the text/caption
    processors) in the global registry, and constructs the model.

    Args:
        use_constrained: if True, decoding later uses constrained
            beam search (consumed by other methods of this class).
    """
    super(PythiaCaptioner, self).__init__()
    # Load configuration file.  An explicit Loader is required:
    # yaml.load without one is deprecated and constructs arbitrary
    # Python objects on untrusted input.  FullLoader matches the
    # loader already used by this file's tests.
    with open(config_file) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    self.use_constrained = use_constrained
    # Put Pythia into EvalAI-inference mode and make the config
    # discoverable through the registry, as Pythia components expect.
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    # Both processors read tokens from the same vocabulary file; the
    # text processor encodes text, the caption processor decodes
    # predicted token ids back into a caption string.
    for processor_config in (text_processor_config, caption_processor_config):
        processor_config.params.vocab.vocab_file = vocab_file
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)
    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)

    self.model = self._build_model()
def __init__(self, use_constrained=False):
    """Build the captioner.

    Loads the Pythia YAML config, registers it (plus the text/caption
    processors) in the global registry, and constructs the model.

    Args:
        use_constrained: if True, decoding later uses constrained
            beam search (consumed by other methods of this class).
    """
    super(PythiaCaptioner, self).__init__()
    # Load configuration file.  An explicit Loader is required:
    # yaml.load without one is deprecated and constructs arbitrary
    # Python objects on untrusted input.  FullLoader matches the
    # loader already used by this file's tests.
    with open(config_file) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    self.use_constrained = use_constrained
    # Flag inference mode and register the config globally so other
    # Pythia components can look it up from the registry.
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    # Both processors share the same vocabulary file.
    for processor_config in (text_processor_config, caption_processor_config):
        processor_config.params.vocab.vocab_file = vocab_file
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)
    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)

    self.model = self._build_model()
def test_caption_processor(self):
    """CaptionProcessor drops special tokens and joins the caption words."""
    config_path = os.path.abspath(os.path.join(
        os.path.abspath(__file__),
        "../../../pythia/common/defaults/configs/tasks/captioning/coco.yml",
    ))
    with open(config_path) as f:
        raw_config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(raw_config)

    coco_config = config.task_attributes.captioning.dataset_attributes.coco
    processor_config = coco_config.processors.caption_processor
    vocab_file = os.path.abspath(
        os.path.join(os.path.abspath(__file__), "../../modules/vocab.txt")
    )
    processor_config.params.vocab.vocab_file = vocab_file
    processor = CaptionProcessor(processor_config.params)

    result = processor([1, 4, 5, 6, 4, 7, 8, 2, 0, 0, 0])

    # Start, stop, and pad tokens must not survive processing.
    for special in ('<s>', '</s>', '<pad>'):
        self.assertNotIn(special, result["tokens"])
    # The remaining tokens must be joined into the expected sentence.
    self.assertEqual(result["caption"], "a man with a red helmet")
def _init_processors(self):
    """Load the Pythia config and register the text/caption processors.

    Reads the model YAML, flags EvalAI-inference mode, points both
    processors at the captioning vocabulary file, and registers the
    config and processors in the global registry.
    """
    with open(model_yaml) as f:
        # An explicit Loader is required: yaml.load without one is
        # deprecated and constructs arbitrary Python objects on
        # untrusted input.
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    # Remove warning
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    # NOTE(review): a youcookII dataset_attributes variant existed here
    # as an alternative; coco is the one in use.
    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    # Both processors share the same vocabulary file.
    vocab_path = "content/model_data/vocabulary_captioning_thresh5.txt"
    text_processor_config.params.vocab.vocab_file = vocab_path
    caption_processor_config.params.vocab.vocab_file = vocab_path
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)
def init_processors(caption_config: Dict, butd_config: Dict):
    """Build the caption and text processors.

    Reads processor settings from the BUTD config, points both vocab
    entries at the vocab file named in ``caption_config``, registers
    the processors in the registry, and returns them.
    """
    coco_config = (butd_config.task_attributes
                   .captioning.dataset_attributes.coco)
    txt_config = coco_config.processors.text_processor
    cap_config = coco_config.processors.caption_processor

    # Both processors read from the same vocabulary file.
    vocab_txt = caption_config["text_caption_processor_vocab_txt"]
    for processor_config in (txt_config, cap_config):
        processor_config.params.vocab.vocab_file = vocab_txt

    text_processor = VocabProcessor(txt_config.params)
    caption_processor = CaptionProcessor(cap_config.params)
    registry.register("coco_text_processor", text_processor)
    registry.register("coco_caption_processor", caption_processor)
    return caption_processor, text_processor
def _init_processors(self):
    """Load the BUTD config and register the text/caption processors.

    Reads ``model_data/butd.yaml``, flags EvalAI-inference mode, points
    both processors at the captioning vocabulary file, and registers
    the config and processors in the global registry.
    """
    with open("model_data/butd.yaml") as f:
        # An explicit Loader is required: yaml.load without one is
        # deprecated and constructs arbitrary Python objects on
        # untrusted input.
        config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(config)
    config.training_parameters.evalai_inference = True
    registry.register("config", config)
    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor

    # Both processors share the same vocabulary file.
    vocab_path = "model_data/vocabulary_captioning_thresh5.txt"
    text_processor_config.params.vocab.vocab_file = vocab_path
    caption_processor_config.params.vocab.vocab_file = vocab_path
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)
def test_caption_processor(self):
    """CaptionProcessor drops special tokens and joins the caption words."""
    config = self._get_config(
        "../../../pythia/common/defaults/configs/tasks/captioning/coco.yml"
    )
    coco_config = config.task_attributes.captioning.dataset_attributes.coco
    processor_config = coco_config.processors.caption_processor
    vocab_file = os.path.abspath(
        os.path.join(os.path.abspath(__file__), "..", "..", "data", "vocab.txt")
    )
    processor_config.params.vocab.vocab_file = vocab_file
    processor = CaptionProcessor(processor_config.params)

    result = processor([1, 4, 5, 6, 4, 7, 8, 2, 0, 0, 0])

    # Start, stop, and pad tokens must not survive processing.
    for special in ('<s>', '</s>', '<pad>'):
        self.assertNotIn(special, result["tokens"])
    # The remaining tokens must be joined into the expected sentence.
    self.assertEqual(result["caption"], "a man with a red helmet")
def test_caption_bleu4(self):
    """CaptionBleu4Metric: full match scores 1.0, half match ~0.3928."""
    config_path = os.path.abspath(os.path.join(
        os.path.abspath(__file__),
        "../../../pythia/common/defaults/configs/tasks/captioning/coco.yml",
    ))
    with open(config_path) as f:
        raw_config = yaml.load(f, Loader=yaml.FullLoader)
    config = ConfigNode(raw_config)

    coco_config = config.task_attributes.captioning.dataset_attributes.coco
    processor_config = coco_config.processors.caption_processor
    vocab_file = os.path.abspath(
        os.path.join(os.path.abspath(__file__), "..", "..", "data", "vocab.txt")
    )
    processor_config.params.vocab.vocab_file = vocab_file
    registry.register(
        "coco_caption_processor", CaptionProcessor(processor_config.params)
    )

    metric = metrics.CaptionBleu4Metric()
    expected = Sample()
    predicted = {}

    # Complete match: every reference token is id 4, and the predicted
    # scores put all mass on id 4 at every position.
    expected.answers = torch.full((5, 5, 10), 4.0)
    predicted["scores"] = torch.zeros((5, 10, 19))
    predicted["scores"][:, :, 4] = 1.0
    self.assertEqual(metric.calculate(expected, predicted).item(), 1.0)

    # Partial match: only the first 5 of 10 positions predict id 4.
    expected.answers = torch.full((5, 5, 10), 4.0)
    predicted["scores"] = torch.zeros((5, 10, 19))
    predicted["scores"][:, 0:5, 4] = 1.0
    self.assertAlmostEqual(
        metric.calculate(expected, predicted).item(), 0.3928, 4
    )