def main():
    """
    Print a few example dialogs from the task chosen on the command line.

    Parses the ParlAI command-line arguments (plus the ``ImageLoader``
    arguments), pairs a ``RepeatLabelAgent`` with the requested task and prints
    up to ``--num-examples`` exchanges, stopping early once the world reports
    that the epoch is done.
    """
    # Fixed seed so repeated runs make the same random choices.
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    # type=int makes argparse reject a non-numeric -n with a usage error
    # instead of failing later inside the display loop.
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    parser.set_defaults(datatype='train:ordered')

    ImageLoader.add_cmdline_args(parser)
    opt = parser.parse_args()

    opt['no_cuda'] = False
    opt['gpu'] = 0
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        # The int() cast is kept in case the value was supplied through a
        # path that bypasses argparse's type conversion.
        for _ in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
def main():
    """
    Print a few example dialogs from the task chosen on the command line.

    Parses the ParlAI command-line arguments (plus the ``ImageLoader``
    arguments), pairs a ``RepeatLabelAgent`` with the requested task and prints
    up to ``--num-examples`` exchanges, stopping early once the world reports
    that the epoch is done.
    """
    # Fixed seed so repeated runs make the same random choices.
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    # type=int makes argparse reject a non-numeric -n with a usage error
    # instead of failing later inside the display loop.
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    parser.set_defaults(datatype='train:ordered')

    ImageLoader.add_cmdline_args(parser)
    opt = parser.parse_args()

    opt['no_cuda'] = False
    opt['gpu'] = 0
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        # The int() cast is kept in case the value was supplied through a
        # path that bypasses argparse's type conversion.
        for _ in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
Esempio n. 3
0
 def __init__(self, opt, task, mturk_agent):
     """
     Store the task and MTurk agent and reset the per-episode state.

     :param opt:
         options; passed unchanged to ``ImageLoader``
     :param task:
         stored as ``self.task``
     :param mturk_agent:
         stored as ``self.mturk_agent``
     """
     self.task = task
     self.mturk_agent = mturk_agent
     self.episodeDone = False
     # Starts at -1; NOTE(review): presumably advanced before first use.
     self.turn_index = -1
     self.context = None
     self.question = None
     self.answer = None
     self.image_loader = ImageLoader(opt)
 def __init__(self, opt, shared=None):
     """
     Read the options, load the data and build the helper objects.

     :param opt:
         options; ``datatype`` must be present because ``startswith`` is
         called on its value
     :param shared:
         accepted but not used by this constructor
     """
     self.opt = opt
     self.datatype = self.opt.get('datatype')
     # True when the datatype string starts with 'train'.
     self.training = self.datatype.startswith('train')
     self.num_epochs = self.opt.get('num_epochs', 0)
     self.image_loader = ImageLoader(opt)
     data_path, self.image_path = _path(opt)
     # Second argument is the 'unittest' option (False when absent).
     self._setup_data(data_path, opt.get('unittest', False))
     self.dict_agent = DictionaryAgent(opt)
Esempio n. 5
0
 def __init__(self, opt, version='2017'):
     """
     Read the options, build the image loader and load the data.

     :param opt:
         options dictionary
     :param version:
         forwarded to ``_path`` and stored as ``self.version``
     """
     self.opt = opt
     self.version = version
     self.use_intro = opt.get('use_intro', False)
     # -1 is the fallback when 'num_cands' is not set.
     self.num_cands = opt.get('num_cands', -1)
     self.datatype = self.opt.get('datatype')
     self.include_rest_val = opt.get('include_rest_val', True)
     self.image_loader = ImageLoader(opt)
     test_info_path, annotation_path, self.image_path = _path(opt, version)
     self._setup_data(test_info_path, annotation_path, opt)
Esempio n. 6
0
 def __init__(self, opt):
     """
     Read the options, build the image loader and load the data.

     :param opt:
         options; modified in place (``image_load_task`` is overwritten).
         ``datatype`` must be present because ``startswith`` is called on it.
     """
     self.opt = opt
     # Written into the caller's opt before _path and ImageLoader see it.
     opt['image_load_task'] = 'personality_captions'
     self.image_mode = opt.get('image_mode', 'none')
     self.datatype = self.opt.get('datatype')
     self.training = self.datatype.startswith('train')
     self.include_image = opt.get('include_image')
     self.include_personality = opt.get('include_personality')
     data_path, personalities_data_path, self.image_path = _path(opt)
     self.image_loader = ImageLoader(opt)
     self._setup_data(data_path, personalities_data_path)
Esempio n. 7
0
    def __init__(self, opt: Opt, shared: TShared = None):
        """
        Reuse a shared image loader or build a new one from ``opt``.

        :param opt:
            options; when no ``shared`` is given it is modified in place so
            that it contains every key produced by a default ``ParlaiParser``
        :param shared:
            if truthy, must contain ``"image_loader"``
        """
        self.opt = opt
        self.image_model = opt.get("image_mode")
        if shared:
            self.image_loader = shared["image_loader"]
        else:
            # Only has an effect when "image_mode" is missing, in which case
            # the key is created with the value read above (None).
            opt.setdefault("image_mode", self.image_model)
            # Parse an empty argument list to obtain the parser's defaults.
            new_opt = ParlaiParser(True, False).parse_args([])
            # Fill in defaults without overriding values already in opt.
            for k, v in new_opt.items():
                if k not in opt:
                    opt[k] = v

            self.image_loader = ImageLoader(opt)
Esempio n. 8
0
 def __init__(self, opt, data_loader=None, cands=None, shared=None, **kwargs):
     """
     Load the data, or reference the data of an already-built instance.

     :param opt:
         options; ``opt['datafile']`` is read when no ``shared`` is given
     :param data_loader:
         forwarded to ``self._load`` together with ``opt['datafile']``
     :param cands:
         optional iterable of candidate strings; stored as a set of
         interned strings, or ``None`` when not given
     :param shared:
         if truthy, ``image_loader``, ``data`` and ``cands`` are taken from
         it instead of being built
     :param kwargs:
         accepted and ignored
     """
     # self.data is a list of episodes
     # each episode is a tuple of entries
     # each entry is a tuple of values for the action/observation table
     if shared:
         self.image_loader = shared.get('image_loader', None)
         self.data = shared.get('data', [])
         self.cands = shared.get('cands', None)
     else:
         self.image_loader = ImageLoader(opt)
         self.data = []
         self._load(data_loader, opt['datafile'])
         # Identity test: ``== None`` would dispatch to a custom ``__eq__``
         # and can misbehave for array-like candidate containers.
         self.cands = None if cands is None else {sys.intern(c) for c in cands}
     self.addedCands = []
     self.copied_cands = False
Esempio n. 9
0
    def __init__(self, opt, shared=None):
        """
        Set up the question data, the image loaders and the example queue.

        When ``shared`` contains ``'ques'`` the data and both loaders are taken
        from it. Otherwise the data is loaded through ``_setup_data``, a new
        ``ImageLoader`` and ``MasterLoader`` are created, and the
        ``MasterLoader`` is started.
        """
        super().__init__(opt, shared)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)
        self.image_mode = opt.get('image_mode', 'none')

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            # 'annotation' is optional in the shared dict.
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']
            self.master_loader = shared['master_loader']
        else:
            self._setup_data(data_path, annotation_path)
            self.image_loader = ImageLoader(opt)
            self.master_loader = MasterLoader(opt)
            self.master_loader.start()

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.example_queue = queue.Queue()
        self.reset()
        # An example request is only submitted when an image mode is active.
        if self.image_mode != 'none':
            self.submit_example_request()
Esempio n. 10
0
def setup_interactive():
    """
    Build the objects the interactive script needs and store them in SHARED.

    Parses the command-line options, forces a local-human task with
    ``resnet152`` image mode and CUDA disabled, then stores the options, an
    image loader, the agent, the world and an empty dialog history in
    ``SHARED``.

    :raises RuntimeError:
        if no model file was specified
    """
    opt = setup_args().parse_args(print_args=True)
    model_file = opt.get("model_file")
    if not model_file:
        raise RuntimeError("Please specify a model file")

    if opt.get("fixed_cands_path") is None:
        # Default to a candidates.txt in the same directory as the model file.
        model_dir = "/".join(model_file.split("/")[:-1])
        default_cands = os.path.join(model_dir, "candidates.txt")
        opt["fixed_cands_path"] = default_cands
        opt["override"]["fixed_cands_path"] = default_cands

    opt["task"] = "parlai.agents.local_human.local_human:LocalHumanAgent"
    opt["image_mode"] = "resnet152"
    # Disable CUDA both in the options and in the override section.
    for target in (opt, opt["override"]):
        target["no_cuda"] = True

    SHARED["opt"] = opt
    SHARED["image_loader"] = ImageLoader(opt)

    # Create model and assign it to the specified task
    model_agent = create_agent(opt, requireModelExists=True)
    SHARED["agent"] = model_agent
    SHARED["world"] = create_task(opt, model_agent)

    # Dialog History
    SHARED["dialog_history"] = []
Esempio n. 11
0
    def __init__(self, opt, agent, bot, image_idx: int, image_act: Message):
        """
        Set up the world around one image and replace it with loader output.

        :param opt:
            options; ``opt['image_stack']`` is stored on the instance
        :param agent:
            forwarded to the parent constructor
        :param bot:
            forwarded to the parent constructor; ``self.bot.model_agent.opt``
            is used to build the image loader
        :param image_idx:
            stored as ``self.image_idx``
        :param image_act:
            message whose ``'image'`` entry is first shown to the user and then
            overwritten with the loader's output
        """
        super().__init__(opt, agent=agent, bot=bot)

        self.image_stack = opt['image_stack']
        self.image_idx = image_idx
        self.image_act = image_act

        # Get a stringified version of the image to show the user
        orig_image = self.image_act['image']
        self.image_src = get_image_src(image=orig_image)

        # Get a featurized version of the image to show the bot
        # The image is written to a temporary .jpg so the loader can read it
        # by file name; the file is removed when the with-block exits.
        with NamedTemporaryFile(suffix='.jpg') as f:
            orig_image.save(f)
            image_loader = ImageLoader(self.bot.model_agent.opt)
            self.image_act.force_set('image', image_loader.load(f.name))
Esempio n. 12
0
class ImageFeaturesGenerator(object):
    """
    Produce image features through the ParlAI Image Loader.
    """

    def __init__(self, opt: Opt, shared: TShared = None):
        """
        Reuse the loader from ``shared`` or build one from ``opt``.

        When no ``shared`` is given, ``opt`` is completed in place with every
        default of a fresh ``ParlaiParser`` that it does not already define.
        """
        self.opt = opt
        self.image_model = opt.get("image_mode")
        if shared:
            self.image_loader = shared["image_loader"]
            return

        opt.setdefault("image_mode", self.image_model)
        parser_defaults = ParlaiParser(True, False).parse_args([])
        for key, value in parser_defaults.items():
            if key in opt:
                continue
            opt[key] = value

        self.image_loader = ImageLoader(opt)

    def get_image_features(self, image_id: str,
                           image: "PIL.Image") -> torch.Tensor:
        """
        Extract features for one image.

        :param image_id:
            id for image (not used by this method)
        :param image:
            PIL Image object; converted to RGB before extraction

        :return image_features:
            Image Features Tensor
        """
        rgb_image = image.convert("RGB")
        features = self.image_loader.extract(rgb_image)
        return features
Esempio n. 13
0
class FlickrDataset(Dataset):
    """A Pytorch Dataset utilizing streaming"""

    def __init__(self, opt, shared=None):
        """
        Read the options, load the data and build the helper objects.

        :param opt:
            options; ``datatype`` must be present
        :param shared:
            accepted but not used by this constructor
        """
        self.opt = opt
        self.datatype = self.opt.get('datatype')
        self.training = self.datatype.startswith('train')
        self.num_epochs = self.opt.get('num_epochs', 0)
        self.image_loader = ImageLoader(opt)
        data_path, self.image_path = _path(opt)
        self._setup_data(data_path, opt.get('unittest', False))
        self.dict_agent = DictionaryAgent(opt)

    def __getitem__(self, index):
        """
        Build the example for ``index``.

        :return:
            the bare example dict when the ``extract_image`` option is set,
            otherwise an ``(index, example)`` tuple
        """
        cap = self.data[index]
        # The numeric image id is the file name without its '.jpg' suffix.
        image_id = int(cap['filename'].replace('.jpg', ''))
        ep = {
            'text': QUESTION,
            'image': self.get_image(image_id),
            'episode_done': True,
        }
        if self.opt.get('extract_image', False):
            ep['image_id'] = image_id
            return ep

        ep['labels'] = [s['raw'] for s in cap['sentences']]
        ep['valid'] = True
        # Candidates only exist for non-training splits (see _setup_data).
        if 'train' not in self.datatype:
            ep['label_candidates'] = self.cands
        return (index, ep)

    def __len__(self):
        """Return the number of episodes."""
        return self.num_episodes()

    def _setup_data(self, data_path, unittest):
        """
        Load the entries of the split selected by ``self.datatype``.

        :param data_path:
            path of a JSON file with an ``'images'`` list; every entry has a
            ``'split'`` key and a ``'sentences'`` list of dicts with ``'raw'``
        :param unittest:
            when truthy, keep only the first 10 entries
        """
        with open(data_path) as data_file:
            raw_data = json.load(data_file)['images']

        if 'train' in self.datatype:
            split = 'train'
        elif 'valid' in self.datatype:
            split = 'val'
        else:
            split = 'test'
        self.data = [d for d in raw_data if d['split'] == split]

        if split != 'train':
            # Candidates are all raw captions of the (untruncated) split.
            self.cands = [
                s['raw'] for d in self.data for s in d['sentences']
            ]
        if unittest:
            # The entries live in self.data; the previous code sliced the
            # non-existent self.caption and raised AttributeError.
            self.data = self.data[:10]

    def get_image(self, image_id):
        """Load ``<image_path>/<image_id>.jpg`` through the image loader."""
        im_path = os.path.join(self.image_path, '%d.jpg' % (image_id))
        return self.image_loader.load(im_path)

    def num_episodes(self):
        """Return the number of loaded entries."""
        return len(self.data)

    def num_examples(self):
        """Return the number of examples (same as the number of episodes)."""
        return self.num_episodes()

    def num_images(self):
        """Return the number of images (same as the number of episodes)."""
        return self.num_episodes()
Esempio n. 14
0
 def __init__(self, opt):
     """
     Read the options, load the caption data and build the helper objects.

     :param opt:
         options; ``datatype`` must be present because ``startswith`` is
         called on its value
     :raises ModuleNotFoundError:
         if ``use_hdf5`` is set and ``h5py`` is not installed
     """
     self.opt = opt
     self.use_hdf5 = opt.get('use_hdf5', False)
     self.datatype = self.opt.get('datatype')
     self.training = self.datatype.startswith('train')
     self.num_epochs = self.opt.get('num_epochs', 0)
     self.image_loader = ImageLoader(opt)
     caption_path, self.image_path = _path(opt)
     self._setup_data(caption_path, opt.get('unittest', False))
     if self.use_hdf5:
         # h5py is imported lazily so it is only required when requested.
         try:
             import h5py
             self.h5py = h5py
         except ModuleNotFoundError:
             raise ModuleNotFoundError('Need to install h5py - `pip install h5py`')
         self._setup_image_data()
     self.dict_agent = DictionaryAgent(opt)
Esempio n. 15
0
 def __init__(self, opt, version='2014'):
     """
     Read the options, load the data and build the helper objects.

     :param opt:
         options; ``datatype`` must be present because ``startswith`` is
         called on its value
     :param version:
         forwarded to ``_path``
     :raises ImportError:
         if ``use_hdf5`` is set and ``h5py`` cannot be imported
     """
     self.opt = opt
     self.use_hdf5 = opt.get('use_hdf5', False)
     self.datatype = self.opt.get('datatype')
     self.training = self.datatype.startswith('train')
     self.num_epochs = self.opt.get('num_epochs', 0)
     self.image_loader = ImageLoader(opt)
     test_info_path, annotation_path, self.image_path = _path(opt, version)
     self._setup_data(test_info_path, annotation_path, opt.get('unittest', False))
     if self.use_hdf5:
         # h5py is imported lazily so it is only required when requested.
         try:
             import h5py
             self.h5py = h5py
         except ImportError:
             raise ImportError('Need to install h5py - `pip install h5py`')
         self._setup_image_data()
     self.dict_agent = DictionaryAgent(opt)
Esempio n. 16
0
 def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
     """
     Initialise the world state and build a raw-mode image loader.

     :param opt:
         options; ``is_sandbox``, ``max_resp_time``, ``num_images`` and
         ``task_dir`` are required keys. Modified in place: ``image_mode``
         is set to ``'raw'``.
     :param agents:
         list of agents; although it defaults to ``None`` it is indexed
         unconditionally, so at least one agent must be supplied
     :param shared:
         forwarded to the parent constructor
     :param world_tag:
         stored as ``self.world_tag``
     """
     self.turn_idx = 0
     self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
     self.chat_done = False
     self.world_tag = world_tag
     self.max_resp_time = opt['max_resp_time']  # in secs
     super().__init__(opt, agents, shared)
     self.agents = agents
     self.agent = agents[0]
     self.data = []
     self.exact_match = False
     self.num_images = opt['num_images']
     self.d_rnd = opt.get('dialog_round')
     self.image_path = opt.get('image_path')
     self.task_dir = opt['task_dir']
     # Set before building the loader so that it is created in 'raw' mode.
     opt['image_mode'] = 'raw'
     self.image_loader = ImageLoader(opt)
    def __init__(self, opt, shared=None):
        """
        Validate the datatype and split, then load or share the questions.

        :param opt:
            options; ``datatype`` is required and must start with ``train``
            or ``test``
        :param shared:
            optional dict; ``factmetrics`` and ``ques`` are reused from it
            when present
        :raises RuntimeError:
            for a datatype other than train/test, or a split id outside 0-4
        """
        super().__init__(opt)

        # Only the part before the first ':' selects the data split.
        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            # NOTE: here task_num becomes a string, while the default is int.
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError(
                    'Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = TeacherMetrics(
                    opt.get('numthreads', 1) > 1,
                    opt.get('metrics', 'default'))
            # Only assigned when factmetrics was not already present.
            self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.image_loader = ImageLoader(opt)

        self.reset()
Esempio n. 18
0
    def __init__(self, opt, shared=None):
        """
        Load or share the question data and build an image loader.

        :param opt:
            options; ``datatype`` is a required key
        :param shared:
            optional dict; ``ques`` (and ``annotation`` if present) are reused
            from it instead of being loaded. Not forwarded to the parent
            constructor.
        """
        super().__init__(opt)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)
        self.len = len(self.ques['questions'])

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        # A new loader is built even when data came from ``shared``.
        self.image_loader = ImageLoader(opt)

        self.reset()
Esempio n. 19
0
 def __init__(self, opt, shared=None):
     """
     Read the options and load the data, or reuse it from ``shared``.

     :param opt:
         options; ``datatype`` must be present because ``split`` is called
         on its value
     :param shared:
         optional dict; when it contains ``'data'``, the data and the image
         loader are taken from it
     """
     super().__init__(opt, shared)
     self.opt = opt
     self.image_mode = opt.get('image_mode', 'none')
     self.data_path, personalities_data_path, self.image_path = _path(opt)
     # Keep only the part of the datatype before the first ':'.
     self.datatype = opt.get('datatype').split(':')[0]
     self.include_personality = opt.get('include_personality')
     self.include_image = opt.get('include_image')
     if shared and 'data' in shared:
         self.data = shared['data']
         self.image_loader = shared['image_loader']
     else:
         self.image_loader = ImageLoader(opt)
         self._setup_data(self.data_path, personalities_data_path)
     self.reset()
Esempio n. 20
0
    def __init__(self, opt, shared=None):
        """
        Load the question data, or reuse it from ``shared``.

        When ``shared`` contains ``'ques'``, the questions, the optional
        annotation and the image loader are taken from it; otherwise the data
        is loaded and a new ``ImageLoader`` is created.
        """
        super().__init__(opt, shared)
        data_path, annotation_path, self.image_path = _path(opt)
        self.datafile = data_path
        self.image_mode = opt.get('image_mode', 'none')

        has_shared_questions = bool(shared) and 'ques' in shared
        if not has_shared_questions:
            # Nothing to reuse: load the data and build a loader.
            self._setup_data(data_path, annotation_path)
            self.image_loader = ImageLoader(opt)
        else:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']
        self.reset()
Esempio n. 21
0
    def __init__(self, opt, shared=None):
        """
        Load the caption data, or reuse it from ``shared``.

        With a truthy ``shared`` the captions and the image loader are taken
        from it; otherwise the data is loaded and a new ``ImageLoader`` is
        created.
        """
        super().__init__(opt, shared)
        self.image_mode = opt.get('image_mode', 'none')

        if not shared:
            # First instance: load everything from disk.
            caption_path, self.image_path = _path(opt)
            self._setup_data(caption_path)
            self.image_loader = ImageLoader(opt)
        else:
            # A previous instance already did the work; point at its data.
            self.caption = shared['caption']
            self.image_loader = shared['image_loader']

        self.reset()
Esempio n. 22
0
    def __init__(self, opt, shared=None, version='2017'):
        """
        Load the annotation data, or reuse it from ``shared``.

        With a truthy ``shared`` the optional annotation and the image loader
        are taken from it; otherwise the data is loaded for ``version`` and a
        new ``ImageLoader`` is created.
        """
        super().__init__(opt, shared)
        self.image_mode = opt.get('image_mode', 'none')

        if not shared:
            # First instance: load everything from disk.
            paths = _path(opt, version)
            test_info_path, annotation_path, self.image_path = paths
            self._setup_data(test_info_path, annotation_path)
            self.image_loader = ImageLoader(opt)
        else:
            # A previous instance already did the work; point at its data.
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']

        self.reset()
Esempio n. 23
0
    def __init__(self, opt, shared=None):
        """
        Load the question data, or reuse it from ``shared``.

        When ``shared`` contains ``'ques'``, the questions, the optional
        annotation and the image loader are taken from it; otherwise the data
        is loaded and a new ``ImageLoader`` is created. ``shared`` is not
        forwarded to the parent constructor.
        """
        super().__init__(opt)
        self.image_mode = opt.get('image_mode', 'none')

        has_shared_questions = bool(shared) and 'ques' in shared
        if not has_shared_questions:
            # First instance: load everything from disk.
            data_path, annotation_path, self.image_path = _path(opt)
            self._setup_data(data_path, annotation_path)
            self.image_loader = ImageLoader(opt)
        else:
            # A previous instance already did the work; point at its data.
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']

        self.reset()
Esempio n. 24
0
 def __init__(self, opt: Opt, shared: TShared = None):
     """
     Read the options and load the data, or reuse it from ``shared``.

     :param opt:
         options; ``datatype`` is a required key
     :param shared:
         optional dict; when it contains ``'data'``, the data, the
         personalities and the image loader are taken from it
     """
     super().__init__(opt, shared)
     self.opt = opt
     self.image_mode = opt.get('image_mode', 'no_image_model')
     self.data_path, personalities_data_path, self.image_path = _path(opt)
     # Keep only the part of the datatype before the first ':'.
     self.datatype = opt['datatype'].split(':')[0]
     self.include_personality = opt.get('include_personality')
     # Images are only included when both options are truthy.
     self.include_image = opt.get('include_image') and opt.get('load_images')
     self.num_cands = opt.get('num_cands')
     if shared and 'data' in shared:
         self.data = shared['data']
         self.personalities = shared['personalities']
         self.image_loader = shared['image_loader']
     else:
         self.image_loader = ImageLoader(opt)
         self._setup_data(self.data_path, personalities_data_path)
     # Total number of entries across the 'dialog' lists of all data items.
     self.num_exs = sum(len(d['dialog']) for d in self.data)
     self.reset()
Esempio n. 25
0
    def __init__(self, opt, shared=None):
        """
        Load the data, or reuse it from ``shared``.

        With a truthy ``shared`` the data, the image loader and the optional
        candidates are taken from it; otherwise the data is loaded and a new
        ``ImageLoader`` is created.
        """
        super().__init__(opt, shared)
        self.image_mode = opt.get('image_mode', 'none')
        self.use_intro = opt.get('use_intro', False)
        data_path, self.image_path = _path(opt)

        if not shared:
            # First instance: load everything from disk.
            self._setup_data(data_path)
            self.image_loader = ImageLoader(opt)
        else:
            # A previous instance already did the work; point at its data.
            self.data = shared['data']
            self.image_loader = shared['image_loader']
            if 'cands' in shared:
                self.cands = shared['cands']

        self.reset()
Esempio n. 26
0
def setup_interactive():
    """
    Build the objects the interactive script needs and store them in SHARED.

    Parses the command-line options, forces a local-human task with
    ``resnet152`` image mode and CUDA disabled, then stores the options, an
    image loader, the agent and the world in ``SHARED``.

    :raises RuntimeError:
        if no model file was specified
    """
    opt = setup_args().parse_args(print_args=True)
    model_file = opt.get('model_file')
    if not model_file:
        raise RuntimeError('Please specify a model file')

    if opt.get('fixed_cands_path') is None:
        # Default to a candidates.txt in the same directory as the model file.
        model_dir = '/'.join(model_file.split('/')[:-1])
        opt['fixed_cands_path'] = os.path.join(model_dir, 'candidates.txt')

    opt['task'] = 'parlai.agents.local_human.local_human:LocalHumanAgent'
    opt['image_mode'] = 'resnet152'
    # Disable CUDA both in the options and in the override section.
    for target in (opt, opt['override']):
        target['no_cuda'] = True

    SHARED['opt'] = opt
    SHARED['image_loader'] = ImageLoader(opt)

    # Create model and assign it to the specified task
    model_agent = create_agent(opt, requireModelExists=True)
    SHARED['agent'] = model_agent
    SHARED['world'] = create_task(opt, model_agent)
Esempio n. 27
0
    def __init__(self, opt, shared=None):
        """
        Load or share the question data and build an image loader.

        :param opt:
            options; ``datatype`` is a required key
        :param shared:
            optional dict; ``ques`` (and ``annotation`` if present) are reused
            from it instead of being loaded
        """
        super().__init__(opt, shared)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        # A new loader is built even when data came from ``shared``.
        self.image_loader = ImageLoader(opt)
        self.reset()
Esempio n. 28
0
    def __init__(self, opt, shared=None, version='2017'):
        """
        Read the options and load the data, or reuse it from ``shared``.

        With a truthy ``shared`` the optional annotation, the image loader and
        the candidates are taken from it; otherwise the data is loaded for
        ``version`` and a new ``ImageLoader`` is created.
        """
        super().__init__(opt, shared)
        self.version = version
        self.image_mode = opt.get('image_mode', 'none')
        self.use_intro = opt.get('use_intro', False)
        self.num_cands = opt.get('num_cands', -1)
        self.include_rest_val = opt.get('include_rest_val', False)
        paths = _path(opt, version)
        test_info_path, annotation_path, self.image_path = paths

        if not shared:
            # First instance: load everything from disk.
            self._setup_data(test_info_path, annotation_path, opt)
            self.image_loader = ImageLoader(opt)
        else:
            # A previous instance already did the work; point at its data.
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']
            self.cands = shared['cands']

        self.reset()
Esempio n. 29
0
    def __init__(self, opt, shared=None):
        """
        Set up the question data, the image loaders and the example queue.

        When ``shared`` contains ``'ques'`` the data and both loaders are taken
        from it. Otherwise the data is loaded through ``_setup_data``, a new
        ``ImageLoader`` and ``MasterLoader`` are created, and the
        ``MasterLoader`` is started.
        """
        super().__init__(opt, shared)
        data_path, annotation_path, self.image_path = _path(opt)
        self.image_mode = opt.get('image_mode', 'none')

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            # 'annotation' is optional in the shared dict.
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            self.image_loader = shared['image_loader']
            self.master_loader = shared['master_loader']
        else:
            self._setup_data(data_path, annotation_path)
            self.image_loader = ImageLoader(opt)
            self.master_loader = MasterLoader(opt)
            self.master_loader.start()

        self.example_queue = queue.Queue()
        self.reset()
        # An example request is only submitted when an image mode is active.
        if self.image_mode != 'none':
            self.submit_example_request()
Esempio n. 30
0
    def __init__(self, opt, shared=None):
        """
        Validate the datatype and split, then load or share the questions.

        :param opt:
            options; ``datatype`` is required and must start with ``train``
            or ``test``
        :param shared:
            optional dict; ``factmetrics`` and ``ques`` are reused from it
            when present
        :raises RuntimeError:
            for a datatype other than train/test, or a split id outside 0-4
        """
        super().__init__(opt)

        # Only the part before the first ':' selects the data split.
        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            # NOTE: here task_num becomes a string, while the default is int.
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = Metrics(opt)
            # Only assigned when factmetrics was not already present.
            self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.image_loader = ImageLoader(opt)

        self.reset()
Esempio n. 31
0
    def test_other_image_modes(self):
        """
        Test non-featurized image modes.

        Writes a small JPEG and a zip archive containing it into a temporary
        directory, then loads the image both directly and through the
        ``<zip path>/<member>`` form with the ``raw`` and ``ascii`` modes.
        """
        with testing_utils.tempdir() as tmp:
            image_file = 'tmp.jpg'
            image_path = os.path.join(tmp, image_file)
            image_zip_path = os.path.join(tmp, 'tmp.zip')
            image = Image.new('RGB', (16, 16), color=0)

            with PathManager.open(image_path, 'wb') as fp:
                image.save(fp, 'JPEG')

            # ZipFile does not close a file object that was handed to it, so
            # the handle is managed explicitly; this guarantees the archive is
            # flushed and closed before it is read back below.
            with PathManager.open(image_zip_path, 'wb') as zip_fp:
                with zipfile.ZipFile(zip_fp, mode='w') as zipf:
                    zipf.write(image_path, arcname=image_file)

            for im in ['raw', 'ascii']:
                loader = ImageLoader({"image_mode": im})
                loader.load(image_path)
                loader.load(f"{image_zip_path}/{image_file}")
Esempio n. 32
0
class MTurkIGCEvalWorld(MultiAgentDialogWorld):
    """
    World where an agent observes 5 images and 3 comments about the images,
    and ranks the comments.
    """

    def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
        """
        Initialise the world state and build a raw-mode image loader.

        :param opt:
            options; ``is_sandbox``, ``max_resp_time``, ``num_images`` and
            ``task_dir`` are required keys. Modified in place: ``image_mode``
            is set to ``'raw'``.
        :param agents:
            list of agents; the first one is the rater
        :param shared:
            forwarded to the parent constructor
        :param world_tag:
            label used in log output
        """
        self.turn_idx = 0
        self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
        self.chat_done = False
        self.world_tag = world_tag
        self.max_resp_time = opt['max_resp_time']  # in secs
        super().__init__(opt, agents, shared)
        self.agents = agents
        self.agent = agents[0]
        self.data = []
        self.exact_match = False
        self.num_images = opt['num_images']
        self.d_rnd = opt.get('dialog_round')
        self.image_path = opt.get('image_path')
        self.task_dir = opt['task_dir']
        # Set before building the loader so that it is created in 'raw' mode.
        opt['image_mode'] = 'raw'
        self.image_loader = ImageLoader(opt)

    def episode_done(self):
        """Return True once the chat has finished."""
        return self.chat_done

    def parley(self):
        """
        RATER is given an image, context (and possibly some questions)
        and is asked to rate the responses.

        Loops until ``num_images`` examples were rated, the rater timed out
        or disconnected, or the ratings could not be read from the reply.
        """
        # Initial Message Value
        control_msg = {'episode_done': False}
        control_msg['id'] = 'SYSTEM'
        rating_questions = self.d_rnd == 'questions'

        # First, we give RATER the image and context
        while self.turn_idx < self.num_images:
            print(self.world_tag + ' is at turn {}...'.format(self.turn_idx))
            # Send image to turker
            if rating_questions:
                control_msg['description'] = config_questions[
                    'task_description']
            else:
                control_msg['description'] = config_responses[
                    'task_description']
            generator = self.agent.example_generator
            self.example_id, igc_example = generator.pop_example()
            img = self.image_loader.load(
                self.image_id_to_path(self.example_id))
            # The image is sent as base64-encoded JPEG bytes.
            buffered = BytesIO()
            img.save(buffered, format="JPEG")
            control_msg['image'] = base64.b64encode(
                buffered.getvalue()).decode('ascii')
            control_msg['context'] = igc_example['context']

            # Setup Options for rating
            if rating_questions:
                options = list(igc_example['questions'].items())
            else:
                control_msg['question'] = igc_example['question']
                options = list(igc_example['responses'].items())
            random.shuffle(options)
            options, dup_dict = self.filter_option_duplicates(options)
            control_msg['options'] = [c[1] for c in options]

            # Collect rating from turker
            rate_msg = RATE_MSG if rating_questions else RATE_RESPONSE_MSG
            control_msg['text'] = rate_msg.format(self.turn_idx + 1)
            control_msg['new_eval'] = True
            self.agent.observe(validate(control_msg))
            time.sleep(1)
            act = self.agent.act(timeout=self.max_resp_time)
            # First timeout check
            self.check_timeout(act)
            if self.chat_done:
                break
            try:
                # Every option that was hidden as a duplicate receives the
                # rating given to the option that was actually shown.
                igc_example['ratings'] = [
                    (other_opt, rating)
                    for (shown_opt, _), rating in zip(options, act['ratings'])
                    for other_opt in dup_dict[shown_opt]
                ]
            except Exception:
                # Agent disconnected
                break
            igc_example['dialog_round_evaluated'] = self.d_rnd
            self.data.append(igc_example)
            self.turn_idx += 1

        if self.turn_idx == self.num_images:
            control_msg['text'] = CHAT_ENDED_MSG.format(self.num_images)
            self.agent.observe(validate(control_msg))
        self.chat_done = True
        return

    def image_id_to_path(self, image_id):
        """
        Return the file path of the image with the given id.

        Falls back to ``banana.jpg`` in the task directory when no image path
        is configured (empty or missing).
        """
        # ``opt.get('image_path')`` yields None when the option is missing;
        # treat that like the empty string instead of formatting 'None/...'.
        if not self.image_path:
            return os.path.join(self.task_dir, 'banana.jpg')
        # Use the argument; the previous code formatted the builtin ``id``.
        return '{}/{}.jpg'.format(self.image_path, image_id)

    def filter_option_duplicates(self, options):
        """
        Collapse options that share the same text.

        :param options:
            ``[(opt, text), (opt2, text2), ...]``
        :return:
            ``(new_options, opt_to_opt)`` where ``new_options`` keeps the
            first ``[opt, text]`` per distinct text and ``opt_to_opt`` maps
            each kept opt to the list of all opts with that text
        """
        new_options = []
        text_to_opt = {}
        opt_to_opt = {}
        for opt, text in options:
            if text not in text_to_opt:
                text_to_opt[text] = opt
                new_options.append([opt, text])
                opt_to_opt[opt] = [opt]
            else:
                opt_to_opt[text_to_opt[text]].append(opt)
        return new_options, opt_to_opt

    def check_timeout(self, act):
        """
        Mark the chat as done when ``act`` signals a timeout or disconnect.

        On a timeout every other agent is sent ``TIMEOUT_MSG``.

        :return:
            True if the chat was ended by this check, False otherwise
        """
        if act['text'] == '[TIMEOUT]' and act['episode_done']:
            control_msg = {'episode_done': True}
            control_msg['id'] = 'SYSTEM'
            control_msg['text'] = TIMEOUT_MSG
            for ag in self.agents:
                if ag.id != act['id']:
                    ag.observe(validate(control_msg))
            self.chat_done = True
            return True
        elif act['text'] == '[DISCONNECT]':
            self.chat_done = True
            return True
        else:
            return False

    def save_data(self):
        """
        Pickle the collected data into ``opt['data_path']``.

        If any agent abandoned, returned, disconnected or let the HIT expire,
        the current example is pushed back onto the example stack and the
        file name gets an ``_incomplete`` suffix.
        """
        convo_finished = not any(
            ag.hit_is_abandoned or ag.hit_is_returned or ag.disconnected
            or ag.hit_is_expired
            for ag in self.agents
        )
        if not convo_finished:
            # Push back onto the generator the example was popped from,
            # rather than relying on a leaked loop variable.
            self.agent.example_generator.push_example(self.example_id)
            print("\n**Push image {} back to stack. **\n".format(
                self.example_id))
        self.agents[0].example_generator.save_idx_stack()
        data_path = self.opt['data_path']
        os.makedirs(data_path, exist_ok=True)
        suffix = '' if convo_finished else '_incomplete'
        filename = os.path.join(
            data_path,
            '{}_{}_{}{}.pkl'.format(time.strftime("%Y%m%d-%H%M%S"),
                                    np.random.randint(0, 1000),
                                    self.task_type, suffix))
        # The context manager closes the file even if pickling fails.
        with open(filename, 'wb') as out_file:
            pickle.dump(
                {
                    'data': self.data,
                    'worker': self.agents[0].worker_id,
                    'hit_id': self.agents[0].hit_id,
                    'assignment_id': self.agents[0].assignment_id
                }, out_file)
        print('{}: Data successfully saved at {}.'.format(
            self.world_tag, filename))

    def review_work(self):
        """Run the (no-op) review function for every agent in parallel."""
        global review_agent

        def review_agent(ag):
            pass  # auto approve 5 days

        Parallel(n_jobs=len(self.agents),
                 backend='threading')(delayed(review_agent)(agent)
                                      for agent in self.agents)

    def shutdown(self):
        """
        Shutdown all mturk agents in parallel, otherwise if one mturk agent
        is disconnected then it could prevent other mturk agents from
        completing.
        """
        global shutdown_agent

        def shutdown_agent(agent):
            agent.shutdown()

        Parallel(n_jobs=len(self.agents),
                 backend='threading')(delayed(shutdown_agent)(agent)
                                      for agent in self.agents)
Esempio n. 33
0
class SplitTeacher(Teacher):
    """FVQA Teacher, which loads the json VQA data and implements its own
    `act` method for interacting with student agent.

    Every episode consists of two teacher turns: first the question together
    with its image, then a follow-up asking which fact supports the answer.
    Answers are scored with ``self.metrics`` and facts with
    ``self.factmetrics``.

    Use "fvqa:split:X" to choose between splits 0-4 (inclusive), or just
    "fvqa" to use the default split (0).
    """
    def __init__(self, opt, shared=None):
        """Build the teacher from ``opt``, reusing loaded data from ``shared``.

        Args:
            opt: option dict; reads 'datatype', 'task', 'batchsize' and
                'batchindex'.
            shared: optional dict as produced by :meth:`share`; when it
                carries 'ques' (and 'factmetrics') they are reused instead
                of being rebuilt.

        Raises:
            RuntimeError: if the datatype is not train/test, or the split ID
                in the task string is not one of 0-4.
        """
        # NOTE(review): `shared` is not forwarded to the base class here --
        # confirm whether Teacher expects to receive it.
        super().__init__(opt)

        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError(
                    'Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = Metrics(opt)
        # act() reads the datatype unconditionally, so it is always recorded
        # rather than only when factmetrics had to be created.
        self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.image_loader = ImageLoader(opt)

        self.reset()

    def num_examples(self):
        """Return the number of loaded questions."""
        return self.len

    def num_episodes(self):
        """Return the number of episodes (one per loaded question)."""
        return self.len

    def report(self):
        """Return the base report extended with a 'factmetrics' entry."""
        r = super().report()
        r['factmetrics'] = self.factmetrics.report()
        return r

    def reset(self):
        """Rewind to the start of the epoch and drop per-episode state."""
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # A reset may land between the question turn and the fact turn.
        # Without clearing this flag the next act() would emit the fact
        # follow-up with no pending labels (lastY is None).
        self.asked_question = False
        # Start one stride before the offset so the first act() lands on it.
        self.episode_idx = self.data_offset - self.step_size
        self.epochDone = False

    def reset_metrics(self):
        """Clear both the base metrics and the fact metrics."""
        super().reset_metrics()
        self.factmetrics.clear()

    def observe(self, observation):
        """Process observation for metrics.

        While the question turn is pending the reply is scored against the
        answer labels; otherwise it is scored against the fact labels and
        the stored labels are dropped, ending the episode's bookkeeping.
        """
        if self.lastY is not None:
            if self.asked_question:
                self.metrics.update(observation, self.lastY[0])
            else:
                self.factmetrics.update(observation, self.lastY[1])
                self.lastY = None
        return observation

    def act(self):
        """Return the next teacher message.

        Alternates between the question turn (image + question text,
        ``episode_done`` False) and the follow-up fact turn
        (``episode_done`` True). Labels are attached only when the datatype
        starts with 'train'.
        """
        if self.asked_question:
            # Second turn of the episode: ask for the supporting fact.
            self.asked_question = False
            action = {
                'text': 'Which fact supports this answer?',
                'episode_done': True
            }
            if self.datatype.startswith('train'):
                action['labels'] = self.lastY[1]
            # In ordered mode the epoch ends once the next stride would run
            # past the end of the data.
            next_idx = self.episode_idx + self.step_size
            if self.datatype != 'train' and next_idx >= self.num_episodes():
                self.epochDone = True
            return action

        if self.datatype == 'train':
            # Plain 'train' samples episodes at random.
            self.episode_idx = random.randrange(self.len)
        else:
            # Any other datatype walks the data in strides of step_size.
            self.episode_idx = (self.episode_idx +
                                self.step_size) % self.num_episodes()

        self.asked_question = True
        qa = self.ques[self.episode_idx]
        question = qa['question']
        img_path = self.image_path + qa['img_file']

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': False
        }

        # Strip the square brackets from the fact text before using it as
        # a label.
        human_readable = qa['fact_surface'].replace('[', '').replace(']', '')
        # lastY[0] holds the answer labels, lastY[1] the fact labels.
        self.lastY = [[qa['answer']], [human_readable]]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY[0]

        return action

    def share(self):
        """Return the shared dict extended with fact metrics and questions."""
        shared = super().share()
        shared['factmetrics'] = self.factmetrics
        shared['ques'] = self.ques
        if hasattr(self, 'facts'):
            shared['facts'] = self.facts
        return shared

    def _setup_data(self, questions_path, trainset_path, datatype, task_num):
        """Load the questions belonging to the selected split.

        Reads the question JSON at ``questions_path`` and the image list
        ``<datatype>_list_<task_num>.txt`` under ``trainset_path``, then
        keeps (in sorted key order) only the questions whose 'img_file'
        appears in that list.
        """
        print('loading: ' + questions_path)
        with open(questions_path) as questions_file:
            questions = json.load(questions_file)
        list_name = '{}_list_{}.txt'.format(datatype, task_num)
        with open(os.path.join(trainset_path, list_name)) as imageset:
            train_test_images = {line.strip() for line in imageset}
        self.ques = [
            questions[k] for k in sorted(questions)
            if questions[k]['img_file'] in train_test_images
        ]
Esempio n. 34
0
class OeTeacher(Teacher):
    """
    VQA Open-Ended teacher, which loads the json vqa data and implements its
    own `act` method for interacting with student agent.

    Each episode is a single turn carrying an image and its question.
    """
    def __init__(self, opt, shared=None):
        """Build the teacher from ``opt``, reusing loaded data from ``shared``.

        Args:
            opt: option dict; reads 'datatype', 'batchsize' and 'batchindex'.
            shared: optional dict as produced by :meth:`share`; when it
                carries 'ques' (and optionally 'annotation') they are reused
                instead of being loaded from disk.
        """
        super().__init__(opt, shared)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.image_loader = ImageLoader(opt)
        self.reset()

    def __len__(self):
        """Return the number of loaded questions."""
        return len(self.ques['questions'])

    def reset(self):
        """Rewind to the start of the epoch and drop the pending labels."""
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # Start one stride before the offset so the first act() lands on it.
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        """Return the next question message.

        Plain 'train' samples a question at random; any other datatype
        advances by ``step_size`` and flags the end of the epoch on the last
        question this teacher can reach. Labels are attached only when the
        datatype starts with 'train'.
        """
        if self.datatype == 'train':
            self.episode_idx = random.randrange(len(self))
        else:
            self.episode_idx = (self.episode_idx + self.step_size) % len(self)
            # The epoch is over once the next stride would run past the end
            # of the data. An exact-equality test against len - step_size
            # only fires when the batch offset and stride happen to line up
            # with the dataset length.
            if self.episode_idx + self.step_size >= len(self):
                self.epochDone = True

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']

        # image_path is used as a prefix for the zero-padded image id.
        img_path = self.image_path + '%012d.jpg' % (image_id)

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': True
        }

        if not self.datatype.startswith('test'):
            # Annotations are indexed in parallel with the questions.
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        """Return the shared dict extended with questions and annotations."""
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        """Load the question JSON and, outside test mode, the annotations."""
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        # Same prefix test as act(): annotations are never read for any
        # 'test...' datatype, so they are not loaded for those either.
        if not self.datatype.startswith('test'):
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)
Esempio n. 35
0
class SplitTeacher(Teacher):
    """FVQA Teacher, which loads the json VQA data and implements its own
    `act` method for interacting with student agent.

    Every episode consists of two teacher turns: first the question together
    with its image, then a follow-up asking which fact supports the answer.
    Answers are scored with ``self.metrics`` and facts with
    ``self.factmetrics``.

    Use "fvqa:split:X" to choose between splits 0-4 (inclusive), or just
    "fvqa" to use the default split (0).
    """

    def __init__(self, opt, shared=None):
        """Build the teacher from ``opt``, reusing loaded data from ``shared``.

        Args:
            opt: option dict; reads 'datatype', 'task', 'batchsize' and
                'batchindex'.
            shared: optional dict as produced by :meth:`share`; when it
                carries 'ques' (and 'factmetrics') they are reused instead
                of being rebuilt.

        Raises:
            RuntimeError: if the datatype is not train/test, or the split ID
                in the task string is not one of 0-4.
        """
        # NOTE(review): `shared` is not forwarded to the base class here --
        # confirm whether Teacher expects to receive it.
        super().__init__(opt)

        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = Metrics(opt)
        # act() reads the datatype unconditionally, so it is always recorded
        # rather than only when factmetrics had to be created.
        self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)
        self.image_loader = ImageLoader(opt)

        self.reset()

    def __len__(self):
        """Return the number of loaded questions."""
        return self.len

    def report(self):
        """Return the base report extended with a 'factmetrics' entry."""
        r = super().report()
        r['factmetrics'] = self.factmetrics.report()
        return r

    def reset(self):
        """Rewind to the start of the epoch and drop per-episode state."""
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # A reset may land between the question turn and the fact turn.
        # Without clearing this flag the next act() would emit the fact
        # follow-up with no pending labels (lastY is None).
        self.asked_question = False
        # Start one stride before the offset so the first act() lands on it.
        self.episode_idx = self.data_offset - self.step_size
        self.epochDone = False

    def reset_metrics(self):
        """Clear both the base metrics and the fact metrics."""
        super().reset_metrics()
        self.factmetrics.clear()

    def observe(self, observation):
        """Process observation for metrics.

        While the question turn is pending the reply is scored against the
        answer labels; otherwise it is scored against the fact labels and
        the stored labels are dropped, ending the episode's bookkeeping.
        """
        if self.lastY is not None:
            if self.asked_question:
                self.metrics.update(observation, self.lastY[0])
            else:
                self.factmetrics.update(observation, self.lastY[1])
                self.lastY = None
        return observation

    def act(self):
        """Return the next teacher message.

        Alternates between the question turn (image + question text,
        ``episode_done`` False) and the follow-up fact turn
        (``episode_done`` True). Labels are attached only when the datatype
        starts with 'train'.
        """
        if self.asked_question:
            # Second turn of the episode: ask for the supporting fact.
            self.asked_question = False
            action = {'text': 'Which fact supports this answer?', 'episode_done': True}
            if self.datatype.startswith('train'):
                action['labels'] = self.lastY[1]
            # In ordered mode the epoch ends once the next stride would run
            # past the end of the data.
            next_idx = self.episode_idx + self.step_size
            if self.datatype != 'train' and next_idx >= len(self):
                self.epochDone = True
            return action

        if self.datatype == 'train':
            # Plain 'train' samples episodes at random.
            self.episode_idx = random.randrange(self.len)
        else:
            # Any other datatype walks the data in strides of step_size.
            self.episode_idx = (self.episode_idx + self.step_size) % len(self)

        self.asked_question = True
        qa = self.ques[self.episode_idx]
        question = qa['question']
        img_path = self.image_path + qa['img_file']

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': False
        }

        # Strip the square brackets from the fact text before using it as
        # a label.
        human_readable = qa['fact_surface'].replace('[', '').replace(']', '')
        # lastY[0] holds the answer labels, lastY[1] the fact labels.
        self.lastY = [[qa['answer']], [human_readable]]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY[0]

        return action

    def share(self):
        """Return the shared dict extended with fact metrics and questions."""
        shared = super().share()
        shared['factmetrics'] = self.factmetrics
        shared['ques'] = self.ques
        if hasattr(self, 'facts'):
            shared['facts'] = self.facts
        return shared

    def _setup_data(self, questions_path, trainset_path, datatype, task_num):
        """Load the questions belonging to the selected split.

        Reads the question JSON at ``questions_path`` and the image list
        ``<datatype>_list_<task_num>.txt`` under ``trainset_path``, then
        keeps (in sorted key order) only the questions whose 'img_file'
        appears in that list.
        """
        print('loading: ' + questions_path)
        with open(questions_path) as questions_file:
            questions = json.load(questions_file)
        list_name = '{}_list_{}.txt'.format(datatype, task_num)
        with open(os.path.join(trainset_path, list_name)) as imageset:
            train_test_images = {line.strip() for line in imageset}
        self.ques = [
            questions[k] for k in sorted(questions)
            if questions[k]['img_file'] in train_test_images
        ]