def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10)
    parser.set_defaults(datatype='train:ordered')
    ImageLoader.add_cmdline_args(parser)
    opt = parser.parse_args()
    opt['no_cuda'] = False
    opt['gpu'] = 0

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        for k in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
def __init__(self, opt, task, mturk_agent):
    self.task = task
    self.mturk_agent = mturk_agent
    self.episodeDone = False
    self.turn_index = -1
    self.context = None
    self.question = None
    self.answer = None
    self.image_loader = ImageLoader(opt)
def __init__(self, opt, shared=None):
    self.opt = opt
    self.datatype = self.opt.get('datatype')
    self.training = self.datatype.startswith('train')
    self.num_epochs = self.opt.get('num_epochs', 0)
    self.image_loader = ImageLoader(opt)
    data_path, self.image_path = _path(opt)
    self._setup_data(data_path, opt.get('unittest', False))
    self.dict_agent = DictionaryAgent(opt)
def __init__(self, opt, version='2017'):
    self.opt = opt
    self.version = version
    self.use_intro = opt.get('use_intro', False)
    self.num_cands = opt.get('num_cands', -1)
    self.datatype = self.opt.get('datatype')
    self.include_rest_val = opt.get('include_rest_val', True)
    self.image_loader = ImageLoader(opt)
    test_info_path, annotation_path, self.image_path = _path(opt, version)
    self._setup_data(test_info_path, annotation_path, opt)
def __init__(self, opt):
    self.opt = opt
    opt['image_load_task'] = 'personality_captions'
    self.image_mode = opt.get('image_mode', 'none')
    self.datatype = self.opt.get('datatype')
    self.training = self.datatype.startswith('train')
    self.include_image = opt.get('include_image')
    self.include_personality = opt.get('include_personality')
    data_path, personalities_data_path, self.image_path = _path(opt)
    self.image_loader = ImageLoader(opt)
    self._setup_data(data_path, personalities_data_path)
def __init__(self, opt: Opt, shared: TShared = None):
    self.opt = opt
    self.image_model = opt.get("image_mode")
    if shared:
        self.image_loader = shared["image_loader"]
    else:
        opt.setdefault("image_mode", self.image_model)
        new_opt = ParlaiParser(True, False).parse_args([])
        for k, v in new_opt.items():
            if k not in opt:
                opt[k] = v
        self.image_loader = ImageLoader(opt)
def __init__(self, opt, data_loader=None, cands=None, shared=None, **kwargs):
    # self.data is a list of episodes
    # each episode is a tuple of entries
    # each entry is a tuple of values for the action/observation table
    if shared:
        self.image_loader = shared.get('image_loader', None)
        self.data = shared.get('data', [])
        self.cands = shared.get('cands', None)
    else:
        self.image_loader = ImageLoader(opt)
        self.data = []
        self._load(data_loader, opt['datafile'])
        self.cands = None if cands is None else set(sys.intern(c) for c in cands)
    self.addedCands = []
    self.copied_cands = False
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    self.datatype = opt['datatype']
    data_path, annotation_path, self.image_path = _path(opt)
    self.image_mode = opt.get('image_mode', 'none')

    if shared and 'ques' in shared:
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
        self.master_loader = shared['master_loader']
    else:
        self._setup_data(data_path, annotation_path)
        self.image_loader = ImageLoader(opt)
        self.master_loader = MasterLoader(opt)
        self.master_loader.start()

    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.example_queue = queue.Queue()
    self.reset()
    if self.image_mode != 'none':
        self.submit_example_request()
def setup_interactive():
    """
    Set up the interactive script.
    """
    parser = setup_args()
    opt = parser.parse_args(print_args=True)
    if not opt.get("model_file"):
        raise RuntimeError("Please specify a model file")
    if opt.get("fixed_cands_path") is None:
        fcp = os.path.join(
            "/".join(opt.get("model_file").split("/")[:-1]), "candidates.txt"
        )
        opt["fixed_cands_path"] = fcp
        opt["override"]["fixed_cands_path"] = fcp
    opt["task"] = "parlai.agents.local_human.local_human:LocalHumanAgent"
    opt["image_mode"] = "resnet152"
    opt["no_cuda"] = True
    opt["override"]["no_cuda"] = True
    SHARED["opt"] = opt
    SHARED["image_loader"] = ImageLoader(opt)
    # Create model and assign it to the specified task
    SHARED["agent"] = create_agent(opt, requireModelExists=True)
    SHARED["world"] = create_task(opt, SHARED["agent"])
    # Dialog History
    SHARED["dialog_history"] = []
def __init__(self, opt, agent, bot, image_idx: int, image_act: Message):
    super().__init__(opt, agent=agent, bot=bot)
    self.image_stack = opt['image_stack']
    self.image_idx = image_idx
    self.image_act = image_act

    # Get a stringified version of the image to show the user
    orig_image = self.image_act['image']
    self.image_src = get_image_src(image=orig_image)

    # Get a featurized version of the image to show the bot
    with NamedTemporaryFile(suffix='.jpg') as f:
        orig_image.save(f)
        image_loader = ImageLoader(self.bot.model_agent.opt)
        self.image_act.force_set('image', image_loader.load(f.name))
class ImageFeaturesGenerator(object):
    """
    Features generator for images.

    Uses ParlAI Image Loader.
    """

    def __init__(self, opt: Opt, shared: TShared = None):
        self.opt = opt
        self.image_model = opt.get("image_mode")
        if shared:
            self.image_loader = shared["image_loader"]
        else:
            opt.setdefault("image_mode", self.image_model)
            new_opt = ParlaiParser(True, False).parse_args([])
            for k, v in new_opt.items():
                if k not in opt:
                    opt[k] = v
            self.image_loader = ImageLoader(opt)

    def get_image_features(self, image_id: str, image: "PIL.Image") -> torch.Tensor:
        """
        Get image features for given image id and Image.

        :param image_id:
            id for image
        :param image:
            PIL Image object

        :return image_features:
            Image Features Tensor
        """
        image = image.convert("RGB")
        return self.image_loader.extract(image)
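# --- A hypothetical usage sketch for the generator above (not from the
# original source): build a default ParlAI opt, pick a featurizing image_mode,
# and extract features from one PIL image. 'photo.jpg' is a placeholder path,
# and 'resnet152' is assumed to be an available mode (weights download on
# first use).
from PIL import Image
from parlai.core.params import ParlaiParser

opt = ParlaiParser(True, False).parse_args([])
opt['image_mode'] = 'resnet152'  # assumed featurizing backbone
generator = ImageFeaturesGenerator(opt)
features = generator.get_image_features('img_0', Image.open('photo.jpg'))
print(features.size())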
class FlickrDataset(Dataset):
    """A Pytorch Dataset utilizing streaming."""

    def __init__(self, opt, shared=None):
        self.opt = opt
        self.datatype = self.opt.get('datatype')
        self.training = self.datatype.startswith('train')
        self.num_epochs = self.opt.get('num_epochs', 0)
        self.image_loader = ImageLoader(opt)
        data_path, self.image_path = _path(opt)
        self._setup_data(data_path, opt.get('unittest', False))
        self.dict_agent = DictionaryAgent(opt)

    def __getitem__(self, index):
        cap = self.data[index]
        image_id = int(cap['filename'].replace('.jpg', ''))
        ep = {
            'text': QUESTION,
            'image': self.get_image(image_id),
            'episode_done': True,
        }
        if self.opt.get('extract_image', False):
            ep['image_id'] = image_id
            return ep
        ep['labels'] = [s['raw'] for s in cap['sentences']]
        ep['valid'] = True
        if 'train' not in self.datatype:
            ep['label_candidates'] = self.cands
        return (index, ep)

    def __len__(self):
        return self.num_episodes()

    def _setup_data(self, data_path, unittest):
        with open(data_path) as data_file:
            raw_data = json.load(data_file)['images']
        if 'train' in self.datatype:
            self.data = [d for d in raw_data if d['split'] == 'train']
        elif 'valid' in self.datatype:
            self.data = [d for d in raw_data if d['split'] == 'val']
            self.cands = [
                l for d in self.data for l in [s['raw'] for s in d['sentences']]
            ]
        else:
            self.data = [d for d in raw_data if d['split'] == 'test']
            self.cands = [
                l for d in self.data for l in [s['raw'] for s in d['sentences']]
            ]
        if unittest:
            # truncate for unit tests (the original referenced an undefined
            # self.caption attribute; episodes live in self.data)
            self.data = self.data[:10]

    def get_image(self, image_id):
        im_path = os.path.join(self.image_path, '%d.jpg' % image_id)
        return self.image_loader.load(im_path)

    def num_episodes(self):
        return len(self.data)

    def num_examples(self):
        return self.num_episodes()

    def num_images(self):
        return self.num_episodes()
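# --- A hypothetical iteration sketch for the dataset above; assumes the
# 'flickr30k' task name is registered in ParlAI and that its data is
# downloadable on first run. Each item is an (index, episode-dict) tuple
# unless extract_image is set.
from parlai.core.params import ParlaiParser

opt = ParlaiParser(True, False).parse_args(['--task', 'flickr30k', '--datatype', 'train'])
dataset = FlickrDataset(opt)
print(dataset.num_episodes())
index, ep = dataset[0]
print(ep['text'])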
def __init__(self, opt):
    self.opt = opt
    self.use_hdf5 = opt.get('use_hdf5', False)
    self.datatype = self.opt.get('datatype')
    self.training = self.datatype.startswith('train')
    self.num_epochs = self.opt.get('num_epochs', 0)
    self.image_loader = ImageLoader(opt)
    caption_path, self.image_path = _path(opt)
    self._setup_data(caption_path, opt.get('unittest', False))
    if self.use_hdf5:
        try:
            import h5py

            self.h5py = h5py
        except ModuleNotFoundError:
            raise ModuleNotFoundError('Need to install h5py - `pip install h5py`')
        self._setup_image_data()
    self.dict_agent = DictionaryAgent(opt)
def __init__(self, opt, version='2014'):
    self.opt = opt
    self.use_hdf5 = opt.get('use_hdf5', False)
    self.datatype = self.opt.get('datatype')
    self.training = self.datatype.startswith('train')
    self.num_epochs = self.opt.get('num_epochs', 0)
    self.image_loader = ImageLoader(opt)
    test_info_path, annotation_path, self.image_path = _path(opt, version)
    self._setup_data(test_info_path, annotation_path, opt.get('unittest', False))
    if self.use_hdf5:
        try:
            import h5py

            self.h5py = h5py
        except ImportError:
            raise ImportError('Need to install h5py - `pip install h5py`')
        self._setup_image_data()
    self.dict_agent = DictionaryAgent(opt)
def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
    self.turn_idx = 0
    self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
    self.chat_done = False
    self.world_tag = world_tag
    self.max_resp_time = opt['max_resp_time']  # in secs
    super().__init__(opt, agents, shared)
    self.agents = agents
    self.agent = agents[0]
    self.data = []
    self.exact_match = False
    self.num_images = opt['num_images']
    self.d_rnd = opt.get('dialog_round')
    self.image_path = opt.get('image_path')
    self.task_dir = opt['task_dir']
    opt['image_mode'] = 'raw'
    self.image_loader = ImageLoader(opt)
def __init__(self, opt, shared=None):
    super().__init__(opt)
    dt = opt['datatype'].split(':')[0]
    if dt not in ('train', 'test'):
        raise RuntimeError('Not valid datatype (only train/test).')

    task = opt.get('task', 'fvqa:split:0')
    task_num = 0  # default to train/split 0
    split = task.split(':')
    if len(split) > 2:
        task_num = split[2]
        if task_num not in [str(i) for i in range(5)]:
            raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

    if not hasattr(self, 'factmetrics'):
        if shared and shared.get('factmetrics'):
            self.factmetrics = shared['factmetrics']
        else:
            self.factmetrics = TeacherMetrics(
                opt.get('numthreads', 1) > 1, opt.get('metrics', 'default')
            )
    self.datatype = opt['datatype']
    questions_path, trainset_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        self.ques = shared['ques']
    else:
        self._setup_data(questions_path, trainset_path, dt, task_num)
    self.len = len(self.ques)

    self.asked_question = False
    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt)
    self.datatype = opt['datatype']
    data_path, annotation_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
    else:
        self._setup_data(data_path, annotation_path)
    self.len = len(self.ques['questions'])

    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    self.opt = opt
    self.image_mode = opt.get('image_mode', 'none')
    self.data_path, personalities_data_path, self.image_path = _path(opt)
    self.datatype = opt.get('datatype').split(':')[0]
    self.include_personality = opt.get('include_personality')
    self.include_image = opt.get('include_image')
    if shared and 'data' in shared:
        self.data = shared['data']
        self.image_loader = shared['image_loader']
    else:
        self.image_loader = ImageLoader(opt)
        self._setup_data(self.data_path, personalities_data_path)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    data_path, annotation_path, self.image_path = _path(opt)
    self.datafile = data_path
    self.image_mode = opt.get('image_mode', 'none')

    if shared and 'ques' in shared:
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
    else:
        self._setup_data(data_path, annotation_path)
        self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    self.image_mode = opt.get('image_mode', 'none')
    if shared:
        # another instance was set up already, just reference its data
        self.caption = shared['caption']
        self.image_loader = shared['image_loader']
    else:
        # need to set up data from scratch
        caption_path, self.image_path = _path(opt)
        self._setup_data(caption_path)
        self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None, version='2017'):
    super().__init__(opt, shared)
    self.image_mode = opt.get('image_mode', 'none')
    if shared:
        # another instance was set up already, just reference its data
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
    else:
        # need to set up data from scratch
        test_info_path, annotation_path, self.image_path = _path(opt, version)
        self._setup_data(test_info_path, annotation_path)
        self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt)
    self.image_mode = opt.get('image_mode', 'none')
    if shared and 'ques' in shared:
        # another instance was set up already, just reference its data
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
    else:
        # need to set up data from scratch
        data_path, annotation_path, self.image_path = _path(opt)
        self._setup_data(data_path, annotation_path)
        self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt: Opt, shared: TShared = None):
    super().__init__(opt, shared)
    self.opt = opt
    self.image_mode = opt.get('image_mode', 'no_image_model')
    self.data_path, personalities_data_path, self.image_path = _path(opt)
    self.datatype = opt['datatype'].split(':')[0]
    self.include_personality = opt.get('include_personality')
    self.include_image = opt.get('include_image') and opt.get('load_images')
    self.num_cands = opt.get('num_cands')
    if shared and 'data' in shared:
        self.data = shared['data']
        self.personalities = shared['personalities']
        self.image_loader = shared['image_loader']
    else:
        self.image_loader = ImageLoader(opt)
        self._setup_data(self.data_path, personalities_data_path)
    self.num_exs = sum(len(d['dialog']) for d in self.data)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    self.image_mode = opt.get('image_mode', 'none')
    self.use_intro = opt.get('use_intro', False)
    data_path, self.image_path = _path(opt)
    if shared:
        # another instance was set up already, just reference its data
        self.data = shared['data']
        self.image_loader = shared['image_loader']
        if 'cands' in shared:
            self.cands = shared['cands']
    else:
        # need to set up data from scratch
        self._setup_data(data_path)
        self.image_loader = ImageLoader(opt)
    self.reset()
def setup_interactive():
    """Set up the interactive script."""
    parser = setup_args()
    opt = parser.parse_args(print_args=True)
    if not opt.get('model_file'):
        raise RuntimeError('Please specify a model file')
    if opt.get('fixed_cands_path') is None:
        opt['fixed_cands_path'] = os.path.join(
            '/'.join(opt.get('model_file').split('/')[:-1]), 'candidates.txt'
        )
    opt['task'] = 'parlai.agents.local_human.local_human:LocalHumanAgent'
    opt['image_mode'] = 'resnet152'
    opt['no_cuda'] = True
    opt['override']['no_cuda'] = True
    SHARED['opt'] = opt
    SHARED['image_loader'] = ImageLoader(opt)
    # Create model and assign it to the specified task
    SHARED['agent'] = create_agent(opt, requireModelExists=True)
    SHARED['world'] = create_task(opt, SHARED['agent'])
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    self.datatype = opt['datatype']
    data_path, annotation_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
    else:
        self._setup_data(data_path, annotation_path)

    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None, version='2017'):
    super().__init__(opt, shared)
    self.version = version
    self.image_mode = opt.get('image_mode', 'none')
    self.use_intro = opt.get('use_intro', False)
    self.num_cands = opt.get('num_cands', -1)
    self.include_rest_val = opt.get('include_rest_val', False)
    test_info_path, annotation_path, self.image_path = _path(opt, version)
    if shared:
        # another instance was set up already, just reference its data
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
        self.cands = shared['cands']
    else:
        # need to set up data from scratch
        self._setup_data(test_info_path, annotation_path, opt)
        self.image_loader = ImageLoader(opt)
    self.reset()
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    data_path, annotation_path, self.image_path = _path(opt)
    self.image_mode = opt.get('image_mode', 'none')

    if shared and 'ques' in shared:
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
        self.image_loader = shared['image_loader']
        self.master_loader = shared['master_loader']
    else:
        self._setup_data(data_path, annotation_path)
        self.image_loader = ImageLoader(opt)
        self.master_loader = MasterLoader(opt)
        self.master_loader.start()

    self.example_queue = queue.Queue()
    self.reset()
    if self.image_mode != 'none':
        self.submit_example_request()
def __init__(self, opt, shared=None):
    super().__init__(opt)
    dt = opt['datatype'].split(':')[0]
    if dt not in ('train', 'test'):
        raise RuntimeError('Not valid datatype (only train/test).')

    task = opt.get('task', 'fvqa:split:0')
    task_num = 0  # default to train/split 0
    split = task.split(':')
    if len(split) > 2:
        task_num = split[2]
        if task_num not in [str(i) for i in range(5)]:
            raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

    if not hasattr(self, 'factmetrics'):
        if shared and shared.get('factmetrics'):
            self.factmetrics = shared['factmetrics']
        else:
            self.factmetrics = Metrics(opt)
    self.datatype = opt['datatype']
    questions_path, trainset_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        self.ques = shared['ques']
    else:
        self._setup_data(questions_path, trainset_path, dt, task_num)
    self.len = len(self.ques)

    self.asked_question = False
    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.image_loader = ImageLoader(opt)
    self.reset()
def test_other_image_modes(self):
    """
    Test non-featurized image modes.
    """
    with testing_utils.tempdir() as tmp:
        image_file = 'tmp.jpg'
        image_path = os.path.join(tmp, image_file)
        image_zip_path = os.path.join(tmp, 'tmp.zip')
        image = Image.new('RGB', (16, 16), color=0)

        with PathManager.open(image_path, 'wb') as fp:
            image.save(fp, 'JPEG')

        with zipfile.ZipFile(PathManager.open(image_zip_path, 'wb'), mode='w') as zipf:
            zipf.write(image_path, arcname=image_file)

        for im in ['raw', 'ascii']:
            loader = ImageLoader({"image_mode": im})
            loader.load(image_path)
            loader.load(f"{image_zip_path}/{image_file}")
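# --- A minimal standalone sketch (an assumption, not part of the test above)
# of the two non-featurized modes: 'raw' returns the loaded PIL image, while
# 'ascii' returns a text rendering of it. Requires only that ParlAI and Pillow
# are installed; 'tmp.jpg' is a throwaway file created here.
from PIL import Image
from parlai.core.image_featurizers import ImageLoader

Image.new('RGB', (16, 16), color=0).save('tmp.jpg')  # tiny test image
for mode in ['raw', 'ascii']:
    loader = ImageLoader({'image_mode': mode})
    print(mode, type(loader.load('tmp.jpg')))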
class MTurkIGCEvalWorld(MultiAgentDialogWorld):
    """World where an agent observes 5 images and 3 comments about the
    images, and ranks the comments.
    """

    def __init__(self, opt, agents=None, shared=None, world_tag='NONE'):
        self.turn_idx = 0
        self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
        self.chat_done = False
        self.world_tag = world_tag
        self.max_resp_time = opt['max_resp_time']  # in secs
        super().__init__(opt, agents, shared)
        self.agents = agents
        self.agent = agents[0]
        self.data = []
        self.exact_match = False
        self.num_images = opt['num_images']
        self.d_rnd = opt.get('dialog_round')
        self.image_path = opt.get('image_path')
        self.task_dir = opt['task_dir']
        opt['image_mode'] = 'raw'
        self.image_loader = ImageLoader(opt)

    def episode_done(self):
        return self.chat_done

    def parley(self):
        """RATER is given an image, context (and possibly some questions)
        and is asked to rate the responses.
        """
        # Initial Message Value
        control_msg = {'episode_done': False}
        control_msg['id'] = 'SYSTEM'

        # First, we give RATER the image and context
        while self.turn_idx < self.num_images:
            print(self.world_tag + ' is at turn {}...'.format(self.turn_idx))
            # Send image to turker
            if self.d_rnd == 'questions':
                control_msg['description'] = config_questions['task_description']
            else:
                control_msg['description'] = config_responses['task_description']
            self.example_id, igc_example = self.agent.example_generator.pop_example()
            img = self.image_loader.load(self.image_id_to_path(self.example_id))
            buffered = BytesIO()
            img.save(buffered, format="JPEG")
            encoded = str(base64.b64encode(buffered.getvalue()).decode('ascii'))
            control_msg['image'] = encoded
            control_msg['context'] = igc_example['context']

            # Set up options for rating
            if self.d_rnd == 'questions':
                options = [(k, v) for k, v in igc_example['questions'].items()]
            else:
                control_msg['question'] = igc_example['question']
                options = [(k, v) for k, v in igc_example['responses'].items()]
            random.shuffle(options)
            options, dup_dict = self.filter_option_duplicates(options)
            control_msg['options'] = [c[1] for c in options]

            # Collect rating from turker
            rate_msg = RATE_MSG if self.d_rnd == 'questions' else RATE_RESPONSE_MSG
            control_msg['text'] = rate_msg.format(self.turn_idx + 1)
            control_msg['new_eval'] = True
            self.agent.observe(validate(control_msg))
            time.sleep(1)
            act = self.agent.act(timeout=self.max_resp_time)
            # First timeout check
            self.check_timeout(act)
            if self.chat_done:
                break
            try:
                ratings = []
                collected_ratings = list(zip([q[0] for q in options], act['ratings']))
                for opt, rating in collected_ratings:
                    for other_opt in dup_dict[opt]:
                        ratings.append((other_opt, rating))
                igc_example['ratings'] = ratings
            except Exception:
                # Agent disconnected
                break
            igc_example['dialog_round_evaluated'] = self.d_rnd
            self.data.append(igc_example)
            self.turn_idx += 1

        if self.turn_idx == self.num_images:
            control_msg['text'] = CHAT_ENDED_MSG.format(self.num_images)
            self.agent.observe(validate(control_msg))
        self.chat_done = True
        return

    def image_id_to_path(self, image_id):
        if self.image_path == '':
            return os.path.join(self.task_dir, 'banana.jpg')
        else:
            # use the image_id argument (the original shadowed it with the
            # builtin `id`, producing a bogus path)
            return '{}/{}.jpg'.format(self.image_path, image_id)

    def filter_option_duplicates(self, options):
        # options = [(opt, text), (opt2, text2), ...]
        new_options = []
        text_to_opt = {}
        opt_to_opt = {}
        for opt, text in options:
            if text not in text_to_opt:
                text_to_opt[text] = opt
                new_options.append([opt, text])
                opt_to_opt[opt] = [opt]
            else:
                opt_to_opt[text_to_opt[text]].append(opt)
        return new_options, opt_to_opt

    def check_timeout(self, act):
        if act['text'] == '[TIMEOUT]' and act['episode_done']:
            control_msg = {'episode_done': True}
            control_msg['id'] = 'SYSTEM'
            control_msg['text'] = TIMEOUT_MSG
            for ag in self.agents:
                if ag.id != act['id']:
                    ag.observe(validate(control_msg))
            self.chat_done = True
            return True
        elif act['text'] == '[DISCONNECT]':
            self.chat_done = True
            return True
        else:
            return False

    def save_data(self):
        convo_finished = True
        for ag in self.agents:
            if (
                ag.hit_is_abandoned
                or ag.hit_is_returned
                or ag.disconnected
                or ag.hit_is_expired
            ):
                convo_finished = False
        if not convo_finished:
            ag.example_generator.push_example(self.example_id)
            print("\n**Push image {} back to stack. **\n".format(self.example_id))
        self.agents[0].example_generator.save_idx_stack()
        data_path = self.opt['data_path']
        if not os.path.exists(data_path):
            os.makedirs(data_path)
        if convo_finished:
            filename = os.path.join(
                data_path,
                '{}_{}_{}.pkl'.format(
                    time.strftime("%Y%m%d-%H%M%S"),
                    np.random.randint(0, 1000),
                    self.task_type,
                ),
            )
        else:
            filename = os.path.join(
                data_path,
                '{}_{}_{}_incomplete.pkl'.format(
                    time.strftime("%Y%m%d-%H%M%S"),
                    np.random.randint(0, 1000),
                    self.task_type,
                ),
            )
        pickle.dump(
            {
                'data': self.data,
                'worker': self.agents[0].worker_id,
                'hit_id': self.agents[0].hit_id,
                'assignment_id': self.agents[0].assignment_id,
            },
            open(filename, 'wb'),
        )
        print('{}: Data successfully saved at {}.'.format(self.world_tag, filename))

    def review_work(self):
        global review_agent

        def review_agent(ag):
            pass  # auto approve 5 days

        Parallel(n_jobs=len(self.agents), backend='threading')(
            delayed(review_agent)(agent) for agent in self.agents
        )

    def shutdown(self):
        """Shutdown all mturk agents in parallel, otherwise if one mturk agent
        is disconnected then it could prevent other mturk agents from
        completing.
        """
        global shutdown_agent

        def shutdown_agent(agent):
            agent.shutdown()

        Parallel(n_jobs=len(self.agents), backend='threading')(
            delayed(shutdown_agent)(agent) for agent in self.agents
        )
class SplitTeacher(Teacher):
    """FVQA Teacher, which loads the json VQA data and implements its own
    `act` method for interacting with the student agent.

    Use "fvqa:split:X" to choose between splits 0-4 (inclusive), or just
    "fvqa" to use the default split (0).
    """

    def __init__(self, opt, shared=None):
        super().__init__(opt)
        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = Metrics(opt)
        self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.image_loader = ImageLoader(opt)
        self.reset()

    def num_examples(self):
        return self.len

    def num_episodes(self):
        return self.len

    def report(self):
        r = super().report()
        r['factmetrics'] = self.factmetrics.report()
        return r

    def reset(self):
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        self.episode_idx = self.data_offset - self.step_size
        self.epochDone = False

    def reset_metrics(self):
        super().reset_metrics()
        self.factmetrics.clear()

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            if self.asked_question:
                self.metrics.update(observation, self.lastY[0])
            else:
                self.factmetrics.update(observation, self.lastY[1])
            self.lastY = None
        return observation

    def act(self):
        if self.asked_question:
            self.asked_question = False
            action = {
                'text': 'Which fact supports this answer?',
                'episode_done': True,
            }
            if self.datatype.startswith('train'):
                action['labels'] = self.lastY[1]
            if (
                self.datatype != 'train'
                and self.episode_idx + self.step_size >= self.num_episodes()
            ):
                self.epochDone = True
            return action

        if self.datatype == 'train':
            self.episode_idx = random.randrange(self.len)
        else:
            self.episode_idx = (
                self.episode_idx + self.step_size
            ) % self.num_episodes()

        self.asked_question = True
        qa = self.ques[self.episode_idx]
        question = qa['question']
        img_path = self.image_path + qa['img_file']

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': False,
        }

        human_readable = qa['fact_surface'].replace('[', '').replace(']', '')
        self.lastY = [[qa['answer']], [human_readable]]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY[0]

        return action

    def share(self):
        shared = super().share()
        shared['factmetrics'] = self.factmetrics
        shared['ques'] = self.ques
        if hasattr(self, 'facts'):
            shared['facts'] = self.facts
        return shared

    def _setup_data(self, questions_path, trainset_path, datatype, task_num):
        print('loading: ' + questions_path)
        with open(questions_path) as questions_file:
            questions = json.load(questions_file)
        train_test_images = set()
        with open(
            os.path.join(trainset_path, '{}_list_{}.txt'.format(datatype, task_num))
        ) as imageset:
            for line in imageset:
                train_test_images.add(line.strip())
        self.ques = [
            questions[k]
            for k in sorted(questions.keys())
            if questions[k]['img_file'] in train_test_images
        ]
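# --- A hypothetical invocation sketch for the teacher above: the split is
# chosen through the ParlAI task string, per the class docstring. Assumes the
# fvqa task is registered in ParlAI and its data downloads on first run.
from parlai.core.params import ParlaiParser

opt = ParlaiParser(True, False).parse_args(
    ['--task', 'fvqa:split:2', '--datatype', 'train']
)
teacher = SplitTeacher(opt)  # loads train split 2 (valid split IDs are 0-4)
print(teacher.num_examples())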
class OeTeacher(Teacher):
    """
    VQA Open-Ended teacher, which loads the json vqa data and implements its
    own `act` method for interacting with the student agent.
    """

    def __init__(self, opt, shared=None):
        super().__init__(opt, shared)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.image_loader = ImageLoader(opt)
        self.reset()

    def __len__(self):
        return len(self.ques['questions'])

    def reset(self):
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        if self.datatype == 'train':
            self.episode_idx = random.randrange(len(self))
        else:
            self.episode_idx = (self.episode_idx + self.step_size) % len(self)
            if self.episode_idx == len(self) - self.step_size:
                self.epochDone = True

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']
        img_path = self.image_path + '%012d.jpg' % image_id

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': True,
        }

        if not self.datatype.startswith('test'):
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        if self.datatype != 'test':
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)
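# --- A hypothetical instantiation sketch for the teacher above; 'vqa_v1' is
# an assumed ParlAI task name hosting an OeTeacher, and the data is assumed
# to download on first run.
from parlai.core.params import ParlaiParser

opt = ParlaiParser(True, False).parse_args(['--task', 'vqa_v1', '--datatype', 'valid'])
teacher = OeTeacher(opt)
print(teacher.act()['text'])  # an open-ended question about a COCO image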
class SplitTeacher(Teacher):
    """FVQA Teacher, which loads the json VQA data and implements its own
    `act` method for interacting with the student agent.

    Use "fvqa:split:X" to choose between splits 0-4 (inclusive), or just
    "fvqa" to use the default split (0).
    """

    def __init__(self, opt, shared=None):
        super().__init__(opt)
        dt = opt['datatype'].split(':')[0]
        if dt not in ('train', 'test'):
            raise RuntimeError('Not valid datatype (only train/test).')

        task = opt.get('task', 'fvqa:split:0')
        task_num = 0  # default to train/split 0
        split = task.split(':')
        if len(split) > 2:
            task_num = split[2]
            if task_num not in [str(i) for i in range(5)]:
                raise RuntimeError('Invalid train/test split ID (0-4 inclusive)')

        if not hasattr(self, 'factmetrics'):
            if shared and shared.get('factmetrics'):
                self.factmetrics = shared['factmetrics']
            else:
                self.factmetrics = Metrics(opt)
        self.datatype = opt['datatype']
        questions_path, trainset_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
        else:
            self._setup_data(questions_path, trainset_path, dt, task_num)
        self.len = len(self.ques)

        self.asked_question = False
        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.image_loader = ImageLoader(opt)
        self.reset()

    def __len__(self):
        return self.len

    def report(self):
        r = super().report()
        r['factmetrics'] = self.factmetrics.report()
        return r

    def reset(self):
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        self.episode_idx = self.data_offset - self.step_size
        self.epochDone = False

    def reset_metrics(self):
        super().reset_metrics()
        self.factmetrics.clear()

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            if self.asked_question:
                self.metrics.update(observation, self.lastY[0])
            else:
                self.factmetrics.update(observation, self.lastY[1])
            self.lastY = None
        return observation

    def act(self):
        if self.asked_question:
            self.asked_question = False
            action = {'text': 'Which fact supports this answer?', 'episode_done': True}
            if self.datatype.startswith('train'):
                action['labels'] = self.lastY[1]
            if (
                self.datatype != 'train'
                and self.episode_idx + self.step_size >= len(self)
            ):
                self.epochDone = True
            return action

        if self.datatype == 'train':
            self.episode_idx = random.randrange(self.len)
        else:
            self.episode_idx = (self.episode_idx + self.step_size) % len(self)

        self.asked_question = True
        qa = self.ques[self.episode_idx]
        question = qa['question']
        img_path = self.image_path + qa['img_file']

        action = {
            'image': self.image_loader.load(img_path),
            'text': question,
            'episode_done': False,
        }

        human_readable = qa['fact_surface'].replace('[', '').replace(']', '')
        self.lastY = [[qa['answer']], [human_readable]]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY[0]

        return action

    def share(self):
        shared = super().share()
        shared['factmetrics'] = self.factmetrics
        shared['ques'] = self.ques
        if hasattr(self, 'facts'):
            shared['facts'] = self.facts
        return shared

    def _setup_data(self, questions_path, trainset_path, datatype, task_num):
        print('loading: ' + questions_path)
        with open(questions_path) as questions_file:
            questions = json.load(questions_file)
        train_test_images = set()
        with open(
            os.path.join(trainset_path, '{}_list_{}.txt'.format(datatype, task_num))
        ) as imageset:
            for line in imageset:
                train_test_images.add(line.strip())
        self.ques = [
            questions[k]
            for k in sorted(questions.keys())
            if questions[k]['img_file'] in train_test_images
        ]