def save_image_contexts(task_opt: Opt):
    """
    Collect unique images from the given task and pickle them with contexts.

    Each saved entry pairs a teacher act (containing the image) with
    BST-style context info from the context generator, such as persona
    strings and a pair of lines of dialogue from another dataset. The
    resulting list is pickled to ``task_opt['image_context_path']``.

    TODO: perhaps have the image chat task make use of this context information
    """
    print('Creating teacher to loop over images.')
    agent = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, agent)
    num_examples = task_opt['num_examples']

    print('Creating context generator.')
    context_generator = get_context_generator()

    print(
        f'Looping over {num_examples:d} images and pulling a context for each one.'
    )
    image_contexts = []
    seen_srcs = set()
    while len(image_contexts) < num_examples:
        # Pull the next teacher act and identify its image
        world.parley()
        act = world.get_acts()[0]
        src = get_image_src(image=act['image'])

        # Skip over non-unique images, such as from the later turns of an episode
        if src in seen_srcs:
            print('\tSkipping non-unique image.')
            continue

        seen_srcs.add(src)
        image_contexts.append(
            {'image_act': act, 'context_info': context_generator.get_context()}
        )
        if len(image_contexts) % 5 == 0:
            print(f'Collected {len(image_contexts):d} images.')

    print(f'{len(image_contexts):d} image contexts created.')

    # Save
    with open(task_opt['image_context_path'], 'wb') as f:
        pickle.dump(image_contexts, f)
def __init__(self, task_run: "TaskRun", args: "DictConfig", shared_state: "SharedTaskState"):
    """
    Initialize the blueprint: conversation quotas, onboarding data, context
    generation, and the shared state consumed by the worlds.

    :param task_run: the Mephisto task run this blueprint belongs to
    :param args: blueprint configuration (Hydra DictConfig)
    :param shared_state: mutable state shared between task components
    """
    # Determine how many conversations of each model/config are required,
    # and expose the totals to both the blueprint args and the shared state
    conversations_needed = self._process_conversations_needed(args)
    self.conversations_needed = conversations_needed
    shared_state.conversations_needed = conversations_needed
    args.blueprint.num_conversations = sum(conversations_needed.values())

    super().__init__(task_run=task_run, args=args, shared_state=shared_state)

    if args.blueprint.get("annotations_config_path", "") != "":
        # We are going to do annotations, so load the onboarding data file that will
        # be used to onboard users into knowing how to do the annotations properly
        onboard_task_data_path = os.path.expanduser(
            args.blueprint.onboard_task_data_path)
        with open(onboard_task_data_path, "r") as onboard_task_data_file:
            self.onboard_task_data = json.load(onboard_task_data_file)
    else:
        self.onboard_task_data = None

    # Per-key completion counters, one per required conversation type.
    # (Iterate the keys directly; the original looped over .items() and
    # discarded every value.)
    run_statistics = {r: 0 for r in self.conversations_needed}
    shared_state.run_statistics = run_statistics

    # Only build a context generator when personas or BST-style starts are
    # actually needed
    context_generator: Optional[ContextGenerator] = None
    if (args.blueprint.include_persona
            or args.blueprint.conversation_start_mode == 'bst'):
        context_generator = get_context_generator(
            args.blueprint.override_opt)
    shared_state.context_generator = context_generator

    # Lock for editing run statistics between threads
    statistics_condition = Condition()

    # Move shared state into the world and onboarding opts, such that these
    # can be used by the worlds
    shared_state.onboarding_world_opt.update({
        'onboard_statistics': shared_state.onboard_statistics,
        'statistics_condition': statistics_condition,
        'max_onboard_time': args.blueprint.max_onboard_time,
        'onboard_task_data': self.onboard_task_data,
        'onboarding_qualification': args.blueprint.onboarding_qualification,
    })
    shared_state.world_opt.update({
        'conversations_needed': conversations_needed,
        'run_statistics': shared_state.run_statistics,
        'context_generator': context_generator,
        'statistics_condition': statistics_condition,
        'conversation_start_mode': args.blueprint.conversation_start_mode,
        'include_persona': args.blueprint.include_persona,
    })