コード例 #1
0
def save_image_contexts(task_opt: Opt):
    """
    Gather unique images with accompanying contexts and pickle them to disk.

    A teacher world is stepped until ``task_opt['num_examples']`` images with
    distinct sources (as reported by ``get_image_src``) have been collected. Each
    kept teacher act is paired with one context drawn from the context generator,
    and the resulting list of dicts (keys ``'image_act'`` and ``'context_info'``)
    is written with ``pickle`` to ``task_opt['image_context_path']``.

    Per the original author's note, the context is BST-style information, such as
    persona strings and a pair of lines of dialogue from another dataset.
    TODO: perhaps have the image chat task make use of this context information

    NOTE(review): the loop exits only once enough unique images have been found,
    so it presumably relies on the task supplying at least that many -- confirm.
    """

    print('Creating teacher to loop over images.')
    teacher = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, teacher)
    num_examples = task_opt['num_examples']

    print('Creating context generator.')
    generator = get_context_generator()

    print(
        f'Looping over {num_examples:d} images and pulling a context for each one.'
    )
    image_contexts = []
    seen_srcs = set()
    while len(image_contexts) < num_examples:

        # Advance the world one step and read what the teacher produced
        world.parley()
        act = world.get_acts()[0]

        src = get_image_src(image=act['image'])
        if src in seen_srcs:
            # Repeated image (e.g. a later turn of the same episode): skip it
            print('\tSkipping non-unique image.')
            continue

        seen_srcs.add(src)
        image_contexts.append(
            {'image_act': act, 'context_info': generator.get_context()}
        )
        # Report progress every fifth collected image
        if not len(image_contexts) % 5:
            print(f'Collected {len(image_contexts):d} images.')

    print(f'{len(image_contexts):d} image contexts created.')

    # Persist the collected contexts as a pickle file
    with open(task_opt['image_context_path'], 'wb') as f:
        pickle.dump(image_contexts, f)
コード例 #2
0
    def __init__(
        self,
        task_run: "TaskRun",
        args: "DictConfig",
        shared_state: "SharedTaskState",
    ):
        """
        Set up conversation quotas, onboarding data, and the options given to worlds.

        The conversation counts returned by ``_process_conversations_needed`` are
        stored on this instance and on ``shared_state``, and their sum is written
        to ``args.blueprint.num_conversations`` before the parent constructor is
        called. Afterwards the onboarding data file is loaded when an annotations
        config path is set, zeroed run statistics and an optional context generator
        are attached to ``shared_state``, and both
        ``shared_state.onboarding_world_opt`` and ``shared_state.world_opt`` are
        updated with the values intended for the worlds.
        """

        needed = self._process_conversations_needed(args)
        self.conversations_needed = needed
        shared_state.conversations_needed = needed
        # The total is written to the blueprint args ahead of the parent constructor
        args.blueprint.num_conversations = sum(needed.values())

        super().__init__(task_run=task_run, args=args, shared_state=shared_state)

        annotations_config_path = args.blueprint.get("annotations_config_path", "")
        if annotations_config_path != "":
            # Annotations are enabled, so read the data file used to onboard users
            # into knowing how to do the annotations properly
            data_path = os.path.expanduser(args.blueprint.onboard_task_data_path)
            with open(data_path, "r") as data_file:
                self.onboard_task_data = json.load(data_file)
        else:
            self.onboard_task_data = None

        # Every key of the conversation quota starts with a count of zero
        shared_state.run_statistics = {
            name: 0 for (name, _) in self.conversations_needed.items()
        }

        # A context generator is built only for persona or 'bst' start mode
        wants_context = (
            args.blueprint.include_persona
            or args.blueprint.conversation_start_mode == 'bst'
        )
        context_generator: Optional[ContextGenerator] = (
            get_context_generator(args.blueprint.override_opt)
            if wants_context
            else None
        )
        shared_state.context_generator = context_generator

        # Lock for editing run statistics between threads
        statistics_condition = Condition()

        # Copy shared state into the onboarding and world opts, such that these
        # can be used by the worlds
        onboarding_extras = {
            'onboard_statistics': shared_state.onboard_statistics,
            'statistics_condition': statistics_condition,
            'max_onboard_time': args.blueprint.max_onboard_time,
            'onboard_task_data': self.onboard_task_data,
            'onboarding_qualification': args.blueprint.onboarding_qualification,
        }
        shared_state.onboarding_world_opt.update(onboarding_extras)

        world_extras = {
            'conversations_needed': needed,
            'run_statistics': shared_state.run_statistics,
            'context_generator': context_generator,
            'statistics_condition': statistics_condition,
            'conversation_start_mode': args.blueprint.conversation_start_mode,
            'include_persona': args.blueprint.include_persona,
        }
        shared_state.world_opt.update(world_extras)