Code Example #1
    def __init__(self, opt):
        self.second_resp = opt.get('second_response')
        # The stack of example indices is persisted to disk so that an
        # interrupted run can resume without repeating examples.
        self.examples_idx_stack_path = os.path.join(
            os.getcwd(),
            './{}_examples_stack{}.pkl'.format(
                'second_response' if self.second_resp else 'first_response',
                '_sandbox' if opt['is_sandbox'] else '',
            ),
        )
        self.OLD = OffensiveStringMatcher()  # filters offensive text
        self.opt = opt
        # Download/build the Personality-Captions and Image-Chat data if needed.
        build_pc(opt)
        build_ic(opt)
        # First responses are drawn from Personality-Captions; second
        # responses are drawn from Image-Chat.
        df = 'image_chat' if self.second_resp else 'personality_captions'
        data_path = os.path.join(self.opt['datapath'], '{}/{}.json')
        self.data = []
        for dt in ['train', 'val', 'test']:
            # Image-Chat names its validation split 'valid' rather than 'val'.
            if self.second_resp and dt == 'val':
                dt = 'valid'
            with open(data_path.format(df, dt)) as f:
                self.data += json.load(f)

        # Evaluating second responses requires more than one dialog turn.
        if self.second_resp:
            self.data = [d for d in self.data if len(d['dialog']) > 1]

        # Reload a previously saved index stack if present; otherwise build
        # and persist a fresh one.
        if os.path.exists(self.examples_idx_stack_path):
            with open(self.examples_idx_stack_path, 'rb') as handle:
                self.idx_stack = pickle.load(handle)
        else:
            self.idx_stack = []
            self.add_idx_stack()
            self.save_idx_stack()
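
Judging from Code Example #2, this constructor presumably belongs to the ExampleGenerator class instantiated there. The sketch below shows how it might be driven in isolation; the opt keys are inferred from the attribute accesses above, and every value (paths included) is an illustrative assumption, not part of the project:

# Imports the excerpted constructor relies on (trimmed from the excerpt):
import json
import os
import pickle

# Hypothetical opt dict; keys inferred from the constructor above.
opt = {
    'second_response': False,            # rank first responses
    'is_sandbox': True,                  # use the '_sandbox' stack file
    'datapath': '/path/to/ParlAI/data',  # assumed ParlAI data root
}
generator = ExampleGenerator(opt)  # class name taken from Code Example #2
print('{} examples loaded'.format(len(generator.data)))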
Code Example #2
File: run.py  Project: Arran5353/dialog-probing
def main():
    """
        Human Evaluation of various responses to comments on images.

        A turker is shown an image and some dialog history. Then, the
        turker is asked to choose which response they think is more engaging.

        If no `--eval-data-path` is given, the data from the original
        Image-Chat dataset is used.

        To use your own data, please specify `--eval-data-path`, a path to an
        appropriate json file with a list of examples, where each example
        has the following structure:
            {
                'image_hash': <hash of image>,
                'dialog': [(personality, text), ...],  # list of (personality, text) tuples
                'personality': <personality of responses to compare>,
                '<compare_key_1>': <first response to compare>,
                '<compare_key_2>': <second response to compare>,
                ...
            }
        Note that compare_key_1 and compare_key_2 can be any field, as long as they
        map to a string response.

        Example Scenario:
            Suppose you have the original Image-Chat dataset, and
            you would like to compare the outputs of your model called `model`.

            Your data may look like the following:
            [{
                'image_hash': hashforimageofcat,
                'dialog': [
                    ('Sweet', 'What a cute cat!'),
                    ('Neutral', 'Just looks like a plain cat to me')
                ],
                'personality': 'Sweet',
                'comment': 'It really is adorable if you look!',  # Human Comment
                'model_comment': "You'll love it if you pet it!",  # Model Comment
            }, ...]

            Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
            the outputs of the model against the human comments from Image-Chat.

    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=3,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=5,
                           type=int,
                           help='maximum number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24,
        help='how long to wait for auto approval',
    )
    argparser.add_argument('--data-path',
                           type=str,
                           default='',
                           help='where to save data')
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='where to load data to rank from. Leave '
        'blank to use Image-Chat data',
    )
    argparser.add_argument(
        '-ck1',
        '--compare-key-1',
        type=str,
        default='comment',
        help='key of first comparable',
    )
    argparser.add_argument(
        '-ck2',
        '--compare-key-2',
        type=str,
        default='comment',
        help='key of second comparable',
    )
    argparser.add_argument(
        '-rnd',
        '--dialog-round',
        type=str,
        default='first_response',
        choices=round_choices,
        help='which dialog round to show',
    )
    argparser.add_argument(
        '--show-personality',
        default=True,
        type='bool',
        help='whether to show the personality',
    )
    ImageChatTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    build_ic(opt)
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(opt['datapath'],
                                             'image_chat/test.json')
    config = (
        config_first if opt['dialog_round'] == 'first_response' else config_second
    )
    opt.update(config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkImageChatStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
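
Putting the docstring's format description into practice, a hand-built `--eval-data-path` file could be generated as below. Everything here is illustrative: the hash, texts, and output filename are made up, and the (personality, text) tuples in 'dialog' are written as lists because the file is JSON:

import json

# Illustrative eval data: one example with two candidate responses,
# keyed by the fields that -ck1/-ck2 will point at.
eval_data = [
    {
        'image_hash': 'd41d8cd98f00b204e9800998ecf8427e',  # made-up hash
        'dialog': [['Sweet', 'What a cute cat!']],
        'personality': 'Sweet',
        'comment': 'It really is adorable if you look!',
        'model_comment': "You'll love it if you pet it!",
    },
]

with open('my_eval_data.json', 'w') as f:
    json.dump(eval_data, f)

The task could then be launched with something like `python run.py --eval-data-path my_eval_data.json -ck1 comment -ck2 model_comment`; this invocation is assumed from the argument definitions above, not taken from the project's docs.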