Exemple #1
0
    def test_mturk_racehistory(self):
        """
        Emulate a setting where batch_act misappropriately handles mturk.

        Two copies of the same agent (two "threads") observe and act in an
        interleaved order; each copy must keep its own conversation history.
        """
        # batchsize > 1 plus interactive mode mimics the MTurk serving setup
        agent = get_agent(batchsize=16, interactive_mode=True, echo=True)
        share1 = create_agent_from_shared(agent.share())

        share1.observe(Message({'text': 'thread1-msg1', 'episode_done': False}))
        # second copy is created only after share1 has already observed,
        # so any history leakage between copies would show up below
        share2 = create_agent_from_shared(agent.share())
        share2.observe(Message({'text': 'thread2-msg1', 'episode_done': False}))
        share1.act()
        share2.act()

        share1.observe(Message({'text': 'thread1-msg2', 'episode_done': False}))
        share2.observe(Message({'text': 'thread2-msg2', 'episode_done': False}))
        # act in the opposite order this round to stress ordering assumptions
        share2.act()
        share1.act()

        share2.observe(Message({'text': 'thread2-msg3', 'episode_done': False}))
        share1.observe(Message({'text': 'thread1-msg3', 'episode_done': False}))

        # neither copy's history may contain the other thread's messages
        self.assertNotIn('thread1-msg1', share2.history.get_history_str())
        self.assertNotIn('thread2-msg1', share1.history.get_history_str())
        self.assertNotIn('thread1-msg2', share2.history.get_history_str())
        self.assertNotIn('thread2-msg2', share1.history.get_history_str())
Exemple #2
0
    def test_interactive_mode(self):
        """
        Test if conversation history is destroyed in MTurk mode.

        For each combination of batchsize / interactive_mode, both the
        original agent and a copy created from its shared state must answer
        a one-off episode correctly.
        """

        def _assert_agent_and_copy_respond(agent):
            # the original agent handles a single-episode message ...
            agent.observe(Message({'text': 'foo', 'episode_done': True}))
            response = agent.act()
            self.assertIn(
                'Evaluating 0', response['text'], 'Incorrect output in single act()'
            )
            # ... and so does a copy built from its shared state
            shared = create_agent_from_shared(agent.share())
            shared.observe(Message({'text': 'bar', 'episode_done': True}))
            response = shared.act()
            self.assertIn(
                'Evaluating 0', response['text'], 'Incorrect output in single act()'
            )

        # both manually setting bs to 1 and interactive mode true
        _assert_agent_and_copy_respond(get_agent(batchsize=1, interactive_mode=True))

        # now just bs 1
        _assert_agent_and_copy_respond(get_agent(batchsize=1, interactive_mode=False))

        # now just interactive
        # (bug fix: this section previously reused the bs-1 agent and left a
        # dead `shared = create_agent_from_shared(...)` assignment; it never
        # actually exercised an interactive-only agent)
        _assert_agent_and_copy_respond(get_agent(batchsize=16, interactive_mode=True))

        # finally, actively attempt to sabotage
        _assert_agent_and_copy_respond(get_agent(batchsize=16, interactive_mode=False))
    def __init__(
        self,
        opt,
        agents=None,
        shared=None,
        range_turn=(3, 5),
        max_turn=5,
        max_resp_time=120,
        model_agent_opt=None,
        world_tag='',
        agent_timeout_shutdown=120,
        knowledge_retriever_opt=None,
    ):
        """
        Set up an MTurk chat world where a human evaluates a partner.

        :param opt: world options; must contain 'is_sandbox'
        :param agents: MTurk agents participating in the chat
        :param shared: shared state, passed through to the superclass
        :param range_turn: bounds used to randomly pick the number of turns
        :param max_turn: hard cap on turns
        :param max_resp_time: max time (secs) a human may take to respond
        :param model_agent_opt: shared opt for the model agent; None means
            the conversation is human vs. human
        :param world_tag: tag identifying this world (e.g. in logs)
        :param agent_timeout_shutdown: secs before shutting down an
            unresponsive agent
        :param knowledge_retriever_opt: shared opt for the knowledge
            retriever agent (only used when model_agent_opt is set)
        """
        self.opt = opt
        # TURN CONTROL
        self.turn_idx = 0
        self.range_turn = range_turn
        self.max_turn = max_turn
        # np.random.randint excludes the upper bound; +1 shifts the range up
        self.n_turn = np.random.randint(self.range_turn[0],
                                        self.range_turn[1]) + 1
        self.chat_done = False
        # randomly decide which side speaks first
        self.other_first = random.choice([True, False])

        # DATA
        self.dialog = []
        self.dialog_list = []
        self.gmark_score = -1  # -1 marks "not yet rated"
        self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
        self.world_tag = world_tag
        self.ratings = ['1', '2', '3', '4', '5']

        super().__init__(opt, agents, shared)

        # MODEL AGENT SET UP
        if model_agent_opt is not None:
            self.model_agent = create_agent_from_shared(model_agent_opt)
            self.knowledge_agent = create_agent_from_shared(
                knowledge_retriever_opt)
        else:
            # case where we test against a human
            self.model_agent = None

        # TIMEOUT PROTOCOLS
        self.max_resp_time = max_resp_time  # in secs
        self.agent_timeout_shutdown = agent_timeout_shutdown

        # TOPIC CHOICES
        if self.model_agent is None:
            # human vs. human: the worker with id PERSON_1 evaluates, and
            # their partner is the previous index (wraps around via -1)
            for idx in range(len(self.agents)):
                if self.agents[idx].id == 'PERSON_1':
                    self.eval_agent = self.agents[idx]
                    self.other_agent = self.agents[idx - 1]
                    break
        else:
            self.eval_agent = self.agents[0]

        # topic metadata chosen by the evaluating worker
        self.chosen_topic = self.eval_agent.chosen_topic
        self.seen = self.eval_agent.seen
        self.topic_choices = self.eval_agent.topic_choices
    def __init__(self, opt):
        """
        Set up self-play training: dictionary, agents, world and schedules.

        :param opt: options dict (a ParlaiParser is parsed into one first)
        """
        if isinstance(opt, ParlaiParser):
            opt = opt.parse_args()
        # Possibly build a dictionary (not all models do this).
        if opt['dict_build_first'] and 'dict_file' in opt:
            if opt['dict_file'] is None and opt.get('model_file_transmitter') and opt.get('model_file_receiver'):
                # derive a dict file name from the two model files
                opt['dict_file'] = opt['model_file_transmitter'] + '_' + opt['model_file_receiver'] + '.dict'
            print("[ building dictionary first... ]")
            build_dict(opt, skip_if_built=False)

        # Create model and assign it to the specified task
        print("[ create meta-agent ... ]")
        self.agent = create_agent(opt)
        # both self-play agents share the meta-agent's parameters
        print("[ create agent A ... ]")
        shared = self.agent.share()
        self.agent_a = create_agent_from_shared(shared)
        self.agent_a.set_id(suffix=' A')
        print("[ create agent B ... ]")
        self.agent_b = create_agent_from_shared(shared)
        self.agent_b.set_id(suffix=' B')
        self.world = create_selfplay_world(opt, [self.agent_a, self.agent_b])

        # TODO: if batch, it is also not parallel
        # self.world = BatchSelfPlayWorld(opt, self_play_world)

        # timers driving the periodic log/validate/save schedules below
        self.train_time = Timer()
        self.train_dis_time = Timer()
        self.validate_time = Timer()
        self.log_time = Timer()
        self.save_time = Timer()
        print('[ training... ]')
        self.parleys_episode = 0

        def _pos_or_inf(value):
            # non-positive settings mean "disabled": treat as unbounded
            return value if value > 0 else float('inf')

        self.max_num_epochs = _pos_or_inf(opt['num_epochs'])
        self.max_train_time = _pos_or_inf(opt['max_train_time'])
        self.log_every_n_secs = _pos_or_inf(opt['log_every_n_secs'])
        self.train_dis_every_n_secs = _pos_or_inf(opt['train_display_every_n_secs'])
        self.val_every_n_secs = _pos_or_inf(opt['validation_every_n_secs'])
        self.save_every_n_secs = _pos_or_inf(opt['save_every_n_secs'])

        # +1 when a higher validation metric is better, -1 when lower is
        self.valid_optim = 1 if opt['validation_metric_mode'] == 'max' else -1
        self.best_valid = None
        if opt.get('model_file_transmitter') and os.path.isfile(opt['model_file_transmitter'] + '.best_valid'):
            # restore the best validation score from a previous run; the
            # `with` block closes the file (the old explicit f.close()
            # inside the block was redundant and has been removed)
            with open(opt['model_file_transmitter'] + '.best_valid', 'r') as f:
                self.best_valid = float(f.readline())
        self.impatience = 0
        self.saved = False
        self.valid_world = None
        self.opt = opt
        if opt['tensorboard_log'] is True:
            self.writer = TensorboardLogger(opt)
Exemple #5
0
 def __init__(self, opt: Opt):
     """
     Set up the memory decoder.

     Builds a base generator agent from the memory-decoder model file (if it
     exists) plus a pool of shared copies for batched generation.

     :param opt: options; memory_decoder_* keys configure the decoder
     """
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Memory'
     self.delimiter = opt.get('memory_decoder_delimiter', '\n')
     self.one_line_memories = opt.get('memory_decoder_one_line_memories', False)
     model_file = modelzoo_path(opt['datapath'], opt['memory_decoder_model_file'])
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Memory Decoder from file: {model_file}')
         # silence logging while the agent is constructed (re-enabled below)
         logging.disable()
         overrides = {
             'skip_generation': False,
             'inference': 'beam',
             'beam_size': opt.get('memory_decoder_beam_size', 3),
             'beam_min_length': opt.get('memory_decoder_beam_min_length', 10),
             'beam_block_ngram': 3,
         }
         if self.opt.get('memory_decoder_truncate', -1) > 0:
             # apply the truncation override to both input and general truncate
             overrides['text_truncate'] = self.opt['memory_decoder_truncate']
             overrides['truncate'] = self.opt['memory_decoder_truncate']
         base_agent = create_agent_from_model_file(
             model_file, opt_overrides=overrides
         )
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         assert isinstance(self.agents[0], TorchAgent)
         # NOTE(review): always creates at least 100 shared copies —
         # presumably to cover the largest expected effective batch; confirm
         copies = max(100, (opt['batchsize'] * opt.get('rag_turn_n_turns', 1)))
         self.agents += [
             create_agent_from_shared(self.agents[0].share()) for _ in range(copies)
         ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
    def __init__(self, opt, shared=None):
        """
        Initialize the interactive wizard agent, fresh or from shared state.

        :param opt: agent options; must contain 'debug' and 'datapath'
        :param shared: shared state from another instance, if any
        """
        super().__init__(opt, shared)
        self.debug = opt['debug']
        self.model_path = os.path.join(
            opt['datapath'],
            'models',
            'wizard_of_wikipedia',
            'full_dialogue_retrieval_model',
        )

        if shared:
            # clone: reuse components built by the original instance
            self.opt = shared['opt']
            self.retriever = shared['retriever']
            self.responder = create_agent_from_shared(
                shared['responder_shared_opt'])
            self.sent_tok = shared['sent_tok']
            self.wiki_map = shared['wiki_map']
        else:
            # first instance: build responder and retriever from scratch
            self._set_up_responder(opt)
            self._set_up_retriever(opt)

        self.id = 'WizardGenerativeInteractiveAgent'
        self.ret_history = {}
Exemple #7
0
 def __init__(self, opt: Opt):
     """
     Set up the search query generator.

     Builds a base generator agent from the query-generator model file (if
     it exists), plus extra shared copies when batching requires them.

     :param opt: options; query_generator_* keys configure the generator
     """
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Search'
     self.knowledge_access_method = KnowledgeAccessMethod(
         opt['knowledge_access_method']
     )
     model_file = modelzoo_path(opt['datapath'], opt['query_generator_model_file'])
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Query Generator from file: {model_file}')
         # silence logging while the agent is constructed (re-enabled below)
         logging.disable()
         overrides: Dict[str, Any] = {'skip_generation': False}
         overrides['inference'] = opt['query_generator_inference']
         overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
         overrides['beam_min_length'] = opt.get('query_generator_beam_min_length', 2)
         if self.opt['query_generator_truncate'] > 0:
             # apply the truncation override to both input and general truncate
             overrides['text_truncate'] = self.opt['query_generator_truncate']
             overrides['truncate'] = self.opt['query_generator_truncate']
         base_agent = create_agent_from_model_file(
             model_file, opt_overrides=overrides
         )
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         bsz = opt.get('batchsize', 1)
         rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
         if bsz > 1 or rag_turn_n_turns > 1:
             # one copy per batch row per rag turn; the base agent covers
             # the first slot, hence the -1
             self.agents += [
                 create_agent_from_shared(self.agents[0].share())
                 for _ in range((bsz * rag_turn_n_turns) - 1)
             ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
Exemple #8
0
    def __init__(self, opt, shared=None):
        """
        Initialize the knowledge retriever agent, fresh or from shared state.

        :param opt: agent options; must contain 'add_token_knowledge' and
            'datapath'
        :param shared: shared state from another instance, if any
        """
        self.opt = opt
        self.add_token_knowledge = opt['add_token_knowledge']
        self.model_path = os.path.join(
            opt['datapath'],
            'models',
            'wizard_of_wikipedia',
            'full_dialogue_retrieval_model',
        )

        if shared:
            # clone: reuse components built by the original instance
            self.selector = create_agent_from_shared(shared['selector'])
            self.retriever = shared['retriever']
            self.sent_tok = shared['sent_tok']
            self.wiki_map = shared['wiki_map']
        else:
            # make sure to download all relevant files, then build components
            download(
                opt['datapath'])
            self._set_up_tfidf_retriever(opt)
            self._set_up_selector(opt)

        self.id = 'KnowledgeRetrieverAgent'

        # NOTE: dialogue history should NOT be shared between instances
        self.retriever_history = {'episode_done': False}
        self.dialogue_history = []
        self.checked_sentence_history = []
Exemple #9
0
    def run_conversation(manager, opt, agents, task_id):
        """Run one chat between the first human agent and a fresh bot copy."""
        human_agent = agents[0]
        bot = create_agent_from_shared(shared_bot_params)

        world = MessengerBotChatTaskWorld(opt=opt, agent=human_agent, bot=bot)
        # drive the conversation until the episode finishes, then clean up
        while not world.episode_done():
            world.parley()
        world.shutdown()
Exemple #10
0
def run_eval(agent,
             opt,
             datatype,
             max_exs=-1,
             write_log=False,
             valid_world=None):
    """Eval on validation/test data.

    - Agent is the agent to use for the evaluation.
    - opt is the options that specific the task, eval_task, etc
    - datatype is the datatype to use, such as "valid" or "test"
    - write_log specifies to write metrics to file if the model_file is set
    - max_exs limits the number of examples if max_exs > 0
    - valid_world can be an existing world which will be reset instead of reinitialized

    :return: tuple of (valid_report, valid_world) so the world can be reused
    """
    print('[ running eval: ' + datatype + ' ]')
    if 'stream' in opt['datatype']:
        datatype += ':stream'

    if valid_world is None:
        # build a fresh eval world, possibly sharing the agent's parameters
        opt = opt.copy()
        opt['datatype'] = datatype
        if opt.get('evaltask'):
            opt['task'] = opt['evaltask']
        if opt.get('validation_share_agent', False):
            valid_agent = create_agent_from_shared(agent.share())
        else:
            valid_agent = agent
        valid_world = create_task(opt, valid_agent)
    valid_world.reset()
    cnt = 0

    print('== Before valid ==')
    # NOTE: a dead `i_example` counter (only used by a commented-out loop
    # condition) was removed here
    while not valid_world.epoch_done():
        valid_world.parley()
        if cnt == 0 and opt['display_examples']:
            print(valid_world.display() + '\n~~')
            print(valid_world.report())
        cnt += opt['batchsize']
        if max_exs > 0 and cnt > max_exs + opt.get('numthreads', 1):
            # note this max_exs is approximate--some batches won't always be
            # full depending on the structure of the data
            break
    valid_report = valid_world.report()
    valid_world.reset()  # this makes sure agent doesn't remember valid data

    metrics = datatype + ':' + str(valid_report)
    print(metrics)
    if write_log and opt.get('model_file'):
        # Write out metrics; a context manager guarantees the log file is
        # closed even if the write raises
        with open(opt['model_file'] + '.' + datatype, 'a+') as f:
            f.write(metrics + '\n')

    return valid_report, valid_world
Exemple #11
0
 def generate_world(opt, agents):
     """Build a ChattyGoose task world pairing the first agent with a bot copy."""
     if opt['models'] is None:
         raise RuntimeError("Model must be specified")
     bot_params = opt['shared_bot_params'][ChattyGooseMessengerTaskWorld.MODEL_KEY]
     bot = create_agent_from_shared(bot_params)
     return ChattyGooseMessengerTaskWorld(opt, agents[0], bot)
Exemple #12
0
 def generate_world(opt, agents):
     """Build a ConversationBot task world pairing the first agent with a bot copy."""
     if opt['models'] is None:
         raise RuntimeError("Model must be specified")
     bot_params = opt['shared_bot_params'][ConversationBotTaskWorld.MODEL_KEY]
     bot = create_agent_from_shared(bot_params)
     return ConversationBotTaskWorld(opt, agents[0], bot)
Exemple #13
0
 def __init__(
     self,
     shared: TShared = None,
     custom_model_file='zoo:dialogue_safety/single_turn/model',
 ):
     """
     Load the safety classifier, either fresh or from a shared copy.

     :param shared: shared state from another instance, if any
     :param custom_model_file: model file to load when building fresh
     """
     if shared:
         # clone the already-loaded model from shared state
         self.model = create_agent_from_shared(shared['model'])
     else:
         # first instance: load the model from the (zoo) model file
         self.model = self._create_safety_model(custom_model_file)
     # maps predicted class label -> "is flagged as unsafe"
     self.classes = {OK_CLASS: False, NOT_OK_CLASS: True}
Exemple #14
0
    def __init__(
        self,
        opt,
        agents=None,
        shared=None,
        range_turn=(5, 6),
        max_turn=10,
        max_resp_time=120,
        model_agent_opt=None,
        world_tag='',
        agent_timeout_shutdown=120,
    ):
        """
        Set up an MTurk persona-chat evaluation world.

        :param opt: world options; must contain 'is_sandbox'
        :param agents: MTurk agents in the conversation
        :param shared: shared state, passed to the superclass
        :param range_turn: bounds used to randomly pick the number of turns
        :param max_turn: hard cap on turns
        :param max_resp_time: max time (secs) a human may take to respond
        :param model_agent_opt: shared opt for the model agent; None means a
            human plays the second side
        :param world_tag: tag identifying this world (e.g. in logs)
        :param agent_timeout_shutdown: secs before shutting down an
            unresponsive agent
        """
        self.turn_idx = 0
        self.hit_id = None
        self.range_turn = range_turn
        self.max_turn = max_turn
        # np.random.randint excludes the upper bound; +1 shifts the range up
        self.n_turn = np.random.randint(self.range_turn[0],
                                        self.range_turn[1]) + 1
        self.model_name = opt.get('model_name')
        self.dialog = []
        self.task_type = 'sandbox' if opt['is_sandbox'] else 'live'
        self.chat_done = False
        self.n_personas = []
        # per-agent evaluation slots; -1 / None mark "not yet collected"
        self.fluency_score = len(agents) * [-1]
        self.fluency_reason = len(agents) * [None]
        self.eng_score = len(agents) * [-1]
        self.eng_reason = len(agents) * [None]
        self.consistent_score = len(agents) * [-1]
        self.consistent_reason = len(agents) * [None]
        self.persona_picked = len(agents) * [None]
        self.world_tag = world_tag
        self.ratings = ['1', '2', '3', '4', '5']
        super().__init__(opt, agents, shared)

        # set up model agent
        if model_agent_opt is not None:
            self.model_agent = create_agent_from_shared(model_agent_opt)
        else:
            # case where we test against a human
            self.model_agent = self.agents[1]

        # below are timeout protocols
        self.max_resp_time = max_resp_time  # in secs
        self.agent_timeout_shutdown = agent_timeout_shutdown

        # set up personas
        self.personas = [
            (ag.persona_data if hasattr(ag, 'persona_data') else None)
            for ag in self.agents
        ]
        # persona lines used by the model side of the conversation
        self.model_persona_text = '\n'.join([
            'your persona:' + pers for pers in self.agents[0].model_persona[1]
        ])
        print(self.model_persona_text)
Exemple #15
0
        def run_conversation(mturk_manager, opt, workers):
            """
            Run one human-bot conversation HIT.

            Picks the model that still needs the most conversations, pairs a
            bot wrapper with the human worker, runs the chat world to
            completion, then updates run statistics and soft-blocks
            unacceptable workers.
            """
            # choose the model with the highest remaining quota
            remaining_counts_needed = [
                (m, c - run_statistics[m])
                for (m, c) in opt['conversations_needed'].items()
            ]
            remaining_counts_needed.sort(reverse=True, key=lambda x: x[1])
            model_name = remaining_counts_needed[0][0]
            print(
                f'Remaining conversation counts needed: {remaining_counts_needed}'
            )

            # Get a bot and add it to the list of "workers"
            print(f'Choosing the "{model_name}" model for the bot.')
            agent = create_agent_from_shared(shared_bot_agents[model_name])
            bot_worker = TurkLikeAgent(
                opt,
                model_name=model_name,
                model_agent=agent,
                num_turns=opt['num_turns'],
                semaphore=semaphore,
            )
            workers_including_bot = workers + [bot_worker]

            # exactly one human and one bot
            assert len(workers_including_bot) == 2

            # Get context: personas, previous utterances, etc.
            if context_generator is not None:
                context_info = context_generator.get_context()
            else:
                context_info = None

            conv_idx = mturk_manager.conversation_index
            world = TurnAnnotationsChatWorld(
                opt=opt,
                agents=workers_including_bot,
                num_turns=opt['num_turns'],
                max_resp_time=opt['max_resp_time'],
                tag='conversation t_{}'.format(conv_idx),
                context_info=context_info,
            )
            while not world.episode_done():
                print('About to parley')
                world.parley()
            model_nickname, worker_is_unacceptable, convo_finished = world.save_data(
            )
            if worker_is_unacceptable:
                # the human worker (always workers[0]) failed quality checks
                print(f'Soft-blocking worker {workers[0].worker_id}')
                mturk_manager.soft_block_worker(workers[0].worker_id)
                time.sleep(0.1)
            if not worker_is_unacceptable and convo_finished:
                # only completed, acceptable conversations count toward quota
                run_statistics[model_nickname] += 1

            world.shutdown()
            world.review_work()
Exemple #16
0
 def __init__(self, opt: Opt, shared=None):
     """
     Wrap a single underlying task, swapping labels into the text field.

     :param opt: options; 'task' must name exactly one task
     :param shared: shared state from another instance, if any
     :raises ValueError: if more than one task is specified
     """
     if ',' in opt['task']:
         raise ValueError(
             'LabelToTextTeacher cannot be used with multiple tasks!')
     self.id = opt['task']
     self.opt = opt
     if shared and 'task' in shared:
         # reuse the inner teacher created by the original instance
         self.task = create_agent_from_shared(shared['task'])
     else:
         # build the inner teacher for the wrapped task
         inner_opt = copy.deepcopy(opt)
         inner_opt['task'] = opt['label_to_text_task']
         self.task = create_task_agent_from_taskname(inner_opt)[0]
Exemple #17
0
 def __init__(self, opt: Opt, shared=None):
     """
     Wrap exactly one underlying task teacher.

     :param opt: options; 'task' must name exactly one task
     :param shared: shared state from another instance, if any
     :raises ValueError: if more than one task is specified
     """
     if ',' in opt['task']:
         raise ValueError(
             'AbstractWrapperTeacher cannot be used with multiple tasks!')
     self.id = opt['task']
     self.opt = opt
     if shared:
         # reuse the inner teacher created by the original instance
         self.task = create_agent_from_shared(shared['task'])
     else:
         # build the inner teacher for the wrapped task
         inner_opt = copy.deepcopy(opt)
         inner_opt['task'] = opt['wrapper_task']
         self.task = create_task_agent_from_taskname(inner_opt)[0]
Exemple #18
0
def load_eval_worlds(agent, opt, datatype):
    """
    Create a new eval world for the agent and the given opt.

    Copies the opt, switches it to ``datatype``, applies the training
    script's special eval-time overrides (eval task, eval batchsize, eval
    dynamic batching), then builds one world per comma-separated task.

    :param Agent agent:
        The model being trained.

    :param Opt opt:
        The global CLI opts.

    :param string datatype:
        The new datatype.
    """

    if 'stream' in opt['datatype']:
        datatype += ':stream'
    opt = opt.copy()
    opt['datatype'] = datatype
    if opt.get('evaltask'):
        # a dedicated eval task was specified, so evaluate on that instead
        opt['task'] = opt['evaltask']
    if opt.get('eval_batchsize'):
        # eval-time batchsize override
        opt['batchsize'] = opt['eval_batchsize']
    if opt.get('eval_dynamic_batching'):
        # FIXME: see issue tracked in https://github.com/facebookresearch/ParlAI/issues/3367
        # eval-time dynamic batching override; 'off' disables it entirely
        if opt['eval_dynamic_batching'] == 'off':
            opt['dynamic_batching'] = None
        else:
            opt['dynamic_batching'] = opt['eval_dynamic_batching']

    # share the trained agent's parameters with the eval agent if requested
    if opt.get('validation_share_agent', False):
        eval_agent = create_agent_from_shared(agent.share())
    else:
        eval_agent = agent

    # one world per comma-separated task
    worlds = []
    for task_name in opt['task'].split(','):
        task_opt = opt.copy()  # copy opt since we edit the task
        task_opt['task'] = task_name
        worlds.append(create_task(task_opt, eval_agent))

    return worlds
Exemple #19
0
async def parlai(bot, userstate, initial=None):
    """
    Drive an open-ended chat loop between the user and a fresh agent copy.

    Each user message is translated to English when needed, fed to the
    agent, and the reply translated back before being sent to the user.
    """
    user_text = initial or await bot.listen_text(timeout=10)
    chat_agent = create_agent_from_shared(root_agent.share())
    while True:
        # translate the user's message into English if needed
        if language.get() != 'en':
            user_text = await translate(language.get(), 'en', user_text)
        chat_agent.observe(
            validate({'type': 'message', 'text': user_text, 'episode_done': False})
        )
        reply = chat_agent.act()
        # light detokenization of the model output
        bot_text = reply['text'].replace(" ' ", "'").replace(' ,', ',')
        # translate the reply back to the user's language if needed
        if language.get() != 'en':
            bot_text = await translate('en', language.get(), bot_text)
        user_text = await bot.ask(bot_text, timeout=10)
Exemple #20
0
 def __init__(self, opt, shared=None):
     """
     Build the parsing dictionary from shared state.

     :param opt: agent options
     :param shared: shared state; must provide either a 'dictionary_agent'
         shared dict or a ready 'dictionary' object
     :raises RuntimeError: if no dictionary can be obtained from ``shared``
     """
     # guard against shared=None (the default): `'key' in None` would raise
     # a TypeError instead of the intended RuntimeError below
     if shared and 'dictionary_agent' in shared:
         # use this first--maybe be overriding an original dictionary
         self.dict = create_agent_from_shared(shared['dictionary_agent'])
     elif shared and 'dictionary' in shared:
         # otherwise use this dictionary
         self.dict = shared['dictionary']
     else:
         raise RuntimeError('ParsedRemoteAgent needs a dictionary to parse' +
                            ' text with--pass in a dictionary using shared' +
                            '["dictionary"] or pass in the arguments to ' +
                            'instantiate one using shared["dictionary_args' +
                            '"] = (class, options, shared).')
     super().__init__(opt, shared)
Exemple #21
0
def load_eval_worlds(agent, opt, datatype):
    """
    Create a new eval world for the agent and the given opt.

    Copies the opt, switches it to ``datatype``, applies the training
    script's special eval-time overrides (pytorch teacher, eval task, eval
    batchsize), then builds one world per comma-separated task.

    :param Agent agent:
        The model being trained.

    :param Opt opt:
        The global CLI opts.

    :param string datatype:
        The new datatype.
    """
    if 'stream' in opt['datatype']:
        datatype += ':stream'
    opt = opt.copy()
    opt['datatype'] = datatype
    if opt.get('pytorch_teacher_task'):
        # never use pytorch teachers for evaluation
        # but don't forget what we were normally using
        opt['task'] = opt['pytorch_teacher_task']
        del opt['pytorch_teacher_task']
    if opt.get('evaltask'):
        # a dedicated eval task takes precedence over everything else
        opt['task'] = opt['evaltask']
    if opt.get('eval_batchsize'):
        # eval-time batchsize override
        opt['batchsize'] = opt['eval_batchsize']

    # share the trained agent's parameters with the eval agent if requested
    if opt.get('validation_share_agent', False):
        eval_agent = create_agent_from_shared(agent.share())
    else:
        eval_agent = agent

    # one world per comma-separated task
    worlds = []
    for task_name in opt['task'].split(','):
        task_opt = opt.copy()  # copy opt since we edit the task
        task_opt['task'] = task_name
        worlds.append(create_task(task_opt, eval_agent))

    return worlds
Exemple #22
0
def load_eval_worlds(agent, opt, datatype):
    """
    Create a new eval world for the agent and the given opt.

    Only the primary worker builds worlds; other distributed workers get
    ``None``. The opt is copied, switched to ``datatype``, and the training
    script's eval-time overrides applied before building one world per
    comma-separated task.

    :param Agent agent:
        The model being trained.

    :param Opt opt:
        The global CLI opts.

    :param string datatype:
        The new datatype.
    """
    if not is_primary_worker():
        # don't load worlds in workers
        # TODO(MW): this block will need to be removed
        return None

    if 'stream' in opt['datatype']:
        datatype += ':stream'
    opt = opt.copy()
    opt['datatype'] = datatype
    if opt.get('evaltask'):
        # a dedicated eval task takes precedence
        opt['task'] = opt['evaltask']
    if opt.get('eval_batchsize'):
        # eval-time batchsize override
        opt['batchsize'] = opt['eval_batchsize']

    # share the trained agent's parameters with the eval agent if requested
    if opt.get('validation_share_agent', False):
        eval_agent = create_agent_from_shared(agent.share())
    else:
        eval_agent = agent

    # one world per comma-separated task
    worlds = []
    for task_name in opt['task'].split(','):
        task_opt = opt.copy()  # copy opt since we edit the task
        task_opt['task'] = task_name
        worlds.append(create_task(task_opt, eval_agent))

    return worlds
Exemple #23
0
def load_eval_world(agent, opt, datatype):
    """Build a single evaluation world for ``agent`` on ``datatype``."""
    if 'stream' in opt['datatype']:
        datatype += ':stream'
    eval_opt = opt.copy()
    eval_opt['datatype'] = datatype
    if eval_opt.get('evaltask'):
        # a dedicated eval task was specified, so evaluate on that instead
        eval_opt['task'] = eval_opt['evaltask']
    if eval_opt.get('eval_batchsize'):
        # override eval time batchsize
        eval_opt['batchsize'] = eval_opt['eval_batchsize']
    # optionally share the trained agent's parameters with the eval agent
    if eval_opt.get('validation_share_agent', False):
        eval_agent = create_agent_from_shared(agent.share())
    else:
        eval_agent = agent

    return create_task(eval_opt, eval_agent)
Exemple #24
0
 def __init__(self, opt: Opt):
     """
     Set up the search query generator for BlenderBot2.

     Builds a base generator agent from the query-generator model file (if
     it exists), plus extra shared copies when batching requires them.

     :param opt: options; query_generator_* keys configure the generator
     :raises ValueError: if the blenderbot2 query generator is combined
         with the search_only knowledge access method
     """
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Search'
     self.knowledge_access_method = KnowledgeAccessMethod(
         opt['knowledge_access_method'])
     model_file = modelzoo_path(opt['datapath'],
                                opt['query_generator_model_file'])
     # the bb2 query generator is incompatible with search_only mode
     if (self.knowledge_access_method is KnowledgeAccessMethod.SEARCH_ONLY
             and 'blenderbot2/query_generator/model' in model_file):
         raise ValueError(
             'You cannot use the blenderbot2 query generator with search_only. Please '
             'consider setting --query-generator-model-file zoo:sea/bart_sq_gen/model '
             'instead.')
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Query Generator from file: {model_file}')
         # silence logging while the agent is constructed (re-enabled below)
         logging.disable()
         overrides: Dict[str, Any] = {'skip_generation': False}
         overrides['inference'] = opt['query_generator_inference']
         overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
         overrides['beam_min_length'] = opt.get(
             'query_generator_beam_min_length', 2)
         overrides['model_parallel'] = opt['model_parallel']
         overrides['no_cuda'] = opt['no_cuda']
         if self.opt['query_generator_truncate'] > 0:
             # apply the truncation override to both input and general truncate
             overrides['text_truncate'] = self.opt[
                 'query_generator_truncate']
             overrides['truncate'] = self.opt['query_generator_truncate']
         base_agent = create_agent_from_model_file(model_file,
                                                   opt_overrides=overrides)
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         # size the agent pool for the largest train/eval batch
         bsz = max(
             opt.get('batchsize') or 1,
             opt.get('eval_batchsize') or 1)
         rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
         if bsz > 1 or rag_turn_n_turns > 1:
             # one copy per batch row per rag turn; the base agent covers
             # the first slot, hence the -1
             self.agents += [
                 create_agent_from_shared(self.agents[0].share())
                 for _ in range((bsz * rag_turn_n_turns) - 1)
             ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
    def _init_chat(self, chatID):
        """
        Create new chat for new dialog. Sets up a new instantiation of the agent so that
        each chat has its own local state.

        :param chatID: chat id
        :return: new instance of your local agent
        """
        shared_info = self.shared

        # make sure the shared state carries an 'opt' dict we can annotate
        # (missing key and explicit None are treated the same)
        if shared_info.get('opt') is None:
            shared_info['opt'] = {}
        # tag the opt with this chat's id before instantiating
        shared_info['opt']['convai_chatID'] = chatID

        self.chats[chatID] = create_agent_from_shared(shared_info)
        return self.chats[chatID]
Exemple #26
0
    def _init_chat(self, chatID):
        """Create new chat for new dialog.
        Sets up a new instantiation of the agent so that each chat has its own
        local state.

        :param chatID: chat id
        :return: new instance of your local agent
        """
        shared_info = self.shared

        # ensure the shared state carries an 'opt' dict we can annotate
        # (missing key and explicit None are treated the same)
        if shared_info.get('opt') is None:
            shared_info['opt'] = {}
        # record this chat's id on the opt before instantiating the agent
        shared_info['opt']['convai_chatID'] = chatID

        self.chats[chatID] = create_agent_from_shared(shared_info)
        return self.chats[chatID]
Exemple #27
0
def get_bot_worker(opt: Dict[str, Any], model_name: str) -> TurkLikeAgent:
    """
    Return a bot agent.

    Agent behaves like a crowdsource worker but actually wraps around a dialogue model.
    """
    # Instantiate the underlying dialogue model from its shared state,
    # then wrap it so it can stand in for a human worker.
    model_agent = create_agent_from_shared(opt['shared_bot_agents'][model_name])
    return TurkLikeAgent(
        opt,
        model_name=model_name,
        model_agent=model_agent,
        num_turns=opt['num_turns'],
        semaphore=opt['semaphore'],
    )
Exemple #28
0
        def run_conversation(mturk_manager, opt, workers):
            """Run one annotated conversation between a worker and a bot."""
            # Pick the model with the most conversations still outstanding.
            # Stable descending sort by remaining count matches the original
            # in-place sort's ordering.
            remaining_counts_needed = sorted(
                (
                    (name, count - run_statistics[name])
                    for name, count in opt['conversations_needed'].items()
                ),
                key=lambda pair: pair[1],
                reverse=True,
            )
            model_name = remaining_counts_needed[0][0]
            print(
                f'Remaining conversation counts needed: {remaining_counts_needed}'
            )

            # Build the bot and treat it as just another "worker".
            print(f'Choosing the "{model_name}" model for the bot.')
            bot_worker = TurkLikeAgent(
                opt,
                model_name=model_name,
                model_agent=create_agent_from_shared(shared_bot_agents[model_name]),
                num_turns=opt['num_turns'],
                semaphore=semaphore,
            )
            all_agents = workers + [bot_worker]

            assert len(all_agents) == 2

            conv_idx = mturk_manager.conversation_index
            world = TurnAnnotationsChatWorld(
                opt=opt,
                agents=all_agents,
                num_turns=opt['num_turns'],
                max_resp_time=opt['max_resp_time'],
                tag=f'conversation t_{conv_idx}',
                annotations_config=ANNOTATIONS_CONFIG,
            )
            # Parley until the conversation signals completion.
            while not world.episode_done():
                print('About to parley')
                world.parley()
            model_nickname, convo_finished = world.save_data()

            # Only count conversations that actually finished.
            if convo_finished:
                run_statistics[model_nickname] += 1

            world.shutdown()
            world.review_work()
Exemple #29
0
def make_world(opt, agents):
    """Build a TurnAnnotationsChatWorld pairing a human worker with a bot."""
    # Extract important components from opt
    semaphore = opt['semaphore']
    shared_bot_agents = opt['shared_bot_agents']
    statistics_condition = opt['statistics_condition']
    context_generator = opt['context_generator']
    num_turns = opt['num_turns']

    # Choose the model with the largest remaining quota, while holding the
    # statistics lock so concurrent worlds don't race on the counts.
    run_statistics = opt['run_statistics']
    with statistics_condition:
        remaining_counts_needed = sorted(
            (
                (m, c - run_statistics[m])
                for m, c in opt['conversations_needed'].items()
            ),
            key=lambda item: item[1],
            reverse=True,
        )
        model_name = remaining_counts_needed[0][0]
        print(
            f'Remaining conversation counts needed: {remaining_counts_needed}')
        print(f'Choosing the "{model_name}" model for the bot.')

    # Wrap the chosen model so it behaves like a crowdsource worker.
    bot_worker = TurkLikeAgent(
        opt,
        model_name=model_name,
        model_agent=create_agent_from_shared(shared_bot_agents[model_name]),
        num_turns=num_turns,
        semaphore=semaphore,
    )

    # Get context: personas, previous utterances, etc.
    context_info = (
        context_generator.get_context() if context_generator is not None else None
    )

    agents[0].agent_id = "Worker"

    return TurnAnnotationsChatWorld(
        opt,
        agent=agents[0],
        bot=bot_worker,
        context_info=context_info,
    )
Exemple #30
0
    def _init_chat(self, chat):
        """
        Create new chat for new dialog.

        A chat consists of a new instance of ConvAIAgent, a new instance of
        your own agent, and a DialogPartnerWorld in which the two agents
        communicate. The class of the local agent comes from shared data.

        :param chat: chat id
        :return: tuple of (ConvAIAgent, local agent, DialogPartnerWorld)
        """
        agent_info = self.shared["agents"][0]

        # Ensure the shared 'opt' dict exists, then attach references to
        # this world instance and the chat id.
        if agent_info.get('opt') is None:
            agent_info['opt'] = {}
        agent_info['opt']['convai_world'] = self
        agent_info['opt']['convai_chat'] = chat

        local_agent = create_agent_from_shared(agent_info)
        remote_agent = ConvAIAgent({'chat': chat})
        world = DialogPartnerWorld(
            {'task': 'ConvAI Dialog'}, [remote_agent, local_agent]
        )
        self.chats[chat] = (remote_agent, local_agent, world)
        return self.chats[chat]
Exemple #31
0
def load_eval_world(agent, opt, datatype):
    """
    Construct an evaluation world for the given agent and datatype.

    Copies ``opt`` so the caller's options are untouched, applies
    eval-specific overrides (task, batchsize, streaming), and optionally
    evaluates with an agent that shares the training agent's parameters.
    """
    if 'stream' in opt['datatype']:
        datatype += ':stream'

    eval_opt = opt.copy()
    eval_opt['datatype'] = datatype

    if eval_opt.get('pytorch_teacher_task'):
        # never use pytorch teachers for evaluation
        # but don't forget what we were normally using
        eval_opt['task'] = eval_opt['pytorch_teacher_task']
        del eval_opt['pytorch_teacher_task']
    if eval_opt.get('evaltask'):
        # if a different eval task is specified, use it.
        eval_opt['task'] = eval_opt['evaltask']
    if eval_opt.get('eval_batchsize'):
        # override eval time batchsize
        eval_opt['batchsize'] = eval_opt['eval_batchsize']

    # Share model parameters with the training agent when requested;
    # otherwise evaluate the training agent directly.
    if eval_opt.get('validation_share_agent', False):
        eval_agent = create_agent_from_shared(agent.share())
    else:
        eval_agent = agent

    return create_task(eval_opt, eval_agent)