Example no. 1
def interactive_rank(opt, print_parser=None):
    # Create the agent that provides human input and set up the specified task
    human = create_agent(opt)
    task = create_task_agent_from_taskname(opt)[0]

    metrics = Metrics(opt)
    episodes = 0

    def print_metrics():
        report = metrics.report()
        report['episodes'] = episodes
        print(report)

    # Show some example dialogs:
    try:
        while not task.epoch_done():
            msg = task.act()
            print('[{id}]: {text}'.format(id=task.getID(),
                                          text=msg.get('text', '')))
            cands = list(msg.get('label_candidates', []))
            random.shuffle(cands)
            for i, c in enumerate(cands):
                print('    [{i}]: {c}'.format(i=i, c=c))

            print('[ Please choose a response from the list. ]')

            choice = None
            while choice is None:
                choice = human.act().get('text')
                try:
                    choice = int(choice)
                    if choice >= 0 and choice < len(cands):
                        choice = cands[choice]
                    else:
                        print('[ Try again: you selected {i} but the '
                              'candidates are indexed from 0 to {j}. ]'
                              ''.format(i=choice, j=len(cands) - 1))
                        choice = None
                except (TypeError, ValueError):
                    print('[ Try again: you did not enter a valid index. ]')
                    choice = None

            print('[ You chose ]: {}'.format(choice))
            reply = {'text_candidates': [choice]}
            labels = msg.get('eval_labels', msg.get('labels'))
            metrics.update(reply, labels)
            if msg.get('episode_done'):
                episodes += 1
            print_metrics()
            print('------------------------------')
            print('[ True reply ]: {}'.format(labels[0]))
            if msg.get('episode_done'):
                print('******************************')

    except KeyboardInterrupt:
        pass

    print()
    print_metrics()
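
Every example in this listing reduces to the same pattern: build a ParlAI opt, pass it to create_task_agent_from_taskname, and treat the first element of the returned list as the teacher. The sketch below shows that core loop in isolation; the import path is an assumption (the function has lived in parlai.core.teachers in recent releases, and in parlai.core.agents in older ones), and it reuses the babi:task1k:1 task that also appears in the test_shuffle example further down.

# Minimal sketch of the shared pattern, not taken from any single example above.
from parlai.core.params import ParlaiParser
from parlai.core.teachers import create_task_agent_from_taskname

pp = ParlaiParser(True, False)
opt = pp.parse_kwargs(task='babi:task1k:1', datatype='valid')

# The function returns a list of task agents; the teacher is the first one.
teacher = create_task_agent_from_taskname(opt)[0]

while not teacher.epoch_done():
    msg = teacher.act()
    print(msg.get('text', ''), '->', msg.get('labels', msg.get('eval_labels')))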
Example no. 2
def _run_through(self, task, mutators):
    pp = ParlaiParser(True, False)
    opt = pp.parse_kwargs(task=task, mutators=mutators)
    teacher = create_task_agent_from_taskname(opt)[0]
    outputs = []
    for _ in range(5):
        outputs.append(teacher.act())
    return outputs
Example no. 3
def test_iter(self):
    with testing_utils.tempdir() as tmpdir:
        teacher = create_task_agent_from_taskname({
            'task': 'integration_tests',
            'datatype': 'valid',
            'datapath': tmpdir
        })[0]
        # twice to assert we reset iterators correctly
        assert len(list(teacher)) == 100
        assert len(list(teacher)) == 100
Example no. 4
def get_teacher_act(defaults, teacher_processed=False, agent_to=None):
    parser = train_setup_args()
    parser.set_defaults(**defaults)
    opt = parser.parse_args([])
    build_dict(opt)
    teacher = create_task_agent_from_taskname(opt)[0]
    agent = create_agent(opt)
    act = teacher.act()
    if teacher_processed:
        return act, agent
    return agent.observe(act), agent
Example no. 5
def __init__(self, opt: Opt, shared=None):
    if ',' in opt['task']:
        raise ValueError(
            'AbstractWrapperTeacher cannot be used with multiple tasks!')
    self.id = opt['task']
    self.opt = opt
    if shared:
        self.task = create_agent_from_shared(shared['task'])
    else:
        opt_singletask = copy.deepcopy(opt)
        opt_singletask['task'] = opt['wrapper_task']
        self.task = create_task_agent_from_taskname(opt_singletask)[0]
Example no. 6
def __init__(self, opt: Opt, shared=None):
    if ',' in opt['task']:
        raise ValueError(
            'LabelToTextTeacher cannot be used with multiple tasks!')
    self.id = opt['task']
    self.opt = opt
    if shared and 'task' in shared:
        self.task = create_agent_from_shared(shared['task'])
    else:
        opt_singletask = copy.deepcopy(opt)
        opt_singletask['task'] = opt['label_to_text_task']
        self.task = create_task_agent_from_taskname(opt_singletask)[0]
Example no. 7
    def test_pytd_teacher(self):
        """
        Test that the pytorch teacher works with given Pytorch Datasets as well.
        """
        defaults = integration_test_parser_defaults.copy()
        defaults['datatype'] = 'train:stream'
        defaults['image_mode'] = 'ascii'

        # Get processed act from agent
        parser = display_setup_args()
        defaults['pytorch_teacher_dataset'] = 'integration_tests'
        del defaults['pytorch_teacher_task']
        parser.set_defaults(**defaults)
        opt = parser.parse_args([])
        teacher = create_task_agent_from_taskname(opt)[0]
        pytorch_teacher_act = teacher.act()

        parser = display_setup_args()
        defaults['task'] = 'integration_tests'
        del defaults['pytorch_teacher_dataset']
        parser.set_defaults(**defaults)
        opt = parser.parse_args([])
        teacher = create_task_agent_from_taskname(opt)[0]
        regular_teacher_act = teacher.act()

        keys = set(pytorch_teacher_act.keys()).intersection(
            set(regular_teacher_act.keys()))
        self.assertTrue(len(keys) != 0)
        for key in keys:
            self.assertTrue(
                pytorch_teacher_act[key] == regular_teacher_act[key],
                'PytorchDataTeacher does not have the same value '
                'as regular teacher for act key: {}. '
                'Values: {}; {}'.format(key, pytorch_teacher_act[key],
                                        regular_teacher_act[key]),
            )
Example no. 8
def test_image_loader(self):
    """
    Test that the image loader returns images of the expected size for each image mode.
    """
    opt = ParlaiParser().parse_args([])
    opt.update(BASE_IMAGE_ARGS)
    for image_mode, dim in IMAGE_MODE_TO_DIM.items():
        opt["image_mode"] = image_mode
        teacher = create_task_agent_from_taskname(opt)[0]
        teacher_act = teacher.get(0)
        self.assertEqual(
            teacher_act["image"].size(),
            dim,
            f"dim mismatch for image mode {image_mode}",
        )
Example no. 9
def _split_type_teacher(
    split_type: str,
) -> CMUDocumentGroundedConversationsTeacher:
    kwargs = {
        'task': 'cmu_dog',
        'datatype': 'valid',
        'cmu_dog_split_type': split_type,
        'datapath': data_path,
    }
    parser = setup_args()
    parser.set_defaults(**kwargs)
    opt = parser.parse_args([])
    agents = create_task_agent_from_taskname(opt)
    assert isinstance(agents, List)
    task = agents[0]
    assert isinstance(task, CMUDocumentGroundedConversationsTeacher)
    return task
Example no. 10
    def test_not_sticky(self):
        pp = ParlaiParser(True, False)
        opt = pp.parse_kwargs(
            task='integration_tests:multiturn',
            mutators='flatten',
            datatype='train:ordered',
        )
        teacher = create_task_agent_from_taskname(opt)[0]
        first_epoch = []
        second_epoch = []
        for _ in range(teacher.num_examples()):
            first_epoch.append(teacher.act())
        teacher.reset()
        for _ in range(teacher.num_examples()):
            second_epoch.append(teacher.act())

        assert all(f == s for f, s in zip(first_epoch, second_epoch))
Example no. 11
def setup_title_to_passage(opt):
    print('[ Setting up Title to Passage Dict ]')
    saved_dp = os.path.join(os.getcwd() + '/data/', 'title_to_passage.pkl')
    if os.path.exists(saved_dp):
        print('[ Loading from saved location, {} ]'.format(saved_dp))
        with open(saved_dp, 'rb') as f:
            title_to_passage = pickle.load(f)
            return title_to_passage
    topics_path = '{}/personas_with_wiki_links.txt'.format(os.getcwd())
    topics = []
    with open(topics_path) as f:
        text = f.read()
        personas = text.split('\n\n')
        for persona in personas:
            persona = persona.split('\n')
            for i in range(1, len(persona)):
                p_i = persona[i]
                if 'https' in p_i:
                    topic = unquote(p_i[p_i.rfind('/') + 1:]).replace('_', ' ')
                    topics.append(topic)
    ordered_opt = opt.copy()
    ordered_opt['datatype'] = 'train:ordered:stream'
    ordered_opt['batchsize'] = 1
    ordered_opt['numthreads'] = 1
    ordered_opt['task'] = 'wikipedia:full:key-value'
    teacher = create_task_agent_from_taskname(ordered_opt)[0]
    title_to_passage = {}
    i = 0
    length = teacher.num_episodes()
    pbar = tqdm.tqdm(total=length)
    while not teacher.epoch_done():
        pbar.update(1)
        i += 1
        action = teacher.act()
        title = action['text']
        if title in topics:
            text = action['labels'][0]
            title_to_passage[title] = text
    pbar.close()
    print('[ Finished Building Title to Passage dict; saving now]')
    with open(saved_dp, 'wb') as f:
        pickle.dump(title_to_passage, f)
    return title_to_passage
Example no. 12
def _base_test_loader(self, image_mode_partial: str, no_cuda: bool = False):
    """
    Test for given partial image mode.
    """
    opt = ParlaiParser().parse_args([])
    opt.update(BASE_IMAGE_ARGS)
    opt['no_cuda'] = no_cuda
    for image_mode, dim in IMAGE_MODE_TO_DIM.items():
        if image_mode_partial not in image_mode:
            continue
        opt["image_mode"] = image_mode
        teacher = create_task_agent_from_taskname(opt)[0]
        teacher_act = teacher.get(0)
        self.assertEqual(
            teacher_act["image"].size(),
            dim,
            f"dim mismatch for image mode {image_mode}",
        )
    torch.cuda.empty_cache()
Example no. 13
def _create_task_agents(opt: Opt):
    """
    Create task agent(s) for the given task name.

    It does this by calling the create_agents function in the task's agents.py. If a
    create_agents function does not exist, it looks for the teacher (agent) class
    defined by the task name directly. (This saves the task creator from having to
    define a create_agents function when it is not needed.)
    """
    my_module = load_task_module(opt['task'])
    try:
        # Try to call the create_agents function in agents.py
        task_agents = my_module.create_agents(opt)  # type: ignore

    except AttributeError:
        # create_agents not found, so try to create the teacher directly.
        return create_task_agent_from_taskname(opt)
    if type(task_agents) != list:
        task_agents = [task_agents]
    return task_agents
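
The docstring above describes a two-step lookup: _create_task_agents first tries a create_agents function in the task's agents.py and only falls back to resolving the teacher class from the task name when that function is missing. A hypothetical agents.py illustrating the first branch is sketched below; DefaultTeacher and its base class are illustrative assumptions, not part of any real task.

# Hypothetical agents.py for a custom task; every name below is illustrative.
from parlai.core.teachers import FixedDialogTeacher


class DefaultTeacher(FixedDialogTeacher):
    # If create_agents below did not exist, _create_task_agents would fall back
    # to locating a teacher class like this one from the task name.
    pass


def create_agents(opt):
    # Because this function is defined, _create_task_agents(opt) calls it and
    # wraps the result in a list if it is not one already.
    return DefaultTeacher(opt)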
Example no. 14
    def load_classifier(self, gpu_num):
        if self.has_classifier:
            if self.classifier is None:
                opt_overrides = {}
                self.classifier_gpu_num = gpu_num
                opt_overrides['gpu'] = gpu_num
                opt_overrides['datatype'] = 'test'
                # opt_overrides['inference'] = 'nucleus'
                opt_overrides['skip_generation'] = False

                self.classifier = create_agent_from_model_file(
                    self.classifier_checkpoint, opt_overrides=opt_overrides)
                teacher_for_classifier_opt = deepcopy(self.classifier.opt)
                teacher_for_classifier_opt.update({"build_data_or_not": False})
                self.teacher_for_classifier = create_task_agent_from_taskname(
                    teacher_for_classifier_opt)[0]
                logging.info("load classifier from:{}".format(
                    self.classifier_checkpoint))
                logging.info("allocate classifier to gpu_{}".format(gpu_num))
        else:
            self.classifier = None
Example no. 15
def _create_task_agents(opt: Opt):
    """
    Create task agent(s) for the given task name.

    It does this by calling the create_agents function in the task's agents.py. If a
    create_agents function does not exist, it looks for the teacher (agent) class
    defined by the task name directly. (This saves the task creator from having to
    define a create_agents function when it is not needed.)
    """
    if opt.get('interactive_task', False) or opt.get('selfchat_task', False):
        # do not need task agents in interactive or self chat settings
        return []

    try:
        # Try to call the create_agents function in agents.py
        my_module = load_task_module(opt['task'])
        task_agents = my_module.create_agents(opt)  # type: ignore
    except (ModuleNotFoundError, AttributeError):
        # create_agents not found, so try to create the teacher directly.
        return create_task_agent_from_taskname(opt)
    if type(task_agents) != list:
        task_agents = [task_agents]
    return task_agents
Example no. 16
def test_shuffle(self):
    """
    Simple test to ensure that dataloader is initialized with correct data sampler.
    """
    dts = ['train', 'valid', 'test']
    exts = ['', ':stream', ':ordered', ':stream:ordered']
    shuffle_opts = [False, True]
    task = 'babi:task1k:1'
    for dt in dts:
        for ext in exts:
            datatype = dt + ext
            for shuffle in shuffle_opts:
                opt_defaults = {
                    'pytorch_teacher_task': task,
                    'datatype': datatype,
                    'shuffle': shuffle,
                }
                parser = display_setup_args()
                parser.set_defaults(**opt_defaults)
                opt = parser.parse_args([])
                teacher = create_task_agent_from_taskname(opt)[0]
                if ('ordered' in datatype
                        or ('stream' in datatype and not opt.get('shuffle'))
                        or 'train' not in datatype):
                    self.assertIsInstance(
                        teacher.pytorch_dataloader.sampler,
                        Sequential,
                        'PytorchDataTeacher failed with args: {}'.format(opt),
                    )
                else:
                    self.assertIsInstance(
                        teacher.pytorch_dataloader.sampler,
                        RandomSampler,
                        'PytorchDataTeacher failed with args: {}'.format(opt),
                    )
Example no. 17
def store_contents(opt,
                   task,
                   save_path,
                   context_length=-1,
                   include_labels=True):
    """
    Preprocess and store a corpus of documents in sqlite.

    Args:
        opt: ParlAI options used to build the teacher.
        task: ParlAI task of text (and possibly values) to store.
        save_path: Path to output sqlite db.
        context_length: Number of prior utterances to keep as context
            (-1 keeps the full history).
        include_labels: Whether to add the chosen label to the stored context.
    """
    if os.path.isfile(save_path):
        raise RuntimeError('%s already exists! Not overwriting.' % save_path)

    logger.info('Reading into database...')
    conn = sqlite3.connect(save_path)
    c = conn.cursor()
    c.execute('CREATE TABLE documents (id INTEGER PRIMARY KEY, text, value);')
    if not task:
        logger.info('No data to initialize table: just creating table.')
        logger.info('Add more data by passing observations to the agent.')
        logger.info('Committing...')
        conn.commit()
        conn.close()
        return

    ordered_opt = opt.copy()
    dt = opt.get('datatype', '').split(':')
    ordered_opt['datatype'] = ':'.join([dt[0], 'ordered'] + dt[1:])
    ordered_opt['batchsize'] = 1
    ordered_opt['numthreads'] = 1
    ordered_opt['task'] = task
    teacher = create_task_agent_from_taskname(ordered_opt)[0]

    episode_done = False
    current = []
    triples = []
    context_length = context_length if context_length >= 0 else None
    context = deque(maxlen=context_length)
    with tqdm(total=teacher.num_episodes()) as pbar:
        while not teacher.epoch_done():
            # collect examples in episode
            while not episode_done:
                action = teacher.act()
                current.append(action)
                episode_done = action['episode_done']

            for ex in current:
                if 'text' in ex:
                    text = ex['text']
                    context.append(text)
                    if len(context) > 1:
                        text = '\n'.join(context)

                # add labels to context
                labels = ex.get('labels', ex.get('eval_labels'))
                label = None
                if labels is not None:
                    label = random.choice(labels)
                    if include_labels:
                        context.append(label)
                # use None for ID to auto-assign doc ids--we don't need to
                # ever reverse-lookup them
                triples.append((None, text, label))

            c.executemany('INSERT OR IGNORE INTO documents VALUES (?,?,?)',
                          triples)
            pbar.update()

            # reset flags and content
            episode_done = False
            triples.clear()
            current.clear()
            context.clear()

    logger.info('Read %d examples from %d episodes.' %
                (teacher.num_examples(), teacher.num_episodes()))
    logger.info('Committing...')
    conn.commit()
    conn.close()
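
store_contents leaves behind an ordinary sqlite file with a single documents(id, text, value) table, so the stored corpus can be read back with the standard sqlite3 module. A small sketch, where 'corpus.db' is a placeholder for whatever save_path was passed in:

import sqlite3

conn = sqlite3.connect('corpus.db')  # placeholder for the save_path used above
c = conn.cursor()

# Each row is (auto-assigned id, concatenated context text, sampled label).
c.execute('SELECT id, text, value FROM documents LIMIT 5')
for doc_id, text, value in c.fetchall():
    print(doc_id, (text or '')[:60], '->', value)

conn.close()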
Example no. 18
def set_classifier(self, classifier):
    self.classifier = classifier
    teacher_for_classifier_opt = deepcopy(self.classifier.opt)
    teacher_for_classifier_opt.update({"build_data_or_not": False})
    self.teacher_for_classifier = create_task_agent_from_taskname(
        teacher_for_classifier_opt)[0]
Example no. 19
def create_agents(opt):
    if not opt.get('interactive_task', False):
        return create_task_agent_from_taskname(opt)
    else:
        # interactive task has no task agents (they are attached as user agents)
        return []
Example no. 20
    def test_custom_eval(self):
        """
        Test whether custom evaluation works.
        """
        with testing_utils.capture_output():
            parser = setup_args()
            opt = parser.parse_args([
                '--task',
                'wizard_of_wikipedia',
                '--datatype',
                'valid',
                '--label-type',
                'chosen_sent',
            ])
            teacher = create_task_agent_from_taskname(opt)[0]

        title = 'Gardening'
        cands = list('four')

        text = "Gardening\nI like Gardening, even when I've only been doing it for a short time."
        response = 'I live on a farm, we garden all year long, it is very relaxing.'
        checked_sent = (
            'Gardening is considered by many people to be a relaxing activity.'
        )
        checked_sent_label = f'{title}{TOKEN_KNOWLEDGE}{checked_sent}'

        retrieval_metric_keys = [
            'passage_r@1', 'passage_r@5', 'title_r@1', 'title_r@5'
        ]

        chosen_sent_teacher_action = Message({
            'text': text,
            'labels': [checked_sent_label],
            'title': [title],
            'checked_sentence': [checked_sent],
        })
        correct_chosen_sent_response = Message({
            'text': checked_sent_label,
            'title_candidates': [title] + cands,
            'text_candidates': [checked_sent_label] + cands,
        })
        top5_chosen_sent_response = Message({
            'text': f'hello{TOKEN_KNOWLEDGE}goodbye',
            'title_candidates': cands + [title],
            'text_candidates': cands + [checked_sent_label],
        })
        incorrect_chosen_sent_response = Message({
            'text': f'hello{TOKEN_KNOWLEDGE}goodbye',
            'title_candidates': cands,
            'text_candidates': cands,
        })

        response_teacher_action = Message({
            'text': text,
            'labels': [response],
            'checked_sentence': checked_sent
        })
        high_f1_response = Message({'text': checked_sent})
        low_f1_response = Message({'text': 'incorrect'})

        # 1) Test with correct top sentence
        teacher.reset_metrics()
        teacher.custom_evaluation(
            chosen_sent_teacher_action,
            [checked_sent_label],
            correct_chosen_sent_response,
        )
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            assert report[k] == AverageMetric(1)

        # 2) Test with top sentence in top 5
        teacher.reset_metrics()
        teacher.custom_evaluation(chosen_sent_teacher_action,
                                  [checked_sent_label],
                                  top5_chosen_sent_response)
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            assert report[k] == (AverageMetric(1) if '5' in k else AverageMetric(0))

        # 3) Test with no top sentences
        teacher.reset_metrics()
        teacher.custom_evaluation(
            chosen_sent_teacher_action,
            [checked_sent_label],
            incorrect_chosen_sent_response,
        )
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            assert report[k] == AverageMetric(0)

        # 4) Test knowledge f1 with high f1
        teacher.label_type = 'response'
        teacher.reset_metrics()
        teacher.custom_evaluation(response_teacher_action, [response],
                                  high_f1_response)
        report = teacher.report()
        assert 'knowledge_f1' in report
        assert report['knowledge_f1'] == F1Metric(1)

        # 5) Test knowledge f1 with low f1
        teacher.reset_metrics()
        teacher.custom_evaluation(response_teacher_action, [response],
                                  low_f1_response)
        report = teacher.report()
        assert 'knowledge_f1' in report
        assert report['knowledge_f1'] == F1Metric(0)