Example #1
    def test_shuffle(self):
        """
        Simple test to ensure that the dataloader is initialized with the
        correct data sampler.
        """
        dts = ['train', 'valid', 'test']
        exts = ['', ':stream', ':ordered', ':stream:ordered']
        shuffle_opts = [False, True]
        task = 'babi:task1k:1'
        for dt in dts:
            for ext in exts:
                datatype = dt + ext
                for shuffle in shuffle_opts:
                    opt_defaults = {
                        'pytorch_teacher_task': task,
                        'datatype': datatype,
                        'shuffle': shuffle
                    }
                    with testing_utils.capture_output() as _:
                        parser = display_setup_args()
                        parser.set_defaults(**opt_defaults)
                        opt = parser.parse_args()
                        teacher = create_task_agent_from_taskname(opt)[0]
                        if ('ordered' in datatype or
                                ('stream' in datatype and not opt.get('shuffle'))
                                or 'train' not in datatype):
                            self.assertIsInstance(
                                teacher.pytorch_dataloader.sampler, Sequential,
                                'PytorchDataTeacher failed with args: {}'.format(opt))
                        else:
                            self.assertIsInstance(
                                teacher.pytorch_dataloader.sampler,
                                RandomSampler,
                                'PytorchDataTeacher failed with args: {}'.format(opt))
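For reference, a minimal sketch (plain PyTorch, independent of ParlAI) of the sampler behaviour the test above asserts: shuffled training data should use a RandomSampler, while ordered, streamed, or non-training data should fall back to a SequentialSampler (imported as Sequential in the test).

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import RandomSampler, SequentialSampler

dataset = TensorDataset(torch.arange(10))
for shuffle in (False, True):
    loader = DataLoader(dataset, shuffle=shuffle)
    # DataLoader picks its sampler from the shuffle flag.
    expected = RandomSampler if shuffle else SequentialSampler
    assert isinstance(loader.sampler, expected)

Example #2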
def interactive_rank(opt, print_parser=None):
    # Create model and assign it to the specified task
    human = create_agent(opt)
    task = create_task_agent_from_taskname(opt)[0]

    metrics = Metrics(opt)
    episodes = 0

    def print_metrics():
        report = metrics.report()
        report['episodes'] = episodes
        print(report)

    # Show some example dialogs:
    try:
        while not task.epoch_done():
            msg = task.act()
            print('[{id}]: {text}'.format(id=task.getID(), text=msg.get('text', '')))
            cands = list(msg.get('label_candidates', []))
            random.shuffle(cands)
            for i, c in enumerate(cands):
                print('    [{i}]: {c}'.format(i=i, c=c))

            print('[ Please choose a response from the list. ]')

            choice = None
            while choice is None:
                choice = human.act().get('text')
                try:
                    choice = int(choice)
                    if choice >= 0 and choice < len(cands):
                        choice = cands[choice]
                    else:
                        print(
                            '[ Try again: you selected {i} but the '
                            'candidates are indexed from 0 to {j}. ]'
                            ''.format(i=choice, j=len(cands) - 1)
                        )
                        choice = None
                except (TypeError, ValueError):
                    print('[ Try again: you did not enter a valid index. ]')
                    choice = None

            print('[ You chose ]: {}'.format(choice))
            reply = {'text_candidates': [choice]}
            labels = msg.get('eval_labels', msg.get('labels'))
            metrics.update(reply, labels)
            if msg.get('episode_done'):
                episodes += 1
            print_metrics()
            print('------------------------------')
            print('[ True reply ]: {}'.format(labels[0]))
            if msg.get('episode_done'):
                print('******************************')

    except KeyboardInterrupt:
        pass

    print()
    print_metrics()
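A hedged usage sketch for interactive_rank above, assuming the human side is ParlAI's stock local_human agent (which reads the choice from stdin); the task and flags are illustrative, not taken from the original script.

from parlai.core.params import ParlaiParser

if __name__ == '__main__':
    parser = ParlaiParser(add_parlai_args=True, add_model_args=True)
    # local_human prompts on stdin, so the person can type the candidate index.
    opt = parser.parse_args(['--task', 'babi:task1k:1', '--model', 'local_human'])
    interactive_rank(opt, print_parser=parser)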
Example #3
    def test_pytd_teacher(self):
        """
            Test that the pytorch teacher works with given Pytorch Datasets
            as well

            I'll be using the Flickr30k dataset to ensure that the observations
            are the same.
        """
        defaults = parser_defaults.copy()
        defaults['datatype'] = 'train:stream'
        defaults['image_mode'] = 'ascii'

        f = io.StringIO()

        with redirect_stdout(f):
            # Get processed act from agent
            parser = display_setup_args()
            defaults['pytorch_teacher_dataset'] = 'flickr30k'
            del defaults['pytorch_teacher_task']
            parser.set_defaults(**defaults)
            opt = parser.parse_args()
            teacher = create_task_agent_from_taskname(opt)[0]
            pytorch_teacher_act = teacher.act()

            parser = display_setup_args()
            defaults['task'] = 'flickr30k'
            del defaults['pytorch_teacher_dataset']
            parser.set_defaults(**defaults)
            opt = parser.parse_args()
            teacher = create_task_agent_from_taskname(opt)[0]
            regular_teacher_act = teacher.act()

        keys = set(pytorch_teacher_act.keys()).intersection(
            set(regular_teacher_act.keys()))
        self.assertTrue(len(keys) != 0)
        for key in keys:
            self.assertTrue(
                pytorch_teacher_act[key] == regular_teacher_act[key],
                'PytorchDataTeacher does not have the same value '
                'as regular teacher for act key: {}'.format(key))
        print('\n------Passed `test_pytd_teacher`------\n')
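For context, a teacher act in ParlAI is a plain message dict, and the assertion above only compares the keys the two teachers have in common. A toy illustration of that intersect-and-compare idiom (all values here are made up):

pytorch_act = {'text': 'Two dogs playing.', 'episode_done': True, 'image': '<ascii art>'}
regular_act = {'text': 'Two dogs playing.', 'episode_done': True, 'labels': ['a caption']}
shared = set(pytorch_act) & set(regular_act)
assert shared and all(pytorch_act[k] == regular_act[k] for k in shared)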
Example #4
def get_teacher_act(defaults, teacher_processed=False, agent_to=None):
    parser = train_setup_args()
    parser.set_defaults(**defaults)
    opt = parser.parse_args()
    build_dict(opt)
    with testing_utils.capture_output() as _:
        teacher = create_task_agent_from_taskname(opt)[0]
    agent = create_agent(opt)
    act = teacher.act()
    if teacher_processed:
        return act, agent
    return agent.observe(act), agent
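A brief usage sketch for the helper above; the defaults dict (task and model names) is an assumption, not taken from the original test file.

# Hypothetical defaults; any registered ParlAI task/model pair would do.
defaults = {'task': 'integration_tests', 'model': 'repeat_label', 'datatype': 'train'}
observed_act, agent = get_teacher_act(defaults)                     # act after agent.observe()
raw_act, agent = get_teacher_act(defaults, teacher_processed=True)  # raw teacher act

Example #5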
    def test_pytd_teacher(self):
        """
        Test that the pytorch teacher works with given Pytorch Datasets
        as well
        """
        defaults = integration_test_parser_defaults.copy()
        defaults['datatype'] = 'train:stream'
        defaults['image_mode'] = 'ascii'

        with testing_utils.capture_output():
            # Get processed act from agent
            parser = display_setup_args()
            defaults['pytorch_teacher_dataset'] = 'integration_tests'
            del defaults['pytorch_teacher_task']
            parser.set_defaults(**defaults)
            opt = parser.parse_args()
            teacher = create_task_agent_from_taskname(opt)[0]
            pytorch_teacher_act = teacher.act()

            parser = display_setup_args()
            defaults['task'] = 'integration_tests'
            del defaults['pytorch_teacher_dataset']
            parser.set_defaults(**defaults)
            opt = parser.parse_args()
            teacher = create_task_agent_from_taskname(opt)[0]
            regular_teacher_act = teacher.act()

        keys = set(pytorch_teacher_act.keys()).intersection(
            set(regular_teacher_act.keys()))
        self.assertTrue(len(keys) != 0)
        for key in keys:
            self.assertTrue(
                pytorch_teacher_act[key] == regular_teacher_act[key],
                'PytorchDataTeacher does not have the same value '
                'as regular teacher for act key: {}. '
                'Values: {}; {}'.format(key, pytorch_teacher_act[key],
                                        regular_teacher_act[key]),
            )
Example #6
def setup_title_to_passage(opt):
    print('[ Setting up Title to Passage Dict ]')
    saved_dp = os.path.join(os.getcwd() + '/data/', 'title_to_passage.pkl')
    if os.path.exists(saved_dp):
        print('[ Loading from saved location, {} ]'.format(saved_dp))
        with open(saved_dp, 'rb') as f:
            title_to_passage = pickle.load(f)
            return title_to_passage
    topics_path = '{}/personas_with_wiki_links.txt'.format(os.getcwd())
    topics = []
    with open(topics_path) as f:
        text = f.read()
        personas = text.split('\n\n')
        for persona in personas:
            persona = persona.split('\n')
            for i in range(1, len(persona)):
                p_i = persona[i]
                if 'https' in p_i:
                    topic = unquote(p_i[p_i.rfind('/') + 1 :]).replace('_', ' ')
                    topics.append(topic)
    ordered_opt = opt.copy()
    ordered_opt['datatype'] = 'train:ordered:stream'
    ordered_opt['batchsize'] = 1
    ordered_opt['numthreads'] = 1
    ordered_opt['task'] = 'wikipedia:full:key-value'
    teacher = create_task_agent_from_taskname(ordered_opt)[0]
    title_to_passage = {}
    i = 0
    length = teacher.num_episodes()
    pbar = tqdm.tqdm(total=length)
    while not teacher.epoch_done():
        pbar.update(1)
        i += 1
        action = teacher.act()
        title = action['text']
        if title in topics:
            text = action['labels'][0]
            title_to_passage[title] = text
    pbar.close()
    print('[ Finished Building Title to Passage dict; saving now]')
    with open(saved_dp, 'wb') as f:
        pickle.dump(title_to_passage, f)
    return title_to_passage
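A hedged usage sketch for setup_title_to_passage above; the opt would normally come from a ParlaiParser, and the looked-up title is made up.

from parlai.core.params import ParlaiParser

opt = ParlaiParser(add_parlai_args=True).parse_args([])
title_to_passage = setup_title_to_passage(opt)
# Hypothetical title; returns the Wikipedia passage stored for it, if any.
passage = title_to_passage.get('Association football')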
Example #7
def _create_task_agents(opt: Opt):
    """
    Create task agent(s) for the given task name.

    It does this by calling the create_agent function in agents.py of the given task. If
    create_agents function does not exist, it just looks for the teacher (agent) class
    defined by the task name directly.  (This saves the task creator bothering to define
    the create_agents function when it is not needed.)
    """
    my_module = load_task_module(opt['task'])
    try:
        # Tries to call the create_agent function in agents.py
        task_agents = my_module.create_agents(opt)  # type: ignore

    except AttributeError:
        # Create_agent not found, so try to create the teacher directly.
        return create_task_agent_from_taskname(opt)
    if type(task_agents) != list:
        task_agents = [task_agents]
    return task_agents
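The docstring above describes a two-step lookup: the task's agents.py may provide create_agents(opt); otherwise the teacher class is resolved from the task name. A minimal sketch of a hypothetical task module that takes the first path (the module path and class body are assumptions):

# Hypothetical parlai/tasks/my_task/agents.py (a sketch, not a real ParlAI task).
from parlai.core.teachers import DialogTeacher

class DefaultTeacher(DialogTeacher):
    # A real teacher would point opt['datafile'] at its data and implement setup_data().
    pass

def create_agents(opt):
    # When present, this takes precedence over resolving DefaultTeacher by name.
    return [DefaultTeacher(opt)]

Example #8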
    def test_shuffle(self):
        """
        Simple test to ensure that the dataloader is initialized with the
        correct data sampler.
        """
        dts = ['train', 'valid', 'test']
        exts = ['', ':stream', ':ordered', ':stream:ordered']
        shuffle_opts = [False, True]
        task = 'babi:task1k:1'
        for dt in dts:
            for ext in exts:
                datatype = dt + ext
                for shuffle in shuffle_opts:
                    opt_defaults = {
                        'pytorch_teacher_task': task,
                        'datatype': datatype,
                        'shuffle': shuffle
                    }
                    print('Testing test_shuffle with args {}'.format(
                        opt_defaults))
                    f = io.StringIO()
                    with redirect_stdout(f):
                        parser = display_setup_args()
                        parser.set_defaults(**opt_defaults)
                        opt = parser.parse_args()
                        teacher = create_task_agent_from_taskname(opt)[0]
                    if ('ordered' in datatype or
                            ('stream' in datatype and not opt.get('shuffle'))
                            or 'train' not in datatype):
                        self.assertTrue(
                            type(teacher.pytorch_dataloader.sampler) is
                            Sequential,
                            'PytorchDataTeacher failed with args: {}'.format(opt))
                    else:
                        self.assertTrue(
                            type(teacher.pytorch_dataloader.sampler) is
                            RandomSampler,
                            'PytorchDataTeacher failed with args: {}'.format(opt))
        print('\n------Passed `test_shuffle`------\n')
Example #9
def _create_task_agents(opt):
    """
    Create task agent(s) for the given task name.

    It does this by calling the create_agent function in agents.py of the
    given task.  If create_agents function does not exist, it just looks for
    the teacher (agent) class defined by the task name directly.  (This saves
    the task creator bothering to define the create_agents function when it is
    not needed.)
    """
    sp = opt['task'].strip()
    repo = 'parlai'
    if sp.startswith('internal:'):
        # To switch to local repo, useful for non-public projects
        # (make a directory called 'parlai_internal' with your private agents)
        repo = 'parlai_internal'
        sp = sp[9:]
    sp = sp.split(':')
    if '.' in sp[0]:
        # The case of opt['task'] = 'parlai.tasks.squad.agents:DefaultTeacher'
        # (i.e. specifying your own path directly)
        module_name = sp[0]
    elif sp[0] == 'pytorch_teacher':
        module_name = 'parlai.core.pytorch_data_teacher'
    else:
        task = sp[0].lower()
        module_name = "%s.tasks.%s.agents" % (repo, task)
    my_module = importlib.import_module(module_name)
    try:
        # Tries to call the create_agent function in agents.py
        task = sp[0].lower()
        task_agents = my_module.create_agents(opt, task)
    except AttributeError:
        # Create_agent not found, so try to create the teacher directly.
        return create_task_agent_from_taskname(opt)
    if type(task_agents) != list:
        task_agents = [task_agents]
    return task_agents
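Following the parsing above, a few illustrative task strings and the module names they resolve to (the task names themselves are just examples):

resolved = {
    'squad': 'parlai.tasks.squad.agents',
    'internal:my_task': 'parlai_internal.tasks.my_task.agents',
    'pytorch_teacher': 'parlai.core.pytorch_data_teacher',
    'parlai.tasks.squad.agents:DefaultTeacher': 'parlai.tasks.squad.agents',
}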
Example #10
def store_contents(opt,
                   task,
                   save_path,
                   context_length=-1,
                   include_labels=True):
    """Preprocess and store a corpus of documents in sqlite.

    Args:
        task: ParlAI tasks of text (and possibly values) to store.
        save_path: Path to output sqlite db.
        num_workers: Number of parallel processes to use when reading docs.
    """
    if os.path.isfile(save_path):
        raise RuntimeError('%s already exists! Not overwriting.' % save_path)

    logger.info('Reading into database...')
    conn = sqlite3.connect(save_path)
    c = conn.cursor()
    c.execute('CREATE TABLE documents (id INTEGER PRIMARY KEY, text, value);')
    if not task:
        logger.info('No data to initialize table: just creating table.')
        logger.info('Add more data by passing observations to the agent.')
        logger.info('Committing...')
        conn.commit()
        conn.close()
        return

    ordered_opt = opt.copy()
    dt = opt.get('datatype', '').split(':')
    ordered_opt['datatype'] = ':'.join([dt[0], 'ordered'] + dt[1:])
    ordered_opt['batchsize'] = 1
    ordered_opt['numthreads'] = 1
    ordered_opt['task'] = task
    teacher = create_task_agent_from_taskname(ordered_opt)[0]

    episode_done = False
    current = []
    triples = []
    context_length = context_length if context_length >= 0 else None
    context = deque(maxlen=context_length)
    with tqdm(total=teacher.num_episodes()) as pbar:
        while not teacher.epoch_done():
            # collect examples in episode
            while not episode_done:
                action = teacher.act()
                current.append(action)
                episode_done = action['episode_done']

            for ex in current:
                if 'text' in ex:
                    text = ex['text']
                    context.append(text)
                    if len(context) > 1:
                        text = '\n'.join(context)

                # add labels to context
                labels = ex.get('labels', ex.get('eval_labels'))
                if labels is not None:
                    label = random.choice(labels)
                    if include_labels:
                        context.append(label)
                # use None for ID to auto-assign doc ids--we don't need to
                # ever reverse-lookup them
                triples.append((None, text, label))

            c.executemany('INSERT OR IGNORE INTO documents VALUES (?,?,?)',
                          triples)
            pbar.update()

            # reset flags and content
            episode_done = False
            triples.clear()
            current.clear()
            context.clear()

    logger.info('Read %d examples from %d episodes.' %
                (teacher.num_examples(), teacher.num_episodes()))
    logger.info('Committing...')
    conn.commit()
    conn.close()
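A hedged sketch of reading back the database that store_contents writes; the file name stands in for whatever save_path was and is only an assumption.

import sqlite3

conn = sqlite3.connect('docs.db')  # hypothetical save_path
c = conn.cursor()
# 'text' holds the (optionally contextualized) input, 'value' the sampled label.
c.execute('SELECT text, value FROM documents LIMIT 5')
for text, value in c.fetchall():
    print(text, '->', value)
conn.close()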
Example #11
def create_agents(opt, task):
    if not opt.get('interactive_task', False):
        return create_task_agent_from_taskname(opt)
    else:
        # interactive task has no task agents (they are attached as user agents)
        return []
Example #12
def store_contents(opt, task, save_path, context_length=-1, include_labels=True):
    """Preprocess and store a corpus of documents in sqlite.

    Args:
        task: ParlAI tasks of text (and possibly values) to store.
        save_path: Path to output sqlite db.
        num_workers: Number of parallel processes to use when reading docs.
    """
    if os.path.isfile(save_path):
        raise RuntimeError('%s already exists! Not overwriting.' % save_path)

    logger.info('Reading into database...')
    conn = sqlite3.connect(save_path)
    c = conn.cursor()
    c.execute('CREATE TABLE documents (id INTEGER PRIMARY KEY, text, value);')
    if not task:
        logger.info('No data to initialize table: just creating table.')
        logger.info('Add more data by passing observations to the agent.')
        logger.info('Committing...')
        conn.commit()
        conn.close()
        return

    ordered_opt = opt.copy()
    dt = opt.get('datatype', '').split(':')
    ordered_opt['datatype'] = ':'.join([dt[0], 'ordered'] + dt[1:])
    ordered_opt['batchsize'] = 1
    ordered_opt['numthreads'] = 1
    ordered_opt['task'] = task
    teacher = create_task_agent_from_taskname(ordered_opt)[0]

    episode_done = False
    current = []
    triples = []
    context_length = context_length if context_length >= 0 else None
    context = deque(maxlen=context_length)
    with tqdm(total=teacher.num_episodes()) as pbar:
        while not teacher.epoch_done():
            # collect examples in episode
            while not episode_done:
                action = teacher.act()
                current.append(action)
                episode_done = action['episode_done']

            for ex in current:
                if 'text' in ex:
                    text = ex['text']
                    context.append(text)
                    if len(context) > 1:
                        text = '\n'.join(context)

                # add labels to context
                labels = ex.get('labels', ex.get('eval_labels'))
                if labels is not None:
                    label = random.choice(labels)
                    if include_labels:
                        context.append(label)
                # use None for ID to auto-assign doc ids--we don't need to
                # ever reverse-lookup them
                triples.append((None, text, label))

            c.executemany('INSERT OR IGNORE INTO documents VALUES (?,?,?)',
                          triples)
            pbar.update()

            # reset flags and content
            episode_done = False
            triples.clear()
            current.clear()
            context.clear()

    logger.info('Read %d examples from %d episodes.' % (
        teacher.num_examples(), teacher.num_episodes()))
    logger.info('Committing...')
    conn.commit()
    conn.close()