Esempio n. 1
0
 def write_conversations_format(self, outfile, world):
     """
     Hand the collected logs to ``Conversations.save_conversations``.

     :param outfile:
         destination passed through unchanged to ``save_conversations``.
     :param world:
         world whose ``opt`` is passed along; its ``selfchat_task`` entry
         (``False`` when absent) is forwarded as ``self_chat``.
     """
     opt = world.opt
     is_self_chat = opt.get('selfchat_task', False)
     Conversations.save_conversations(
         self._logs, outfile, opt, self_chat=is_self_chat
     )
Esempio n. 2
0
 def write_conversations_format(self, outfile, world):
     """
     Log the destination, then save the collected logs as Conversations.

     :param outfile:
         destination passed through unchanged to ``save_conversations``.
     :param world:
         world whose ``opt`` is passed along; its ``selfchat_task`` entry
         (``False`` when absent) is forwarded as ``self_chat``.
     """
     logging.info(f'Saving log to {outfile} in Conversations format')
     opt = world.opt
     save_kwargs = {'self_chat': opt.get('selfchat_task', False)}
     Conversations.save_conversations(self._logs, outfile, opt, **save_kwargs)
Esempio n. 3
0
    def test_conversations(self):
        """
        Round-trip two small dialogues through ``Conversations``.

        Saves the dialogues under ``self.datapath``, checks that the
        ``.jsonl`` and ``.metadata`` files exist, reloads the ``.jsonl`` file
        and verifies the conversation count, the speakers, the saved opt, the
        extra keyword data and the captured output of ``read_conv_idx(0)``.
        """
        act_list = [
            [
                [
                    {'id': 'Emily', 'text': 'Hello, do you like this test?'},
                    {'id': 'Stephen', 'text': 'Why yes! I love this test!'},
                ],
                [
                    {'id': 'Emily', 'text': 'So will you stamp this diff?'},
                    {'id': 'Stephen', 'text': 'Yes, I will do it right now!'},
                ],
            ],
            [
                [
                    {
                        'id': 'A',
                        'text': 'Somebody once told me the world is gonna roll me',
                    },
                    {'id': 'B', 'text': 'I aint the sharpest tool in the shed'},
                ],
                [
                    {
                        'id': 'A',
                        'text': 'She was looking kind of dumb with her finger and her thumb',
                    },
                    {'id': 'B', 'text': 'In the shape of an L on her forehead'},
                ],
            ],
        ]
        self.opt = {
            'A': 'B',
            'C': 'D',
            'E': 'F',
        }

        self.convo_datapath = os.path.join(self.datapath, 'convo1')
        Conversations.save_conversations(
            act_list,
            self.convo_datapath,
            self.opt,
            self_chat=False,
            other_info='Blah blah blah',
        )

        # Use unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they report which file is missing.
        for suffix in ('.jsonl', '.metadata'):
            expected_file = self.convo_datapath + suffix
            self.assertTrue(
                os.path.exists(expected_file), f'{expected_file} was not written'
            )

        convos = Conversations(self.convo_datapath + '.jsonl')

        # test conversations loaded
        self.assertEqual(convos.num_conversations, 2)

        # test speakers saved
        speakers = {'Stephen', 'Emily', 'A', 'B'}
        self.assertEqual(set(convos.metadata.speakers), speakers)

        # test opt saved: every key written above must come back unchanged
        for key, value in self.opt.items():
            self.assertEqual(value, convos.metadata.opt[key])

        # test kwargs
        self.assertEqual({'other_info': 'Blah blah blah'}, convos.metadata.extra_data)

        # test reading conversations
        with testing_utils.capture_output() as out:
            convos.read_conv_idx(0)
        str_version = (
            'Emily: Hello, do you like this test?\n'
            'Stephen: Why yes! I love this test!\n'
            'Emily: So will you stamp this diff?\n'
            'Stephen: Yes, I will do it right now!\n'
        )
        self.assertIn(str_version, out.getvalue())
Esempio n. 4
0
    def test_conversations(self):
        """
        Round-trip two small dialogues through ``Conversations``.

        Saves the dialogues under ``self.datapath``, checks that the
        ``.jsonl`` and ``.metadata`` files exist, reloads the ``.jsonl`` file
        and verifies its length, the speakers, the saved opt, the extra
        keyword data, the log output of ``read_conv_idx(0)`` and indexed
        access to individual turns.
        """
        act_list = [
            [
                [
                    {'id': 'Emily', 'text': 'Hello, do you like this test?'},
                    {'id': 'Stephen', 'text': 'Why yes! I love this test!'},
                ],
                [
                    {'id': 'Emily', 'text': 'So will you stamp this diff?'},
                    {'id': 'Stephen', 'text': 'Yes, I will do it right now!'},
                ],
            ],
            [
                [
                    {
                        'id': 'A',
                        'text': 'Somebody once told me the world is gonna roll me',
                    },
                    {'id': 'B', 'text': 'I aint the sharpest tool in the shed'},
                ],
                [
                    {
                        'id': 'A',
                        'text': 'She was looking kind of dumb with her finger and her thumb',
                    },
                    {'id': 'B', 'text': 'In the shape of an L on her forehead'},
                ],
            ],
        ]
        self.opt = {'A': 'B', 'C': 'D', 'E': 'F'}

        self.convo_datapath = os.path.join(self.datapath, 'convo1')
        Conversations.save_conversations(
            act_list,
            self.convo_datapath,
            self.opt,
            self_chat=False,
            other_info='Blah blah blah',
        )

        # Use unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they report which file is missing.
        for suffix in ('.jsonl', '.metadata'):
            expected_file = self.convo_datapath + suffix
            self.assertTrue(
                os.path.exists(expected_file), f'{expected_file} was not written'
            )

        convos = Conversations(self.convo_datapath + '.jsonl')

        # test conversations loaded
        self.assertEqual(len(convos), 2)

        # test speakers saved
        speakers = {'Stephen', 'Emily', 'A', 'B'}
        self.assertEqual(set(convos.metadata.speakers), speakers)

        # test opt saved: every key written above must come back unchanged
        for key, value in self.opt.items():
            self.assertEqual(value, convos.metadata.opt[key])

        # test kwargs
        self.assertEqual(
            {'other_info': 'Blah blah blah'}, convos.metadata.extra_data
        )

        # test reading conversations; only the call under test sits inside
        # the assertLogs context, the captured records are inspected after
        with self.assertLogs(logger=logging.logger, level='DEBUG') as cm:
            convos.read_conv_idx(0)
        str_version = (
            'Emily: Hello, do you like this test?\n'
            'Stephen: Why yes! I love this test!\n'
            'Emily: So will you stamp this diff?\n'
            'Stephen: Yes, I will do it right now!\n'
        )
        self.assertIn(str_version, "\n".join(cm.output))

        # test getting a specific turn
        first = convos[0]  # Conversation
        self.assertEqual(first[0].id, 'Emily')
        self.assertEqual(first[3].text, 'Yes, I will do it right now!')
Esempio n. 5
0
def dump_data(opt):
    """
    Dump task data to ACUTE-Eval.

    Runs a ``RepeatLabelAgent`` through the task described by ``opt``, turns
    each parley into a two-message turn (the act's text, then one randomly
    chosen label) and hands the collected episodes to
    ``Conversations.save_conversations``.

    :param opt:
        options dict. Keys read here: ``task``, ``datatype``, ``outfile``,
        ``num_episodes``, ``log_every_n_secs`` and, optionally,
        ``speaker_0_id``, ``speaker_1_id`` and ``prepended_context``.
        When ``outfile`` is ``None`` a temporary ``.txt`` path is created.
        A ``num_episodes`` of ``-1`` means every episode the world reports.
    """
    import os  # local import: only needed to close the mkstemp descriptor

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    task = opt.get('task')
    speaker_0_id = opt.get('speaker_0_id') or f'{task}_as_human'
    speaker_1_id = opt.get('speaker_1_id') or f'{task}_as_model'
    if opt['outfile'] is None:
        # mkstemp returns an already-open OS-level descriptor along with the
        # path. Only the path is used, so close the descriptor instead of
        # leaking it for the lifetime of the process.
        fd, outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']), suffix='.txt'
        )
        os.close(fd)
    else:
        outfile = opt['outfile']

    available_episodes = world.num_episodes()
    if opt['num_episodes'] == -1:
        num_episodes = available_episodes
    else:
        num_episodes = min(opt['num_episodes'], available_episodes)
    log_timer = TimeLogger()

    print(f'[ starting to convert, saving output to {outfile} ]')
    dialogues = []
    for _ in range(num_episodes):
        episode = []
        episode_done = False
        while not episode_done:
            world.parley()
            act = world.get_acts()[0]
            text = act.get('text')
            split_text = text.split('\n')
            # NOTE: the ``pop`` is evaluated eagerly, so 'eval_labels' is
            # removed from the act even when 'labels' is present.
            label = random.choice(act.get('labels', act.pop('eval_labels', None)))
            if not episode and opt.get('prepended_context'):
                # first turn: every line but the last is context, emitted as
                # its own two-message turn ahead of the real exchange
                context = split_text[:-1]
                text = split_text[-1]
                context_turn = [
                    {'text': context, 'episode_done': False, 'id': 'context'}
                    for _ in range(2)
                ]
                episode.append(context_turn)
            turn = [
                {'text': text, 'episode_done': False, 'id': speaker_0_id},
                {'text': label, 'episode_done': False, 'id': speaker_1_id},
            ]
            episode.append(turn)
            if act.get('episode_done', False):
                # flag the final message of the episode
                episode[-1][-1]['episode_done'] = True
                episode_done = True
                dialogues.append(episode)

            if log_timer.time() > opt['log_every_n_secs']:
                # separate name so the progress line does not rebind ``text``
                log_text, _log = log_timer.log(
                    world.total_parleys, world.num_examples()
                )
                print(log_text)

        if world.epoch_done():
            break

    Conversations.save_conversations(dialogues, outfile, opt)