def test_key_insertion_deletion(self):
        """Check that the metadata index tracks key insertion and deletion.

        Builds a three-utterance corpus, sets metadata on utterances, on the
        conversation and on one speaker, then verifies that:

        * each new key appears in the matching ``meta_index`` table together
          with the string form of its value's type;
        * ``del`` on a single utterance's metadata leaves the key (and its
          value) readable;
        * ``Corpus.delete_metadata`` removes the key from both the index and
          the utterance's own metadata.
        """
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
            Utterance(id="2", text="this is a test",
                      speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        # The utterances above carry no explicit conversation id, so the
        # conversation is looked up under the key None.
        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0

        self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                         [str(type('bar'))])
        self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                         [str(type(1))])
        self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                         [str(type(1.0))])

        # test that deleting an attribute from an individual utterance fails
        # to remove it: the key must still be present with its original value.
        # An explicit assertion replaces the previous bare lookup, which only
        # checked (implicitly) that no KeyError was raised.
        del corpus1.get_utterance("2").meta['hey']
        self.assertEqual(corpus1.get_utterance("2").meta['hey'], 'jude')

        # test that delete_metadata works: the key disappears from the index
        # and from the individual utterance's metadata.
        corpus1.delete_metadata('utterance', 'foo')
        self.assertRaises(KeyError,
                          lambda: corpus1.meta_index.utterances_index['foo'])
        self.assertRaises(KeyError,
                          lambda: corpus1.get_utterance("0").meta["foo"])
# Code example #2
0
    def transform(self, corpus: Corpus) -> Corpus:
        """Run the parent transform, then optionally swap in the stored text.

        After delegating to ``super().transform``, and only when
        ``self.replace_text`` is truthy, every utterance accepted by
        ``self.input_filter`` has its ``text`` replaced by the value stored
        under the ``self.output_field`` metadata key (presumably written by
        the parent transform -- confirm against the base class).

        When ``self.save_original`` is truthy, the utterance's previous text
        is written back under that same metadata key before the replacement;
        otherwise the metadata key is deleted from all utterances once the
        loop has finished.

        :param corpus: the corpus to process; it is modified in place.
        :return: the same ``corpus`` object.
        """
        super().transform(corpus)

        # Nothing further to do unless text replacement was requested.
        if not self.replace_text:
            return corpus

        def passes_filter(candidate):
            # The filter is always invoked with None as its second argument.
            return self.input_filter(candidate, None)

        for utterance in corpus.iter_utterances(passes_filter):
            # Read the stored value first: the metadata slot may be
            # overwritten with the original text on the next line.
            replacement = utterance.retrieve_meta(self.output_field)
            if self.save_original:
                utterance.add_meta(self.output_field, utterance.text)
            utterance.text = replacement

        # Without save_original the metadata field is no longer needed.
        if not self.save_original:
            corpus.delete_metadata('utterance', self.output_field)
        return corpus