Esempio n. 1
0
    def test_corpus_dump(self):
        """
        Dump a corpus to disk, reload it, and compare the metadata indexes.

        The corpus is written into a temporary directory so that the test
        leaves no files behind and does not depend on the current working
        directory.
        """
        import os
        import tempfile

        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", user=User(name="alice")),
            Utterance(id="1", text="my name is bob", user=User(name="bob")),
            Utterance(id="2", text="this is a test", user=User(
                name="charlie")),
        ])

        # Populate metadata at the utterance, conversation and user level so
        # that each of the indexes compared below is non-trivial.
        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_user("alice").meta['surname'] = 1.0

        with tempfile.TemporaryDirectory() as tmp_dir:
            # NOTE(review): assumes dump() writes to <base_path>/<name>, as
            # the original pairing of base_path="./" with a relative load
            # path implies.
            corpus1.dump('test_index_meta_corpus', base_path=tmp_dir)
            corpus2 = Corpus(
                filename=os.path.join(tmp_dir, 'test_index_meta_corpus'))

            # Assertions stay inside the block so the dumped files still
            # exist while the reloaded corpus is being inspected.
            self.assertEqual(corpus1.meta_index.utterances_index,
                             corpus2.meta_index.utterances_index)
            self.assertEqual(corpus1.meta_index.users_index,
                             corpus2.meta_index.users_index)
            self.assertEqual(corpus1.meta_index.conversations_index,
                             corpus2.meta_index.conversations_index)
            self.assertEqual(corpus1.meta_index.overall_index,
                             corpus2.meta_index.overall_index)
Esempio n. 2
0
    def test_key_insertion_deletion(self):
        """
        Check that the meta index follows metadata keys being set and deleted.
        """
        utterances = [
            Utterance(id="0", text="hello world", user=User(name="alice")),
            Utterance(id="1", text="my name is bob", user=User(name="bob")),
            Utterance(id="2", text="this is a test",
                      user=User(name="charlie")),
        ]
        corpus1 = Corpus(utterances=utterances)

        # (utterance id, meta key, meta value)
        utterance_meta = (
            ("0", 'foo', 'bar'),
            ("1", 'foo', 'bar2'),
            ("2", 'hey', 'jude'),
        )
        for utt_id, key, value in utterance_meta:
            corpus1.get_utterance(utt_id).meta[key] = value

        corpus1.get_conversation(None).meta['convo_meta'] = 1
        corpus1.get_user("alice").meta['surname'] = 1.0

        # The index records the string form of each value's type.
        self.assertEqual(
            corpus1.meta_index.utterances_index['foo'], str(str))
        self.assertEqual(
            corpus1.meta_index.conversations_index['convo_meta'], str(int))
        self.assertEqual(
            corpus1.meta_index.users_index['surname'], str(float))

        # Deleting a key from an utterance removes it from the index.
        del corpus1.get_utterance("2").meta['hey']
        with self.assertRaises(KeyError):
            _ = corpus1.meta_index.utterances_index['hey']

        # Deleting a key from one utterance removes it from the index and
        # from every other object of the same type.
        del corpus1.get_utterance("1").meta['foo']
        with self.assertRaises(KeyError):
            _ = corpus1.meta_index.utterances_index['foo']
        with self.assertRaises(KeyError):
            _ = corpus1.get_utterance("0").meta["foo"]
    def test_broken_convos(self):
        """
        Check that traversing a malformed conversation raises ValueError.

        Two malformed cases are covered: a conversation with more than one
        root, and a conversation containing a reply to an utterance id that
        is not part of it.
        """

        def build_corpus(last_reply_to):
            # The two corpora differ only in what utterance "3" replies to.
            # Columns: (id, text, reply_to, user id, timestamp)
            rows = [
                ("0", "hello world", None, "alice", 0),
                ("1", "my name is bob", "0", "bob", 2),
                ("2", "this is a test", "1", "charlie", 1),
                ("3", "hello world 2", last_reply_to, "alice2", 0),
            ]
            return Corpus(utterances=[
                Utterance(id=utt_id,
                          text=text,
                          reply_to=parent_id,
                          user=User(id=user_id),
                          timestamp=stamp)
                for utt_id, text, parent_id, user_id, stamp in rows
            ])

        corpus1 = build_corpus(None)   # "0" and "3" both have no parent
        corpus2 = build_corpus("9")    # "3" replies to a missing utterance

        # Broken conversation: multiple roots.
        multi_root = corpus1.get_conversation(None)
        with self.assertRaises(ValueError):
            list(multi_root.traverse("dfs", as_utterance=True))

        # Broken conversation: reply to something not in the Conversation.
        dangling_reply = corpus2.get_conversation(None)
        with self.assertRaises(ValueError):
            list(dangling_reply.traverse("dfs", as_utterance=True))
    def test_overlap_diff_data(self):
        """
        Merge corpora that share an utterance id whose data differs.

        The shared utterance (id 2) has different text and user in the two
        corpora but the same metadata. A warning is expected to be printed,
        and the first corpus's utterance data should be preserved.
        """

        def make_corpus(rows):
            # rows: (utterance id, text, user name)
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, user=User(name=name))
                for utt_id, text, name in rows
            ])

        def count(iterable):
            return sum(1 for _ in iterable)

        corpus1 = make_corpus([
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ])
        corpus2 = make_corpus([
            (2, "this is a test2", "candace"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)

        self.assertEqual(count(merged.iter_utterances()), 5)
        self.assertEqual(count(merged.iter_users()), 5)
        # The input corpora keep their original sizes.
        self.assertEqual(count(corpus1.iter_utterances()), 3)
        self.assertEqual(count(corpus2.iter_utterances()), 3)

        # The conflicting utterance keeps the data from corpus1.
        kept = merged.get_utterance(2)
        self.assertEqual(kept.text, "this is a test")
        self.assertEqual(merged.get_utterance(2).user, User(name="charlie"))
    def test_corpus_merge_add(self):
        """
        Check that add_utterances() registers new metadata keys in the index.

        A separately constructed utterance carrying both utterance-level and
        user-level metadata is added; both keys are expected to appear in
        the corresponding indexes of the returned corpus.
        """
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", user=User(id="alice")),
            Utterance(id="1", text="my name is bob", user=User(id="bob")),
            Utterance(id="2", text="this is a test", user=User(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        # test that adding separately initialized utterances with new metadata updates Index
        new_utt = Utterance(id="4",
                            text="hello world",
                            user=User(id="alice", meta={'donkey': 'kong'}),
                            meta={'new': 'meta'})

        new_corpus = corpus1.add_utterances([new_utt])
        # assertIn reports the container's contents on failure, unlike
        # assertTrue(x in y), which only reports "False is not true".
        self.assertIn('new', new_corpus.meta_index.utterances_index)
        self.assertIn('donkey', new_corpus.meta_index.users_index)
    def test_add_utterance(self):
        """
        Add utterances whose ids partly overlap those already in the corpus.

        Expects four utterances in the result, with utterance 2 holding the
        union of both metadata dicts and the added value winning for the
        shared 'hello' key.
        """
        original_meta = {'hey': 'jude', 'hello': 'world'}
        incoming_meta = {'hello': 'food', 'what': 'a mood'}

        initial_utts = [
            Utterance(id=0, text="hello world", user=User(name="alice")),
            Utterance(id=1, text="my name is bob", user=User(name="bob")),
            Utterance(id=2,
                      text="this is a test",
                      user=User(name="charlie"),
                      meta=original_meta),
        ]
        corpus1 = Corpus(utterances=initial_utts)

        utts = [
            Utterance(id=1, text="i like pie", user=User(name="delta")),
            Utterance(id=2,
                      text="this is a test",
                      user=User(name="charlie"),
                      meta=incoming_meta),
            Utterance(id=5, text="goodbye", user=User(name="foxtrot")),
        ]
        added = corpus1.add_utterances(utts)

        total_utterances = len(list(added.iter_utterances()))
        self.assertEqual(total_utterances, 4)

        self.assertEqual(len(added.get_utterance(2).meta), 3)
        self.assertEqual(added.get_utterance(2).meta['hello'], 'food')
    def test_corpus_metadata(self):
        """
        Merge two corpora whose corpus-level metadata share a key.

        The second corpus's value is expected to win for the shared key, and
        the merged metadata should hold the union of the keys.
        """

        def make_corpus(rows):
            # rows: (utterance id, text, user name)
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, user=User(name=name))
                for utt_id, text, name in rows
            ])

        corpus1 = make_corpus([
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ])
        corpus2 = make_corpus([
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ])

        for key, value in (('politeness', 0.95), ('toxicity', 0.8)):
            corpus1.add_meta(key, value)

        # 'toxicity' is the key present in both corpora.
        for key, value in (('toxicity', 0.9), ('paggro', 1.0)):
            corpus2.add_meta(key, value)

        merged = corpus1.merge(corpus2)
        self.assertEqual(len(merged.meta), 3)
        self.assertEqual(merged.meta['toxicity'], 0.9)
    def test_basic_functions(self):
        """
        Exercise basic metadata access on a single utterance.
        """
        utterances = [
            Utterance(id=utt_id, text=text, user=User(id=user_id))
            for utt_id, text, user_id in (
                ("0", "hello world", "alice"),
                ("1", "my name is bob", "bob"),
                ("2", "this is a test", "charlie"),
            )
        ]
        corpus1 = Corpus(utterances=utterances)

        first_utt = corpus1.get_utterance("0")
        first_utt.meta['hey'] = 9

        # The index stores the repr of the value's type.
        self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                         repr(int))

        # Looking up a missing key raises KeyError.
        with self.assertRaises(KeyError):
            _ = first_utt.meta['nonexistent key']

        # get() with an explicit default returns that default.
        self.assertEqual(first_utt.meta.get('nonexistent_key', {}), {})
Esempio n. 9
0
    def test_overlap_convo_metadata(self):
        """
        Merge corpora that share a conversation with differing metadata.

        The second corpus's conversation metadata is expected to override
        the first's wherever the keys coincide.
        """

        def make_corpus(rows):
            # rows: (utterance id, text, user name); all in root 'convo1'.
            return Corpus(utterances=[
                Utterance(id=utt_id,
                          root='convo1',
                          text=text,
                          user=User(name=name))
                for utt_id, text, name in rows
            ])

        corpus1 = make_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        corpus2 = make_corpus([
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        for key, value in (('hey', 'jude'), ('hello', 'world')):
            corpus1.get_conversation('convo1').add_meta(key, value)

        # 'hello' conflicts with corpus1; 'what' exists only in corpus2.
        for key, value in (('hey', 'jude'),
                           ('hello', 'food'),
                           ('what', 'a mood')):
            corpus2.get_conversation('convo1').add_meta(key, value)

        merged = corpus1.merge(corpus2)
        self.assertEqual(len(merged.get_conversation('convo1').meta), 3)
        self.assertEqual(
            merged.get_conversation('convo1').meta['hello'], 'food')
Esempio n. 10
0
    def test_no_overlap(self):
        """
        Merge two corpora that have no utterance ids in common.
        """
        first_rows = [
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ]
        second_rows = [
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ]

        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, user=User(name=name))
            for utt_id, text, name in first_rows
        ])
        corpus2 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, user=User(name=name))
            for utt_id, text, name in second_rows
        ])

        merged = corpus1.merge(corpus2)

        merged_utterances = list(merged.iter_utterances())
        self.assertEqual(len(merged_utterances), 6)
        merged_users = list(merged.iter_users())
        self.assertEqual(len(merged_users), 6)

        # The input corpora keep their original sizes.
        self.assertEqual(len(list(corpus1.iter_utterances())), 3)
        self.assertEqual(len(list(corpus2.iter_utterances())), 3)
Esempio n. 11
0
    def test_with_overlap(self):
        """
        Merge two corpora sharing one utterance id with identical content.

        Utterance 2 appears in both corpora with the same data and metadata,
        so the merged corpus should contain five utterances and five users.
        """

        def make_corpus(rows):
            # rows: (utterance id, text, user name)
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, user=User(name=name))
                for utt_id, text, name in rows
            ])

        def count(iterable):
            return sum(1 for _ in iterable)

        corpus1 = make_corpus([
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ])
        corpus2 = make_corpus([
            (2, "this is a test", "charlie"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)

        self.assertEqual(count(merged.iter_utterances()), 5)
        self.assertEqual(count(merged.iter_users()), 5)
        # The input corpora keep their original sizes.
        self.assertEqual(count(corpus1.iter_utterances()), 3)
        self.assertEqual(count(corpus2.iter_utterances()), 3)
Esempio n. 12
0
    def test_overlap_diff_metadata(self):
        """
        Merge corpora sharing an utterance id with differing metadata.

        Utterance "2" has the same data in both corpora but different
        metadata; the second corpus's values are expected to override the
        first's for any shared key.
        """
        first_meta = {'hey': 'jude', 'the': 'beatles'}
        second_meta = {'hey': 'jude', 'the': 'ringo', 'let it': 'be'}

        first_utts = [
            Utterance(id="0", text="hello world", user=User(name="alice")),
            Utterance(id="1", text="my name is bob", user=User(name="bob")),
            Utterance(id="2",
                      text="this is a test",
                      user=User(name="charlie"),
                      meta=first_meta),
        ]
        second_utts = [
            Utterance(id="2",
                      text="this is a test",
                      user=User(name="charlie"),
                      meta=second_meta),
            Utterance(id="4", text="this is a sentence",
                      user=User(name="echo")),
            Utterance(id="5", text="goodbye", user=User(name="foxtrot")),
        ]

        corpus1 = Corpus(utterances=first_utts)
        corpus2 = Corpus(utterances=second_utts)

        merged = corpus1.merge(corpus2)

        utterance_total = len(list(merged.iter_utterances()))
        self.assertEqual(utterance_total, 5)
        user_total = len(list(merged.iter_users()))
        self.assertEqual(user_total, 5)

        # Union of keys, with the second corpus winning on 'the'.
        self.assertEqual(len(merged.get_utterance("2").meta), 3)
        self.assertEqual(merged.get_utterance("2").meta['the'], 'ringo')
 def setUp(self) -> None:
     """
     Build the shared fixture corpus.

     Conversation "0" forms this reply tree (within a subtree, left to
     right is earliest to latest):
                0
         1      2      3
       4 5 6   7 8     9
     10                11

     A second, single-utterance conversation "other" is also included.
     """
     # Columns: (utterance id, reply_to, timestamp). Every row belongs to
     # root "0" and is authored by "alice". Rows keep the original
     # insertion order (note that "2" precedes "1").
     tree_rows = [
         ("0", None, 0),
         ("2", "0", 2),
         ("1", "0", 1),
         ("3", "0", 3),
         ("4", "1", 4),
         ("5", "1", 5),
         ("6", "1", 6),
         ("7", "2", 4),
         ("8", "2", 5),
         ("9", "3", 4),
         ("10", "4", 5),
         ("11", "9", 10),
     ]
     utterances = [
         Utterance(id=utt_id,
                   reply_to=parent_id,
                   root="0",
                   user=User(id="alice"),
                   timestamp=stamp)
         for utt_id, parent_id, stamp in tree_rows
     ]
     # Standalone utterance that is the root of its own conversation.
     utterances.append(
         Utterance(id="other",
                   reply_to=None,
                   root="other",
                   user=User(id="alice"),
                   timestamp=99))

     self.corpus = Corpus(utterances=utterances)
     self.corpus.get_conversation("0").meta['hey'] = 'jude'
     self.corpus.meta['foo'] = 'bar'