def test_corpus_dump(self):
    """
    Dump a corpus to disk, reload it, and compare the metadata indexes.

    Metadata is attached to utterances, to the conversation and to a user
    before dumping. Each of the four index views (utterances, users,
    conversations, overall) of the reloaded corpus must equal the
    corresponding view of the original corpus.
    """
    import shutil  # local import: only this test touches the filesystem

    corpus_name = 'test_index_meta_corpus'
    # Registered before dumping so the on-disk corpus is removed even if an
    # assertion fails. Presumably dump() creates a directory with this name
    # under the working directory; ignore_errors keeps cleanup harmless if
    # nothing was written.
    self.addCleanup(shutil.rmtree, corpus_name, ignore_errors=True)

    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", user=User(name="alice")),
        Utterance(id="1", text="my name is bob", user=User(name="bob")),
        Utterance(id="2", text="this is a test", user=User(name="charlie")),
    ])

    corpus1.get_utterance("0").meta['foo'] = 'bar'
    corpus1.get_utterance("1").meta['foo'] = 'bar2'
    corpus1.get_utterance("2").meta['hey'] = 'jude'

    corpus1.get_conversation(None).meta['convo_meta'] = 1

    corpus1.get_user("alice").meta['surname'] = 1.0

    corpus1.dump(corpus_name, base_path="./")
    corpus2 = Corpus(filename=corpus_name)

    self.assertEqual(corpus1.meta_index.utterances_index,
                     corpus2.meta_index.utterances_index)
    self.assertEqual(corpus1.meta_index.users_index,
                     corpus2.meta_index.users_index)
    self.assertEqual(corpus1.meta_index.conversations_index,
                     corpus2.meta_index.conversations_index)
    self.assertEqual(corpus1.meta_index.overall_index,
                     corpus2.meta_index.overall_index)
def test_key_insertion_deletion(self):
    """
    Check that the metadata index follows key insertions and deletions.

    After metadata is set on utterances, the conversation and a user, the
    index entry for each key must equal the string form of the value's type.
    Deleting a key from one utterance must then remove it from the
    utterance index, and from other utterances that carried the same key.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
    ])

    utterance_meta = [
        ("0", 'foo', 'bar'),
        ("1", 'foo', 'bar2'),
        ("2", 'hey', 'jude'),
    ]
    for utt_id, key, value in utterance_meta:
        corpus1.get_utterance(utt_id).meta[key] = value

    corpus1.get_conversation(None).meta['convo_meta'] = 1
    corpus1.get_user("alice").meta['surname'] = 1.0

    # each index maps a metadata key to the string form of its value's type
    self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                     str(type('bar')))
    self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                     str(type(1)))
    self.assertEqual(corpus1.meta_index.users_index['surname'],
                     str(type(1.0)))

    # deleting a key from an utterance removes it from the index
    del corpus1.get_utterance("2").meta['hey']
    with self.assertRaises(KeyError):
        corpus1.meta_index.utterances_index['hey']

    # deleting a key from one utterance removes it from the index and from
    # every other utterance that had the same key
    del corpus1.get_utterance("1").meta['foo']
    with self.assertRaises(KeyError):
        corpus1.meta_index.utterances_index['foo']
    with self.assertRaises(KeyError):
        corpus1.get_utterance("0").meta["foo"]
def test_broken_convos(self):
    """
    Traversing a malformed conversation must raise ValueError.

    Two malformed shapes are covered: a conversation with more than one
    root utterance, and a conversation in which an utterance replies to an
    id that is not part of the conversation.
    """
    def make_corpus(last_reply_to):
        # Fresh objects on every call; only the reply target of the last
        # utterance differs between the two corpora.
        return Corpus(utterances=[
            Utterance(id="0", text="hello world", reply_to=None,
                      user=User(id="alice"), timestamp=0),
            Utterance(id="1", text="my name is bob", reply_to="0",
                      user=User(id="bob"), timestamp=2),
            Utterance(id="2", text="this is a test", reply_to="1",
                      user=User(id="charlie"), timestamp=1),
            Utterance(id="3", text="hello world 2", reply_to=last_reply_to,
                      user=User(id="alice2"), timestamp=0),
        ])

    corpus1 = make_corpus(None)
    corpus2 = make_corpus("9")

    # broken convo: utterances "0" and "3" are both roots
    convo = corpus1.get_conversation(None)
    with self.assertRaises(ValueError):
        list(convo.traverse("dfs", as_utterance=True))

    # broken convo: utterance "3" replies to "9", which is not present
    convo = corpus2.get_conversation(None)
    with self.assertRaises(ValueError):
        list(convo.traverse("dfs", as_utterance=True))
def test_overlap_diff_data(self):
    """
    Merge where one utterance id overlaps and its data differs.

    The overlapping utterance has different text and user but the same
    metadata. A warning should be printed, and the utterance data from the
    first corpus should be preserved in the merged corpus.
    """
    first_utts = [
        Utterance(id=0, text="hello world", user=User(name="alice")),
        Utterance(id=1, text="my name is bob", user=User(name="bob")),
        Utterance(id=2, text="this is a test", user=User(name="charlie")),
    ]
    corpus1 = Corpus(utterances=first_utts)

    second_utts = [
        Utterance(id=2, text="this is a test2", user=User(name="candace")),
        Utterance(id=4, text="this is a sentence", user=User(name="echo")),
        Utterance(id=5, text="goodbye", user=User(name="foxtrot")),
    ]
    corpus2 = Corpus(utterances=second_utts)

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 5)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 5)

    # the input corpora still hold three utterances each
    self.assertEqual(len(list(corpus1.iter_utterances())), 3)
    self.assertEqual(len(list(corpus2.iter_utterances())), 3)

    # data of the first corpus wins for the overlapping utterance
    self.assertEqual(merged.get_utterance(2).text, "this is a test")
    self.assertEqual(merged.get_utterance(2).user, User(name="charlie"))
def test_corpus_merge_add(self):
    """
    Adding utterances with new metadata keys must update the index.

    A separately constructed utterance carries a new utterance-level key
    ('new') and its user carries a new user-level key ('donkey'). After
    add_utterances, both keys must appear in the matching index of the
    returned corpus.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", user=User(id="alice")),
        Utterance(id="1", text="my name is bob", user=User(id="bob")),
        Utterance(id="2", text="this is a test", user=User(id="charlie")),
    ])

    corpus1.get_utterance("0").meta['foo'] = 'bar'
    corpus1.get_utterance("1").meta['foo'] = 'bar2'
    corpus1.get_utterance("2").meta['hey'] = 'jude'

    # test that adding separately initialized utterances with new metadata
    # updates the index
    new_utt = Utterance(id="4", text="hello world",
                        user=User(id="alice", meta={'donkey': 'kong'}),
                        meta={'new': 'meta'})

    new_corpus = corpus1.add_utterances([new_utt])

    # assertIn reports both the key and the container on failure, unlike
    # assertTrue(key in container), which only reports "False is not true"
    self.assertIn('new', new_corpus.meta_index.utterances_index)
    self.assertIn('donkey', new_corpus.meta_index.users_index)
def test_add_utterance(self):
    """
    Add utterances whose ids partly overlap with the existing corpus.

    Ids 1 and 2 already exist and id 5 is new, so four utterances are
    expected afterwards. For utterance 2 the metadata must contain three
    keys, with the added value for 'hello' replacing the existing one.
    """
    existing = [
        Utterance(id=0, text="hello world", user=User(name="alice")),
        Utterance(id=1, text="my name is bob", user=User(name="bob")),
        Utterance(id=2, text="this is a test", user=User(name="charlie"),
                  meta={'hey': 'jude', 'hello': 'world'}),
    ]
    corpus1 = Corpus(utterances=existing)

    utts = [
        Utterance(id=1, text="i like pie", user=User(name="delta")),
        Utterance(id=2, text="this is a test", user=User(name="charlie"),
                  meta={'hello': 'food', 'what': 'a mood'}),
        Utterance(id=5, text="goodbye", user=User(name="foxtrot")),
    ]
    added = corpus1.add_utterances(utts)

    all_utts = list(added.iter_utterances())
    self.assertEqual(len(all_utts), 4)

    # 'hey' + 'hello' + 'what' => three keys; the added 'hello' value wins
    self.assertEqual(len(added.get_utterance(2).meta), 3)
    self.assertEqual(added.get_utterance(2).meta['hello'], 'food')
def test_corpus_metadata(self):
    """
    Merge with overlap in corpus-level metadata.

    Both corpora define 'toxicity'; the value from the second corpus is
    expected to override the first, and the merged corpus is expected to
    hold three metadata keys in total.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ]
    ])
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ]
    ])

    for key, value in [('politeness', 0.95), ('toxicity', 0.8)]:
        corpus1.add_meta(key, value)
    for key, value in [('toxicity', 0.9), ('paggro', 1.0)]:
        corpus2.add_meta(key, value)

    merged = corpus1.merge(corpus2)

    # politeness, toxicity, paggro
    self.assertEqual(len(merged.meta), 3)
    self.assertEqual(merged.meta['toxicity'], 0.9)
def test_basic_functions(self):
    """
    Exercise basic metadata access on a single utterance.

    Covers the index entry written when a key is set, the KeyError raised
    by subscripting a missing key, and get() with an explicit default.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(id=speaker))
        for utt_id, text, speaker in [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
    ])

    first_utt = corpus1.get_utterance("0")
    first_utt.meta['hey'] = 9

    # the index entry for the key equals the repr of the value's type
    expected_type = repr(type(9))
    self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                     expected_type)

    # subscripting a missing key raises KeyError
    with self.assertRaises(KeyError):
        first_utt.meta['nonexistent key']

    # get() with a caller-supplied default returns that default
    fallback = first_utt.meta.get('nonexistent_key', {})
    self.assertEqual(fallback, {})
def test_overlap_convo_metadata(self):
    """
    Merge with overlap in a conversation whose metadata differs.

    Both corpora contain conversation 'convo1'. Where both define the same
    metadata key, the value from the second corpus is expected to override
    the first.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, root='convo1', text=text,
                  user=User(name=speaker))
        for utt_id, text, speaker in [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
    ])
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, root='convo1', text=text,
                  user=User(name=speaker))
        for utt_id, text, speaker in [
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ]
    ])

    for key, value in [('hey', 'jude'), ('hello', 'world')]:
        corpus1.get_conversation('convo1').add_meta(key, value)

    for key, value in [('hey', 'jude'), ('hello', 'food'),
                       ('what', 'a mood')]:
        corpus2.get_conversation('convo1').add_meta(key, value)

    merged = corpus1.merge(corpus2)

    # hey, hello, what => three keys; 'hello' takes the second corpus value
    self.assertEqual(len(merged.get_conversation('convo1').meta), 3)
    self.assertEqual(merged.get_conversation('convo1').meta['hello'],
                     'food')
def test_no_overlap(self):
    """
    Basic merge: the two corpora share no utterance ids.

    The merged corpus is expected to hold all six utterances and six
    users, while each input corpus still holds its own three utterances.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ]
    ])
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ]
    ])

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 6)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 6)

    # the input corpora keep their original utterance counts
    self.assertEqual(len(list(corpus1.iter_utterances())), 3)
    self.assertEqual(len(list(corpus2.iter_utterances())), 3)
def test_with_overlap(self):
    """
    Basic merge: one utterance id overlaps, with the same data and metadata.

    The shared utterance (id 2) is expected to be counted once, giving five
    utterances and five users in the merged corpus. Each input corpus still
    holds its own three utterances.
    """
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ]
    ])
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, text=text, user=User(name=speaker))
        for utt_id, text, speaker in [
            (2, "this is a test", "charlie"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ]
    ])

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 5)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 5)

    # the input corpora keep their original utterance counts
    self.assertEqual(len(list(corpus1.iter_utterances())), 3)
    self.assertEqual(len(list(corpus2.iter_utterances())), 3)
def test_overlap_diff_metadata(self):
    """
    Merge where one utterance id overlaps with the same data but
    different metadata.

    Where both versions of the utterance define the same metadata key, the
    value from the second corpus is expected to override the first.
    """
    first_utts = [
        Utterance(id="0", text="hello world", user=User(name="alice")),
        Utterance(id="1", text="my name is bob", user=User(name="bob")),
        Utterance(id="2", text="this is a test", user=User(name="charlie"),
                  meta={'hey': 'jude', 'the': 'beatles'}),
    ]
    corpus1 = Corpus(utterances=first_utts)

    second_utts = [
        Utterance(id="2", text="this is a test", user=User(name="charlie"),
                  meta={'hey': 'jude', 'the': 'ringo', 'let it': 'be'}),
        Utterance(id="4", text="this is a sentence",
                  user=User(name="echo")),
        Utterance(id="5", text="goodbye", user=User(name="foxtrot")),
    ]
    corpus2 = Corpus(utterances=second_utts)

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 5)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 5)

    # 'hey', 'the', 'let it' => three keys; 'the' takes the second value
    self.assertEqual(len(merged.get_utterance("2").meta), 3)
    self.assertEqual(merged.get_utterance("2").meta['the'], 'ringo')
def setUp(self) -> None:
    """
    Build the fixture corpus shared by the tests.

    Basic Conversation tree
    (left to right within subtree => earliest to latest)

                 0
         1       2       3
       4 5 6    7 8      9
       10                11

    A second, single-utterance conversation rooted at "other" is added as
    well. Conversation "0" and the corpus itself each get one metadata
    entry.
    """
    # (utterance id, reply_to, timestamp) for conversation "0".
    # "2" is deliberately listed before "1" although its timestamp is later.
    tree_rows = [
        ("0", None, 0),
        ("2", "0", 2),
        ("1", "0", 1),
        ("3", "0", 3),
        ("4", "1", 4),
        ("5", "1", 5),
        ("6", "1", 6),
        ("7", "2", 4),
        ("8", "2", 5),
        ("9", "3", 4),
        ("10", "4", 5),
        ("11", "9", 10),
    ]
    fixture_utts = [
        Utterance(id=utt_id, reply_to=parent_id, root="0",
                  user=User(id="alice"), timestamp=stamp)
        for utt_id, parent_id, stamp in tree_rows
    ]
    fixture_utts.append(
        Utterance(id="other", reply_to=None, root="other",
                  user=User(id="alice"), timestamp=99)
    )

    self.corpus = Corpus(utterances=fixture_utts)
    self.corpus.get_conversation("0").meta['hey'] = 'jude'
    self.corpus.meta['foo'] = 'bar'