def test_overlap_diff_metadata(self):
    """
    Merge where one utterance id appears in both corpora with the same data
    but different metadata.

    For metadata keys present on both copies, the value from the second
    corpus is expected to override the value from the first.
    """
    first_utts = [
        Utterance(id="0", text="hello world", user=User(id="alice")),
        Utterance(id="1", text="my name is bob", user=User(id="bob")),
        Utterance(
            id="2",
            text="this is a test",
            user=User(id="charlie"),
            meta={'hey': 'jude', 'the': 'beatles'},
        ),
    ]
    corpus1 = Corpus(utterances=first_utts)

    # Utterance "2" is the overlapping one: same text and user, but 'the'
    # has a different value and 'let it' is a key the first copy lacks.
    second_utts = [
        Utterance(
            id="2",
            text="this is a test",
            user=User(id="charlie"),
            meta={'hey': 'jude', 'the': 'ringo', 'let it': 'be'},
        ),
        Utterance(id="4", text="this is a sentence", user=User(id="echo")),
        Utterance(id="5", text="goodbye", user=User(id="foxtrot")),
    ]
    corpus2 = Corpus(utterances=second_utts)

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 5)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 5)

    shared = merged.get_utterance("2")
    self.assertEqual(len(shared.meta), 3)
    self.assertEqual(shared.meta['the'], 'ringo')
def test_no_overlap(self):
    """
    Basic merge: the two corpora share no utterance ids.

    The merged corpus should hold every utterance and speaker from both
    inputs, and the inputs themselves should still report three utterances
    each afterwards.
    """
    # (utterance id, text, speaker id)
    first_rows = (
        ("0", "hello world", "alice"),
        ("1", "my name is bob", "bob"),
        ("2", "this is a test", "charlie"),
    )
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=body, speaker=Speaker(id=who))
        for utt_id, body, who in first_rows
    ])

    second_rows = (
        ("3", "i like pie", "delta"),
        ("4", "this is a sentence", "echo"),
        ("5", "goodbye", "foxtrot"),
    )
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, text=body, speaker=Speaker(id=who))
        for utt_id, body, who in second_rows
    ])

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 6)
    merged_speakers = list(merged.iter_speakers())
    self.assertEqual(len(merged_speakers), 6)

    # The source corpora are re-counted after the merge.
    for source in (corpus1, corpus2):
        self.assertEqual(len(list(source.iter_utterances())), 3)
def test_with_overlap(self):
    """
    Basic merge: one utterance id occurs in both corpora, and both copies
    carry the same data and metadata.

    The shared utterance should be counted once in the merged corpus, and
    the inputs should still report three utterances each afterwards.
    """
    # (utterance id, text, user name)
    first_rows = (
        ("0", "hello world", "alice"),
        ("1", "my name is bob", "bob"),
        ("2", "this is a test", "charlie"),
    )
    corpus1 = Corpus(utterances=[
        Utterance(id=utt_id, text=body, user=User(name=who))
        for utt_id, body, who in first_rows
    ])

    # Utterance "2" repeats the last row of the first corpus.
    second_rows = (
        ("2", "this is a test", "charlie"),
        ("4", "this is a sentence", "echo"),
        ("5", "goodbye", "foxtrot"),
    )
    corpus2 = Corpus(utterances=[
        Utterance(id=utt_id, text=body, user=User(name=who))
        for utt_id, body, who in second_rows
    ])

    merged = corpus1.merge(corpus2)

    merged_utts = list(merged.iter_utterances())
    self.assertEqual(len(merged_utts), 5)
    merged_users = list(merged.iter_users())
    self.assertEqual(len(merged_users), 5)

    # The source corpora are re-counted after the merge.
    for source in (corpus1, corpus2):
        self.assertEqual(len(list(source.iter_utterances())), 3)
def setUp(self) -> None:
    """
    Build the fixture corpus used by the tests.

    Basic Conversation tree (left to right within subtree => earliest to
    latest), as defined by the reply_to links below:

                   0
          1        2        3
        4 5 6     7 8       9
        10                  11

    i.e. 1, 2, 3 reply to 0; 4, 5, 6 reply to 1; 7, 8 reply to 2;
    9 replies to 3; 10 replies to 4; 11 replies to 9.

    A second utterance with id "other" (no reply_to, root "other") is added
    as well. Finally, metadata is set on conversation "0" and on the corpus.
    """
    # (utterance id, reply_to, root, timestamp) -- listed in the order the
    # utterances are handed to the Corpus; note "2" precedes "1".
    rows = (
        ("0", None, "0", 0),
        ("2", "0", "0", 2),
        ("1", "0", "0", 1),
        ("3", "0", "0", 3),
        ("4", "1", "0", 4),
        ("5", "1", "0", 5),
        ("6", "1", "0", 6),
        ("7", "2", "0", 4),
        ("8", "2", "0", 5),
        ("9", "3", "0", 4),
        ("10", "4", "0", 5),
        ("11", "9", "0", 10),
        ("other", None, "other", 99),
    )
    # Every utterance gets its own Speaker object with id "alice".
    fixture_utts = [
        Utterance(
            id=utt_id,
            reply_to=parent_id,
            root=root_id,
            speaker=Speaker(id="alice"),
            timestamp=stamp,
        )
        for utt_id, parent_id, root_id, stamp in rows
    ]
    self.corpus = Corpus(utterances=fixture_utts)

    main_convo = self.corpus.get_conversation("0")
    main_convo.meta['hey'] = 'jude'
    self.corpus.meta['foo'] = 'bar'