コード例 #1
0
    def test_overlap_diff_data(self):
        """
        Merge two corpora that share utterance id "2" with conflicting content.

        The shared utterance differs in both text and speaker between the two
        corpora. The assertions check that the merged corpus keeps the first
        corpus's version (text "this is a test", speaker "charlie"), contains
        5 utterances and 5 speakers, and that each input corpus still holds
        3 utterances after the merge.

        NOTE(review): a warning is expected to be printed for the conflict,
        but this test does not assert it.
        """
        def make_corpus(rows):
            # Each row is (utterance id, text, speaker id).
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        first = make_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        second = make_corpus([
            ("2", "this is a test2", "candace"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        merged = first.merge(second)

        merged_utterances = list(merged.iter_utterances())
        self.assertEqual(len(merged_utterances), 5)
        merged_speakers = list(merged.iter_speakers())
        self.assertEqual(len(merged_speakers), 5)

        # The inputs must be left untouched by the merge.
        first_utterances = list(first.iter_utterances())
        self.assertEqual(len(first_utterances), 3)
        second_utterances = list(second.iter_utterances())
        self.assertEqual(len(second_utterances), 3)

        # The conflicting utterance must carry the first corpus's data.
        kept_text = merged.get_utterance("2").text
        self.assertEqual(kept_text, "this is a test")
        kept_speaker = merged.get_utterance("2").speaker
        self.assertEqual(kept_speaker, Speaker(id="charlie"))
コード例 #2
0
    def test_with_overlap(self):
        """
        Merge two corpora that share utterance id "2" with identical content.

        The shared utterance has the same text and speaker in both corpora.
        The assertions check that the merged corpus contains 5 utterances and
        5 speakers, and that each input corpus still holds 3 utterances.
        """
        def make_corpus(rows):
            # Each row is (utterance id, text, speaker id).
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        first = make_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        second = make_corpus([
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        merged = first.merge(second)

        merged_utterances = list(merged.iter_utterances())
        self.assertEqual(len(merged_utterances), 5)
        merged_speakers = list(merged.iter_speakers())
        self.assertEqual(len(merged_speakers), 5)

        # The inputs must be left untouched by the merge.
        first_utterances = list(first.iter_utterances())
        self.assertEqual(len(first_utterances), 3)
        second_utterances = list(second.iter_utterances())
        self.assertEqual(len(second_utterances), 3)
コード例 #3
0
    def test_corpus_metadata(self):
        """
        Merge two corpora whose corpus-level metadata share a key.

        Both corpora define 'toxicity'; each also defines one key of its own.
        The assertions check that the merged metadata has 3 entries and that
        'toxicity' carries the second corpus's value (0.9).
        """
        def make_corpus(rows):
            # Each row is (utterance id, text, speaker id).
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        first = make_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        second = make_corpus([
            ("3", "i like pie", "delta"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        # 'toxicity' is the overlapping key; the other two are unique.
        for key, value in (('politeness', 0.95), ('toxicity', 0.8)):
            first.add_meta(key, value)
        for key, value in (('toxicity', 0.9), ('paggro', 1.0)):
            second.add_meta(key, value)

        merged = first.merge(second)

        merged_meta_size = len(merged.meta)
        self.assertEqual(merged_meta_size, 3)
        merged_toxicity = merged.meta['toxicity']
        self.assertEqual(merged_toxicity, 0.9)
コード例 #4
0
    def test_overlap_convo_metadata(self):
        """
        Merge two corpora that share a conversation with differing metadata.

        Every utterance belongs to conversation 'convo1'. Both corpora set
        'hey' and 'hello' on that conversation ('hello' with different
        values), and the second corpus adds 'what'. The assertions check that
        the merged conversation has 3 metadata entries and that 'hello'
        carries the second corpus's value ('food').
        """
        def make_corpus(rows):
            # Each row is (utterance id, text, speaker id); all rows share
            # the same conversation id.
            return Corpus(utterances=[
                Utterance(id=utt_id,
                          conversation_id='convo1',
                          text=text,
                          speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        first = make_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        second = make_corpus([
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        # The conversation is looked up anew for every metadata entry.
        for key, value in (('hey', 'jude'), ('hello', 'world')):
            first.get_conversation('convo1').add_meta(key, value)
        for key, value in (('hey', 'jude'),
                           ('hello', 'food'),
                           ('what', 'a mood')):
            second.get_conversation('convo1').add_meta(key, value)

        merged = first.merge(second)

        merged_meta_size = len(merged.get_conversation('convo1').meta)
        self.assertEqual(merged_meta_size, 3)
        merged_hello = merged.get_conversation('convo1').meta['hello']
        self.assertEqual(merged_hello, 'food')
コード例 #5
0
    def test_no_overlap(self):
        """
        Merge two corpora whose utterance ids do not overlap.

        The assertions check that the merged corpus contains 6 utterances and
        6 users, and that each input corpus still holds 3 utterances.

        NOTE(review): this test uses User / user= / iter_users and integer
        ids, whereas the neighbouring merge tests use Speaker / iter_speakers
        and string ids -- confirm which API version this test targets.
        """
        def make_corpus(rows):
            # Each row is (utterance id, text, user name).
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, user=User(name=user_name))
                for utt_id, text, user_name in rows
            ])

        first = make_corpus([
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ])
        second = make_corpus([
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ])

        merged = first.merge(second)

        merged_utterances = list(merged.iter_utterances())
        self.assertEqual(len(merged_utterances), 6)
        merged_users = list(merged.iter_users())
        self.assertEqual(len(merged_users), 6)

        # The inputs must be left untouched by the merge.
        first_utterances = list(first.iter_utterances())
        self.assertEqual(len(first_utterances), 3)
        second_utterances = list(second.iter_utterances())
        self.assertEqual(len(second_utterances), 3)
コード例 #6
0
    def test_overlap_diff_metadata(self):
        """
        Merge two corpora that share utterance id "2" with differing metadata.

        The shared utterance has the same text and speaker in both corpora,
        but its metadata differs: both define 'hey' and 'the' ('the' with
        different values), and the second corpus adds 'let it'. The assertions
        check that the merged corpus contains 5 utterances and 5 speakers,
        that the merged utterance has 3 metadata entries, and that 'the'
        carries the second corpus's value ('ringo').
        """
        def make_utterance(utt_id, text, speaker_id, **extra):
            # `extra` forwards optional keyword arguments (here only `meta`)
            # so utterances without metadata are built without that argument.
            return Utterance(id=utt_id,
                             text=text,
                             speaker=Speaker(id=speaker_id),
                             **extra)

        first = Corpus(utterances=[
            make_utterance("0", "hello world", "alice"),
            make_utterance("1", "my name is bob", "bob"),
            make_utterance("2", "this is a test", "charlie",
                           meta={'hey': 'jude', 'the': 'beatles'}),
        ])
        second = Corpus(utterances=[
            make_utterance("2", "this is a test", "charlie",
                           meta={'hey': 'jude',
                                 'the': 'ringo',
                                 'let it': 'be'}),
            make_utterance("4", "this is a sentence", "echo"),
            make_utterance("5", "goodbye", "foxtrot"),
        ])

        merged = first.merge(second)

        merged_utterances = list(merged.iter_utterances())
        self.assertEqual(len(merged_utterances), 5)
        merged_speakers = list(merged.iter_speakers())
        self.assertEqual(len(merged_speakers), 5)

        # Metadata of the shared utterance after the merge.
        merged_meta_size = len(merged.get_utterance("2").meta)
        self.assertEqual(merged_meta_size, 3)
        merged_the = merged.get_utterance("2").meta['the']
        self.assertEqual(merged_the, 'ringo')