예제 #1
0
    def test_key_insertion_deletion(self):
        """Metadata keys appear in the index when set and vanish when deleted."""
        rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
            for utt_id, text, speaker_id in rows
        ])

        for utt_id, key, value in (("0", 'foo', 'bar'),
                                   ("1", 'foo', 'bar2'),
                                   ("2", 'hey', 'jude')):
            corpus1.get_utterance(utt_id).meta[key] = value
        corpus1.get_conversation(None).meta['convo_meta'] = 1
        corpus1.get_speaker("alice").meta['surname'] = 1.0

        # Each index is expected to map a key to the string form of its
        # value's type.
        index = corpus1.meta_index
        self.assertEqual(index.utterances_index['foo'], str(str))
        self.assertEqual(index.conversations_index['convo_meta'], str(int))
        self.assertEqual(index.speakers_index['surname'], str(float))

        # Deleting a key from one utterance must drop it from the index.
        del corpus1.get_utterance("2").meta['hey']
        with self.assertRaises(KeyError):
            corpus1.meta_index.utterances_index['hey']

        # Deleting a key from one utterance must drop it from the index and
        # from every other utterance that carried it.
        del corpus1.get_utterance("1").meta['foo']
        with self.assertRaises(KeyError):
            corpus1.meta_index.utterances_index['foo']
        with self.assertRaises(KeyError):
            corpus1.get_utterance("0").meta["foo"]
    def test_key_insertion_deletion(self):
        """Check index bookkeeping for metadata insertion and deletion.

        Covers three things: the per-type indexes record the value types of
        inserted keys, deleting a key from a single utterance leaves the
        value in place, and ``Corpus.delete_metadata`` removes a key from
        both the index and the utterances.
        """
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(
                id="bob")),
            Utterance(
                id="2", text="this is a test", speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0

        # Each index entry is a list of the type strings seen for that key.
        self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                         [str(type('bar'))])
        self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                         [str(type(1))])
        self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                         [str(type(1.0))])

        # Deleting an attribute from an individual utterance must NOT remove
        # it. Assert the surviving value explicitly instead of relying on a
        # bare lookup that merely "does not raise".
        del corpus1.get_utterance("2").meta['hey']
        self.assertEqual(corpus1.get_utterance("2").meta['hey'], 'jude')

        # delete_metadata removes the key from the index and the utterances.
        corpus1.delete_metadata('utterance', 'foo')
        with self.assertRaises(KeyError):
            corpus1.meta_index.utterances_index['foo']
        with self.assertRaises(KeyError):
            corpus1.get_utterance("0").meta["foo"]
예제 #3
0
    def test_corpus_dump(self):
        """Dump a corpus to disk, reload it, and compare the metadata indexes.

        The corpus is written to a temporary directory so the test leaves no
        files behind in the current working directory.
        """
        import os
        import tempfile

        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(
                id="bob")),
            Utterance(
                id="2", text="this is a test", speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0

        corpus_name = 'test_index_meta_corpus'
        with tempfile.TemporaryDirectory() as tmp_dir:
            corpus1.dump(corpus_name, base_path=tmp_dir)
            corpus2 = Corpus(filename=os.path.join(tmp_dir, corpus_name))

            # Compare while the dump still exists on disk.
            self.assertEqual(corpus1.meta_index.utterances_index,
                             corpus2.meta_index.utterances_index)
            self.assertEqual(corpus1.meta_index.speakers_index,
                             corpus2.meta_index.speakers_index)
            self.assertEqual(corpus1.meta_index.conversations_index,
                             corpus2.meta_index.conversations_index)
            self.assertEqual(corpus1.meta_index.overall_index,
                             corpus2.meta_index.overall_index)
예제 #4
0
    def test_basic_functions(self):
        """
        Exercise basic metadata access: type indexing, missing keys, and get().
        """
        rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
            for utt_id, text, speaker_id in rows
        ])

        first_utt = corpus1.get_utterance("0")
        first_utt.meta['hey'] = 9

        # The index stores the repr of the inserted value's type.
        self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                         repr(int))

        # Subscripting with a missing key raises KeyError.
        with self.assertRaises(KeyError):
            first_utt.meta['nonexistent key']

        # get() with a caller-supplied default returns that default.
        self.assertEqual(first_utt.meta.get('nonexistent_key', {}), {})
def burr_sir_corpus():
    """Return a two-utterance Corpus: hamilton says text 1, burr says text 2."""
    lines = (
        ('0', BURR_SIR_TEXT_1, Speaker(id='hamilton')),
        ('1', BURR_SIR_TEXT_2, Speaker(id='burr')),
    )
    return Corpus(utterances=[
        Utterance(id=utt_id, text=text, speaker=speaker)
        for utt_id, text, speaker in lines
    ])
예제 #6
0
def politeness_test_zh_corpus():
    """Build a Corpus with one utterance per Chinese politeness sample text.

    Utterance ids are "0", "1", ...; each utterance after the first replies
    to the previous one. The first utterance is by bob, after which the
    speakers alternate starting with alice.
    """
    speakers = [Speaker(id='alice'), Speaker(id='bob')]
    texts = [
        GRATITUDE_ZH, DEFERENCE_ZH, GREETING_ZH, APOLOGY_ZH, PLEASE_ZH,
        PLEASE_START_ZH, BTW_ZH, DIRECT_QN_ZH, HEDGES_ZH, FACTUALITY_ZH,
    ]

    utterances = [
        Utterance(
            id=str(pos),
            text=text,
            # pos 0 -> bob, pos 1 -> alice, pos 2 -> bob, ...
            speaker=speakers[(pos + 1) % 2],
            reply_to=str(pos - 1) if pos else None,
        )
        for pos, text in enumerate(texts)
    ]
    return Corpus(utterances=utterances)
예제 #7
0
def politeness_test_corpus():
    """Build a Corpus with one utterance per politeness sample text.

    Utterances form a single reply chain with ids "0", "1", ...; the first
    one is by bob and the remaining ones alternate alice, bob, alice, ...
    """
    speakers = [Speaker(id='alice'), Speaker(id='bob')]
    texts = [
        GRATITUDE, DEFERENCE, GREETING, APOLOGY, PLEASE,
        PLEASE_START, BTW, DIRECT_QN, DIRECT_START,
        SUBJUNCTIVE, INDICATIVE, HEDGES, FACTUALITY,
    ]

    first, *rest = texts
    utterances = [
        Utterance(id='0', text=first, speaker=speakers[1], reply_to=None)
    ]
    # Each follow-up replies to the utterance whose id is one lower.
    utterances.extend(
        Utterance(id=str(prev + 1), text=text, speaker=speakers[prev % 2],
                  reply_to=str(prev))
        for prev, text in enumerate(rest)
    )
    return Corpus(utterances=utterances)
예제 #8
0
    def test_broken_convos(self):
        """
        Traversing a structurally broken conversation must raise ValueError.
        """

        def build_corpus(last_reply_to):
            # The two corpora differ only in what utterance "3" replies to.
            rows = [
                ("0", "hello world", None, "alice", 0),
                ("1", "my name is bob", "0", "bob", 2),
                ("2", "this is a test", "1", "charlie", 1),
                ("3", "hello world 2", last_reply_to, "alice2", 0),
            ]
            return Corpus(utterances=[
                Utterance(id=utt_id,
                          text=text,
                          reply_to=parent,
                          speaker=Speaker(id=speaker_id),
                          timestamp=stamp)
                for utt_id, text, parent, speaker_id, stamp in rows
            ])

        corpus1 = build_corpus(None)
        corpus2 = build_corpus("9")

        # Broken convo: more than one root (utterances "0" and "3").
        convo = corpus1.get_conversation(None)
        with self.assertRaises(ValueError):
            list(convo.traverse("dfs", as_utterance=True))

        # Broken convo: utterance "3" replies to "9", which is not present.
        convo = corpus2.get_conversation(None)
        with self.assertRaises(ValueError):
            list(convo.traverse("dfs", as_utterance=True))
예제 #9
0
    def transform_utterance(self, utt: Utterance, spacy_nlp: Callable[[str], Doc] = None, markers: bool = False):
        """
        Extract politeness strategies for raw string inputs (or individual utterances)

        :param utt: the utterance to be annotated with politeness strategies; a plain string is
            wrapped in a new Utterance whose speaker id is 'speaker'.
        :param spacy_nlp: SpaCy object used to parse `utt.text`. Only needed when the utterance has
            no parse stored under `self.parse_attribute_name`; in that case it is required.
        :param markers: if True, also store the markers returned by the extractor under
            `self.marker_attribute_name`.
        :return: the utterance with politeness annotations.
        :raises ValueError: if the utterance has no stored parse and `spacy_nlp` is None.
        """

        if isinstance(utt, str):
            utt = Utterance(text=utt, speaker=Speaker(id='speaker'))

        # Parse lazily: reuse an existing parse when the utterance already carries one.
        if self.parse_attribute_name not in utt.meta:
            if spacy_nlp is None:
                raise ValueError('spacy object required')

            parses = process_text(utt.text, spacy_nlp=spacy_nlp)
            utt.add_meta(self.parse_attribute_name, parses)

        parsed = utt.retrieve_meta(self.parse_attribute_name)
        # NOTE: tokens are lower-cased in place on the retrieved parse.
        for sent in parsed:
            for tok in sent["toks"]:
                tok["tok"] = tok['tok'].lower()
        parses = [sent["toks"] for sent in parsed]

        extract = self._extractor_lookup[self.strategy_collection]
        utt.meta[self.strategy_attribute_name], marks = extract(parses)

        if markers:
            utt.meta[self.marker_attribute_name] = marks

        return utt
예제 #10
0
    def transform_utterance(self, utt, override_input_filter=False):
        """
            Computes per-utterance attributes of an individual utterance or string.

            The utterance is returned without annotation when it fails `input_filter` (unless
            `override_input_filter` is set), or when any of the `input_field` attributes is
            missing (`None`). Strings are first converted to an utterance; when `input_field`
            is `None` the utterance text itself is the input and `input_filter` is not consulted.

            :param utt: utterance or a string
            :param override_input_filter: ignore `input_filter` and compute attribute for all utterances
            :return: the utterance
            :raises TypeError: if `input_field` is neither `None`, a string, nor a list/tuple of strings
        """

        if isinstance(utt, str):
            utt = Utterance(text=utt)

        if self.input_field is None:
            text_entry = utt.text
        else:
            if not override_input_filter and not self.input_filter(utt, self.aux_input):
                return utt
            if isinstance(self.input_field, str):
                text_entry = utt.get_info(self.input_field)
            elif isinstance(self.input_field, (list, tuple)):
                # Several inputs: hand proc_fn a {field: value} mapping, but only
                # when every field is present on the utterance.
                text_entry = {field: utt.get_info(field) for field in self.input_field}
                if any(value is None for value in text_entry.values()):
                    return utt
            else:
                # Previously this fell through and hit an UnboundLocalError below.
                raise TypeError(
                    'input_field must be None, a string, or a list of strings; got '
                    + type(self.input_field).__name__)

        if text_entry is None:
            return utt

        if len(self.aux_input) == 0:
            result = self.proc_fn(text_entry)
        else:
            result = self.proc_fn(text_entry, self.aux_input)

        if self.multi_outputs:
            # One result per output field, paired positionally.
            for res, out in zip(result, self.output_field):
                utt.set_info(out, res)
        else:
            utt.set_info(self.output_field, result)
        return utt
    def transform_utterance(self, utterance, spacy_nlp = None, markers = False):
        """
        Annotate a single utterance (or raw string) with politeness strategies.

        :param utterance: the utterance to annotate; a string is wrapped in a new Utterance.
        :param spacy_nlp: SpaCy object used for parsing; when omitted, one is created with
            `spacy.load('en', disable=['ner'])`.
        :param markers: if True, also store the returned markers under `self.marker_attribute_name`.
        :return: the utterance with politeness annotations.
        """
        if isinstance(utterance, str):
            utterance = Utterance(text=utterance)

        nlp = spacy_nlp if spacy_nlp is not None else spacy.load('en', disable=['ner'])
        utterance.meta['parsed'] = process_text(utterance.text, spacy_nlp=nlp)

        # Normalise tokens: lower-case, then strip everything except a-z and , . : ;
        for sentence in utterance.meta["parsed"]:
            for token in sentence["toks"]:
                lowered = token["tok"].lower()
                token["tok"] = re.sub("[^a-z,.:;]", "", lowered)

        strategies, marks = get_politeness_strategy_features(utterance)
        utterance.meta[self.strategy_attribute_name] = strategies
        if markers:
            utterance.meta[self.marker_attribute_name] = marks

        return utterance
예제 #12
0
    def transform_utterance(self, utterance, spacy_nlp=None, markers=False):
        """
        Annotate a single utterance (or raw string) with politeness strategies.

        :param utterance: the utterance to annotate; a string is wrapped in a new Utterance
            whose speaker id is 'speaker'.
        :param spacy_nlp: SpaCy object used for parsing; when omitted, one is created with
            `spacy.load('en_core_web_sm', disable=['ner'])`.
        :param markers: if True, also store the returned markers under `self.marker_attribute_name`.
        :return: the utterance with politeness annotations.
        """
        if isinstance(utterance, str):
            utterance = Utterance(text=utterance, speaker=Speaker(id='speaker'))

        nlp = spacy_nlp
        if nlp is None:
            nlp = spacy.load('en_core_web_sm', disable=['ner'])

        utterance.meta['parsed'] = process_text(utterance.text, spacy_nlp=nlp)

        # Lower-case every token of the stored parse in place.
        for sentence in utterance.meta["parsed"]:
            for token in sentence["toks"]:
                token["tok"] = token['tok'].lower()

        # Pick the extractor registered for the configured strategy collection.
        extractor = self.__extractor_lookup[self.strategy_collection]
        strategies, marks = extractor(utterance)
        utterance.meta[self.strategy_attribute_name] = strategies

        if markers:
            utterance.meta[self.marker_attribute_name] = marks

        return utterance
    def test_multiple_types(self):
        """The index accumulates every non-None value type seen for a key."""
        rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
            for utt_id, text, speaker_id in rows
        ])

        int_type, str_type = str(int), str(str)

        # A None value leaves the key without an index entry.
        corpus1.get_utterance('2').meta['hey'] = None
        self.assertEqual(corpus1.meta_index.utterances_index.get('hey'), None)

        # The first real value registers its type ...
        corpus1.get_utterance('0').meta['hey'] = 5
        self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                         [int_type])

        # ... and a value of another type is appended after it.
        corpus1.get_utterance('1').meta['hey'] = 'five'
        self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                         [int_type, str_type])
    def test_corpus_merge_add(self):
        """Adding utterances with new metadata keys must update the indexes."""
        rows = [
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ]
        corpus1 = Corpus(utterances=[
            Utterance(id=utt_id, text=text, user=User(id=user_id))
            for utt_id, text, user_id in rows
        ])

        for utt_id, key, value in (("0", 'foo', 'bar'),
                                   ("1", 'foo', 'bar2'),
                                   ("2", 'hey', 'jude')):
            corpus1.get_utterance(utt_id).meta[key] = value

        # A separately built utterance carrying fresh utterance-level and
        # user-level metadata keys.
        donkey_user = User(id="alice", meta={'donkey': 'kong'})
        new_utt = Utterance(id="4",
                            text="hello world",
                            user=donkey_user,
                            meta={'new': 'meta'})

        new_corpus = corpus1.add_utterances([new_utt])
        index = new_corpus.meta_index
        self.assertTrue('new' in index.utterances_index)
        self.assertTrue('donkey' in index.users_index)
예제 #15
0
	def transform_utterance(self, utterance):
		"""
			Computes prompt type assignments for a single utterance.

			Accepts a ConvoKit Utterance or a raw string. A string is wrapped in a new Utterance and flagged with `is_question = True` before being passed to the pipeline, so string input always receives assignments, even if it is not a question.

			:param utterance: the utterance, as an Utterance or string.
			:return: the utterance, annotated with type assignments.
		"""

		if not isinstance(utterance, str):
			return self.pipe.transform_utterance(utterance)

		wrapped = Utterance(text=utterance)
		wrapped.meta['is_question'] = True
		return self.pipe.transform_utterance(wrapped)
예제 #16
0
    def test_overlap_diff_data(self):
        """
        Merge where utterance id "2" exists in both corpora with different data.

        The merged corpus must keep the first corpus's text and speaker for
        the clashing utterance, and neither input corpus may change size.
        """

        def build_corpus(rows):
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        corpus1 = build_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        corpus2 = build_corpus([
            ("2", "this is a test2", "candace"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)

        def count(items):
            return len(list(items))

        self.assertEqual(count(merged.iter_utterances()), 5)
        self.assertEqual(count(merged.iter_speakers()), 5)
        self.assertEqual(count(corpus1.iter_utterances()), 3)
        self.assertEqual(count(corpus2.iter_utterances()), 3)

        # The clashing utterance keeps corpus1's data.
        self.assertEqual(merged.get_utterance("2").text, "this is a test")
        self.assertEqual(
            merged.get_utterance("2").speaker, Speaker(id="charlie"))
예제 #17
0
    def test_with_overlap(self):
        """
        Basic merge where utterance id "2" is shared and identical in both corpora.
        """

        def build_corpus(rows):
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        shared = ("2", "this is a test", "charlie")
        corpus1 = build_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            shared,
        ])
        corpus2 = build_corpus([
            shared,
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)

        # The shared utterance is counted once; the inputs keep their size.
        merged_utts = list(merged.iter_utterances())
        merged_speakers = list(merged.iter_speakers())
        self.assertEqual(len(merged_utts), 5)
        self.assertEqual(len(merged_speakers), 5)
        self.assertEqual(len(list(corpus1.iter_utterances())), 3)
        self.assertEqual(len(list(corpus2.iter_utterances())), 3)
예제 #18
0
    def test_add_utterance(self):
        """add_utterances merges metadata for ids that already exist."""

        def make_utt(utt_id, text, speaker_id, **extra):
            return Utterance(id=utt_id, text=text,
                             speaker=Speaker(id=speaker_id), **extra)

        corpus1 = Corpus(utterances=[
            make_utt("0", "hello world", "alice"),
            make_utt("1", "my name is bob", "bob"),
            make_utt("2", "this is a test", "charlie",
                     meta={'hey': 'jude', 'hello': 'world'}),
        ])

        # Ids "1" and "2" already exist; only "5" is new.
        utts = [
            make_utt("1", "i like pie", "delta"),
            make_utt("2", "this is a test", "charlie",
                     meta={'hello': 'food', 'what': 'a mood'}),
            make_utt("5", "goodbye", "foxtrot"),
        ]
        added = corpus1.add_utterances(utts)

        self.assertEqual(len(list(added.iter_utterances())), 4)
        # Keys: 'hey' (old), 'hello' (overridden), 'what' (new).
        self.assertEqual(len(added.get_utterance("2").meta), 3)
        self.assertEqual(added.get_utterance("2").meta['hello'], 'food')
예제 #19
0
    def test_corpus_metadata(self):
        """
        Merge two corpora whose corpus-level metadata share a key.

        The second corpus's value is expected to win for the shared key.
        """

        def build_corpus(rows):
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        corpus1 = build_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        corpus2 = build_corpus([
            ("3", "i like pie", "delta"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        for key, value in (('politeness', 0.95), ('toxicity', 0.8)):
            corpus1.add_meta(key, value)
        for key, value in (('toxicity', 0.9), ('paggro', 1.0)):
            corpus2.add_meta(key, value)

        merged = corpus1.merge(corpus2)
        # Three distinct keys overall; 'toxicity' comes from corpus2.
        self.assertEqual(len(merged.meta), 3)
        self.assertEqual(merged.meta['toxicity'], 0.9)
	def transform_utterance(self, utt):
		"""
			Runs a single string or utterance through every transformer in the pipeline, in order.

			:param utt: the utterance (or string, which is wrapped in an Utterance) to compute attributes for.
			:return: the utterance returned by the last step, with new attributes.
		"""

		current = Utterance(text=utt) if isinstance(utt, str) else utt
		for _, step in self.steps:
			current = step.transform_utterance(current)
		return current
예제 #21
0
    def transform_utterance(self, utt, **params):
        """
        Runs a single string or utterance through every transformer in the pipeline, in order.

        :param utt: the utterance (or string, which is wrapped in an Utterance with speaker id
            "speaker") to compute attributes for.
        :param params: keyword arguments, split per step by `self._parse_param_steps`; a step
            receives extra keyword arguments only when it has an entry in that result.
        :return: the utterance returned by the last step, with new attributes.
        """
        per_step_kwargs = self._parse_param_steps(params)

        current = utt
        if isinstance(current, str):
            current = Utterance(text=current, speaker=Speaker(id="speaker"))

        for step_name, step in self.steps:
            if step_name not in per_step_kwargs:
                current = step.transform_utterance(current)
                continue
            current = step.transform_utterance(current, **per_step_kwargs[step_name])
        return current
예제 #22
0
    def test_overlap_convo_metadata(self):
        """
        Merge two corpora sharing conversation 'convo1' with differing metadata.

        The second corpus's conversation metadata is expected to win for
        keys present in both.
        """

        def build_corpus(rows):
            return Corpus(utterances=[
                Utterance(id=utt_id,
                          conversation_id='convo1',
                          text=text,
                          speaker=Speaker(id=speaker_id))
                for utt_id, text, speaker_id in rows
            ])

        corpus1 = build_corpus([
            ("0", "hello world", "alice"),
            ("1", "my name is bob", "bob"),
            ("2", "this is a test", "charlie"),
        ])
        corpus2 = build_corpus([
            ("2", "this is a test", "charlie"),
            ("4", "this is a sentence", "echo"),
            ("5", "goodbye", "foxtrot"),
        ])

        for key, value in (('hey', 'jude'), ('hello', 'world')):
            corpus1.get_conversation('convo1').add_meta(key, value)
        for key, value in (('hey', 'jude'),
                           ('hello', 'food'),
                           ('what', 'a mood')):
            corpus2.get_conversation('convo1').add_meta(key, value)

        merged = corpus1.merge(corpus2)
        # Keys: 'hey' (same in both), 'hello' (overridden), 'what' (new).
        self.assertEqual(len(merged.get_conversation('convo1').meta), 3)
        self.assertEqual(
            merged.get_conversation('convo1').meta['hello'], 'food')
예제 #23
0
    def test_no_overlap(self):
        """
        Basic merge of two corpora that share no utterance ids.
        """

        def build_corpus(rows):
            return Corpus(utterances=[
                Utterance(id=utt_id, text=text, user=User(name=user_name))
                for utt_id, text, user_name in rows
            ])

        corpus1 = build_corpus([
            (0, "hello world", "alice"),
            (1, "my name is bob", "bob"),
            (2, "this is a test", "charlie"),
        ])
        corpus2 = build_corpus([
            (3, "i like pie", "delta"),
            (4, "this is a sentence", "echo"),
            (5, "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)

        # Everything from both sides is present; the inputs keep their size.
        merged_utts = list(merged.iter_utterances())
        merged_users = list(merged.iter_users())
        self.assertEqual(len(merged_utts), 6)
        self.assertEqual(len(merged_users), 6)
        self.assertEqual(len(list(corpus1.iter_utterances())), 3)
        self.assertEqual(len(list(corpus2.iter_utterances())), 3)
예제 #24
0
    def test_overlap_diff_metadata(self):
        """
        Merge where utterance id "2" has the same data but different metadata.

        The second corpus's utterance metadata is expected to win for keys
        present in both.
        """

        def make_utt(utt_id, text, speaker_id, **extra):
            return Utterance(id=utt_id, text=text,
                             speaker=Speaker(id=speaker_id), **extra)

        corpus1 = Corpus(utterances=[
            make_utt("0", "hello world", "alice"),
            make_utt("1", "my name is bob", "bob"),
            make_utt("2", "this is a test", "charlie",
                     meta={'hey': 'jude', 'the': 'beatles'}),
        ])

        corpus2 = Corpus(utterances=[
            make_utt("2", "this is a test", "charlie",
                     meta={'hey': 'jude', 'the': 'ringo', 'let it': 'be'}),
            make_utt("4", "this is a sentence", "echo"),
            make_utt("5", "goodbye", "foxtrot"),
        ])

        merged = corpus1.merge(corpus2)
        self.assertEqual(len(list(merged.iter_utterances())), 5)
        self.assertEqual(len(list(merged.iter_speakers())), 5)

        # Keys: 'hey' (same in both), 'the' (overridden), 'let it' (new).
        self.assertEqual(len(merged.get_utterance("2").meta), 3)
        self.assertEqual(merged.get_utterance("2").meta['the'], 'ringo')
예제 #25
0
 def setUp(self) -> None:
     """
     Build the fixture corpus: one conversation tree plus a lone utterance.

     Basic Conversation tree (left to right within subtree => earliest to latest)
                0
         1      2      3
       4 5 6   7 8     9
     10                11
     """
     # (id, reply_to, timestamp), kept in the original insertion order.
     # Note that "2" is inserted before "1".
     tree_rows = [
         ("0", None, 0),
         ("2", "0", 2),
         ("1", "0", 1),
         ("3", "0", 3),
         ("4", "1", 4),
         ("5", "1", 5),
         ("6", "1", 6),
         ("7", "2", 4),
         ("8", "2", 5),
         ("9", "3", 4),
         ("10", "4", 5),
         ("11", "9", 10),
     ]
     utterances = [
         Utterance(id=utt_id,
                   reply_to=parent,
                   root="0",
                   speaker=Speaker(id="alice"),
                   timestamp=stamp)
         for utt_id, parent, stamp in tree_rows
     ]
     # A second, single-utterance conversation rooted at "other".
     utterances.append(
         Utterance(id="other",
                   reply_to=None,
                   root="other",
                   speaker=Speaker(id="alice"),
                   timestamp=99))

     self.corpus = Corpus(utterances=utterances)
     self.corpus.get_conversation("0").meta['hey'] = 'jude'
     self.corpus.meta['foo'] = 'bar'