Code Example #1
    def fit_transform(self, corpus: Corpus) -> Corpus:
        """
        fit_transform() retrieves features from the corpus's conversational
        threads using retrieve_feats() and annotates each Conversation with them

        :param corpus: Corpus object to retrieve feature information from

        :return: corpus with conversations having a new meta field "hyperconvo" containing the stats generated by retrieve_feats(). Each conversation's metadata then contains the stats for the thread(s) it contains.
        """
        feats = HyperConvo.retrieve_feats(corpus,
                                          prefix_len=self.prefix_len,
                                          min_thread_len=self.min_thread_len,
                                          include_root=self.include_root)
        if self.include_root: # threads start at root (post)
            for root_id in feats.keys():
                convo = corpus.get_conversation(root_id)
                convo.add_meta("hyperconvo", {root_id: feats[root_id]})
        else: # threads start at top-level-comment
            # Map each thread root (post) to the stats of its top-level comments
            # (tlc = top-level comment)
            threads = corpus.utterance_threads(prefix_len=self.prefix_len, include_root=False)
            root_to_tlc = dict()
            for tlc_id, utts in threads.items():
                if len(utts) < self.min_thread_len: continue
                thread_root = threads[tlc_id][tlc_id].root
                if thread_root in root_to_tlc:
                    root_to_tlc[thread_root][tlc_id] = feats[tlc_id]
                else:
                    root_to_tlc[thread_root] = {tlc_id: feats[tlc_id]}

            for root_id in root_to_tlc:
                convo = corpus.get_conversation(root_id)
                convo.add_meta("hyperconvo", root_to_tlc[root_id])

        return corpus
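A minimal usage sketch for the transformer above (an illustration, not part of the original snippet): it assumes ConvoKit's Corpus, HyperConvo, and download() are importable, that the named corpus exists, and that the constructor takes the same parameters referenced in the snippet, which may differ between ConvoKit releases.

# Hedged usage sketch; see the caveats in the paragraph above.
from convokit import Corpus, HyperConvo, download

corpus = Corpus(filename=download("reddit-corpus-small"))
hc = HyperConvo(prefix_len=10, min_thread_len=10, include_root=True)
corpus = hc.fit_transform(corpus)

# Each annotated conversation now carries the hypergraph statistics.
for convo in corpus.iter_conversations():
    if "hyperconvo" in convo.meta:
        print(convo.id, sorted(convo.meta["hyperconvo"].keys())[:3])
        break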
Code Example #2
    def test_broken_convos(self):
        """
        Test that traversal fails on malformed conversations
        (multiple roots; reply to an utterance missing from the conversation)
        """

        corpus1 = Corpus(utterances=[
            Utterance(id="0",
                      text="hello world",
                      reply_to=None,
                      speaker=Speaker(id="alice"),
                      timestamp=0),
            Utterance(id="1",
                      text="my name is bob",
                      reply_to="0",
                      speaker=Speaker(id="bob"),
                      timestamp=2),
            Utterance(id="2",
                      text="this is a test",
                      reply_to="1",
                      speaker=Speaker(id="charlie"),
                      timestamp=1),
            Utterance(id="3",
                      text="hello world 2",
                      reply_to=None,
                      speaker=Speaker(id="alice2"),
                      timestamp=0),
        ])

        corpus2 = Corpus(utterances=[
            Utterance(id="0",
                      text="hello world",
                      reply_to=None,
                      speaker=Speaker(id="alice"),
                      timestamp=0),
            Utterance(id="1",
                      text="my name is bob",
                      reply_to="0",
                      speaker=Speaker(id="bob"),
                      timestamp=2),
            Utterance(id="2",
                      text="this is a test",
                      reply_to="1",
                      speaker=Speaker(id="charlie"),
                      timestamp=1),
            Utterance(id="3",
                      text="hello world 2",
                      reply_to="9",
                      speaker=Speaker(id="alice2"),
                      timestamp=0),
        ])

        # test broken convo where there are multiple roots
        convo = corpus1.get_conversation(None)
        self.assertRaises(
            ValueError, lambda: list(convo.traverse("dfs", as_utterance=True)))

        # test broken convo where utterance replies to something not in Conversation
        convo = corpus2.get_conversation(None)
        self.assertRaises(
            ValueError, lambda: list(convo.traverse("dfs", as_utterance=True)))
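For context, a hedged sketch of the kind of reply-tree validation these tests exercise (illustrative only, not ConvoKit's actual implementation): traversal needs exactly one root utterance, and every reply_to must resolve to an utterance inside the same conversation.

# Illustrative check, not ConvoKit's implementation: a conversation's reply
# structure must form a single tree.
def check_reply_tree(utterances):
    """utterances: iterable of objects with .id and .reply_to attributes."""
    ids = {utt.id for utt in utterances}
    roots = [utt.id for utt in utterances if utt.reply_to is None]
    if len(roots) != 1:
        raise ValueError("expected exactly one root, found %d" % len(roots))
    dangling = [utt.id for utt in utterances
                if utt.reply_to is not None and utt.reply_to not in ids]
    if dangling:
        raise ValueError("utterances reply to missing parents: %s" % dangling)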
Code Example #3
    def transform(self, corpus: Corpus) -> Corpus:
        """Computes the average number of questions asked in a conversation
        
        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """

        if self.verbose: print("Finding questions per utterance")

        # Number of questions (runs of '?') in each utterance, keyed by
        # utterance id so that conversations can look counts up by id.
        questions = {}
        for utter_id in corpus.get_utterance_ids():
            text = corpus.get_utterance(utter_id).text
            questions[utter_id] = len(re.findall(r'\?+', text))

        if self.verbose: print("Finding questions per conversation")
        for convo_id in corpus.get_conversation_ids():
            convo = corpus.get_conversation(convo_id)
            # average questions per utterance in this conversation
            avgquestion = np.mean([questions[uid]
                                   for uid in convo.get_utterance_ids()])
            convo.meta[self.ATTR_NAME] = avgquestion

        return corpus
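Note on the counting rule used above: re.findall(r'\?+', text) treats each run of consecutive question marks as a single question, for example:

import re

# "Really?? Why?" contains two question runs -> counted as 2 questions
print(len(re.findall(r'\?+', "Really?? Why?")))  # 2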
Code Example #4
    def transform(self, corpus: Corpus) -> Corpus:
        """Computes the count of pause and hesitancy words for each utterance, then aggregates them for each conversation
        
        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """

        if self.verbose:
            print("Finding counts of pause and hesitancy words...")

        pause_words = [
            'um', 'umm', 'ummm', 'uh', 'uhh', 'uhhh', 'hm', 'hmm', 'hmmm',
            'er', 'err', 'uh huh', 'huh', 'mhm', 'mhmm', 'erm', '...', 'ah',
            'ahh', 'ahem', 'eh', 'ehh', 'ehhh', 'meh'
        ]
        hesitant_words = [
            'maybe', 'not', 'sure', 'unsure', 'probably', 'well', 'okay',
            'like', 'actually', 'basically', 'seriously', 'totally',
            'literally', 'know', 'mean', 'guess', 'suppose', 'but',
            'something', 'so', 'wow', 'just', 'really', 'later', 'wait',
            'future', 'almost', 'slightly', 'perhaps', 'somehow', 'sort',
            'kind', 'little', 'somewhat', 'hey', 'alas', 'see', 'sounds', 'ok',
            'roughly', 'why', 'how', 'yep', 'yup', 'may', 'possibly', 'might',
            'could', 'doubt', 'skeptical', 'don\'t', 'won\'t', 'nah'
        ]

        # Per-utterance counts keyed by utterance id, so conversations can look
        # them up by id rather than by list position.
        pause = {}
        hesitancy = {}
        # Punctuation to strip; apostrophes are kept so contractions like
        # "don't" can still match the word lists above.
        strip_chars = set('!.:?,"@#$%^&*()-~`_+=><[]{}')
        for utter_id in corpus.get_utterance_ids():
            # lowercase so tokens can match the (lowercase) word lists above
            text = corpus.get_utterance(utter_id).text.lower()
            textcleaned = "".join(c for c in text if c not in strip_chars)
            textlist = textcleaned.split()
            npause = len([w for w in textlist if w in pause_words])
            nhesitant = len([w for w in textlist if w in hesitant_words])
            pause[utter_id] = npause          # pause words in this utterance
            hesitancy[utter_id] = nhesitant   # hesitancy words in this utterance
            corpus.get_utterance(utter_id).meta[self.NAME1] = npause
            corpus.get_utterance(utter_id).meta[self.NAME2] = nhesitant

        for convo_id in corpus.get_conversation_ids():
            convo = corpus.get_conversation(convo_id)
            convo_utters = convo.get_utterance_ids()
            avgpause = np.mean([pause[uid] for uid in convo_utters])
            avghesitancy = np.mean([hesitancy[uid] for uid in convo_utters])
            convo.meta[self.NAME3] = avgpause
            convo.meta[self.NAME4] = avghesitancy

        return corpus
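As a quick worked example against the word lists above (assuming the cleaning and matching shown in the snippet), the utterance "um I guess maybe" contributes one pause word and two hesitancy words:

# Assumes pause_words and hesitant_words as defined in the snippet above.
tokens = "um i guess maybe".split()
print(sum(w in pause_words for w in tokens))     # 1  ('um')
print(sum(w in hesitant_words for w in tokens))  # 2  ('guess', 'maybe')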
Code Example #5
    def transform(self, corpus: Corpus):
        """Annotates each utterance with the number of references to the
        transformer's key words, their per-token incidence, and the matched
        words (stored under "num_pol_refs", "num_pol_refs_incidence", and
        "pol_words").

        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """
        
        for conv_id in corpus.conversations:
            conv = corpus.get_conversation(conv_id)
            for utt in conv.iter_utterances():
                if utt.text is not None:
                    tokenized = word_tokenize(utt.text.lower())
                    invocations = 0
                    length = len(tokenized)
                    pol_words = []
                    for token in tokenized:
                        if token in self.key_words:
                            invocations += 1
                            pol_words.append(token)
                    utt.meta["num_pol_refs"] = invocations
                    if length > 0:
                        utt.meta["num_pol_refs_incidence"] = invocations / length
                    else:
                        utt.meta["num_pol_refs_incidence"] = 0
                    utt.meta["pol_words"] = pol_words
        return corpus
Code Example #6
    def test_corpus_dump(self):
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(
                id="bob")),
            Utterance(
                id="2", text="this is a test", speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0
        corpus1.dump('test_index_meta_corpus', base_path="./")
        corpus2 = Corpus(filename="test_index_meta_corpus")

        self.assertEqual(corpus1.meta_index.utterances_index,
                         corpus2.meta_index.utterances_index)
        self.assertEqual(corpus1.meta_index.speakers_index,
                         corpus2.meta_index.speakers_index)
        self.assertEqual(corpus1.meta_index.conversations_index,
                         corpus2.meta_index.conversations_index)
        self.assertEqual(corpus1.meta_index.overall_index,
                         corpus2.meta_index.overall_index)
Code Example #7
    def test_key_insertion_deletion(self):
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(
                id="bob")),
            Utterance(
                id="2", text="this is a test", speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0

        self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                         str(type('bar')))
        self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                         str(type(1)))
        self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                         str(type(1.0)))

        # test that deleting a key from an utterance removes it from the index
        del corpus1.get_utterance("2").meta['hey']
        self.assertRaises(KeyError,
                          lambda: corpus1.meta_index.utterances_index['hey'])

        # test that deleting a key from an utterance removes it from the index and from all other objects of same type
        del corpus1.get_utterance("1").meta['foo']
        self.assertRaises(KeyError,
                          lambda: corpus1.meta_index.utterances_index['foo'])
        self.assertRaises(KeyError,
                          lambda: corpus1.get_utterance("0").meta["foo"])
Code Example #8
    def test_key_insertion_deletion(self):
        corpus1 = Corpus(utterances=[
            Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
            Utterance(id="1", text="my name is bob", speaker=Speaker(
                id="bob")),
            Utterance(
                id="2", text="this is a test", speaker=Speaker(id="charlie")),
        ])

        corpus1.get_utterance("0").meta['foo'] = 'bar'
        corpus1.get_utterance("1").meta['foo'] = 'bar2'
        corpus1.get_utterance("2").meta['hey'] = 'jude'

        corpus1.get_conversation(None).meta['convo_meta'] = 1

        corpus1.get_speaker("alice").meta['surname'] = 1.0

        self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                         [str(type('bar'))])
        self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                         [str(type(1))])
        self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                         [str(type(1.0))])

        # test that deleting an attribute from an individual utterance fails to remove it
        del corpus1.get_utterance("2").meta['hey']
        corpus1.get_utterance("2").meta['hey']

        # test that delete_metadata works
        corpus1.delete_metadata('utterance', 'foo')
        self.assertRaises(KeyError,
                          lambda: corpus1.meta_index.utterances_index['foo'])
        self.assertRaises(KeyError,
                          lambda: corpus1.get_utterance("0").meta["foo"])
Code Example #9
    def test_overlap_convo_metadata(self):
        """
        Merge two corpora whose shared conversation has metadata differences.

        Expect the second corpus's conversation metadata to override the first's when keys collide.
        """
        corpus1 = Corpus(utterances=[
            Utterance(id="0",
                      conversation_id='convo1',
                      text="hello world",
                      speaker=Speaker(id="alice")),
            Utterance(id="1",
                      conversation_id='convo1',
                      text="my name is bob",
                      speaker=Speaker(id="bob")),
            Utterance(id="2",
                      conversation_id='convo1',
                      text="this is a test",
                      speaker=Speaker(id="charlie")),
        ])

        corpus2 = Corpus(utterances=[
            Utterance(id="2",
                      conversation_id='convo1',
                      text="this is a test",
                      speaker=Speaker(id="charlie")),
            Utterance(id="4",
                      conversation_id='convo1',
                      text="this is a sentence",
                      speaker=Speaker(id="echo")),
            Utterance(id="5",
                      conversation_id='convo1',
                      text="goodbye",
                      speaker=Speaker(id="foxtrot")),
        ])

        corpus1.get_conversation('convo1').add_meta('hey', 'jude')
        corpus1.get_conversation('convo1').add_meta('hello', 'world')

        corpus2.get_conversation('convo1').add_meta('hey', 'jude')
        corpus2.get_conversation('convo1').add_meta('hello', 'food')
        corpus2.get_conversation('convo1').add_meta('what', 'a mood')

        merged = corpus1.merge(corpus2)
        self.assertEqual(len(merged.get_conversation('convo1').meta), 3)
        self.assertEqual(
            merged.get_conversation('convo1').meta['hello'], 'food')
Code Example #10
    def transform(self, corpus: Corpus):
        for character in corpus.get_usernames():
            user1 = corpus.get_user(character)
            utterances = user1.get_utterance_ids()
            utterances_per_conversation = []
            conversations = []
            # how often this user posts the first or last utterance of a conversation
            first_last = 0
            for uid in utterances:
                utterance = corpus.get_utterance(uid)
                conversation = corpus.get_conversation(utterance.root)
                conversations.append(utterance.root)
                # (conversation id, #participants, #utterances), used below to
                # normalize the user's utterances per conversation
                utterances_per_conversation.append(
                    (utterance.root, len(conversation.get_usernames()),
                     len(conversation.get_utterance_ids())))
                if uid in (utterance.root,
                           list(conversation.get_utterance_ids())[-1]):
                    first_last += 1
            raw_count = len(utterances) / len(list(corpus.utterances.values()))
            total_conversations = len(set(conversations))

            # bootstrapping: sample 25 of the user's utterances 20 times; in each
            # sample, count the politeness strategies used at least once,
            # normalized by the sample size
            iterations = 0
            for _ in range(20):
                samples = random.choices(utterances, k=25)
                politeness_rows = []
                for uid in samples:
                    politeness_rows.append(
                        list(
                            corpus.get_utterance(
                                uid).meta["politeness_strategies"].values()))
                politeness_results = np.sum(politeness_rows, 0)
                politeness_results_count = len(
                    [x for x in politeness_results if x != 0.0]) / len(politeness_rows)
                iterations += politeness_results_count
            politeness_final = iterations / 20

            # share of this user's conversations where they spoke first or last
            first_last_count = first_last / total_conversations

            # utterances per conversation, normalized by each conversation's
            # average utterances per participant
            utterances_per_conversations = Counter(utterances_per_conversation)
            upc_final = []
            for k, v in utterances_per_conversations.items():
                average = k[2] / k[1]
                upc_final.append(v / average)
            upc_count = sum(upc_final) / len(utterances_per_conversations)

            user1.add_meta('politeness_complexity', politeness_final)
            user1.add_meta('utterance_per_conversation', upc_count)
            user1.add_meta('first_last_word', first_last_count)
            user1.add_meta('raw_count', raw_count)
        return corpus
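The transform above reads utt.meta["politeness_strategies"], so the corpus must have been annotated beforehand. A hedged prerequisite sketch using ConvoKit's parser and politeness transformer (exact setup may vary by version):

# Hedged prerequisite sketch: produces the "politeness_strategies" metadata
# that the transform above consumes.
from convokit import TextParser, PolitenessStrategies

corpus = TextParser().transform(corpus)            # adds dependency parses
corpus = PolitenessStrategies().transform(corpus)  # adds "politeness_strategies"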
Code Example #11
    def transform(self, corpus: Corpus):
        """Adds metadata about readability of the corpus to each utterance.

        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """
        for conv_id in corpus.conversations:
            conv = corpus.get_conversation(conv_id)
            for utt in conv.iter_utterances():
                if utt.text is not None:
                    cumu_sentences = 0
                    cumu_words = 0
                    cumu_syllables = 0
                    cumu_syll_counted_words = 0
                    cumu_words_over2_syllables = 0
                    for sentence in sent_tokenize(utt.text):
                        cumu_sentences += 1
                        tokenized = word_tokenize(sentence)
                        cumu_words += len(tokenized)
                        for token in tokenized:
                            try:
                                syll = self.__num_syllables(token)[0]
                                cumu_syllables += syll
                                cumu_syll_counted_words += 1
                                if syll > 2:
                                    cumu_words_over2_syllables += 1
                            except Exception:
                                # skip tokens the syllable counter cannot handle
                                pass

                    # readability formulas from https://www.geeksforgeeks.org/readability-index-pythonnlp/
                    if cumu_sentences > 0 and cumu_syll_counted_words > 0:
                        gunning_fog = 0.4 * ((cumu_words / cumu_sentences) +
                                             (cumu_words_over2_syllables / cumu_syll_counted_words))
                        flesch = 206.835 - (1.015 * (cumu_words / cumu_sentences)) - \
                            (84.6 * (cumu_syllables / cumu_syll_counted_words))
                        flesch_kincaid = (0.39 * cumu_words / cumu_sentences) + \
                            (11.8 * cumu_syllables / cumu_syll_counted_words) - 15.59
                        utt.meta['complexity'] = {
                            "gunning_fog": gunning_fog,
                            "flesch": flesch,
                            "flesch_kincaid": flesch_kincaid,
                            "num_words": cumu_words,
                            "num_sentences": cumu_sentences}
                    else:
                        utt.meta['complexity'] = {
                            "gunning_fog": None,
                            "flesch": None,
                            "flesch_kincaid": None,
                            "num_words": None,
                            "num_sentences": None}
        return corpus
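A quick sanity check of the Flesch formula above, using hypothetical numbers: a single 10-word sentence whose 10 syllable-counted words total 14 syllables scores in the "fairly easy" range.

# One sentence, 10 words, 14 syllables counted over all 10 words.
words, sentences, syllables, counted_words = 10, 1, 14, 10
flesch = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / counted_words)
print(round(flesch, 3))  # 78.245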
Code Example #12
    def transform(self, corpus: Corpus) -> Corpus:
        """
        transform() retrieves features from the corpus's conversational
        threads using retrieve_feats() and annotates Conversations with this data

        :param corpus: Corpus object to retrieve feature information from

        :return: corpus with conversations having a new meta field "hyperconvo" containing the stats generated by retrieve_feats(). Each conversation's metadata then contains the stats for the thread(s) it contains.
        """
        convo_id_to_feats = self.retrieve_feats(corpus)
        for convo_id, feats in convo_id_to_feats.items():
            convo = corpus.get_conversation(convo_id)
            convo.add_meta("hyperconvo", feats)
        return corpus
Code Example #13
    def transform(self, corpus: Corpus):
        """Adds metadata about self-reflection to each utterance.

        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """
        for conv_id in corpus.conversations:
            conv = corpus.get_conversation(conv_id)
            for utt in conv.iter_utterances():
                if utt.text is not None:
                    tokenized = word_tokenize(utt.text.lower())
                    invocations = 0
                    for token in tokenized:
                        if token in self.key_words:
                            invocations += 1
                    utt.meta["num_self_invocations"] = invocations
        return corpus
Code Example #14
    def transform(self, corpus: Corpus):
        """Extract politeness strategies from each utterances in the corpus and annotate
        the utterances with the extracted strategies. Requires that the corpus has previously
        been transformed by a Parser, such that each utterance has dependency parse info in
        its metadata table.
        
        :param corpus: the corpus to compute features for.
        :type corpus: Corpus
        """
        for conv_id in corpus.conversations:
            conv = corpus.get_conversation(conv_id)
            num_utts = len(conv.get_utterance_ids())
            for i, utt in enumerate(conv.iter_utterances()):
                length_through_conv = i / num_utts
                utt.meta['length_tracker'] = [i, length_through_conv]

        return corpus
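For example, in a four-utterance conversation the second utterance yielded by iter_utterances() (i = 1) receives length_tracker == [1, 0.25], i.e. its index and the fraction of the conversation that precedes it.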
Code Example #15
class CorpusTraversal(unittest.TestCase):
    def setUp(self) -> None:
        """
        Basic Conversation tree (left to right within subtree => earliest to latest)
                   0
            1      2      3
          4 5 6   7 8     9
        10                11
        """
        self.corpus = Corpus(utterances=[
            Utterance(id="0",
                      reply_to=None,
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=0),
            Utterance(id="2",
                      reply_to="0",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=2),
            Utterance(id="1",
                      reply_to="0",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=1),
            Utterance(id="3",
                      reply_to="0",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=3),
            Utterance(id="4",
                      reply_to="1",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=4),
            Utterance(id="5",
                      reply_to="1",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=5),
            Utterance(id="6",
                      reply_to="1",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=6),
            Utterance(id="7",
                      reply_to="2",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=4),
            Utterance(id="8",
                      reply_to="2",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=5),
            Utterance(id="9",
                      reply_to="3",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=4),
            Utterance(id="10",
                      reply_to="4",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=5),
            Utterance(id="11",
                      reply_to="9",
                      root="0",
                      speaker=Speaker(id="alice"),
                      timestamp=10),
            Utterance(id="other",
                      reply_to=None,
                      root="other",
                      speaker=Speaker(id="alice"),
                      timestamp=99)
        ])
        self.corpus.get_conversation("0").meta['hey'] = 'jude'
        self.corpus.meta['foo'] = 'bar'

    def test_broken_convos(self):
        """
        Test that traversal fails on malformed conversations
        (multiple roots; reply to an utterance missing from the conversation)
        """

        corpus1 = Corpus(utterances=[
            Utterance(id="0",
                      text="hello world",
                      reply_to=None,
                      speaker=Speaker(id="alice"),
                      timestamp=0),
            Utterance(id="1",
                      text="my name is bob",
                      reply_to="0",
                      speaker=Speaker(id="bob"),
                      timestamp=2),
            Utterance(id="2",
                      text="this is a test",
                      reply_to="1",
                      speaker=Speaker(id="charlie"),
                      timestamp=1),
            Utterance(id="3",
                      text="hello world 2",
                      reply_to=None,
                      speaker=Speaker(id="alice2"),
                      timestamp=0),
        ])

        corpus2 = Corpus(utterances=[
            Utterance(id="0",
                      text="hello world",
                      reply_to=None,
                      speaker=Speaker(id="alice"),
                      timestamp=0),
            Utterance(id="1",
                      text="my name is bob",
                      reply_to="0",
                      speaker=Speaker(id="bob"),
                      timestamp=2),
            Utterance(id="2",
                      text="this is a test",
                      reply_to="1",
                      speaker=Speaker(id="charlie"),
                      timestamp=1),
            Utterance(id="3",
                      text="hello world 2",
                      reply_to="9",
                      speaker=Speaker(id="alice2"),
                      timestamp=0),
        ])

        # test broken convo where there are multiple roots
        convo = corpus1.get_conversation(None)
        self.assertRaises(
            ValueError, lambda: list(convo.traverse("dfs", as_utterance=True)))

        # test broken convo where utterance replies to something not in Conversation
        convo = corpus2.get_conversation(None)
        self.assertRaises(
            ValueError, lambda: list(convo.traverse("dfs", as_utterance=True)))

    def test_bfs_traversal(self):
        convo = self.corpus.get_conversation("0")
        bfs_traversal = [
            utt.id for utt in convo.traverse("bfs", as_utterance=True)
        ]
        self.assertEqual(bfs_traversal, [str(i) for i in range(12)])

    def test_dfs_traversal(self):
        convo = self.corpus.get_conversation("0")
        dfs_traversal = [
            utt.id for utt in convo.traverse("dfs", as_utterance=True)
        ]
        self.assertEqual(
            dfs_traversal,
            [str(i) for i in [0, 1, 4, 10, 5, 6, 2, 7, 8, 3, 9, 11]])

    def test_postorder_traversal(self):
        convo = self.corpus.get_conversation("0")
        postorder_traversal = [
            utt.id for utt in convo.traverse("postorder", as_utterance=True)
        ]
        self.assertEqual(
            postorder_traversal,
            ['10', '4', '5', '6', '1', '7', '8', '2', '11', '9', '3', '0'])

    def test_preorder_traversal(self):
        convo = self.corpus.get_conversation("0")
        preorder_traversal = [
            utt.id for utt in convo.traverse("preorder", as_utterance=True)
        ]
        self.assertEqual(
            preorder_traversal,
            ['0', '1', '4', '10', '5', '6', '2', '7', '8', '3', '9', '11'])

    def test_subtree(self):
        convo = self.corpus.get_conversation("0")
        node = convo.get_subtree("1")
        self.assertEqual([node.utt.id for node in node.bfs_traversal()],
                         ['1', '4', '5', '6', '10'])

    def test_root_to_leaf_paths(self):
        convo = self.corpus.get_conversation("0")
        paths = convo.get_root_to_leaf_paths()
        path_tuples = [tuple(utt.id for utt in paths[i]) for i in range(6)]
        self.assertIn(('0', '1', '4', '10'), path_tuples)
        self.assertIn(('0', '1', '5'), path_tuples)
        self.assertIn(('0', '1', '6'), path_tuples)
        self.assertIn(('0', '2', '7'), path_tuples)
        self.assertIn(('0', '2', '8'), path_tuples)
        self.assertIn(('0', '3', '9', '11'), path_tuples)

    def test_one_utt_convo(self):
        convo = self.corpus.get_conversation("other")
        self.assertEqual([utt.id for utt in convo.traverse('bfs')], ["other"])
        self.assertEqual([utt.id for utt in convo.traverse('dfs')], ["other"])
        self.assertEqual([utt.id for utt in convo.traverse('postorder')],
                         ["other"])
        self.assertEqual([utt.id for utt in convo.traverse('preorder')],
                         ["other"])

    def test_reindex_corpus(self):
        new_convo_roots = ['1', '2', '3']
        new_corpus = self.corpus.reindex_conversations(new_convo_roots)
        # checking for correct number of conversations and utterances
        self.assertEqual(len(list(new_corpus.iter_conversations())), 3)
        self.assertEqual(len(list(new_corpus.iter_utterances())), 11)

        # checking that corpus and conversation metadata was preserved
        for convo in new_corpus.iter_conversations():
            self.assertEqual(convo.meta['original_convo_meta'],
                             self.corpus.get_conversation("0").meta)

        self.assertEqual(self.corpus.meta, new_corpus.meta)

    def test_reindex_corpus2(self):
        new_convo_roots = ['1', '2', '3']
        new_corpus = self.corpus.reindex_conversations(
            new_convo_roots,
            preserve_convo_meta=False,
            preserve_corpus_meta=False)
        # checking for correct number of conversations and utterances
        self.assertEqual(len(list(new_corpus.iter_conversations())), 3)
        self.assertEqual(len(list(new_corpus.iter_utterances())), 11)

        # checking that corpus and conversation metadata was preserved
        for convo in new_corpus.iter_conversations():
            self.assertEqual(convo.meta, dict())

        self.assertEqual(new_corpus.meta, dict())