# Example #1
    def sig_to_stems_clicked(self, row):
        """Display the stems of the signature clicked in the major table.

        Reads the signature string from column 0 of the clicked *row*,
        looks up its stems in the lexicon, lays them out in a minor table
        (five stems per table row), and reloads the main window with the
        major table and the new minor table side by side in a splitter.

        :param row: index of the clicked row in
            ``self.sig_to_stems_major_table``
        """
        signature_str = self.sig_to_stems_major_table.item(row, 0).text()
        signature = tuple(signature_str.split(SEP_SIG))

        stems = sorted(self.lexicon.signatures_to_stems()[signature])
        number_of_stems_per_column = 5

        # Chunk the sorted stems into sublists of k stems each,
        # where k = number_of_stems_per_column (the last chunk may be short).
        stem_rows = [stems[i: i + number_of_stems_per_column]
                     for i in range(0, len(stems),
                                    number_of_stems_per_column)]

        # set up the minor table as table widget
        sig_to_stems_minor_table = QTableWidget()
        sig_to_stems_minor_table.horizontalHeader().hide()
        sig_to_stems_minor_table.verticalHeader().hide()
        sig_to_stems_minor_table.clear()
        sig_to_stems_minor_table.setRowCount(len(stem_rows))
        sig_to_stems_minor_table.setColumnCount(number_of_stems_per_column)

        # fill in the minor table
        for table_row, stem_row in enumerate(stem_rows):
            for col, stem in enumerate(stem_row):
                sig_to_stems_minor_table.setItem(
                    table_row, col, QTableWidgetItem(stem))

        sig_to_stems_minor_table.resizeColumnsToContents()

        minor_table_title = QLabel('{} (number of stems: {})'
                                   .format(signature_str, len(stems)))

        minor_table_widget_with_title = QWidget()
        layout = QVBoxLayout()
        layout.addWidget(minor_table_title)
        layout.addWidget(sig_to_stems_minor_table)
        minor_table_widget_with_title.setLayout(layout)

        new_display = QSplitter(Qt.Horizontal)
        new_display.setHandleWidth(10)
        new_display.setChildrenCollapsible(False)

        new_display.addWidget(self.sig_to_stems_major_table)
        new_display.addWidget(minor_table_widget_with_title)
        new_display_width = self.majorDisplay.width() / 2
        new_display.setSizes(
            [new_display_width * 0.4, new_display_width * 0.6])

        self.load_main_window(major_display=new_display)
        self.status.clearMessage()
        # Show the human-readable signature string rather than the tuple
        # repr (previously this formatted the tuple itself, producing
        # e.g. "('NULL', 's') selected" in the status bar).
        self.status.showMessage('{} selected'.format(signature_str))
    def sig_to_stems_clicked(self, row):
        """Handle a click on the signatures-to-stems major table.

        Looks up the stems belonging to the signature in the clicked row,
        arranges them five per row in a small side table, and reloads the
        main window with the major table and that side table placed in a
        horizontal splitter.

        :param row: index of the clicked row in
            ``self.sig_to_stems_major_table``
        """
        signature = self.sig_to_stems_major_table.item(row, 0).text()
        print(signature)
        signature = tuple(signature.split(SEP_SIG))

        stems = sorted(self.lexicon.signatures_to_stems()[signature])
        stems_per_column = 5

        # Slice the sorted stem list into rows of at most
        # stems_per_column items; the final row may be shorter.
        stem_rows = [stems[start: start + stems_per_column]
                     for start in range(0, len(stems), stems_per_column)]

        # Build the side (minor) table that holds the stems.
        minor_table = QTableWidget()
        minor_table.horizontalHeader().hide()
        minor_table.verticalHeader().hide()
        minor_table.clear()
        minor_table.setRowCount(len(stem_rows))
        minor_table.setColumnCount(stems_per_column)

        # Populate the side table cell by cell.
        for r, chunk in enumerate(stem_rows):
            for c, stem in enumerate(chunk):
                minor_table.setItem(r, c, QTableWidgetItem(stem))

        minor_table.resizeColumnsToContents()

        title_label = QLabel('{} (number of stems: {})'
                             .format(SEP_SIG.join(signature), len(stems)))

        # Stack the title above the side table in one widget.
        titled_minor_table = QWidget()
        box = QVBoxLayout()
        box.addWidget(title_label)
        box.addWidget(minor_table)
        titled_minor_table.setLayout(box)

        # Major table on the left, titled side table on the right.
        splitter = QSplitter(Qt.Horizontal)
        splitter.setHandleWidth(10)
        splitter.setChildrenCollapsible(False)
        splitter.addWidget(self.sig_to_stems_major_table)
        splitter.addWidget(titled_minor_table)

        half_width = self.majorDisplay.width() / 2
        splitter.setSizes([half_width * 0.4, half_width * 0.6])

        self.load_main_window(major_display=splitter)
        self.status.clearMessage()
        self.status.showMessage('{} selected'.format(signature))
    def tree_item_clicked(self, item):
        """
        Trigger the appropriate action when something in the lexicon tree
        is clicked, and update the major display plus parameter window.

        :param item: the clicked lexicon-tree item; its column-0 text
            selects which view to build and load.
        """
        item_str = item.text(0)

        # Category headings in the tree are not loadable views themselves.
        if item_str in {WORD_NGRAMS, SIGNATURES, TRIES, PHONOLOGY, MANIFOLDS}:
            return

        print('loading', item_str, flush=True)

        self.status.clearMessage()
        self.status.showMessage('Loading {}...'.format(item_str))

        new_display = None
        new_parameter_window = None

        if item_str == WORDLIST:
            new_display = self.create_major_display_table(
                self.lexicon.word_phonology_dict().items(),
                key=lambda x: x[1].count, reverse=True,
                headers=['Word', 'Count', 'Frequency', 'Phones',
                         'Unigram plog', 'Avg unigram plog',
                         'Bigram plog', 'Avg bigram plog'],
                row_cell_functions=[
                    lambda x: x[0], lambda x: x[1].count,
                    lambda x: x[1].frequency,
                    lambda x: ' '.join(x[1].phones),
                    lambda x: x[1].unigram_plog,
                    lambda x: x[1].avg_unigram_plog,
                    lambda x: x[1].bigram_plog,
                    lambda x: x[1].avg_bigram_plog],
                cutoff=0)

        elif item_str == BIGRAMS:
            # N-gram views need a running corpus text, not a bare wordlist.
            if self.lexicon.file_is_wordlist:
                self.unavailable_for_wordlist()
                return
            new_display = self.create_major_display_table(
                self.lexicon.word_bigram_counter().items(),
                key=lambda x: x[1], reverse=True,
                headers=['Bigram', 'Count'],
                row_cell_functions=[lambda x: SEP_NGRAM.join(x[0]),
                                    lambda x: x[1]],
                cutoff=2000)

        elif item_str == TRIGRAMS:
            if self.lexicon.file_is_wordlist:
                self.unavailable_for_wordlist()
                return
            new_display = self.create_major_display_table(
                self.lexicon.word_trigram_counter().items(),
                key=lambda x: x[1], reverse=True,
                headers=['Trigram', 'Count'],
                row_cell_functions=[lambda x: SEP_NGRAM.join(x[0]),
                                    lambda x: x[1]],
                cutoff=2000)

        elif item_str == SIGS_TO_STEMS:
            # Kept on self so sig_to_stems_clicked can reuse the table
            # when a signature row is clicked.
            self.sig_to_stems_major_table = self.create_major_display_table(
                self.lexicon.signatures_to_stems().items(),
                key=lambda x: len(x[1]), reverse=True,
                headers=['Signature', 'Stem count', 'A few stems'],
                row_cell_functions=[lambda x: SEP_SIG.join(x[0]),
                                    lambda x: len(x[1]),
                                    lambda x: ', '.join(sorted(x[1])[:2]) +
                                              ', ...'],
                cutoff=0)
            # noinspection PyUnresolvedReferences
            self.sig_to_stems_major_table.cellClicked.connect(
                self.sig_to_stems_clicked)
            new_display = self.sig_to_stems_major_table

        elif item_str == WORDS_TO_SIGS:
            new_display = self.create_major_display_table(
                self.lexicon.words_to_signatures().items(),
                key=lambda x: len(x[1]), reverse=True,
                headers=['Word', 'Signature count', 'Signatures'],
                row_cell_functions=[lambda x: x[0],
                                    lambda x: len(x[1]),
                                    lambda x: ', '.join([SEP_SIG.join(sig)
                                                         for sig in
                                                         sorted(x[1])])],
                cutoff=2000)

        elif item_str == WORDS_AS_TRIES:
            words = self.lexicon.broken_words_left_to_right().keys()
            words_to_tries = dict()
            # key: word (str)
            # value: tuple of (str, str)
            # for left-to-right and right-to-left tries

            for word in words:
                l_r = ' '.join(self.lexicon.broken_words_left_to_right()[word])
                r_l = ' '.join(self.lexicon.broken_words_right_to_left()[word])
                words_to_tries[word] = (l_r, r_l)  # left-right, right-left

            new_display = self.create_major_display_table(
                words_to_tries.items(),
                key=lambda x: x[0], reverse=False,
                headers=['Word', 'Reversed word',
                         'Left-to-right trie', 'Right-to-left trie'],
                row_cell_functions=[lambda x: x[0], lambda x: x[0][::-1],
                                    lambda x: x[1][0], lambda x: x[1][1]],
                cutoff=0, set_text_alignment=[(3, Qt.AlignRight)])

        elif item_str == SUCCESSORS:
            new_display = self.create_major_display_table(
                self.lexicon.successors().items(),
                key=lambda x: len(x[1]), reverse=True,
                headers=['String', 'Successor count', 'Successors'],
                row_cell_functions=[lambda x: x[0],
                                    lambda x: len(x[1]),
                                    lambda x: ', '.join(sorted(x[1]))],
                cutoff=0)

        elif item_str == PREDECESSORS:
            new_display = self.create_major_display_table(
                self.lexicon.predecessors().items(),
                key=lambda x: len(x[1]), reverse=True,
                headers=['String', 'Predecessor count', 'Predecessors'],
                row_cell_functions=[lambda x: x[0],
                                    lambda x: len(x[1]),
                                    lambda x: ', '.join(sorted(x[1]))],
                cutoff=0)

        elif item_str == PHONES:
            new_display = self.create_major_display_table(
                self.lexicon.phone_dict().items(),
                key=lambda x: x[1].count, reverse=True,
                headers=['Phone', 'Count', 'Frequency', 'Plog'],
                row_cell_functions=[lambda x: x[0],
                                    lambda x: x[1].count,
                                    lambda x: x[1].frequency,
                                    lambda x: x[1].plog],
                cutoff=0)

        elif item_str == BIPHONES:
            new_display = self.create_major_display_table(
                self.lexicon.biphone_dict().items(),
                key=lambda x: x[1].count, reverse=True,
                headers=['Biphone', 'Count', 'Frequency',
                         'Mutual information (MI)', 'Weighted MI'],
                row_cell_functions=[lambda x: SEP_NGRAM.join(x[0]),
                                    lambda x: x[1].count,
                                    lambda x: x[1].frequency,
                                    lambda x: x[1].MI,
                                    lambda x: x[1].weighted_MI],
                cutoff=0)

        elif item_str == TRIPHONES:
            new_display = self.create_major_display_table(
                self.lexicon.phone_trigram_counter().items(),
                key=lambda x: x[1], reverse=True,
                headers=['Triphone', 'Count'],
                row_cell_functions=[lambda x: SEP_NGRAM.join(x[0]),
                                    lambda x: x[1]],
                cutoff=0)

        elif item_str == WORD_NEIGHBORS:
            if self.lexicon.file_is_wordlist:
                self.unavailable_for_wordlist()
                return
            word_to_freq = self.lexicon.word_unigram_counter()
            new_display = self.create_major_display_table(
                self.lexicon.words_to_neighbors().items(),
                key=lambda x: word_to_freq[x[0]], reverse=True,
                headers=['Word', 'Word count', 'Neighbors'],
                row_cell_functions=[lambda x: x[0],
                                    lambda x: word_to_freq[x[0]],
                                    lambda x: ' '.join(x[1])],
                cutoff=0)

        elif item_str == VISUALIZED_GRAPH:
            if self.lexicon.file_is_wordlist:
                self.unavailable_for_wordlist()
                return

            graph_width = self.screen_width - TREEWIDGET_WIDTH_MAX - 50
            graph_height = self.screen_height - 70
            html_name = 'show_manifold.html'

            manifold_name = '{}_manifold.json'.format(self.corpus_stem_name)
            manifold_filename = os.path.join(CONFIG_DIR, manifold_name)
            print('manifold_filename', manifold_filename)

            manifold_json_data = json_graph.node_link_data(
                self.lexicon.neighbor_graph())
            # Use a context manager so the JSON file is flushed and closed
            # deterministically (the previous open() call leaked the handle).
            with open(manifold_filename, 'w') as json_file:
                json.dump(manifold_json_data, json_file)

            viz_html = os.path.join(CONFIG_DIR, html_name)
            print('viz_html', viz_html)

            # write the show_manifold html file
            with open(viz_html, 'w') as f:
                print(SHOW_MANIFOLD_HTML.format(os.path.dirname(__file__),
                                                graph_width, graph_height,
                                                manifold_filename), file=f)

            url = Path(viz_html).as_uri()
            print('url:', url)

            new_display = QWebView()
            new_display.setUrl(QUrl(url))

        self.load_main_window(major_display=new_display,
                              parameter_window=new_parameter_window)

        self.status.clearMessage()
        self.status.showMessage('{} selected'.format(item_str))
# Example #4
    def output_all_results(self, directory=None, verbose=False, test=False):
        """
        Output all Linguistica results to *directory*.

        :param directory: output directory. If not specified, it defaults to
            the current directory given by ``os.getcwd()``.
        """
        if not directory:
            output_dir = os.getcwd()
        else:
            output_dir = os.path.abspath(directory)

        # ----------------------------------------------------------------------
        if self.corpus_file_object:
            vprint(verbose, 'ngram objects')

            fname = 'word_bigrams.txt'
            obj = double_sorted(self.word_bigram_counter().items(),
                                key=lambda x: x[1],
                                reverse=True)
            f_path = os.path.join(output_dir, fname)
            output_latex(
                obj,
                f_path,
                title='Word bigrams',
                headers=['Word bigram', 'Count'],
                row_functions=[lambda x: ' '.join(x[0]), lambda x: x[1]],
                column_widths=[50, 10],
                lxa_parameters=self.parameters(),
                test=test,
                encoding=self.encoding,
                number_of_word_types=self.number_of_word_types(),
                number_of_word_tokens=self.number_of_word_tokens(),
                input_file_path=self.file_abspath)
            vprint(verbose, '\t' + fname)

            fname = 'word_trigrams.txt'
            obj = double_sorted(self.word_trigram_counter().items(),
                                key=lambda x: x[1],
                                reverse=True)
            f_path = os.path.join(output_dir, fname)
            output_latex(
                obj,
                f_path,
                title='Word trigrams',
                headers=['Word trigram', 'Count'],
                row_functions=[lambda x: ' '.join(x[0]), lambda x: x[1]],
                column_widths=[75, 10],
                lxa_parameters=self.parameters(),
                test=test,
                encoding=self.encoding,
                number_of_word_types=self.number_of_word_types(),
                number_of_word_tokens=self.number_of_word_tokens(),
                input_file_path=self.file_abspath)
            vprint(verbose, '\t' + fname)

        # ----------------------------------------------------------------------
        vprint(verbose, 'morphological signature objects')

        fname = 'stems_to_words.txt'
        obj = double_sorted(self.stems_to_words().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Stems to words '
                     '(descending order of word count)',
                     headers=['Stem', 'Word count', 'Words'],
                     row_functions=[
                         lambda x: x[0], lambda x: len(x[1]),
                         lambda x: ', '.join(sorted(x[1]))
                     ],
                     column_widths=[15, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'stems_to_words.txt'
        obj = double_sorted(self.stems_to_words().items(),
                            key=lambda x: x[0],
                            reverse=False)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Stems to words '
                     '(alphabetical order of stems)',
                     headers=['Stem', 'Word count', '1st 10 words'],
                     row_functions=[
                         lambda x: x[0], lambda x: len(x[1]),
                         lambda x: ', '.join(sorted(x[1]))
                     ],
                     column_widths=[15, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'signatures_to_stems.txt'
        obj = double_sorted(self.signatures_to_stems().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Signatures to stems',
                     headers=['Signature', 'Stem count', 'Stems'],
                     row_functions=[
                         lambda x: SEP_SIG.join(x[0]), lambda x: len(x[1]),
                         lambda x: ', '.join(sorted(x[1]))
                     ],
                     column_widths=[30, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'signatures_to_stems_truncated.txt'
        obj = double_sorted(self.signatures_to_stems().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Signatures to stems '
                     '(first 10 stems for each sig)',
                     headers=['Signature', 'Stem count', '1st 10 stems'],
                     row_functions=[
                         lambda x: SEP_SIG.join(x[0]), lambda x: len(x[1]),
                         lambda x: ' '.join(sorted(x[1])[:10])
                     ],
                     column_widths=[30, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'stems_to_signatures.txt'
        obj = double_sorted(self.stems_to_signatures().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(
            obj,
            f_path,
            title='Stems to signatures',
            headers=['Stems', 'Signatures'],
            row_functions=[
                lambda x: x[0],
                lambda x: ', '.join(SEP_SIG.join(sig) for sig in sorted(x[1]))
            ],
            column_widths=[15, 0],
            lxa_parameters=self.parameters(),
            test=test,
            encoding=self.encoding,
            number_of_word_types=self.number_of_word_types(),
            number_of_word_tokens=self.number_of_word_tokens(),
            input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'words_to_signatures.txt'
        obj = double_sorted(self.words_to_signatures().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(
            obj,
            f_path,
            title='Words to signatures',
            headers=['Word', 'Sig count', 'Signatures'],
            row_functions=[
                lambda x: x[0], lambda x: len(x[1]),
                lambda x: ', '.join(SEP_SIG.join(sig) for sig in sorted(x[1]))
            ],
            column_widths=[25, 15, 0],
            lxa_parameters=self.parameters(),
            test=test,
            encoding=self.encoding,
            number_of_word_types=self.number_of_word_types(),
            number_of_word_tokens=self.number_of_word_tokens(),
            input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'signatures_to_words.txt'
        obj = double_sorted(self.signatures_to_words().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Signatures to words',
                     headers=['Signature', 'Word count', 'Words'],
                     row_functions=[
                         lambda x: SEP_SIG.join(x[0]), lambda x: len(x[1]),
                         lambda x: ', '.join(sorted(x[1]))
                     ],
                     column_widths=[20, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'signatures_to_words_truncated.txt'
        obj = double_sorted(self.signatures_to_words().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Signatures to words '
                     '(first 10 words for each sig)',
                     headers=['Signature', 'Word count', '1st 10 words'],
                     row_functions=[
                         lambda x: SEP_SIG.join(x[0]), lambda x: len(x[1]),
                         lambda x: ', '.join(sorted(x[1])[:10])
                     ],
                     column_widths=[20, 15, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'words_to_sigtransforms.txt'
        obj = double_sorted(self.words_to_sigtransforms().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Words to sigtransforms',
                     headers=['Word', 'Signature transforms'],
                     row_functions=[
                         lambda x: x[0], lambda x: ', '.join(
                             SEP_SIG.join(sig) + SEP_SIGTRANSFORM + affix
                             for sig, affix in sorted(x[1]))
                     ],
                     column_widths=[20, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'affixes_to_signatures.txt'
        obj = double_sorted(self.affixes_to_signatures().items(),
                            key=lambda x: len(x[1]),
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(
            obj,
            f_path,
            title='Affixes to signatures',
            headers=['Affix', 'Sig count', 'Signatures'],
            row_functions=[
                lambda x: x[0], lambda x: len(x[1]),
                lambda x: ', '.join(SEP_SIG.join(sig) for sig in sorted(x[1]))
            ],
            column_widths=[15, 15, 0],
            lxa_parameters=self.parameters(),
            test=test,
            encoding=self.encoding,
            number_of_word_types=self.number_of_word_types(),
            number_of_word_tokens=self.number_of_word_tokens(),
            input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        # ----------------------------------------------------------------------
        if self.corpus_file_object:
            vprint(verbose, 'manifold objects')

            fname = 'words_to_neighbors.txt'
            obj = list()  # list of tuple(word, list of neighbor words)
            for word in self.wordlist()[:self.parameters()['max_word_types']]:
                obj.append((word, self.words_to_neighbors()[word]))
            f_path = os.path.join(output_dir, fname)
            output_latex(
                obj,
                f_path,
                title='Words to neighbors',
                headers=['Word', 'Neighbors'],
                row_functions=[lambda x: x[0], lambda x: ' '.join(x[1])],
                column_widths=[25, 0],
                lxa_parameters=self.parameters(),
                test=test,
                encoding=self.encoding,
                number_of_word_types=self.number_of_word_types(),
                number_of_word_tokens=self.number_of_word_tokens(),
                input_file_path=self.file_abspath)
            vprint(verbose, '\t' + fname)

        # ----------------------------------------------------------------------
        vprint(verbose, 'phon objects')

        def output_latex_for_phon_words(obj_, f_path_, title_, lxa_parameters_,
                                        test_, encoding_,
                                        number_of_word_types_,
                                        number_of_word_tokens_,
                                        input_file_path_):
            output_latex(obj_,
                         f_path_,
                         title=title_,
                         headers=[
                             'Word', 'Count', 'Frequency', 'Phones',
                             'Unigram plog', 'Avg unigram plog', 'Bigram plog',
                             'Avg bigram plog'
                         ],
                         row_functions=[
                             lambda x: x[0],
                             lambda x: x[1].count,
                             lambda x: '%.6f' % x[1].frequency,
                             lambda x: ' '.join(x[1].phones),
                             lambda x: '%8.3f' % x[1].unigram_plog,
                             lambda x: '%8.3f' % x[1].avg_unigram_plog,
                             lambda x: '%8.3f' % x[1].bigram_plog,
                             lambda x: '%8.3f' % x[1].avg_bigram_plog,
                         ],
                         column_widths=[35, 10, 15, 60, 15, 15, 15, 15],
                         lxa_parameters=lxa_parameters_,
                         test=test_,
                         encoding=encoding_,
                         number_of_word_types=number_of_word_types_,
                         number_of_word_tokens=number_of_word_tokens_,
                         input_file_path=input_file_path_)

        fname = 'wordlist.txt'
        obj_word_phon = list()  # list of tuple(word, list of neighbor words)
        for word in self.wordlist():
            obj_word_phon.append((word, self.word_phonology_dict()[word]))
        f_path = os.path.join(output_dir, 'wordlist.txt')
        output_latex_for_phon_words(obj_word_phon, f_path,
                                    'Wordlist sorted by word count',
                                    self.parameters(), test, self.encoding,
                                    self.number_of_word_types(),
                                    self.number_of_word_tokens(),
                                    self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'wordlist_by_avg_unigram_plog.txt'
        obj_unigram_plog = double_sorted(obj_word_phon,
                                         key=lambda x: x[1].avg_unigram_plog,
                                         reverse=False)
        f_path = os.path.join(output_dir, fname)
        output_latex_for_phon_words(obj_unigram_plog, f_path,
                                    'Wordlist sorted by avg unigram plog',
                                    self.parameters(), test, self.encoding,
                                    self.number_of_word_types(),
                                    self.number_of_word_tokens(),
                                    self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'wordlist_by_avg_bigram_plog.txt'
        obj_bigram_plog = double_sorted(obj_word_phon,
                                        key=lambda x: x[1].avg_bigram_plog,
                                        reverse=False)
        f_path = os.path.join(output_dir, fname)
        output_latex_for_phon_words(obj_bigram_plog, f_path,
                                    'Wordlist sorted by avg bigram plog',
                                    self.parameters(), test, self.encoding,
                                    self.number_of_word_types(),
                                    self.number_of_word_tokens(),
                                    self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'phones.txt'
        obj = double_sorted(self.phone_dict().items(),
                            key=lambda x: x[1].count,
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Phones',
                     headers=['Phone', 'Count', 'Frequency', 'Plog'],
                     row_functions=[
                         lambda x: x[0],
                         lambda x: x[1].count,
                         lambda x: '%.6f' % x[1].frequency,
                         lambda x: '%8.3f' % x[1].plog,
                     ],
                     column_widths=[10, 10, 15, 15],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'biphones.txt'
        obj = double_sorted(self.biphone_dict().items(),
                            key=lambda x: x[1].count,
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(
            obj,
            f_path,
            title='Biphones',
            headers=['Biphone', 'Count', 'Frequency', 'MI', 'Weighted MI'],
            row_functions=[
                lambda x: ' '.join(x[0]),
                lambda x: x[1].count,
                lambda x: '%.6f' % x[1].frequency,
                lambda x: '%8.3f' % x[1].MI,
                lambda x: '%8.3f' % x[1].weighted_MI,
            ],
            column_widths=[10, 10, 15, 15, 15],
            lxa_parameters=self.parameters(),
            test=test,
            encoding=self.encoding,
            number_of_word_types=self.number_of_word_types(),
            number_of_word_tokens=self.number_of_word_tokens(),
            input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'triphones.txt'
        obj = double_sorted(self.phone_trigram_counter().items(),
                            key=lambda x: x[1],
                            reverse=True)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Triphones',
                     headers=['Triphone', 'Count'],
                     row_functions=[
                         lambda x: ' '.join(x[0]),
                         lambda x: x[1],
                     ],
                     column_widths=[15, 10],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        # ----------------------------------------------------------------------
        vprint(verbose, 'trie objects')

        fname = 'words_as_tries.txt'
        obj = list()
        for word in self.wordlist():
            obj.append((word, self.broken_words_left_to_right()[word],
                        self.broken_words_right_to_left()[word]))
        f_path = os.path.join(output_dir, fname)
        output_latex(
            obj,
            f_path,
            title='Words as tries',
            headers=['Word', 'Left-to-right trie', 'Right-to-left trie'],
            row_functions=[
                lambda x: x[0],
                lambda x: ' '.join(x[1]),
                lambda x: ' '.join(x[2]),
            ],
            column_widths=[35, 50, 50],
            lxa_parameters=self.parameters(),
            test=test,
            encoding=self.encoding,
            number_of_word_types=self.number_of_word_types(),
            number_of_word_tokens=self.number_of_word_tokens(),
            input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'successors.txt'
        obj = double_sorted(self.successors().items(),
                            key=lambda x: len(x[1]),
                            reverse=False)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Successors',
                     headers=['String', 'Successors'],
                     row_functions=[
                         lambda x: x[0],
                         lambda x: ' '.join(sorted(x[1])),
                     ],
                     column_widths=[35, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)

        fname = 'predecessors.txt'
        obj = double_sorted(self.predecessors().items(),
                            key=lambda x: len(x[1]),
                            reverse=False)
        f_path = os.path.join(output_dir, fname)
        output_latex(obj,
                     f_path,
                     title='Predecessors',
                     headers=['String', 'Predecessors'],
                     row_functions=[
                         lambda x: x[0],
                         lambda x: ' '.join(sorted(x[1])),
                     ],
                     column_widths=[35, 0],
                     lxa_parameters=self.parameters(),
                     test=test,
                     encoding=self.encoding,
                     number_of_word_types=self.number_of_word_types(),
                     number_of_word_tokens=self.number_of_word_tokens(),
                     input_file_path=self.file_abspath)
        vprint(verbose, '\t' + fname)
# ---- Beispiel #5 ----
    def output_all_results(self, directory=None, verbose=False, test=False):
        """
        Output all Linguistica results to *directory*.

        :param directory: output directory. If not specified, it defaults to
            the current directory given by ``os.getcwd()``.
        :param verbose: if True, print progress messages via ``vprint``.
        :param test: flag forwarded unchanged to ``output_latex``.
        """
        if not directory:
            output_dir = os.getcwd()
        else:
            output_dir = os.path.abspath(directory)

        def write_table(fname, obj, title, headers, row_functions,
                        column_widths):
            # Shared boilerplate for every output file: write one LaTeX
            # table into *output_dir* and report the filename when verbose.
            output_latex(obj, os.path.join(output_dir, fname),
                         title=title,
                         headers=headers,
                         row_functions=row_functions,
                         column_widths=column_widths,
                         lxa_parameters=self.parameters(),
                         test=test, encoding=self.encoding,
                         number_of_word_types=self.number_of_word_types(),
                         number_of_word_tokens=self.number_of_word_tokens(),
                         input_file_path=self.file_abspath)
            vprint('\t' + fname, verbose=verbose)

        # ----------------------------------------------------------------------
        # ngram objects (only available when a corpus file was supplied)
        if self.corpus_file_object:
            vprint('ngram objects', verbose=verbose)

            write_table('word_bigrams.txt',
                        double_sorted(self.word_bigram_counter().items(),
                                      key=lambda x: x[1], reverse=True),
                        title='Word bigrams',
                        headers=['Word bigram', 'Count'],
                        row_functions=[lambda x: ' '.join(x[0]),
                                       lambda x: x[1]],
                        column_widths=[50, 10])

            write_table('word_trigrams.txt',
                        double_sorted(self.word_trigram_counter().items(),
                                      key=lambda x: x[1], reverse=True),
                        title='Word trigrams',
                        headers=['Word trigram', 'Count'],
                        row_functions=[lambda x: ' '.join(x[0]),
                                       lambda x: x[1]],
                        column_widths=[75, 10])

        # ----------------------------------------------------------------------
        vprint('morphological signature objects', verbose=verbose)

        write_table('stems_to_words.txt',
                    double_sorted(self.stems_to_words().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Stems to words '
                          '(descending order of word count)',
                    headers=['Stem', 'Word count', 'Words'],
                    row_functions=[lambda x: x[0],
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(sorted(x[1]))],
                    column_widths=[15, 15, 0])

        # BUG FIX: this table previously reused the filename
        # 'stems_to_words.txt' and silently overwrote the table written
        # just above; it now gets its own file, matching the distinct-file
        # pattern used for the *_truncated signature tables below.
        # NOTE(review): the header says '1st 10 words' but the rows list
        # all words (no [:10] truncation) -- kept as-is; confirm intent.
        write_table('stems_to_words_alphabetical.txt',
                    double_sorted(self.stems_to_words().items(),
                                  key=lambda x: x[0], reverse=False),
                    title='Stems to words '
                          '(alphabetical order of stems)',
                    headers=['Stem', 'Word count', '1st 10 words'],
                    row_functions=[lambda x: x[0],
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(sorted(x[1]))],
                    column_widths=[15, 15, 0])

        write_table('signatures_to_stems.txt',
                    double_sorted(self.signatures_to_stems().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Signatures to stems',
                    headers=['Signature', 'Stem count', 'Stems'],
                    row_functions=[lambda x: SEP_SIG.join(x[0]),
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(sorted(x[1]))],
                    column_widths=[30, 15, 0])

        write_table('signatures_to_stems_truncated.txt',
                    double_sorted(self.signatures_to_stems().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Signatures to stems '
                          '(first 10 stems for each sig)',
                    headers=['Signature', 'Stem count', '1st 10 stems'],
                    row_functions=[lambda x: SEP_SIG.join(x[0]),
                                   lambda x: len(x[1]),
                                   lambda x: ' '.join(sorted(x[1])[:10])],
                    column_widths=[30, 15, 0])

        write_table('stems_to_signatures.txt',
                    double_sorted(self.stems_to_signatures().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Stems to signatures',
                    headers=['Stems', 'Signatures'],
                    row_functions=[lambda x: x[0],
                                   lambda x: ', '.join(
                                       SEP_SIG.join(sig)
                                       for sig in sorted(x[1]))],
                    column_widths=[15, 0])

        write_table('words_to_signatures.txt',
                    double_sorted(self.words_to_signatures().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Words to signatures',
                    headers=['Word', 'Sig count', 'Signatures'],
                    row_functions=[lambda x: x[0],
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(
                                       SEP_SIG.join(sig)
                                       for sig in sorted(x[1]))],
                    column_widths=[25, 15, 0])

        write_table('signatures_to_words.txt',
                    double_sorted(self.signatures_to_words().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Signatures to words',
                    headers=['Signature', 'Word count', 'Words'],
                    row_functions=[lambda x: SEP_SIG.join(x[0]),
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(sorted(x[1]))],
                    column_widths=[20, 15, 0])

        write_table('signatures_to_words_truncated.txt',
                    double_sorted(self.signatures_to_words().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Signatures to words '
                          '(first 10 words for each sig)',
                    headers=['Signature', 'Word count', '1st 10 words'],
                    row_functions=[lambda x: SEP_SIG.join(x[0]),
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(sorted(x[1])[:10])],
                    column_widths=[20, 15, 0])

        write_table('words_to_sigtransforms.txt',
                    double_sorted(self.words_to_sigtransforms().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Words to sigtransforms',
                    headers=['Word', 'Signature transforms'],
                    row_functions=[lambda x: x[0],
                                   lambda x: ', '.join(
                                       SEP_SIG.join(sig) +
                                       SEP_SIGTRANSFORM + affix
                                       for sig, affix in sorted(x[1]))],
                    column_widths=[20, 0])

        write_table('affixes_to_signatures.txt',
                    double_sorted(self.affixes_to_signatures().items(),
                                  key=lambda x: len(x[1]), reverse=True),
                    title='Affixes to signatures',
                    headers=['Affix', 'Sig count', 'Signatures'],
                    row_functions=[lambda x: x[0],
                                   lambda x: len(x[1]),
                                   lambda x: ', '.join(
                                       SEP_SIG.join(sig)
                                       for sig in sorted(x[1]))],
                    column_widths=[15, 15, 0])

        # ----------------------------------------------------------------------
        # manifold objects (only available when a corpus file was supplied)
        if self.corpus_file_object:
            vprint('manifold objects', verbose=verbose)

            max_word_types = self.parameters()['max_word_types']
            write_table('words_to_neighbors.txt',
                        [(word, self.words_to_neighbors()[word])
                         for word in self.wordlist()[:max_word_types]],
                        title='Words to neighbors',
                        headers=['Word', 'Neighbors'],
                        row_functions=[lambda x: x[0],
                                       lambda x: ' '.join(x[1])],
                        column_widths=[25, 0])

        # ----------------------------------------------------------------------
        vprint('phon objects', verbose=verbose)

        def write_phon_word_table(fname, obj, title):
            # The three phon-word tables share the same columns; only the
            # sort order of *obj* and the *title* differ.
            write_table(fname, obj,
                        title=title,
                        headers=['Word', 'Count', 'Frequency', 'Phones',
                                 'Unigram plog', 'Avg unigram plog',
                                 'Bigram plog', 'Avg bigram plog'],
                        row_functions=[
                            lambda x: x[0],
                            lambda x: x[1].count,
                            lambda x: '%.6f' % x[1].frequency,
                            lambda x: ' '.join(x[1].phones),
                            lambda x: '%8.3f' % x[1].unigram_plog,
                            lambda x: '%8.3f' % x[1].avg_unigram_plog,
                            lambda x: '%8.3f' % x[1].bigram_plog,
                            lambda x: '%8.3f' % x[1].avg_bigram_plog,
                        ],
                        column_widths=[35, 10, 15, 60, 15, 15, 15, 15])

        # list of (word, phonology object), in wordlist (word count) order
        obj_word_phon = [(word, self.word_phonology_dict()[word])
                         for word in self.wordlist()]
        write_phon_word_table('wordlist.txt', obj_word_phon,
                              'Wordlist sorted by word count')

        write_phon_word_table(
            'wordlist_by_avg_unigram_plog.txt',
            double_sorted(obj_word_phon,
                          key=lambda x: x[1].avg_unigram_plog,
                          reverse=False),
            'Wordlist sorted by avg unigram plog')

        write_phon_word_table(
            'wordlist_by_avg_bigram_plog.txt',
            double_sorted(obj_word_phon,
                          key=lambda x: x[1].avg_bigram_plog,
                          reverse=False),
            'Wordlist sorted by avg bigram plog')

        write_table('phones.txt',
                    double_sorted(self.phone_dict().items(),
                                  key=lambda x: x[1].count, reverse=True),
                    title='Phones',
                    headers=['Phone', 'Count', 'Frequency', 'Plog'],
                    row_functions=[lambda x: x[0],
                                   lambda x: x[1].count,
                                   lambda x: '%.6f' % x[1].frequency,
                                   lambda x: '%8.3f' % x[1].plog],
                    column_widths=[10, 10, 15, 15])

        write_table('biphones.txt',
                    double_sorted(self.biphone_dict().items(),
                                  key=lambda x: x[1].count, reverse=True),
                    title='Biphones',
                    headers=['Biphone', 'Count', 'Frequency',
                             'MI', 'Weighted MI'],
                    row_functions=[lambda x: ' '.join(x[0]),
                                   lambda x: x[1].count,
                                   lambda x: '%.6f' % x[1].frequency,
                                   lambda x: '%8.3f' % x[1].MI,
                                   lambda x: '%8.3f' % x[1].weighted_MI],
                    column_widths=[10, 10, 15, 15, 15])

        write_table('triphones.txt',
                    double_sorted(self.phone_trigram_counter().items(),
                                  key=lambda x: x[1], reverse=True),
                    title='Triphones',
                    headers=['Triphone', 'Count'],
                    row_functions=[lambda x: ' '.join(x[0]),
                                   lambda x: x[1]],
                    column_widths=[15, 10])

        # ----------------------------------------------------------------------
        vprint('trie objects', verbose=verbose)

        write_table('words_as_tries.txt',
                    [(word,
                      self.broken_words_left_to_right()[word],
                      self.broken_words_right_to_left()[word])
                     for word in self.wordlist()],
                    title='Words as tries',
                    headers=['Word', 'Left-to-right trie',
                             'Right-to-left trie'],
                    row_functions=[lambda x: x[0],
                                   lambda x: ' '.join(x[1]),
                                   lambda x: ' '.join(x[2])],
                    column_widths=[35, 50, 50])

        write_table('successors.txt',
                    double_sorted(self.successors().items(),
                                  key=lambda x: len(x[1]), reverse=False),
                    title='Successors',
                    headers=['String', 'Successors'],
                    row_functions=[lambda x: x[0],
                                   lambda x: ' '.join(sorted(x[1]))],
                    column_widths=[35, 0])

        write_table('predecessors.txt',
                    double_sorted(self.predecessors().items(),
                                  key=lambda x: len(x[1]), reverse=False),
                    title='Predecessors',
                    headers=['String', 'Predecessors'],
                    row_functions=[lambda x: x[0],
                                   lambda x: ' '.join(sorted(x[1]))],
                    column_widths=[35, 0])