Ejemplo n.º 1
0
    def process_data(self):
        """Finalize processing: report progress, then emit the merged
        per-file frequency and statistics dictionaries."""
        super().process_data()

        self.progress_updated.emit(self.tr('Rendering figure ...'))

        time.sleep(0.1)

        # Merge the per-file dictionaries once each, then hand both off
        # to listeners in a single signal emission.
        freqs_merged = wordless_misc.merge_dicts(self.tokens_freq_files)
        stats_merged = wordless_misc.merge_dicts(self.tokens_stats_files)

        self.processing_finished.emit(freqs_merged, stats_merged)
Ejemplo n.º 2
0
    def run(self):
        """Run the worker: report progress, then emit merged keyword
        frequency and statistics dictionaries when done."""
        super().run()

        self.progress_updated.emit(self.tr('Rendering figure ...'))

        time.sleep(0.1)

        # Merge each family of per-file dictionaries before signalling
        # completion so receivers get a single consolidated mapping each.
        merged_freqs = wordless_misc.merge_dicts(self.keywords_freq_files)
        merged_stats = wordless_misc.merge_dicts(self.keywords_stats_files)

        self.worker_done.emit(merged_freqs, merged_stats)
Ejemplo n.º 3
0
    def update_gui(texts_stats_files):
        """Fill the overview table with per-file and total text statistics.

        texts_stats_files: one entry per file plus a final merged "Total"
        entry; each entry is indexed as
        [len_paras_in_sentence, len_paras_in_clause, len_paras_in_token,
         len_sentences, len_clauses, len_tokens, len_types, ttr, sttr]
        (layout inferred from the unpacking below — TODO confirm against
        the producing worker).
        """
        # Only build the table when at least one statistic is non-empty
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repaints while the table is rebuilt in bulk
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            count_tokens_lens = []

            # Insert column (total)
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 is_breakdown=True)

            # Totals come from the last entry (the merged "Total" file);
            # each per-unit list holds one item per paragraph/sentence/
            # clause/token/type, so its length is the corresponding count
            count_paras_total = len(texts_stats_files[-1][0])
            count_sentences_total = len(texts_stats_files[-1][3])
            count_clauses_total = len(texts_stats_files[-1][4])
            count_tokens_total = len(texts_stats_files[-1][5])
            count_types_total = len(texts_stats_files[-1][6])
            # Summing per-token lengths yields the character count
            count_chars_total = sum(texts_stats_files[-1][5])

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_sentence)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Each count occupies two rows: raw value, then value with
                # its total (presumably rendered as a percentage — verify
                # against set_item_num)
                # Count of Paragraphs
                table.set_item_num(0, i, count_paras)
                table.set_item_num(1, i, count_paras, count_paras_total)
                # Count of Sentences
                table.set_item_num(2, i, count_sentences)
                table.set_item_num(3, i, count_sentences,
                                   count_sentences_total)
                # Count of Clauses
                table.set_item_num(4, i, count_clauses)
                table.set_item_num(5, i, count_clauses, count_clauses_total)
                # Count of Tokens
                table.set_item_num(6, i, count_tokens)
                table.set_item_num(7, i, count_tokens, count_tokens_total)
                # Count of Types
                table.set_item_num(8, i, count_types)
                table.set_item_num(9, i, count_types, count_types_total)
                # Count of Characters
                table.set_item_num(10, i, count_chars)
                table.set_item_num(11, i, count_chars, count_chars_total)
                # Type-Token Ratio
                table.set_item_num(12, i, ttr)
                # Type-Token Ratio (Standardized)
                table.set_item_num(13, i, sttr)

                # Paragraph Length
                # Guard against numpy.mean/std on empty lists (would warn
                # and produce NaN); write explicit zeros instead
                if count_paras == 0:
                    table.set_item_num(14, i, 0)
                    table.set_item_num(15, i, 0)
                    table.set_item_num(16, i, 0)
                    table.set_item_num(17, i, 0)
                    table.set_item_num(18, i, 0)
                    table.set_item_num(19, i, 0)
                else:
                    table.set_item_num(14, i,
                                       numpy.mean(len_paras_in_sentence))
                    table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                    table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                    table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                    table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                    table.set_item_num(19, i, numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num(20, i, 0)
                    table.set_item_num(21, i, 0)
                else:
                    table.set_item_num(20, i, numpy.mean(len_sentences))
                    table.set_item_num(21, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num(22, i, 0)
                    table.set_item_num(23, i, 0)
                else:
                    table.set_item_num(22, i, numpy.mean(len_clauses))
                    table.set_item_num(23, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num(24, i, 0)
                    table.set_item_num(25, i, 0)
                else:
                    table.set_item_num(24, i, numpy.mean(len_tokens))
                    table.set_item_num(25, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num(26, i, 0)
                    table.set_item_num(27, i, 0)
                else:
                    table.set_item_num(26, i, numpy.mean(len_types))
                    table.set_item_num(27, i, numpy.std(len_types))

                # Frequency of each token length for this file, used below
                # for the "Count of n-length Tokens" rows
                count_tokens_lens.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            if any(count_tokens_lens):
                len_files = len(files)
                # Merged mapping: token length -> per-file counts plus total
                count_tokens_lens_files = wordless_misc.merge_dicts(
                    count_tokens_lens)
                # Last element of each merged list is the total count
                count_tokens_lens_total = {
                    len_token: count_tokens_files[-1]
                    for len_token, count_tokens_files in
                    count_tokens_lens_files.items()
                }
                len_tokens_max = max(count_tokens_lens_files)

                # Two rows per token length: raw count and percentage
                for i in range(len_tokens_max):
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens'),
                        is_int=True,
                        is_cumulative=True)
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens %'),
                        is_pct=True,
                        is_cumulative=True)

                for i in range(len_tokens_max):
                    # Lengths absent from every file default to all-zero
                    # counts (one slot per file plus the total column)
                    counts = count_tokens_lens_files.get(
                        i + 1, [0] * (len_files + 1))

                    for j, count in enumerate(counts):
                        # Rows were appended in pairs, so the row for length
                        # i+1 sits (len_tokens_max - i) * 2 rows from the end
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2,
                                           col=j,
                                           val=count)
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2 + 1,
                                           col=j,
                                           val=count,
                                           total=count_tokens_lens_total.get(
                                               i + 1, 0))

            # Re-enable repaints and signals now that the rebuild is done
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()

            # Fire itemChanged once so dependent widgets refresh
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)
Ejemplo n.º 4
0
    def data_received(texts_stats_files):
        """Populate the overview table from per-file text statistics, then
        close the progress dialog.

        texts_stats_files: one entry per file plus a final merged "Total"
        entry; each entry is indexed as
        [len_paras_in_sentence, len_paras_in_clause, len_paras_in_token,
         len_sentences, len_clauses, len_tokens, len_types, ttr, sttr]
        (layout inferred from the unpacking below — TODO confirm against
        the producing worker).

        Fix: the tag-mode vertical header labels for rows 10-11 repeated
        'Paragraph Length in Sentence ...'; those rows are filled from
        len_paras_in_clause, so they are labelled 'in Clause' now.
        """
        # Only build the table when at least one statistic is non-empty
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repaints while the table is rebuilt in bulk
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            len_tokens_files = []

            # Insert one breakdown column per file, before the Total column
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 breakdown=True)

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                # Each per-unit list holds one item per paragraph/sentence/
                # clause/token/type, so its length is the corresponding count
                count_paras = len(len_paras_in_token)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                # Summing per-token lengths yields the character count
                count_chars = sum(len_tokens)

                # Count of Paragraphs
                table.set_item_num_cumulative(0, i, count_paras)
                # Count of Sentences
                table.set_item_num_cumulative(1, i, count_sentences)
                # Count of Clauses
                table.set_item_num_cumulative(2, i, count_clauses)
                # Count of Tokens
                table.set_item_num_cumulative(3, i, count_tokens)
                # Count of Types
                table.set_item_num_pct(4, i, count_types)
                # Count of Characters
                table.set_item_num_cumulative(5, i, count_chars)
                # Type-token Ratio
                table.set_item_num_float(6, i, ttr)
                # Type-token Ratio (Standardized)
                table.set_item_num_float(7, i, sttr)

                # Paragraph Length
                # Guard against numpy.mean/std on empty lists (would warn
                # and produce NaN); write explicit zeros instead
                if count_paras == 0:
                    table.set_item_num_float(8, i, 0)
                    table.set_item_num_float(9, i, 0)
                    table.set_item_num_float(10, i, 0)
                    table.set_item_num_float(11, i, 0)
                    table.set_item_num_float(12, i, 0)
                    table.set_item_num_float(13, i, 0)
                else:
                    table.set_item_num_float(8, i,
                                             numpy.mean(len_paras_in_sentence))
                    table.set_item_num_float(9, i,
                                             numpy.std(len_paras_in_sentence))
                    table.set_item_num_float(10, i,
                                             numpy.mean(len_paras_in_clause))
                    table.set_item_num_float(11, i,
                                             numpy.std(len_paras_in_clause))
                    table.set_item_num_float(12, i,
                                             numpy.mean(len_paras_in_token))
                    table.set_item_num_float(13, i,
                                             numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num_float(14, i, 0)
                    table.set_item_num_float(15, i, 0)
                else:
                    table.set_item_num_float(14, i, numpy.mean(len_sentences))
                    table.set_item_num_float(15, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num_float(16, i, 0)
                    table.set_item_num_float(17, i, 0)
                else:
                    table.set_item_num_float(16, i, numpy.mean(len_clauses))
                    table.set_item_num_float(17, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num_float(18, i, 0)
                    table.set_item_num_float(19, i, 0)
                else:
                    table.set_item_num_float(18, i, numpy.mean(len_tokens))
                    table.set_item_num_float(19, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num_float(20, i, 0)
                    table.set_item_num_float(21, i, 0)
                else:
                    table.set_item_num_float(20, i, numpy.mean(len_types))
                    table.set_item_num_float(21, i, numpy.std(len_types))

                # Frequency of each token length for this file, used below
                # for the "Count of n-length Tokens" rows
                len_tokens_files.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            len_files = len(files)
            # Merged mapping: token length -> per-file counts plus total
            len_tokens_total = wordless_misc.merge_dicts(len_tokens_files)
            len_tokens_max = max(len_tokens_total)

            # Use tags only
            if settings['token_settings']['use_tags']:
                # Re-label rows in terms of tags instead of tokens
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Clauses'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-tag Ratio'),
                    main.tr('Type-tag Ratio (Standardized)'),
                    main.tr('Paragraph Length in Sentence (Mean)'),
                    main.tr(
                        'Paragraph Length in Sentence (Standard Deviation)'),
                    # Rows 10-11 are filled from len_paras_in_clause above,
                    # so label them "in Clause" (was a copy-paste duplicate
                    # of the "in Sentence" labels)
                    main.tr('Paragraph Length in Clause (Mean)'),
                    main.tr(
                        'Paragraph Length in Clause (Standard Deviation)'),
                    main.tr('Paragraph Length in Tag (Mean)'),
                    main.tr('Paragraph Length in Tag (Standard Deviation)'),
                    main.tr('Sentence Length in Tag (Mean)'),
                    main.tr('Sentence Length in Tag (Standard Deviation)'),
                    main.tr('Clause Length in Tag (Mean)'),
                    main.tr('Clause Length in Tag (Standard Deviation)'),
                    main.tr('Tag Length in Character (Mean)'),
                    main.tr('Tag Length in Character (Standard Deviation)'),
                    main.tr('Tag Type Length in Character (Mean)'),
                    main.tr(
                        'Tag Type Length in Character (Standard Deviation)')
                ])

                for i in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tags'),
                                     num=True,
                                     pct=True,
                                     cumulative=True)
            else:
                for i in range(len_tokens_max):

                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-length Tokens'),
                        num=True,
                        pct=True,
                        cumulative=True)

            for i in range(len_tokens_max):
                # Lengths absent from every file default to all-zero counts
                # (one slot per file plus the total column)
                freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

                for j, freq in enumerate(freqs):
                    table.set_item_num_cumulative(
                        table.rowCount() - len_tokens_max + i, j, freq)

            # Re-enable signals and repaints now that the rebuild is done
            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            # Fire itemChanged once so dependent widgets refresh
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        # Close the progress dialog whether or not results were produced
        dialog_progress.accept()
Ejemplo n.º 5
0
    def data_received(texts_stats_files, texts_len_tokens_files):
        """Populate the overview table with text statistics and token-length
        counts, then close the progress dialog.

        texts_stats_files: one entry per file plus (presumably) a merged
        total entry; each entry is indexed as
        [count_paras, count_sentences, count_tokens, count_types,
         count_chars, ttr, sttr] (inferred from the unpacking below —
        TODO confirm against the producing worker).
        texts_len_tokens_files: per-file mappings of token length to count,
        merged below via wordless_misc.merge_dicts.
        """
        # Only build the table when at least one file produced counts
        if any(texts_len_tokens_files):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repaints while the table is rebuilt in bulk
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            # Insert one breakdown column per file, before the Total column
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')), file['name'], breakdown = True)

            for i, stats in enumerate(texts_stats_files):
                count_paras = stats[0]
                count_sentences = stats[1]
                count_tokens = stats[2]
                count_types = stats[3]
                count_chars = stats[4]
                ttr = stats[5]
                sttr = stats[6]

                table.set_item_num_cumulative(0, i, count_paras)
                table.set_item_num_cumulative(1, i, count_sentences)
                table.set_item_num_cumulative(2, i, count_tokens)
                table.set_item_num_pct(3, i, count_types)
                table.set_item_num_cumulative(4, i, count_chars)
                table.set_item_num_float(5, i, ttr)
                table.set_item_num_float(6, i, sttr)

                # Average lengths; guard each ratio against division by zero
                if count_paras == 0:
                    table.set_item_num_float(7, i, 0)
                    table.set_item_num_float(8, i, 0)
                else:
                    table.set_item_num_float(7, i, count_sentences / count_paras)
                    table.set_item_num_float(8, i, count_tokens / count_paras)

                if count_sentences == 0:
                    table.set_item_num_float(9, i, 0)
                else:
                    table.set_item_num_float(9, i, count_tokens / count_sentences)

                if count_tokens == 0:
                    table.set_item_num_float(10, i, 0)
                else:
                    table.set_item_num_float(10, i, count_chars / count_tokens)

            # Count of n-length Tokens
            # Merged mapping: token length -> per-file counts plus total
            len_tokens_total = wordless_misc.merge_dicts(texts_len_tokens_files)
            len_tokens_max = max(len_tokens_total)

            # In tag-only mode, re-label rows in terms of tags
            if settings['token_settings']['use_tags']:
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-Tag Ratio'),
                    main.tr('Type-Tag Ratio (Standardized)'),
                    main.tr('Average Paragraph Length (in Sentence)'),
                    main.tr('Average Paragraph Length (in Tag)'),
                    main.tr('Average Sentence Length (in Tag)'),
                    main.tr('Average Tag Length (in Character)')
                ])

                for i in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tags'),
                                     num = True, pct = True, cumulative = True)
            else:
                for i in range(len_tokens_max):

                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tokens'),
                                     num = True, pct = True, cumulative = True)

            len_files = len(files)

            for i in range(len_tokens_max):
                # Lengths absent from every file default to all-zero counts
                # (one slot per file plus the total column)
                freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

                for j, freq in enumerate(freqs):
                    table.set_item_num_cumulative(table.rowCount() - len_tokens_max + i, j, freq)

            # Re-enable signals and repaints now that the rebuild is done
            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            # Fire itemChanged once so dependent widgets refresh
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        # Close the progress dialog whether or not results were produced
        dialog_progress.accept()