Example #1
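Fills the keyword-extraction results table once the worker delivers keyword frequencies and statistics: it inserts per-file columns for frequency, the chosen significance test's statistic, p-value, and Bayes factor, and the chosen effect-size measure (plus reference-file and total columns), writes one row per keyword, and sorts by the first file's p-value. Here and in the examples below, names such as table, main, files, ref_file, settings, and dialog_progress are captured from the enclosing scope of the snippet.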
    def data_received(keywords_freq_files, keywords_stats_files):
        if keywords_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_test_significance = settings['generation_settings'][
                'test_significance']
            text_measure_effect_size = settings['generation_settings'][
                'measure_effect_size']

            (text_test_stat, text_p_value,
             text_bayes_factor) = main.settings_global['tests_significance'][
                 'keywords'][text_test_significance]['cols']
            text_effect_size = main.settings_global['measures_effect_size'][
                'keywords'][text_measure_effect_size]['col']

            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            # Insert columns (Files)
            table.insert_col(table.columnCount() - 1,
                             main.tr(f'[{ref_file["name"]}]\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            for file in files:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 num=True,
                                 pct=True,
                                 cumulative=True,
                                 breakdown=True)

                if text_test_stat:
                    table.insert_col(
                        table.columnCount() - 1,
                        main.tr(f'[{file["name"]}]\n{text_test_stat}'),
                        num=True,
                        breakdown=True)

                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\n{text_p_value}'),
                                 num=True,
                                 breakdown=True)

                if text_bayes_factor:
                    table.insert_col(
                        table.columnCount() - 1,
                        main.tr(f'[{file["name"]}]\n{text_bayes_factor}'),
                        num=True,
                        breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_effect_size}'),
                    num=True,
                    breakdown=True)

            # Insert columns (Total)
            table.insert_col(table.columnCount() - 1,
                             main.tr('Total\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            if text_test_stat:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'Total\n{text_test_stat}'),
                                 num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_p_value}'),
                             num=True)

            if text_bayes_factor:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'Total\n{text_bayes_factor}'),
                                 num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_effect_size}'),
                             num=True)

            # Sort by p-value of the first file
            table.sortByColumn(
                table.find_col(
                    main.tr(f'[{files[0]["name"]}]\n{text_p_value}')),
                Qt.AscendingOrder)

            cols_freq = table.find_cols(main.tr('\nFrequency'))

            if text_test_stat:
                cols_test_stat = table.find_cols(
                    main.tr(f'\n{text_test_stat}'))

            cols_p_value = table.find_cols(main.tr('\np-value'))

            if text_bayes_factor:
                cols_bayes_factor = table.find_cols(main.tr('\nBayes Factor'))

            cols_effect_size = table.find_cols(
                main.tr(f'\n{text_effect_size}'))
            col_number_files_found = table.find_col(
                main.tr('Number of\nFiles Found'))

            len_files = len(files)

            table.setRowCount(len(keywords_freq_files))

            for i, (keyword, stats_files) in enumerate(
                    wordless_sorting.sorted_keywords_stats_files(
                        keywords_stats_files)):
                keyword_freq_files = keywords_freq_files[keyword]

                # Rank
                table.set_item_num_int(i, 0, -1)

                # Keywords
                table.setItem(i, 1,
                              wordless_table.Wordless_Table_Item(keyword))

                # Frequency
                for j, freq in enumerate(keyword_freq_files):
                    table.set_item_num_cumulative(i, cols_freq[j], freq)

                for j, (test_stat, p_value, bayes_factor,
                        effect_size) in enumerate(stats_files):
                    # Test Statistic
                    if text_test_stat:
                        table.set_item_num_float(i, cols_test_stat[j],
                                                 test_stat)

                    # p-value
                    table.set_item_num_float(i, cols_p_value[j], p_value)

                    # Bayes Factor
                    if text_bayes_factor:
                        table.set_item_num_float(i, cols_bayes_factor[j],
                                                 bayes_factor)

                    # Effect Size
                    table.set_item_num_float(i, cols_effect_size[j],
                                             effect_size)

                # Number of Files Found
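                # (keyword_freq_files[1:-1] skips the reference file's count
                # at index 0 and the total at index -1, leaving only the
                # observed files)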
                table.set_item_num_pct(
                    i, col_number_files_found,
                    len([freq for freq in keyword_freq_files[1:-1] if freq]),
                    len_files)

            table.blockSignals(False)
            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()
            table.update_items_width()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()
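All seven handlers wrap their table fills in the same Qt idiom: block signals, disable sorting, and suspend repaints before inserting items, then restore all three once every cell is set, so the view recomputes its layout once rather than per item. Below is a minimal, self-contained sketch of that pattern; it assumes PyQt5 (which the Qt references here suggest), and the table and data are illustrative, not part of the original code.

    import sys

    from PyQt5.QtCore import Qt
    from PyQt5.QtWidgets import QApplication, QTableWidget, QTableWidgetItem

    app = QApplication(sys.argv)

    table = QTableWidget(0, 2)
    table.setHorizontalHeaderLabels(['Token', 'Frequency'])

    data = [('the', 120), ('of', 87), ('and', 75)]

    table.blockSignals(True)        # no per-cell itemChanged signals
    table.setSortingEnabled(False)  # sorting mid-fill would reorder rows
    table.setUpdatesEnabled(False)  # defer repainting until the fill is done

    table.setRowCount(len(data))

    for row, (token, freq) in enumerate(data):
        table.setItem(row, 0, QTableWidgetItem(token))

        item_freq = QTableWidgetItem()
        # Store the count as a number so column sorting is numeric
        item_freq.setData(Qt.DisplayRole, freq)
        table.setItem(row, 1, item_freq)

    table.blockSignals(False)
    table.setSortingEnabled(True)
    table.setUpdatesEnabled(True)

    # Sort once, after the bulk fill, as the examples do
    table.sortByColumn(1, Qt.DescendingOrder)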
Example #2
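The token-frequency counterpart of Example #1: per-file columns for frequency, the selected dispersion measure, and the selected adjusted-frequency measure, with rows sorted by the first file's frequency.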
    def data_received(tokens_freq_files, tokens_stats_files):
        if tokens_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_measure_dispersion = settings['generation_settings'][
                'measure_dispersion']
            text_measure_adjusted_freq = settings['generation_settings'][
                'measure_adjusted_freq']

            text_dispersion = main.settings_global['measures_dispersion'][
                text_measure_dispersion]['col']
            text_adjusted_freq = main.settings_global[
                'measures_adjusted_freq'][text_measure_adjusted_freq]['col']

            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            if settings['token_settings']['use_tags']:
                table.setHorizontalHeaderLabels([
                    main.tr('Rank'),
                    main.tr('Tags'),
                    main.tr('Number of\nFiles Found')
                ])

            # Insert Columns (Files)
            for file in files:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 num=True,
                                 pct=True,
                                 cumulative=True,
                                 breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_dispersion}'),
                    num=True,
                    breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_adjusted_freq}'),
                    num=True,
                    breakdown=True)

            # Insert Columns (Total)
            table.insert_col(table.columnCount() - 1,
                             main.tr('Total\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_dispersion}'),
                             num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_adjusted_freq}'),
                             num=True)

            # Sort by frequency of the first file
            table.sortByColumn(
                table.find_col(main.tr(f'[{files[0]["name"]}]\nFrequency')),
                Qt.DescendingOrder)

            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_dispersion = table.find_cols(main.tr(f'\n{text_dispersion}'))
            cols_adjusted_freq = table.find_cols(
                main.tr(f'\n{text_adjusted_freq}'))
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))

            len_files = len(files)

            table.setRowCount(len(tokens_freq_files))

            for i, (token, freq_files) in enumerate(
                    wordless_sorting.sorted_tokens_freq_files(
                        tokens_freq_files)):
                stats_files = tokens_stats_files[token]

                # Rank
                table.set_item_num_int(i, 0, -1)

                # Tokens
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(token))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num_cumulative(i, cols_freq[j], freq)

                for j, (dispersion, adjusted_freq) in enumerate(stats_files):
                    # Dispersion
                    table.set_item_num_float(i, cols_dispersion[j], dispersion)

                    # Adjusted Frequency
                    table.set_item_num_float(i, cols_adjusted_freq[j],
                                             adjusted_freq)

                # Number of Files Found
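                # (freq_files[:-1] drops the trailing total, leaving one
                # count per file)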
                table.set_item_num_pct(
                    i, col_files_found,
                    len([freq for freq in freq_files[:-1] if freq]), len_files)

            table.blockSignals(False)
            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()
            table.update_items_width()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()
Example #3
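An apparent variant of Example #1 from another revision of the same codebase: percentages get their own 'Frequency %' columns rather than a pct flag on a single column, and the sort is requested through the header's sort indicator.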
    def update_gui(keywords_freq_files, keywords_stats_files):
        if keywords_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_test_significance = settings['generation_settings']['test_significance']
            text_measure_effect_size = settings['generation_settings']['measure_effect_size']

            (text_test_stat,
             text_p_value,
             text_bayes_factor) = main.settings_global['tests_significance']['keyword'][text_test_significance]['cols']
            text_effect_size = main.settings_global['measures_effect_size']['keyword'][text_measure_effect_size]['col']

            # Insert columns (files)
            table.insert_col(table.columnCount() - 2,
                             main.tr(f'[{ref_file["name"]}]\nFrequency'),
                             is_int=True, is_cumulative=True)
            table.insert_col(table.columnCount() - 2,
                             main.tr(f'[{ref_file["name"]}]\nFrequency %'),
                             is_pct=True, is_cumulative=True)

            for file in files:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 is_int=True, is_cumulative=True, is_breakdown=True)
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency %'),
                                 is_pct=True, is_cumulative=True, is_breakdown=True)

                if text_test_stat:
                    table.insert_col(table.columnCount() - 2,
                                     main.tr(f'[{file["name"]}]\n{text_test_stat}'),
                                     is_float=True, is_breakdown=True)

                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\n{text_p_value}'),
                                 is_float=True, is_breakdown=True)

                if text_bayes_factor:
                    table.insert_col(table.columnCount() - 2,
                                     main.tr(f'[{file["name"]}]\n{text_bayes_factor}'),
                                     is_float=True, is_breakdown=True)

                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\n{text_effect_size}'),
                                 is_float=True, is_breakdown=True)

            # Insert columns (total)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency'),
                             is_int=True, is_cumulative=True)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency %'),
                             is_pct=True, is_cumulative=True)

            if text_test_stat:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'Total\n{text_test_stat}'),
                                 is_float=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_p_value}'),
                             is_float=True)

            if text_bayes_factor:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'Total\n{text_bayes_factor}'),
                                 is_float=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_effect_size}'),
                             is_float=True)

            # Sort by p-value of the first file
            table.horizontalHeader().setSortIndicator(
                table.find_col(main.tr(f'[{files[0]["name"]}]\n{text_p_value}')),
                Qt.AscendingOrder
            )

            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_freq_pct = table.find_cols(main.tr('\nFrequency %'))

            for col in cols_freq_pct:
                cols_freq.remove(col)

            if text_test_stat:
                cols_test_stat = table.find_cols(main.tr(f'\n{text_test_stat}'))

            cols_p_value = table.find_cols(main.tr('\np-value'))

            if text_bayes_factor:
                cols_bayes_factor = table.find_cols(main.tr('\nBayes Factor'))

            cols_effect_size = table.find_cols(main.tr(f'\n{text_effect_size}'))
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))
            col_files_found_pct = table.find_col(main.tr('Number of\nFiles Found %'))

            freq_totals = numpy.array(list(keywords_freq_files.values())).sum(axis=0)
            len_files = len(files)

            table.setRowCount(len(keywords_freq_files))

            for i, (keyword, stats_files) in enumerate(wordless_sorting.sorted_keywords_stats_files(keywords_stats_files)):
                freq_files = keywords_freq_files[keyword]

                # Rank
                table.set_item_num(i, 0, -1)

                # Keyword
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(keyword))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num(i, cols_freq[j], freq)
                    table.set_item_num(i, cols_freq_pct[j], freq, freq_totals[j])

                for j, (test_stat, p_value, bayes_factor, effect_size) in enumerate(stats_files):
                    # Test Statistic
                    if text_test_stat:
                        table.set_item_num(i, cols_test_stat[j], test_stat)

                    # p-value
                    table.set_item_num(i, cols_p_value[j], p_value)

                    # Bayes Factor
                    if text_bayes_factor:
                        table.set_item_num(i, cols_bayes_factor[j], bayes_factor)

                    # Effect Size
                    table.set_item_num(i, cols_effect_size[j], effect_size)

                # Number of Files Found
                num_files_found = len([freq for freq in freq_files[1:-1] if freq])

                table.set_item_num(i, col_files_found, num_files_found)
                table.set_item_num(i, col_files_found_pct, num_files_found, len_files)

            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)
Example #4
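Populates a text-profiling table: counts of paragraphs, sentences, clauses, tokens, types, and characters, the (standardized) type-token ratio, means and standard deviations of the various length measures, and one row per token length appended at the bottom.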
    def data_received(texts_stats_files):
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            len_tokens_files = []

            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 breakdown=True)

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_token)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Count of Paragraphs
                table.set_item_num_cumulative(0, i, count_paras)
                # Count of Sentences
                table.set_item_num_cumulative(1, i, count_sentences)
                # Count of Clauses
                table.set_item_num_cumulative(2, i, count_clauses)
                # Count of Tokens
                table.set_item_num_cumulative(3, i, count_tokens)
                # Count of Types
                table.set_item_num_pct(4, i, count_types)
                # Count of Characters
                table.set_item_num_cumulative(5, i, count_chars)
                # Type-token Ratio
                table.set_item_num_float(6, i, ttr)
                # Type-token Ratio (Standardized)
                table.set_item_num_float(7, i, sttr)

                # Paragraph Length
                if count_paras == 0:
                    table.set_item_num_float(8, i, 0)
                    table.set_item_num_float(9, i, 0)
                    table.set_item_num_float(10, i, 0)
                    table.set_item_num_float(11, i, 0)
                    table.set_item_num_float(12, i, 0)
                    table.set_item_num_float(13, i, 0)
                else:
                    table.set_item_num_float(8, i,
                                             numpy.mean(len_paras_in_sentence))
                    table.set_item_num_float(9, i,
                                             numpy.std(len_paras_in_sentence))
                    table.set_item_num_float(10, i,
                                             numpy.mean(len_paras_in_clause))
                    table.set_item_num_float(11, i,
                                             numpy.std(len_paras_in_clause))
                    table.set_item_num_float(12, i,
                                             numpy.mean(len_paras_in_token))
                    table.set_item_num_float(13, i,
                                             numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num_float(14, i, 0)
                    table.set_item_num_float(15, i, 0)
                else:
                    table.set_item_num_float(14, i, numpy.mean(len_sentences))
                    table.set_item_num_float(15, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num_float(16, i, 0)
                    table.set_item_num_float(17, i, 0)
                else:
                    table.set_item_num_float(16, i, numpy.mean(len_clauses))
                    table.set_item_num_float(17, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num_float(18, i, 0)
                    table.set_item_num_float(19, i, 0)
                else:
                    table.set_item_num_float(18, i, numpy.mean(len_tokens))
                    table.set_item_num_float(19, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num_float(20, i, 0)
                    table.set_item_num_float(21, i, 0)
                else:
                    table.set_item_num_float(20, i, numpy.mean(len_types))
                    table.set_item_num_float(21, i, numpy.std(len_types))

                len_tokens_files.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            len_files = len(files)
            len_tokens_total = wordless_misc.merge_dicts(len_tokens_files)
            len_tokens_max = max(len_tokens_total)

            # Use tags only
            if settings['token_settings']['use_tags']:
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Clauses'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-tag Ratio'),
                    main.tr('Type-tag Ratio (Standardized)'),
                    main.tr('Paragraph Length in Sentence (Mean)'),
                    main.tr(
                        'Paragraph Length in Sentence (Standard Deviation)'),
                    main.tr('Paragraph Length in Clause (Mean)'),
                    main.tr(
                        'Paragraph Length in Clause (Standard Deviation)'),
                    main.tr('Paragraph Length in Tag (Mean)'),
                    main.tr('Paragraph Length in Tag (Standard Deviation)'),
                    main.tr('Sentence Length in Tag (Mean)'),
                    main.tr('Sentence Length in Tag (Standard Deviation)'),
                    main.tr('Clause Length in Tag (Mean)'),
                    main.tr('Clause Length in Tag (Standard Deviation)'),
                    main.tr('Tag Length in Character (Mean)'),
                    main.tr('Tag Length in Character (Standard Deviation)'),
                    main.tr('Tag Type Length in Character (Mean)'),
                    main.tr(
                        'Tag Type Length in Character (Standard Deviation)')
                ])

                for i in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tags'),
                                     num=True,
                                     pct=True,
                                     cumulative=True)
            else:
                for i in range(len_tokens_max):
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-length Tokens'),
                        num=True,
                        pct=True,
                        cumulative=True)

            for i in range(len_tokens_max):
                freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

                for j, freq in enumerate(freqs):
                    table.set_item_num_cumulative(
                        table.rowCount() - len_tokens_max + i, j, freq)

            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()
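Examples #4, #6, and #7 all rely on wordless_misc.merge_dicts, which is not shown. From how its result is consumed (len_tokens_total.get(i + 1, [0] * (len_files + 1))), it plausibly aligns a list of per-file Counters into a single dict mapping each key to a list of counts, one slot per input dict. A hypothetical sketch under that assumption:

    from collections import Counter

    def merge_dicts(dicts):
        # Hypothetical reconstruction inferred from usage: each key maps to
        # a list of counts aligned with the order of the input dicts, with
        # 0 filled in where a key is absent from a given dict.
        merged = {}

        for i, d in enumerate(dicts):
            for key, val in d.items():
                merged.setdefault(key, [0] * len(dicts))[i] = val

        return merged

    # e.g. merge_dicts([Counter([1, 2, 2]), Counter([2, 3])])
    # returns {1: [1, 0], 2: [2, 1], 3: [0, 1]}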
Example #5
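The separate-percentage-columns variant of Example #2, analogous to how Example #3 varies Example #1.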
    def update_gui(tokens_freq_files, tokens_stats_files):
        if tokens_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_measure_dispersion = settings['generation_settings'][
                'measure_dispersion']
            text_measure_adjusted_freq = settings['generation_settings'][
                'measure_adjusted_freq']

            text_dispersion = main.settings_global['measures_dispersion'][
                text_measure_dispersion]['col']
            text_adjusted_freq = main.settings_global[
                'measures_adjusted_freq'][text_measure_adjusted_freq]['col']

            if settings['token_settings']['use_tags']:
                table.horizontalHeaderItem(1).setText(main.tr('Tag'))

            # Insert columns (files)
            for file in files:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 is_int=True,
                                 is_cumulative=True,
                                 is_breakdown=True)
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency %'),
                                 is_pct=True,
                                 is_cumulative=True,
                                 is_breakdown=True)

                table.insert_col(
                    table.columnCount() - 2,
                    main.tr(f'[{file["name"]}]\n{text_dispersion}'),
                    is_float=True,
                    is_breakdown=True)

                table.insert_col(
                    table.columnCount() - 2,
                    main.tr(f'[{file["name"]}]\n{text_adjusted_freq}'),
                    is_float=True,
                    is_breakdown=True)

            # Insert columns (total)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency'),
                             is_int=True,
                             is_cumulative=True)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency %'),
                             is_pct=True,
                             is_cumulative=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_dispersion}'),
                             is_float=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_adjusted_freq}'),
                             is_float=True)

            # Sort by frequency of the first file
            table.horizontalHeader().setSortIndicator(
                table.find_col(main.tr(f'[{files[0]["name"]}]\nFrequency')),
                Qt.DescendingOrder)

            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_freq_pct = table.find_cols(main.tr('\nFrequency %'))

            for col in cols_freq_pct:
                cols_freq.remove(col)

            cols_dispersion = table.find_cols(main.tr(f'\n{text_dispersion}'))
            cols_adjusted_freq = table.find_cols(
                main.tr(f'\n{text_adjusted_freq}'))
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))
            col_files_found_pct = table.find_col(
                main.tr('Number of\nFiles Found %'))

            freq_totals = numpy.array(list(
                tokens_freq_files.values())).sum(axis=0)
            len_files = len(files)

            table.setRowCount(len(tokens_freq_files))

            for i, (token, freq_files) in enumerate(
                    wordless_sorting.sorted_tokens_freq_files(
                        tokens_freq_files)):
                stats_files = tokens_stats_files[token]

                # Rank
                table.set_item_num(i, 0, -1)

                # Token
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(token))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num(i, cols_freq[j], freq)
                    table.set_item_num(i, cols_freq_pct[j], freq,
                                       freq_totals[j])

                for j, (dispersion, adjusted_freq) in enumerate(stats_files):
                    # Dispersion
                    table.set_item_num(i, cols_dispersion[j], dispersion)

                    # Adjusted Frequency
                    table.set_item_num(i, cols_adjusted_freq[j], adjusted_freq)

                # Number of Files Found
                num_files_found = len(
                    [freq for freq in freq_files[:-1] if freq])

                table.set_item_num(i, col_files_found, num_files_found)
                table.set_item_num(i, col_files_found_pct, num_files_found,
                                   len_files)

            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)
Example #6
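The same profiler as Example #4 in the separate-percentage-rows style, computing each percentage against totals taken from the last entry of texts_stats_files.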
    def update_gui(texts_stats_files):
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            count_tokens_lens = []

            # Insert column (total)
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 is_breakdown=True)

            count_paras_total = len(texts_stats_files[-1][0])
            count_sentences_total = len(texts_stats_files[-1][3])
            count_clauses_total = len(texts_stats_files[-1][4])
            count_tokens_total = len(texts_stats_files[-1][5])
            count_types_total = len(texts_stats_files[-1][6])
            count_chars_total = sum(texts_stats_files[-1][5])

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_sentence)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Count of Paragraphs
                table.set_item_num(0, i, count_paras)
                table.set_item_num(1, i, count_paras, count_paras_total)
                # Count of Sentences
                table.set_item_num(2, i, count_sentences)
                table.set_item_num(3, i, count_sentences,
                                   count_sentences_total)
                # Count of Clauses
                table.set_item_num(4, i, count_clauses)
                table.set_item_num(5, i, count_clauses, count_clauses_total)
                # Count of Tokens
                table.set_item_num(6, i, count_tokens)
                table.set_item_num(7, i, count_tokens, count_tokens_total)
                # Count of Types
                table.set_item_num(8, i, count_types)
                table.set_item_num(9, i, count_types, count_types_total)
                # Count of Characters
                table.set_item_num(10, i, count_chars)
                table.set_item_num(11, i, count_chars, count_chars_total)
                # Type-Token Ratio
                table.set_item_num(12, i, ttr)
                # Type-Token Ratio (Standardized)
                table.set_item_num(13, i, sttr)

                # Paragraph Length
                if count_paras == 0:
                    table.set_item_num(14, i, 0)
                    table.set_item_num(15, i, 0)
                    table.set_item_num(16, i, 0)
                    table.set_item_num(17, i, 0)
                    table.set_item_num(18, i, 0)
                    table.set_item_num(19, i, 0)
                else:
                    table.set_item_num(14, i,
                                       numpy.mean(len_paras_in_sentence))
                    table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                    table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                    table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                    table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                    table.set_item_num(19, i, numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num(20, i, 0)
                    table.set_item_num(21, i, 0)
                else:
                    table.set_item_num(20, i, numpy.mean(len_sentences))
                    table.set_item_num(21, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num(22, i, 0)
                    table.set_item_num(23, i, 0)
                else:
                    table.set_item_num(22, i, numpy.mean(len_clauses))
                    table.set_item_num(23, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num(24, i, 0)
                    table.set_item_num(25, i, 0)
                else:
                    table.set_item_num(24, i, numpy.mean(len_tokens))
                    table.set_item_num(25, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num(26, i, 0)
                    table.set_item_num(27, i, 0)
                else:
                    table.set_item_num(26, i, numpy.mean(len_types))
                    table.set_item_num(27, i, numpy.std(len_types))

                count_tokens_lens.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            if any(count_tokens_lens):
                len_files = len(files)
                count_tokens_lens_files = wordless_misc.merge_dicts(
                    count_tokens_lens)
                count_tokens_lens_total = {
                    len_token: count_tokens_files[-1]
                    for len_token, count_tokens_files in
                    count_tokens_lens_files.items()
                }
                len_tokens_max = max(count_tokens_lens_files)

                for i in range(len_tokens_max):
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens'),
                        is_int=True,
                        is_cumulative=True)
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens %'),
                        is_pct=True,
                        is_cumulative=True)

                for i in range(len_tokens_max):
                    counts = count_tokens_lens_files.get(
                        i + 1, [0] * (len_files + 1))

                    for j, count in enumerate(counts):
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2,
                                           col=j,
                                           val=count)
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2 + 1,
                                           col=j,
                                           val=count,
                                           total=count_tokens_lens_total.get(
                                               i + 1, 0))

            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)
Example #7
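A simpler profiler variant: the worker delivers pre-computed counts and per-file token-length counters, and the table has no clause-level rows.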
    def data_received(texts_stats_files, texts_len_tokens_files):
        if any(texts_len_tokens_files):
            table.settings = copy.deepcopy(main.settings_custom)

            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')), file['name'], breakdown=True)

            for i, stats in enumerate(texts_stats_files):
                count_paras = stats[0]
                count_sentences = stats[1]
                count_tokens = stats[2]
                count_types = stats[3]
                count_chars = stats[4]
                ttr = stats[5]
                sttr = stats[6]

                table.set_item_num_cumulative(0, i, count_paras)
                table.set_item_num_cumulative(1, i, count_sentences)
                table.set_item_num_cumulative(2, i, count_tokens)
                table.set_item_num_pct(3, i, count_types)
                table.set_item_num_cumulative(4, i, count_chars)
                table.set_item_num_float(5, i, ttr)
                table.set_item_num_float(6, i, sttr)

                if count_paras == 0:
                    table.set_item_num_float(7, i, 0)
                    table.set_item_num_float(8, i, 0)
                else:
                    table.set_item_num_float(7, i, count_sentences / count_paras)
                    table.set_item_num_float(8, i, count_tokens / count_paras)

                if count_sentences == 0:
                    table.set_item_num_float(9, i, 0)
                else:
                    table.set_item_num_float(9, i, count_tokens / count_sentences)

                if count_tokens == 0:
                    table.set_item_num_float(10, i, 0)
                else:
                    table.set_item_num_float(10, i, count_chars / count_tokens)

            # Count of n-length Tokens
            len_tokens_total = wordless_misc.merge_dicts(texts_len_tokens_files)
            len_tokens_max = max(len_tokens_total)

            if settings['token_settings']['use_tags']:
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-Tag Ratio'),
                    main.tr('Type-Tag Ratio (Standardized)'),
                    main.tr('Average Paragraph Length (in Sentence)'),
                    main.tr('Average Paragraph Length (in Tag)'),
                    main.tr('Average Sentence Length (in Tag)'),
                    main.tr('Average Tag Length (in Character)')
                ])

                for i in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tags'),
                                     num=True, pct=True, cumulative=True)
            else:
                for i in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {i + 1}-length Tokens'),
                                     num=True, pct=True, cumulative=True)

            len_files = len(files)

            for i in range(len_tokens_max):
                freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

                for j, freq in enumerate(freqs):
                    table.set_item_num_cumulative(table.rowCount() - len_tokens_max + i, j, freq)

            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()