Example #1
0
    def run(self):
        """Finish the worker run: report progress, then deliver merged results.

        Emits `progress_updated` with a rendering message, then `worker_done`
        with the error message and the per-file keyword frequency/statistics
        dicts merged via `wl_misc.merge_dicts`.
        """
        super().run()

        self.progress_updated.emit(self.tr('Rendering figure...'))

        # Merge per-file results before handing them to the listener.
        keywords_freq = wl_misc.merge_dicts(self.keywords_freq_files)
        keywords_stats = wl_misc.merge_dicts(self.keywords_stats_files)
        self.worker_done.emit(self.err_msg, keywords_freq, keywords_stats)
Example #2
0
    def run(self):
        """Finish the worker run: report progress, pause briefly, emit merged stats.

        Emits `progress_updated` with a rendering message, sleeps 0.1 s
        (presumably to let the UI repaint — confirm against caller), then
        emits `worker_done` with the merged token frequency/statistics dicts.
        """
        super().run()

        self.progress_updated.emit(self.tr('Rendering figure ...'))

        time.sleep(0.1)

        # Merge per-file results before handing them to the listener.
        merged_freq = wl_misc.merge_dicts(self.tokens_freq_files)
        merged_stats = wl_misc.merge_dicts(self.tokens_stats_files)
        self.worker_done.emit(merged_freq, merged_stats)
Example #3
0
    def update_gui(texts_stats_files):
        """Populate the profiler table with per-file and total text statistics.

        texts_stats_files: one stats sequence per file plus a final "total"
        entry (indexed as texts_stats_files[-1]); each stats sequence unpacks
        as paragraph/sentence/clause/token/type length lists plus TTR and
        standardized TTR (see the loop body below).
        Shows a no-results message instead when every stats sequence is empty.
        """
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repaints while the table is rebuilt in bulk;
            # re-enabled symmetrically at the bottom of this branch.
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            # One Counter of token lengths per file, collected in the main loop
            # and merged afterwards for the "Count of n-length Tokens" rows.
            count_tokens_lens = []

            # Insert column (total)
            # One column per file, each inserted before the "Total" column.
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 is_breakdown=True)

            # Totals come from the last entry of texts_stats_files; character
            # total is the sum of token lengths (cf. count_chars below).
            count_paras_total = len(texts_stats_files[-1][0])
            count_sentences_total = len(texts_stats_files[-1][3])
            count_clauses_total = len(texts_stats_files[-1][4])
            count_tokens_total = len(texts_stats_files[-1][5])
            count_types_total = len(texts_stats_files[-1][6])
            count_chars_total = sum(texts_stats_files[-1][5])

            # Fill one column (i) per stats sequence, including the total column.
            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_sentence)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Rows 0-13: raw counts, each count followed by its
                # percentage-of-total row, then TTR/STTR.
                # Count of Paragraphs
                table.set_item_num(0, i, count_paras)
                table.set_item_num(1, i, count_paras, count_paras_total)
                # Count of Sentences
                table.set_item_num(2, i, count_sentences)
                table.set_item_num(3, i, count_sentences,
                                   count_sentences_total)
                # Count of Clauses
                table.set_item_num(4, i, count_clauses)
                table.set_item_num(5, i, count_clauses, count_clauses_total)
                # Count of Tokens
                table.set_item_num(6, i, count_tokens)
                table.set_item_num(7, i, count_tokens, count_tokens_total)
                # Count of Types
                table.set_item_num(8, i, count_types)
                table.set_item_num(9, i, count_types, count_types_total)
                # Count of Characters
                table.set_item_num(10, i, count_chars)
                table.set_item_num(11, i, count_chars, count_chars_total)
                # Type-Token Ratio
                table.set_item_num(12, i, ttr)
                # Type-Token Ratio (Standardized)
                table.set_item_num(13, i, sttr)

                # Rows 14-27: mean/stdev length pairs; write zeros instead of
                # calling numpy on empty lists (mean/std of [] would be NaN).
                # Paragraph Length
                if count_paras == 0:
                    table.set_item_num(14, i, 0)
                    table.set_item_num(15, i, 0)
                    table.set_item_num(16, i, 0)
                    table.set_item_num(17, i, 0)
                    table.set_item_num(18, i, 0)
                    table.set_item_num(19, i, 0)
                else:
                    table.set_item_num(14, i,
                                       numpy.mean(len_paras_in_sentence))
                    table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                    table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                    table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                    table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                    table.set_item_num(19, i, numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num(20, i, 0)
                    table.set_item_num(21, i, 0)
                else:
                    table.set_item_num(20, i, numpy.mean(len_sentences))
                    table.set_item_num(21, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num(22, i, 0)
                    table.set_item_num(23, i, 0)
                else:
                    table.set_item_num(22, i, numpy.mean(len_clauses))
                    table.set_item_num(23, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num(24, i, 0)
                    table.set_item_num(25, i, 0)
                else:
                    table.set_item_num(24, i, numpy.mean(len_tokens))
                    table.set_item_num(25, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num(26, i, 0)
                    table.set_item_num(27, i, 0)
                else:
                    table.set_item_num(26, i, numpy.mean(len_types))
                    table.set_item_num(27, i, numpy.std(len_types))

                # Token-length histogram for this column, used below.
                count_tokens_lens.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            # Dynamically appended row pairs (count + percentage) for token
            # lengths 1..max observed length.
            if any(count_tokens_lens):
                len_files = len(files)
                # merge_dicts aligns each length's counts across columns,
                # padding missing columns with 0 (see test_merge_dicts).
                count_tokens_lens_files = wl_misc.merge_dicts(
                    count_tokens_lens)
                # Last element of each merged list is the total column's count.
                count_tokens_lens_total = {
                    len_token: count_tokens_files[-1]
                    for len_token, count_tokens_files in
                    count_tokens_lens_files.items()
                }
                len_tokens_max = max(count_tokens_lens_files)

                # First insert all rows, then fill them, so row indices can be
                # computed from rowCount() in the second pass.
                for i in range(len_tokens_max):
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens'),
                        is_int=True,
                        is_cumulative=True)
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens %'),
                        is_pct=True,
                        is_cumulative=True)

                for i in range(len_tokens_max):
                    # Lengths with no occurrences get a zero count per column
                    # (one per file plus the total column).
                    counts = count_tokens_lens_files.get(
                        i + 1, [0] * (len_files + 1))

                    for j, count in enumerate(counts):
                        # Row offset counts back from the end: two rows
                        # (count, percentage) per remaining token length.
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2,
                                           col=j,
                                           val=count)
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2 + 1,
                                           col=j,
                                           val=count,
                                           total=count_tokens_lens_total.get(
                                               i + 1, 0))

            # Re-enable updates/signals in reverse order of suspension.
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()

            # Fire one itemChanged so listeners refresh after the bulk edit.
            table.itemChanged.emit(table.item(0, 0))

            wl_msg.wl_msg_generate_table_success(main)
        else:
            wl_msg_box.wl_msg_box_no_results(main)

            wl_msg.wl_msg_generate_table_error(main)
Example #4
0
def update_gui(err_msg, texts_stats_files):
    """Validate per-file text statistics produced by the profiler worker.

    err_msg: worker error message; must be empty for the data to be checked.
    texts_stats_files: one stats sequence per file plus a final "total"
    entry; each unpacks as readability statistics, paragraph/sentence/token/
    type/syllable length lists, and (standardized) type-token ratios.

    Raises AssertionError when any invariant is violated. (Test helper:
    asserts are intentional here.)
    """
    assert not err_msg

    # One Counter per stats sequence, merged after the loop to validate the
    # n-length token/sentence count tables.
    count_tokens_lens = []
    count_sentences_lens = []

    assert len(texts_stats_files) >= 1

    for i, stats in enumerate(texts_stats_files):
        readability_statistics = stats[0]
        len_paras_in_sentences = stats[1]
        len_paras_in_tokens = stats[2]
        len_sentences = stats[3]
        len_tokens_in_syls = stats[4]
        len_tokens_in_chars = stats[5]
        len_types_in_syls = stats[6]
        len_types_in_chars = stats[7]
        len_syls = stats[8]
        ttr = stats[9]
        sttr = stats[10]

        count_paras = len(len_paras_in_sentences)
        count_sentences = len(len_sentences)
        count_tokens = len(len_tokens_in_chars)
        count_types = len(len_types_in_chars)
        count_syls = len(len_syls)
        count_chars = sum(len_tokens_in_chars)

        count_tokens_lens.append(collections.Counter(len_tokens_in_chars))
        count_sentences_lens.append(collections.Counter(len_sentences))

        # Data validation

        assert len(readability_statistics) == 12
        for statistic in readability_statistics:
            assert statistic

        assert count_paras
        assert count_sentences
        assert count_tokens
        assert count_types
        assert count_syls
        assert count_chars

        assert len_paras_in_sentences
        assert len_paras_in_tokens
        assert len_sentences

        assert len_tokens_in_syls
        assert len_tokens_in_chars
        assert len_types_in_syls
        assert len_types_in_chars

        if i < len(files):
            lang = re.search(r'(?<=\[)[a-z_]+(?=\])', files[i]['name']).group()

            # Languages without a syllable tokenizer must report every token
            # and type as a single syllable.
            # (Renamed the comprehension variable: the original shadowed the
            # outer `len_syls` list from stats[8].)
            if lang not in main.settings_global['syl_tokenizers']:
                assert all(count_syls_token == 1
                           for count_syls_token in len_tokens_in_syls)
                assert all(count_syls_type == 1
                           for count_syls_type in len_types_in_syls)

        assert ttr
        assert sttr

        # Mean lengths must agree with the ratios of the raw counts.
        assert numpy.mean(
            len_paras_in_sentences) == count_sentences / count_paras
        assert numpy.mean(len_paras_in_tokens) == count_tokens / count_paras
        assert numpy.mean(len_sentences) == count_tokens / count_sentences
        assert numpy.mean(len_tokens_in_syls) == count_syls / count_tokens
        assert numpy.mean(len_tokens_in_chars) == count_chars / count_tokens

    # Count of n-length Sentences
    if any(count_sentences_lens):
        count_sentences_lens_files = wl_misc.merge_dicts(count_sentences_lens)
        sentence_lens = sorted(count_sentences_lens_files.keys())

        # The total of counts of n-length sentences should be equal to the count of tokens
        for i, stats in enumerate(texts_stats_files):
            len_sentences_total = sum(
                count_sentences_files[i] * len_sentence
                for len_sentence, count_sentences_files
                in count_sentences_lens_files.items())

            assert len_sentences_total == sum(stats[3])

        # Sentence length should never be zero
        assert 0 not in sentence_lens

    # Count of n-length Tokens
    if any(count_tokens_lens):
        count_tokens_lens_files = wl_misc.merge_dicts(count_tokens_lens)
        token_lens = sorted(count_tokens_lens_files.keys())

        # The total of counts of n-length tokens should be equal to the count of characters
        for i, stats in enumerate(texts_stats_files):
            len_tokens_total = sum(
                count_tokens_files[i] * len_token
                for len_token, count_tokens_files
                in count_tokens_lens_files.items())

            assert len_tokens_total == sum(stats[5])

        # Token length should never be zero
        assert 0 not in token_lens
Example #5
0
def test_merge_dicts():
    """Check that merge_dicts aligns values per key across dicts, padding gaps with zeros."""
    # Scalar values: a key missing from one dict contributes 0 at its position.
    merged_scalars = wl_misc.merge_dicts([{1: 10}, {1: 20, 2: 30}])
    assert merged_scalars == {1: [10, 20], 2: [0, 30]}

    # List values: a missing key contributes a zero list of matching length.
    merged_lists = wl_misc.merge_dicts([{1: [10, 20]}, {1: [30, 40], 2: [50, 60]}])
    assert merged_lists == {1: [[10, 20], [30, 40]], 2: [[0, 0], [50, 60]]}