def run(self):
    super().run()

    self.progress_updated.emit(self.tr('Rendering figure...'))
    self.worker_done.emit(
        self.err_msg,
        wl_misc.merge_dicts(self.keywords_freq_files),
        wl_misc.merge_dicts(self.keywords_stats_files)
    )
def run(self):
    super().run()

    self.progress_updated.emit(self.tr('Rendering figure...'))
    time.sleep(0.1)
    self.worker_done.emit(
        wl_misc.merge_dicts(self.tokens_freq_files),
        wl_misc.merge_dicts(self.tokens_stats_files)
    )
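# Both run() overrides above assume a Qt worker base class that defines the
# progress_updated and worker_done signals and inherits a tr() method for
# translatable strings. A minimal sketch of such a base class is shown below,
# assuming PyQt5; the class name Worker and the exact signal signatures are
# illustrative guesses, not the project's actual definitions.
from PyQt5.QtCore import QObject, pyqtSignal

class Worker(QObject):
    # Progress message displayed in the main thread while the worker runs
    progress_updated = pyqtSignal(str)
    # Results handed back when the worker finishes; note that the first run()
    # above emits (err_msg, freqs, stats) while the second emits only
    # (freqs, stats), so the argument list would differ per subclass
    worker_done = pyqtSignal(str, dict, dict)

    def run(self):
        # Subclasses extend this, then emit progress_updated/worker_done
        pass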
def update_gui(texts_stats_files):
    if any(itertools.chain.from_iterable(texts_stats_files)):
        table.settings = copy.deepcopy(main.settings_custom)

        table.blockSignals(True)
        table.setUpdatesEnabled(False)

        table.clear_table()

        count_tokens_lens = []

        # Insert a column for each file before the Total column
        for file in files:
            table.insert_col(
                table.find_col(main.tr('Total')),
                file['name'],
                is_breakdown=True
            )

        count_paras_total = len(texts_stats_files[-1][0])
        count_sentences_total = len(texts_stats_files[-1][3])
        count_clauses_total = len(texts_stats_files[-1][4])
        count_tokens_total = len(texts_stats_files[-1][5])
        count_types_total = len(texts_stats_files[-1][6])
        count_chars_total = sum(texts_stats_files[-1][5])

        for i, stats in enumerate(texts_stats_files):
            len_paras_in_sentence = stats[0]
            len_paras_in_clause = stats[1]
            len_paras_in_token = stats[2]
            len_sentences = stats[3]
            len_clauses = stats[4]
            len_tokens = stats[5]
            len_types = stats[6]
            ttr = stats[7]
            sttr = stats[8]

            count_paras = len(len_paras_in_sentence)
            count_sentences = len(len_sentences)
            count_clauses = len(len_clauses)
            count_tokens = len(len_tokens)
            count_types = len(len_types)
            count_chars = sum(len_tokens)

            # Count of Paragraphs
            table.set_item_num(0, i, count_paras)
            table.set_item_num(1, i, count_paras, count_paras_total)
            # Count of Sentences
            table.set_item_num(2, i, count_sentences)
            table.set_item_num(3, i, count_sentences, count_sentences_total)
            # Count of Clauses
            table.set_item_num(4, i, count_clauses)
            table.set_item_num(5, i, count_clauses, count_clauses_total)
            # Count of Tokens
            table.set_item_num(6, i, count_tokens)
            table.set_item_num(7, i, count_tokens, count_tokens_total)
            # Count of Types
            table.set_item_num(8, i, count_types)
            table.set_item_num(9, i, count_types, count_types_total)
            # Count of Characters
            table.set_item_num(10, i, count_chars)
            table.set_item_num(11, i, count_chars, count_chars_total)
            # Type-Token Ratio
            table.set_item_num(12, i, ttr)
            # Type-Token Ratio (Standardized)
            table.set_item_num(13, i, sttr)

            # Paragraph Length
            if count_paras == 0:
                for row in range(14, 20):
                    table.set_item_num(row, i, 0)
            else:
                table.set_item_num(14, i, numpy.mean(len_paras_in_sentence))
                table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                table.set_item_num(19, i, numpy.std(len_paras_in_token))

            # Sentence Length
            if count_sentences == 0:
                for row in range(20, 22):
                    table.set_item_num(row, i, 0)
            else:
                table.set_item_num(20, i, numpy.mean(len_sentences))
                table.set_item_num(21, i, numpy.std(len_sentences))

            # Clause Length
            if count_clauses == 0:
                for row in range(22, 24):
                    table.set_item_num(row, i, 0)
            else:
                table.set_item_num(22, i, numpy.mean(len_clauses))
                table.set_item_num(23, i, numpy.std(len_clauses))

            # Token Length
            if count_tokens == 0:
                for row in range(24, 26):
                    table.set_item_num(row, i, 0)
            else:
                table.set_item_num(24, i, numpy.mean(len_tokens))
                table.set_item_num(25, i, numpy.std(len_tokens))

            # Type Length
            if count_types == 0:
                for row in range(26, 28):
                    table.set_item_num(row, i, 0)
            else:
                table.set_item_num(26, i, numpy.mean(len_types))
                table.set_item_num(27, i, numpy.std(len_types))

            count_tokens_lens.append(collections.Counter(len_tokens))

        # Count of n-length Tokens
        if any(count_tokens_lens):
            len_files = len(files)
            count_tokens_lens_files = wl_misc.merge_dicts(count_tokens_lens)
            count_tokens_lens_total = {
                len_token: count_tokens_files[-1]
                for len_token, count_tokens_files in count_tokens_lens_files.items()
            }
            len_tokens_max = max(count_tokens_lens_files)

            for i in range(len_tokens_max):
                table.insert_row(
                    table.rowCount(),
                    main.tr(f'Count of {i + 1}-Length Tokens'),
                    is_int=True,
                    is_cumulative=True
                )
                table.insert_row(
                    table.rowCount(),
                    main.tr(f'Count of {i + 1}-Length Tokens %'),
                    is_pct=True,
                    is_cumulative=True
                )

            for i in range(len_tokens_max):
                counts = count_tokens_lens_files.get(i + 1, [0] * (len_files + 1))

                for j, count in enumerate(counts):
                    table.set_item_num(
                        row=table.rowCount() - (len_tokens_max - i) * 2,
                        col=j,
                        val=count
                    )
                    table.set_item_num(
                        row=table.rowCount() - (len_tokens_max - i) * 2 + 1,
                        col=j,
                        val=count,
                        total=count_tokens_lens_total.get(i + 1, 0)
                    )

        table.setUpdatesEnabled(True)
        table.blockSignals(False)

        table.toggle_pct()
        table.toggle_cumulative()
        table.toggle_breakdown()

        table.itemChanged.emit(table.item(0, 0))

        wl_msg.wl_msg_generate_table_success(main)
    else:
        wl_msg_box.wl_msg_box_no_results(main)
        wl_msg.wl_msg_generate_table_error(main)
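# To make the table-filling loop above concrete: count_tokens_lens holds one
# collections.Counter of token lengths per file (plus one for the total), and
# wl_misc.merge_dicts turns that list of counters into one dict mapping each
# token length to a per-file list of counts. A minimal illustration with
# made-up numbers; merge_counters is a hypothetical stand-in for wl_misc.merge_dicts,
# not the project's actual helper:
import collections

def merge_counters(counters):
    # Map each key to a list of values, one slot per input counter, 0 if absent
    merged = {}

    for i, counter in enumerate(counters):
        for key, val in counter.items():
            merged.setdefault(key, [0] * len(counters))[i] = val

    return merged

count_tokens_lens = [
    collections.Counter([1, 3, 3, 5]),          # File 1: token lengths
    collections.Counter([3, 5, 5]),             # File 2
    collections.Counter([1, 3, 3, 5, 3, 5, 5])  # Total
]

print(merge_counters(count_tokens_lens))
# {1: [1, 0, 1], 3: [2, 1, 3], 5: [1, 2, 3]}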
def update_gui(err_msg, texts_stats_files):
    assert not err_msg

    count_tokens_lens = []
    count_sentences_lens = []

    assert len(texts_stats_files) >= 1

    for i, stats in enumerate(texts_stats_files):
        readability_statistics = stats[0]
        len_paras_in_sentences = stats[1]
        len_paras_in_tokens = stats[2]
        len_sentences = stats[3]
        len_tokens_in_syls = stats[4]
        len_tokens_in_chars = stats[5]
        len_types_in_syls = stats[6]
        len_types_in_chars = stats[7]
        len_syls = stats[8]
        ttr = stats[9]
        sttr = stats[10]

        count_paras = len(len_paras_in_sentences)
        count_sentences = len(len_sentences)
        count_tokens = len(len_tokens_in_chars)
        count_types = len(len_types_in_chars)
        count_syls = len(len_syls)
        count_chars = sum(len_tokens_in_chars)

        count_tokens_lens.append(collections.Counter(len_tokens_in_chars))
        count_sentences_lens.append(collections.Counter(len_sentences))

        # Data validation
        assert len(readability_statistics) == 12

        for statistic in readability_statistics:
            assert statistic

        assert count_paras
        assert count_sentences
        assert count_tokens
        assert count_types
        assert count_syls
        assert count_chars

        assert len_paras_in_sentences
        assert len_paras_in_tokens
        assert len_sentences
        assert len_tokens_in_syls
        assert len_tokens_in_chars
        assert len_types_in_syls
        assert len_types_in_chars

        if i < len(files):
            lang = re.search(r'(?<=\[)[a-z_]+(?=\])', files[i]['name']).group()

            # Languages without a syllable tokenizer should report one syllable per token/type
            if lang not in main.settings_global['syl_tokenizers']:
                assert all(num_syls == 1 for num_syls in len_tokens_in_syls)
                assert all(num_syls == 1 for num_syls in len_types_in_syls)

        assert ttr
        assert sttr

        assert numpy.mean(len_paras_in_sentences) == count_sentences / count_paras
        assert numpy.mean(len_paras_in_tokens) == count_tokens / count_paras
        assert numpy.mean(len_sentences) == count_tokens / count_sentences
        assert numpy.mean(len_tokens_in_syls) == count_syls / count_tokens
        assert numpy.mean(len_tokens_in_chars) == count_chars / count_tokens

    # Count of n-length Sentences
    if any(count_sentences_lens):
        count_sentences_lens_files = wl_misc.merge_dicts(count_sentences_lens)
        count_sentences_lens = sorted(count_sentences_lens_files.keys())

        # Summing count × length over all n-length sentences should equal the count of tokens
        for i, stats in enumerate(texts_stats_files):
            len_sentences_total = sum(
                count_sentences_files[i] * len_sentence
                for len_sentence, count_sentences_files in count_sentences_lens_files.items()
            )

            assert len_sentences_total == sum(stats[3])

        # Sentence lengths should never be zero
        assert 0 not in count_sentences_lens

    # Count of n-length Tokens
    if any(count_tokens_lens):
        count_tokens_lens_files = wl_misc.merge_dicts(count_tokens_lens)
        count_tokens_lens = sorted(count_tokens_lens_files.keys())

        # Summing count × length over all n-length tokens should equal the count of characters
        for i, stats in enumerate(texts_stats_files):
            len_tokens_total = sum(
                count_tokens_files[i] * len_token
                for len_token, count_tokens_files in count_tokens_lens_files.items()
            )

            assert len_tokens_total == sum(stats[5])

        # Token lengths should never be zero
        assert 0 not in count_tokens_lens
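# The mean-length assertions above rely on a simple identity: a list of lengths
# sums to the count of the units it measures, so its mean equals that count
# divided by the number of entries. A quick self-contained check with made-up
# numbers:
import numpy

len_sentences = [4, 7, 5]          # Tokens per sentence
count_sentences = len(len_sentences)
count_tokens = sum(len_sentences)  # 16 tokens in total

assert numpy.mean(len_sentences) == count_tokens / count_sentences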
def test_merge_dicts():
    assert wl_misc.merge_dicts([
        {1: 10},
        {1: 20, 2: 30}
    ]) == {1: [10, 20], 2: [0, 30]}
    assert wl_misc.merge_dicts([
        {1: [10, 20]},
        {1: [30, 40], 2: [50, 60]}
    ]) == {1: [[10, 20], [30, 40]], 2: [[0, 0], [50, 60]]}
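# A minimal sketch of what wl_misc.merge_dicts might look like, inferred purely
# from the two test cases above (not the project's actual implementation):
# each key maps to one slot per input dict, with missing scalar values filled
# with 0 and missing list values filled with a zero list of matching length.
def merge_dicts(dicts):
    merged = {}

    for i, d in enumerate(dicts):
        for key, val in d.items():
            if key not in merged:
                if isinstance(val, list):
                    # Missing list values default to a zero list of the same length
                    merged[key] = [[0] * len(val) for _ in range(len(dicts))]
                else:
                    # Missing scalar values default to 0
                    merged[key] = [0] * len(dicts)

            merged[key][i] = val

    return merged

# Both test cases pass against this sketch
assert merge_dicts([{1: 10}, {1: 20, 2: 30}]) == {1: [10, 20], 2: [0, 30]}
assert merge_dicts([{1: [10, 20]}, {1: [30, 40], 2: [50, 60]}]) == {1: [[10, 20], [30, 40]], 2: [[0, 0], [50, 60]]}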