def process_data(self):
    """Finish data processing, then emit the merged per-file frequency and
    statistics dictionaries to whoever is listening for the results.

    Emits:
        progress_updated: a translated status message before rendering.
        processing_finished: (merged token frequencies, merged token stats).
    """
    super().process_data()

    # Give the progress dialog a moment to show the final status message.
    self.progress_updated.emit(self.tr('Rendering figure ...'))
    time.sleep(0.1)

    merged_freqs = wordless_misc.merge_dicts(self.tokens_freq_files)
    merged_stats = wordless_misc.merge_dicts(self.tokens_stats_files)

    self.processing_finished.emit(merged_freqs, merged_stats)
def run(self):
    """Run the base worker, then emit the merged per-file keyword frequency
    and statistics dictionaries as the worker's final result.

    Emits:
        progress_updated: a translated status message before rendering.
        worker_done: (merged keyword frequencies, merged keyword stats).
    """
    super().run()

    # Brief pause so the final status message is visible in the dialog.
    self.progress_updated.emit(self.tr('Rendering figure ...'))
    time.sleep(0.1)

    merged_freqs = wordless_misc.merge_dicts(self.keywords_freq_files)
    merged_stats = wordless_misc.merge_dicts(self.keywords_stats_files)

    self.worker_done.emit(merged_freqs, merged_stats)
def update_gui(texts_stats_files):
    """Populate the overview table from per-file text statistics.

    Args:
        texts_stats_files: one entry per file plus a final entry for the
            totals (indexed as ``[-1]`` below). Each entry is a sequence of
            9 items: paragraph lengths in sentences / clauses / tokens,
            sentence lengths, clause lengths, token lengths, type lengths,
            TTR and standardized TTR.
            NOTE(review): the exact element semantics are inferred from the
            variable names they are unpacked into — confirm against the
            producer of these stats.

    Shows a "no results" message instead when every stats list is empty.
    """
    # Only build the table when at least one file produced any statistics.
    if any(itertools.chain.from_iterable(texts_stats_files)):
        table.settings = copy.deepcopy(main.settings_custom)

        # Suppress signals/repaints during bulk population; re-enabled below.
        table.blockSignals(True)
        table.setUpdatesEnabled(False)

        table.clear_table()

        count_tokens_lens = []

        # Insert column (total)
        for i, file in enumerate(files):
            table.insert_col(table.find_col(main.tr('Total')),
                             file['name'],
                             is_breakdown=True)

        # Totals come from the last entry of texts_stats_files.
        count_paras_total = len(texts_stats_files[-1][0])
        count_sentences_total = len(texts_stats_files[-1][3])
        count_clauses_total = len(texts_stats_files[-1][4])
        count_tokens_total = len(texts_stats_files[-1][5])
        count_types_total = len(texts_stats_files[-1][6])
        # Character count = sum of token lengths.
        count_chars_total = sum(texts_stats_files[-1][5])

        # One column per file (the totals entry fills the 'Total' column).
        for i, stats in enumerate(texts_stats_files):
            len_paras_in_sentence = stats[0]
            len_paras_in_clause = stats[1]
            len_paras_in_token = stats[2]
            len_sentences = stats[3]
            len_clauses = stats[4]
            len_tokens = stats[5]
            len_types = stats[6]
            ttr = stats[7]
            sttr = stats[8]

            count_paras = len(len_paras_in_sentence)
            count_sentences = len(len_sentences)
            count_clauses = len(len_clauses)
            count_tokens = len(len_tokens)
            count_types = len(len_types)
            count_chars = sum(len_tokens)

            # Rows come in (absolute count, percentage-of-total) pairs.
            # Count of Paragraphs
            table.set_item_num(0, i, count_paras)
            table.set_item_num(1, i, count_paras, count_paras_total)
            # Count of Sentences
            table.set_item_num(2, i, count_sentences)
            table.set_item_num(3, i, count_sentences, count_sentences_total)
            # Count of Clauses
            table.set_item_num(4, i, count_clauses)
            table.set_item_num(5, i, count_clauses, count_clauses_total)
            # Count of Tokens
            table.set_item_num(6, i, count_tokens)
            table.set_item_num(7, i, count_tokens, count_tokens_total)
            # Count of Types
            table.set_item_num(8, i, count_types)
            table.set_item_num(9, i, count_types, count_types_total)
            # Count of Characters
            table.set_item_num(10, i, count_chars)
            table.set_item_num(11, i, count_chars, count_chars_total)
            # Type-Token Ratio
            table.set_item_num(12, i, ttr)
            # Type-Token Ratio (Standardized)
            table.set_item_num(13, i, sttr)

            # Paragraph Length
            # Guard against empty files: mean/std of an empty list is NaN
            # (with a numpy warning), so write zeros instead.
            if count_paras == 0:
                table.set_item_num(14, i, 0)
                table.set_item_num(15, i, 0)
                table.set_item_num(16, i, 0)
                table.set_item_num(17, i, 0)
                table.set_item_num(18, i, 0)
                table.set_item_num(19, i, 0)
            else:
                table.set_item_num(14, i, numpy.mean(len_paras_in_sentence))
                table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                table.set_item_num(19, i, numpy.std(len_paras_in_token))

            # Sentence Length
            if count_sentences == 0:
                table.set_item_num(20, i, 0)
                table.set_item_num(21, i, 0)
            else:
                table.set_item_num(20, i, numpy.mean(len_sentences))
                table.set_item_num(21, i, numpy.std(len_sentences))

            # Clause Length
            if count_clauses == 0:
                table.set_item_num(22, i, 0)
                table.set_item_num(23, i, 0)
            else:
                table.set_item_num(22, i, numpy.mean(len_clauses))
                table.set_item_num(23, i, numpy.std(len_clauses))

            # Token Length
            if count_tokens == 0:
                table.set_item_num(24, i, 0)
                table.set_item_num(25, i, 0)
            else:
                table.set_item_num(24, i, numpy.mean(len_tokens))
                table.set_item_num(25, i, numpy.std(len_tokens))

            # Type Length
            if count_types == 0:
                table.set_item_num(26, i, 0)
                table.set_item_num(27, i, 0)
            else:
                table.set_item_num(26, i, numpy.mean(len_types))
                table.set_item_num(27, i, numpy.std(len_types))

            # Token-length histogram per file, merged below.
            count_tokens_lens.append(collections.Counter(len_tokens))

        # Count of n-length Tokens
        if any(count_tokens_lens):
            len_files = len(files)
            # merge_dicts maps each token length to a per-file list of
            # counts; the last element of each list is the total.
            count_tokens_lens_files = wordless_misc.merge_dicts(
                count_tokens_lens)
            count_tokens_lens_total = {
                len_token: count_tokens_files[-1]
                for len_token, count_tokens_files
                in count_tokens_lens_files.items()
            }
            len_tokens_max = max(count_tokens_lens_files)

            # Two appended rows per token length: count, then percentage.
            for i in range(len_tokens_max):
                table.insert_row(
                    table.rowCount(),
                    main.tr(f'Count of {i + 1}-Length Tokens'),
                    is_int=True, is_cumulative=True)
                table.insert_row(
                    table.rowCount(),
                    main.tr(f'Count of {i + 1}-Length Tokens %'),
                    is_pct=True, is_cumulative=True)

            for i in range(len_tokens_max):
                # Lengths with no occurrences get zeros for every file + total.
                counts = count_tokens_lens_files.get(
                    i + 1, [0] * (len_files + 1))

                for j, count in enumerate(counts):
                    # Rows were appended in pairs, so the count row for
                    # length i+1 sits (len_tokens_max - i) * 2 from the end.
                    table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2,
                                       col=j,
                                       val=count)
                    table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2 + 1,
                                       col=j,
                                       val=count,
                                       total=count_tokens_lens_total.get(
                                           i + 1, 0))

        table.setUpdatesEnabled(True)
        table.blockSignals(False)

        table.toggle_pct()
        table.toggle_cumulative()
        table.toggle_breakdown()

        # Force dependent views to refresh now that signals are unblocked.
        table.itemChanged.emit(table.item(0, 0))

        wordless_msg.wordless_msg_generate_table_success(main)
    else:
        wordless_msg_box.wordless_msg_box_no_results(main)
        wordless_msg.wordless_msg_generate_table_error(main)
def data_received(texts_stats_files):
    """Populate the overview table from per-file text statistics.

    Args:
        texts_stats_files: one entry per file (plus, presumably, a totals
            entry — TODO confirm against the worker). Each entry is a
            sequence of 9 items: paragraph lengths in sentences / clauses /
            tokens, sentence lengths, clause lengths, token lengths, type
            lengths, TTR and standardized TTR.

    Shows a "no results" message instead when every stats list is empty,
    and closes the progress dialog either way.
    """
    if any(itertools.chain.from_iterable(texts_stats_files)):
        table.settings = copy.deepcopy(main.settings_custom)

        # Suppress signals/repaints during bulk population; re-enabled below.
        table.blockSignals(True)
        table.setUpdatesEnabled(False)

        table.clear_table()

        len_tokens_files = []

        for i, file in enumerate(files):
            table.insert_col(table.find_col(main.tr('Total')),
                             file['name'],
                             breakdown=True)

        for i, stats in enumerate(texts_stats_files):
            len_paras_in_sentence = stats[0]
            len_paras_in_clause = stats[1]
            len_paras_in_token = stats[2]
            len_sentences = stats[3]
            len_clauses = stats[4]
            len_tokens = stats[5]
            len_types = stats[6]
            ttr = stats[7]
            sttr = stats[8]

            # NOTE(review): the sibling update_gui uses
            # len(len_paras_in_sentence) here; both lists should have one
            # entry per paragraph, so the counts should agree — verify.
            count_paras = len(len_paras_in_token)
            count_sentences = len(len_sentences)
            count_clauses = len(len_clauses)
            count_tokens = len(len_tokens)
            count_types = len(len_types)
            count_chars = sum(len_tokens)

            # Count of Paragraphs
            table.set_item_num_cumulative(0, i, count_paras)
            # Count of Sentences
            table.set_item_num_cumulative(1, i, count_sentences)
            # Count of Clauses
            table.set_item_num_cumulative(2, i, count_clauses)
            # Count of Tokens
            table.set_item_num_cumulative(3, i, count_tokens)
            # Count of Types
            table.set_item_num_pct(4, i, count_types)
            # Count of Characters
            table.set_item_num_cumulative(5, i, count_chars)
            # Type-token Ratio
            table.set_item_num_float(6, i, ttr)
            # Type-token Ratio (Standardized)
            table.set_item_num_float(7, i, sttr)

            # Paragraph Length
            # Guard against empty files: mean/std of an empty list is NaN
            # (with a numpy warning), so write zeros instead.
            if count_paras == 0:
                table.set_item_num_float(8, i, 0)
                table.set_item_num_float(9, i, 0)
                table.set_item_num_float(10, i, 0)
                table.set_item_num_float(11, i, 0)
                table.set_item_num_float(12, i, 0)
                table.set_item_num_float(13, i, 0)
            else:
                table.set_item_num_float(8, i,
                                         numpy.mean(len_paras_in_sentence))
                table.set_item_num_float(9, i,
                                         numpy.std(len_paras_in_sentence))
                table.set_item_num_float(10, i,
                                         numpy.mean(len_paras_in_clause))
                table.set_item_num_float(11, i,
                                         numpy.std(len_paras_in_clause))
                table.set_item_num_float(12, i,
                                         numpy.mean(len_paras_in_token))
                table.set_item_num_float(13, i,
                                         numpy.std(len_paras_in_token))

            # Sentence Length
            if count_sentences == 0:
                table.set_item_num_float(14, i, 0)
                table.set_item_num_float(15, i, 0)
            else:
                table.set_item_num_float(14, i, numpy.mean(len_sentences))
                table.set_item_num_float(15, i, numpy.std(len_sentences))

            # Clause Length
            if count_clauses == 0:
                table.set_item_num_float(16, i, 0)
                table.set_item_num_float(17, i, 0)
            else:
                table.set_item_num_float(16, i, numpy.mean(len_clauses))
                table.set_item_num_float(17, i, numpy.std(len_clauses))

            # Token Length
            if count_tokens == 0:
                table.set_item_num_float(18, i, 0)
                table.set_item_num_float(19, i, 0)
            else:
                table.set_item_num_float(18, i, numpy.mean(len_tokens))
                table.set_item_num_float(19, i, numpy.std(len_tokens))

            # Type Length
            if count_types == 0:
                table.set_item_num_float(20, i, 0)
                table.set_item_num_float(21, i, 0)
            else:
                table.set_item_num_float(20, i, numpy.mean(len_types))
                table.set_item_num_float(21, i, numpy.std(len_types))

            # Token-length histogram per file, merged below.
            len_tokens_files.append(collections.Counter(len_tokens))

        # Count of n-length Tokens
        len_files = len(files)
        len_tokens_total = wordless_misc.merge_dicts(len_tokens_files)
        len_tokens_max = max(len_tokens_total)

        # Use tags only
        if settings['token_settings']['use_tags']:
            # BUG FIX: rows 10-11 are filled from len_paras_in_clause above,
            # but the labels previously duplicated 'Paragraph Length in
            # Sentence' — they now correctly read 'in Clause'.
            table.setVerticalHeaderLabels([
                main.tr('Count of Paragraphs'),
                main.tr('Count of Sentences'),
                main.tr('Count of Clauses'),
                main.tr('Count of Tags'),
                main.tr('Count of Tag Types'),
                main.tr('Count of Characters'),
                main.tr('Type-tag Ratio'),
                main.tr('Type-tag Ratio (Standardized)'),
                main.tr('Paragraph Length in Sentence (Mean)'),
                main.tr(
                    'Paragraph Length in Sentence (Standard Deviation)'),
                main.tr('Paragraph Length in Clause (Mean)'),
                main.tr(
                    'Paragraph Length in Clause (Standard Deviation)'),
                main.tr('Paragraph Length in Tag (Mean)'),
                main.tr('Paragraph Length in Tag (Standard Deviation)'),
                main.tr('Sentence Length in Tag (Mean)'),
                main.tr('Sentence Length in Tag (Standard Deviation)'),
                main.tr('Clause Length in Tag (Mean)'),
                main.tr('Clause Length in Tag (Standard Deviation)'),
                main.tr('Tag Length in Character (Mean)'),
                main.tr('Tag Length in Character (Standard Deviation)'),
                main.tr('Tag Type Length in Character (Mean)'),
                main.tr(
                    'Tag Type Length in Character (Standard Deviation)')
            ])

            for i in range(len_tokens_max):
                table.insert_row(table.rowCount(),
                                 main.tr(f'Count of {i + 1}-length Tags'),
                                 num=True, pct=True, cumulative=True)
        else:
            for i in range(len_tokens_max):
                table.insert_row(
                    table.rowCount(),
                    main.tr(f'Count of {i + 1}-length Tokens'),
                    num=True, pct=True, cumulative=True)

        for i in range(len_tokens_max):
            # Lengths with no occurrences get zeros for every file + total.
            freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

            for j, freq in enumerate(freqs):
                table.set_item_num_cumulative(
                    table.rowCount() - len_tokens_max + i, j, freq)

        table.blockSignals(False)
        table.setUpdatesEnabled(True)

        table.toggle_pct()
        table.toggle_cumulative()
        table.toggle_breakdown()
        table.update_items_width()

        # Force dependent views to refresh now that signals are unblocked.
        table.itemChanged.emit(table.item(0, 0))

        wordless_msg.wordless_msg_generate_table_success(main)
    else:
        wordless_msg_box.wordless_msg_box_no_results(main)
        wordless_msg.wordless_msg_generate_table_error(main)

    dialog_progress.accept()
def data_received(texts_stats_files, texts_len_tokens_files):
    """Fill the overview table with per-file counts and average lengths.

    Args:
        texts_stats_files: per-file sequences of seven pre-computed values
            (paragraph/sentence/token/type/character counts, TTR, STTR).
        texts_len_tokens_files: per-file token-length frequency mappings,
            merged to build the 'Count of n-length ...' rows.

    Shows a "no results" message when no token-length data exists, and
    closes the progress dialog either way.
    """
    if any(texts_len_tokens_files):
        table.settings = copy.deepcopy(main.settings_custom)

        # Freeze signals and painting while the table is rebuilt.
        table.blockSignals(True)
        table.setUpdatesEnabled(False)

        table.clear_table()

        for col_i, file in enumerate(files):
            table.insert_col(table.find_col(main.tr('Total')),
                             file['name'],
                             breakdown=True)

        for col_i, stats in enumerate(texts_stats_files):
            (count_paras, count_sentences,
             count_tokens, count_types, count_chars) = stats[:5]
            ttr = stats[5]
            sttr = stats[6]

            table.set_item_num_cumulative(0, col_i, count_paras)
            table.set_item_num_cumulative(1, col_i, count_sentences)
            table.set_item_num_cumulative(2, col_i, count_tokens)
            table.set_item_num_pct(3, col_i, count_types)
            table.set_item_num_cumulative(4, col_i, count_chars)
            table.set_item_num_float(5, col_i, ttr)
            table.set_item_num_float(6, col_i, sttr)

            # Average lengths; zero-count denominators yield 0 instead of
            # raising ZeroDivisionError.
            table.set_item_num_float(
                7, col_i,
                0 if count_paras == 0 else count_sentences / count_paras)
            table.set_item_num_float(
                8, col_i,
                0 if count_paras == 0 else count_tokens / count_paras)
            table.set_item_num_float(
                9, col_i,
                0 if count_sentences == 0 else count_tokens / count_sentences)
            table.set_item_num_float(
                10, col_i,
                0 if count_tokens == 0 else count_chars / count_tokens)

        # Count of n-length Tokens
        len_tokens_total = wordless_misc.merge_dicts(texts_len_tokens_files)
        len_tokens_max = max(len_tokens_total)

        if settings['token_settings']['use_tags']:
            table.setVerticalHeaderLabels([
                main.tr('Count of Paragraphs'),
                main.tr('Count of Sentences'),
                main.tr('Count of Tags'),
                main.tr('Count of Tag Types'),
                main.tr('Count of Characters'),
                main.tr('Type-Tag Ratio'),
                main.tr('Type-Tag Ratio (Standardized)'),
                main.tr('Average Paragraph Length (in Sentence)'),
                main.tr('Average Paragraph Length (in Tag)'),
                main.tr('Average Sentence Length (in Tag)'),
                main.tr('Average Tag Length (in Character)')
            ])

            unit = 'Tags'
        else:
            unit = 'Tokens'

        # Append one cumulative row per observed token length.
        for length_i in range(len_tokens_max):
            table.insert_row(table.rowCount(),
                             main.tr(f'Count of {length_i + 1}-length {unit}'),
                             num=True, pct=True, cumulative=True)

        len_files = len(files)

        for length_i in range(len_tokens_max):
            # Missing lengths get zeros for every file plus the total.
            freqs = len_tokens_total.get(length_i + 1,
                                         [0] * (len_files + 1))

            for col_i, freq in enumerate(freqs):
                table.set_item_num_cumulative(
                    table.rowCount() - len_tokens_max + length_i,
                    col_i, freq)

        table.blockSignals(False)
        table.setUpdatesEnabled(True)

        table.toggle_pct()
        table.toggle_cumulative()
        table.toggle_breakdown()
        table.update_items_width()

        # Kick listeners once signals are live again.
        table.itemChanged.emit(table.item(0, 0))

        wordless_msg.wordless_msg_generate_table_success(main)
    else:
        wordless_msg_box.wordless_msg_box_no_results(main)
        wordless_msg.wordless_msg_generate_table_error(main)

    dialog_progress.accept()