def test_profiler():
    """Exercise the Profiler module twice: once on a randomly chosen single
    file and once with every file selected.
    """
    print('Start testing module Profiler...')

    for single_file in (True, False):
        # Clear all selections before choosing the file set for this round.
        for file in files:
            file['selected'] = False

        if single_file:
            # Single file
            random.choice(files)['selected'] = True
        else:
            # Multiple files
            for file in files:
                file['selected'] = True

        # Extract the short language codes (e.g. "eng_us") from the bracketed
        # part of the selected file names, for logging only.
        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}\n")

        wl_profiler.Wl_Worker_Profiler_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        ).run()

    print('All pass!')
    main.app.quit()
def test_wordlist_generator():
    """Run the Wordlist Generator once for every pair of (dispersion measure,
    adjusted frequency measure), alternating between a single random file and
    all files.
    """
    print('Start testing module Wordlist Generator...')

    measures_dispersion = list(main.settings_global['measures_dispersion'].keys())
    measures_adjusted_freq = list(main.settings_global['measures_adjusted_freq'].keys())

    # Pad the shorter list by cycling it so that both lists can be zipped
    # pairwise without dropping any measure of the longer list. Equivalent to
    # the quotient/remainder extension but also safe when a list is empty.
    len_max = max(len(measures_dispersion), len(measures_adjusted_freq))
    measures_dispersion = (measures_dispersion * len_max)[:len_max]
    measures_adjusted_freq = (measures_adjusted_freq * len_max)[:len_max]

    for i, (measure_dispersion, measure_adjusted_freq) in enumerate(
        zip(measures_dispersion, measures_adjusted_freq)
    ):
        for file in files:
            file['selected'] = False

        # Single file
        if i % 2 == 0:
            random.choice(files)['selected'] = True
        # Multiple files
        elif i % 2 == 1:
            for file in files:
                file['selected'] = True

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(f'Measure of dispersion: {measure_dispersion}')
        print(f'Measure of adjusted frequency: {measure_adjusted_freq}\n')

        wl_wordlist_generator.Wl_Worker_Wordlist_Generator_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        ).run()

    print('All pass!')
    main.app.quit()
def test_ngram_generator():
    """Run the N-gram Generator once for every pair of (dispersion measure,
    adjusted frequency measure), cycling through four scenarios: single/multiple
    files crossed with search terms enabled/disabled.
    """
    print('Start testing module N-gram Generator...')

    measures_dispersion = list(main.settings_global['measures_dispersion'].keys())
    measures_adjusted_freq = list(main.settings_global['measures_adjusted_freq'].keys())

    # Pad the shorter list by cycling it so that both lists can be zipped
    # pairwise without dropping any measure of the longer list. Equivalent to
    # the quotient/remainder extension but also safe when a list is empty.
    len_max = max(len(measures_dispersion), len(measures_adjusted_freq))
    measures_dispersion = (measures_dispersion * len_max)[:len_max]
    measures_adjusted_freq = (measures_adjusted_freq * len_max)[:len_max]

    # Search terms
    main.settings_custom['ngram_generator']['search_settings']['multi_search_mode'] = True
    main.settings_custom['ngram_generator']['search_settings']['search_terms'] = wl_test_init.SEARCH_TERMS

    for i, (measure_dispersion, measure_adjusted_freq) in enumerate(
        zip(measures_dispersion, measures_adjusted_freq)
    ):
        for file in files:
            file['selected'] = False

        # Single file with search terms
        if i % 4 == 0:
            random.choice(files)['selected'] = True

            main.settings_custom['ngram_generator']['search_settings']['search_settings'] = True
        # Single file without search terms
        elif i % 4 == 1:
            random.choice(files)['selected'] = True

            main.settings_custom['ngram_generator']['search_settings']['search_settings'] = False
        # Multiple files with search terms
        elif i % 4 == 2:
            for file in files:
                file['selected'] = True

            main.settings_custom['ngram_generator']['search_settings']['search_settings'] = True
        # Multiple files without search terms
        elif i % 4 == 3:
            for file in files:
                file['selected'] = True

            main.settings_custom['ngram_generator']['search_settings']['search_settings'] = False

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(f"Search settings: {main.settings_custom['ngram_generator']['search_settings']['search_settings']}")
        print(f'Measure of dispersion: {measure_dispersion}')
        print(f'Measure of adjusted frequency: {measure_adjusted_freq}\n')

        wl_ngram_generator.Wl_Worker_Ngram_Generator_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        ).run()

    print('All pass!')
    main.app.quit()
def test_keyword_extractor():
    """Run the Keyword Extractor once for every pair of (significance test,
    effect size measure), cycling through four reference/observed file
    combinations (single/multiple crossed).
    """
    print('Start testing module Keyword Extractor... ')

    tests_significance = list(main.settings_global['tests_significance']['keyword_extractor'].keys())
    measures_effect_size = list(main.settings_global['measures_effect_size']['keyword_extractor'].keys())

    # Pad the shorter list by cycling it so that both lists can be zipped
    # pairwise without dropping any item of the longer list. Equivalent to
    # the quotient/remainder extension but also safe when a list is empty.
    len_max = max(len(tests_significance), len(measures_effect_size))
    tests_significance = (tests_significance * len_max)[:len_max]
    measures_effect_size = (measures_effect_size * len_max)[:len_max]

    files = main.settings_custom['file_area']['files_open']

    for i, (test_significance, measure_effect_size) in enumerate(
        zip(tests_significance, measures_effect_size)
    ):
        for file in files:
            file['selected'] = False

        # Single reference file & single observed file
        if i % 4 == 0:
            file_reference, file_observed = random.sample(files, 2)

            main.settings_custom['keyword_extractor']['generation_settings']['ref_files'] = [file_reference['name']]

            file_reference['selected'] = True
            file_observed['selected'] = True
        # Single reference file & multiple observed files
        elif i % 4 == 1:
            file_reference = random.choice(files)

            main.settings_custom['keyword_extractor']['generation_settings']['ref_files'] = [file_reference['name']]

            for file in files:
                file['selected'] = True
        # Multiple reference files & single observed file
        elif i % 4 == 2:
            file_observed = random.choice(files)

            main.settings_custom['keyword_extractor']['generation_settings']['ref_files'] = [
                file['name']
                for file in files
                if file != file_observed
            ]

            for file in files:
                file['selected'] = True
        # Multiple reference files & multiple observed files
        elif i % 4 == 3:
            main.settings_custom['keyword_extractor']['generation_settings']['ref_files'] = [
                file['name']
                for file in random.sample(files, len(files) // 2)
            ]

            for file in files:
                file['selected'] = True

        files_reference = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.settings_custom['keyword_extractor']['generation_settings']['ref_files']
        ]
        # Observed files are the selected files that are not reference files.
        files_observed = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file['name']).group()
            for file in files
            if (
                file['selected']
                and file['name'] not in main.settings_custom['keyword_extractor']['generation_settings']['ref_files']
            )
        ]

        print(f"Reference files: {', '.join(files_reference)}")
        print(f"Observed files: {', '.join(files_observed)}")
        print(f'Test of Statistical significance: {test_significance}')
        print(f'Measure of effect size: {measure_effect_size}\n')

        wl_keyword_extractor.Wl_Worker_Keyword_Extractor_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        ).run()

    # Print the success message before quitting the app, consistently with the
    # other test modules (originally quit() preceded the print).
    print('All pass!')
    main.app.quit()
def generate_table(main, table):
    """Generate the Keyword Extractor results table.

    Validates that both reference and observed files are selected, then runs
    the extraction worker in a background thread; ``update_gui`` populates the
    table on completion.
    """
    def update_gui(err_msg, keywords_freq_files, keywords_stats_files):
        if not err_msg:
            if keywords_freq_files:
                try:
                    table.settings = copy.deepcopy(main.settings_custom)

                    text_test_significance = settings['generation_settings']['test_significance']
                    text_measure_effect_size = settings['generation_settings']['measure_effect_size']

                    # Column captions for the chosen significance test / effect size measure
                    (text_test_stat,
                     text_p_value,
                     text_bayes_factor) = main.settings_global['tests_significance']['keyword_extractor'][text_test_significance]['cols']
                    text_effect_size = main.settings_global['measures_effect_size']['keyword_extractor'][text_measure_effect_size]['col']

                    table.clr_table()

                    # Insert columns (files)
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', '[Reference Files]\nFrequency'),
                        is_int = True, is_cumulative = True
                    )
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', '[Reference Files]\nFrequency %'),
                        is_pct = True, is_cumulative = True
                    )

                    for file_observed in files_observed:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', '[{}]\nFrequency').format(file_observed['name']),
                            is_int = True, is_cumulative = True, is_breakdown = True
                        )
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', '[{}]\nFrequency %').format(file_observed['name']),
                            is_pct = True, is_cumulative = True, is_breakdown = True
                        )

                        # Some tests report no test statistic / Bayes factor,
                        # in which case those columns are omitted.
                        if text_test_stat:
                            table.ins_header_hor(
                                table.model().columnCount() - 2,
                                f'[{file_observed["name"]}]\n{text_test_stat}',
                                is_float = True, is_breakdown = True
                            )

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file_observed["name"]}]\n{text_p_value}',
                            is_float = True, is_breakdown = True
                        )

                        if text_bayes_factor:
                            table.ins_header_hor(
                                table.model().columnCount() - 2,
                                f'[{file_observed["name"]}]\n{text_bayes_factor}',
                                is_float = True, is_breakdown = True
                            )

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file_observed["name"]}]\n{text_effect_size}',
                            is_float = True, is_breakdown = True
                        )

                    # Insert columns (total)
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\nFrequency'),
                        is_int = True, is_cumulative = True
                    )
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\nFrequency %'),
                        is_pct = True, is_cumulative = True
                    )

                    if text_test_stat:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', 'Total\n') + text_test_stat,
                            is_float = True
                        )

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\n') + text_p_value,
                        is_float = True
                    )

                    if text_bayes_factor:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', 'Total\n') + text_bayes_factor,
                            is_float = True
                        )

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\n') + text_effect_size,
                        is_float = True
                    )

                    # Sort by p-value of the first observed file
                    table.horizontalHeader().setSortIndicator(
                        table.find_header_hor(f'[{files_observed[0]["name"]}]\n{text_p_value}'),
                        Qt.AscendingOrder
                    )

                    cols_freq = table.find_headers_hor(_tr('wl_keyword_extractor', '\nFrequency'))
                    cols_freq_pct = table.find_headers_hor(_tr('wl_keyword_extractor', '\nFrequency %'))

                    # "\nFrequency" also matches the "\nFrequency %" headers; drop them.
                    for col in cols_freq_pct:
                        cols_freq.remove(col)

                    if text_test_stat:
                        cols_test_stat = table.find_headers_hor(f'\n{text_test_stat}')

                    cols_p_value = table.find_headers_hor(_tr('wl_keyword_extractor', '\np-value'))

                    if text_bayes_factor:
                        cols_bayes_factor = table.find_headers_hor(_tr('wl_keyword_extractor', '\nBayes Factor'))

                    cols_effect_size = table.find_headers_hor(f'\n{text_effect_size}')
                    col_files_found = table.find_header_hor(_tr('wl_keyword_extractor', 'Number of\nFiles Found'))
                    col_files_found_pct = table.find_header_hor(_tr('wl_keyword_extractor', 'Number of\nFiles Found %'))

                    freq_totals = numpy.array(list(keywords_freq_files.values())).sum(axis = 0)
                    len_files_observed = len(files_observed)

                    table.model().setRowCount(len(keywords_freq_files))
                    table.disable_updates()

                    for i, (keyword, stats_files) in enumerate(
                        wl_sorting.sorted_keywords_stats_files(keywords_stats_files)
                    ):
                        freq_files = keywords_freq_files[keyword]

                        # Rank
                        table.set_item_num(i, 0, -1)

                        # Keyword
                        table.model().setItem(i, 1, wl_tables.Wl_Table_Item(keyword))

                        # Frequency
                        for j, freq in enumerate(freq_files):
                            table.set_item_num(i, cols_freq[j], freq)
                            table.set_item_num(i, cols_freq_pct[j], freq, freq_totals[j])

                        for j, (test_stat, p_value, bayes_factor, effect_size) in enumerate(stats_files):
                            # Test Statistic
                            if text_test_stat:
                                table.set_item_num(i, cols_test_stat[j], test_stat)

                            # p-value
                            table.set_item_num(i, cols_p_value[j], p_value)

                            # Bayes Factor
                            if text_bayes_factor:
                                table.set_item_num(i, cols_bayes_factor[j], bayes_factor)

                            # Effect Size
                            table.set_item_num(i, cols_effect_size[j], effect_size)

                        # Number of Files Found
                        # (skip the leading reference-files column and the trailing total column)
                        num_files_found = len([freq for freq in freq_files[1:-1] if freq])

                        table.set_item_num(i, col_files_found, num_files_found)
                        table.set_item_num(i, col_files_found_pct, num_files_found, len_files_observed)

                    table.enable_updates()

                    table.toggle_pct()
                    table.toggle_cumulative()
                    table.toggle_breakdown()
                    table.update_ranks()

                    wl_msgs.wl_msg_generate_table_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_table_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    settings = main.settings_custom['keyword_extractor']

    files_ref = list(main.wl_file_area.find_files_by_name(
        settings['generation_settings']['ref_files'],
        selected_only = True
    ))
    files_observed = [
        file_observed
        for file_observed in main.wl_file_area.get_selected_files()
        if file_observed not in files_ref
    ]

    if files_ref and files_observed:
        worker_keyword_extractor_table = Wl_Worker_Keyword_Extractor_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        )
        wl_threading.Wl_Thread(worker_keyword_extractor_table).start_worker()
    else:
        if not files_ref:
            wl_msg_box_missing_ref_files(main)
        elif not files_observed:
            wl_msg_box_missing_observed_files(main)

        wl_msgs.wl_msg_generate_table_error(main)
def generate_fig(main):
    """Generate the Keyword Extractor figure.

    Validates that both reference and observed files are selected, then runs
    the extraction worker in a background thread; ``update_gui`` plots either
    frequencies or the chosen statistic on completion.
    """
    def update_gui(err_msg, keywords_freq_files, keywords_stats_files):
        if not err_msg:
            if keywords_freq_files:
                try:
                    text_test_significance = settings['generation_settings']['test_significance']
                    text_measure_effect_size = settings['generation_settings']['measure_effect_size']

                    (text_test_stat,
                     text_p_value,
                     text_bayes_factor) = main.settings_global['tests_significance']['keyword_extractor'][text_test_significance]['cols']
                    text_effect_size = main.settings_global['measures_effect_size']['keyword_extractor'][text_measure_effect_size]['col']

                    if settings['fig_settings']['use_data'] == _tr('wl_keyword_extractor', 'Frequency'):
                        wl_figs_freqs.wl_fig_freq_keyword_extractor(
                            main, keywords_freq_files,
                            files_ref = files_ref,
                            settings = settings['fig_settings'],
                            label_x = _tr('wl_keyword_extractor', 'Keyword')
                        )
                    else:
                        # Per-file stats are stored as (test stat, p-value,
                        # Bayes factor, effect size) tuples; pick the column
                        # matching the data source selected in the settings.
                        if settings['fig_settings']['use_data'] == text_test_stat:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 0]
                                for keyword, stats_files in keywords_stats_files.items()
                            }

                            label_y = text_test_stat
                        elif settings['fig_settings']['use_data'] == text_p_value:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 1]
                                for keyword, stats_files in keywords_stats_files.items()
                            }

                            label_y = text_p_value
                        elif settings['fig_settings']['use_data'] == text_bayes_factor:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 2]
                                for keyword, stats_files in keywords_stats_files.items()
                            }

                            label_y = text_bayes_factor
                        elif settings['fig_settings']['use_data'] == text_effect_size:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 3]
                                for keyword, stats_files in keywords_stats_files.items()
                            }

                            label_y = text_effect_size

                        wl_figs_stats.wl_fig_stat_keyword_extractor(
                            main, keywords_stat_files,
                            files_ref = files_ref,
                            settings = settings['fig_settings'],
                            label_y = label_y
                        )

                    # Hide the progress dialog early so that the main window will not obscure the generated figure
                    worker_keyword_extractor_fig.dialog_progress.accept()
                    wl_figs.show_fig()

                    wl_msgs.wl_msg_generate_fig_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_fig_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    settings = main.settings_custom['keyword_extractor']

    files_ref = settings['generation_settings']['ref_files']
    file_names_observed = [
        file_name
        for file_name in main.wl_file_area.get_selected_file_names()
        if file_name not in files_ref
    ]

    if files_ref and file_names_observed:
        worker_keyword_extractor_fig = Wl_Worker_Keyword_Extractor_Fig(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        )
        wl_threading.Wl_Thread(worker_keyword_extractor_fig).start_worker()
    else:
        if not files_ref:
            wl_msg_box_missing_ref_files(main)
        elif not file_names_observed:
            wl_msg_box_missing_observed_files(main)

        wl_msgs.wl_msg_generate_fig_error(main)
def generate_fig(main):
    """Generate the Wordlist Generator figure.

    Runs the wordlist worker in a background thread; ``update_gui`` plots
    either token frequencies or the chosen dispersion / adjusted frequency
    statistic on completion.
    """
    def update_gui(err_msg, tokens_freq_files, tokens_stats_files):
        if not err_msg:
            if tokens_freq_files:
                try:
                    settings = main.settings_custom['wordlist_generator']

                    measure_dispersion = settings['generation_settings']['measure_dispersion']
                    measure_adjusted_freq = settings['generation_settings']['measure_adjusted_freq']

                    col_dispersion = main.settings_global['measures_dispersion'][measure_dispersion]['col']
                    col_adjusted_freq = main.settings_global['measures_adjusted_freq'][measure_adjusted_freq]['col']

                    if settings['fig_settings']['use_data'] == _tr('wl_wordlist_generator', 'Frequency'):
                        wl_figs_freqs.wl_fig_freq(
                            main, tokens_freq_files,
                            settings = settings['fig_settings'],
                            label_x = _tr('wl_wordlist_generator', 'Token')
                        )
                    else:
                        # Per-file stats are (dispersion, adjusted frequency)
                        # pairs; pick the column matching the selected data source.
                        if settings['fig_settings']['use_data'] == col_dispersion:
                            tokens_stat_files = {
                                token: numpy.array(stats_files)[:, 0]
                                for token, stats_files in tokens_stats_files.items()
                            }

                            label_y = col_dispersion
                        elif settings['fig_settings']['use_data'] == col_adjusted_freq:
                            tokens_stat_files = {
                                token: numpy.array(stats_files)[:, 1]
                                for token, stats_files in tokens_stats_files.items()
                            }

                            label_y = col_adjusted_freq

                        wl_figs_stats.wl_fig_stat(
                            main, tokens_stat_files,
                            settings = settings['fig_settings'],
                            label_x = _tr('wl_wordlist_generator', 'Token'),
                            label_y = label_y
                        )

                    # Hide the progress dialog early so that the main window will not obscure the generated figure
                    worker_wordlist_generator_fig.dialog_progress.accept()
                    wl_figs.show_fig()

                    wl_msgs.wl_msg_generate_fig_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_fig_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    worker_wordlist_generator_fig = Wl_Worker_Wordlist_Generator_Fig(
        main,
        dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
        update_gui = update_gui
    )
    wl_threading.Wl_Thread(worker_wordlist_generator_fig).start_worker()
def generate_table(main, table):
    """Generate the Wordlist Generator results table.

    Runs the wordlist worker in a background thread; ``update_gui`` builds the
    per-file and total frequency / dispersion / adjusted frequency columns and
    fills in one row per token on completion.
    """
    def update_gui(err_msg, tokens_freq_files, tokens_stats_files):
        if not err_msg:
            if tokens_freq_files:
                try:
                    table.settings = copy.deepcopy(main.settings_custom)
                    settings = main.settings_custom['wordlist_generator']

                    text_measure_dispersion = settings['generation_settings']['measure_dispersion']
                    text_measure_adjusted_freq = settings['generation_settings']['measure_adjusted_freq']

                    text_dispersion = main.settings_global['measures_dispersion'][text_measure_dispersion]['col']
                    text_adjusted_freq = main.settings_global['measures_adjusted_freq'][text_measure_adjusted_freq]['col']

                    # When tags are generated instead of tokens, relabel the second column.
                    if settings['token_settings']['use_tags']:
                        table.horizontalHeaderItem(1).setText(_tr('wl_wordlist_generator', 'Tag'))

                    table.clr_table()

                    # Insert columns (files)
                    files = list(main.wl_file_area.get_selected_files())

                    for file in files:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_wordlist_generator', '[{}]\nFrequency').format(file['name']),
                            is_int = True, is_cumulative = True, is_breakdown = True
                        )
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_wordlist_generator', '[{}]\nFrequency %').format(file['name']),
                            is_pct = True, is_cumulative = True, is_breakdown = True
                        )
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file["name"]}]\n{text_dispersion}',
                            is_float = True, is_breakdown = True
                        )
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file["name"]}]\n{text_adjusted_freq}',
                            is_float = True, is_breakdown = True
                        )

                    # Insert columns (total)
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\nFrequency'),
                        is_int = True, is_cumulative = True
                    )
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\nFrequency %'),
                        is_pct = True, is_cumulative = True
                    )
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\n') + text_dispersion,
                        is_float = True
                    )
                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\n') + text_adjusted_freq,
                        is_float = True
                    )

                    # Sort by frequency of the first file
                    table.horizontalHeader().setSortIndicator(
                        table.find_header_hor(
                            _tr('wl_wordlist_generator', '[{}]\nFrequency').format(files[0]['name'])
                        ),
                        Qt.DescendingOrder
                    )

                    cols_freq = table.find_headers_hor(_tr('wl_wordlist_generator', '\nFrequency'))
                    cols_freq_pct = table.find_headers_hor(_tr('wl_wordlist_generator', '\nFrequency %'))

                    # "\nFrequency" also matches the "\nFrequency %" headers; drop them.
                    for col in cols_freq_pct:
                        cols_freq.remove(col)

                    cols_dispersion = table.find_headers_hor(f'\n{text_dispersion}')
                    cols_adjusted_freq = table.find_headers_hor(f'\n{text_adjusted_freq}')
                    col_files_found = table.find_header_hor(_tr('wl_wordlist_generator', 'Number of\nFiles Found'))
                    col_files_found_pct = table.find_header_hor(_tr('wl_wordlist_generator', 'Number of\nFiles Found %'))

                    freq_totals = numpy.array(list(tokens_freq_files.values())).sum(axis = 0)
                    len_files = len(files)

                    table.model().setRowCount(len(tokens_freq_files))
                    table.disable_updates()

                    for i, (token, freq_files) in enumerate(
                        wl_sorting.sorted_tokens_freq_files(tokens_freq_files)
                    ):
                        stats_files = tokens_stats_files[token]

                        # Rank
                        table.set_item_num(i, 0, -1)

                        # Token
                        table.model().setItem(i, 1, wl_tables.Wl_Table_Item(token))

                        # Frequency
                        for j, freq in enumerate(freq_files):
                            table.set_item_num(i, cols_freq[j], freq)
                            table.set_item_num(i, cols_freq_pct[j], freq, freq_totals[j])

                        for j, (dispersion, adjusted_freq) in enumerate(stats_files):
                            # Dispersion
                            table.set_item_num(i, cols_dispersion[j], dispersion)

                            # Adjusted Frequency
                            table.set_item_num(i, cols_adjusted_freq[j], adjusted_freq)

                        # Number of Files Found (skip the trailing total column)
                        num_files_found = len([freq for freq in freq_files[:-1] if freq])

                        table.set_item_num(i, col_files_found, num_files_found)
                        table.set_item_num(i, col_files_found_pct, num_files_found, len_files)

                    table.enable_updates()

                    table.toggle_pct()
                    table.toggle_cumulative()
                    table.toggle_breakdown()
                    table.update_ranks()

                    wl_msgs.wl_msg_generate_table_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_table_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    worker_wordlist_generator_table = Wl_Worker_Wordlist_Generator_Table(
        main,
        dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
        update_gui = update_gui
    )
    wl_threading.Wl_Thread(worker_wordlist_generator_table).start_worker()
def test_collocation_extractor():
    """Run the Collocation Extractor once for every pair of (significance
    test, effect size measure), cycling through four scenarios: single/multiple
    files crossed with search terms enabled/disabled.
    """
    print('Start testing module Collocation Extractor...')

    tests_significance = list(main.settings_global['tests_significance']['collocation_extractor'].keys())
    measures_effect_size = list(main.settings_global['measures_effect_size']['collocation_extractor'].keys())

    # Pad the shorter list by cycling it so that both lists can be zipped
    # pairwise without dropping any item of the longer list. Equivalent to
    # the quotient/remainder extension but also safe when a list is empty.
    len_max = max(len(tests_significance), len(measures_effect_size))
    tests_significance = (tests_significance * len_max)[:len_max]
    measures_effect_size = (measures_effect_size * len_max)[:len_max]

    files = main.settings_custom['file_area']['files_open']

    # Search terms: loop-invariant, so set once before the loop (hoisted,
    # consistently with test_ngram_generator).
    main.settings_custom['collocation_extractor']['search_settings']['multi_search_mode'] = True
    main.settings_custom['collocation_extractor']['search_settings']['search_terms'] = wl_test_init.SEARCH_TERMS

    for i, (test_significance, measure_effect_size) in enumerate(
        zip(tests_significance, measures_effect_size)
    ):
        for file in files:
            file['selected'] = False

        # Single file with search terms
        if i % 4 == 0:
            random.choice(files)['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings']['search_settings'] = True
        # Single file without search terms
        elif i % 4 == 1:
            random.choice(files)['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings']['search_settings'] = False
        # Multiple files with search terms
        elif i % 4 == 2:
            for file in files:
                file['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings']['search_settings'] = True
        # Multiple files without search terms
        elif i % 4 == 3:
            for file in random.sample(files, 3):
                file['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings']['search_settings'] = False

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file['name']).group()
            for file in files
            if file['selected']
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(f"Search settings: {main.settings_custom['collocation_extractor']['search_settings']['search_settings']}")
        print(f'Test of Statistical significance: {test_significance}')
        print(f'Measure of effect size: {measure_effect_size}\n')

        wl_collocation_extractor.Wl_Worker_Collocation_Extractor_Table(
            main,
            dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
            update_gui = update_gui
        ).run()

    # Fixed: print the same success message as the other test modules
    # ('pass!' -> 'All pass!') and print it before quitting the app.
    print('All pass!')
    main.app.quit()