Example #1
def test_profiler():
    print('Start testing module Profiler...')

    for i in range(2):
        for file in files:
            file['selected'] = False

        # Single file
        if i == 0:
            random.choice(files)['selected'] = True
        # Multiple files
        elif i == 1:
            for file in files:
                file['selected'] = True

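        # Extract the tag between square brackets from each selected file's name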
        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}\n")

        wl_profiler.Wl_Worker_Profiler_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui).run()

    print('All pass!')

    main.app.quit()
Example #2
def test_wordlist_generator():
    print('Start testing module Wordlist Generator...')

    measures_dispersion = list(
        main.settings_global['measures_dispersion'].keys())
    measures_adjusted_freq = list(
        main.settings_global['measures_adjusted_freq'].keys())
    len_diff = abs(len(measures_dispersion) - len(measures_adjusted_freq))

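    # Pad the shorter list of measures by cycling its own elements so that zip() below pairs them all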
    if len(measures_dispersion) > len(measures_adjusted_freq):
        measures_adjusted_freq += measures_adjusted_freq * (
            len_diff // len(measures_adjusted_freq)
        ) + measures_adjusted_freq[:len_diff % len(measures_adjusted_freq)]
    elif len(measures_adjusted_freq) > len(measures_dispersion):
        measures_dispersion += measures_dispersion * (
            len_diff // len(measures_dispersion)
        ) + measures_dispersion[:len_diff % len(measures_dispersion)]

    for i, (measure_dispersion, measure_adjusted_freq) in enumerate(
            zip(measures_dispersion, measures_adjusted_freq)):
        for file in files:
            file['selected'] = False

        # Single file
        if i % 2 == 0:
            random.choice(files)['selected'] = True
        # Multiple files
        elif i % 2 == 1:
            for file in files:
                file['selected'] = True

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(f'Measure of dispersion: {measure_dispersion}')
        print(f'Measure of adjusted frequency: {measure_adjusted_freq}\n')

        wl_wordlist_generator.Wl_Worker_Wordlist_Generator_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui).run()

    print('All pass!')

    main.app.quit()
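
The list-padding logic near the top of this example (and its parallels in the examples below) extends the shorter measure list by cycling its own elements, so that zip() pairs every dispersion measure with an adjusted-frequency measure at least once. A minimal standalone sketch of the same idea with itertools (the helper name and sample values are illustrative, not part of Wordless):

import itertools

def pair_cycled(measures_a, measures_b):
    # Pair two lists, cycling the shorter one until the longer is exhausted
    length = max(len(measures_a), len(measures_b))

    return list(zip(
        itertools.islice(itertools.cycle(measures_a), length),
        itertools.islice(itertools.cycle(measures_b), length)
    ))

# pair_cycled(['a', 'b'], ['x', 'y', 'z'])
# -> [('a', 'x'), ('b', 'y'), ('a', 'z')]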
Example #3
def test_ngram_generator():
    print('Start testing module N-gram Generator...')

    measures_dispersion = list(
        main.settings_global['measures_dispersion'].keys())
    measures_adjusted_freq = list(
        main.settings_global['measures_adjusted_freq'].keys())
    len_diff = abs(len(measures_dispersion) - len(measures_adjusted_freq))

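    # Pad the shorter list of measures by cycling its own elements so that zip() below pairs them all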
    if len(measures_dispersion) > len(measures_adjusted_freq):
        measures_adjusted_freq += measures_adjusted_freq * (
            len_diff // len(measures_adjusted_freq)
        ) + measures_adjusted_freq[:len_diff % len(measures_adjusted_freq)]
    elif len(measures_adjusted_freq) > len(measures_dispersion):
        measures_dispersion += measures_dispersion * (
            len_diff // len(measures_dispersion)
        ) + measures_dispersion[:len_diff % len(measures_dispersion)]

    # Search terms
    main.settings_custom['ngram_generator']['search_settings'][
        'multi_search_mode'] = True
    main.settings_custom['ngram_generator']['search_settings'][
        'search_terms'] = wl_test_init.SEARCH_TERMS

    for i, (measure_dispersion, measure_adjusted_freq) in enumerate(
            zip(measures_dispersion, measures_adjusted_freq)):
        for file in files:
            file['selected'] = False

        # Single file with search terms
        if i % 4 == 0:
            random.choice(files)['selected'] = True

            main.settings_custom['ngram_generator']['search_settings'][
                'search_settings'] = True
        # Single file without search terms
        elif i % 4 == 1:
            random.choice(files)['selected'] = True

            main.settings_custom['ngram_generator']['search_settings'][
                'search_settings'] = False
        # Multiple files with search terms
        elif i % 4 == 2:
            for file in files:
                file['selected'] = True

            main.settings_custom['ngram_generator']['search_settings'][
                'search_settings'] = True
        # Multiple files without search terms
        elif i % 4 == 3:
            for file in files:
                file['selected'] = True

            main.settings_custom['ngram_generator']['search_settings'][
                'search_settings'] = False

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.wl_file_area.get_selected_file_names()
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(
            f"Search settings: {main.settings_custom['ngram_generator']['search_settings']['search_settings']}"
        )
        print(f'Measure of dispersion: {measure_dispersion}')
        print(f'Measure of adjusted frequency: {measure_adjusted_freq}\n')

        wl_ngram_generator.Wl_Worker_Ngram_Generator_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui).run()

    print('All pass!')

    main.app.quit()
Example #4
def test_keyword_extractor():
    print('Start testing module Keyword Extractor...')

    tests_significance = list(
        main.settings_global['tests_significance']['keyword_extractor'].keys())
    measures_effect_size = list(main.settings_global['measures_effect_size']
                                ['keyword_extractor'].keys())
    len_diff = abs(len(tests_significance) - len(measures_effect_size))

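    # Pad the shorter list by cycling its own elements so that zip() below pairs every test with a measure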
    if len(tests_significance) > len(measures_effect_size):
        measures_effect_size += measures_effect_size * (
            len_diff // len(measures_effect_size)
        ) + measures_effect_size[:len_diff % len(measures_effect_size)]
    elif len(measures_effect_size) > len(tests_significance):
        tests_significance += tests_significance * (len_diff // len(
            tests_significance)) + tests_significance[:len_diff %
                                                      len(tests_significance)]

    files = main.settings_custom['file_area']['files_open']

    for i, (test_significance, measure_effect_size) in enumerate(
            zip(tests_significance, measures_effect_size)):
        for file in files:
            file['selected'] = False

        # Single reference file & single observed file
        if i % 4 == 0:
            file_reference, file_observed = random.sample(files, 2)

            main.settings_custom['keyword_extractor']['generation_settings'][
                'ref_files'] = [file_reference['name']]

            file_reference['selected'] = True
            file_observed['selected'] = True
        # Single reference file & multiple observed files
        elif i % 4 == 1:
            file_reference = random.choice(files)

            main.settings_custom['keyword_extractor']['generation_settings'][
                'ref_files'] = [file_reference['name']]

            for file in files:
                file['selected'] = True
        # Multiple reference files & single observed file
        elif i % 4 == 2:
            file_observed = random.choice(files)

            main.settings_custom['keyword_extractor']['generation_settings'][
                'ref_files'] = [
                    file['name'] for file in files if file != file_observed
                ]

            for file in files:
                file['selected'] = True
        # Multiple reference files & multiple observed files
        elif i % 4 == 3:
            main.settings_custom['keyword_extractor']['generation_settings'][
                'ref_files'] = [
                    file['name']
                    for file in random.sample(files,
                                              len(files) // 2)
                ]

            for file in files:
                file['selected'] = True

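        # Extract the bracketed tags from the reference and observed file names for display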
        files_reference = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file_name).group()
            for file_name in main.settings_custom['keyword_extractor']
            ['generation_settings']['ref_files']
        ]
        files_observed = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file['name']).group()
            for file in files if (file['selected'] and file['name'] not in
                                  main.settings_custom['keyword_extractor']
                                  ['generation_settings']['ref_files'])
        ]

        print(f"Reference files: {', '.join(files_reference)}")
        print(f"Observed files: {', '.join(files_observed)}")
        print(f'Test of statistical significance: {test_significance}')
        print(f'Measure of effect size: {measure_effect_size}\n')

        wl_keyword_extractor.Wl_Worker_Keyword_Extractor_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui).run()

    print('All pass!')

    main.app.quit()
Example #5
def generate_table(main, table):
    def update_gui(err_msg, keywords_freq_files, keywords_stats_files):
        if not err_msg:
            if keywords_freq_files:
                try:
                    table.settings = copy.deepcopy(main.settings_custom)

                    text_test_significance = settings['generation_settings'][
                        'test_significance']
                    text_measure_effect_size = settings['generation_settings'][
                        'measure_effect_size']

                    (text_test_stat, text_p_value, text_bayes_factor
                     ) = main.settings_global['tests_significance'][
                         'keyword_extractor'][text_test_significance]['cols']
                    text_effect_size = main.settings_global[
                        'measures_effect_size']['keyword_extractor'][
                            text_measure_effect_size]['col']

                    table.clr_table()

                    # Insert columns (files)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_keyword_extractor',
                                             '[Reference Files]\nFrequency'),
                                         is_int=True,
                                         is_cumulative=True)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_keyword_extractor',
                                             '[Reference Files]\nFrequency %'),
                                         is_pct=True,
                                         is_cumulative=True)

                    for file_observed in files_observed:
                        table.ins_header_hor(table.model().columnCount() - 2,
                                             _tr('wl_keyword_extractor',
                                                 '[{}]\nFrequency').format(
                                                     file_observed['name']),
                                             is_int=True,
                                             is_cumulative=True,
                                             is_breakdown=True)
                        table.ins_header_hor(table.model().columnCount() - 2,
                                             _tr('wl_keyword_extractor',
                                                 '[{}]\nFrequency %').format(
                                                     file_observed['name']),
                                             is_pct=True,
                                             is_cumulative=True,
                                             is_breakdown=True)

                        if text_test_stat:
                            table.ins_header_hor(
                                table.model().columnCount() - 2,
                                f'[{file_observed["name"]}]\n{text_test_stat}',
                                is_float=True,
                                is_breakdown=True)

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file_observed["name"]}]\n{text_p_value}',
                            is_float=True,
                            is_breakdown=True)

                        if text_bayes_factor:
                            table.ins_header_hor(
                                table.model().columnCount() - 2,
                                f'[{file_observed["name"]}]\n{text_bayes_factor}',
                                is_float=True,
                                is_breakdown=True)

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file_observed["name"]}]\n{text_effect_size}',
                            is_float=True,
                            is_breakdown=True)

                    # Insert columns (total)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_keyword_extractor',
                                             'Total\nFrequency'),
                                         is_int=True,
                                         is_cumulative=True)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_keyword_extractor',
                                             'Total\nFrequency %'),
                                         is_pct=True,
                                         is_cumulative=True)

                    if text_test_stat:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', 'Total\n') +
                            text_test_stat,
                            is_float=True)

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\n') + text_p_value,
                        is_float=True)

                    if text_bayes_factor:
                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            _tr('wl_keyword_extractor', 'Total\n') +
                            text_bayes_factor,
                            is_float=True)

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_keyword_extractor', 'Total\n') +
                        text_effect_size,
                        is_float=True)

                    # Sort by p-value of the first observed file
                    table.horizontalHeader().setSortIndicator(
                        table.find_header_hor(
                            f'[{files_observed[0]["name"]}]\n{text_p_value}'),
                        Qt.AscendingOrder)

                    cols_freq = table.find_headers_hor(
                        _tr('wl_keyword_extractor', '\nFrequency'))
                    cols_freq_pct = table.find_headers_hor(
                        _tr('wl_keyword_extractor', '\nFrequency %'))

                    for col in cols_freq_pct:
                        cols_freq.remove(col)

                    if text_test_stat:
                        cols_test_stat = table.find_headers_hor(
                            f'\n{text_test_stat}')

                    cols_p_value = table.find_headers_hor(
                        _tr('wl_keyword_extractor', '\np-value'))

                    if text_bayes_factor:
                        cols_bayes_factor = table.find_headers_hor(
                            _tr('wl_keyword_extractor', '\nBayes Factor'))

                    cols_effect_size = table.find_headers_hor(
                        f'\n{text_effect_size}')
                    col_files_found = table.find_header_hor(
                        _tr('wl_keyword_extractor', 'Number of\nFiles Found'))
                    col_files_found_pct = table.find_header_hor(
                        _tr('wl_keyword_extractor',
                            'Number of\nFiles Found %'))

                    freq_totals = numpy.array(
                        list(keywords_freq_files.values())).sum(axis=0)
                    len_files_observed = len(files_observed)

                    table.model().setRowCount(len(keywords_freq_files))

                    table.disable_updates()

                    for i, (keyword, stats_files) in enumerate(
                            wl_sorting.sorted_keywords_stats_files(
                                keywords_stats_files)):
                        freq_files = keywords_freq_files[keyword]

                        # Rank
                        table.set_item_num(i, 0, -1)

                        # Keyword
                        table.model().setItem(i, 1,
                                              wl_tables.Wl_Table_Item(keyword))

                        # Frequency
                        for j, freq in enumerate(freq_files):
                            table.set_item_num(i, cols_freq[j], freq)
                            table.set_item_num(i, cols_freq_pct[j], freq,
                                               freq_totals[j])

                        for j, (test_stat, p_value, bayes_factor,
                                effect_size) in enumerate(stats_files):
                            # Test Statistic
                            if text_test_stat:
                                table.set_item_num(i, cols_test_stat[j],
                                                   test_stat)

                            # p-value
                            table.set_item_num(i, cols_p_value[j], p_value)

                            # Bayes Factor
                            if text_bayes_factor:
                                table.set_item_num(i, cols_bayes_factor[j],
                                                   bayes_factor)

                            # Effect Size
                            table.set_item_num(i, cols_effect_size[j],
                                               effect_size)

                        # Number of Files Found
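                        # freq_files[0] (reference files) and freq_files[-1] (total) are excluded, so only observed files are counted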
                        num_files_found = len(
                            [freq for freq in freq_files[1:-1] if freq])

                        table.set_item_num(i, col_files_found, num_files_found)
                        table.set_item_num(i, col_files_found_pct,
                                           num_files_found, len_files_observed)

                    table.enable_updates()

                    table.toggle_pct()
                    table.toggle_cumulative()
                    table.toggle_breakdown()
                    table.update_ranks()

                    wl_msgs.wl_msg_generate_table_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_table_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    settings = main.settings_custom['keyword_extractor']

    files_ref = list(
        main.wl_file_area.find_files_by_name(
            settings['generation_settings']['ref_files'], selected_only=True))
    files_observed = [
        file_observed
        for file_observed in main.wl_file_area.get_selected_files()
        if file_observed not in files_ref
    ]

    if files_ref and files_observed:
        worker_keyword_extractor_table = Wl_Worker_Keyword_Extractor_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui)
        wl_threading.Wl_Thread(worker_keyword_extractor_table).start_worker()
    else:
        if not files_ref:
            wl_msg_box_missing_ref_files(main)
        elif not files_observed:
            wl_msg_box_missing_observed_files(main)

        wl_msgs.wl_msg_generate_table_error(main)
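
Each generate_table/generate_fig example here follows the same pattern: construct a worker with a progress dialog and an update_gui callback, run it on a background thread, and let update_gui either render the results or surface err_msg as a fatal error. A minimal sketch of that callback contract, with a simplified signature (this Worker class is an illustrative stand-in, not the actual Wl_Worker API):

import traceback

class Worker:
    def __init__(self, update_gui):
        self.update_gui = update_gui

    def run(self):
        err_msg = ''
        results = {}

        try:
            # Stand-in for the real data processing
            results = {'keyword': [1, 2]}
        except Exception:
            err_msg = traceback.format_exc()

        # The callback always receives the error message first, then the data
        self.update_gui(err_msg, results)

Worker(update_gui=lambda err_msg, results: print(err_msg or results)).run()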
Example #6
def generate_fig(main):
    def update_gui(err_msg, keywords_freq_files, keywords_stats_files):
        if not err_msg:
            if keywords_freq_files:
                try:
                    text_test_significance = settings['generation_settings'][
                        'test_significance']
                    text_measure_effect_size = settings['generation_settings'][
                        'measure_effect_size']

                    (text_test_stat, text_p_value, text_bayes_factor
                     ) = main.settings_global['tests_significance'][
                         'keyword_extractor'][text_test_significance]['cols']
                    text_effect_size = main.settings_global[
                        'measures_effect_size']['keyword_extractor'][
                            text_measure_effect_size]['col']

                    if settings['fig_settings']['use_data'] == _tr(
                            'wl_keyword_extractor', 'Frequency'):
                        wl_figs_freqs.wl_fig_freq_keyword_extractor(
                            main,
                            keywords_freq_files,
                            files_ref=files_ref,
                            settings=settings['fig_settings'],
                            label_x=_tr('wl_keyword_extractor', 'Keyword'))
                    else:
                        if settings['fig_settings'][
                                'use_data'] == text_test_stat:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 0]
                                for keyword, stats_files in
                                keywords_stats_files.items()
                            }

                            label_y = text_test_stat
                        elif settings['fig_settings'][
                                'use_data'] == text_p_value:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 1]
                                for keyword, stats_files in
                                keywords_stats_files.items()
                            }

                            label_y = text_p_value
                        elif settings['fig_settings'][
                                'use_data'] == text_bayes_factor:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 2]
                                for keyword, stats_files in
                                keywords_stats_files.items()
                            }

                            label_y = text_bayes_factor
                        elif settings['fig_settings'][
                                'use_data'] == text_effect_size:
                            keywords_stat_files = {
                                keyword: numpy.array(stats_files)[:, 3]
                                for keyword, stats_files in
                                keywords_stats_files.items()
                            }

                            label_y = text_effect_size

                        wl_figs_stats.wl_fig_stat_keyword_extractor(
                            main,
                            keywords_stat_files,
                            files_ref=files_ref,
                            settings=settings['fig_settings'],
                            label_y=label_y)

                    # Hide the progress dialog early so that the main window will not obscure the generated figure
                    worker_keyword_extractor_fig.dialog_progress.accept()
                    wl_figs.show_fig()

                    wl_msgs.wl_msg_generate_fig_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_fig_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    settings = main.settings_custom['keyword_extractor']

    files_ref = settings['generation_settings']['ref_files']
    file_names_observed = [
        file_name for file_name in main.wl_file_area.get_selected_file_names()
        if file_name not in files_ref
    ]

    if files_ref and file_names_observed:
        worker_keyword_extractor_fig = Wl_Worker_Keyword_Extractor_Fig(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui)
        wl_threading.Wl_Thread(worker_keyword_extractor_fig).start_worker()
    else:
        if not files_ref:
            wl_msg_box_missing_ref_files(main)
        elif not file_names_observed:
            wl_msg_box_missing_observed_files(main)

        wl_msgs.wl_msg_generate_fig_error(main)
Example #7
def generate_fig(main):
    def update_gui(err_msg, tokens_freq_files, tokens_stats_files):
        if not err_msg:
            if tokens_freq_files:
                try:
                    settings = main.settings_custom['wordlist_generator']

                    measure_dispersion = settings['generation_settings'][
                        'measure_dispersion']
                    measure_adjusted_freq = settings['generation_settings'][
                        'measure_adjusted_freq']

                    col_dispersion = main.settings_global[
                        'measures_dispersion'][measure_dispersion]['col']
                    col_adjusted_freq = main.settings_global[
                        'measures_adjusted_freq'][measure_adjusted_freq]['col']

                    if settings['fig_settings']['use_data'] == _tr(
                            'wl_wordlist_generator', 'Frequency'):
                        wl_figs_freqs.wl_fig_freq(
                            main,
                            tokens_freq_files,
                            settings=settings['fig_settings'],
                            label_x=_tr('wl_wordlist_generator', 'Token'))
                    else:
                        if settings['fig_settings'][
                                'use_data'] == col_dispersion:
                            tokens_stat_files = {
                                token: numpy.array(stats_files)[:, 0]
                                for token, stats_files in
                                tokens_stats_files.items()
                            }

                            label_y = col_dispersion
                        elif settings['fig_settings'][
                                'use_data'] == col_adjusted_freq:
                            tokens_stat_files = {
                                token: numpy.array(stats_files)[:, 1]
                                for token, stats_files in
                                tokens_stats_files.items()
                            }

                            label_y = col_adjusted_freq

                        wl_figs_stats.wl_fig_stat(
                            main,
                            tokens_stat_files,
                            settings=settings['fig_settings'],
                            label_x=_tr('wl_wordlist_generator', 'Token'),
                            label_y=label_y)

                    # Hide the progress dialog early so that the main window will not obscure the generated figure
                    worker_wordlist_generator_fig.dialog_progress.accept()
                    wl_figs.show_fig()

                    wl_msgs.wl_msg_generate_fig_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_fig_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    worker_wordlist_generator_fig = Wl_Worker_Wordlist_Generator_Fig(
        main,
        dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
        update_gui=update_gui)
    wl_threading.Wl_Thread(worker_wordlist_generator_fig).start_worker()
Example #8
def generate_table(main, table):
    def update_gui(err_msg, tokens_freq_files, tokens_stats_files):
        if not err_msg:
            if tokens_freq_files:
                try:
                    table.settings = copy.deepcopy(main.settings_custom)
                    settings = main.settings_custom['wordlist_generator']

                    text_measure_dispersion = settings['generation_settings'][
                        'measure_dispersion']
                    text_measure_adjusted_freq = settings[
                        'generation_settings']['measure_adjusted_freq']

                    text_dispersion = main.settings_global[
                        'measures_dispersion'][text_measure_dispersion]['col']
                    text_adjusted_freq = main.settings_global[
                        'measures_adjusted_freq'][text_measure_adjusted_freq][
                            'col']

                    if settings['token_settings']['use_tags']:
                        table.horizontalHeaderItem(1).setText(
                            _tr('wl_wordlist_generator', 'Tag'))

                    table.clr_table()

                    # Insert columns (files)
                    files = list(main.wl_file_area.get_selected_files())

                    for file in files:
                        table.ins_header_hor(table.model().columnCount() - 2,
                                             _tr('wl_wordlist_generator',
                                                 '[{}]\nFrequency').format(
                                                     file['name']),
                                             is_int=True,
                                             is_cumulative=True,
                                             is_breakdown=True)
                        table.ins_header_hor(table.model().columnCount() - 2,
                                             _tr('wl_wordlist_generator',
                                                 '[{}]\nFrequency %').format(
                                                     file['name']),
                                             is_pct=True,
                                             is_cumulative=True,
                                             is_breakdown=True)

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file["name"]}]\n{text_dispersion}',
                            is_float=True,
                            is_breakdown=True)

                        table.ins_header_hor(
                            table.model().columnCount() - 2,
                            f'[{file["name"]}]\n{text_adjusted_freq}',
                            is_float=True,
                            is_breakdown=True)

                    # Insert columns (total)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_wordlist_generator',
                                             'Total\nFrequency'),
                                         is_int=True,
                                         is_cumulative=True)
                    table.ins_header_hor(table.model().columnCount() - 2,
                                         _tr('wl_wordlist_generator',
                                             'Total\nFrequency %'),
                                         is_pct=True,
                                         is_cumulative=True)

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\n') +
                        text_dispersion,
                        is_float=True)

                    table.ins_header_hor(
                        table.model().columnCount() - 2,
                        _tr('wl_wordlist_generator', 'Total\n') +
                        text_adjusted_freq,
                        is_float=True)

                    # Sort by frequency of the first file
                    table.horizontalHeader().setSortIndicator(
                        table.find_header_hor(
                            _tr('wl_wordlist_generator',
                                '[{}]\nFrequency').format(files[0]['name'])),
                        Qt.DescendingOrder)

                    cols_freq = table.find_headers_hor(
                        _tr('wl_wordlist_generator', '\nFrequency'))
                    cols_freq_pct = table.find_headers_hor(
                        _tr('wl_wordlist_generator', '\nFrequency %'))

                    for col in cols_freq_pct:
                        cols_freq.remove(col)

                    cols_dispersion = table.find_headers_hor(
                        f'\n{text_dispersion}')
                    cols_adjusted_freq = table.find_headers_hor(
                        f'\n{text_adjusted_freq}')
                    col_files_found = table.find_header_hor(
                        _tr('wl_wordlist_generator', 'Number of\nFiles Found'))
                    col_files_found_pct = table.find_header_hor(
                        _tr('wl_wordlist_generator',
                            'Number of\nFiles Found %'))

                    freq_totals = numpy.array(list(
                        tokens_freq_files.values())).sum(axis=0)
                    len_files = len(files)

                    table.model().setRowCount(len(tokens_freq_files))

                    table.disable_updates()

                    for i, (token, freq_files) in enumerate(
                            wl_sorting.sorted_tokens_freq_files(
                                tokens_freq_files)):
                        stats_files = tokens_stats_files[token]

                        # Rank
                        table.set_item_num(i, 0, -1)

                        # Token
                        table.model().setItem(i, 1,
                                              wl_tables.Wl_Table_Item(token))

                        # Frequency
                        for j, freq in enumerate(freq_files):
                            table.set_item_num(i, cols_freq[j], freq)
                            table.set_item_num(i, cols_freq_pct[j], freq,
                                               freq_totals[j])

                        for j, (dispersion,
                                adjusted_freq) in enumerate(stats_files):
                            # Dispersion
                            table.set_item_num(i, cols_dispersion[j],
                                               dispersion)

                            # Adjusted Frequency
                            table.set_item_num(i, cols_adjusted_freq[j],
                                               adjusted_freq)

                        # Number of Files Found
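                        # freq_files[-1] is the total across all files, so it is excluded from the count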
                        num_files_found = len(
                            [freq for freq in freq_files[:-1] if freq])

                        table.set_item_num(i, col_files_found, num_files_found)
                        table.set_item_num(i, col_files_found_pct,
                                           num_files_found, len_files)

                    table.enable_updates()

                    table.toggle_pct()
                    table.toggle_cumulative()
                    table.toggle_breakdown()
                    table.update_ranks()

                    wl_msgs.wl_msg_generate_table_success(main)
                except Exception:
                    err_msg = traceback.format_exc()
            else:
                wl_msg_boxes.wl_msg_box_no_results(main)
                wl_msgs.wl_msg_generate_table_error(main)

        if err_msg:
            wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
            wl_msgs.wl_msg_fatal_error(main)

    worker_wordlist_generator_table = Wl_Worker_Wordlist_Generator_Table(
        main,
        dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
        update_gui=update_gui)
    wl_threading.Wl_Thread(worker_wordlist_generator_table).start_worker()
Example #9
def test_collocation_extractor():
    print('Start testing module Collocation Extractor...')

    tests_significance = list(main.settings_global['tests_significance']
                              ['collocation_extractor'].keys())
    measures_effect_size = list(main.settings_global['measures_effect_size']
                                ['collocation_extractor'].keys())
    len_diff = abs(len(tests_significance) - len(measures_effect_size))

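    # Pad the shorter list by cycling its own elements so that zip() below pairs every test with a measure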
    if len(tests_significance) > len(measures_effect_size):
        measures_effect_size += measures_effect_size * (
            len_diff // len(measures_effect_size)
        ) + measures_effect_size[:len_diff % len(measures_effect_size)]
    elif len(measures_effect_size) > len(tests_significance):
        tests_significance += tests_significance * (len_diff // len(
            tests_significance)) + tests_significance[:len_diff %
                                                      len(tests_significance)]

    files = main.settings_custom['file_area']['files_open']

    for i, (test_significance, measure_effect_size) in enumerate(
            zip(tests_significance, measures_effect_size)):
        for file in files:
            file['selected'] = False

        main.settings_custom['collocation_extractor']['search_settings'][
            'multi_search_mode'] = True
        main.settings_custom['collocation_extractor']['search_settings'][
            'search_terms'] = wl_test_init.SEARCH_TERMS

        # Single file with search terms
        if i % 4 == 0:
            random.choice(files)['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings'][
                'search_settings'] = True
        # Single file without search terms
        elif i % 4 == 1:
            random.choice(files)['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings'][
                'search_settings'] = False
        # Multiple files with search terms
        elif i % 4 == 2:
            for file in files:
                file['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings'][
                'search_settings'] = True
        # Multiple files without search terms
        elif i % 4 == 3:
            for file in random.sample(files, 3):
                file['selected'] = True

            main.settings_custom['collocation_extractor']['search_settings'][
                'search_settings'] = False

        files_selected = [
            re.search(r'(?<=\[)[a-z_]+(?=\])', file['name']).group()
            for file in files if file['selected']
        ]

        print(f"Files: {', '.join(files_selected)}")
        print(
            f"Search settings: {main.settings_custom['collocation_extractor']['search_settings']['search_settings']}"
        )
        print(f'Test of statistical significance: {test_significance}')
        print(f'Measure of effect size: {measure_effect_size}\n')

        wl_collocation_extractor.Wl_Worker_Collocation_Extractor_Table(
            main,
            dialog_progress=wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(
                main),
            update_gui=update_gui).run()

    print('All pass!')

    main.app.quit()