Beispiel #1
0
def nucleus_search(dict_occ_ref, candidates, nucleus_threshold, log_file_path):
    """Look for nucleus candidates and log every one that is found.

    Windows come from ``ana_useful.define_windows`` (called with the
    constants 3 and 2 -- presumably window width and number of candidates
    per window; confirm against that helper).  Each window is checked with
    ``nucleus_valid_window`` twice: once as is, and once mirrored through
    ``ana_useful.symmetric_window``.  A valid mirrored result is mirrored
    back before being kept, so one window can contribute two entries.

    Args:
        dict_occ_ref: forwarded unchanged to ``ana_useful.define_windows``.
        candidates: forwarded unchanged to ``ana_useful.define_windows``.
        nucleus_threshold: forwarded to ``nucleus_find_cand``.
        log_file_path: destination handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate (as computed by
        ``ana_useful.new_cand_nucleus``) to a list of occurrence lists.
    """
    kept_windows = []
    for win in ana_useful.define_windows(dict_occ_ref, candidates, 3, 2):
        # Left-to-right reading of the window.
        forward_hit = nucleus_valid_window(win)
        if forward_hit:
            kept_windows.append(forward_hit)

        # Mirrored reading; flip a valid result back to the original order.
        mirrored_hit = nucleus_valid_window(ana_useful.symmetric_window(win))
        if mirrored_hit:
            kept_windows.append(ana_useful.symmetric_window(mirrored_hit))

    found_words = dict_found_words(kept_windows)
    occurrences_by_shape = nucleus_find_cand(found_words, nucleus_threshold)

    nuclei = {}
    # Iterating an empty mapping is a no-op, so no emptiness check is needed.
    for _shortshape, occurrences in occurrences_by_shape.items():
        nucleus, total = ana_useful.new_cand_nucleus(occurrences)
        nuclei.setdefault(nucleus, []).append(occurrences)

        headline = 'NOYAU TROUVE ' + str(nucleus) + ' ' + str(total)
        ana_useful.write_log(log_file_path, headline)
        # TODO: recover the valid windows that led to this nucleus.
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, '   ' + str(occurrence))
    return nuclei
Beispiel #2
0
def expression_search(dict_occ_ref, candidates, expression_threshold,
                      log_file_path):
    """Look for expression candidates, one source candidate at a time.

    For each entry of ``candidates`` the function builds windows with
    ``ana_useful.define_windows``, filters them with
    ``expression_valid_windows`` and, when any remain, groups them with
    ``expression_find_cand``.  Every group yields a new candidate through
    ``ana_useful.new_cand_expression`` and is written to the log.

    Args:
        dict_occ_ref: forwarded unchanged to ``ana_useful.define_windows``.
        candidates: iterable of candidates, processed independently.
        expression_threshold: forwarded to ``expression_find_cand``.
        log_file_path: destination handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate to the list of windows that
        produced it.  A later group yielding the same new candidate
        replaces the earlier entry.
    """
    expressions = {}
    for single in candidates:
        # define_windows takes a list of candidates, so wrap the single one.
        wrapped = [single]
        # Windows of the form `CAND1 + (cand or any word) + (cand or any
        # word)`. Stop words ("v") are not represented.
        cand_windows = ana_useful.define_windows(dict_occ_ref, wrapped, 3, 1)
        kept_windows = expression_valid_windows(cand_windows, wrapped[0])
        if kept_windows == []:
            continue  # nothing usable around this candidate

        windows_by_shape = expression_find_cand(kept_windows,
                                                expression_threshold)
        # Iterating an empty mapping is a no-op, so no emptiness check here.
        for _shortshape, shape_windows in windows_by_shape.items():
            expression, total = ana_useful.new_cand_expression(shape_windows)
            expressions[expression] = shape_windows

            headline = ('EXPRESSION TROUVEE ' + str(expression) + ' ' +
                        str(total))
            ana_useful.write_log(log_file_path, headline)
            ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
            for shape_window in shape_windows:
                ana_useful.write_log(log_file_path, '   ' + str(shape_window))
    return expressions
Beispiel #3
0
def nucleus_search(dict_occ_ref, candidates, nucleus_threshold, log_file_path):
    """Collect nucleus candidates from candidate windows and log them.

    The windows returned by ``ana_useful.define_windows`` (invoked with the
    constants 3 and 2; their meaning is defined by that helper -- TODO
    confirm) are each validated in both directions: directly, and after
    being flipped by ``ana_useful.symmetric_window``.  A valid flipped
    window is flipped back before it is stored.

    Args:
        dict_occ_ref: handed as is to ``ana_useful.define_windows``.
        candidates: handed as is to ``ana_useful.define_windows``.
        nucleus_threshold: handed to ``nucleus_find_cand``.
        log_file_path: target passed to every ``ana_useful.write_log`` call.

    Returns:
        A dict whose keys are the new candidates reported by
        ``ana_useful.new_cand_nucleus`` and whose values are lists of
        occurrence lists.
    """
    all_windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)

    accepted = []
    for current in all_windows:
        direct_result = nucleus_valid_window(current)
        if direct_result:
            accepted.append(direct_result)

        flipped = ana_useful.symmetric_window(current)
        flipped_result = nucleus_valid_window(flipped)
        if not flipped_result:
            continue
        # Restore the original orientation before storing the window.
        accepted.append(ana_useful.symmetric_window(flipped_result))

    words_found = dict_found_words(accepted)
    cand_occurrences = nucleus_find_cand(words_found, nucleus_threshold)

    result = {}
    # An empty mapping simply yields no iterations.
    for _shape, occ_list in cand_occurrences.items():
        created, count = ana_useful.new_cand_nucleus(occ_list)
        result.setdefault(created, []).append(occ_list)

        found_line = 'NOYAU TROUVE ' + str(created) + ' ' + str(count)
        ana_useful.write_log(log_file_path, found_line)
        # TODO: find again the valid windows that made this nucleus possible.
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES')
        for occ in occ_list:
            occ_line = '   ' + str(occ)
            ana_useful.write_log(log_file_path, occ_line)
    return result
Beispiel #4
0
def expansion_search(dict_occ_ref, candidates, expansion_threshold, log_file_path):
    """Look for expansion candidates and log every one that is found.

    Windows are built by ``ana_useful.define_windows`` (called with the
    constants 3 and 2 -- meaning defined by that helper; TODO confirm),
    filtered as a whole by ``expansion_valid_window`` and grouped by
    ``expansion_cand_search``.

    Args:
        dict_occ_ref: forwarded unchanged to ``ana_useful.define_windows``.
        candidates: forwarded unchanged to ``ana_useful.define_windows``.
        expansion_threshold: forwarded to ``expansion_cand_search``.
        log_file_path: destination handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate (from ``ana_useful.new_cand``) to
        the windows of the group it was derived from.  A later group
        yielding the same new candidate replaces the earlier entry.
    """
    kept_windows = expansion_valid_window(
        ana_useful.define_windows(dict_occ_ref, candidates, 3, 2))
    windows_by_shape = expansion_cand_search(kept_windows, expansion_threshold)

    expansions = {}
    # Derive the new candidate of each group, log it with its occurrences,
    # then record it in the result.
    for shape in windows_by_shape:
        shape_windows = windows_by_shape[shape]
        expansion, total = ana_useful.new_cand(shape_windows)

        headline = 'EXPANSION TROUVEE ' + str(expansion) + ' ' + str(total)
        ana_useful.write_log(log_file_path, headline)
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
        for shape_window in shape_windows:
            ana_useful.write_log(log_file_path, '   ' + str(shape_window))

        expansions[expansion] = shape_windows
    return expansions
Beispiel #5
0
def expansion_search(dict_occ_ref, candidates, expansion_threshold,
                     log_file_path):
    """Collect expansion candidates from candidate windows and log them.

    The pipeline is: ``ana_useful.define_windows`` (invoked with the
    constants 3 and 2; their meaning is set by that helper -- TODO
    confirm), then ``expansion_valid_window`` on the full window
    collection, then ``expansion_cand_search`` with the threshold.

    Args:
        dict_occ_ref: handed as is to ``ana_useful.define_windows``.
        candidates: handed as is to ``ana_useful.define_windows``.
        expansion_threshold: handed to ``expansion_cand_search``.
        log_file_path: target passed to every ``ana_useful.write_log`` call.

    Returns:
        A dict keyed by the new candidates reported by
        ``ana_useful.new_cand``; each value is the window group the
        candidate came from (the last group wins on key collision).
    """
    all_windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)
    usable_windows = expansion_valid_window(all_windows)
    grouped = expansion_cand_search(usable_windows, expansion_threshold)

    result = {}
    for shape_key in grouped:
        group = grouped[shape_key]
        created, count = ana_useful.new_cand(group)

        # Log the new candidate first, then each window of its group.
        found_line = 'EXPANSION TROUVEE ' + str(created) + ' ' + str(count)
        ana_useful.write_log(log_file_path, found_line)
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
        for member in group:
            member_line = '   ' + str(member)
            ana_useful.write_log(log_file_path, member_line)

        result[created] = group
    return result
Beispiel #6
0
def expression_search(dict_occ_ref, candidates, expression_threshold, log_file_path):
    """Collect expression candidates, handling each source candidate alone.

    Each element of ``candidates`` is wrapped in a one-item list and given
    to ``ana_useful.define_windows``.  The resulting windows are filtered
    by ``expression_valid_windows``.  If the filter returns an empty list
    the candidate is skipped, otherwise ``expression_find_cand`` groups the
    windows and each group is turned into a new candidate by
    ``ana_useful.new_cand_expression`` and logged.

    Args:
        dict_occ_ref: handed as is to ``ana_useful.define_windows``.
        candidates: iterable of candidates to examine.
        expression_threshold: handed to ``expression_find_cand``.
        log_file_path: target passed to every ``ana_useful.write_log`` call.

    Returns:
        A dict keyed by new candidate; each value is the window list of the
        group it came from (the last group wins on key collision).
    """
    result = {}
    for item in candidates:
        # define_windows works on a list, hence the one-item wrapper.
        as_list = [item]
        # Window shape: `CAND1 + (cand or any word) + (cand or any word)`.
        # Stop words ("v") are not represented.
        all_windows = ana_useful.define_windows(dict_occ_ref, as_list, 3, 1)
        usable_windows = expression_valid_windows(all_windows, as_list[0])
        if usable_windows == []:
            continue

        grouped = expression_find_cand(usable_windows, expression_threshold)
        # An empty mapping simply yields no iterations.
        for _shape, group in grouped.items():
            created, count = ana_useful.new_cand_expression(group)
            result[created] = group

            found_line = 'EXPRESSION TROUVEE ' + str(created) + ' ' + str(count)
            ana_useful.write_log(log_file_path, found_line)
            ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
            for member in group:
                member_line = '   ' + str(member)
                ana_useful.write_log(log_file_path, member_line)
    return result