Esempio n. 1
0
def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach a threshold.

    The shape of a window is a string built from its occurrences, in order:
    an occurrence whose tag (``occurrence[2]``) is ``'v'`` contributes
    nothing, one tagged ``'t'`` contributes ``occurrence[1]``, and any other
    occurrence contributes its tag. Every token is followed by one space, so
    a non-empty shape ends with a space.

    Args:
        valid_windows: Iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: Minimum ``len()`` an entry's value must have
            for the entry to be kept.

    Returns:
        A dict holding the entries of
        ``ana_useful.merge_egal_sple_dictkeys(...)`` whose value has at
        least ``expansion_threshold`` elements.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # Keys keep their trailing space on purpose: the earlier
        # ``shape.strip()`` call discarded its result and never altered them.
        shape = ''.join(token + ' ' for token in tokens)
        dict_cand_windows.setdefault(shape, []).append(window)

    print(
        '\n\n\n################# RECHERCHE EXPANSIONS : DEBUT RECHERCHE CANDIDATS #################\n'
    )
    # TODO: the next step is VERY slow.
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(
        dict_cand_windows)
    print(
        '\n\n\n################# RECHERCHE EXPANSIONS : FIN RECHERCHE CANDIDATS #################\n'
    )

    # Threshold check: a shape is only a potential expansion until it has
    # enough windows to be validated and returned.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
Esempio n. 2
0
def dict_found_words(valid_windows):
    """Map every 't' word to the windows in which it occurs.

    For each occurrence whose tag (``occurrence[2]``) equals ``'t'``, the
    enclosing window is appended to the list stored under ``occurrence[1]``.
    The resulting dict is then handed to
    ``ana_useful.merge_egal_sple_dictkeys`` and that call's result is
    returned.
    """
    # A dict cannot be modified while it is being iterated, so first build a
    # dict with every 't' word, regardless of whether some keys are equal up
    # to soft equality.
    windows_by_word = {}
    # In principle there is only one 't' occurrence per window.
    t_hits = (
        (occurrence[1], window)
        for window in valid_windows
        for occurrence in window
        if occurrence[2] == 't'
    )
    for word, window in t_hits:
        windows_by_word.setdefault(word, []).append(window)
    return ana_useful.merge_egal_sple_dictkeys(windows_by_word)
Esempio n. 3
0
def dict_found_words(valid_windows):
    """Collect, per 't' word, the list of windows that contain it.

    Each occurrence tagged ``'t'`` (``occurrence[2] == 't'``) adds its window
    to the list keyed by ``occurrence[1]``. The grouped dict is passed to
    ``ana_useful.merge_egal_sple_dictkeys`` and the helper's result is
    returned unchanged.
    """
    # A dict cannot be altered while iterating over it, so a dict with all
    # the 't' words is built first; it does not matter here whether some of
    # them are equal up to soft equality.
    grouped = {}
    for window in valid_windows:
        # In principle a window holds a single 't' occurrence.
        for word in (occ[1] for occ in window if occ[2] == 't'):
            bucket = grouped.setdefault(word, [])
            bucket.append(window)
    merged = ana_useful.merge_egal_sple_dictkeys(grouped)
    return merged
Esempio n. 4
0
def not_expa_inside_expre(windows):
    """Return the windows whose merged shape group holds one or two windows.

    Windows are grouped by ``ana_useful.aword_shape(window)``, the groups are
    passed through ``ana_useful.merge_egal_sple_dictkeys``, and the members
    of every resulting group of length 1 or 2 are collected.

    Args:
        windows: Iterable of windows accepted by ``ana_useful.aword_shape``.

    Returns:
        A flat list with the windows of every merged group that has fewer
        than three members, in the iteration order of the merged dict.
    """
    windows_by_shape = {}
    for window in windows:
        # Strictly equal shapes are grouped here; shapes that are only equal
        # under soft equality are presumably merged by the helper called
        # below. NOTE(review): confirm against ana_useful.
        shape = ana_useful.aword_shape(window)
        windows_by_shape.setdefault(shape, []).append(window)

    merged = ana_useful.merge_egal_sple_dictkeys(windows_by_shape)

    kept = []
    # A dedicated loop name avoids rebinding the ``windows`` parameter.
    for group in merged.values():
        if 0 < len(group) < 3:
            kept.extend(group)
    return kept
Esempio n. 5
0
def not_expa_inside_expre(windows):
    """Keep only the windows belonging to shape groups of size one or two.

    Each window is filed under ``ana_useful.aword_shape(window)``; the
    resulting dict goes through ``ana_useful.merge_egal_sple_dictkeys``, and
    every merged group with 1 or 2 members contributes its windows to the
    result.

    Args:
        windows: Iterable of windows accepted by ``ana_useful.aword_shape``.

    Returns:
        A flat list of the windows from every merged group that has fewer
        than three members, following the merged dict's iteration order.
    """
    windows_by_shape = {}
    for window in windows:
        # Grouping on strictly equal shapes is handled here; soft equality
        # between shapes is presumably resolved by the merge helper below.
        # NOTE(review): confirm against ana_useful.
        shape = ana_useful.aword_shape(window)
        windows_by_shape.setdefault(shape, []).append(window)

    merged = ana_useful.merge_egal_sple_dictkeys(windows_by_shape)

    kept = []
    # The loop variable is named ``group`` so the ``windows`` parameter is
    # not rebound while iterating.
    for group in merged.values():
        if 0 < len(group) < 3:
            kept.extend(group)
    return kept
Esempio n. 6
0
def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach a threshold.

    The shape of a window is a string built from its occurrences, in order:
    an occurrence whose tag (``occurrence[2]``) is ``'v'`` contributes
    nothing, one tagged ``'t'`` contributes ``occurrence[1]``, and any other
    occurrence contributes its tag. Every token is followed by one space, so
    a non-empty shape ends with a space.

    Args:
        valid_windows: Iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: Minimum ``len()`` an entry's value must have
            for the entry to be kept.

    Returns:
        A dict holding the entries of
        ``ana_useful.merge_egal_sple_dictkeys(...)`` whose value has at
        least ``expansion_threshold`` elements.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # Keys keep their trailing space on purpose: the earlier
        # ``shape.strip()`` call discarded its result and never altered them.
        shape = ''.join(token + ' ' for token in tokens)
        dict_cand_windows.setdefault(shape, []).append(window)
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(dict_cand_windows)

    # Threshold check: a shape is only a potential expansion until it has
    # enough windows to be validated and returned.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
Esempio n. 7
0
def expansion_cand_search(valid_windows, expansion_threshold):
    """Return the window shapes that occur at least ``expansion_threshold`` times.

    A window's shape is the concatenation, in order, of one token per
    occurrence, each followed by a single space: occurrences tagged ``'v'``
    (``occurrence[2]``) are skipped, occurrences tagged ``'t'`` contribute
    ``occurrence[1]``, and all others contribute their tag.

    Args:
        valid_windows: Iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: Minimum ``len()`` an entry's value must have
            for the entry to be kept.

    Returns:
        A dict restricted to the entries of
        ``ana_useful.merge_egal_sple_dictkeys(...)`` whose value has at
        least ``expansion_threshold`` elements.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The trailing space is kept deliberately: the former
        # ``shape.strip()`` call threw its result away, so keys were never
        # stripped and callers see the same keys as before.
        shape = ''.join(token + ' ' for token in tokens)
        dict_cand_windows.setdefault(shape, []).append(window)
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(
        dict_cand_windows)

    # Threshold check: a shape stays a potential expansion until it has
    # enough windows to be validated and returned.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
Esempio n. 8
0
def expansion_cand_search(valid_windows, expansion_threshold):
    """Return the window shapes that occur at least ``expansion_threshold`` times.

    A window's shape is the concatenation, in order, of one token per
    occurrence, each followed by a single space: occurrences tagged ``'v'``
    (``occurrence[2]``) are skipped, occurrences tagged ``'t'`` contribute
    ``occurrence[1]``, and all others contribute their tag. Two progress
    banners are printed around the key-merging step.

    Args:
        valid_windows: Iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: Minimum ``len()`` an entry's value must have
            for the entry to be kept.

    Returns:
        A dict restricted to the entries of
        ``ana_useful.merge_egal_sple_dictkeys(...)`` whose value has at
        least ``expansion_threshold`` elements.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The trailing space is kept deliberately: the former
        # ``shape.strip()`` call threw its result away, so keys were never
        # stripped and callers see the same keys as before.
        shape = ''.join(token + ' ' for token in tokens)
        dict_cand_windows.setdefault(shape, []).append(window)
    print('\n\n\n################# RECHERCHE EXPANSIONS : DEBUT RECHERCHE CANDIDATS #################\n')
    # TODO: the next step is VERY slow.
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(dict_cand_windows)
    print('\n\n\n################# RECHERCHE EXPANSIONS : FIN RECHERCHE CANDIDATS #################\n')

    # Threshold check: a shape stays a potential expansion until it has
    # enough windows to be validated and returned.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }