Exemple #1
0
def expression_valid_windows(windows, candidate):
    """Collect the windows that can be used to build an expression.

    A window is only considered when ``ana_useful.exists_linkword`` accepts
    it and ``ana_useful.count_cand`` reports exactly 2 for it. Two shapes
    are then kept:

    * the last item of the window is a candidate (``ana_useful.is_cand``):
      the whole window is buffered, unless the third field of that trailing
      candidate (as returned by ``ana_useful.which_cand``) is one of the
      whitespace-separated words of ``candidate``;
    * otherwise the window is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still passes ``ana_useful.exists_linkword``.

    Args:
        windows: iterable of windows; each window must support ``[-1]``.
        candidate: string whose whitespace-separated words are compared
            with the trailing candidate of each window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    valid_windows = []
    buff = []
    # The words of the candidate do not change from one window to the next.
    candidate_words = candidate.split()

    for window in windows:
        if not (ana_useful.exists_linkword(window)
                and ana_useful.count_cand(window) == 2):
            continue

        if ana_useful.is_cand(window[-1]):
            cand2 = ana_useful.which_cand([window[-1]])
            # Avoid building an expression such as
            # "bâtiment de cet ensemble de bâtiments" -> "batiment de bâtiment".
            if cand2[2] not in candidate_words:
                # Here the valid window has the form
                # (CAND1 + "aword" + CAND2) with a linkword somewhere.
                # It goes to the buffer because we still need to know whether
                # the aword in the center is the same 3 times or more (in that
                # case it would be better to build an expansion first, then
                # an expression).
                buff.append(window)
        else:
            # Since there are 2 CAND, the window is 3 words long and the last
            # word is not a CAND, the window had the form
            # CAND + CAND + any word.
            short_window = ana_useful.cut_window(window, 2)
            if ana_useful.exists_linkword(short_window):
                # Here the valid window has the form (CAND1 + CAND2) with a
                # linkword between them.
                valid_windows.append(short_window)

    # Check that the aword in the center is not the same 3 times or more
    # (in that case it would be better to build an expansion first, then an
    # expression).
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows
Exemple #2
0
def nucleus_valid_window(window):
    """Return the tail of ``window`` starting at its first candidate, if valid.

    The window is first required to pass ``ana_useful.exists_linkword``.
    It is then cut so that it starts at the first item accepted by
    ``ana_useful.is_cand`` (or at index 0 when no item is accepted).

    Args:
        window: sequence of occurrences; must support iteration and slicing.

    Returns:
        The slice ``window[index:]`` when ``ana_useful.count_cand`` reports
        fewer than 2 for it and it still passes
        ``ana_useful.exists_linkword``; ``None`` in every other case.
    """
    if not ana_useful.exists_linkword(window):
        return None

    # Position of the first candidate; falls back to 0 when there is none
    # (this also keeps the index defined for an empty window).
    index_cand = next(
        (position for position, occurrence in enumerate(window)
         if ana_useful.is_cand(occurrence)),
        0,
    )

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
Exemple #3
0
def nucleus_valid_window(window):
    """Return the tail of ``window`` starting at its first candidate, if valid.

    The window is first required to pass ``ana_useful.exists_linkword``.
    It is then cut so that it starts at the first item accepted by
    ``ana_useful.is_cand`` (or at index 0 when no item is accepted).

    Args:
        window: sequence of occurrences; must support iteration and slicing.

    Returns:
        The slice ``window[index:]`` when ``ana_useful.count_cand`` reports
        fewer than 2 for it and it still passes
        ``ana_useful.exists_linkword``; ``None`` in every other case.
    """
    if not ana_useful.exists_linkword(window):
        return None

    # Position of the first candidate; falls back to 0 when there is none
    # (this also keeps the index defined for an empty window).
    index_cand = next(
        (position for position, occurrence in enumerate(window)
         if ana_useful.is_cand(occurrence)),
        0,
    )

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
Exemple #4
0
def expression_valid_windows(windows, candidate):
    """Collect the windows that can be used to build an expression.

    A window is only considered when ``ana_useful.exists_linkword`` accepts
    it and ``ana_useful.count_cand`` reports exactly 2 for it. Two shapes
    are then kept:

    * the last item of the window is a candidate (``ana_useful.is_cand``):
      the whole window is buffered, unless the third field of that trailing
      candidate (as returned by ``ana_useful.which_cand``) is one of the
      whitespace-separated words of ``candidate``;
    * otherwise the window is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still passes ``ana_useful.exists_linkword``.

    Args:
        windows: iterable of windows; each window must support ``[-1]``.
        candidate: string whose whitespace-separated words are compared
            with the trailing candidate of each window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    valid_windows = []
    buff = []
    # The words of the candidate do not change from one window to the next.
    candidate_words = candidate.split()

    for window in windows:
        if not (ana_useful.exists_linkword(window)
                and ana_useful.count_cand(window) == 2):
            continue

        if ana_useful.is_cand(window[-1]):
            cand2 = ana_useful.which_cand([window[-1]])
            # Avoid building an expression such as
            # "bâtiment de cet ensemble de bâtiments" -> "batiment de bâtiment".
            if cand2[2] not in candidate_words:
                # Here the valid window has the form
                # (CAND1 + "aword" + CAND2) with a linkword somewhere.
                # It goes to the buffer because we still need to know whether
                # the aword in the center is the same 3 times or more (in that
                # case it would be better to build an expansion first, then
                # an expression).
                buff.append(window)
        else:
            # Since there are 2 CAND, the window is 3 words long and the last
            # word is not a CAND, the window had the form
            # CAND + CAND + any word.
            short_window = ana_useful.cut_window(window, 2)
            if ana_useful.exists_linkword(short_window):
                # Here the valid window has the form (CAND1 + CAND2) with a
                # linkword between them.
                valid_windows.append(short_window)

    # Check that the aword in the center is not the same 3 times or more
    # (in that case it would be better to build an expansion first, then an
    # expression).
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows