def expression_valid_windows(windows, candidate):
    """Select the windows from which an expression can be built.

    Only windows that contain a linkword and exactly two candidates (as
    reported by ``ana_useful.exists_linkword`` and ``ana_useful.count_cand``)
    are considered. Two shapes are then handled:

    * The last item of the window is a candidate, i.e. the window has the
      form CAND1 + "aword" + CAND2 with a linkword (schema word) somewhere.
      The window is buffered, unless element ``[2]`` of what
      ``ana_useful.which_cand`` returns for that last item is one of the
      whitespace-separated words of ``candidate``. This avoids building an
      expression such as "bâtiment de cet ensemble de bâtiments" ->
      "batiment de bâtiment".
    * The last item is not a candidate, i.e. the window had the form
      CAND + CAND + any word. It is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still contains a linkword.

    Args:
        windows: iterable of windows (non-empty indexable sequences).
        candidate: string whose whitespace-separated words must not match
            the second candidate of a buffered window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    # NOTE(review): this function is defined twice in this file with the same
    # logic; the last definition is the one bound at import time.
    valid_windows = []
    buff = []
    # Loop-invariant: the words of the candidate being processed.
    candidate_words = candidate.split()

    for window in windows:
        if not (ana_useful.exists_linkword(window)
                and ana_useful.count_cand(window) == 2):
            continue

        last_item = window[-1]
        if ana_useful.is_cand(last_item):
            cand2 = ana_useful.which_cand([last_item])
            # NOTE(review): cand2[2] presumably identifies the second
            # candidate (its name/shape) -- confirm against which_cand.
            if cand2[2] not in candidate_words:
                # Buffered rather than accepted right away: we first need to
                # know whether the "aword" in the centre is the same 3 times
                # or more (in that case it would be better to build an
                # expansion first, then an expression).
                buff.append(window)
            continue

        # Two candidates in the window and the last word is not one of them,
        # so the window was of the form CAND + CAND + any word.
        short_window = ana_useful.cut_window(window, 2)
        if ana_useful.exists_linkword(short_window):
            # Valid window of the form CAND1 + CAND2 with a linkword between
            # them.
            valid_windows.append(short_window)

    # Check that the "aword" in the centre is not the same 3 times or more
    # before accepting the buffered windows.
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows
def nucleus_valid_window(window):
    """Return the tail of ``window`` starting at its first candidate, if valid.

    The window must contain a linkword (``ana_useful.exists_linkword``). It is
    then cut so that it starts at the first item recognised by
    ``ana_useful.is_cand``; when no item is a candidate the whole window is
    kept. The resulting sub-window is returned only if it holds fewer than
    two candidates (``ana_useful.count_cand``) and still contains a linkword.

    Args:
        window: sliceable sequence of occurrences.

    Returns:
        The sub-window (a slice of ``window``) when it is valid, otherwise
        ``None``.
    """
    # NOTE(review): this function is defined twice in this file with the same
    # logic; the last definition is the one bound at import time.
    if not ana_useful.exists_linkword(window):
        return None

    # Position of the first candidate; stays 0 when there is none. It is
    # initialised before the loop so it is always bound, even for an empty
    # window.
    index_cand = 0
    for position, occurrence in enumerate(window):
        if ana_useful.is_cand(occurrence):
            index_cand = position
            break

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
def nucleus_valid_window(window):
    """Return the tail of ``window`` starting at its first candidate, if valid.

    The window must contain a linkword (``ana_useful.exists_linkword``). It is
    then cut so that it starts at the first item recognised by
    ``ana_useful.is_cand``; when no item is a candidate the whole window is
    kept. The resulting sub-window is returned only if it holds fewer than
    two candidates (``ana_useful.count_cand``) and still contains a linkword.

    Args:
        window: sliceable sequence of occurrences.

    Returns:
        The sub-window (a slice of ``window``) when it is valid, otherwise
        ``None``.
    """
    # NOTE(review): this function is defined twice in this file with the same
    # logic; the last definition is the one bound at import time.
    if not ana_useful.exists_linkword(window):
        return None

    # Position of the first candidate; stays 0 when there is none. It is
    # initialised before the loop so it is always bound, even for an empty
    # window.
    index_cand = 0
    for position, occurrence in enumerate(window):
        if ana_useful.is_cand(occurrence):
            index_cand = position
            break

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
def expression_valid_windows(windows, candidate):
    """Select the windows from which an expression can be built.

    Only windows that contain a linkword and exactly two candidates (as
    reported by ``ana_useful.exists_linkword`` and ``ana_useful.count_cand``)
    are considered. Two shapes are then handled:

    * The last item of the window is a candidate, i.e. the window has the
      form CAND1 + "aword" + CAND2 with a linkword (schema word) somewhere.
      The window is buffered, unless element ``[2]`` of what
      ``ana_useful.which_cand`` returns for that last item is one of the
      whitespace-separated words of ``candidate``. This avoids building an
      expression such as "bâtiment de cet ensemble de bâtiments" ->
      "batiment de bâtiment".
    * The last item is not a candidate, i.e. the window had the form
      CAND + CAND + any word. It is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still contains a linkword.

    Args:
        windows: iterable of windows (non-empty indexable sequences).
        candidate: string whose whitespace-separated words must not match
            the second candidate of a buffered window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    # NOTE(review): this function is defined twice in this file with the same
    # logic; the last definition is the one bound at import time.
    valid_windows = []
    buff = []
    # Loop-invariant: the words of the candidate being processed.
    candidate_words = candidate.split()

    for window in windows:
        if not (ana_useful.exists_linkword(window)
                and ana_useful.count_cand(window) == 2):
            continue

        last_item = window[-1]
        if ana_useful.is_cand(last_item):
            cand2 = ana_useful.which_cand([last_item])
            # NOTE(review): cand2[2] presumably identifies the second
            # candidate (its name/shape) -- confirm against which_cand.
            if cand2[2] not in candidate_words:
                # Buffered rather than accepted right away: we first need to
                # know whether the "aword" in the centre is the same 3 times
                # or more (in that case it would be better to build an
                # expansion first, then an expression).
                buff.append(window)
            continue

        # Two candidates in the window and the last word is not one of them,
        # so the window was of the form CAND + CAND + any word.
        short_window = ana_useful.cut_window(window, 2)
        if ana_useful.exists_linkword(short_window):
            # Valid window of the form CAND1 + CAND2 with a linkword between
            # them.
            valid_windows.append(short_window)

    # Check that the "aword" in the centre is not the same 3 times or more
    # before accepting the buffered windows.
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows