def expression_valid_windows(windows, candidate):
    """Collect the windows from which an expression can be built.

    Only windows that contain a linkword and exactly two candidates (CAND)
    are considered. Two shapes are accepted:

    * the window ends with a CAND: it is buffered and later filtered by
      ``not_expa_inside_expre``;
    * the window does not end with a CAND: it is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still contains a linkword.

    Args:
        windows: iterable of windows; each window is an indexable sequence
            of occurrences in the format expected by ``ana_useful``.
        candidate: string whose whitespace-separated tokens are compared
            with the second candidate of each window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; at import time the later definition is the one in effect.
    valid_windows = []
    buff = []
    for window in windows:
        has_linkword = ana_useful.exists_linkword(window)
        if not (has_linkword and ana_useful.count_cand(window) == 2):
            continue

        if ana_useful.is_cand(window[-1]):
            cand2 = ana_useful.which_cand([window[-1]])
            # Avoid building an expression such as "bâtiment de cet ensemble
            # de bâtiments" -> "batiment de bâtiment".
            # NOTE(review): presumably cand2[2] is the candidate's text
            # form -- confirm against ana_useful.which_cand.
            if cand2[2] not in candidate.split():
                # Valid window of the form (CAND1 + "aword" + CAND2) with a
                # linkword somewhere. It goes to the buffer because we need
                # to know whether the central aword is the same 3 times or
                # more (in that case it is better to build an expansion
                # first, then an expression).
                buff.append(window)
        else:
            # There are 2 CANDs, the window is 3 words long and the last
            # word is not a CAND, so the window was of the form
            # CAND + CAND + any word.
            short_window = ana_useful.cut_window(window, 2)
            if ana_useful.exists_linkword(short_window):
                # Valid window of the form (CAND1 + CAND2) with a linkword
                # between them.
                valid_windows.append(short_window)

    # Check that the central aword is not the same 3 times or more (in that
    # case it is better to build an expansion first, then an expression).
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows
def nucleus_valid_window(window):
    """Return the tail of *window* starting at its first candidate, if valid.

    The window must contain a linkword. It is then cut so that it starts at
    the first occurrence recognised by ``ana_useful.is_cand`` (or left
    whole when no occurrence is a candidate). The cut window is returned
    only if it holds fewer than two candidates and still contains a
    linkword.

    Args:
        window: sliceable sequence of occurrences in the format expected by
            ``ana_useful``.

    Returns:
        The cut window when it is valid, otherwise ``None``.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; at import time the later definition is the one in effect.
    if not ana_useful.exists_linkword(window):
        return None

    # Initialised once, before the loop, so the name is always bound even
    # when the window yields no occurrence at all.
    index_cand = 0
    for position, occurrence in enumerate(window):
        if ana_useful.is_cand(occurrence):
            index_cand = position
            break

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
def nucleus_valid_window(window):
    """Return the tail of *window* starting at its first candidate, if valid.

    The window must contain a linkword. It is then cut so that it starts at
    the first occurrence recognised by ``ana_useful.is_cand`` (or left
    whole when no occurrence is a candidate). The cut window is returned
    only if it holds fewer than two candidates and still contains a
    linkword.

    Args:
        window: sliceable sequence of occurrences in the format expected by
            ``ana_useful``.

    Returns:
        The cut window when it is valid, otherwise ``None``.
    """
    # NOTE(review): another definition of this function appears earlier in
    # this file; this later one is the definition in effect at import time.
    if not ana_useful.exists_linkword(window):
        return None

    # Initialised once, before the loop, so the name is always bound even
    # when the window yields no occurrence at all.
    index_cand = 0
    for position, occurrence in enumerate(window):
        if ana_useful.is_cand(occurrence):
            index_cand = position
            break

    right_window = window[index_cand:]
    if (ana_useful.count_cand(right_window) < 2
            and ana_useful.exists_linkword(right_window)):
        return right_window
    return None
def expansion_valid_window(windows):
    """Collect the left/right half-windows from which an expansion can be built.

    For each window, the position of its candidate (CAND) is located; the
    window is then split into a left part (up to and including the CAND)
    and a right part (from the CAND onwards). A part is kept when it
    contains no linkword and the matching outer item of the window stripped
    of function words has ``'t'`` as its third field.

    Args:
        windows: iterable of windows; each window is a sliceable sequence
            of occurrences in the format expected by ``ana_useful``.

    Returns:
        list: the valid right and/or left parts, in window order (right
        part before left part for a given window).
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; at import time the later definition is the one in effect.
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source line -- confirm the per-window code belongs after the inner
    # loop.
    valid_windows = []
    for window in windows:
        # Reset for every window so a window without any CAND can never
        # reuse the position found in a previous window.
        pos_cand = None
        for occurrence in window:
            if ana_useful.is_cand(occurrence):
                # No break: when several occurrences are candidates, the
                # last one found decides the position.
                pos_cand = window.index(occurrence)
        if pos_cand is None:
            # No candidate in this window: nothing to expand.
            continue

        left_window = window[:pos_cand + 1]
        right_window = window[pos_cand:]
        exists_linkword_R = ana_useful.exists_linkword(right_window)
        exists_linkword_L = ana_useful.exists_linkword(left_window)

        # Expansions must not contain a linkword.
        # By construction, and once the 'v' words are removed, the CAND is
        # necessarily in second position.
        # NOTE(review): assumes clean_window has at least 3 items and that
        # 't' marks a plain word -- confirm against ana_useful.
        clean_window = ana_useful.window_wo_fword(window)
        if clean_window[2][2] == 't' and not exists_linkword_R:
            valid_windows.append(right_window)
        if clean_window[0][2] == 't' and not exists_linkword_L:
            valid_windows.append(left_window)
    return valid_windows
def expansion_valid_window(windows):
    """Collect the left/right half-windows from which an expansion can be built.

    For each window, the position of its candidate (CAND) is located; the
    window is then split into a left part (up to and including the CAND)
    and a right part (from the CAND onwards). A part is kept when it
    contains no linkword and the matching outer item of the window stripped
    of function words has ``'t'`` as its third field.

    Args:
        windows: iterable of windows; each window is a sliceable sequence
            of occurrences in the format expected by ``ana_useful``.

    Returns:
        list: the valid right and/or left parts, in window order (right
        part before left part for a given window).
    """
    # NOTE(review): another definition of this function appears earlier in
    # this file; this later one is the definition in effect at import time.
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source line -- confirm the per-window code belongs after the inner
    # loop.
    valid_windows = []
    for window in windows:
        # Reset for every window so a window without any CAND can never
        # reuse the position found in a previous window.
        pos_cand = None
        for occurrence in window:
            if ana_useful.is_cand(occurrence):
                # No break: when several occurrences are candidates, the
                # last one found decides the position.
                pos_cand = window.index(occurrence)
        if pos_cand is None:
            # No candidate in this window: nothing to expand.
            continue

        left_window = window[:pos_cand + 1]
        right_window = window[pos_cand:]
        exists_linkword_R = ana_useful.exists_linkword(right_window)
        exists_linkword_L = ana_useful.exists_linkword(left_window)

        # Expansions must not contain a linkword.
        # By construction, and once the 'v' words are removed, the CAND is
        # necessarily in second position.
        # NOTE(review): assumes clean_window has at least 3 items and that
        # 't' marks a plain word -- confirm against ana_useful.
        clean_window = ana_useful.window_wo_fword(window)
        if clean_window[2][2] == 't' and not exists_linkword_R:
            valid_windows.append(right_window)
        if clean_window[0][2] == 't' and not exists_linkword_L:
            valid_windows.append(left_window)
    return valid_windows
def expression_valid_windows(windows, candidate):
    """Collect the windows from which an expression can be built.

    Only windows that contain a linkword and exactly two candidates (CAND)
    are considered. Two shapes are accepted:

    * the window ends with a CAND: it is buffered and later filtered by
      ``not_expa_inside_expre``;
    * the window does not end with a CAND: it is shortened with
      ``ana_useful.cut_window(window, 2)`` and kept if the shortened window
      still contains a linkword.

    Args:
        windows: iterable of windows; each window is an indexable sequence
            of occurrences in the format expected by ``ana_useful``.
        candidate: string whose whitespace-separated tokens are compared
            with the second candidate of each window.

    Returns:
        list: the shortened windows, followed by whatever
        ``not_expa_inside_expre`` returns for the buffered windows.
    """
    # NOTE(review): another definition of this function appears earlier in
    # this file; this later one is the definition in effect at import time.
    valid_windows = []
    buff = []
    for window in windows:
        has_linkword = ana_useful.exists_linkword(window)
        if not (has_linkword and ana_useful.count_cand(window) == 2):
            continue

        if ana_useful.is_cand(window[-1]):
            cand2 = ana_useful.which_cand([window[-1]])
            # Avoid building an expression such as "bâtiment de cet ensemble
            # de bâtiments" -> "batiment de bâtiment".
            # NOTE(review): presumably cand2[2] is the candidate's text
            # form -- confirm against ana_useful.which_cand.
            if cand2[2] not in candidate.split():
                # Valid window of the form (CAND1 + "aword" + CAND2) with a
                # linkword somewhere. It goes to the buffer because we need
                # to know whether the central aword is the same 3 times or
                # more (in that case it is better to build an expansion
                # first, then an expression).
                buff.append(window)
        else:
            # There are 2 CANDs, the window is 3 words long and the last
            # word is not a CAND, so the window was of the form
            # CAND + CAND + any word.
            short_window = ana_useful.cut_window(window, 2)
            if ana_useful.exists_linkword(short_window):
                # Valid window of the form (CAND1 + CAND2) with a linkword
                # between them.
                valid_windows.append(short_window)

    # Check that the central aword is not the same 3 times or more (in that
    # case it is better to build an expansion first, then an expression).
    valid_windows.extend(not_expa_inside_expre(buff))
    return valid_windows