def concat_number(sentence):
    """
    Collapse every run of consecutive number words into a single element.

    'and' placed between two numbers is removed first (see
    delete_and_from_number). Each maximal run of words for which
    other_functions.number() returns 1 is then replaced by the single value
    returned by other_functions.convert_to_string() (presumably the words
    joined with '+' -- confirm against that helper). The word that directly
    follows the run is absorbed as well when other_functions.number() returns
    2 for it or when it is listed in ResourcePool().adjective_numbers.

    :param list sentence: the sentence as a list of words
    :return: the sentence with each number run merged into one element
    """
    sentence = delete_and_from_number(sentence)
    i = 0
    while i < len(sentence):
        if other_functions.number(sentence[i]) != 1:
            i += 1
            continue

        # Walk to the end of the run of number words
        begin_pos = i
        while i < len(sentence) and other_functions.number(sentence[i]) == 1:
            i += 1
        end_pos = i

        # The word closing the run belongs to the number if it is an ordinal /
        # superlative form. Only this one word has been inspected, so at most
        # one extra word may be absorbed even when both tests succeed.
        if i < len(sentence) and (
                other_functions.number(sentence[i]) == 2
                or sentence[i] in ResourcePool().adjective_numbers):
            end_pos += 1

        merged = other_functions.convert_to_string(sentence[begin_pos:end_pos])
        sentence = sentence[:begin_pos] + [merged] + sentence[end_pos:]

        # The whole run now occupies the single slot begin_pos: resume right
        # after it so that no following word is skipped.
        i = begin_pos + 1
    return sentence
def concat_number(sentence):
    """
    Collapse every run of consecutive number words into a single element.

    'and' placed between two numbers is removed first (see
    delete_and_from_number). Each maximal run of words for which
    other_functions.number() returns 1 is then replaced by the single value
    returned by other_functions.convert_to_string() (presumably the words
    joined with '+' -- confirm against that helper). The word that directly
    follows the run is absorbed as well when other_functions.number() returns
    2 for it or when it is listed in ResourcePool().adjective_numbers.

    :param list sentence: the sentence as a list of words
    :return: the sentence with each number run merged into one element
    """
    sentence = delete_and_from_number(sentence)
    i = 0
    while i < len(sentence):
        if other_functions.number(sentence[i]) != 1:
            i += 1
            continue

        # Walk to the end of the run of number words
        begin_pos = i
        while i < len(sentence) and other_functions.number(sentence[i]) == 1:
            i += 1
        end_pos = i

        # The word closing the run belongs to the number if it is an ordinal /
        # superlative form. Only this one word has been inspected, so at most
        # one extra word may be absorbed even when both tests succeed.
        if i < len(sentence) and (
                other_functions.number(sentence[i]) == 2
                or sentence[i] in ResourcePool().adjective_numbers):
            end_pos += 1

        merged = other_functions.convert_to_string(sentence[begin_pos:end_pos])
        sentence = sentence[:begin_pos] + [merged] + sentence[end_pos:]

        # The whole run now occupies the single slot begin_pos: resume right
        # after it so that no following word is skipped.
        i = begin_pos + 1
    return sentence
def delete_and_from_number(sentence):
    """
    Remove every 'and' that stands between two number words.

    A word counts as a number when other_functions.number() returns 1 for it.
    Only interior positions are examined: an 'and' that opens or closes the
    sentence has no word on one side, so it is always kept (this also avoids
    the negative-index wrap-around and the out-of-range access that a naive
    scan over all positions would perform).

    :param list sentence: the sentence as a list of words
    :return: the sentence without the 'and' linking two numbers; the input
             list itself is returned when nothing had to be removed
    """
    i = 1
    while i < len(sentence) - 1:
        if (sentence[i] == 'and'
                and other_functions.number(sentence[i - 1]) == 1
                and other_functions.number(sentence[i + 1]) == 1):
            # Slicing builds a new list: the caller's list is not mutated
            sentence = sentence[:i] + sentence[i + 1:]
        i += 1
    return sentence
def reorganize_adj(sentence):
    """
    Remove ',' and 'and' when they stand between two adjectives.

    A separator is dropped only when both neighbours are recognised by
    analyse_nominal_group.is_an_adj() and other_functions.number() returns 0
    for both of them. The scan starts at index 1 because a separator in first
    position has no left neighbour (index -1 would wrap around to the last
    word of the sentence).

    :param list sentence: the sentence as a list of words
    :return: the sentence without the separators placed between adjectives;
             the input list itself is returned when nothing had to be removed
    """
    i = 1
    while i < len(sentence) - 1:
        if sentence[i] in (',', 'and'):
            previous_word = sentence[i - 1]
            next_word = sentence[i + 1]
            if (analyse_nominal_group.is_an_adj(next_word)
                    and analyse_nominal_group.is_an_adj(previous_word)
                    and other_functions.number(next_word) == 0
                    and other_functions.number(previous_word) == 0):
                # Slicing builds a new list: the caller's list is not mutated
                sentence = sentence[:i] + sentence[i + 1:]
        i += 1
    return sentence
def find_the_plural(sentence, position):
    """
    Search for a plural nominal group beginning at ``position``.

    :param list sentence: the sentence as a list of words
    :param int position: index of the word to examine
    :return: the index where the plural group starts, or -1 when no plural
             is detected
    """
    # Past the end of the sentence: nothing to inspect
    if position >= len(sentence):
        return -1

    word = sentence[position]

    # A number word is never treated as a plural
    if other_functions.number(word) == 1:
        return -1

    # Skip over a proposal and look at what follows it
    if word in ResourcePool().proposals:
        return find_the_plural(sentence, position + 1)

    # Singular nouns that merely end with 's'
    if word in ResourcePool().nouns_end_s:
        return -1

    if is_an_adj(word):
        # An adjective in front of a plural starts the plural group
        if find_the_plural(sentence, position + 1) != -1:
            return position
        # NOTE(review): the first test looks at sentence[0], not at the
        # current word -- confirm this is intended.
        if sentence[0].endswith("'s") or word.endswith("ous"):
            return -1

    # Plural noun: no nominal group is recognised here and the word ends in 's'
    if find_sn_pos(sentence, position) == [] and word.endswith('s'):
        return position
    return -1
def delete_and_from_number(sentence):
    """
    Remove every 'and' that stands between two number words.

    A word counts as a number when other_functions.number() returns 1 for it.
    Only interior positions are examined: an 'and' that opens or closes the
    sentence has no word on one side, so it is always kept (this also avoids
    the negative-index wrap-around and the out-of-range access that a naive
    scan over all positions would perform).

    :param list sentence: the sentence as a list of words
    :return: the sentence without the 'and' linking two numbers; the input
             list itself is returned when nothing had to be removed
    """
    i = 1
    while i < len(sentence) - 1:
        if (sentence[i] == 'and'
                and other_functions.number(sentence[i - 1]) == 1
                and other_functions.number(sentence[i + 1]) == 1):
            # Slicing builds a new list: the caller's list is not mutated
            sentence = sentence[:i] + sentence[i + 1:]
        i += 1
    return sentence
def is_an_adj(word):
    """
    Tell whether a word is recognised as an adjective.

    :param string word: the word to test
    :return: True if the word is recognised as an adjective, False otherwise
    """
    pool = ResourcePool()

    # Known nouns, verbs, pronouns and determinants are never adjectives
    not_adjectives = (pool.special_nouns + pool.special_verbs
                      + pool.pronouns + pool.determinants)
    if word in not_adjectives:
        return False

    # Regular adjectives are recognised by their ending
    if any(word.endswith(ending) for ending in pool.adjective_rules):
        return True

    # Adjectives built from a number (ending in 'th')
    if word.endswith('th') and other_functions.number(word) == 2:
        return True

    # Everything else must appear in one of the irregular adjective lists
    irregular = (list(pool.adjectives.keys())
                 + pool.adjective_numbers + pool.adj_quantifiers)
    return word in irregular
def is_an_adj(word):
    """
    Tell whether a word is recognised as an adjective.

    :param string word: the word to test
    :return: True if the word is recognised as an adjective, False otherwise
    """
    pool = ResourcePool()

    # Known nouns, verbs, pronouns and determinants are never adjectives
    not_adjectives = (pool.special_nouns + pool.special_verbs
                      + pool.pronouns + pool.determinants)
    if word in not_adjectives:
        return False

    # Regular adjectives are recognised by their ending
    if any(word.endswith(ending) for ending in pool.adjective_rules):
        return True

    # Adjectives built from a number (ending in 'th')
    if word.endswith('th') and other_functions.number(word) == 2:
        return True

    # Everything else must appear in one of the irregular adjective lists
    irregular = (list(pool.adjectives.keys())
                 + pool.adjective_numbers + pool.adj_quantifiers)
    return word in irregular
def reorganize_adj(sentence):
    """
    Remove ',' and 'and' when they stand between two adjectives.

    A separator is dropped only when both neighbours are recognised by
    analyse_nominal_group.is_an_adj() and other_functions.number() returns 0
    for both of them. The scan starts at index 1 because a separator in first
    position has no left neighbour (index -1 would wrap around to the last
    word of the sentence).

    :param list sentence: the sentence as a list of words
    :return: the sentence without the separators placed between adjectives;
             the input list itself is returned when nothing had to be removed
    """
    i = 1
    while i < len(sentence) - 1:
        if sentence[i] in (',', 'and'):
            previous_word = sentence[i - 1]
            next_word = sentence[i + 1]
            if (analyse_nominal_group.is_an_adj(next_word)
                    and analyse_nominal_group.is_an_adj(previous_word)
                    and other_functions.number(next_word) == 0
                    and other_functions.number(previous_word) == 0):
                # Slicing builds a new list: the caller's list is not mutated
                sentence = sentence[:i] + sentence[i + 1:]
        i += 1
    return sentence
def convert_adj_to_digit(adj_list):
    """
    Rewrite number adjectives of the list in digit form, in place.

    An element is rewritten when it ends with 'th' and
    other_functions.number() returns 2 for it; it becomes
    other_functions.convert_to_digit(element) + 'th'.

    :param list adj_list: the adjectives (list of strings), modified in place
    :return: the same list object, after conversion
    """
    # enumerate gives the slot of the current element directly; looking it up
    # with list.index() costs a scan per element and may hit an earlier,
    # equal element instead of the current one.
    for position, adjective in enumerate(adj_list):
        if adjective.endswith('th') and other_functions.number(adjective) == 2:
            adj_list[position] = other_functions.convert_to_digit(adjective) + 'th'
    return adj_list
def convert_adj_to_digit(adj_list):
    """
    Rewrite number adjectives of the list in digit form, in place.

    An element is rewritten when it ends with 'th' and
    other_functions.number() returns 2 for it; it becomes
    other_functions.convert_to_digit(element) + 'th'.

    :param list adj_list: the adjectives (list of strings), modified in place
    :return: the same list object, after conversion
    """
    # enumerate gives the slot of the current element directly; looking it up
    # with list.index() costs a scan per element and may hit an earlier,
    # equal element instead of the current one.
    for position, adjective in enumerate(adj_list):
        if adjective.endswith('th') and other_functions.number(adjective) == 2:
            adj_list[position] = other_functions.convert_to_digit(adjective) + 'th'
    return adj_list
def find_sn(sentence):
    """
    Return the first nominal group found in the sentence.

    Words are examined from left to right; the first word that is a pronoun,
    a determinant, a composed noun, a number or the start of a proper name
    decides the result.

    :param list sentence: the sentence as a list of words
    :return: the nominal group as a list of words, [] if none is found
    """
    if not sentence:
        return []

    # enumerate supplies the index of each word directly instead of
    # re-scanning the sentence with list.index() at every use.
    for position, word in enumerate(sentence):
        # A pronoun is a nominal group on its own
        if word in ResourcePool().pronouns:
            return [word]

        # Determinant: the group runs over the adjectives up to the noun
        if word in ResourcePool().determinants:
            length = 1 + adjective_pos(sentence, position + 1)
            return sentence[position:position + length]

        # Composed nouns such as 'something'
        for prefix in ResourcePool().composed_nouns:
            if word.startswith(prefix):
                # A listed exception stops the whole search
                if word in ResourcePool().noun_not_composed:
                    return []
                return [word]

        # A number is handled like a determinant
        if other_functions.number(word) == 1:
            length = 1 + adjective_pos(sentence, position + 1)
            return sentence[position:position + length]

        # Proper name: consecutive words for which find_cap_lettre() returns 1
        counter = position
        while (counter < len(sentence)
               and other_functions.find_cap_lettre(sentence[counter]) == 1):
            counter += 1
        if counter != position:
            return sentence[position:counter]

    # Default case
    return []
def find_sn(sentence):
    """
    Return the first nominal group found in the sentence.

    Words are examined from left to right; the first word that is a pronoun,
    a determinant, a composed noun, a number or the start of a proper name
    decides the result.

    :param list sentence: the sentence as a list of words
    :return: the nominal group as a list of words, [] if none is found
    """
    if not sentence:
        return []

    # enumerate supplies the index of each word directly instead of
    # re-scanning the sentence with list.index() at every use.
    for position, word in enumerate(sentence):
        # A pronoun is a nominal group on its own
        if word in ResourcePool().pronouns:
            return [word]

        # Determinant: the group runs over the adjectives up to the noun
        if word in ResourcePool().determinants:
            length = 1 + adjective_pos(sentence, position + 1)
            return sentence[position:position + length]

        # Composed nouns such as 'something'
        for prefix in ResourcePool().composed_nouns:
            if word.startswith(prefix):
                # A listed exception stops the whole search
                if word in ResourcePool().noun_not_composed:
                    return []
                return [word]

        # A number is handled like a determinant
        if other_functions.number(word) == 1:
            length = 1 + adjective_pos(sentence, position + 1)
            return sentence[position:position + length]

        # Proper name: consecutive words for which find_cap_lettre() returns 1
        counter = position
        while (counter < len(sentence)
               and other_functions.find_cap_lettre(sentence[counter]) == 1):
            counter += 1
        if counter != position:
            return sentence[position:counter]

    # Default case
    return []
def am_pm(sentence):
    """
    Split a trailing 'am' / 'pm' off the number it is glued to.

    A word is split only when it ends with 'am' or 'pm' and
    other_functions.number() returns 1 for it.

    :param list sentence: the sentence as a list of words
    :return: the sentence with such words replaced by two elements
             (the leading part, then 'am' or 'pm')
    """
    position = 0
    while position < len(sentence):
        token = sentence[position]
        if token.endswith(('am', 'pm')) and other_functions.number(token) == 1:
            leading, suffix = token[:-2], token[-2:]
            sentence = (sentence[:position] + [leading] + [suffix]
                        + sentence[position + 1:])
            # Step over the suffix that has just been inserted
            position += 1
        position += 1
    return sentence
def find_sn_pos(sentence, begin_pos):
    """
    Return the nominal group that starts at a known position.

    adjective_pos() is used to measure how far the group extends after a
    determinant or a number.

    :param list sentence: the sentence (list of strings)
    :param begin_pos: the index at which the nominal group should start
    :return: the nominal group as a list of words, [] if there is none
    """
    if begin_pos >= len(sentence):
        return []

    head = sentence[begin_pos]

    # A pronoun stands alone
    if head in ResourcePool().pronouns:
        return [head]

    # Determinant followed by adjectives and a noun
    if head in ResourcePool().determinants:
        size = 1 + adjective_pos(sentence, begin_pos + 1)
        return sentence[begin_pos:begin_pos + size]

    # Composed nouns such as 'something'
    for prefix in ResourcePool().composed_nouns:
        if head.startswith(prefix):
            if head in ResourcePool().noun_not_composed:
                return []
            return [head]

    # A number behaves like a determinant
    if other_functions.number(head) == 1:
        size = 1 + adjective_pos(sentence, begin_pos + 1)
        return sentence[begin_pos:begin_pos + size]

    # Proper name: take every consecutive word flagged by find_cap_lettre()
    stop = begin_pos
    while (stop < len(sentence)
           and other_functions.find_cap_lettre(sentence[stop]) == 1):
        stop += 1

    # When nothing matched, stop == begin_pos and the slice is []
    return sentence[begin_pos:stop]
def find_sn_pos(sentence, begin_pos):
    """
    Return the nominal group that starts at a known position.

    adjective_pos() is used to measure how far the group extends after a
    determinant or a number.

    :param list sentence: the sentence (list of strings)
    :param begin_pos: the index at which the nominal group should start
    :return: the nominal group as a list of words, [] if there is none
    """
    if begin_pos >= len(sentence):
        return []

    head = sentence[begin_pos]

    # A pronoun stands alone
    if head in ResourcePool().pronouns:
        return [head]

    # Determinant followed by adjectives and a noun
    if head in ResourcePool().determinants:
        size = 1 + adjective_pos(sentence, begin_pos + 1)
        return sentence[begin_pos:begin_pos + size]

    # Composed nouns such as 'something'
    for prefix in ResourcePool().composed_nouns:
        if head.startswith(prefix):
            if head in ResourcePool().noun_not_composed:
                return []
            return [head]

    # A number behaves like a determinant
    if other_functions.number(head) == 1:
        size = 1 + adjective_pos(sentence, begin_pos + 1)
        return sentence[begin_pos:begin_pos + size]

    # Proper name: take every consecutive word flagged by find_cap_lettre()
    stop = begin_pos
    while (stop < len(sentence)
           and other_functions.find_cap_lettre(sentence[stop]) == 1):
        stop += 1

    # When nothing matched, stop == begin_pos and the slice is []
    return sentence[begin_pos:stop]
def am_pm(sentence):
    """
    Split a trailing 'am' / 'pm' off the number it is glued to.

    A word is split only when it ends with 'am' or 'pm' and
    other_functions.number() returns 1 for it.

    :param list sentence: the sentence as a list of words
    :return: the sentence with such words replaced by two elements
             (the leading part, then 'am' or 'pm')
    """
    position = 0
    while position < len(sentence):
        token = sentence[position]
        if token.endswith(('am', 'pm')) and other_functions.number(token) == 1:
            leading, suffix = token[:-2], token[-2:]
            sentence = (sentence[:position] + [leading] + [suffix]
                        + sentence[position + 1:])
            # Step over the suffix that has just been inserted
            position += 1
        position += 1
    return sentence
def return_det(nominal_group):
    """
    Extract the determinant of a nominal group.

    The first word is returned when it is listed in
    ResourcePool().determinants or when other_functions.number() returns 1
    for it.

    :param nominal_group: the nominal group (list of words)
    :return: a one-element list holding the determinant, [] if there is none
    """
    # An empty group has no determinant
    if not nominal_group:
        return []

    first_word = nominal_group[0]
    if (first_word in ResourcePool().determinants
            or other_functions.number(first_word) == 1):
        return [first_word]

    # Default case
    return []
def adjective_pos(sentence, word_pos):
    """
    Count the words from ``word_pos`` up to the end of the nominal group.

    Each adjective found adds one to the count and the scan moves on to the
    next word; the first non-adjective word (taken as the noun) closes the
    group. Callers add this count to the position of the determinant.

    :param list sentence: the sentence (list of strings)
    :param word_pos: the index of the first word after the determinant
    :return: the number of words belonging to the group from ``word_pos``
             (0 when the word is 'of', as in '2 of them')
    """
    # Last word of the sentence (or beyond): it closes the group
    if word_pos >= len(sentence) - 1:
        return 1

    word = sentence[word_pos]

    # The case of '2 of them'
    if word == 'of':
        return 0

    pool = ResourcePool()

    # A known noun closes the group
    if word in pool.special_nouns:
        return 1

    # Regular ending, number-based 'th' form, or irregular adjective lists
    is_adjective = (
        any(word.endswith(ending) for ending in pool.adjective_rules)
        or (word.endswith('th') and other_functions.number(word) == 2)
        or word in (list(pool.adjectives.keys())
                    + pool.adjective_numbers + pool.adj_quantifiers)
    )
    if is_adjective:
        return 1 + adjective_pos(sentence, word_pos + 1)

    # Default case: the word is taken as the noun
    return 1
def adjective_pos(sentence, word_pos):
    """
    Count the words from ``word_pos`` up to the end of the nominal group.

    Each adjective found adds one to the count and the scan moves on to the
    next word; the first non-adjective word (taken as the noun) closes the
    group. Callers add this count to the position of the determinant.

    :param list sentence: the sentence (list of strings)
    :param word_pos: the index of the first word after the determinant
    :return: the number of words belonging to the group from ``word_pos``
             (0 when the word is 'of', as in '2 of them')
    """
    # Last word of the sentence (or beyond): it closes the group
    if word_pos >= len(sentence) - 1:
        return 1

    word = sentence[word_pos]

    # The case of '2 of them'
    if word == 'of':
        return 0

    pool = ResourcePool()

    # A known noun closes the group
    if word in pool.special_nouns:
        return 1

    # Regular ending, number-based 'th' form, or irregular adjective lists
    is_adjective = (
        any(word.endswith(ending) for ending in pool.adjective_rules)
        or (word.endswith('th') and other_functions.number(word) == 2)
        or word in (list(pool.adjectives.keys())
                    + pool.adjective_numbers + pool.adj_quantifiers)
    )
    if is_adjective:
        return 1 + adjective_pos(sentence, word_pos + 1)

    # Default case: the word is taken as the noun
    return 1
def recover_quantifier(nom_gr):
    """
    Set the quantifier of a nominal group and put a plural noun in singular.

    The object is updated in place through its ``det``, ``noun`` and
    ``_quantifier`` attributes; when none of the rules below applies the
    quantifier is left untouched.

    :param nom_gr: the nominal group object
    :return: the same nominal group object
    """
    # --- No determinant: only the noun itself can carry a quantifier
    if not nom_gr.det:
        if nom_gr.noun:
            head = nom_gr.noun[0]
            if head.startswith('any'):
                # 'anything', 'anybody', ...
                nom_gr._quantifier = 'SOME'
            elif head == "everything":
                nom_gr._quantifier = 'ALL'
            elif head.startswith('no'):
                nom_gr._quantifier = 'NONE'
        return nom_gr

    # --- A number used as determinant is stored in digit form
    if other_functions.number(nom_gr.det[0]) == 1:
        nom_gr._quantifier = 'DIGIT'
        nom_gr.det = [other_functions.convert_to_digit(nom_gr.det[0])]

    # --- Quantifier attached to the determinant (the last match wins)
    for entry in ResourcePool().det_quantifiers:
        if entry[0] == nom_gr.det[0]:
            nom_gr._quantifier = entry[1]

    # --- Only a noun ending with 's' can be a plural here
    if nom_gr.noun == [] or not nom_gr.noun[0].endswith('s'):
        return nom_gr

    # Singular nouns that merely end with 's' are left alone
    if any(known == nom_gr.noun[0] for known in ResourcePool().nouns_end_s):
        return nom_gr

    # Drop the determinant 'a' added during processing, with its quantifier
    if nom_gr.det[0] == 'a':
        nom_gr.det = []
        nom_gr._quantifier = 'ONE'
    elif nom_gr.det[0] == 'no':
        nom_gr._quantifier = 'ANY'

    # Put the noun in singular form: irregular table first, else strip the 's'
    for entry in ResourcePool().plural_nouns:
        if entry[0] == nom_gr.noun[0]:
            nom_gr.noun[0] = entry[1]
            break
    else:
        nom_gr.noun[0] = nom_gr.noun[0][:-1]

    # A plural with no other quantifier means 'ALL'
    if nom_gr._quantifier == 'ONE':
        nom_gr._quantifier = 'ALL'
    return nom_gr
def upper_to_lower(sentence):
    """
    Normalise the case of the first word of a sentence.

    When the first word is capitalised (and is not 'I') it is lowered, the
    sentence is pre-processed, and the capital is restored at the end only if
    nothing identifies the word as a regular sentence start (verb, known
    sentence start, number, nominal group): it is then assumed to be a proper
    name. 'the' is prepended when the sentence still starts with an adjective
    after plural processing.

    :param list sentence: the sentence as a list of words; its first element
                          may be modified in place
    :return: the processed sentence (an empty sentence is returned as is)
    """
    # Nothing to normalise; also avoids indexing sentence[0] on an empty list
    if not sentence:
        return sentence

    # If the sentence begins with upper case
    if other_functions.find_cap_lettre(sentence[0]):
        # 'I' keeps its capital: only contractions are expanded
        if sentence[0] == 'I':
            return expand_contractions(sentence)

        sentence[0] = sentence[0][0].lower() + sentence[0][1:]

        # These steps need the lower-case form of the first word
        sentence = expand_contractions(sentence)
        sentence = adverbial_or_subsentence(sentence)
        stc = process_and_beginning_sentence(sentence)

        # If the sentence was modified we can return it
        if stc != sentence:
            return stc

        # An action verb in first position => imperative sentence
        if sentence[0] in ThematicRolesDict().get_all_verbs():
            return sentence

        # The word is a known sentence start
        for v in ResourcePool().sentence_starts:
            if sentence[0] == v[0]:
                return sentence

        # The sentence starts with a number
        if other_functions.number(sentence[0]) == 1:
            return sentence

        # Plural processing
        sentence = analyse_nominal_group.find_plural(sentence)

        # If it still starts with an adjective
        if analyse_nominal_group.is_an_adj(sentence[0]) == 1:
            sentence = ['the'] + sentence

        # If there is a nominal group
        if analyse_nominal_group.find_sn_pos(sentence, 0):
            return sentence

        # Default case: assume a proper name and restore the capital
        sentence[0] = sentence[0][0].upper() + sentence[0][1:]

    # If the sentence begins with lower case
    else:
        sentence = expand_contractions(sentence)
        sentence = adverbial_or_subsentence(sentence)
        sentence = process_and_beginning_sentence(sentence)

        # The word is a known sentence start
        for v in ResourcePool().sentence_starts:
            if sentence[0] == v[0]:
                return sentence

        # Plural processing
        sentence = analyse_nominal_group.find_plural(sentence)

        # If it still starts with an adjective
        if analyse_nominal_group.is_an_adj(sentence[0]) == 1:
            sentence = ['the'] + sentence

    return sentence
def upper_to_lower(sentence):
    """
    Normalise the case of the first word of a sentence.

    When the first word is capitalised (and is not 'I') it is lowered, the
    sentence is pre-processed, and the capital is restored at the end only if
    nothing identifies the word as a regular sentence start (verb, known
    sentence start, number, nominal group): it is then assumed to be a proper
    name. 'the' is prepended when the sentence still starts with an adjective
    after plural processing.

    :param list sentence: the sentence as a list of words; its first element
                          may be modified in place
    :return: the processed sentence (an empty sentence is returned as is)
    """
    # Nothing to normalise; also avoids indexing sentence[0] on an empty list
    if not sentence:
        return sentence

    # If the sentence begins with upper case
    if other_functions.find_cap_lettre(sentence[0]):
        # 'I' keeps its capital: only contractions are expanded
        if sentence[0] == 'I':
            return expand_contractions(sentence)

        sentence[0] = sentence[0][0].lower() + sentence[0][1:]

        # These steps need the lower-case form of the first word
        sentence = expand_contractions(sentence)
        sentence = adverbial_or_subsentence(sentence)
        stc = process_and_beginning_sentence(sentence)

        # If the sentence was modified we can return it
        if stc != sentence:
            return stc

        # An action verb in first position => imperative sentence
        if sentence[0] in ThematicRolesDict().get_all_verbs():
            return sentence

        # The word is a known sentence start
        for v in ResourcePool().sentence_starts:
            if sentence[0] == v[0]:
                return sentence

        # The sentence starts with a number
        if other_functions.number(sentence[0]) == 1:
            return sentence

        # Plural processing
        sentence = analyse_nominal_group.find_plural(sentence)

        # If it still starts with an adjective
        if analyse_nominal_group.is_an_adj(sentence[0]) == 1:
            sentence = ['the'] + sentence

        # If there is a nominal group
        if analyse_nominal_group.find_sn_pos(sentence, 0):
            return sentence

        # Default case: assume a proper name and restore the capital
        sentence[0] = sentence[0][0].upper() + sentence[0][1:]

    # If the sentence begins with lower case
    else:
        sentence = expand_contractions(sentence)
        sentence = adverbial_or_subsentence(sentence)
        sentence = process_and_beginning_sentence(sentence)

        # The word is a known sentence start
        for v in ResourcePool().sentence_starts:
            if sentence[0] == v[0]:
                return sentence

        # Plural processing
        sentence = analyse_nominal_group.find_plural(sentence)

        # If it still starts with an adjective
        if analyse_nominal_group.is_an_adj(sentence[0]) == 1:
            sentence = ['the'] + sentence

    return sentence