def extract_models_actions(input_text, models_fullInfo):
    """Extract the actions (verbs with their subject/objects) found in a text.

    Every token tagged ``VERB`` whose lemma belongs to one of the module-level
    verb collections (``verb_noObject``, ``verb_prep``, ``verb_oneObject``,
    ``verb_twoObject``) is turned into one action per resolved subject.  When
    the verb has no ``nsubj`` child, the subject (and possibly the objects) is
    copied from an action that was recorded earlier.

    Args:
        input_text: The raw text to analyse.
        models_fullInfo: Model table forwarded unchanged to ``get_model_id``.

    Returns:
        A list of tuples
        ``(cat_num, verb_name, subj_id, obj1_id, obj2_id, action_pos)`` where
        ``cat_num`` is 1 (no object), 2 (prepositional), 3 (one object) or
        4 (two objects), ids that could not be resolved stay ``-1`` and
        ``action_pos`` is the index of the verb token in the document.
    """
    object_coref = models_char.get_coref(input_text)
    # Called for its side effect only; the return value is not used here.
    get_objectCoref_map(input_text)
    lemmatizer = WordNetLemmatizer()
    doc = nlp(input_text)
    models_actions = []
    # Private marker for "detect_object_type answered 'none'"; a dedicated
    # object cannot collide with whatever get_model_id returns.
    unrecognized = object()

    def pronoun_model_id(token):
        """Resolve a pronoun token to the id of the model it refers to."""
        pronoun = str(token.text).lower()
        referent_text = get_refrencedObject(pronoun)
        referent_word = ""
        referent_chars = []
        for info in models_char.extract_models_char(referent_text):
            name = info[1]
            if name == 'boy' or name == 'man':
                agrees = pronoun in models_char.male_pronoun
            elif name == 'girl' or name == 'woman':
                agrees = pronoun in models_char.female_pronoun
            else:
                agrees = pronoun in models_char.rigid_pronoun
            if agrees:
                # The first candidate that agrees with the pronoun wins.
                referent_word = name
                referent_chars = info[2]
                break
        return get_model_id(models_fullInfo, referent_word, referent_chars)

    def noun_model_id(token):
        """Resolve a non-pronoun token to a model id, or ``unrecognized``."""
        nonlocal object_coref
        model_type, object_coref = detect_object_type(str(token),
                                                      object_coref)
        if model_type == 'none':
            return unrecognized
        object_chars = detect_object_char(model_type, token)
        return get_model_id(models_fullInfo, model_type, object_chars)

    def make_action(verb, subj_id, obj1_id, obj2_id, position):
        """Build the action tuple, deriving the category from the verb."""
        if verb in verb_noObject:
            category = 1
        elif verb in verb_prep:
            category = 2
        elif verb in verb_oneObject:
            category = 3
        else:
            category = 4
        return (category, verb, subj_id, obj1_id, obj2_id, position)

    for sent in doc.sents:
        for word in sent:
            if word.pos_ != "VERB":
                continue
            verb = lemmatizer.lemmatize(str(word), 'v')
            if not (verb in verb_noObject or verb in verb_prep
                    or verb in verb_oneObject or verb in verb_twoObject):
                continue

            subj_id = -1
            obj1_id = -1
            obj2_id = -1

            # ---------------- direct objects ----------------
            if verb in verb_oneObject or verb in verb_twoObject:
                for child in word.children:
                    if child.dep_ != "dobj":
                        continue
                    if child.pos_ == "PRON":
                        obj1_id = pronoun_model_id(child)
                    else:
                        resolved = noun_model_id(child)
                        if resolved is not unrecognized:
                            obj1_id = resolved

            # ---------------- prepositional objects ----------------
            if verb in verb_prep or verb in verb_twoObject:
                for prep in word.children:
                    if prep.dep_ != "prep":
                        continue
                    for prep_object in prep.children:
                        if prep_object.dep_ != "pobj":
                            continue
                        if prep_object.pos_ == "PRON":
                            obj_id = pronoun_model_id(prep_object)
                        else:
                            obj_id = noun_model_id(prep_object)
                            if obj_id is unrecognized:
                                continue
                        # A two-object verb keeps its direct object in slot 1,
                        # so the prepositional object goes to slot 2.
                        if verb in verb_twoObject:
                            obj2_id = obj_id
                        else:
                            obj1_id = obj_id

            # ---------------- subjects (and their conjuncts) ----------------
            found_subj = False
            for child in word.children:
                if child.dep_ != "nsubj":
                    continue
                found_subj = True
                current_subject = child
                while current_subject is not None:
                    if current_subject.pos_ == "PRON":
                        resolved = pronoun_model_id(current_subject)
                    else:
                        resolved = noun_model_id(current_subject)
                    if resolved is not unrecognized:
                        models_actions.append(
                            make_action(verb, resolved, obj1_id, obj2_id,
                                        word.i))
                    # Always advance to the next coordinated subject.  The
                    # previous code re-tested an unrecognised subject without
                    # advancing, which could loop forever.
                    current_subject = next(
                        (conj for conj in current_subject.children
                         if conj.dep_ == "conj"), None)

            # -------- verbs without their own subject (conj verbs) --------
            if not found_subj:
                current_verb = word
                found_conj_verb = False
                # Climb to preceding head verbs until one owns a subject.
                while (not found_subj
                       and current_verb.head.pos_ == "VERB"
                       and current_verb.head.i < current_verb.i):
                    found_conj_verb = True
                    current_verb = current_verb.head
                    found_subj = any(child_verb.dep_ == "nsubj"
                                     for child_verb in current_verb.children)

                source_lemma = lemmatizer.lemmatize(str(current_verb), 'v')
                for verb_info in models_actions:
                    if verb_info[1] != source_lemma:
                        continue
                    # The last matching action wins, as before.
                    subj_id = verb_info[2]
                    if not found_conj_verb:
                        # No head verb was found ("while playing" case):
                        # copy the objects as well as the subject.
                        obj1_id = verb_info[3]
                        obj2_id = verb_info[4]
                models_actions.append(
                    make_action(verb, subj_id, obj1_id, obj2_id, word.i))

    return models_actions
####################### files for the following modules ########################### file_models_char = open("models_char.txt", "w") file_models_actions = open("model_actions.txt", "w") file_models_relations = open("models_relations.txt", "w") ################################################################################## file_input_text = open("input_text.txt", "r") ###################### input text ############################################### ---------- > take from GUI input_text = file_input_text.read() #print(input_text) #input_text = "There is an old , tall and smart gentleman in a room. He has a small white cat and he carries it. There is a small black table behind a huge red chair. There is a black laptop to the right of a large brown bed." ################################################################################ models_info = extract_models_char(input_text) relations_models = Objs_relations(input_text, models_info) model_actions = extract_models_actions(input_text, models_info) sequence(input_text) ########################### write model chars ###################### for i in range(0, len(models_info)): current_model_name = models_info[i][1] current_model_chars = models_info[i][2] file_models_char.write(current_model_name + " ") if len(current_model_chars) == 3: #human for j in range(0, len(current_model_chars) - 1): if j == 0 and current_model_chars[ j] == -1: #not mentioned age then set it not old current_model_chars[j] = 0
def Objs_relations(input_text, models_fullInfo):
    """Extract spatial relations ("<pnoun> <preposition> <pobj>") from a text.

    Two kinds of adposition tokens are handled:

    * a preposition listed in ``rel_avail`` that is not attached to a verb
      other than "be" (e.g. "a box is on the table");
    * the word "of" whose head is listed in ``rel_avail`` (e.g. "on the left
      of a table"); the relation is then named after that head.

    Args:
        input_text: The raw text to analyse.
        models_fullInfo: Model table forwarded unchanged to ``get_model_id``.

    Returns:
        A list of ``[prep, pnoun_type, pnoun_id, pobj_type, pobj_id]`` lists,
        one per (located model, reference model) pair.
    """
    object_coref = models_char.get_coref(input_text)
    # Called for its side effect only; the return value is not used here.
    get_objectCoref_map(input_text)
    doc = nlp(input_text)
    relations = []

    def describe_noun(token):
        """Return ``[model_type, model_id]`` for a noun, or None if unknown."""
        nonlocal object_coref
        model_type, object_coref = detect_object_type(str(token),
                                                      object_coref)
        if model_type == 'none':
            return None
        object_chars = detect_object_char(model_type, token)
        return [model_type,
                get_model_id(models_fullInfo, model_type, object_chars)]

    def describe_pronoun(token):
        """Return ``[model_name, model_id]`` for the pronoun's referent."""
        pronoun = str(token.text).lower()
        referent_text = get_refrencedObject(pronoun)
        referent_word = ""
        # Initialised up front: the previous code initialised a differently
        # named list, so an unmatched pronoun read an unbound (or stale)
        # variable when calling get_model_id.
        referent_chars = []
        for info in models_char.extract_models_char(referent_text):
            name = info[1]
            if name == 'boy' or name == 'man':
                agrees = pronoun in models_char.male_pronoun
            elif name == 'girl' or name == 'woman':
                agrees = pronoun in models_char.female_pronoun
            else:
                agrees = pronoun in models_char.rigid_pronoun
            if agrees:
                referent_word = name
                referent_chars = info[2]
                break
        return [referent_word,
                get_model_id(models_fullInfo, referent_word, referent_chars)]

    def collect_pnouns(anchor):
        """Collect the located models governed by *anchor*.

        Returns the list of ``[type, id]`` entries, or None when the current
        preposition has to be skipped altogether.
        """
        found = []
        if anchor.pos_ == "NOUN":
            # "There is a chair on the left of a table"
            if anchor.dep_ == "conj":
                # The anchor is a conjunct: its head is located too.
                head_entry = describe_noun(anchor.head)
                if head_entry is not None:
                    found.append(head_entry)
            main_entry = describe_noun(anchor)
            if main_entry is None:
                return None
            found.append(main_entry)
        elif anchor.pos_ == "VERB":
            # "a box is on the table" / "there is a chair in the room"
            for child_verb in anchor.children:
                if child_verb.dep_ != "nsubj" and child_verb.dep_ != "attr":
                    continue
                if child_verb.pos_ == "PRON":
                    found.append(describe_pronoun(child_verb))
                    continue
                # NOTE(review): only the first recognised direct conjunct is
                # collected; longer coordination chains are not followed.
                # This keeps the existing behaviour - confirm it is intended.
                for child_pnoun in child_verb.children:
                    if child_pnoun.dep_ != "conj":
                        continue
                    conj_entry = describe_noun(child_pnoun)
                    if conj_entry is not None:
                        found.append(conj_entry)
                        break
                main_entry = describe_noun(child_verb)
                if main_entry is not None:
                    found.append(main_entry)
        return found

    def add_relations(prep_token, prep_name, pnouns):
        """Append one relation per located model for each usable pobj."""
        for child in prep_token.children:
            if child.dep_ != "pobj":
                continue
            # A pobj that itself carries a preposition ("left" in "on the
            # left of ...") is not the reference object.
            if any(grandchild.dep_ == "prep"
                   for grandchild in child.children):
                continue
            target = describe_noun(child)
            if target is None:
                continue
            for pnoun_type, pnoun_id in pnouns:
                relations.append(
                    [prep_name, pnoun_type, pnoun_id, target[0], target[1]])

    for sentence in doc.sents:
        for word in sentence:
            if word.pos_ != "ADP":
                continue
            # NOTE(review): `lemmatizer` is not defined in this function;
            # presumably a module-level object - confirm.
            if (str(word) in rel_avail) and (
                    (str(word.head.pos_) != "VERB") or
                    (lemmatizer.lemmatize(str(word.head), 'v') == "be")):
                # Plain preposition that is not bound to an action verb.
                prep_name = str(word)
                anchor = word.head
            elif (str(word) == "of") and (str(word.head) in rel_avail):
                # "<prep> the <head> of ...": the relation is named after
                # the head and the anchor sits three heads up.
                prep_name = str(word.head)
                anchor = word.head.head.head
            else:
                continue

            pnouns = collect_pnouns(anchor)
            if pnouns is None:
                continue
            add_relations(word, prep_name, pnouns)

    return relations
def nlp_module(input_text):
    """Run the whole NLP pipeline on *input_text* and write its results.

    The model characteristics, the model actions and the model relations are
    written to ``models_char.txt``, ``model_actions.txt`` and
    ``models_relations.txt`` in the current directory.  Inferred models and
    relations are appended to the extracted ones before writing.  Finally
    ``sequence(input_text)`` is called once all three files are closed.

    Args:
        input_text: The raw text to analyse.

    Returns:
        None.
    """
    # The files are opened (and truncated) before any extraction runs, as
    # before; the ``with`` block guarantees they are closed even when one of
    # the extraction steps raises.
    with open("models_char.txt", "w") as file_models_char, \
            open("model_actions.txt", "w") as file_models_actions, \
            open("models_relations.txt", "w") as file_models_relations:
        models_info = extract_models_char(input_text)
        relations_models = Objs_relations(input_text, models_info)
        infered_models, infered_relations = extract_totalInference(
            models_info, relations_models)
        print("infered_models : ", infered_models)
        print("infered_relations : ", infered_relations)
        model_actions = extract_models_actions(input_text, models_info)

        # Add inferred models and relations to the extracted ones.
        models_info.extend(infered_models)
        relations_models.extend(infered_relations)

        # ---------------- write model chars ----------------
        for model in models_info:
            model_name = model[1]
            model_chars = model[2]
            file_models_char.write(model_name + " ")
            if len(model_chars) == 4:
                # Human: unmentioned age -> not old, height -> medium.
                defaults = {0: 0, 2: 1}
            else:
                # Anything else: unmentioned size -> medium.
                defaults = {1: 1}
            # The last characteristic is deliberately not written.
            for j in range(len(model_chars) - 1):
                if model_chars[j] == -1 and j in defaults:
                    # Stored back in place, so callers holding the same
                    # list see the default as well.
                    model_chars[j] = defaults[j]
                file_models_char.write(str(model_chars[j]) + " ")
            file_models_char.write(str(model[3]) + "\n")

        # ---------------- write model actions ----------------
        # The category (index 0) is not written, only the verb onwards.
        for action in model_actions:
            file_models_actions.write(
                " ".join((action[1], str(action[2]), str(action[3]),
                          str(action[4]), str(action[5]))) + "\n")

        # ---------------- write model relations ----------------
        for relation in relations_models:
            file_models_relations.write(
                " ".join((relation[0], relation[1], str(relation[2]),
                          relation[3], str(relation[4]))) + "\n")

    # ---------------- extract action sequence ----------------
    sequence(input_text)
    return