def extract_PP_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append a PP specifier to ``element`` for each matching PREP/PREPC dependency.

    A dependency matches when its ``'governor'`` equals ``node_index`` or its
    ``'governorGloss'`` equals the element's copula (only looked up when
    ``element`` is an ``Action``), and ``cls.part_rc_mod`` does not claim it.

    Args:
        origin: Passed through to ``Specifier`` and ``cls.create_object``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        element: Object whose ``f_specifiers`` list receives the new specifiers.
        node_index: Index compared against each dependency's ``'governor'``.
        dependencies: Collection handed to ``Search.find_dependencies``.

    Returns:
        None. ``element.f_specifiers`` is extended in place.
    """
    prep_deps = Search.find_dependencies(dependencies, (PREP, PREPC))
    rcmod_deps = Search.find_dependencies(dependencies, RCMOD)

    for prep in prep_deps:
        copula = element.f_cop if isinstance(element, Action) else None
        governed = prep['governor'] == node_index or prep['governorGloss'] == copula
        if not governed or cls.part_rc_mod(full_sentence, rcmod_deps, prep):
            continue

        target_index = prep['dependent']
        tree_node = Search.find_dep_in_tree(full_sentence, target_index)
        pp_tree = Search.get_full_phrase_tree(tree_node, PP)
        if not pp_tree:
            continue

        # Nested S/SBAR branches are removed before the phrase text is built.
        pp_tree = cls.delete_branches(pp_tree, (S, SBAR))
        text = " ".join(pp_tree.leaves())
        first_space = text.find(" ")
        if first_space < 0:
            # A phrase without any space yields no specifier.
            continue

        head_word = prep['spec']
        if head_word:
            # Replace the first word of the phrase with the dependency's 'spec'.
            text = head_word + text[first_space:]

        specifier = Specifier(origin, target_index, text)
        specifier.f_type = PP
        if tree_node.parent().label().startswith(NP):
            specifier.f_object = cls.create_object(
                origin, full_sentence, target_index, dependencies
            )
        specifier.f_headWord = head_word
        element.f_specifiers.append(specifier)
def create_action(cls, origin, full_sentence, node_index, dependencies, active):
    """Build an ``Action`` for the node at ``node_index`` and fill in its details.

    The action receives, when available: auxiliaries, a modifier (word and
    index), the negation flag, a copula (word and index), a particle, an
    indirect-object specifier, an open clausal complement (built recursively),
    and SBAR / PP / RCMOD specifiers.

    Args:
        origin: Passed through to ``Action`` and ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        node_index: Index of the node the action is built from.
        dependencies: Collection handed to the dependency helpers.
        active: When false, ``cls.check_dobj`` is additionally invoked.

    Returns:
        The populated ``Action``.
    """
    verb_node = Search.find_dep_in_tree(full_sentence, node_index)
    result = Action(origin, node_index, verb_node[0])

    auxiliaries = cls.get_auxiliars(node_index, dependencies)
    if len(auxiliaries) > 0:
        result.f_aux = auxiliaries

    modifier_index = cls.get_modifiers(node_index, dependencies)
    if modifier_index:
        modifier_node = Search.find_dep_in_tree(full_sentence, modifier_index)
        result.f_mod = modifier_node[0]
        result.f_modPos = modifier_index

    result.f_negated = cls.is_negated(verb_node, dependencies)

    copula_index = cls.get_cop(node_index, dependencies)
    if copula_index:
        copula_node = Search.find_dep_in_tree(full_sentence, copula_index)
        result.f_cop = copula_node[0]
        result.f_copIndex = copula_index

    particle = cls.get_prt(node_index, dependencies)
    if particle:
        result.f_prt = particle

    indirect_index = cls.get_iobj(node_index, dependencies)
    if indirect_index:
        indirect_node = Search.find_dep_in_tree(full_sentence, indirect_index)
        indirect_spec = Specifier(origin, indirect_index, " ".join(indirect_node.leaves()))
        indirect_spec.f_type = IOBJ
        result.f_specifiers.append(indirect_spec)

    if not active:
        cls.check_dobj(node_index, dependencies, result, origin, full_sentence)

    # Only the first acceptable XCOMP/DEP dependency governed by this node is used.
    for candidate in Search.find_dependencies(dependencies, (XCOMP, DEP)):
        if candidate['governor'] != node_index:
            continue
        candidate_node = Search.find_dep_in_tree(full_sentence, candidate['dependent'])
        # A generic DEP relation is accepted only when the dependent's label
        # starts with "V" and the dependent does not precede its governor.
        if candidate['dep'] == DEP and (
            candidate_node.label()[0] != "V"
            or candidate['dependent'] < candidate['governor']
        ):
            continue
        result.f_xcomp = cls.create_action(
            origin, full_sentence, candidate['dependent'], dependencies, True
        )
        break

    vp_head = Search.get_full_phrase_tree(verb_node, VP)
    cls.extract_SBAR_spec(origin, full_sentence, result, vp_head)
    cls.extract_PP_spec(origin, full_sentence, result, node_index, dependencies)
    cls.extract_RCMOD_spec(origin, full_sentence, result, node_index, dependencies)

    cls.logger.debug("Identified action {}".format(result))
    return result
def get_PARTMOD_specifiers(cls, origin, full_sentence, node_index, dependencies, element):
    """Append a PARTMOD specifier to ``element`` per PARTMOD dependency of the node.

    For every PARTMOD dependency whose ``'governor'`` equals ``node_index``,
    the specifier text is the space-joined leaves of the VP phrase tree around
    the dependent, or an empty string when no such tree is found.

    Args:
        origin: Passed through to ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        node_index: Index compared against each dependency's ``'governor'``.
        dependencies: Collection handed to ``Search.find_dependencies``.
        element: Object whose ``f_specifiers`` list receives the new specifiers.

    Returns:
        None. ``element.f_specifiers`` is extended in place.
    """
    for partmod in Search.find_dependencies(dependencies, PARTMOD):
        if partmod['governor'] != node_index:
            continue

        tree_node = Search.find_dep_in_tree(full_sentence, partmod['dependent'])
        vp_tree = Search.get_full_phrase_tree(tree_node, VP)
        if vp_tree:
            words = vp_tree.leaves()
        else:
            # No VP found: the specifier is still created, with empty text.
            words = []

        specifier = Specifier(origin, partmod['dependent'], " ".join(words))
        specifier.f_type = PARTMOD
        element.f_specifiers.append(specifier)
def part_rc_mod(cls, full_sentence, rc_mod, dep):
    """Tell whether ``dep`` heads a relative clause whose PP is a condition indicator.

    For each entry of ``rc_mod`` whose ``'governor'`` equals
    ``dep['dependent']``, the PP phrase tree around the dependent is taken,
    its S/SBAR branches are removed, and the space-joined leaves are looked
    up in ``f_conditionIndicators``.

    Args:
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        rc_mod: Iterable of dependencies, each indexed with ``'governor'``.
        dep: Dependency indexed with ``'dependent'``.

    Returns:
        True as soon as one matching phrase is a condition indicator,
        otherwise False (including when ``rc_mod`` is empty).
    """
    for rcm in rc_mod:
        if rcm['governor'] != dep['dependent']:
            continue

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
        if not phrase_tree:
            # The other callers of get_full_phrase_tree guard against an
            # empty result; without a PP there is no phrase to compare.
            continue

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        if phrase in f_conditionIndicators:
            return True

    return False
def extract_RCMOD_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append an RCMOD specifier to ``element`` for each matching RCMOD dependency.

    A dependency matches when its ``'dependent'`` equals ``node_index`` or its
    ``'dependentGloss'`` equals the element's copula (only looked up when
    ``element`` is an ``Action``). The specifier text comes from the PP phrase
    tree around the dependency's *governor*, with S/SBAR branches removed.

    Args:
        origin: Passed through to ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        element: Object whose ``f_specifiers`` list receives the new specifiers.
        node_index: Index compared against each dependency's ``'dependent'``.
        dependencies: Collection handed to ``Search.find_dependencies``.

    Returns:
        None. ``element.f_specifiers`` is extended in place.
    """
    for rcmod in Search.find_dependencies(dependencies, RCMOD):
        copula = element.f_cop if isinstance(element, Action) else None
        if not (rcmod['dependent'] == node_index or rcmod['dependentGloss'] == copula):
            continue

        governor_node = Search.find_dep_in_tree(full_sentence, rcmod['governor'])
        pp_tree = Search.get_full_phrase_tree(governor_node, PP)
        if not pp_tree:
            continue

        pp_tree = cls.delete_branches(pp_tree, (S, SBAR))
        specifier = Specifier(origin, rcmod['dependent'], " ".join(pp_tree.leaves()))
        specifier.f_type = RCMOD
        element.f_specifiers.append(specifier)
def determine_noun_specifiers(cls, origin, full_sentence, node, node_index, dependencies, element):
    """Collect all noun-related specifiers for ``element`` and flag resolution.

    Runs the determiner, AMOD, NN, INFMOD, PARTMOD, NUM, SBAR and PP
    extraction helpers in that order. Afterwards ``element.f_needsResolve`` is
    set to True when the node's label is in ``f_relativeResolutionTags`` or
    its first child is in ``f_relativeResolutionWords``, the node is the only
    child of its parent, and no PP specifier has ``OF`` as head word.

    Args:
        origin: Passed through to the specifier helpers.
        full_sentence: Passed through to the tree-based helpers.
        node: Tree node of the noun; provides ``label()``, ``parent()`` and
            index access.
        node_index: Index of the noun in the dependency data.
        dependencies: Collection handed to the dependency helpers.
        element: Object that receives the specifiers and the resolve flag.

    Returns:
        None. ``element`` is modified in place.
    """
    cls.find_determiner(node_index, dependencies, element)
    cls.find_AMOD_specifiers(origin, node_index, dependencies, element)
    cls.find_NN_specifiers(origin, node_index, dependencies, element)
    cls.find_INFMOD_specifiers(origin, node_index, dependencies, element)
    cls.get_PARTMOD_specifiers(origin, full_sentence, node_index, dependencies, element)
    cls.get_specifier_from_dependencies(origin, node_index, dependencies, element, NUM)

    np_tree = Search.get_full_phrase_tree(node, NP)
    cls.extract_SBAR_spec(origin, full_sentence, element, np_tree)
    cls.extract_PP_spec(origin, full_sentence, element, node_index, dependencies)

    is_relative = (
        node.label() in f_relativeResolutionTags
        or node[0] in f_relativeResolutionWords
    )
    # NOTE(review): the original indentation was lost; this assumes the
    # resolve flag is only set when the node is its parent's sole child.
    # Confirm against the upstream file.
    if not is_relative or len(node.parent()) != 1:
        return

    # A PP specifier headed by OF suppresses the resolve flag.
    if any(spec.f_headWord == OF for spec in element.get_specifiers(PP)):
        return

    element.f_needsResolve = True
def check_np_sub_sentences(self, dep_index, dependencies, obj):
    """Run the sub-sentence check on the NP phrase around ``dep_index``.

    Does nothing when ``self.f_ignore_np_subsentences`` is truthy. Otherwise
    the node is looked up in ``self.f_full_sentence``, its NP phrase tree is
    fetched, and ``self.check_sub_sentences`` is called with a final argument
    of True.

    Args:
        dep_index: Index of the node to look up in the full sentence tree.
        dependencies: Passed through to ``self.check_sub_sentences``.
        obj: Passed through to ``self.check_sub_sentences``.

    Returns:
        None.
    """
    if self.f_ignore_np_subsentences:
        return

    tree_node = Search.find_dep_in_tree(self.f_full_sentence, dep_index)
    np_head = Search.get_full_phrase_tree(tree_node, NP)
    self.check_sub_sentences(np_head, dependencies, obj, True)
def determine_verbs(self, sentence, dependencies, active):
    """Find the actions of ``sentence``, starting from its main predicate.

    The main predicate index is derived from the dependencies (relative
    clauses excluded):

    * active: with no NSUBJ, the governor of the first DOBJ; with exactly one
      NSUBJ, its governor, replaced by the dependent of a COP governed by it
      when one exists; with several NSUBJ, nothing (only logged).
    * passive: the governor of the single NSUBJPASS; several are only logged.

    With a predicate index the action is built by ``Builder.create_action``;
    without one, by ``Builder.create_action_syntax`` from the only VP found in
    ``sentence``, provided exactly one exists. Actions returned by
    ``self.check_conjunctions`` for the first action are appended.

    Args:
        sentence: Tree searched for VPs and used to exclude relative clauses.
        dependencies: Collection handed to ``Search.find_dependencies``.
        active: Selects the active (True) or passive (False) strategy.

    Returns:
        A list of actions; empty when none could be determined.
    """
    predicate_index = None

    # Determine main predicate
    if active:
        subjects = self.exclude_relative_clauses(
            sentence, Search.find_dependencies(dependencies, NSUBJ)
        )
        subject_count = len(subjects)
        if subject_count == 0:
            objects = self.exclude_relative_clauses(
                sentence, Search.find_dependencies(dependencies, DOBJ)
            )
            if len(objects) >= 1:
                predicate_index = objects[0]['governor']
        elif subject_count == 1:
            predicate_index = subjects[0]['governor']
            copulas = self.exclude_relative_clauses(
                sentence, Search.find_dependencies(dependencies, COP)
            )
            # If a copula is governed by the predicate, the copula's
            # dependent becomes the predicate instead.
            for copula in copulas:
                if copula['governor'] == predicate_index:
                    predicate_index = copula['dependent']
                    break
        else:
            self.logger.info("Sentence has more than one active predicate")
            self.logger.debug(subjects)
    else:
        passive_subjects = self.exclude_relative_clauses(
            sentence, Search.find_dependencies(dependencies, NSUBJPASS)
        )
        passive_count = len(passive_subjects)
        if passive_count == 1:
            predicate_index = passive_subjects[0]['governor']
        elif passive_count > 1:
            self.logger.info("Sentence has more than one passive predicate")
            self.logger.debug(passive_subjects)

    # Find all actions
    found = []
    if predicate_index:
        predicate_node = Search.find_dep_in_tree(self.f_full_sentence, predicate_index)
        vp_head = Search.get_full_phrase_tree(predicate_node, VP)
        action = Builder.create_action(
            self.f_stanford_sentence, self.f_full_sentence,
            predicate_index, dependencies, active,
        )
        self.check_sub_sentences(vp_head, dependencies, action, False)
        found.append(action)
    else:
        # Fall back to the syntax tree when the dependencies gave no predicate.
        verb_phrases = Search.find_in_tree(sentence, VP, (SBAR, S))
        vp_count = len(verb_phrases)
        if vp_count == 1:
            only_vp = verb_phrases[0]
            action = Builder.create_action_syntax(
                self.f_stanford_sentence, self.f_full_sentence, only_vp
            )
            self.check_sub_sentences(only_vp, dependencies, action, False)
            found.append(action)
        elif vp_count == 0:
            self.logger.info("Sentence contains no action")
        else:
            self.logger.info("Sentence has more than one verb phrase")

    if len(found) > 0:
        found.extend(
            self.check_conjunctions(dependencies, found[0], False, False, active)
        )

    return found