def _get_prep_phrases(self, head):
    """Collect the prepositional phrases attached to ``head``.

    Each ``prep`` dependent of ``head`` is considered unless its lower-cased
    word is in ``self._prep_blacklist_for_prep_phrases`` or its index is three
    or more positions away from ``head.index``. For every ``pobj`` dependent
    of such a preposition that passes ``_head_extracting_condition``, each
    member of its conjunction is expanded with ``_expand_head_word`` and the
    preposition is added to that expansion.

    :param head: word unit whose prepositional phrases are wanted
    :return: list of ``WordUnitSequence`` objects (possibly empty); each has
        ``head`` set to the object word and ``nn_head`` taken from the
        object's expansion
    """
    phrases = []
    preps = self._get_dependents(self._dependencies['prep'], head)
    if not preps:
        return phrases

    for prep in preps:
        if prep.word.lower() in self._prep_blacklist_for_prep_phrases:
            continue
        # Ignore those prepositions that are far away from the head
        distance = abs(prep.index - head.index)
        if distance >= 3:
            continue
        # Look for pobj
        pobjs = self._get_dependents(self._dependencies['pobj'], prep)
        if not pobjs:
            continue
        for pobj in pobjs:
            if not self._head_extracting_condition(pobj, pos=True):
                continue
            for conjunct in self._get_conjunction(pobj):
                expanded = self._expand_head_word(conjunct)
                if not expanded:
                    continue
                # The preposition becomes part of the expanded object itself
                expanded.add_word_unit(prep)
                phrase = WordUnitSequence()
                phrase.extend(expanded)
                phrase.head = conjunct
                phrase.nn_head = expanded.nn_head
                if phrase:
                    self._print_expansion_debug_info(head, 'prep phrase', phrase)
                    phrases.append(phrase)
    return phrases
def _get_neg_modifier(self, head):
    """Return the negation modifier of ``head`` as a ``WordUnitSequence``.

    Only the first ``neg`` dependent is looked at, and it is added only when
    its POS tag equals ``self._pos_tags['dt']``. In every other case the
    freshly constructed sequence is returned with nothing added to it.

    :param head: word unit whose negation is wanted
    :return: ``WordUnitSequence`` holding at most one word unit
    """
    negation = WordUnitSequence()
    candidates = self._get_dependents(self._dependencies['neg'], head)
    if not candidates:
        return negation

    first = candidates[0]
    if first.pos == self._pos_tags['dt']:
        negation.add_word_unit(first)
        self._print_expansion_debug_info(head, 'negation', first)
    return negation
def _get_num_modifier(self, head):
    """Gather every ``num`` dependent of ``head`` into one ``WordUnitSequence``.

    :param head: word unit whose numeric modifiers are wanted
    :return: ``WordUnitSequence`` with one added unit per ``num`` dependent;
        nothing is added when there are no such dependents
    """
    modifier = WordUnitSequence()
    dependents = self._get_dependents(self._dependencies['num'], head)
    # A falsy result (e.g. None or an empty list) means nothing to add
    for unit in dependents or ():
        modifier.add_word_unit(unit)
        self._print_expansion_debug_info(head, 'numeric modifier', unit)
    return modifier
def _get_noun_compound(self, head):
    """Build a ``WordUnitSequence`` from the ``nn`` dependents of ``head``.

    ``head`` itself is not added here; only its ``nn`` dependents are.

    :param head: word unit whose noun-compound modifiers are wanted
    :return: ``WordUnitSequence`` with one added unit per ``nn`` dependent
    """
    compound = WordUnitSequence()
    modifiers = self._get_dependents(self._dependencies['nn'], head)
    if not modifiers:
        return compound

    for modifier in modifiers:
        compound.add_word_unit(modifier)
        self._print_expansion_debug_info(head, 'noun compound', modifier)
    return compound
def _get_vmod_phrase(self, head):
    """Build the verbal-modifier (``vmod``) phrase attached to ``head``.

    For each ``vmod`` dependent of ``head``, the first ``(predicate, object)``
    pair returned by ``_get_predicate_object`` is merged into the result:
    the predicate always, the object only when there is one.

    :param head: word unit whose ``vmod`` dependents are examined
    :return: ``WordUnitSequence`` accumulating the predicates/objects of all
        ``vmod`` dependents; nothing is added when there are none
    """
    vmod_phrase = WordUnitSequence()
    vmod_list = self._get_dependents(self._dependencies['vmod'], head)
    for vmod in vmod_list or ():
        predicate_object = self._get_predicate_object(vmod)
        if not predicate_object:
            continue
        # Only the first pair is used; any further pairs are ignored.
        predicate, obj = predicate_object[0]
        vmod_phrase.extend(predicate)
        # _get_predicate_object pairs an object-less predicate with None,
        # so do not forward that None into extend().
        if obj is not None:
            vmod_phrase.extend(obj)
        self._print_expansion_debug_info(head, 'vmod', vmod_phrase)
    return vmod_phrase
def _get_predicate_object(self, pred_head):
    """Pair each expanded predicate of ``pred_head`` with its objects.

    Every predicate returned by ``_expand_predicate`` is walked item by item.
    For each item the lookups are tried in this order:

    1. ``dobj`` dependents: each accepted object (and its conjuncts) is
       expanded and paired with the predicate as-is.
    2. ``acomp`` dependents: for each prepositional phrase of the adjective
       complement, the complement and the phrase's first unit are merged into
       a copy of the predicate; the rest of the phrase is the object.
    3. Prepositional phrases of the item itself: the phrase's first unit is
       merged into a copy of the predicate; the rest is the object.

    :param pred_head: head word unit of the predicate
    :return: list of ``(predicate, object)`` tuples; ``object`` is ``None``
        for a predicate for which no object was found
    """

    def _object_from_phrase(phrase):
        # Everything after the first unit of the phrase forms the object
        phrase_obj = WordUnitSequence()
        phrase_obj.extend(WordUnitSequence(phrase[1:]))
        phrase_obj.head = phrase.head
        phrase_obj.nn_head = phrase.nn_head
        return phrase_obj

    pairs = []
    for predicate in self._expand_predicate(pred_head):
        found_object = False
        # Iterating a predicate yields pairs; only the second member is used
        # (presumably (index, word unit) -- TODO confirm).
        for _, pred in predicate:
            # Look for direct object
            dobjs = self._get_dependents(self._dependencies['dobj'], pred)
            if dobjs:
                for dobj in dobjs:
                    if not self._head_extracting_condition(dobj, pos=True):
                        continue
                    for conjunct in self._get_conjunction(dobj):
                        expanded = self._expand_head_word(conjunct)
                        if not expanded:
                            continue
                        direct_obj = WordUnitSequence()
                        direct_obj.extend(expanded)
                        direct_obj.head = conjunct
                        direct_obj.nn_head = expanded.nn_head
                        found_object = True
                        pairs.append((predicate, direct_obj))
                # NOTE(review): an item with dobj dependents skips the acomp
                # and prepositional lookups below -- confirm this level of
                # `continue` is the intended one.
                continue

            # Look for adjective complement
            acomps = self._get_dependents(self._dependencies['acomp'], pred)
            if acomps:
                for acomp in acomps:
                    for phrase in self._get_prep_phrases(acomp):
                        if len(phrase) <= 1:
                            continue
                        acomp_obj = _object_from_phrase(phrase)
                        # Make a copy of predicate in case it gets expanded
                        merged = deepcopy(predicate)
                        # Merge the acomp and prep into the predicate
                        merged.add_word_unit(acomp)
                        merged.add_word_unit(phrase[0])
                        found_object = True
                        pairs.append((merged, acomp_obj))
                continue

            # Look for prepositional objects
            for phrase in self._get_prep_phrases(pred):
                if len(phrase) <= 1:
                    continue
                prep_obj = _object_from_phrase(phrase)
                # Make a copy of predicate in case it gets expanded
                merged = deepcopy(predicate)
                # Merge the prep into the predicate
                merged.add_word_unit(phrase[0])
                found_object = True
                pairs.append((merged, prep_obj))

        # Also return the predicate if it has no object in case it is a
        # conjunction of other predicates.
        if not found_object:
            pairs.append((predicate, None))
    return pairs
def _expand_head_word(self, head):
    """Expand ``head`` into a phrase made of the word and its modifiers.

    The expansion starts as ``WordUnitSequence(head, head)`` and is extended,
    in order, with the noun compound, the negation modifier, the first
    prepositional phrase (if any) and the ``vmod`` phrase of ``head``.
    ``nn_head`` is a deep copy of the expansion taken right after the noun
    compound has been merged.

    :param head: word unit to expand
    :return: the expanded ``WordUnitSequence``, or ``None`` when the result
        is a single item of length one or its head is tagged
        ``self._pos_tags['cd']``
    """

    def _clean(word_unit_seq):
        # Return early on each rejection: the original set the sequence to
        # None and then still dereferenced ``.head`` on it in the next check.
        # If the sequence is a single letter, ignore it
        if len(word_unit_seq) == 1 and len(word_unit_seq[0]) == 1:
            return None
        # If the head of the sequence is a number, ignore it
        if word_unit_seq.head.pos == self._pos_tags['cd']:
            return None
        return word_unit_seq

    expansion = WordUnitSequence(head, head)

    # Find out if the head is in a compound noun
    noun_compound = self._get_noun_compound(head)
    expansion.extend(noun_compound)
    expansion.nn_head = deepcopy(expansion)

    # NOTE: merging of numeric modifiers (self._get_num_modifier) is
    # currently disabled here.

    # Find out if there is any negation
    neg_mod = self._get_neg_modifier(head)
    expansion.extend(neg_mod)

    # Find out if the head has pobj phrase; only the first one is merged
    pobj_phrases = self._get_prep_phrases(head)
    if pobj_phrases:
        pobj_phrase = pobj_phrases[0]
        expansion.extend(pobj_phrase)
        # Transfer the head in the pattern "<num> of <noun>" from "<num>"
        # to "<noun>"
        head_is_number = (head.pos == self._pos_tags['cd']
                          or head.word.isdigit())
        if head_is_number and pobj_phrase[0].word == 'of':
            expansion.head = pobj_phrase.head
            expansion.nn_head = pobj_phrase.nn_head

    # Find out if the head has vmod phrase
    vmod_phrase = self._get_vmod_phrase(head)
    expansion.extend(vmod_phrase)

    # Cleaning
    return _clean(expansion)