def get_verbs(story, span):
    """Build a span from the verb tokens of ``span``, skipping the word 'can'.

    A token is kept when ``NLPUtility.is_verb`` accepts it and its lower-cased
    text is not ``'can'``. The kept tokens, in their original order, are handed
    to ``MinerUtility.get_span`` together with ``story``.

    Args:
        story: Passed through unchanged to ``MinerUtility.get_span``.
        span: Iterable of tokens; each token must expose ``.text``.

    Returns:
        Whatever ``MinerUtility.get_span(story, <kept tokens>)`` returns.
    """
    kept = [
        tok
        for tok in span
        if NLPUtility.is_verb(tok) and tok.text.lower() != 'can'
    ]
    return MinerUtility.get_span(story, kept)
def get_verbs(story, span):
    """Build a span from every token of ``span`` that is a verb.

    Tokens accepted by ``NLPUtility.is_verb`` are collected in their original
    order and handed to ``MinerUtility.get_span`` together with ``story``.

    Args:
        story: Passed through unchanged to ``MinerUtility.get_span``.
        span: Iterable of tokens to filter.

    Returns:
        Whatever ``MinerUtility.get_span(story, <verb tokens>)`` returns.
    """
    verb_tokens = [tok for tok in span if NLPUtility.is_verb(tok)]
    return MinerUtility.get_span(story, verb_tokens)
def remove_verbs(self, matrix, stories):
    """Return ``matrix`` without the rows that are verb-only and unweighted.

    For every index label of ``matrix`` the tokens of all stories whose
    ``NLPUtility.case`` equals that label are gathered. A label becomes a
    removal candidate when those tokens collapse to exactly one distinct
    token and that token is a verb. Candidates whose ``'sum'`` cell is
    greater than zero are kept after all; the rest are filtered out.

    Args:
        matrix: pandas DataFrame indexed by case label. It needs a ``'sum'``
            column whenever at least one removal candidate is found.
        stories: Iterable of objects exposing ``.data``, an iterable of
            tokens.

    Returns:
        A DataFrame holding the rows of ``matrix`` that were not removed.
    """
    candidates = []
    for case in matrix.index.values.tolist():
        matching = [
            token
            for story in stories
            for token in story.data
            if NLPUtility.case(token) == case
        ]
        # Exactly one distinct matching token, and that token is a verb.
        if len(set(matching)) == 1 and NLPUtility.is_verb(matching[0]):
            candidates.append(case)

    # Build a new list instead of calling remove() while iterating: removing
    # in place makes the iterator skip the element after every removal, so
    # some positively weighted verbs were never checked.
    verbs = [verb for verb in candidates if not matrix.loc[verb, 'sum'] > 0]

    # `~` inverts a boolean mask; unary `-` on a boolean NumPy array raises
    # TypeError.
    return matrix[~matrix.index.isin(verbs)]
def get_mobj_and_mv(self, story, part='means'):
    """Determine the main verb and main object of one part of a story.

    The tokens are read from ``story.<part>.text``. The outcome is written to
    ``story.means`` when ``part == 'means'`` and to ``story.ends`` (together
    with the subject) for any other value. Finally
    ``self.get_<part>_phrases`` is called and its result is returned.

    The word 'can' is never accepted as the main verb found through the
    subject, and it shifts the positional fallback by one token.

    Args:
        story: Story object exposing the attribute named by ``part`` (with a
            ``.text`` token sequence), ``system.main`` and the ``means`` /
            ``ends`` containers that are written to.
        part: Name of the story attribute to analyse; also selects the
            ``get_<part>_phrases`` method that is called at the end.

    Returns:
        The value returned by ``self.get_<part>_phrases``.
    """
    # Plain attribute lookup; the tokens were formerly fetched by eval()-ing
    # a string built from `part` at every use.
    text = getattr(story, str(part)).text

    simple = False
    found_verb = False
    found_obj = False
    found_mv_phrase = False
    subject = []
    main_verb = []
    main_object = []
    mv_phrase = []

    # Simple case if the subj and dobj are linked by a verb
    for token in text:
        if NLPUtility.is_subject(token):
            subject = token
            if (NLPUtility.is_verb(token.head)
                    and str.lower(token.head.text) != 'can'):
                found_verb = True
                main_verb = token.head
                break

    # No subject token was found: use the first token of the part.
    if isinstance(subject, list):
        subject = text[0]

    for token in text:
        if NLPUtility.is_dobj(token):
            found_obj = True

            if token.pos_ == "PRON":
                # If it is a pronoun, look for a preposition with a pobj
                redirected = False
                for child in token.head.children:
                    if (child.dep_ == "prep"
                            and child.right_edge.dep_ == "pobj"
                            and not redirected):
                        token = child.right_edge
                        mv_phrase = [main_verb, child]
                        redirected = True
                        found_mv_phrase = True
            elif token.pos_ == "ADJ" or token.pos_ == "ADV":
                # Set to right edge if there is an adj/adv as dobj, and
                # possibly make a verb phrase.
                # NOTE(review): `token` is rebound inside the grandchild
                # loop, so with several pobj grandchildren the phrase holds
                # the previous grandchild instead of the adj/adv - confirm
                # whether that is intended.
                redirected = False
                for child in token.children:
                    if child.dep_ == "prep" and not redirected:
                        for grandchild in child.children:
                            if grandchild.dep_ == "pobj":
                                mv_phrase = [main_verb, token, child]
                                token = grandchild
                                redirected = True
                                found_mv_phrase = True

            if token.head == main_verb:
                simple = True

            # Only the first direct object is considered.
            main_object = token
            break

    # If the root of the sentence is a verb
    if not simple:
        for token in text:
            if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
                found_verb = True
                main_verb = token
                break

    # If no main verb could be found, pick it by position: the third token
    # for the 'means' part or when the second token is 'can', otherwise the
    # second token. Possibly a NLP error...
    if not found_verb:
        if str(part) == 'means' or str.lower(text[1].text) == 'can':
            main_verb = text[2]
        else:
            main_verb = text[1]

    # If the sentence contains no dobj it must be another obj (any
    # dependency label that reads 'obj' after its first character).
    if not found_obj:
        for token in text:
            if token.dep_[1:] == 'obj':
                found_obj = True
                main_object = token
                break

    # If none is found it points to the unknown 'system part'
    if not found_obj and part == 'means':
        main_object = story.system.main

    if part == 'means':
        target, span_source = story.means, 'means.text'
    else:
        target, span_source = story.ends, 'ends.text'
        target.subject.main = subject

    target.main_verb.main = main_verb
    target.main_object.main = main_object
    if found_mv_phrase:
        target.main_verb.phrase = MinerUtility.get_span(
            story, mv_phrase, span_source)
        target.main_verb.type = "II"

    # Get phrases for main_object and main_verb. The extra False argument is
    # passed when no real object token is available (still the empty-list
    # placeholder, or the system fallback).
    no_object_token = (isinstance(main_object, list)
                       or main_object == story.system.main)
    get_phrases = getattr(self, 'get_' + str(part) + '_phrases')
    if no_object_token:
        story = get_phrases(story, found_mv_phrase, False)
    else:
        story = get_phrases(story, found_mv_phrase)

    return story
def get_mobj_and_mv(self, story, part='means'):
    """Determine the main verb and main object of one part of a story.

    The tokens are read from ``story.<part>.text``. The outcome is written to
    ``story.means`` when ``part == 'means'`` and to ``story.ends`` (together
    with the subject) for any other value. Finally
    ``self.get_<part>_phrases`` is called and its result is returned.

    Args:
        story: Story object exposing the attribute named by ``part`` (with a
            ``.text`` token sequence), ``system.main`` and the ``means`` /
            ``ends`` containers that are written to.
        part: Name of the story attribute to analyse; also selects the
            ``get_<part>_phrases`` method that is called at the end.

    Returns:
        The value returned by ``self.get_<part>_phrases``.
    """
    # Plain attribute lookup; the tokens were formerly fetched by eval()-ing
    # a string built from `part` at every use.
    text = getattr(story, str(part)).text

    simple = False
    found_verb = False
    found_obj = False
    found_mv_phrase = False
    subject = []
    main_verb = []
    main_object = []
    mv_phrase = []

    # Simple case if the subj and dobj are linked by a verb
    for token in text:
        if NLPUtility.is_subject(token):
            subject = token
            if NLPUtility.is_verb(token.head):
                found_verb = True
                main_verb = token.head
                break

    # No subject token was found: use the first token of the part.
    if isinstance(subject, list):
        subject = text[0]

    for token in text:
        if NLPUtility.is_dobj(token):
            found_obj = True

            if token.pos_ == "PRON":
                # If it is a pronoun, look for a preposition with a pobj
                redirected = False
                for child in token.head.children:
                    if (child.dep_ == "prep"
                            and child.right_edge.dep_ == "pobj"
                            and not redirected):
                        token = child.right_edge
                        mv_phrase = [main_verb, child]
                        redirected = True
                        found_mv_phrase = True
            elif token.pos_ == "ADJ" or token.pos_ == "ADV":
                # Set to right edge if there is an adj/adv as dobj, and
                # possibly make a verb phrase.
                # NOTE(review): `token` is rebound inside the grandchild
                # loop, so with several pobj grandchildren the phrase holds
                # the previous grandchild instead of the adj/adv - confirm
                # whether that is intended.
                redirected = False
                for child in token.children:
                    if child.dep_ == "prep" and not redirected:
                        for grandchild in child.children:
                            if grandchild.dep_ == "pobj":
                                mv_phrase = [main_verb, token, child]
                                token = grandchild
                                redirected = True
                                found_mv_phrase = True

            if token.head == main_verb:
                simple = True

            # Only the first direct object is considered.
            main_object = token
            break

    # If the root of the sentence is a verb
    if not simple:
        for token in text:
            if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
                found_verb = True
                main_verb = token
                break

    # If no main verb could be found it is the second word (the original
    # author assumes it directly follows 'I'). Possibly a NLP error...
    if not found_verb:
        main_verb = text[1]

    # If the sentence contains no dobj it must be another obj (any
    # dependency label that reads 'obj' after its first character).
    if not found_obj:
        for token in text:
            if token.dep_[1:] == 'obj':
                found_obj = True
                main_object = token
                break

    # If none is found it points to the unknown 'system part'
    if not found_obj and part == 'means':
        main_object = story.system.main

    if part == 'means':
        target, span_source = story.means, 'means.text'
    else:
        target, span_source = story.ends, 'ends.text'
        target.subject.main = subject

    target.main_verb.main = main_verb
    target.main_object.main = main_object
    if found_mv_phrase:
        target.main_verb.phrase = MinerUtility.get_span(
            story, mv_phrase, span_source)
        target.main_verb.type = "II"

    # Get phrases for main_object and main_verb. The extra False argument is
    # passed when the object is the system fallback.
    # NOTE(review): for a non-'means' part without any object, main_object is
    # still the empty-list placeholder and the default-argument call is made -
    # confirm the phrase getter copes with that.
    is_system_object = main_object == story.system.main
    get_phrases = getattr(self, 'get_' + str(part) + '_phrases')
    if is_system_object:
        story = get_phrases(story, found_mv_phrase, False)
    else:
        story = get_phrases(story, found_mv_phrase)

    return story