def test_get_main_verbs_of_sent(spacy_doc):
    """Each sentence's main (non-auxiliary) verbs match the expected tokens."""
    expected = [['going'], ['love', 'love'], ['damaged'], ['Thank']]
    for sent, exp in zip(spacy_doc.sents, expected):
        verbs = utils.get_main_verbs_of_sent(sent)
        assert [verb.text for verb in verbs] == exp
def test_get_objects_of_verb(spacy_doc):
    """Objects found for each main verb match the expected surface forms."""
    expected = [[], ["Python"], ["incompatibilities"], [], ["God"]]
    main_verbs = (
        verb
        for sent in spacy_doc.sents
        for verb in utils.get_main_verbs_of_sent(sent)
    )
    for verb, exp in zip(main_verbs, expected):
        assert [tok.text for tok in utils.get_objects_of_verb(verb)] == exp
def test_get_subjects_of_verb(spacy_doc):
    """Subjects found for each main verb match the expected surface forms."""
    expected = [["tests"], ["I"], ["I"], ["programmers"], []]
    main_verbs = (
        verb
        for sent in spacy_doc.sents
        for verb in utils.get_main_verbs_of_sent(sent)
    )
    for verb, exp in zip(main_verbs, expected):
        assert [tok.text for tok in utils.get_subjects_of_verb(verb)] == exp
def __call__(self, text):
    """Extract crude subject-verb-object snippets from *text*.

    Parses *text* with ``self.nlp`` and, for every main (non-auxiliary)
    verb in each sentence, builds one snippet consisting of: the verb's
    subjects, any negation token attached to the verb, the verb span
    including its auxiliaries, and the noun chunks containing the verb's
    objects. All snippets are joined with ``'. '`` into one string.

    :param text: raw text to parse (anything ``self.nlp`` accepts)
    :return: snippets joined by ``'. '``
    """
    doc = self.nlp(text)
    svo = []
    for sent in doc.sents:
        # NOTE(review): `chunks` is reset per *sentence*, not per verb, so a
        # later verb's snippet also carries earlier verbs' noun chunks —
        # confirm this accumulation is intended.
        chunks = ''
        # Return the main (non-auxiliary) verbs in a sentence.
        verbs = utils.get_main_verbs_of_sent(sent)
        for verb in verbs:
            # Negation tokens whose dependency head is this verb.
            negation = "".join([
                t.text
                for t in sent
                if t.dep_ == 'neg'
                if t.head == verb
            ])
            # Return all subjects of a verb according to the dependency parse.
            subj = utils.get_subjects_of_verb(verb)
            # Return all objects of a verb according to the dependency parse,
            # including open clausal complements.
            obj = utils.get_objects_of_verb(verb)
            # Document indexes spanning all (adjacent) tokens around the verb
            # that are auxiliary verbs or negations; end index is inclusive,
            # hence the +1 in the slice below.
            start, end = utils.get_span_for_verb_auxiliaries(verb)
            aux = doc[start:end + 1].as_doc().text
            # Expand each object token to every noun chunk that contains it
            # as a whole word.
            for o in obj:
                for nc in sent.noun_chunks:
                    if o.text in nc.text.split():
                        chunks += f' {nc.text}'
            # `chunks` carries its own leading space, so snippets may contain
            # doubled spaces between fields.
            snippet = f'{" ".join([s.text for s in subj])} {negation} {aux} {chunks}'
            svo.append(snippet)
    return '. '.join(svo)
def test_get_main_verbs_of_sent(spacy_doc):
    """Main verbs extracted per sentence equal the expected surface forms."""
    expected = [["going"], ["love", "love"], ["damaged"], ["Thank"]]
    observed = []
    for sent in spacy_doc.sents:
        observed.append([tok.text for tok in utils.get_main_verbs_of_sent(sent)])
    for obs, exp in zip(observed, expected):
        assert obs == exp
# """Return the main (non-auxiliary) verbs in a sentence.""" # return [tok for tok in sent # if tok.pos == VERB and tok.dep_ not in constants.AUX_DEPS] # File: d:\miniconda3\envs\nlp\lib\site-packages\textacy\spacier\utils.py # Type: function toy_sentence = 'Shivangi is an engineer' doc = nlp(toy_sentence) """What are the entities in this sentence?""" displacy.render(doc, style='ent', jupyter=True) # Let's find out the main verb in this sentence: verbs = spacy_utils.get_main_verbs_of_sent(doc) print(verbs) # And what are nominal subjects of this verb? for verb in verbs: print(verb, spacy_utils.get_subjects_of_verb(verb)) """*You will notice that this has a reasonable overlap with the noun phrases which we pulled from our part-of-speech tagging but can be different as well.*""" [(token, token.tag_) for token in doc] """Tip: As an exercise, extend this approach to at least add Who, Where and When questions as practice. ## Level Up: Question and Answer