/
possessive_self.py
80 lines (69 loc) · 4.38 KB
/
possessive_self.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
tool_dir = 'tools'
import spacy.en
import sys
from nltk.corpus import wordnet as wn
reload(sys)
sys.setdefaultencoding('utf-8')
sys.path.append(tool_dir + '/inflection-0.3.1')
sys.path.append(tool_dir)
import inflection
import en
import ner
#globals for the most common unambiguous pronouns
possessive_dictionary = {'he' : 'his', 'i' : 'my', 'it' : 'its', 'she' : 'her', 'you' : 'your', 'we' : 'our', 'article' : 'its', 'this' : 'its', 'these' : 'their', 'those' : 'their'}
person_dictionary = {'he' : 3, 'i' : 1, 'it' : 3, 'she' : 3, 'you' : 2, 'we' : 1, 'article' : 3, 'this' : 3, 'these' : 3, 'those' : 3}
number_dictionary = {'he' : 1, 'i' : 1, 'it' : 1, 'she' : 1, 'you' : 2, 'we' : 2, 'article' : 1, 'this' : 1, 'these' : 2, 'those' : 2}
possessive_dictionary_singular = {'who' : 'his/her', 'which' : 'its', 'what' : 'its', 'whom' : 'his/her'}
def person_or_not (sentence, subject_word, ner_tagger, parsed_tokens, verb_token, subj_token):
#perform NER
entities = ner_tagger.get_entities(sentence)
#if a PERSON named entity is found, assume singular, and attach 'his/her'
if 'PERSON' in entities and [person_name for person_name in entities['PERSON'] if subject_word in person_name.lower()]:
return [3, 1, u'his/her/its']
#if a LOCATION or ORGANIZATION named entity is found, assume singular, and attach 'its'
if 'ORGANIZATION' in entities and [person_name for person_name in entities['ORGANIZATION'] if subject_word in person_name.lower()]:
return [3, 1, u'its/his/her']
if 'LOCATION' in entities and [person_name for person_name in entities['LOCATION'] if subject_word in person_name.lower()]:
return [3, 1, u'its/his/her']
#check for pronoun
if subject_word in possessive_dictionary:
return [person_dictionary[subject_word], number_dictionary[subject_word], possessive_dictionary[subject_word]]
if subject_word in possessive_dictionary_singular:
return [3, 1, possessive_dictionary_singular[subject_word]]
#check whether 'person' is a hypernym
if wn.synsets(subj_token.lemma_.lower(), pos=wn.NOUN):
subj_synset = wn.synset(subj_token.lemma_.lower() + '.n.01')
if wn.synset('person.n.01') in list(subj_synset.closure(lambda s: s.hypernyms())):
return [3, 1, u'his/her/its']
else:
return [3, 1, u'its/his/her']
return None
def get_subject_properties(parsed_tokens, verb_token, object_token, ner_tagger, sentence):
list_of_subjects = [subj_token for subj_token in parsed_tokens if subj_token.dep_ == "nsubj" and subj_token.head is verb_token]
if len(list_of_subjects) == 1:
subj_token = list_of_subjects[0]
subject_word = subj_token.orth_.lower()
#if the verb conjugation is singular 3rd person for sure
if (verb_token.tag_ == u'VBZ') or ((verb_token.tag_ == u'VBG' or verb_token.tag_ == u'VBN') and [aux_verb_token for aux_verb_token in parsed_tokens if aux_verb_token.dep_ == u'aux' and aux_verb_token.head is verb_token and aux_verb_token.tag_ == u'VBZ']):
return person_or_not (sentence, subject_word, ner_tagger, parsed_tokens, verb_token, subj_token)
#check if it's in the tiny pronoun dictionary
if subject_word in possessive_dictionary:
return [person_dictionary[subject_word], number_dictionary[subject_word], possessive_dictionary[subject_word]]
#if the subject is not 'i' or 'you' or 'we' and the verb form is not 3rd person singular
if (verb_token.tag_ == u'VBP') or ((verb_token.tag_ == u'VBG' or verb_token.tag_ == u'VBN') and [aux_verb_token for aux_verb_token in parsed_tokens if aux_verb_token.dep_ == u'aux' and aux_verb_token.head is verb_token and aux_verb_token.tag_ == u'VBP']):
return [3, 2, u'their']
"""#if the subject has a conjunction
compounded_subjects = [tok.orth_.lower() for tok in parsed_tokens if tok.dep_ == "conj" and tok.head is subj_token]
if compounded_subjects:
if [tok for tok in parsed_tokens if tok.orth_.lower() == "and" and tok.dep_ == "cc" and tok.head is subj_token]:
if u'i' in compounded_subjects:
return [1, 2, u'our']
elif 'you' in compounded_subjects:
return [2, 2, u'your']
else:
return [3, 2, u'their']"""
#if the subject is plural according to the inflector, and the verb form is not singular - doing this test only for dictionary words and not for NEs which were not detected
if wn.synsets(subj_token.lemma_.lower(), pos=wn.NOUN) and inflection.pluralize(subject_word) == subject_word and inflection.singularize(subject_word) != subject_word:
return [3, 2, u'their']
else:
return None