def _dependency_feature_key(prefix, relation, governor_part, dependent_part):
    """Return the feature name ``<prefix><relation>(<governor_part>, <dependent_part>)``."""
    return prefix + relation + '(' + governor_part + ', ' + dependent_part + ')'


def get_dependency_features(feature_vector, dependency_list, generalization=None):
    """Add one boolean feature per (optionally generalized) dependency.

    Every feature is written into ``feature_vector`` with the value ``True``;
    nothing is returned.

    @param feature_vector: mutable mapping that receives the features.
    @param dependency_list: list of dicts. Each dict is read for the keys
        'relation', 'governor' and 'dependent'; depending on the chosen
        generalization 'governor_pos', 'dependent_pos' and 'dependent_index'
        are read as well. Each dict's 'governor_index' is OVERWRITTEN with
        the dict's position in the list.
    @param generalization: the generalization to apply, if any.
        Appropriate values are:
        None, 'pos', 'opinion', 'liwc', 'neg_dist_opinion'.
        Any other value produces no features.
    """
    # NOTE(review): this replaces whatever 'governor_index' the caller supplied
    # with the list position. Kept as-is because callers may rely on the side
    # effect -- the original author also flagged this as needing rework.
    for position, dep in enumerate(dependency_list):
        dep['governor_index'] = position

    # Only membership is ever tested, so a set of indices is sufficient.
    # The comparison is against the raw (not lower-cased) relation.
    negated_indices = {dep['governor_index']
                       for dep in dependency_list
                       if dep['relation'] == 'neg'}

    if generalization is None:
        prefix = 'dependency:'
    else:
        prefix = 'dep_%s_generalized:' % (generalization,)

    for dep in dependency_list:
        relation = dep['relation'].lower()
        governor = dep['governor'].lower()
        dependent = dep['dependent'].lower()

        if generalization is None:
            key = _dependency_feature_key(prefix, relation, governor, dependent)
            feature_vector[key] = True

        elif generalization == 'pos':
            governor_pos = mpqa.convert_pos(dep['governor_pos'])
            key = _dependency_feature_key(prefix, relation, governor_pos, dependent)
            feature_vector[key] = True

        elif generalization == 'opinion':
            # Generalize each side independently; a side is skipped when the
            # lookup returns None.
            gov_polarity = mpqa.lookup(governor, dep['governor_pos'])
            if gov_polarity is not None:
                key = _dependency_feature_key(
                    prefix, relation, gov_polarity['polarity'], dependent)
                feature_vector[key] = True
            dep_polarity = mpqa.lookup(dependent, dep['dependent_pos'])
            if dep_polarity is not None:
                key = _dependency_feature_key(
                    prefix, relation, governor, dep_polarity['polarity'])
                feature_vector[key] = True

        elif generalization == 'neg_dist_opinion':
            for element in ('dependent', 'governor'):
                word = dep[element].lower()
                polarity_dict = mpqa.lookup(word, dep[element + '_pos'])
                if polarity_dict is None:
                    continue
                polarity = polarity_dict['polarity']
                element_index = dep[element + '_index']

                # NOTE(review): the original layout was lost, so which `if`
                # the skip belongs to is inferred: it is read as pairing with
                # the negation test, which is what the TODO below describes.
                # Please confirm against the pre-collapse source.
                if element_index not in negated_indices or relation == 'neg':
                    # TODO: This forces only flipped polarity deps to be
                    # included... is this desired?
                    continue

                if polarity == 'negative':
                    polarity = 'positive'
                elif polarity == 'positive':
                    polarity = 'negative'

                if element == 'dependent':
                    key = _dependency_feature_key(prefix, relation, governor, polarity)
                else:
                    key = _dependency_feature_key(prefix, relation, polarity, dependent)
                feature_vector[key] = True

        elif generalization == 'liwc':
            # list(...) is required: on Python 3 dict.keys() is a view and has
            # no append(); on Python 2 this is just a copy of the key list.
            gov_categories = list(word_category_counter.score_word(governor).keys())
            gov_categories.append(governor)
            dep_categories = list(word_category_counter.score_word(dependent).keys())
            dep_categories.append(dependent)
            for gov_category in gov_categories:
                for dep_category in dep_categories:
                    if gov_category == governor and dep_category == dependent:
                        continue  # avoids the no generalization case
                    key = _dependency_feature_key(
                        prefix, relation, gov_category, dep_category)
                    feature_vector[key] = True
def __init__(self, postdict):
    """Populate the instance from the key/value mapping ``postdict``.

    Keys read: 'OriginalText' (falling back to 'Current', then to ''),
    'Lemma', 'PartOfSpeech', 'CharacterOffsetBegin', 'CharacterOffsetEnd',
    'Before' and 'After'. Missing keys yield None, except 'Before'/'After'
    which default to an empty unicode string.
    """
    get = postdict.get

    # The fallback is resolved first, exactly as a nested call would do.
    fallback_text = get(u'Current', '')
    self.text = get(u'OriginalText', fallback_text)
    self.lemma = get(u'Lemma')
    self.pos = get(u'PartOfSpeech', None)

    # Character offsets.
    self.start = get(u'CharacterOffsetBegin', None)
    self.end = get(u'CharacterOffsetEnd', None)

    # Text surrounding this item.
    self.before = get(u'Before', u'')
    self.after = get(u'After', u'')

    # Left unset here; per the original note it is evaluated lazily via a
    # @property (not visible in this section).
    self._liwc = None

    # Looked up from the text and part of speech stored above.
    self.mpqa = mpqa.lookup(self.text, self.pos)
def __init__(self, word, index, pos, rel=None, lemma=None, start=None, end=None):
    """Create an instance for ``word`` at position ``index``.

    Stores the arguments as attributes of the same name, initialises the
    linking attributes to None, and records the results of
    ``mpqa.lookup(word, pos)`` and ``score_word(word)``.
    """
    # Linking attributes start empty; presumably they are filled in elsewhere
    # once the surrounding structure is built -- confirm against callers.
    self.next_tree = self.gov = self.deps = self.prev = self.nxt = self.dist = None

    # Character span, as supplied by the caller.
    self.start, self.end = start, end

    # Lexicon lookups for this word.
    self.mpqa = mpqa.lookup(word, pos)
    self.liwc = score_word(word)

    # Plain copies of the remaining arguments.
    self.pos = pos
    self.word = word
    self.index = index
    self.rel = rel
    self.lemma = lemma