def IOB_prose_features(self, word):
    """
    IOB_prose_features()

    Purpose: Creates a dictionary of prose features for the given word.

    Only the features listed in self.enabled_IOB_prose_word_features are
    produced. Every key is a (feature_name, value) tuple. All values are 1
    except for the "length" feature, whose value is len(word).

    @param word. A string
    @return      A dictionary of features

    >>> wf = WordFeatures()
    >>> wf.IOB_prose_features('test') is not None
    True
    """
    # NOTE(review): another definition named IOB_prose_features follows this
    # one; if both live in the same scope the later one wins -- confirm and
    # drop the duplicate.

    # Feature: <dummy>
    features = {('dummy', None): 1}  # always have >0 dimensions

    # Allow for particular features to be enabled
    for feature in self.enabled_IOB_prose_word_features:

        # Feature: The lower-cased word itself
        if feature == "word":
            features[(feature, word.lower())] = 1

        # Feature: Lancaster stem of the lower-cased word
        elif feature == "stem_lancaster":
            features[(feature, lancaster_st.stem(word.lower()))] = 1

        # Feature: Generic# (every digit replaced by '0')
        elif feature == 'Generic#':
            generic = re.sub('[0-9]', '0', word)
            features[('Generic#', generic)] = 1

        # Feature: Last two letters of word
        elif feature == 'last_two_letters':
            features[('last_two_letters', word[-2:])] = 1

        # Feature: Word length (the only non-binary value in the dictionary)
        elif feature == "length":
            features[(feature, None)] = len(word)

        # Feature: Porter stem
        # NOTE(review): unlike stem_lancaster, the word is not lower-cased
        # here -- confirm whether that asymmetry is intended.
        elif feature == "stem_porter":
            features[(feature, porter_st.stem(word))] = 1

        # Feature: Mitre (one feature per pattern that matches the word)
        elif feature == "mitre":
            for f in self.mitre_features:
                if re.search(self.mitre_features[f], word):
                    features[(feature, f)] = 1

        # Feature: Word Shape (one feature per shape returned)
        elif feature == "word_shape":
            wordShapes = getWordShapes(word)
            for shape in wordShapes:
                features[(feature, shape)] = 1

    return features
def IOB_prose_features(self, word):
    """
    IOB_prose_features()

    Purpose: Creates a dictionary of prose features for the given word.

    Only the features listed in self.enabled_IOB_prose_word_features are
    produced. Every key is a (feature_name, value) tuple. All values are 1
    except for the "length" feature, whose value is len(word).

    @param word. A string
    @return      A dictionary of features

    >>> wf = WordFeatures()
    >>> wf.IOB_prose_features('test') is not None
    True
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one; if both live in the same scope this one silently replaces it --
    # confirm and drop the duplicate.

    # Feature: <dummy>
    features = {('dummy', None): 1}  # always have >0 dimensions

    # Allow for particular features to be enabled
    for feature in self.enabled_IOB_prose_word_features:

        # Feature: The lower-cased word itself
        if feature == "word":
            features[(feature, word.lower())] = 1

        # Feature: Lancaster stem of the lower-cased word
        elif feature == "stem_lancaster":
            features[(feature, lancaster_st.stem(word.lower()))] = 1

        # Feature: Generic# (every digit replaced by '0')
        elif feature == 'Generic#':
            generic = re.sub('[0-9]', '0', word)
            features[('Generic#', generic)] = 1

        # Feature: Last two letters of word
        elif feature == 'last_two_letters':
            features[('last_two_letters', word[-2:])] = 1

        # Feature: Word length (the only non-binary value in the dictionary)
        elif feature == "length":
            features[(feature, None)] = len(word)

        # Feature: Porter stem
        # NOTE(review): unlike stem_lancaster, the word is not lower-cased
        # here -- confirm whether that asymmetry is intended.
        elif feature == "stem_porter":
            features[(feature, porter_st.stem(word))] = 1

        # Feature: Mitre (one feature per pattern that matches the word)
        elif feature == "mitre":
            for f in self.mitre_features:
                if re.search(self.mitre_features[f], word):
                    features[(feature, f)] = 1

        # Feature: Word Shape (one feature per shape returned)
        elif feature == "word_shape":
            wordShapes = getWordShapes(word)
            for shape in wordShapes:
                features[(feature, shape)] = 1

    return features
def IOB_nonprose_features(self, word):
    """
    IOB_nonprose_features()

    Purpose: Creates a dictionary of nonprose features for the given word.

    The lower-cased word is always included. The remaining features are
    produced only when named in self.enabled_IOB_nonprose_word_features.
    Keys are (feature_name, value) tuples; the features built here have
    value 1, and whatever self.QANN_features(word) returns is merged in
    unchanged.

    @param word. A string
    @return      A dictionary of features

    >>> wf = WordFeatures()
    >>> wf.IOB_nonprose_features('test') is not None
    True
    """
    # Feature: The word, itself (always present, regardless of enabled set)
    features = {('word', word.lower()): 1}

    # Allow for particular features to be enabled
    for feature in self.enabled_IOB_nonprose_word_features:

        # Feature: Mitre (one feature per pattern that matches the word)
        if feature == "mitre":
            for f in self.mitre_features:
                if re.search(self.mitre_features[f], word):
                    features[('mitre', f)] = 1

        # Feature: Word Shape (one feature per shape returned)
        elif feature == "word_shape":
            wordShapes = getWordShapes(word)
            for shape in wordShapes:
                features[('word_shape', shape)] = 1

        # Feature: QANN features (merged in; may overwrite existing keys)
        elif feature == 'QANN':
            qann_feats = self.QANN_features(word)
            features.update(qann_feats)

    return features
def feature_word_shape(word):
    """
    feature_word_shape()

    Purpose: Creates a dictionary of word shape features for the given word.

    One ('word_shape', shape) key with value 1 is produced for every shape
    returned by getWordShapes(word).

    @param word. The word passed to getWordShapes()
    @return      A dictionary of features
    """
    return {('word_shape', shape): 1 for shape in getWordShapes(word)}