def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
    """Set up the meta-feature generator and the two vectorizers.

    Args:
        normal_vectorizer: Optional vectorizer stored as
            ``self.normal_vectorizer``. A new ``Vectorizer()`` is created
            when the argument is missing or falsy.
        clean_vectorizer: Optional vectorizer stored as
            ``self.clean_vectorizer``. A new ``Vectorizer()`` is created
            when the argument is missing or falsy.

    ``self.fit_complete`` is True only when both arguments are truthy,
    otherwise False.
    """
    self.mf_generator = MetaFeatureGenerator()

    # Both vectorizers must be handed in for the instance to count as fitted.
    self.fit_complete = (
        True if (normal_vectorizer and clean_vectorizer) else False
    )

    # Keep whatever the caller supplied; otherwise start from a new one.
    self.normal_vectorizer = normal_vectorizer or Vectorizer()
    self.clean_vectorizer = clean_vectorizer or Vectorizer()
class FeatureGenerator(object):
    """Build one feature row per text from two vectorizers plus meta features.

    The "normal" vectorizer is applied to the text as given; the "clean"
    vectorizer is applied to the output of
    ``MetaFeatureGenerator.generate_clean_stem_text``. The meta features
    returned by ``MetaFeatureGenerator.generate_meta_features`` are appended
    after the vectorizer columns.
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Store the supplied vectorizers, creating new ones where missing.

        Args:
            normal_vectorizer: Optional vectorizer for the raw text. A new
                ``Vectorizer()`` is used when missing or falsy.
            clean_vectorizer: Optional vectorizer for the cleaned text. A new
                ``Vectorizer()`` is used when missing or falsy.

        ``fit_complete`` starts as True only when both vectorizers are
        supplied; otherwise it becomes True after ``fit`` has run.
        """
        self.mf_generator = MetaFeatureGenerator()
        self.fit_complete = (
            True if (normal_vectorizer and clean_vectorizer) else False
        )
        self.normal_vectorizer = normal_vectorizer or Vectorizer()
        self.clean_vectorizer = clean_vectorizer or Vectorizer()

    def fit(self, input_text, input_scores):
        """Fit both vectorizers and mark the generator as fitted.

        Args:
            input_text: Iterable of texts. It is passed unchanged to the
                normal vectorizer and, after cleaning each item, to the
                clean vectorizer.
            input_scores: Passed through to both vectorizers' ``fit``.
        """
        self.normal_vectorizer.fit(input_text, input_scores)
        clean_stem = self.mf_generator.generate_clean_stem_text
        clean_text = [clean_stem(t) for t in input_text]
        self.clean_vectorizer.fit(clean_text, input_scores)
        # Previously this flag stayed False after an explicit fit, so only
        # instances built from pre-made vectorizers ever reported as fitted.
        self.fit_complete = True

    def get_features(self, text):
        """Return the combined feature row for ``text``.

        Also sets ``self.colnames`` to the column labels: the normal
        vectorizer's vocab, then the clean vectorizer's vocab, then the
        meta-feature names in sorted order.

        Args:
            text: A single text to featurize.

        Returns:
            The result of ``np.hstack`` over the vectorizer features and a
            one-row ``np.matrix`` of the meta-feature values.
        """
        vec_feats = self.generate_vectorizer_features(text)
        # NOTE(review): assumes both vocabs are lists so that ``+``
        # concatenates them -- confirm against Vectorizer.
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab
        meta_feats = self.generate_meta_features(text)
        # sorted() gives a real list on Python 2 and 3. The old
        # ``keys()`` + ``.sort()`` pair fails on Python 3, where keys()
        # returns a view that has no sort() and cannot be added to a list.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])
        self.colnames = vec_keys + meta_keys
        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Return the meta features the meta-feature generator computes for ``text``."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Return normal and clean vectorizer features for ``text``, side by side.

        Each vectorizer is given a one-element list; the two results are
        joined with ``np.hstack``, normal features first.
        """
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])
class FeatureGenerator(object):
    """Produce a single feature row for a text.

    Columns come from three sources, in this order: the normal vectorizer
    (applied to the text as given), the clean vectorizer (applied to the
    text after ``generate_clean_stem_text``), and the meta features from
    ``MetaFeatureGenerator.generate_meta_features``.
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Create the generator, reusing any vectorizers passed in.

        Args:
            normal_vectorizer: Optional vectorizer used on the raw text;
                replaced by ``Vectorizer()`` when missing or falsy.
            clean_vectorizer: Optional vectorizer used on the cleaned text;
                replaced by ``Vectorizer()`` when missing or falsy.

        ``fit_complete`` is True from the start only if both vectorizers
        were passed in; ``fit`` sets it to True otherwise.
        """
        self.mf_generator = MetaFeatureGenerator()
        self.fit_complete = (
            True if (normal_vectorizer and clean_vectorizer) else False
        )
        self.normal_vectorizer = normal_vectorizer or Vectorizer()
        self.clean_vectorizer = clean_vectorizer or Vectorizer()

    def fit(self, input_text, input_scores):
        """Fit the normal and clean vectorizers, then flag the fit as done.

        Args:
            input_text: Iterable of texts; given as-is to the normal
                vectorizer, and item-by-item cleaned for the clean one.
            input_scores: Forwarded to each vectorizer's ``fit``.
        """
        self.normal_vectorizer.fit(input_text, input_scores)
        to_clean = self.mf_generator.generate_clean_stem_text
        clean_text = [to_clean(t) for t in input_text]
        self.clean_vectorizer.fit(clean_text, input_scores)
        # Without this, fit_complete only ever became True through the
        # constructor, never through an explicit fit.
        self.fit_complete = True

    def get_features(self, text):
        """Compute the feature row for ``text`` and record column names.

        Side effect: ``self.colnames`` is set to normal vocab + clean vocab
        + sorted meta-feature names.

        Args:
            text: The single text to featurize.

        Returns:
            ``np.hstack`` of the vectorizer features and a one-row
            ``np.matrix`` holding the meta-feature values in sorted-key order.
        """
        vec_feats = self.generate_vectorizer_features(text)
        # NOTE(review): relies on ``vocab`` being a list on both
        # vectorizers (list concatenation) -- verify in Vectorizer.
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab
        meta_feats = self.generate_meta_features(text)
        # On Python 3, dict.keys() is a view: it has no .sort() and cannot
        # be added to a list. sorted() returns a list on both 2 and 3.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])
        self.colnames = vec_keys + meta_keys
        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Delegate to the meta-feature generator and return its result for ``text``."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Return the horizontally stacked normal and clean vectorizer features.

        The raw text and its cleaned form are each wrapped in a one-element
        list before being handed to the matching vectorizer.
        """
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])