class Scorer(object):
    """Fit a random-forest model that maps texts to scores.

    Features are produced by a ``FeatureGenerator`` fitted on the training
    texts and scores. The estimator is a ``RandomForestClassifier`` when the
    scores take at most ``classification_max`` distinct values, and a
    ``RandomForestRegressor`` otherwise.
    """

    # Largest number of distinct score values still treated as class labels.
    classification_max = 4
    # Number of folds used when cross-validating the estimator in train().
    cv_folds = 2

    def __init__(self, text, scores):
        """Store the training data, pick the estimator and fit the features.

        Args:
            text: Iterable of training texts.
            scores: Iterable of hashable score values, one per text.
        """
        self.text = text
        self.scores = scores
        self.feature_generator = FeatureGenerator()

        # Few distinct targets are handled as classification; anything
        # richer is handled as regression.
        if len(set(scores)) <= self.classification_max:
            estimator_cls = RandomForestClassifier
        else:
            estimator_cls = RandomForestRegressor
        self.classifier = estimator_cls(
            n_estimators=100,
            min_samples_split=4,
            min_samples_leaf=3,
            random_state=1,
        )

        self.fit_feats()
        self.fit_done = False

    def fit_feats(self):
        """Fit the feature generator on the stored texts and scores."""
        self.feature_generator.fit(self.text, self.scores)

    def get_features(self):
        """Return the feature matrix of the training texts, one row block per text."""
        rows = [self.feature_generator.get_features(t) for t in self.text]
        return np.vstack(rows)

    def train(self):
        """Cross-validate the estimator, then fit it on all training data.

        Sets ``cv_scores`` (per-fold scores), ``cv_score`` (their mean),
        ``cv_dev`` (their standard deviation) and flips ``fit_done`` to True.
        """
        feats = self.get_features()
        scores = np.array(self.scores)

        # Compute error metrics for the estimator, honouring the configured
        # fold count instead of the library default.
        self.cv_scores = cross_validation.cross_val_score(
            self.classifier, feats, scores, cv=self.cv_folds
        )
        self.cv_score = self.cv_scores.mean()
        self.cv_dev = self.cv_scores.std()

        self.classifier.fit(feats, scores)
        self.fit_done = True

    def predict(self, text):
        """Return the estimator's prediction for a single text.

        Args:
            text: The text to score.

        Returns:
            Whatever ``self.classifier.predict`` returns for the one-sample
            feature matrix built from ``text``.
        """
        feats = self.feature_generator.get_features(text)
        # Training rows go through np.vstack, which promotes each feature
        # vector to 2-D; apply the same promotion here so the estimator sees
        # a (1, n_features) matrix. Already-2-D input is left unchanged.
        return self.classifier.predict(np.atleast_2d(feats))
class Scorer(object):
    """Train a random-forest scorer for texts and predict scores for new text.

    A ``FeatureGenerator`` is fitted on the training texts and scores and used
    to turn each text into features. When the scores contain at most
    ``classification_max`` distinct values a ``RandomForestClassifier`` is
    used; otherwise a ``RandomForestRegressor`` is used.
    """

    # Score sets with this many distinct values or fewer are class labels.
    classification_max = 4
    # Fold count handed to cross_val_score in train().
    cv_folds = 2

    def __init__(self, text, scores):
        """Keep the training data, choose the estimator, fit the features.

        Args:
            text: Iterable of training texts.
            scores: Iterable of hashable score values, one per text.
        """
        self.text = text
        self.scores = scores
        self.feature_generator = FeatureGenerator()

        forest_params = dict(
            n_estimators=100,
            min_samples_split=4,
            min_samples_leaf=3,
            random_state=1,
        )
        # A small set of distinct targets means classification.
        use_classifier = len(set(scores)) <= self.classification_max
        if use_classifier:
            self.classifier = RandomForestClassifier(**forest_params)
        else:
            self.classifier = RandomForestRegressor(**forest_params)

        self.fit_feats()
        self.fit_done = False

    def fit_feats(self):
        """Fit the feature generator on the stored texts and scores."""
        self.feature_generator.fit(self.text, self.scores)

    def get_features(self):
        """Stack the features of every training text into one matrix."""
        return np.vstack(
            [self.feature_generator.get_features(t) for t in self.text]
        )

    def train(self):
        """Estimate cross-validated performance, then fit on all the data.

        Stores the per-fold scores in ``cv_scores``, their mean in
        ``cv_score`` and their standard deviation in ``cv_dev``; sets
        ``fit_done`` to True once the final fit has completed.
        """
        feats = self.get_features()
        scores = np.array(self.scores)

        # Compute error metrics for the estimator. The fold count comes from
        # cv_folds rather than the library default.
        self.cv_scores = cross_validation.cross_val_score(
            self.classifier, feats, scores, cv=self.cv_folds
        )
        self.cv_score = self.cv_scores.mean()
        self.cv_dev = self.cv_scores.std()

        self.classifier.fit(feats, scores)
        self.fit_done = True

    def predict(self, text):
        """Predict the score of one text.

        Args:
            text: The text to score.

        Returns:
            The result of ``self.classifier.predict`` on the single-sample
            feature matrix derived from ``text``.
        """
        feats = self.feature_generator.get_features(text)
        # Estimators expect samples as rows. Training features get that shape
        # from np.vstack; a lone feature vector needs the same promotion.
        # np.atleast_2d leaves input that is already 2-D untouched.
        sample = np.atleast_2d(feats)
        return self.classifier.predict(sample)
def __init__(self, text, scores):
    """Store the training data, select the estimator and fit the features.

    A ``RandomForestClassifier`` is used when ``scores`` holds at most
    ``self.classification_max`` distinct values; otherwise a
    ``RandomForestRegressor`` is used. Both get the same hyper-parameters.

    Args:
        text: Training texts, stored on ``self.text``.
        scores: Iterable of hashable score values, stored on ``self.scores``.
    """
    self.text = text
    self.scores = scores
    self.feature_generator = FeatureGenerator()

    # Decide between classification and regression from the number of
    # distinct target values.
    distinct_count = len(set(scores))
    if distinct_count <= self.classification_max:
        estimator_cls = RandomForestClassifier
    else:
        estimator_cls = RandomForestRegressor
    self.classifier = estimator_cls(
        n_estimators=100,
        min_samples_split=4,
        min_samples_leaf=3,
        random_state=1,
    )

    self.fit_feats()
    # The estimator itself has not been trained yet.
    self.fit_done = False
def __init__(self, text, scores):
    """Record the training data, build the estimator and fit the features.

    The estimator is a random forest: a classifier when the number of
    distinct values in ``scores`` does not exceed
    ``self.classification_max``, a regressor otherwise.

    Args:
        text: Training texts, kept on ``self.text``.
        scores: Iterable of hashable score values, kept on ``self.scores``.
    """
    self.text = text
    self.scores = scores
    self.feature_generator = FeatureGenerator()

    # Hyper-parameters shared by both estimator flavours.
    forest_params = {
        "n_estimators": 100,
        "min_samples_split": 4,
        "min_samples_leaf": 3,
        "random_state": 1,
    }
    few_targets = len(set(scores)) <= self.classification_max
    self.classifier = (
        RandomForestClassifier(**forest_params)
        if few_targets
        else RandomForestRegressor(**forest_params)
    )

    self.fit_feats()
    # Only the feature generator is fitted at this point.
    self.fit_done = False