# NOTE(review): the line below has lost its line breaks and indentation. It is
# left untouched because it opens inside a method body whose `def`/`class`
# headers are not visible here, so the correct nesting cannot be restored
# without guessing. Read in order, it contains three pieces:
#
# 1. Tail of an initializer-like body (header not visible): stores `name`,
#    `start` and `stop` on the instance and sets `self.ls` to None.
#
# 2. `__call__(self, nn, train_history)`:
#    - on the first call (`self.ls is None`) builds
#      `np.linspace(self.start, self.stop, nn.max_epochs)` and caches it;
#    - reads `epoch` from the last entry of `train_history`;
#    - picks `self.ls[epoch - 1]`, passes it through `float32(...)` and hands
#      the result to `getattr(nn, self.name).set_value(...)`.
#    Presumably `epoch` is 1-based (hence the `- 1`) -- TODO confirm against
#    whatever fills `train_history`. `float32` and `np` are defined outside
#    this view.
#
# 3. A `__main__` script (truncated after `ys = ly.std()`):
#    - sets `train_size` and creates a `RandomForestClassifier`;
#    - loads the training data, takes the 'revenue' column as a float32 array
#      `y` and deletes that column from the frame;
#    - loads the test data (`train=False`) and appends it to the training
#      data as `full_df_orig`;
#    - fits a `make_pipeline(RevenueTransform(rescale=True), StandardScaler())`
#      pipeline on the combined data;
#    - computes `ly = np.log(y)` and its mean `ym` and standard deviation `ys`.
self.name = name self.start, self.stop = start, stop self.ls = None def __call__(self, nn, train_history): if self.ls is None: self.ls = np.linspace(self.start, self.stop, nn.max_epochs) epoch = train_history[-1]['epoch'] new_value = float32(self.ls[epoch - 1]) getattr(nn, self.name).set_value(new_value) if __name__ == '__main__': train_size = 0.75 cls = RandomForestClassifier() train_df_orig = RevenueCompetition.load_data() y = train_df_orig['revenue'].values.astype('float32') del train_df_orig['revenue'] test_df_orig = RevenueCompetition.load_data(train=False) full_df_orig = train_df_orig.append(test_df_orig) print("Transforming...") tr = make_pipeline(RevenueTransform(rescale=True), StandardScaler()) tr.fit(full_df_orig) print('Classify the outliers...') ly = np.log(y) ym = ly.mean() ys = ly.std()
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20 in
# favour of `sklearn.model_selection`, where the splitter API differs. Migrate
# this import together with its call site (not visible in this chunk).
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.svm import SVR
from revenue import RevenueCompetition, RevenueTransform

# NOTE(review): `np` is used below but is not imported in the visible lines;
# presumably `import numpy as np` appears earlier in the file -- confirm.

if __name__ == '__main__':
    train_size = 0.75
    cls = RandomForestClassifier()

    # The SVR assignment right after this one rebinds `reg`, so the random
    # forest configured here is never the active regressor. It is kept so the
    # hyper-parameters stay on record.
    reg = RandomForestRegressor(
        n_estimators=20,
        max_features=5,
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_leaf_nodes=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=-1,
    )
    reg = SVR(C=10., gamma=0.1)

    # Load the training data and split the 'revenue' column off as the target.
    train_df_orig = RevenueCompetition.load_data()
    y = train_df_orig['revenue'].values
    del train_df_orig['revenue']

    # The transform is fitted on training and test rows combined, but only
    # the training rows are transformed into the feature matrix X.
    test_df_orig = RevenueCompetition.load_data(train=False)
    full_df = train_df_orig.append(test_df_orig)

    print("Transforming...")
    tr = RevenueTransform(rescale=True)
    tr.fit(full_df)
    X = tr.transform(train_df_orig).values

    print('Classify the outliers...')
    # Work with the log of the target from here on.
    ly = np.log(y)
    ym = ly.mean()