def q07_randomforest_regressor(path, columns=fe, random_state=9):
    """Return the mean validation RMSE of a random forest over the data splits.

    For every (train, validation) index pair produced by
    ``q02_data_splitter(path)``, a ``RandomForestRegressor`` is fitted on the
    training rows and scored on the validation rows using the root mean
    squared error of the ``'Demand'`` column.

    Args:
        path: Forwarded unchanged to ``q05_feature_engineering_part4`` and
            ``q02_data_splitter``.
        columns: Feature columns selected from the engineered data.
        random_state: Seed for NumPy's global random generator. The forest
            itself is always built with ``random_state=10``.

    Returns:
        The mean of the per-split RMSE values.
    """
    # NOTE(review): this function name is defined more than once in this
    # file; the last definition wins at import time -- confirm which to keep.
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    splits = q02_data_splitter(path)

    # Extract the arrays once; they do not change between splits.
    features = data[columns].values
    target = data['Demand'].values

    rmse = []
    for split in splits:
        train, valid = split[0], split[1]
        model = RandomForestRegressor(
            n_estimators=50, min_samples_leaf=30, random_state=10)
        model.fit(features[train], target[train])
        pred = model.predict(features[valid])
        rmse.append(math.pow(mean_squared_error(target[valid], pred), 0.5))
    return np.mean(rmse)
def q06_linear_regression(path, columns=fe, random_state=9):
    """Return the mean validation RMSE of a linear regression over the splits.

    For every (train, validation) index pair produced by
    ``q02_data_splitter(path)``, a ``LinearRegression`` is fitted on the
    training rows and scored on the validation rows using the root mean
    squared error of the ``'Demand'`` column.

    Args:
        path: Forwarded unchanged to ``q05_feature_engineering_part4`` and
            ``q02_data_splitter``.
        columns: Feature columns selected from the engineered data.
        random_state: Seed for NumPy's global random generator.

    Returns:
        The mean of the per-split RMSE values.
    """
    # NOTE(review): this function name is defined more than once in this
    # file; the last definition wins at import time -- confirm which to keep.
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    com_idx = q02_data_splitter(path)

    features = data[columns]
    target = data['Demand']

    rmse = []
    for split in com_idx:
        train_idx, valid_idx = split[0], split[1]
        # ``DataFrame.ix`` no longer exists in pandas >= 1.0. The split
        # indices are treated as row positions here -- assumes the splitter
        # yields positional indices; TODO confirm against q02_data_splitter.
        X_train, y_train = features.iloc[train_idx], target.iloc[train_idx]
        X_valid, y_valid = features.iloc[valid_idx], target.iloc[valid_idx]
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_valid)
        rmse.append(mean_squared_error(y_valid, y_pred) ** 0.5)
    return np.mean(rmse)
def q06_linear_regression(path, columns=fe, random_state=9):
    """Return the mean validation RMSE of a linear regression over the splits.

    For every (train, validation) index pair produced by
    ``q02_data_splitter(path)``, a ``LinearRegression`` is fitted on the
    training rows and scored on the validation rows using the root mean
    squared error of the ``'Demand'`` column.

    Args:
        path: Forwarded unchanged to ``q05_feature_engineering_part4`` and
            ``q02_data_splitter``.
        columns: Feature columns selected from the engineered data.
        random_state: Seed for NumPy's global random generator.

    Returns:
        The mean of the per-split RMSE values.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    splits = q02_data_splitter(path)

    # Extract the arrays once; they do not change between splits.
    features = data[columns].values
    target = data['Demand'].values

    rmse = []
    for split in splits:
        train, valid = split[0], split[1]
        model = LinearRegression()
        model.fit(features[train], target[train])
        pred = model.predict(features[valid])
        rmse.append(math.pow(mean_squared_error(target[valid], pred), 0.5))
    return np.mean(rmse)
def q07_randomforest_regressor(path, columns=fe, random_state=9):
    """Return the mean validation RMSE of a random forest over the data splits.

    For every (train, validation) index pair produced by
    ``q02_data_splitter(path)``, a ``RandomForestRegressor`` is fitted on the
    training rows and scored on the validation rows using the root mean
    squared error of the ``'Demand'`` column.

    Args:
        path: Forwarded unchanged to ``q05_feature_engineering_part4`` and
            ``q02_data_splitter``.
        columns: Feature columns selected from the engineered data.
        random_state: Seed for NumPy's global random generator. The forest
            itself is always built with ``random_state=10``.

    Returns:
        The mean of the per-split RMSE values.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    com_idx = q02_data_splitter(path)

    features = data[columns]
    target = data['Demand']

    rmse = []
    for split in com_idx:
        train_idx, valid_idx = split[0], split[1]
        # ``DataFrame.ix`` no longer exists in pandas >= 1.0. The split
        # indices are treated as row positions here -- assumes the splitter
        # yields positional indices; TODO confirm against q02_data_splitter.
        X_train, y_train = features.iloc[train_idx], target.iloc[train_idx]
        X_valid, y_valid = features.iloc[valid_idx], target.iloc[valid_idx]
        model = RandomForestRegressor(
            n_estimators=50, min_samples_leaf=30, random_state=10)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_valid)
        rmse.append(mean_squared_error(y_valid, y_pred) ** 0.5)
    return np.mean(rmse)
def q08_gradientboosting_regressor(path, columns=fe, random_state=9):
    """Return the mean validation RMSE of gradient boosting over the splits.

    For every (train, validation) index pair produced by
    ``q02_data_splitter(path)``, a ``GradientBoostingRegressor`` is fitted on
    the training rows and scored on the validation rows using the root mean
    squared error of the ``'Demand'`` column.

    Args:
        path: Forwarded unchanged to ``q05_feature_engineering_part4`` and
            ``q02_data_splitter``.
        columns: Feature columns selected from the engineered data.
        random_state: Seed for NumPy's global random generator; also passed
            to the estimator as its ``random_state``.

    Returns:
        The mean of the per-split RMSE values.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    splits = q02_data_splitter(path)

    # Extract the arrays once; they do not change between splits.
    features = data[columns].values
    target = data['Demand'].values

    rmse = []
    for split in splits:
        train, valid = split[0], split[1]
        model = GradientBoostingRegressor(
            n_estimators=200,
            min_samples_leaf=10,
            learning_rate=0.01,
            random_state=random_state,
        )
        model.fit(features[train], target[train])
        pred = model.predict(features[valid])
        rmse.append(math.pow(mean_squared_error(target[valid], pred), 0.5))
    return np.mean(rmse)