def __init__(self, data_path='Bike-Sharing-Dataset/hour.csv'):
    '''Initialize the random forest model.

    Loads the full dataset through ``Dataloader``, extracts the feature
    matrix and the target vector from it, and creates a
    ``RandomForestRegressor``. The model is only constructed here; no
    fitting happens in this method.

    Keyword Arguments:
        data_path {str} -- Path to the Bike Sharing Dataset.
            (default: {'Bike-Sharing-Dataset/hour.csv'})
    '''
    # Make results reproducible
    random.seed(100)

    # Load data from bike sharing csv
    self.data = {}
    dataloader = Dataloader(data_path)
    self.data['full'] = dataloader.getFullData()

    # Define feature and target variables
    self.features = [
        'season', 'mnth', 'hr', 'holiday', 'weekday', 'workingday',
        'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
    ]
    self.target = ['cnt']

    # Convert pandas frame into samples and labels
    self.samples, self.labels = {}, {}
    self.samples['full'] = self.data['full'][self.features].values
    # ravel() flattens the single-column target into a 1-D label vector.
    self.labels['full'] = self.data['full'][self.target].values.ravel()

    # Define model.
    # Only the hyperparameters that differ from the library defaults are
    # passed. The previously spelled-out defaults included
    # criterion='mse', max_features='auto' and min_impurity_split=None,
    # which newer scikit-learn releases no longer accept; leaving them at
    # their defaults keeps the same configuration while staying
    # compatible across versions.
    self.model = RandomForestRegressor(
        n_estimators=200,
        min_samples_split=4,
        random_state=100,
    )
from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import GradientBoostingClassifier from sklearn.linear_model import LinearRegression import random %matplotlib inline random.seed(100) dataloader = Dataloader('Bike-Sharing-Dataset/hour.csv') train, val, test = dataloader.getData() fullData = dataloader.getFullData() category_features = ['season', 'holiday', 'mnth', 'hr', 'weekday', 'workingday', 'weathersit'] number_features = ['temp', 'atemp', 'hum', 'windspeed'] features= category_features + number_features target = ['cnt'] sns.set(font_scale=1.0) fig, axes = plt.subplots(nrows=3,ncols=2) fig.set_size_inches(15, 15) sns.boxplot(data=train,y="cnt",orient="v",ax=axes[0][0]) sns.boxplot(data=train,y="cnt",x="mnth",orient="v",ax=axes[0][1]) sns.boxplot(data=train,y="cnt",x="weathersit",orient="v",ax=axes[1][0]) sns.boxplot(data=train,y="cnt",x="workingday",orient="v",ax=axes[1][1]) sns.boxplot(data=train,y="cnt",x="hr",orient="v",ax=axes[2][0]) sns.boxplot(data=train,y="cnt",x="temp",orient="v",ax=axes[2][1])