def RF(self, args):
    """Fit a random forest on the training split and score all of ``self.X_data``.

    ``args.predictor`` (case-insensitive) selects the estimator:

    * ``'classifier'`` -- ``RandomForestClassifier`` with the entropy split
      criterion and balanced class weights.
    * ``'regressor'`` -- ``RandomForestRegressor`` with 20000 trees,
      ``max_depth=4``, ``max_samples=0.6`` and ``n_jobs=-1``.

    Both estimators are seeded with ``random_state=42``.

    Side effects: sets ``self.y_pred``, ``self.data['boosting_score']`` and
    ``self.model``.

    Args:
        args: Object exposing a string attribute ``predictor``.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        ValueError: If ``args.predictor`` is neither ``'classifier'`` nor
            ``'regressor'``.
    """
    predictor = args.predictor.lower()
    # Fail fast with a clear message; previously an unknown value left `rf`
    # unbound and surfaced as an UnboundLocalError at `rf.fit`.
    if predictor not in ('classifier', 'regressor'):
        raise ValueError(
            "Unsupported predictor {!r}; expected 'classifier' or "
            "'regressor'".format(args.predictor)
        )

    logger.info("Running Random Forest... ")

    if predictor == 'classifier':
        from sklearn.ensemble import RandomForestClassifier as randomforest
        rf = randomforest(criterion='entropy',
                          class_weight='balanced',
                          random_state=42)
    else:
        from sklearn.ensemble import RandomForestRegressor as randomforest
        rf = randomforest(n_estimators=20000,
                          max_depth=4,
                          random_state=42,
                          max_samples=0.6,
                          n_jobs=-1)

    rf.fit(self.X_train, self.y_train)

    # Predict for every row of X_data, not only the training split.
    self.y_pred = rf.predict(self.X_data)
    if predictor == 'regressor':
        # NOTE(review): `logistic` comes from module scope -- presumably
        # scipy.stats.logistic, whose cdf would squash raw regression output
        # into (0, 1); confirm against the file's imports.
        self.y_pred = logistic.cdf(self.y_pred)

    # The column is named 'boosting_score' even though the model is a forest.
    self.data['boosting_score'] = self.y_pred
    self.model = rf
    return self
def RF(self, args):
    """Fit a random forest on the training split and score all of ``self.X_data``.

    ``args.predictor`` (case-insensitive) selects the estimator:

    * ``'classifier'`` -- ``RandomForestClassifier`` with the entropy split
      criterion and ``random_state=42``.
    * ``'regressor'`` -- ``RandomForestRegressor`` with 5000 trees,
      ``min_samples_leaf=0.12``, ``max_depth=8`` and ``warm_start=True``,
      using the estimator's default split criterion.

    Side effects: sets ``self.y_pred``, ``self.data['boosting_score']`` and
    ``self.model``.

    Args:
        args: Object exposing a string attribute ``predictor``.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        ValueError: If ``args.predictor`` is neither ``'classifier'`` nor
            ``'regressor'``.
    """
    predictor = args.predictor.lower()
    # Fail fast with a clear message; previously an unknown value left `rf`
    # unbound and surfaced as an UnboundLocalError at `rf.fit`.
    if predictor not in ('classifier', 'regressor'):
        raise ValueError(
            "Unsupported predictor {!r}; expected 'classifier' or "
            "'regressor'".format(args.predictor)
        )

    logger.info("Running Random Forest... ")

    if predictor == 'classifier':
        from sklearn.ensemble import RandomForestClassifier as randomforest
        rf = randomforest(criterion='entropy', random_state=42)
    else:
        from sklearn.ensemble import RandomForestRegressor as randomforest
        # 'entropy' is a classification-only criterion and is rejected by
        # RandomForestRegressor, so the default regression criterion is used.
        rf = randomforest(n_estimators=5000,
                          min_samples_leaf=0.12,
                          warm_start=True,
                          max_depth=8)

    rf.fit(self.X_train, self.y_train)

    # Predict for every row of X_data, not only the training split.
    self.y_pred = rf.predict(self.X_data)
    if predictor == 'regressor':
        # NOTE(review): `logistic` comes from module scope -- presumably
        # scipy.stats.logistic, whose cdf would squash raw regression output
        # into (0, 1); confirm against the file's imports.
        self.y_pred = logistic.cdf(self.y_pred)

    # The column is named 'boosting_score' even though the model is a forest.
    self.data['boosting_score'] = self.y_pred
    self.model = rf
    return self
def rdf():
    """Train a random forest on the default data set and dump test predictions.

    Loads the training data via ``rdfreadvw()`` and the test data via
    ``rdfreadvw('mytest.vw')``, fits ``randomforest()`` on the training
    features, prints the training-set score, and writes one
    ``"<prediction> <test id>"`` line per test row to ``rfpred2.txt``.

    NOTE(review): ``rdfreadvw`` and ``randomforest`` come from module scope;
    the feature matrices are presumably scipy sparse matrices (they expose
    ``.toarray()``) -- confirm against their definitions.
    """
    x, y, _ = rdfreadvw()
    test, _, testid = rdfreadvw('mytest.vw')

    # Convert both feature matrices to dense arrays before fitting/predicting.
    train = x.toarray()
    test = test.toarray()

    rf = randomforest()
    rf.fit(train, y)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(rf.score(train, y))

    pred = rf.predict(test)
    with open('rfpred2.txt', 'w') as f:
        for f1, f2 in zip(pred, testid):
            # Same "<pred> <id>\n" layout the Python 2 `print >> f` emitted.
            f.write('%s %s\n' % (f1, f2))
# submission = pd.DataFrame( # {'key': test_df.key, 'fare_amount': predicted_values}, # columns = ['key', 'fare_amount']) # submission.to_csv('submission.csv', index = False) print(os.listdir('.')) # In[ ]: from sklearn.ensemble import RandomForestRegressor as randomforest # Create the random forest random_forest = randomforest(n_estimators=20, max_depth=20, max_features=None, oob_score=True, bootstrap=True, verbose=1, n_jobs=-1) # Train on data random_forest.fit( df[[ 'trip_distance', 'pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'abs_lat_diff', 'abs_lon_diff', 'passenger_count' ]], df['fare_amount']) predicted_values = random_forest.predict(test_df[[ 'trip_distance', 'pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'abs_lat_diff', 'abs_lon_diff', 'passenger_count'