Beispiel #1
0
    def RF(self, args):  ## Random Forest
        """Fit a random forest on the training split and score ``self.X_data``.

        Args:
            args: Object exposing a string attribute ``predictor``. It is
                compared case-insensitively against ``'classifier'`` and
                ``'regressor'`` to pick the estimator.

        Returns:
            ``self``, after storing the predictions in ``self.y_pred`` and
            ``self.data['boosting_score']`` and the fitted estimator in
            ``self.model``.

        Raises:
            ValueError: If ``args.predictor`` is neither ``'classifier'`` nor
                ``'regressor'``.
        """
        predictor = args.predictor.lower()
        # Fail fast with a clear message; without this check an unknown
        # predictor left ``rf`` unbound and crashed later at ``rf.fit``.
        if predictor not in ('classifier', 'regressor'):
            raise ValueError(
                "Unknown predictor %r; expected 'classifier' or 'regressor'"
                % (args.predictor,))

        logger.info("Running Random Forest... ")

        # scikit-learn is imported lazily, only for the estimator requested.
        if predictor == 'classifier':
            from sklearn.ensemble import RandomForestClassifier

            rf = RandomForestClassifier(criterion='entropy',
                                        class_weight='balanced',
                                        random_state=42)
        else:
            from sklearn.ensemble import RandomForestRegressor

            rf = RandomForestRegressor(n_estimators=20000,
                                       max_depth=4,
                                       random_state=42,
                                       max_samples=0.6,
                                       n_jobs=-1)

        rf.fit(self.X_train, self.y_train)

        # Predict on self.X_data (not on the training split used for fitting).
        self.y_pred = rf.predict(self.X_data)

        if predictor == 'regressor':
            # NOTE(review): ``logistic`` is defined outside this method --
            # presumably scipy.stats.logistic, whose cdf would squash the raw
            # regression output into (0, 1); confirm against the file imports.
            self.y_pred = logistic.cdf(self.y_pred)

        self.data['boosting_score'] = self.y_pred
        self.model = rf
        return self
    def RF(self, args):  ## Random Forest
        """Fit a random forest on the training split and score ``self.X_data``.

        Args:
            args: Object exposing a string attribute ``predictor``. It is
                compared case-insensitively against ``'classifier'`` and
                ``'regressor'`` to pick the estimator.

        Returns:
            ``self``, after storing the predictions in ``self.y_pred`` and
            ``self.data['boosting_score']`` and the fitted estimator in
            ``self.model``.

        Raises:
            ValueError: If ``args.predictor`` is neither ``'classifier'`` nor
                ``'regressor'``.
        """
        predictor = args.predictor.lower()
        # Fail fast with a clear message; without this check an unknown
        # predictor left ``rf`` unbound and crashed later at ``rf.fit``.
        if predictor not in ('classifier', 'regressor'):
            raise ValueError(
                "Unknown predictor %r; expected 'classifier' or 'regressor'"
                % (args.predictor,))

        logger.info("Running Random Forest... ")

        # scikit-learn is imported lazily, only for the estimator requested.
        if predictor == 'classifier':
            from sklearn.ensemble import RandomForestClassifier

            rf = RandomForestClassifier(criterion='entropy', random_state=42)
        else:
            from sklearn.ensemble import RandomForestRegressor

            # No ``criterion`` is passed here: 'entropy' is a classification
            # criterion that RandomForestRegressor rejects, so the library's
            # default regression criterion is used instead.
            rf = RandomForestRegressor(n_estimators=5000,
                                       min_samples_leaf=0.12,
                                       warm_start=True,
                                       max_depth=8)

        rf.fit(self.X_train, self.y_train)

        # Predict on self.X_data (not on the training split used for fitting).
        self.y_pred = rf.predict(self.X_data)

        if predictor == 'regressor':
            # NOTE(review): ``logistic`` is defined outside this method --
            # presumably scipy.stats.logistic, whose cdf would squash the raw
            # regression output into (0, 1); confirm against the file imports.
            self.y_pred = logistic.cdf(self.y_pred)

        self.data['boosting_score'] = self.y_pred
        self.model = rf
        return self
Beispiel #3
0
def rdf():
    """Train a random forest and write test predictions to ``rfpred2.txt``.

    Reads the training data with ``rdfreadvw()`` and the test data with
    ``rdfreadvw('mytest.vw')``, fits ``randomforest()`` on the training data,
    prints the score on that same training data, and writes one
    ``"<prediction> <id>"`` line per test row to ``rfpred2.txt``.

    Returns:
        None.
    """
    # The third training value and the second test value are not used here.
    x, y, _unused_train_ids = rdfreadvw()
    test, _unused_test_labels, testid = rdfreadvw('mytest.vw')

    # NOTE(review): ``toarray()`` suggests both feature sets arrive as sparse
    # matrices that are densified before fitting -- confirm in rdfreadvw.
    train = x.toarray()
    test = test.toarray()

    rf = randomforest()
    rf.fit(train, y)
    # Parenthesised single-argument print gives the same output on Python 2
    # and Python 3.
    print(rf.score(train, y))

    pred = rf.predict(test)

    with open('rfpred2.txt', 'w') as f:
        # Same "<prediction> <id>\n" lines that ``print >> f, a, b`` wrote,
        # without relying on the Python 2-only print statement.
        f.writelines('%s %s\n' % (label, row_id)
                     for label, row_id in zip(pred, testid))
Beispiel #4
0
# submission = pd.DataFrame(
#     {'key': test_df.key, 'fare_amount': predicted_values},
#     columns = ['key', 'fare_amount'])
# submission.to_csv('submission.csv', index = False)

# List the contents of the current working directory.
print(os.listdir('.'))

# In[ ]:

from sklearn.ensemble import RandomForestRegressor as randomforest

# Build the forest: 20 bootstrapped trees of depth <= 20, every feature
# considered at each split, out-of-bag scoring enabled, all cores used.
random_forest = randomforest(
    n_estimators=20,
    max_depth=20,
    max_features=None,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
    verbose=1,
)

# Columns of ``df`` used as model inputs; 'fare_amount' is the target.
train_feature_columns = [
    'trip_distance',
    'pickup_longitude',
    'pickup_latitude',
    'dropoff_longitude',
    'dropoff_latitude',
    'abs_lat_diff',
    'abs_lon_diff',
    'passenger_count',
]

# Fit the forest on the training frame.
random_forest.fit(df[train_feature_columns], df['fare_amount'])
predicted_values = random_forest.predict(test_df[[
    'trip_distance', 'pickup_longitude', 'pickup_latitude',
    'dropoff_longitude', 'dropoff_latitude', 'abs_lat_diff', 'abs_lon_diff',
    'passenger_count'