Beispiel #1
0
    'Store', 'SchoolHoliday', 'Promo', 'cmp msr', 'IsPromotionMonth', 'Year',
    'Month', 'Day', 'DayOfTheWeek', 'WeekOfTheYear', 'StoreType',
    'CompetitionOpenSinceMonth', 'CompetitionDistance', 'PromoOpen'
]  # Features used for prediction

# Add the engineered columns to both the training frame and the submission
# frame.  The return value is ignored, so feature_engineering (defined
# elsewhere in this file) presumably mutates its argument in place.
feature_engineering(rossman)
feature_engineering(rossman_test)

X = rossman[features]
y = rossman.Sales  # The value we are going to predict

# Hold out part of the labelled data for a local sanity check of the model.
train_features, test_features, train_predict, test_predict = train_test_split(
    X, y)

randomForest = RandomForestRegressor(n_estimators=35)
randomForest.verbose = True

# Fit on the training split only, so the hold-out predictions below come from
# a model that has never seen those rows.  Fitting on all of X first (as the
# script used to) makes the hold-out comparison meaninglessly optimistic.
randomForest.fit(train_features, train_predict)
predicted_value = randomForest.predict(test_features)
predicted_value = np.array(predicted_value)

test_predict = np.array(test_predict)

# Refit on every labelled row so the model used for the submission (and left
# in `randomForest` for any later code) is trained on the full data set.
randomForest.fit(X, y)

# 3-fold cross-validated error on the full training data.
# NOTE(review): `cross_validation` and the 'mean_squared_error' scorer name
# belong to old scikit-learn releases; newer ones use
# sklearn.model_selection and 'neg_mean_squared_error' -- confirm the pinned
# version before upgrading.
errorValue = cross_validation.cross_val_score(randomForest,
                                              rossman[features],
                                              y,
                                              scoring='mean_squared_error',
                                              cv=3)

# Predict with exactly the columns the model was trained on; rossman_test
# also carries columns such as Id that are not model features.
finalResult = randomForest.predict(rossman_test[features])

outputForSubmission = pd.DataFrame(rossman_test.Id).join(