        dc.green_groupintodailydf(paths, 'Green' + str(year) + 'DailyDrivers.csv', printcsv)
    elif color == 'yellow':
        # Group
        dc.yellow_groupintodailydf(paths, 'Yellow' + str(year) + 'DailyDrivers.csv', printcsv)


if __name__ == "__main__":

    # cleanandgroup(color='yellow', year=2015, nummonths=3, clean=False, printcsv=True)

    # Predict
    df = pd.read_csv('Yellow2014DailyDrivers.csv', usecols=[0, 1, 2, 3, 4, 5])
    p = TuneModel(df, 'Day_taxi_drivers', testsize=.25, modeltype='regress', impute=False, scores=['r2'])
    clf = p.randomforestreport(folds=5, cores=1, plotit=False, saveit=False)

    # Predicting into 2015 (using all of 2014 as the training set)
    # clf = RandomForestRegressor(n_estimators=250)
    # # TODO: make this into a function where the number of days in X_test is a parameter
    # y_train = df['Day_taxi_drivers']
    # X_train = df.drop('Day_taxi_drivers', axis=1)
    #
    # # Create the X_test set going forward three months (and add holidays in)
    # X_test = modelutilities.create_future_X_test('01-01-2015', '03-31-2015', freq='D')
    # holidaylist = ['2015-01-01', '2015-02-13', '2015-02-14', '2015-03-16', '2015-03-17']
    # X_test['Holiday'] = 0
    # for i in holidaylist:  # .loc with the whole holiday list could replace this loop
    #     X_test.loc[i, 'Holiday'] = 1
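    # ------------------------------------------------------------------
    # Hedged sketch (not part of the original script): one possible way to fold the
    # commented-out block above into the function the TODO asks for.
    # predict_future_drivers() is a hypothetical name, and the date features
    # ('Month', 'Day_of_week') are assumptions standing in for whatever columns
    # modelutilities.create_future_X_test actually produces, so the future frame is
    # built directly with pandas here. The fit only works if these columns match
    # the training frame's feature columns.
    #
    # from sklearn.ensemble import RandomForestRegressor
    #
    # def predict_future_drivers(train_df, target, start, end, holidays=(), n_estimators=250):
    #     """Fit on the historical daily frame and predict every day in [start, end]."""
    #     y_train = train_df[target]
    #     X_train = train_df.drop(target, axis=1)
    #
    #     future = pd.date_range(start, end, freq='D')
    #     X_test = pd.DataFrame({'Month': future.month,
    #                            'Day_of_week': future.dayofweek,
    #                            'Holiday': 0}, index=future)
    #     X_test.loc[X_test.index.isin(pd.to_datetime(list(holidays))), 'Holiday'] = 1
    #
    #     model = RandomForestRegressor(n_estimators=n_estimators)
    #     model.fit(X_train, y_train)
    #     return pd.Series(model.predict(X_test), index=future)
    #
    # preds_2015 = predict_future_drivers(df, 'Day_taxi_drivers', '2015-01-01', '2015-03-31',
    #                                     holidays=holidaylist)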
    # o.notokenlogitreport(folds=2, cores=7)
    # # o.tokenlogitreport(folds=2, cores=7)
    # # o.notokensvcreport(folds=2, cores=-1)
    # # o.tokensvcreport(folds=2, cores=-1)

    #### CSV example of how to use TuneClassifier and printreport for hyperparameter optimization
    df = pd.read_csv('SalesOrderHeaderNULL.csv')
    # p = PredictListFactors(df, 'OnlineOrderFlag', 'SalesPersonID', 'ShipMethodID')
    # p.predictfactors()
    p = TuneModel(df, 'OnlineOrderFlag', testsize=.5, type='regress')
    # p.logitreport(folds=2, cores=6)
    # # p.treesreport(folds=2, cores=6)
    # # p.extratreesreport(folds=2, cores=6)
    # # p.randomforestreport(folds=2, cores=6)

    ## CSV example of how to use the OverallRank class and its printit/plotit methods
    ## General model feature importance
    # t = OverallRank(df, 'OnlineOrderFlag')
    # t.printit()
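    # ------------------------------------------------------------------
    # Hedged sketch (not part of the original script): the report methods used
    # above (logitreport, randomforestreport, ...) presumably wrap a cross-validated
    # grid search; this is roughly the equivalent done directly with scikit-learn.
    # The feature handling (dropping the target, the C grid) is an assumption for
    # illustration, not the library's actual internals.
    #
    # from sklearn.linear_model import LogisticRegression
    # from sklearn.model_selection import GridSearchCV
    #
    # y = df['OnlineOrderFlag']
    # X = df.drop('OnlineOrderFlag', axis=1)  # assumes the remaining columns are numeric
    #
    # grid = GridSearchCV(LogisticRegression(),
    #                     param_grid={'C': [0.01, 0.1, 1, 10]},
    #                     cv=2, n_jobs=6)
    # grid.fit(X, y)
    # print(grid.best_params_, grid.best_score_)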