Esempio n. 1
0
        dc.green_groupintodailydf(paths, 'Green' + str(year) + 'DailyDrivers.csv', printcsv)

    elif color == 'yellow':

        # Group
        dc.yellow_groupintodailydf(paths, 'Yellow' + str(year) + 'DailyDrivers.csv', printcsv)

if __name__ == "__main__":
    # Script entry point. As written, only the "Predict" step runs: it loads
    # the 2014 yellow-taxi daily-drivers CSV and calls
    # TuneModel.randomforestreport on it. The clean/group step and the 2015
    # forecast are both commented out.

    # Disabled preprocessing call; re-enable to regenerate the daily CSV.
    #cleanandgroup(color='yellow', year=2015, nummonths=3, clean=False, printcsv=True)


    # Predict
    # Load only the first six columns (positions 0-5) of the CSV.
    df = pd.read_csv('Yellow2014DailyDrivers.csv', usecols=[0,1,2,3,4,5])

    # 'Day_taxi_drivers' is presumably the column being predicted (the
    # commented-out code below uses it as y_train). TuneModel is defined
    # elsewhere -- confirm the meaning of testsize/impute/scores there.
    p = TuneModel(df,'Day_taxi_drivers', testsize=.25, modeltype='regress', impute=False, scores=['r2'])

    # The return value is kept in clf, but nothing in the visible code uses it
    # afterwards; the disabled forecast below would rebind clf itself.
    clf = p.randomforestreport(folds=5, cores=1, plotit=False, saveit=False)

    # Disabled sketch: predicting into 2015 (using all of 2014 as the training set)
    # NOTE(review): the two dates passed to create_future_X_test below use
    # different formats ('01-01-15' vs '03-31-2015') -- verify before re-enabling.
    # clf = RandomForestRegressor(n_estimators=250)
    # #todo: make this into function, where number of days in X_test is a parameter
    # y_train = df['Day_taxi_drivers']
    # X_train = df.drop('Day_taxi_drivers', axis=1)
    #
    # # Create X_test set going forward three months (and add holidays in)
    # X_test = modelutilities.create_future_X_test('01-01-15','03-31-2015',freq='D')
    # holidaylist=['2015-01-01','2015-02-13','2015-02-14','2015-03-16','2015-03-17']
    # X_test['Holiday'] = 0
    # for i in holidaylist: # loc should be able to obviate forloop
    #     X_test.loc[i,'Holiday'] = 1
Esempio n. 2
0
    # o.notokenlogitreport(folds=2, cores=7)
    #
    # o.tokenlogitreport(folds=2, cores=7)
    #
    # o.notokensvcreport(folds=2, cores=-1)
    #
    # o.tokensvcreport(folds=2, cores=-1)

    #### CSV example of how to use TuneModel and its report methods (e.g. logitreport) for hyperparameter optimization
    df = pd.read_csv('SalesOrderHeaderNULL.csv')

    #p = PredictListFactors(df, 'OnlineOrderFlag', 'SalesPersonID', 'ShipMethodID')

    #p.predictfactors()

    p = TuneModel(df,'OnlineOrderFlag', testsize=.5, type='regress')
    #
    p.logitreport(folds=2, cores=6)
    #
    # p.treesreport(folds=2, cores=6)
    #
    # p.extratreesreport(folds=2, cores=6)
    #
    # p.randomforestreport(folds=2, cores=6)

    ## CSV example of how to use OverallRank class and methods printit, plotit
    ## General model feature importance
    #t = OverallRank(df,'OnlineOrderFlag')

    #t.printit()