def testMethods(nonRookieData=None,rookieData=None,careerData=None):
    """
    Run the cross-validation suite used to pick the best model.

    Any input that is missing (falsy) is generated first: career data comes
    from ``getData.main()`` and the two feature sets come from
    ``getFeatures.main(careerData)``.

    Returns a 4-tuple:
        (non-rookie results, rookie results,
         non-rookie predictions, rookie predictions)
    where both result objects have been passed through ``writeResToPandas``
    with the labels 'nonRookies' and 'rookies' respectively.
    """
    # Only the career data from getData.main() is needed here.
    if not careerData:
        _, careerData, _ = getData.main()

    # Rebuild both feature sets when either one is missing.
    if not nonRookieData or not rookieData:
        nonRookieData, rookieData, careerData = getFeatures.main(careerData)

    # Cross-validate each group; each call yields (results, predictions).
    vetResults, vetPreds = getCrossVal(nonRookieData, careerData)
    novResults, novPreds = getRookieCrossVal(rookieData, careerData)

    # Convert the raw results, non-rookies first, then rookies.
    vetTable = writeResToPandas(vetResults, 'nonRookies')
    novTable = writeResToPandas(novResults, 'rookies')

    return (vetTable, novTable, vetPreds, novPreds)
def predictNextSeason(year=2015,nonRookieData='nonRookieData.p',rookieData='rookieData.p',careerData='careerData.p',seasonStats='seasonStats.p'):
    """
    Train the previously selected models and write next-season predictions.

    According to the original notes, the algorithms chosen beforehand are:
        - Veterans (non-rookies): Random Forest; n_estimators=500,
          min_samples_split=125
        - Novices (rookies): SVM; C=.15, gamma=.015, epsilon=.05
    The models themselves are built by ``getModel``; this function only
    orchestrates loading, fitting, feature extraction and prediction, and
    prints the elapsed time after each stage.

    Inputs:
        year - int, optional (default=2015)
            Ending year of the season to predict. It is used in integer
            arithmetic and as the key into ``seasonStats``.
        nonRookieData, rookieData, careerData, seasonStats - optional
            Default to the file names 'nonRookieData.p', 'rookieData.p',
            'careerData.p' and 'seasonStats.p'. Whatever is supplied is
            handed to ``tryPickle`` (presumably it loads file names from
            disk and passes loaded objects through -- confirm against
            ``tryPickle``). Falsy values trigger regeneration through
            ``getData.main()`` and ``getFeatures.main()``.

    Outputs:
        (predictionsNonRookies, predictionsRookies)
            The predictions for both groups (pandas DataFrames, per the
            original documentation).

    Side effects:
        Writes '<season>_Predictions.csv' (e.g. '2014-15_Predictions.csv'
        for year=2015) relative to the current working directory, with the
        non-rookie rows first, the rookie rows after, and no index column.
    """
    # Local import so the block does not rely on a module-level alias.
    import pandas as pd

    t0=time.time()

    # Season label spans two calendar years, e.g. year=2015 -> '2014-15'.
    last2digits=str(year)[-2:]
    season = str(year-1)+'-'+last2digits

    # Regenerate any input the caller explicitly left empty.
    if not (careerData and seasonStats):
        seasonStats,careerData,_ = getData.main()
    if not (nonRookieData and rookieData):
        nonRookieData,rookieData,careerData=getFeatures.main(careerData)
    nonRookieData,rookieData,careerData,seasonStats=tryPickle(nonRookieData,rookieData,careerData,seasonStats)
    print('All past data found! Now fitting models ',time.time()-t0)

    # Each call returns (model, training data, scaler) for its group.
    nonRookiesModel,nonRookiesTrain,nonRookiesScaler = getModel(nonRookieData,'nonRookies')
    rookiesModel,rookiesTrain,rookiesScaler = getModel(rookieData,'rookies')
    print('Models fitted! Now getting all current players features ',time.time()-t0)

    nonRookies,rookies = findPlayerFeatures(year,seasonStats[year],careerData,
                                            nonRookiesTrain,rookiesTrain,nonRookiesScaler,rookiesScaler)
    print('Features found! Now making predictions ',time.time()-t0)

    predictionsNonRookies = getPredictions(nonRookies,nonRookiesModel,'nonRookies')
    print('Non-Rookie Predictions made! Now predicting Rookies ',time.time()-t0)
    predictionsRookies = getPredictions(rookies,rookiesModel,'rookies')

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames the same way (original indexes kept, columns not re-sorted).
    combined = pd.concat([predictionsNonRookies,predictionsRookies])
    combined.to_csv(season+'_Predictions.csv',index=False)

    print('Total Runtime is ',time.time()-t0,'s')
    return(predictionsNonRookies,predictionsRookies)