def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: trainData = buildTrainingSets(DIR + team + '-train.csv.knn') trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn') testData = buildTestingSets(DIR + team + '-test.csv.knn') testLabels = buildTestingLabels(DIR + team + '-test.csv.knn') total = total + len(testLabels) svm = cv2.SVM() svm.train(trainData, trainLabels, params=svm_params) svm.save('svm_data.dat') # Accuracy count = 0 for i in range(len(testLabels)): ret = svm.predict(np.array([testData[i]])) if ret == testLabels[i][0]: count = count + 1 countTotal = countTotal + count print 'INFO: Accuracy(', team, ')', count / float(len(testLabels)) print 'INFO: Total Accuracy: ', countTotal / float(total)
def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: trainData = buildTrainingSets(DIR + team + '-train.csv') trainLabels = buildTrainingLabels(DIR + team + '-train.csv') testData = buildTestingSets(DIR + team + '-test.csv') testLabels = buildTestingLabels(DIR + team + '-test.csv') total = total + len(testLabels) knn = cv2.KNearest() knn.train(trainData, trainLabels) # Accuracy count = 0 for i in range(len(testLabels)): ret, results, neighbours, dist = knn.find_nearest( np.array([testData[i]]), 11) if results[0][0] == testLabels[i][0]: count = count + 1 countTotal = countTotal + count print 'INFO: Accuracy(', teamNames[teamIds.index( team)], ')', count / float(len(testLabels)) print 'INFO: Total Accuracy: ', countTotal / float(total)
def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: trainData = buildTrainingSets(DIR + team + '-train.csv.knn') trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn') testData = buildTestingSets(DIR + team + '-test.csv.knn') testLabels = buildTestingLabels(DIR + team + '-test.csv.knn') total = total + len(testLabels) svm = cv2.SVM() svm.train(trainData, trainLabels, params=svm_params) svm.save('svm_data.dat') # Accuracy count = 0 for i in range(len(testLabels)): ret = svm.predict(np.array([testData[i]])) if ret == testLabels[i][0]: count = count + 1 countTotal = countTotal + count print 'INFO: Accuracy(', team, ')', count/float(len(testLabels)) print 'INFO: Total Accuracy: ', countTotal/float(total)
def teamMain(): DIR = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: train = buildTrainingSets(DIR + team + '-train.csv') test = buildTestingSets(DIR + team + '-test.csv') labels = buildTestingLabels(DIR + team + '-test.csv') total = total + len(labels) classifier = NaiveBayesClassifier.train(train) res = classifier.batch_classify(test) # accuracy count = 0 for i in range(len(res)): if labels[i] == res[i]: count = count + 1 countTotal = countTotal + count print 'INFO: Accuracy(', teamNames[teamIds.index( team)], ')', count / float(len(res)) print 'INFO: Total Accuracy: ', countTotal / float(total)
def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: train = buildTrainingSets(DIR + team + '-train.csv') test = buildTestingSets(DIR + team + '-test.csv') labels = buildTestingLabels(DIR + team + '-test.csv') total = total + len(labels) # train classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000) # test count = 0 for i in range(len(labels)): pdist = classifier.prob_classify(test[i]) if pdist.prob('L') >= pdist.prob('W'): flag = 'L' else: flag = 'W' #print 'DEBUG: ', flag, labels[i] if flag == labels[i]: count = count + 1 print 'INFO: accuracy ', team, " ", float(count)/len(labels)
def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: train = buildTrainingSets(DIR + team + '-train.csv') test = buildTestingSets(DIR + team + '-test.csv') labels = buildTestingLabels(DIR + team + '-test.csv') total = total + len(labels) # train classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000) # test count = 0 for i in range(len(labels)): pdist = classifier.prob_classify(test[i]) if pdist.prob('L') >= pdist.prob('W'): flag = 'L' else: flag = 'W' #print 'DEBUG: ', flag, labels[i] if flag == labels[i]: count = count + 1 print 'INFO: accuracy ', team, " ", float(count) / len(labels)
def teamMain(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')] countTotal = 0 total = 0 for team in teamIds: trainData = buildTrainingSets(DIR + team + '-train.csv') trainLabels = buildTrainingLabels(DIR + team + '-train.csv') testData = buildTestingSets(DIR + team + '-test.csv') testLabels = buildTestingLabels(DIR + team + '-test.csv') total = total + len(testLabels) knn = cv2.KNearest() knn.train(trainData, trainLabels) # Accuracy count = 0 for i in range(len(testLabels)): ret, results, neighbours, dist = knn.find_nearest(np.array([testData[i]]), 31) if results[0][0] == testLabels[i][0]: count = count + 1 countTotal = countTotal + count print 'INFO: Accuracy(', teamNames[teamIds.index(team)], ')', count/float(len(testLabels)) print 'INFO: Total Accuracy: ', countTotal/float(total)
def generateTestDataByTeams(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' teamIds = loadTeamIds(DIR + 'teamidshortname.csv') teamNames = [row[1] for row in loadMatrixFromFile(DIR + 'teamidshortname.csv')] for teamId in teamIds: res = generateTestDataByTeam(teamId) outputFile = DIR + teamId + '-test.csv.knn' print 'INFO: ', outputFile saveMatrixToFile(outputFile, res)
def generateTestDataByTeam(teamId):
    """Build the feature rows for one team from the '.playoff.csv' files.

    Every season listed in 'seasons-18-Nov-2014.txt' contributes the rows
    of '<season>.playoff.csv' whose column 6 contains the short name of
    teamId.  Each selected row becomes one 12-element list:

        [HOME, height, weight, age, exp, leaguerank,
         vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

    HOME is 1 when column 6 contains 'vs.', WIN is 1 when column 0 is
    'W'.  The four player figures are the first row of the team's (resp.
    opponent's) '<id>.<season>.player.csv.processed.norm' file, and the
    league ranks come from the first row of '<season>.l', where <season>
    is column 3 of the row being processed.

    Compared with the previous revision, each '.norm' / '.l' file is read
    from disk once per call rather than once per row, and the outer loop
    variable is no longer overwritten inside the inner loop.

    @param teamId  team id as listed in 'teamidshortname.csv'
    @return list(list), one feature list per selected row
    @raise ValueError if teamId is not listed in 'teamidshortname.csv'
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    idFile = DIR + 'teamidshortname.csv'
    teamIds = loadTeamIds(idFile)
    teamNames = [entry[1] for entry in loadMatrixFromFile(idFile)]
    seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')

    # The short name does not change between rows, so look it up once.
    shortName = teamNames[teamIds.index(teamId)]

    firstRows = {}

    def firstRow(path):
        # First row of a matrix file, read lazily and remembered for the
        # rest of this call so repeated rows do not hit the disk again.
        if path not in firstRows:
            firstRows[path] = loadMatrixFromFile(path)[0]
        return firstRows[path]

    res = []
    for season in seasons:
        for row in loadMatrixFromFile(DIR + season + '.playoff.csv'):
            matchup = row[6]
            # NOTE(review): this also keeps rows where the short name is
            # the opponent part of the matchup; the ranks below are still
            # taken from matchup[0:3] / matchup[-3:] -- confirm intended.
            if shortName not in matchup:
                continue

            WIN = 1 if row[0] == 'W' else 0
            HOME = 1 if 'vs.' in matchup else 0

            # File names are keyed by the season recorded in the row.
            rowSeason = row[3]
            normSuffix = '.' + rowSeason + '.player.csv.processed.norm'

            heightTotal, weightTotal, ageTotal, expTotal = firstRow(
                DIR + teamId + normSuffix)
            leagueranks = firstRow(DIR + rowSeason + '.l')
            leaguerank = leagueranks[teamNames.index(matchup[0:3])]

            vsTeamId = teamIds[teamNames.index(matchup[-3:])]
            vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = firstRow(
                DIR + vsTeamId + normSuffix)
            vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

            res.append([
                HOME,
                heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
                vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal,
                vsLeaguerank,
                WIN,
            ])
    return res
def generateTrainDataBySeason(season):
    """Build the feature rows of one season from every team's game log.

    For each team id, the rows of '<team>.csv.sorted' whose column 2
    equals `season` are turned into 12-element lists:

        [HOME, height, weight, age, exp, leaguerank,
         vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

    HOME is 1 when column 5 contains 'vs.', WIN is 1 when column 0 is
    'W'.  The four player figures are the first row of the team's (resp.
    opponent's) '<id>.<season>.player.csv.processed.norm' file; league
    ranks come from the first row of '<season>.l'.

    Compared with the previous revision, each '.norm' file is read from
    disk once per call rather than once per game row.  Files are still
    read only when a row actually needs them.

    @param season  season string used in file names and in column 2
    @return list(list), one feature list per matching row
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    idFile = DIR + 'teamidshortname.csv'
    teamIds = loadTeamIds(idFile)
    teamNames = [entry[1] for entry in loadMatrixFromFile(idFile)]
    leagueranks = loadMatrixFromFile(DIR + season + '.l')[0]

    normSuffix = '.' + season + '.player.csv.processed.norm'
    normRows = {}

    def normRow(someTeamId):
        # First row of a team's '.norm' file, read lazily and remembered
        # for the rest of this call.
        if someTeamId not in normRows:
            normRows[someTeamId] = loadMatrixFromFile(
                DIR + someTeamId + normSuffix)[0]
        return normRows[someTeamId]

    res = []
    for team in teamIds:
        for row in loadMatrixFromFile(DIR + team + '.csv.sorted'):
            if row[2] != season:
                continue

            matchup = row[5]
            WIN = 1 if row[0] == 'W' else 0
            HOME = 1 if 'vs.' in matchup else 0

            heightTotal, weightTotal, ageTotal, expTotal = normRow(team)
            # Own rank is looked up via the first three characters of the
            # matchup, the opponent via the last three.
            leaguerank = leagueranks[teamNames.index(matchup[0:3])]

            vsTeamId = teamIds[teamNames.index(matchup[-3:])]
            vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = normRow(
                vsTeamId)
            vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

            res.append([
                HOME,
                heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
                vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal,
                vsLeaguerank,
                WIN,
            ])
    return res
def main(): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt') teamIds = loadTeamIds(DIR + 'teamidshortname.csv') seasonTypes = ['Regular Season', 'Playoffs'] # print seasons # return for team in teamIds: for season in seasons: #for seasonType in seasonTypes: seasonType = 'Regular Season' n = NBAStatsTeamPlayerExtractor(team, season, seasonType) outputFile = DIR + team + '.' + season + '.player.csv' print 'INFO: Processing ', outputFile mat = n.getStats() if mat == False: saveMatrixToFile(outputFile, []) else: saveMatrixToFile(outputFile, mat)
# -*- coding: utf-8 -*- # # Author: Archer # Date: 05/Jun/2015 # File: InsertPlayer.py # Desc: insert into NBA.Player table # # Produced By CSRGXTU import MySQLdb as mdb import sys from Utility import loadSeasons, loadTeamIds, loadMatrixFromFile basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/' seasons = loadSeasons(basePath + 'seasons-18-Nov-2014.txt') teamIds = loadTeamIds(basePath + 'teamidname-18-Nov-2014.csv') def insertPlayer(cur): for team in teamIds: sql = "select TeamID from Team where StatsID = '%s'" % team cur.execute(sql) TeamID = cur.fetchone()[0] for season in seasons: sql = "select SeasonID from Season where Season = '%s' and Season_SeasonTypeID = 2" % season cur.execute(sql) SeasonID = cur.fetchone()[0] matrix = loadMatrixFromFile(basePath + team + '.' + season + '.player.csv')
#!/usr/bin/env python # coding = utf-8 # Author: Archer Reilly # Date: 24/DEC/2014 # File: NBAStatsTeamPlayerDataProcessor.py # Desc: the data downloaded from net isnt good, so need this # file process it before used in models # # Produced By CSRGXTU from Utility import loadMatrixFromFile, saveMatrixToFile, readmatricefromfile, loadSeasons, loadTeamIds, saveLstToFile DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt') teamIds = loadTeamIds(DIR + 'teamidshortname.csv') # noneWithAVG # replace None with average value # # @param teamId # @param season # @return res list(list) def noneWithAVG(teamId, season): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv") if len(mat) == 0: return [[]] heights = [] weights = [] ages = []
tmpLst.append(item['PTS']) tmpLst.append(ranking['PPG']) tmpLst.append(ranking['RPG']) tmpLst.append(ranking['APG']) tmpLst.append(ranking['OPPG']) tmpLst.append(profile['Height']) tmpLst.append(profile['Weight']) tmpLst.append(profile['Age']) appendlst2file(tmpLst, dataFile) if __name__ == '__main__': # first, load team id teamIds = loadTeamIds('../../data/basketball/teamidname-18-Nov-2014.csv') # second, load seasons seasons = loadSeasons('../../data/basketball/seasons.txt') # seasonTypes seasonTypes = ['Regular Season'] # leagueId leagueId = "00" # for teamId in teamIds: # dataFile = '../../data/basketball/' + teamId + '.csv' # for t in seasonTypes: # for s in seasons: # print "Processing " + teamId + " " + s + " " + t,
#!/usr/bin/env python # coding = utf-8 # Author: Archer Reilly # Date: 24/DEC/2014 # File: NBAStatsTeamPlayerDataProcessor.py # Desc: the data downloaded from net isnt good, so need this # file process it before used in models # # Produced By CSRGXTU from Utility import loadMatrixFromFile, saveMatrixToFile, readmatricefromfile, loadSeasons, loadTeamIds, saveLstToFile DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt') teamIds = loadTeamIds(DIR + 'teamidshortname.csv') # noneWithAVG # replace None with average value # # @param teamId # @param season # @return res list(list) def noneWithAVG(teamId, season): DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/' mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv") if len(mat) == 0: return [[]] heights = [] weights = [] ages = [] exps = []
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Archer
# File: InsertTeamStats.py
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

# Lookup data, loaded once when the module is imported.
teamIds = loadTeamIds(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv'
)
seasons = loadSeasons(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt'
)
TeamID2TeamShortNames = loadMatrixFromFile(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv'
)


# findId
# look up the id stored next to a short name in TeamID2TeamShortNames
#
# @param shortName value compared against column 1 of each row
# @return column 0 of the first matching row, False when none matches
def findId(shortName):
    # The generator is consumed lazily, so scanning stops at the first
    # match, as an explicit loop with an early return would.
    return next(
        (entry[0] for entry in TeamID2TeamShortNames
         if entry[1] == shortName),
        False)
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Author: Archer # File: InsertTeamStats.py # Date: 05/Jun/2015 # Desc: insert NBA.TeamStats table # # Produced By CSRGXTU import MySQLdb as mdb import sys from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds teamIds = loadTeamIds('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv') seasons = loadSeasons('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt') TeamID2TeamShortNames = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv') def findId(shortName): for row in TeamID2TeamShortNames: if row[1] == shortName: return row[0] return False def isHome(matchUpString): if '@' in matchUpString: return 1 else: return 0 def none20(lst): for i in range(len(lst)):
tmpLst.append(item['PTS']) tmpLst.append(ranking['PPG']) tmpLst.append(ranking['RPG']) tmpLst.append(ranking['APG']) tmpLst.append(ranking['OPPG']) tmpLst.append(profile['Height']) tmpLst.append(profile['Weight']) tmpLst.append(profile['Age']) appendlst2file(tmpLst, dataFile) if __name__ == '__main__': # first, load team id teamIds = loadTeamIds('../../data/basketball/leaguerank/teamidname-18-Nov-2014.csv') # second, load seasons seasons = loadSeasons('../../data/basketball/leaguerank/seasons-18-Nov-2014.txt') # seasonTypes seasonTypes = ['Playoffs'] # leagueId leagueId = "00" """ for teamId in teamIds: dataFile = '../../data/basketball/leaguerank/' + teamId + '.playoff.csv' for t in seasonTypes: for s in seasons: