def __init__(self, teamfile, opponentteamfile, outputfile, k):
    """Store the file paths and k, and eagerly load both team matrices.

    teamfile and opponentteamfile are read with loadMatrixFromFile during
    construction; outputfile and k are only remembered on the instance.
    """
    self.teamFile, self.opponentTeamFile, self.outputFile = (
        teamfile, opponentteamfile, outputfile)
    # Both matrices are loaded up front so later methods can use them directly.
    self.teamLst = loadMatrixFromFile(teamfile)
    self.opponentTeamLst = loadMatrixFromFile(opponentteamfile)
    self.k = k
def generateTestDataByTeam(teamId):
    """Build playoff feature rows for one team over every listed season.

    Each returned row has the layout
    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN].
    The player figures come from the '.player.csv.processed.norm' files.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')

    rows = []
    for season in seasons:
        for game in loadMatrixFromFile(DIR + season + '.playoff.csv'):
            # Keep only games whose matchup string mentions this team.
            if teamNames[teamIds.index(teamId)] not in game[6]:
                continue
            won = 1 if game[0] == 'W' else 0
            atHome = 1 if 'vs.' in game[6] else 0
            matchup = game[6]
            # The season recorded in the row itself selects the per-game files.
            gameSeason = game[3]
            normSuffix = '.' + gameSeason + '.player.csv.processed.norm'

            heightTotal, weightTotal, ageTotal, expTotal = \
                loadMatrixFromFile(DIR + teamId + normSuffix)[0]
            leagueranks = loadMatrixFromFile(DIR + gameSeason + '.l')[0]
            leaguerank = leagueranks[teamNames.index(matchup[0:3])]

            # The opponent is identified by the last three characters.
            vsTeamId = teamIds[teamNames.index(matchup[-3:])]
            vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = \
                loadMatrixFromFile(DIR + vsTeamId + normSuffix)[0]
            vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

            rows.append([
                atHome,
                heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
                vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal,
                vsLeaguerank,
                won,
            ])
    return rows
def generateTrainDataBySeason(season):
    """Build feature rows for every team's games played in the given season.

    Each returned row has the layout
    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN].
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    leagueranks = loadMatrixFromFile(DIR + season + '.l')[0]
    normSuffix = '.' + season + '.player.csv.processed.norm'

    rows = []
    for team in teamIds:
        for game in loadMatrixFromFile(DIR + team + '.csv.sorted'):
            # Column 2 carries the season; ignore games of other seasons.
            if game[2] != season:
                continue
            won = 1 if game[0] == 'W' else 0
            atHome = 1 if 'vs.' in game[5] else 0
            matchup = game[5]

            heightTotal, weightTotal, ageTotal, expTotal = \
                loadMatrixFromFile(DIR + team + normSuffix)[0]
            leaguerank = leagueranks[teamNames.index(matchup[0:3])]

            # The opponent is identified by the last three characters.
            vsTeamId = teamIds[teamNames.index(matchup[-3:])]
            vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = \
                loadMatrixFromFile(DIR + vsTeamId + normSuffix)[0]
            vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

            rows.append([
                atHome,
                heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
                vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal,
                vsLeaguerank,
                won,
            ])
    return rows
def selectColumns(self, matrix, teamid):
    """Reduce each game row to [win, home, points, leaguerank].

    win is 1 when column 0 is 'W'; home is 1 when column 5 contains 'vs';
    points is column 19 unchanged; leaguerank is read from the '.l' file
    named by column 1, at the position of teamid in teamidshortname.csv.
    """
    DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    TEAMID_FILE = DATA_PATH + 'teamidshortname.csv'
    TEAMIDS = [entry[0] for entry in loadMatrixFromFile(TEAMID_FILE)]

    selected = []
    for game in matrix:
        win = int(game[0] == 'W')
        home = int('vs' in game[5])
        points = game[19]
        rankRow = loadMatrixFromFile(DATA_PATH + game[1] + '.l')[0]
        selected.append([win, home, points, rankRow[TEAMIDS.index(teamid)]])
    return selected
def teamMain():
    """Train and score a NaiveBayesClassifier per team and print accuracies.

    For each team id the '-train.csv' / '-test.csv' files are loaded, the
    classifier is trained, and the share of matching labels is printed.
    The overall accuracy across all teams is printed at the end.
    """
    DIR = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

    hitsOverall = 0
    samplesOverall = 0
    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        samplesOverall += len(labels)

        classifier = NaiveBayesClassifier.train(train)
        predicted = classifier.batch_classify(test)

        # accuracy
        hits = 0
        for idx in range(len(predicted)):
            if labels[idx] == predicted[idx]:
                hits += 1
        hitsOverall += hits
        print('%s %s %s %s' % ('INFO: Accuracy(',
                               teamNames[teamIds.index(team)], ')',
                               hits / float(len(predicted))))
    print('%s %s' % ('INFO: Total Accuracy: ',
                     hitsOverall / float(samplesOverall)))
def buildTestingLabels(inputFile):
    """Return column 3 of every row in inputFile as a flat list of labels."""
    return [record[3] for record in loadMatrixFromFile(inputFile)]
def teamMain():
    """Train an nltk MaxentClassifier (IIS) per team and print its accuracy.

    A test sample is labelled 'L' when P('L') >= P('W'), otherwise 'W'.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0
    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total += len(labels)

        # train
        classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000)

        # test
        hits = 0
        for idx in range(len(labels)):
            pdist = classifier.prob_classify(test[idx])
            # Ties go to 'L'.
            flag = 'L' if pdist.prob('L') >= pdist.prob('W') else 'W'
            if flag == labels[idx]:
                hits += 1
        print('%s %s %s %s' % ('INFO: accuracy ', team, " ",
                               float(hits) / len(labels)))
def teamMain():
    """Train a cv2.SVM per team on the '.knn' files and print accuracies.

    Uses the module-level svm_params. Each trained model is saved to
    'svm_data.dat', so the file ends up holding the last team's model.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

    hitsOverall = 0
    samplesOverall = 0
    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
        testData = buildTestingSets(DIR + team + '-test.csv.knn')
        testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
        samplesOverall += len(testLabels)

        svm = cv2.SVM()
        svm.train(trainData, trainLabels, params=svm_params)
        svm.save('svm_data.dat')

        # Accuracy
        hits = 0
        for idx in range(len(testLabels)):
            ret = svm.predict(np.array([testData[idx]]))
            if ret == testLabels[idx][0]:
                hits += 1
        hitsOverall += hits
        print('%s %s %s %s' % ('INFO: Accuracy(', team, ')',
                               hits / float(len(testLabels))))
    print('%s %s' % ('INFO: Total Accuracy: ',
                     hitsOverall / float(samplesOverall)))
def teamMain():
    """Train a cv2.KNearest model per team and print accuracies (k = 31)."""
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

    hitsOverall = 0
    samplesOverall = 0
    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
        testData = buildTestingSets(DIR + team + '-test.csv')
        testLabels = buildTestingLabels(DIR + team + '-test.csv')
        samplesOverall += len(testLabels)

        knn = cv2.KNearest()
        knn.train(trainData, trainLabels)

        # Accuracy
        hits = 0
        for idx in range(len(testLabels)):
            ret, results, neighbours, dist = knn.find_nearest(
                np.array([testData[idx]]), 31)
            if results[0][0] == testLabels[idx][0]:
                hits += 1
        hitsOverall += hits
        print('%s %s %s %s' % ('INFO: Accuracy(',
                               teamNames[teamIds.index(team)], ')',
                               hits / float(len(testLabels))))
    print('%s %s' % ('INFO: Total Accuracy: ',
                     hitsOverall / float(samplesOverall)))
def insertPlayer(cur):
    """Insert every roster row of every team/season file into Player.

    cur is a DB-API cursor. The function relies on the module-level
    teamIds, seasons and basePath. For each team the TeamID is looked up by
    StatsID, for each season the SeasonID (season type 2) is looked up, and
    each row of '<team>.<season>.player.csv' becomes one Player record.

    All values are handed to the driver as query parameters instead of
    being pasted into the SQL text, so quotes inside names or other CSV
    fields can neither break nor alter the statement.
    """
    teamSql = "select TeamID from Team where StatsID = %s"
    seasonSql = ("select SeasonID from Season "
                 "where Season = %s and Season_SeasonTypeID = 2")
    insertSql = (
        "insert into Player ("
        "Name, Position, Height, Weight, Age, Experience, "
        "CreatedBy, CreatedTime, Player_TeamID, Player_SeasonID) "
        "values (%s, %s, %s, %s, %s, %s, "
        "'archer', '2015-06-05 16:44:00', %s, %s)")

    for team in teamIds:
        cur.execute(teamSql, (team,))
        TeamID = cur.fetchone()[0]
        for season in seasons:
            cur.execute(seasonSql, (season,))
            SeasonID = cur.fetchone()[0]
            matrix = loadMatrixFromFile(basePath + team + '.' + season + '.player.csv')
            for row in matrix:
                params = (row[0], row[1], row[2], row[3], row[4], row[5],
                          TeamID, SeasonID)
                # Progress/debug output: statement template plus its values.
                print('%s %r' % (insertSql, params))
                cur.execute(insertSql, params)
def buildTestingSets(inputFile):
    """Load the first three columns of inputFile as a float32 numpy array.

    Column 0 is passed through as read; columns 1 and 2 go through float()
    before the whole array is cast to float32.
    """
    samples = [[record[0], float(record[1]), float(record[2])]
               for record in loadMatrixFromFile(inputFile)]
    return np.array(samples).astype(np.float32)
def teamMain():
    """Train an nltk MaxentClassifier (IIS) per team and print its accuracy.

    A test sample is labelled 'L' when P('L') >= P('W'), otherwise 'W'.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0
    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total += len(labels)

        # train
        classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000)

        # test
        hits = 0
        for idx in range(len(labels)):
            pdist = classifier.prob_classify(test[idx])
            # Ties go to 'L'.
            flag = 'L' if pdist.prob('L') >= pdist.prob('W') else 'W'
            if flag == labels[idx]:
                hits += 1
        print('%s %s %s %s' % ('INFO: accuracy ', team, " ",
                               float(hits) / len(labels)))
def teamMain():
    """Train a cv2.KNearest model per team and print accuracies (k = 11)."""
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

    hitsOverall = 0
    samplesOverall = 0
    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
        testData = buildTestingSets(DIR + team + '-test.csv')
        testLabels = buildTestingLabels(DIR + team + '-test.csv')
        samplesOverall += len(testLabels)

        knn = cv2.KNearest()
        knn.train(trainData, trainLabels)

        # Accuracy
        hits = 0
        for idx in range(len(testLabels)):
            ret, results, neighbours, dist = knn.find_nearest(
                np.array([testData[idx]]), 11)
            if results[0][0] == testLabels[idx][0]:
                hits += 1
        hitsOverall += hits
        print('%s %s %s %s' % ('INFO: Accuracy(',
                               teamNames[teamIds.index(team)], ')',
                               hits / float(len(testLabels))))
    print('%s %s' % ('INFO: Total Accuracy: ',
                     hitsOverall / float(samplesOverall)))
def buildTestingLabels(inputFile):
    """Return column 11 of inputFile as a float32 array, one [[label]] per row."""
    labels = [[[record[11]]] for record in loadMatrixFromFile(inputFile)]
    return np.array(labels).astype(np.float32)
def teamMain():
    """Train a cv2.SVM per team on the '.knn' files and print accuracies.

    Uses the module-level svm_params. Each trained model is saved to
    'svm_data.dat', so the file ends up holding the last team's model.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

    hitsOverall = 0
    samplesOverall = 0
    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
        testData = buildTestingSets(DIR + team + '-test.csv.knn')
        testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
        samplesOverall += len(testLabels)

        svm = cv2.SVM()
        svm.train(trainData, trainLabels, params=svm_params)
        svm.save('svm_data.dat')

        # Accuracy
        hits = 0
        for idx in range(len(testLabels)):
            ret = svm.predict(np.array([testData[idx]]))
            if ret == testLabels[idx][0]:
                hits += 1
        hitsOverall += hits
        print('%s %s %s %s' % ('INFO: Accuracy(', team, ')',
                               hits / float(len(testLabels))))
    print('%s %s' % ('INFO: Total Accuracy: ',
                     hitsOverall / float(samplesOverall)))
def buildTrainingSets(inputFile):
    """Load columns 0..10 of inputFile as a float32 numpy array.

    Column 0 is passed through as read; columns 1 to 10 go through float()
    before the whole array is cast to float32.
    """
    samples = []
    for record in loadMatrixFromFile(inputFile):
        # Index explicitly so a short row still raises IndexError.
        samples.append([record[0]] + [float(record[col]) for col in range(1, 11)])
    return np.array(samples).astype(np.float32)
def generateTestDataByTeams():
    """Write '<teamId>-test.csv.knn' for every team id.

    Each file holds the rows produced by generateTestDataByTeam for that
    team; the output path is printed before it is written.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    for teamId in teamIds:
        rows = generateTestDataByTeam(teamId)
        outputFile = DIR + teamId + '-test.csv.knn'
        print('%s %s' % ('INFO: ', outputFile))
        saveMatrixToFile(outputFile, rows)
def buildTestingLabels(inputFile):
    """Return win flags as a float32 array, one [[flag]] per row.

    The flag is 1 when column 3 equals 'W' and 0 otherwise.
    """
    flags = [[[1 if record[3] == 'W' else 0]]
             for record in loadMatrixFromFile(inputFile)]
    return np.array(flags).astype(np.float32)
def buildTestingSets(inputFile):
    """Return [column 0, rank flag] per row as a float32 numpy array.

    The rank flag is 0 when column 1 minus column 2 is negative, else 1.
    """
    samples = []
    for record in loadMatrixFromFile(inputFile):
        rankGap = float(record[1]) - float(record[2])
        samples.append([record[0], 0 if rankGap < 0 else 1])
    return np.array(samples).astype(np.float32)
def buildTestingSets(inputFile):
    """Return one feature dict {'HOME', 'LeagueRank'} per row of inputFile.

    HOME is column 0 unchanged; LeagueRank is 0 when column 1 minus
    column 2 is negative, else 1.
    """
    featureSets = []
    for record in loadMatrixFromFile(inputFile):
        rankGap = float(record[1]) - float(record[2])
        featureSets.append({'HOME': record[0],
                            'LeagueRank': 0 if rankGap < 0 else 1})
    return featureSets
def buildTestingSets(inputFile):
    """Return one feature dict {'HOME', 'LeagueRank'} per row of inputFile.

    HOME is column 0 unchanged; LeagueRank is 0 when column 1 minus
    column 2 is negative, else 1.
    """
    featureSets = []
    for record in loadMatrixFromFile(inputFile):
        rankGap = float(record[1]) - float(record[2])
        featureSets.append({'HOME': record[0],
                            'LeagueRank': 0 if rankGap < 0 else 1})
    return featureSets
def buildTrainingSets(inputFile):
    """Load columns 0..10 of inputFile as a float32 numpy array.

    Column 0 is passed through as read; columns 1 to 10 go through float()
    before the whole array is cast to float32.
    """
    samples = []
    for record in loadMatrixFromFile(inputFile):
        # Index explicitly so a short row still raises IndexError.
        samples.append([record[0]] + [float(record[col]) for col in range(1, 11)])
    return np.array(samples).astype(np.float32)
def _fillNoneWithAvg(values):
    """Replace every 'None' entry of values, in place, by the mean of the rest.

    Known entries are left exactly as they were (they are only converted
    with float() for the average). When no entry is known there is nothing
    to average, so the placeholders are left untouched instead of dividing
    by zero.
    """
    known = [float(v) for v in values if v != 'None']
    if not known:
        return values
    avg = sum(known) / float(len(known))
    for idx, value in enumerate(values):
        if value == 'None':
            values[idx] = avg
    return values


def noneWithAVG(teamId, season):
    """Load a team's player file and fill missing figures with column means.

    Reads '<teamId>.<season>.player.csv' and returns one
    [height, weight, age, exp] list per player. Heights given as 'a-b' are
    converted to a * 12 + b (presumably feet-inches to inches). 'None' in
    the height, weight and age columns is replaced by the mean of the known
    values of that column; the exp column is passed through unchanged.
    Returns [[]] when the file has no rows.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
    if len(mat) == 0:
        return [[]]

    heights = [row[2] for row in mat]
    weights = [row[3] for row in mat]
    ages = [row[4] for row in mat]
    exps = [row[5] for row in mat]

    # Convert 'a-b' height strings to a single number before averaging.
    for idx, height in enumerate(heights):
        if height == 'None':
            continue
        parts = height.split('-')
        heights[idx] = int(parts[0]) * 12 + int(parts[1])

    _fillNoneWithAvg(heights)
    _fillNoneWithAvg(weights)
    _fillNoneWithAvg(ages)

    # make a mat
    return [list(player) for player in zip(heights, weights, ages, exps)]
# Tally wins and losses of team 1610612741 by league-rank bucket.
# NOTE(review): only the 'l1' bucket is tallied in the visible part of this
# script; the l2..l4 counters are declared but not updated here.
from Utility import loadMatrixFromFile

DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
mat = loadMatrixFromFile(DIR + '1610612741.csv.sorted')

# One win/lose counter pair per rank bucket.
l1WinCount = 0
l1LoseCount = 0
l2WinCount = 0
l2LoseCount = 0
l3WinCount = 0
l3LoseCount = 0
l4WinCount = 0
l4LoseCount = 0

for row in mat:
    # Rank value at index 5 of the '.l' file named by column 2 of the row.
    lr = float(loadMatrixFromFile(DIR + row[2] + '.l')[0][5])
    # cat stays False when lr is negative.
    cat = False
    if lr >= 0 and lr < 0.02:
        cat = 'l1'
    elif lr >= 0.02 and lr < 0.04:
        cat = 'l2'
    elif lr >= 0.04 and lr < 0.06:
        cat = 'l3'
    elif lr >= 0.06:
        cat = 'l4'
    if row[0] == 'W' and cat == 'l1':
        l1WinCount = l1WinCount + 1
    elif row[0] == 'L' and cat == 'l1':
        l1LoseCount = l1LoseCount + 1
#!/usr/bin/env python # coding = utf8 # Author: Archer Reilly # Date: 20/Nov/2014 # File: TransformMatrixBuilder.py # Desc: build transform matrix from dates # # Produced By CSRGXTU from Utility import loadMatrixFromFile, appendlst2file, saveMatrixToFile from os import listdir dates = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/leaguerank/dates.csv')[0] """ dirs = listdir('/home/archer/Documents/maxent/data/basketball/leaguerank/') sortedFiles = [] for f in dirs: if f.endswith('.csv.sorted'): sortedFiles.append(f) """ teamidabbrs = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/leaguerank/teamidshortname.csv') sortedFiles = [] sortedNames = [] for row in teamidabbrs: sortedFiles.append(row[0] + '.csv.sorted') sortedNames.append(row[1]) # generateRow # generate a row from matrix for transform matrix # # @param matrix
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 19/Nov/2014
# File: SortByTime.py
# Desc: sort the content in original data file by time
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile
from os import listdir
from datetime import datetime

# Directory holding the raw per-team files; outputs are written next to them.
DATA_DIR = '/home/archer/Documents/Python/maxent/data/basketball/'

dirs = listdir(DATA_DIR)
for f in dirs:
    # Raw team files start with '161'. The '.sorted' files written by an
    # earlier run share that prefix, so skip them; otherwise every re-run
    # would sort its own output and create '<name>.sorted.sorted'.
    if not f.startswith('161') or f.endswith('.sorted'):
        continue
    print('Process file: ' + f)
    matrix = loadMatrixFromFile(DATA_DIR + f)
    # Column 1 holds the game date in the form '%b %d:%Y'.
    matrixa = sorted(matrix, key=lambda x: datetime.strptime(x[1], '%b %d:%Y'))
    saveMatrixToFile(DATA_DIR + f + '.sorted', matrixa)
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Author: Archer # Date: 05/Jun/2015 # File: InsertTeam.py # Desc: insert the Team NBA.Team table # # Produced By CSRGXTU import MySQLdb as mdb import sys from Utility import loadMatrixFromFile id_names = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidname-18-Nov-2014.csv') id_shortnames = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv') matrix = [] for item in id_names: tmp = [item[0], item[1]] for item1 in id_shortnames: if item1[0] == item[0]: tmp.append(item1[1]) matrix.append(tmp) con = mdb.connect('localhost', 'root', 'root', 'NBA') with con: cur = con.cursor() for item in matrix: sql = "insert into Team (\ StatsID,\ NameEN,\
def insertPlayoff(teamIds, cur):
    """Insert every row of each team's '.playoff.csv' file into TeamStats.

    teamIds is an iterable of team id strings and cur a DB-API cursor.
    Each row is first passed through none20; the team and opponent ids are
    looked up with findId from the first and last three characters of
    column 6, and the home flag comes from isHome. The SeasonID is looked
    up by column 3 with season type 3.

    All values are handed to the driver as query parameters instead of
    being pasted into the SQL text. The numeric columns are still converted
    with int() first, so a malformed figure raises ValueError before
    anything is sent to the database.
    """
    basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
    seasonSql = ("select SeasonID from Season "
                 "where Season = %s and Season_SeasonTypeID = 3")
    insertSql = (
        "insert into TeamStats ("
        "TeamStats_TeamID, TeamStats_SeasonID, Result, Date, Home, "
        "Fgm, Fga, 3pm, 3pa, Ftm, Fta, Oreb, Dreb, Ast, Stl, Blk, Tov, Pf, "
        "CreatedBy, CreatedTime, Points, OpponentTeamID) "
        "values ("
        "%s, %s, %s, %s, %s, "
        "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
        "'archer', '2015-06-05 15:02:22', %s, %s)")

    for team in teamIds:
        matrix = loadMatrixFromFile(basePath + team + '.playoff.csv')
        for row in matrix:
            row = none20(row)
            # NOTE(review): findId, as defined in this project, returns
            # False for an unknown short name, which int() turns into 0.
            teamID = findId(row[6][0:3])
            opponentTeamID = findId(row[6][-3:])
            home = isHome(row[6])

            cur.execute(seasonSql, (row[3],))
            seasonID = cur.fetchone()[0]

            # Columns 7..19 map, in order, onto Fgm .. Pf.
            boxScore = [int(row[col]) for col in range(7, 20)]
            params = [int(teamID), int(seasonID), row[0],
                      row[1] + ' ' + row[2], int(home)]
            params.extend(boxScore)
            params.extend([int(row[20]), int(opponentTeamID)])
            cur.execute(insertSql, tuple(params))
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 19/Nov/2014
# File: SortByTime.py
# Desc: sort the content in original data file by time
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile
from os import listdir
from datetime import datetime

# Directory holding the raw per-team files; outputs are written next to them.
DATA_DIR = '/home/archer/Documents/Python/maxent/data/basketball/'

dirs = listdir(DATA_DIR)
for f in dirs:
    # Raw team files start with '161'. The '.sorted' files written by an
    # earlier run share that prefix, so skip them; otherwise every re-run
    # would sort its own output and create '<name>.sorted.sorted'.
    if not f.startswith('161') or f.endswith('.sorted'):
        continue
    print('Process file: ' + f)
    matrix = loadMatrixFromFile(DATA_DIR + f)
    # Column 1 holds the game date in the form '%b %d:%Y'.
    matrixa = sorted(matrix, key=lambda x: datetime.strptime(x[1], '%b %d:%Y'))
    saveMatrixToFile(DATA_DIR + f + '.sorted', matrixa)
# GameName HomeAway FGM FGA 3PM 3PA FTM FTA OREB DREB AST TOV STL BLK PF PTS WinLose # # Produced By CSRGXTU import sys from Utility import loadMatrixFromFile, saveCrfMatrix, loadTeamRanks if len(sys.argv) != 2: print 'Usage: PrepareTrainingData6.py teamid' sys.exit(1) teamid = sys.argv[1] DATA_PATH = '../data/TeamRank/' mat = loadMatrixFromFile(DATA_PATH + teamid + '.csv.sorted') trainMat = [] for i in range(len(mat)): tmp = [] # GameName tmp.append('G' + str(i + 1)) # HomeAway, home is H, else A if '@' in mat[i][5]: tmp.append('H') else: tmp.append('A') # FGM tmp.append(mat[i][6]) # FGA tmp.append(mat[i][7])
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 21/Nov/2014
# File: BatchGenerateLeagueRank.py
# Desc: use LeagueRank batch generate the league rank for each .m
# file in the data dir
#
# Produced By CSRGXTU
from LeagueRank import LeagueRank
from Utility import loadMatrixFromFile, readmatricefromfile, appendlst2file

DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
DATES_FILE = DATA_PATH + 'dates.csv'

# Start vector handed to LeagueRank: thirty equal entries of 1/30.
L = [1 / float(30)] * 30
dates = loadMatrixFromFile(DATES_FILE)[0]

# Only the dates from index 3633 onwards are processed.
for d in dates[3633:]:
    print('INFO: generate LeagueRank for ' + d + '.m')
    # An earlier run used 0.00000001 and 100000 for the last two arguments.
    o = LeagueRank(L, readmatricefromfile(DATA_PATH + d + '.m'), 0.001, 10)
    appendlst2file(o.rank(), DATA_PATH + d + '.l')
    print(' Done')
#!/usr/bin/env python
#
# Author: Archer Reilly
# Date: 15/May/2015
# File: MainDownloadOdds.py
# Desc: download the raw data from web site
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile
from OddsDownloader import OddsDownloader

res = loadMatrixFromFile('./SeasonId')
for item in res:
    # Column 2 holds the highest index to request for this entry.
    lastIndex = int(item[2])
    index = 1
    while index <= lastIndex:
        o = OddsDownloader(item[0], index)
        o.run()
        index += 1
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Author: Archer # File: InsertTeamStats.py # Date: 05/Jun/2015 # Desc: insert NBA.TeamStats table # # Produced By CSRGXTU import MySQLdb as mdb import sys from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds teamIds = loadTeamIds('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv') seasons = loadSeasons('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt') TeamID2TeamShortNames = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv') def findId(shortName): for row in TeamID2TeamShortNames: if row[1] == shortName: return row[0] return False def isHome(matchUpString): if '@' in matchUpString: return 1 else: return 0 def none20(lst): for i in range(len(lst)):
# GameName HomeAway FGM FGA 3PM 3PA FTM FTA OREB DREB AST TOV STL BLK PF PTS WinLose # # Produced By CSRGXTU import sys from Utility import loadMatrixFromFile, saveCrfMatrix if len(sys.argv) != 2: print 'Usage: PrepareTrainingData.py teamid' sys.exit(1) teamid = sys.argv[1] DATA_PATH = '../data/TeamRank/' mat = loadMatrixFromFile(DATA_PATH + teamid + '.csv.sorted') trainMat = [] for i in range(len(mat)): tmp = [] # GameName tmp.append('G' + str(i + 1)) # HomeAway, home is H, else A if '@' in mat[i][5]: tmp.append('H') else: tmp.append('A') # FGM tmp.append(mat[i][6]) # FGA tmp.append(mat[i][7])
def _fillNoneWithAvg(values):
    """Replace every 'None' entry of values, in place, by the mean of the rest.

    Known entries are left exactly as they were (they are only converted
    with float() for the average). When no entry is known there is nothing
    to average, so the placeholders are left untouched instead of dividing
    by zero.
    """
    known = [float(v) for v in values if v != 'None']
    if not known:
        return values
    avg = sum(known) / float(len(known))
    for idx, value in enumerate(values):
        if value == 'None':
            values[idx] = avg
    return values


def noneWithAVG(teamId, season):
    """Load a team's player file and fill missing figures with column means.

    Reads '<teamId>.<season>.player.csv' and returns one
    [height, weight, age, exp] list per player. Heights given as 'a-b' are
    converted to a * 12 + b (presumably feet-inches to inches). 'None' in
    the height, weight and age columns is replaced by the mean of the known
    values of that column; the exp column is passed through unchanged.
    Returns [[]] when the file has no rows.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
    if len(mat) == 0:
        return [[]]

    heights = [row[2] for row in mat]
    weights = [row[3] for row in mat]
    ages = [row[4] for row in mat]
    exps = [row[5] for row in mat]

    # Convert 'a-b' height strings to a single number before averaging.
    for idx, height in enumerate(heights):
        if height == 'None':
            continue
        parts = height.split('-')
        heights[idx] = int(parts[0]) * 12 + int(parts[1])

    _fillNoneWithAvg(heights)
    _fillNoneWithAvg(weights)
    _fillNoneWithAvg(ages)

    # make a mat
    return [list(player) for player in zip(heights, weights, ages, exps)]
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

teamIds = loadTeamIds(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv'
)
seasons = loadSeasons(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt'
)
# Rows of (team id, short name) used by findId.
TeamID2TeamShortNames = loadMatrixFromFile(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv'
)


def findId(shortName):
    """Return the team id whose short name equals shortName, else False."""
    for row in TeamID2TeamShortNames:
        if row[1] == shortName:
            return row[0]
    return False


def isHome(matchUpString):
    """Return 1 when the matchup string describes a home game, else 0.

    A matchup containing 'vs.' is a home game and one containing '@' is an
    away game; this is the convention the feature-generation code of this
    project uses. The previous implementation returned 1 for '@', which
    stored the inverse flag.
    """
    return 1 if 'vs.' in matchUpString else 0
# coding=utf8
#
# Author: Archer Reilly
# Date: 23/Feb/2016
# Desc: use kmeans clustering the books
# Usage: ./kmeans.py inputfile
#
# Produced By BR
import sys
from sklearn.cluster import KMeans
from Utility import loadMatrixFromFile

if len(sys.argv) != 2:
    print('Usage: ./kmeans.py inputfile')
    sys.exit(1)

inputfile = sys.argv[1]
mat = loadMatrixFromFile(inputfile)

# Column 0 is dropped; the remaining columns are the clustering features.
nmat = [row[1:] for row in mat]

num_clusters = 120  # douban's book have 120 categories
km = KMeans(n_clusters=num_clusters)
km.fit(nmat)

clusters = km.labels_.tolist()
print(clusters)
def insertPlayoff(teamIds, cur):
    """Insert every row of each team's '.playoff.csv' file into TeamStats.

    teamIds is an iterable of team id strings and cur a DB-API cursor.
    Each row is first passed through none20; the team and opponent ids are
    looked up with findId from the first and last three characters of
    column 6, and the home flag comes from isHome. The SeasonID is looked
    up by column 3 with season type 3.

    All values are handed to the driver as query parameters instead of
    being pasted into the SQL text. The numeric columns are still converted
    with int() first, so a malformed figure raises ValueError before
    anything is sent to the database.
    """
    basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
    seasonSql = ("select SeasonID from Season "
                 "where Season = %s and Season_SeasonTypeID = 3")
    insertSql = (
        "insert into TeamStats ("
        "TeamStats_TeamID, TeamStats_SeasonID, Result, Date, Home, "
        "Fgm, Fga, 3pm, 3pa, Ftm, Fta, Oreb, Dreb, Ast, Stl, Blk, Tov, Pf, "
        "CreatedBy, CreatedTime, Points, OpponentTeamID) "
        "values ("
        "%s, %s, %s, %s, %s, "
        "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
        "'archer', '2015-06-05 15:02:22', %s, %s)")

    for team in teamIds:
        matrix = loadMatrixFromFile(basePath + team + '.playoff.csv')
        for row in matrix:
            row = none20(row)
            # NOTE(review): findId, as defined in this project, returns
            # False for an unknown short name, which int() turns into 0.
            teamID = findId(row[6][0:3])
            opponentTeamID = findId(row[6][-3:])
            home = isHome(row[6])

            cur.execute(seasonSql, (row[3],))
            seasonID = cur.fetchone()[0]

            # Columns 7..19 map, in order, onto Fgm .. Pf.
            boxScore = [int(row[col]) for col in range(7, 20)]
            params = [int(teamID), int(seasonID), row[0],
                      row[1] + ' ' + row[2], int(home)]
            params.extend(boxScore)
            params.extend([int(row[20]), int(opponentTeamID)])
            cur.execute(insertSql, tuple(params))
#!/usr/bin/env python
# coding=utf-8
#
# Author: Archer Reilly
# Date: 23/Dec/2015
# File: StaticMongo.py
# Desc: find which books isnt in the mongo
#
# Produced By BR
from Utility import loadMatrixFromFile, appendMatrixToFileUtf
from IsbnCheckIn import IsbnCheckIn

filename = '/home/archer/Downloads/data.csv'
mat = loadMatrixFromFile(filename)
i = IsbnCheckIn('192.168.100.2', 27017)

Res = []  # store not ins
for row in mat:
    # Column 1 is the value checked against the database.
    if i.isIn(row[1]):
        continue
    print('%s %s' % (row[1], "not in database"))
    Res.append(row)

# All missing rows are appended to the report file in one go.
appendMatrixToFileUtf('NotFound.csv', Res)
# Date: 21/Nov/2014
# File: DisplayLeagueRank.py
# Desc: display league rank info
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile
import operator

DATA_PATH = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
NAME_FILE = DATA_PATH + 'teamidname-18-Nov-2014.csv'
# Other rank files that have been displayed with this script:
#RANK_FILE = DATA_PATH + 'MAR 31:2014.l'
#RANK_FILE = DATA_PATH + 'APR 16:2014.l'
RANK_FILE = DATA_PATH + '2013-14.l'

names = [x[1] for x in loadMatrixFromFile(NAME_FILE)]
ranks = loadMatrixFromFile(RANK_FILE)[0]

# Map each team name to the rank at the same position in the rank file.
res = {}
for i in range(len(names)):
    res[names[i]] = ranks[i]

# Sort by numeric rank value. The ranks are converted with float() so the
# order is numeric rather than lexicographic, which matters if they are
# text (e.g. a value written in exponent notation such as '9e-05').
sorted_res = sorted(res.items(), key=lambda item: float(item[1]))

print('INFO: All Teams')
print("%22s %-15s" % ('Team', 'LeagueRank'))
for item in sorted_res:
    print("%22s %-15s" % (item[0], item[1]))
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 21/Nov/2014
# File: BatchGenerateLeagueRank.py
# Desc: use LeagueRank batch generate the league rank for each .m
# file in the data dir
#
# Produced By CSRGXTU
from LeagueRank import LeagueRank
from Utility import loadMatrixFromFile, readmatricefromfile, appendlst2file

DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
DATES_FILE = DATA_PATH + 'dates.csv'

# Start vector handed to LeagueRank: thirty equal entries of 1/30.
L = [1 / float(30)] * 30
dates = loadMatrixFromFile(DATES_FILE)[0]

# Only the dates from index 3633 onwards are processed.
for d in dates[3633:]:
    print('INFO: generate LeagueRank for ' + d + '.m')
    # An earlier run used 0.00000001 and 100000 for the last two arguments.
    o = LeagueRank(L, readmatricefromfile(DATA_PATH + d + '.m'), 0.001, 10)
    appendlst2file(o.rank(), DATA_PATH + d + '.l')
    print(' Done')