コード例 #1
0
ファイル: KNearestGames.py プロジェクト: ssj018/maxent
  def __init__(self, teamfile, opponentteamfile, outputfile, k):
    """Store the three file paths, load both game matrices and keep k.

    teamfile and opponentteamfile are read with loadMatrixFromFile as
    part of construction; outputfile and k are only stored.
    """
    self.teamFile, self.opponentTeamFile, self.outputFile = (
        teamfile, opponentteamfile, outputfile)

    # Both matrices are read eagerly, team first.
    self.teamLst = loadMatrixFromFile(teamfile)
    self.opponentTeamLst = loadMatrixFromFile(opponentteamfile)

    self.k = k
コード例 #2
0
ファイル: GenerateDataForKNN.py プロジェクト: ssj018/maxent
def generateTestDataByTeam(teamId):
  """Build the playoff feature rows for one team.

  Every season listed in the seasons file has its '<season>.playoff.csv'
  matrix scanned; each row whose matchup column (row[6]) contains the
  team's short name produces one output row:

    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

  HOME is 1 when the matchup contains 'vs.', WIN is 1 when row[0] is 'W'.
  The four roster figures are the first line of the team's (and the
  opponent's) '.player.csv.processed.norm' file for the row's season.

  Returns the list of feature rows.
  """
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  ROSTER_SUFFIX = '.player.csv.processed.norm'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')
  samples = []

  for listedSeason in seasons:
    for game in loadMatrixFromFile(DIR + listedSeason + '.playoff.csv'):
      # NOTE(review): substring test, so a row where this team appears
      # only as the opponent also passes -- confirm that is intended.
      if teamNames[teamIds.index(teamId)] not in game[6]:
        continue
      matchup = game[6]

      won = 1 if game[0] == 'W' else 0
      atHome = 1 if 'vs.' in matchup else 0

      # The row carries its own season label; it names the files read below.
      gameSeason = game[3]
      height, weight, age, exp = loadMatrixFromFile(
          DIR + teamId + '.' + gameSeason + ROSTER_SUFFIX)[0]

      leagueranks = loadMatrixFromFile(DIR + gameSeason + '.l')[0]
      # matchup[0:3] is looked up as a team short name.
      leaguerank = leagueranks[teamNames.index(matchup[0:3])]

      # matchup[-3:] is looked up as the opponent's short name.
      vsTeamId = teamIds[teamNames.index(matchup[-3:])]
      vsHeight, vsWeight, vsAge, vsExp = loadMatrixFromFile(
          DIR + vsTeamId + '.' + gameSeason + ROSTER_SUFFIX)[0]
      vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

      samples.append([
          atHome, height, weight, age, exp, leaguerank,
          vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank,
          won,
      ])
  return samples
コード例 #3
0
ファイル: GenerateDataForKNN.py プロジェクト: ssj018/maxent
def generateTrainDataBySeason(season):
  """Build the training feature rows of every team for one season.

  Each team's '<team>.csv.sorted' matrix is scanned and the rows whose
  season column (row[2]) equals season produce one output row:

    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

  HOME is 1 when the matchup column (row[5]) contains 'vs.', WIN is 1
  when row[0] is 'W'. League ranks come from the first line of
  '<season>.l'.

  Returns the list of feature rows.
  """
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  ROSTER_SUFFIX = '.player.csv.processed.norm'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [entry[1] for entry in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  leagueranks = loadMatrixFromFile(DIR + season + '.l')[0]
  samples = []

  for team in teamIds:
    for game in loadMatrixFromFile(DIR + team + '.csv.sorted'):
      if game[2] != season:
        continue

      won = 1 if game[0] == 'W' else 0
      matchup = game[5]
      atHome = 1 if 'vs.' in matchup else 0

      height, weight, age, exp = loadMatrixFromFile(
          DIR + team + '.' + season + ROSTER_SUFFIX)[0]

      # matchup[0:3] is looked up as a team short name.
      leaguerank = leagueranks[teamNames.index(matchup[0:3])]

      # matchup[-3:] is looked up as the opponent's short name.
      vsTeamId = teamIds[teamNames.index(matchup[-3:])]
      vsHeight, vsWeight, vsAge, vsExp = loadMatrixFromFile(
          DIR + vsTeamId + '.' + season + ROSTER_SUFFIX)[0]
      vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

      samples.append([
          atHome, height, weight, age, exp, leaguerank,
          vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank,
          won,
      ])
  return samples
コード例 #4
0
ファイル: KNearestGames.py プロジェクト: ssj018/maxent
 def selectColumns(self, matrix, teamid):
   """Reduce each game row to [win, home, points, leaguerank].

   win is 1 when column 0 is 'W', home is 1 when column 5 contains 'vs',
   points is column 19 as loaded. The league rank is read from the first
   line of '<DATA_PATH><column 1>.l' at the position teamid has in the
   team id file.
   """
   DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
   TEAMID_FILE = DATA_PATH + 'teamidshortname.csv'
   TEAMIDS = [entry[0] for entry in loadMatrixFromFile(TEAMID_FILE)]
   selected = []
   for game in matrix:
     if game[0] == 'W':
       win = 1
     else:
       win = 0
     if 'vs' in game[5]:
       home = 1
     else:
       home = 0
     points = game[19]
     # The rank file is named after column 1 of the row.
     ranks = loadMatrixFromFile(DATA_PATH + game[1] + '.l')[0]
     selected.append([win, home, points, ranks[TEAMIDS.index(teamid)]])
   return selected
コード例 #5
0
ファイル: NLTKNaiveBayes.py プロジェクト: ssj018/maxent
def teamMain():
    DIR = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(labels)

        classifier = NaiveBayesClassifier.train(train)
        res = classifier.batch_classify(test)

        # accuracy
        count = 0
        for i in range(len(res)):
            if labels[i] == res[i]:
                count = count + 1

        countTotal = countTotal + count
        print 'INFO: Accuracy(', teamNames[teamIds.index(
            team)], ')', count / float(len(res))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
コード例 #6
0
ファイル: NLTKNaiveBayes.py プロジェクト: ssj018/maxent
def buildTestingLabels(inputFile):
    """Return column 3 of every row loaded from inputFile, as a list."""
    return [record[3] for record in loadMatrixFromFile(inputFile)]
コード例 #7
0
ファイル: NLTKMaxent.py プロジェクト: csrgxtu/maxent
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    train = buildTrainingSets(DIR + team + '-train.csv')
    test = buildTestingSets(DIR + team + '-test.csv')
    labels = buildTestingLabels(DIR + team + '-test.csv')
    total = total + len(labels)
    
    # train
    classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000)
    
    # test
    count = 0
    for i in range(len(labels)):
      pdist = classifier.prob_classify(test[i])
      if pdist.prob('L') >= pdist.prob('W'):
        flag = 'L'
      else:
        flag = 'W'
      
      #print 'DEBUG: ', flag, labels[i]
      if flag == labels[i]:
        count = count + 1
        
    print 'INFO: accuracy ', team, " ", float(count)/len(labels)
コード例 #8
0
ファイル: NLTKMaxent.py プロジェクト: csrgxtu/maxent
def buildTestingLabels(inputFile):
  """Return column 3 of every row loaded from inputFile, as a list."""
  return [record[3] for record in loadMatrixFromFile(inputFile)]
コード例 #9
0
ファイル: CVSVM-V1.0.py プロジェクト: csrgxtu/maxent
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
    trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
    testData = buildTestingSets(DIR + team + '-test.csv.knn')
    testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
    total = total + len(testLabels)

    svm = cv2.SVM()
    svm.train(trainData, trainLabels, params=svm_params)
    svm.save('svm_data.dat')
    
    # Accuracy
    count = 0
    for i in range(len(testLabels)):
      ret = svm.predict(np.array([testData[i]]))
      if ret == testLabels[i][0]:
        count = count + 1
    countTotal = countTotal + count
    print 'INFO: Accuracy(', team, ')', count/float(len(testLabels))
  print 'INFO: Total Accuracy: ', countTotal/float(total)
コード例 #10
0
ファイル: CVKNN-V1.0.py プロジェクト: csrgxtu/maxent
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    trainData = buildTrainingSets(DIR + team + '-train.csv')
    trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
    testData = buildTestingSets(DIR + team + '-test.csv')
    testLabels = buildTestingLabels(DIR + team + '-test.csv')
    total = total + len(testLabels)

    knn = cv2.KNearest()
    knn.train(trainData, trainLabels)

    # Accuracy
    count = 0
    for i in range(len(testLabels)):
      ret, results, neighbours, dist = knn.find_nearest(np.array([testData[i]]), 31)
      if results[0][0] == testLabels[i][0]:
        count = count + 1

    countTotal = countTotal + count
    print 'INFO: Accuracy(', teamNames[teamIds.index(team)], ')', count/float(len(testLabels))
  print 'INFO: Total Accuracy: ', countTotal/float(total)
コード例 #11
0
def insertPlayer(cur):
    """Insert every player of every (team, season) roster into Player.

    Iterates the module-level teamIds and seasons, resolves the TeamID
    and SeasonID primary keys through cur, and inserts one Player row per
    line of '<basePath><team>.<season>.player.csv' (columns 0..5: name,
    position, height, weight, age, experience).

    All statements are parameterized: the driver quotes the values, so a
    value containing a quote character can no longer break or alter the
    SQL the way the former string interpolation could.

    cur -- DB-API cursor using the '%s' parameter style (as MySQLdb does).
    """
    team_sql = "select TeamID from Team where StatsID = %s"
    season_sql = ("select SeasonID from Season "
                  "where Season = %s and Season_SeasonTypeID = 2")
    insert_sql = ("insert into Player ("
                  "Name, Position, Height, Weight, Age, Experience, "
                  "CreatedBy, CreatedTime, Player_TeamID, Player_SeasonID) "
                  "value (%s, %s, %s, %s, %s, %s, "
                  "'archer', '2015-06-05 16:44:00', %s, %s)")

    for team in teamIds:
        cur.execute(team_sql, (team,))
        TeamID = cur.fetchone()[0]

        for season in seasons:
            cur.execute(season_sql, (season,))
            SeasonID = cur.fetchone()[0]

            matrix = loadMatrixFromFile(basePath + team + '.' + season +
                                        '.player.csv')
            for row in matrix:
                params = (row[0], row[1], row[2], row[3], row[4], row[5],
                          TeamID, SeasonID)
                # Progress trace: the statement plus the values bound to it.
                print('%s %r' % (insert_sql, params))
                cur.execute(insert_sql, params)
コード例 #12
0
ファイル: CVKNN-V1.0.py プロジェクト: ssj018/maxent
def buildTestingSets(inputFile):
    """Return columns 0..2 of every row of inputFile as a float32 array.

    Columns 1 and 2 are converted with float() first; column 0 is
    converted by the final astype.
    """
    samples = [[record[0], float(record[1]), float(record[2])]
               for record in loadMatrixFromFile(inputFile)]
    return np.array(samples).astype(np.float32)
コード例 #13
0
ファイル: NLTKMaxent.py プロジェクト: ssj018/maxent
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(labels)

        # train
        classifier = nltk.MaxentClassifier.train(train,
                                                 'IIS',
                                                 trace=0,
                                                 max_iter=1000)

        # test
        count = 0
        for i in range(len(labels)):
            pdist = classifier.prob_classify(test[i])
            if pdist.prob('L') >= pdist.prob('W'):
                flag = 'L'
            else:
                flag = 'W'

            #print 'DEBUG: ', flag, labels[i]
            if flag == labels[i]:
                count = count + 1

        print 'INFO: accuracy ', team, " ", float(count) / len(labels)
コード例 #14
0
ファイル: CVKNN.py プロジェクト: ssj018/maxent
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
        testData = buildTestingSets(DIR + team + '-test.csv')
        testLabels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(testLabels)

        knn = cv2.KNearest()
        knn.train(trainData, trainLabels)

        # Accuracy
        count = 0
        for i in range(len(testLabels)):
            ret, results, neighbours, dist = knn.find_nearest(
                np.array([testData[i]]), 11)
            if results[0][0] == testLabels[i][0]:
                count = count + 1

        countTotal = countTotal + count
        print 'INFO: Accuracy(', teamNames[teamIds.index(
            team)], ')', count / float(len(testLabels))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
コード例 #15
0
ファイル: CVKNN-V2.0.py プロジェクト: csrgxtu/maxent
def buildTestingLabels(inputFile):
  """Return column 11 of every row of inputFile as a float32 array.

  Each label is wrapped twice ([[value]]) before the array is built.
  """
  labels = [[[record[11]]] for record in loadMatrixFromFile(inputFile)]
  return np.array(labels).astype(np.float32)
コード例 #16
0
def buildTestingLabels(inputFile):
    """Return column 11 of every row of inputFile as a float32 array.

    Each label is wrapped twice ([[value]]) before the array is built.
    """
    labels = [[[record[11]]] for record in loadMatrixFromFile(inputFile)]
    return np.array(labels).astype(np.float32)
コード例 #17
0
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
        testData = buildTestingSets(DIR + team + '-test.csv.knn')
        testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
        total = total + len(testLabels)

        svm = cv2.SVM()
        svm.train(trainData, trainLabels, params=svm_params)
        svm.save('svm_data.dat')

        # Accuracy
        count = 0
        for i in range(len(testLabels)):
            ret = svm.predict(np.array([testData[i]]))
            if ret == testLabels[i][0]:
                count = count + 1
        countTotal = countTotal + count
        print 'INFO: Accuracy(', team, ')', count / float(len(testLabels))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
コード例 #18
0
ファイル: CVKNN-V2.0.py プロジェクト: csrgxtu/maxent
def buildTrainingSets(inputFile):
  """Return columns 0..10 of every row of inputFile as a float32 array.

  Columns 1..10 are converted with float() first; column 0 is converted
  by the final astype.
  """
  samples = []
  for record in loadMatrixFromFile(inputFile):
    # Explicit indexing keeps the IndexError for rows shorter than 11.
    samples.append([record[0]] + [float(record[col]) for col in range(1, 11)])

  return np.array(samples).astype(np.float32)
コード例 #19
0
ファイル: CVKNN-V1.0.py プロジェクト: csrgxtu/maxent
def buildTestingSets(inputFile):
  """Return columns 0..2 of every row of inputFile as a float32 array.

  Columns 1 and 2 are converted with float() first; column 0 is
  converted by the final astype.
  """
  samples = [[record[0], float(record[1]), float(record[2])]
             for record in loadMatrixFromFile(inputFile)]
  return np.array(samples).astype(np.float32)
コード例 #20
0
ファイル: GenerateDataForKNN.py プロジェクト: ssj018/maxent
def generateTestDataByTeams():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [row[1] for row in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

  for teamId in teamIds:
    res = generateTestDataByTeam(teamId)
    outputFile = DIR + teamId + '-test.csv.knn'
    print 'INFO: ', outputFile
    saveMatrixToFile(outputFile, res)
コード例 #21
0
ファイル: CVKNN-V1.0.py プロジェクト: csrgxtu/maxent
def buildTestingLabels(inputFile):
  """Return the win labels of inputFile as a float32 array.

  A row is labelled 1 when its column 3 is 'W' and 0 otherwise; each
  label is wrapped twice ([[label]]) before the array is built.
  """
  labels = [[[1 if record[3] == 'W' else 0]]
            for record in loadMatrixFromFile(inputFile)]
  return np.array(labels).astype(np.float32)
コード例 #22
0
ファイル: CVKNN.py プロジェクト: csrgxtu/maxent
def buildTestingSets(inputFile):
  """Return [column 0, rank flag] for every row of inputFile as float32.

  The rank flag is 0 when float(column 1) - float(column 2) is negative
  and 1 otherwise.
  """
  samples = []
  for record in loadMatrixFromFile(inputFile):
    difference = float(record[1]) - float(record[2])
    samples.append([record[0], 0 if difference < 0 else 1])

  return np.array(samples).astype(np.float32)
コード例 #23
0
ファイル: CVKNN.py プロジェクト: ssj018/maxent
def buildTestingSets(inputFile):
    """Return [column 0, rank flag] for every row of inputFile as float32.

    The rank flag is 0 when float(column 1) - float(column 2) is
    negative and 1 otherwise.
    """
    samples = []
    for record in loadMatrixFromFile(inputFile):
        difference = float(record[1]) - float(record[2])
        samples.append([record[0], 0 if difference < 0 else 1])

    return np.array(samples).astype(np.float32)
コード例 #24
0
ファイル: CVKNN.py プロジェクト: ssj018/maxent
def buildTestingLabels(inputFile):
    """Return the win labels of inputFile as a float32 array.

    A row is labelled 1 when its column 3 is 'W' and 0 otherwise; each
    label is wrapped twice ([[label]]) before the array is built.
    """
    labels = [[[1 if record[3] == 'W' else 0]]
              for record in loadMatrixFromFile(inputFile)]
    return np.array(labels).astype(np.float32)
コード例 #25
0
ファイル: NLTKNaiveBayes.py プロジェクト: ssj018/maxent
def buildTestingSets(inputFile):
    """Return one feature dict per row of inputFile.

    Each dict has 'HOME' (column 0 as loaded) and 'LeagueRank', which is
    0 when float(column 1) - float(column 2) is negative and 1 otherwise.
    """
    features = []
    for record in loadMatrixFromFile(inputFile):
        difference = float(record[1]) - float(record[2])
        rankFlag = 0 if difference < 0 else 1
        features.append({'HOME': record[0], 'LeagueRank': rankFlag})

    return features
コード例 #26
0
ファイル: NLTKMaxent.py プロジェクト: csrgxtu/maxent
def buildTestingSets(inputFile):
  """Return one feature dict per row of inputFile.

  Each dict has 'HOME' (column 0 as loaded) and 'LeagueRank', which is 0
  when float(column 1) - float(column 2) is negative and 1 otherwise.
  """
  features = []
  for record in loadMatrixFromFile(inputFile):
    difference = float(record[1]) - float(record[2])
    rankFlag = 0 if difference < 0 else 1
    features.append({'HOME': record[0], 'LeagueRank': rankFlag})

  return features
コード例 #27
0
ファイル: CVKNN-V2.0.py プロジェクト: ssj018/maxent
def buildTrainingSets(inputFile):
    """Return columns 0..10 of every row of inputFile as a float32 array.

    Columns 1..10 are converted with float() first; column 0 is
    converted by the final astype.
    """
    samples = []
    for record in loadMatrixFromFile(inputFile):
        # Explicit indexing keeps the IndexError for rows shorter than 11.
        numeric = [float(record[col]) for col in range(1, 11)]
        samples.append([record[0]] + numeric)

    return np.array(samples).astype(np.float32)
コード例 #28
0
def noneWithAVG(teamId, season):
    """Load a roster file and fill missing height/weight/age with the mean.

    Reads '<DIR><teamId>.<season>.player.csv'. Columns 2..5 of each row
    are taken as height, weight, age and experience. Heights written as
    'feet-inches' (two '-'-separated integers) become integer inches. In
    the height, weight and age columns every 'None' entry is replaced by
    the mean of the remaining entries; all other weight and age entries
    are left exactly as loaded. Experience is passed through unchanged.

    Returns a list of [height, weight, age, exp] rows, or [[]] when the
    file yields no rows. A column consisting only of 'None' raises
    ZeroDivisionError.
    """
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
    if len(mat) == 0:
        return [[]]

    def fillWithMean(column):
        # Mean over the known entries only, then patch the gaps in place.
        known = [float(value) for value in column if value != 'None']
        mean = sum(known) / float(len(known))
        for pos, value in enumerate(column):
            if value == 'None':
                column[pos] = mean

    heights = [row[2] for row in mat]
    weights = [row[3] for row in mat]
    ages = [row[4] for row in mat]
    exps = [row[5] for row in mat]

    # heights: feet-inches -> total inches (12 inches per foot)
    for pos, raw in enumerate(heights):
        if raw != 'None':
            parts = raw.split('-')
            heights[pos] = int(parts[0]) * 12 + int(parts[1])

    fillWithMean(heights)
    fillWithMean(weights)
    fillWithMean(ages)

    # one [height, weight, age, exp] row per player
    return [list(player) for player in zip(heights, weights, ages, exps)]
コード例 #29
0
from Utility import loadMatrixFromFile

DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'

mat = loadMatrixFromFile(DIR + '1610612741.csv.sorted')

# Win / lose tallies for the four league-rank bands l1..l4.
l1WinCount = l1LoseCount = 0
l2WinCount = l2LoseCount = 0
l3WinCount = l3LoseCount = 0
l4WinCount = l4LoseCount = 0

for row in mat:
    # Entry 5 of the first line of the '.l' file named by column 2.
    lr = float(loadMatrixFromFile(DIR + row[2] + '.l')[0][5])

    # Band the value in steps of 0.02; cat is False when no band matches.
    if 0 <= lr < 0.02:
        cat = 'l1'
    elif 0.02 <= lr < 0.04:
        cat = 'l2'
    elif 0.04 <= lr < 0.06:
        cat = 'l3'
    elif lr >= 0.06:
        cat = 'l4'
    else:
        cat = False

    if cat == 'l1':
        if row[0] == 'W':
            l1WinCount += 1
        elif row[0] == 'L':
            l1LoseCount += 1
コード例 #30
0
#!/usr/bin/env python
# coding = utf8
# Author: Archer Reilly
# Date: 20/Nov/2014
# File: TransformMatrixBuilder.py
# Desc: build transform matrix from dates
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, appendlst2file, saveMatrixToFile
from os import listdir


# First line of dates.csv.
dates = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/leaguerank/dates.csv')[0]
"""
dirs = listdir('/home/archer/Documents/maxent/data/basketball/leaguerank/')
sortedFiles = []
for f in dirs:
  if f.endswith('.csv.sorted'):
    sortedFiles.append(f)
"""
teamidabbrs = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/leaguerank/teamidshortname.csv')
# Parallel lists: '<column 0>.csv.sorted' and column 1 of every row.
sortedFiles = [entry[0] + '.csv.sorted' for entry in teamidabbrs]
sortedNames = [entry[1] for entry in teamidabbrs]

# generateRow
# generate a row from matrix for transform matrix
#
# @param matrix
コード例 #31
0
ファイル: SortByTime.py プロジェクト: csrgxtu/maxent
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 19/Nov/2014
# File: SortByTime.py
# Desc: sort the content in original data file by time
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile
from os import listdir
from datetime import datetime

"""
matrix = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/1610612766.csv')
print matrix
"""

# Directory holding the raw per-team game files (names start with '161').
BASKETBALL_DIR = '/home/archer/Documents/Python/maxent/data/basketball/'

dirs = listdir(BASKETBALL_DIR)
for f in dirs:
  # Skip the '.sorted' outputs of an earlier run: they also start with
  # '161' and would otherwise be re-sorted into '<name>.sorted.sorted'.
  if not f.startswith('161') or f.endswith('.sorted'):
    continue
  print('Process file: ' + f)
  matrix = loadMatrixFromFile(BASKETBALL_DIR + f)
  # Column 1 is parsed as a date with the '%b %d:%Y' format.
  matrixa = sorted(matrix, key=lambda x: datetime.strptime(x[1], '%b %d:%Y'))
  saveMatrixToFile(BASKETBALL_DIR + f + '.sorted', matrixa)
コード例 #32
0
ファイル: probility.py プロジェクト: csrgxtu/maxent
from Utility import loadMatrixFromFile

DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'

mat = loadMatrixFromFile(DIR + '1610612741.csv.sorted')

# Win / lose tallies for the four league-rank bands l1..l4.
l1WinCount = l1LoseCount = 0
l2WinCount = l2LoseCount = 0
l3WinCount = l3LoseCount = 0
l4WinCount = l4LoseCount = 0

for row in mat:
  # Entry 5 of the first line of the '.l' file named by column 2.
  lr = float(loadMatrixFromFile(DIR + row[2] + '.l')[0][5])

  # Band the value in steps of 0.02; cat is False when no band matches.
  if 0 <= lr < 0.02:
    cat = 'l1'
  elif 0.02 <= lr < 0.04:
    cat = 'l2'
  elif 0.04 <= lr < 0.06:
    cat = 'l3'
  elif lr >= 0.06:
    cat = 'l4'
  else:
    cat = False

  if cat == 'l1':
    if row[0] == 'W':
      l1WinCount += 1
    elif row[0] == 'L':
      l1LoseCount += 1
コード例 #33
0
ファイル: InsertTeam.py プロジェクト: csrgxtu/maxent
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Archer
# Date: 05/Jun/2015
# File: InsertTeam.py
# Desc: insert the Team NBA.Team table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile

id_names = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidname-18-Nov-2014.csv')
id_shortnames = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv')

# One entry per id_names row: its first two columns followed by column 1
# of every id_shortnames row whose column 0 matches.
matrix = []
for item in id_names:
    tmp = [item[0], item[1]]
    tmp.extend(item1[1] for item1 in id_shortnames if item1[0] == item[0])
    matrix.append(tmp)

con = mdb.connect('localhost', 'root', 'root', 'NBA')

with con:
    cur = con.cursor()
    for item in matrix:
        sql = "insert into Team (\
                StatsID,\
                NameEN,\
コード例 #34
0
ファイル: InsertTeamStats.py プロジェクト: csrgxtu/maxent
def insertPlayoff(teamIds, cur):
    """Insert one TeamStats row per line of every team's playoff file.

    teamIds -- iterable of team id strings; '<basePath><team>.playoff.csv'
               is loaded for each of them.
    cur     -- database cursor used for the season lookup and the inserts.

    Each row is first passed through none20(). Row columns used: 0 result,
    1 and 2 joined with a space as the date, 3 season, 6 match-up string,
    7..19 the thirteen stat columns Fgm..Pf, 20 points.
    """
    basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'

    for team in teamIds:
        matrix = loadMatrixFromFile(basePath + team + '.playoff.csv')
        for row in matrix:
            row = none20(row)

            # The first and last three characters of the match-up string
            # are resolved to ids through findId().
            teamID = findId(row[6][0:3])
            opponentTeamID = findId(row[6][-3:])
            home = isHome(row[6])

            # Season rows with Season_SeasonTypeID = 3 are the ones queried here.
            sql = "select SeasonID from Season where Season = '%s' and Season_SeasonTypeID = 3" % row[3]
            cur.execute(sql)
            seasonID = cur.fetchone()[0]

            # The placeholders below are positional: their order must stay
            # in step with the column list and with the value tuple.
            sql = "insert into TeamStats (\
                    TeamStats_TeamID,\
                    TeamStats_SeasonID,\
                    Result,\
                    Date,\
                    Home,\
                    Fgm,\
                    Fga,\
                    3pm,\
                    3pa,\
                    Ftm,\
                    Fta,\
                    Oreb,\
                    Dreb,\
                    Ast,\
                    Stl,\
                    Blk,\
                    Tov,\
                    Pf,\
                    CreatedBy,\
                    CreatedTime,\
                    Points,\
                    OpponentTeamID) value (\
                    %d,\
                    %d,\
                    '%c',\
                    '%s',\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    %d,\
                    'archer',\
                    '2015-06-05 15:02:22',\
                    %d,\
                    %d)" %\
                    (\
                        int(teamID),\
                        int(seasonID),\
                        row[0],\
                        row[1] + ' ' + row[2],\
                        int(home),\
                        int(row[7]),\
                        int(row[8]),\
                        int(row[9]),\
                        int(row[10]),\
                        int(row[11]),\
                        int(row[12]),\
                        int(row[13]),\
                        int(row[14]),\
                        int(row[15]),\
                        int(row[16]),\
                        int(row[17]),\
                        int(row[18]),\
                        int(row[19]),\
                        int(row[20]),\
                        int(opponentTeamID)
                    )

            # print sql
            cur.execute(sql)
コード例 #35
0
ファイル: SortByTime.py プロジェクト: ssj018/maxent
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 19/Nov/2014
# File: SortByTime.py
# Desc: sort the content in original data file by time
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile
from os import listdir
from datetime import datetime
"""
matrix = loadMatrixFromFile('/home/archer/Documents/maxent/data/basketball/1610612766.csv')
print matrix
"""

# Directory holding the raw per-team game files (names start with '161').
BASKETBALL_DIR = '/home/archer/Documents/Python/maxent/data/basketball/'

dirs = listdir(BASKETBALL_DIR)
for f in dirs:
    # Skip the '.sorted' outputs of an earlier run: they also start with
    # '161' and would otherwise be re-sorted into '<name>.sorted.sorted'.
    if not f.startswith('161') or f.endswith('.sorted'):
        continue
    print('Process file: ' + f)
    matrix = loadMatrixFromFile(BASKETBALL_DIR + f)
    # Column 1 is parsed as a date with the '%b %d:%Y' format.
    matrixa = sorted(matrix,
                     key=lambda x: datetime.strptime(x[1], '%b %d:%Y'))
    saveMatrixToFile(BASKETBALL_DIR + f + '.sorted', matrixa)
コード例 #36
0
ファイル: PrepareTrainingData6TR.py プロジェクト: csrgxtu/crf
# GameName HomeAway FGM FGA 3PM 3PA FTM FTA OREB DREB AST TOV STL BLK PF PTS WinLose
#
# Produced By CSRGXTU
import sys

from Utility import loadMatrixFromFile, saveCrfMatrix, loadTeamRanks

if len(sys.argv) != 2:
  print 'Usage: PrepareTrainingData6.py teamid'
  sys.exit(1)

teamid = sys.argv[1]

DATA_PATH = '../data/TeamRank/'

mat = loadMatrixFromFile(DATA_PATH + teamid + '.csv.sorted')

trainMat = []
for i in range(len(mat)):
  tmp = []
  # GameName
  tmp.append('G' + str(i + 1))
  # HomeAway, home is H, else A
  if '@' in mat[i][5]:
    tmp.append('H')
  else:
    tmp.append('A')
  # FGM
  tmp.append(mat[i][6])
  # FGA
  tmp.append(mat[i][7])
コード例 #37
0
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 21/Nov/2014
# File: BatchGenerateLeagueRank.py
# Desc: use LeagueRank batch generate the league rank for each .m
# file in the data dir
#
# Produced By CSRGXTU
from LeagueRank import LeagueRank
from Utility import loadMatrixFromFile, readmatricefromfile, appendlst2file

DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
DATES_FILE = DATA_PATH + 'dates.csv'
L = [1/float(30) for e in range(1, 31)]

dates = loadMatrixFromFile(DATES_FILE)[0]

for d  in dates[3633:]:
  print 'INFO: generate LeagueRank for ' + d + '.m'
  #o = LeagueRank(L, readmatricefromfile(DATA_PATH + d + '.m'), 0.00000001, 100000)
  o = LeagueRank(L, readmatricefromfile(DATA_PATH + d + '.m'), 0.001, 10)
  # print 'Debug: ',
  # print o.rank()
  appendlst2file(o.rank(), DATA_PATH + d + '.l')
  print '    Done'
  # break
コード例 #38
0
#!/usr/bin/env python
#
# Author: Archer Reilly
# Date: 15/May/2015
# File: MainDownloadOdds.py
# Desc: download the raw data from web site
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile
from OddsDownloader import OddsDownloader

res = loadMatrixFromFile('./SeasonId')

# One downloader run per index 1..int(column 2) of every row; column 0 is
# passed through to OddsDownloader unchanged.
for item in res:
    lastIndex = int(item[2])
    index = 1
    while index <= lastIndex:
        o = OddsDownloader(item[0], index)
        o.run()
        index += 1
コード例 #39
0
ファイル: InsertTeamStats.py プロジェクト: csrgxtu/maxent
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Archer
# File: InsertTeamStats.py
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

# All three lookup tables are read from the leaguerank data directory.
_LEAGUERANK_DIR = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'

teamIds = loadTeamIds(_LEAGUERANK_DIR + 'teamidshortname.csv')
seasons = loadSeasons(_LEAGUERANK_DIR + 'seasons-18-Nov-2014.txt')
TeamID2TeamShortNames = loadMatrixFromFile(_LEAGUERANK_DIR + 'TeamID2TeamShortName.csv')

def findId(shortName):
    """Return column 0 of the first TeamID2TeamShortNames row whose
    column 1 equals shortName, or False when no row matches."""
    matches = (entry[0] for entry in TeamID2TeamShortNames
               if entry[1] == shortName)
    return next(matches, False)

def isHome(matchUpString):
    """Return 1 when matchUpString describes a home game, else 0.

    A matchup is written from the point of view of the team named first:
    'AAA vs. BBB' is a home game for AAA, 'AAA @ BBB' is AAA playing at BBB.
    The previous version returned 1 for '@', i.e. it flagged away games as
    home games.
    """
    if 'vs.' in matchUpString:
        return 1
    return 0

def none20(lst):
    for i in range(len(lst)):
コード例 #40
0
# GameName HomeAway FGM FGA 3PM 3PA FTM FTA OREB DREB AST TOV STL BLK PF PTS WinLose
#
# Produced By CSRGXTU
import sys

from Utility import loadMatrixFromFile, saveCrfMatrix

# the script takes exactly one argument: the team id
if len(sys.argv) != 2:
    print('Usage: PrepareTrainingData.py teamid')
    sys.exit(1)

DATA_PATH = '../data/TeamRank/'
teamid = sys.argv[1]

# rows of <DATA_PATH><teamid>.csv.sorted, one per game
mat = loadMatrixFromFile(DATA_PATH + teamid + '.csv.sorted')

# rows of the training matrix are collected here
trainMat = []
for i in range(len(mat)):
    tmp = []
    # GameName
    tmp.append('G' + str(i + 1))
    # HomeAway, home is H, else A
    if '@' in mat[i][5]:
        tmp.append('H')
    else:
        tmp.append('A')
    # FGM
    tmp.append(mat[i][6])
    # FGA
    tmp.append(mat[i][7])
コード例 #41
0
def _heightToInches(height):
  """Convert a 'feet-inches' string such as '6-7' to a number of inches."""
  parts = height.split('-')
  return int(parts[0]) * 12 + int(parts[1])


def _fillNoneWithAvg(values):
  """Replace, in place, every 'None' entry of values with the mean of the
  remaining entries and return values.

  Entries that are not 'None' are left untouched (they are only passed
  through float() to compute the mean). When every entry is 'None' there is
  nothing to average, so the list is returned unchanged instead of dividing
  by zero.
  """
  known = [float(v) for v in values if v != 'None']
  if not known:
    return values

  avg = sum(known) / float(len(known))
  for i, v in enumerate(values):
    if v == 'None':
      values[i] = avg
  return values


def noneWithAVG(teamId, season):
  """Load <DIR><teamId>.<season>.player.csv and fill missing player values.

  Columns 2, 3, 4 and 5 of every row are read as height, weight, age and
  experience. Heights are converted from 'feet-inches' strings to inches.
  In the height, weight and age columns each 'None' entry is replaced by
  the average of the known entries of that column; the experience column is
  copied through unchanged.

  Args:
    teamId: team id string used to build the file name.
    season: season string used to build the file name.

  Returns:
    A list of [height, weight, age, exp] rows, or [[]] when the file yields
    no rows. A column consisting only of 'None' keeps its 'None' entries.
  """
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
  if len(mat) == 0:
    return [[]]

  heights = [row[2] for row in mat]
  weights = [row[3] for row in mat]
  ages = [row[4] for row in mat]
  exps = [row[5] for row in mat]

  # heights arrive as 'feet-inches'; convert the known ones to inches first
  heights = [h if h == 'None' else _heightToInches(h) for h in heights]

  _fillNoneWithAvg(heights)
  _fillNoneWithAvg(weights)
  _fillNoneWithAvg(ages)

  # make a mat
  return [[h, w, a, e] for h, w, a, e in zip(heights, weights, ages, exps)]
コード例 #42
0
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

# Reference data loaded once at import time from hard-coded absolute paths.
# TeamID2TeamShortNames is the lookup table scanned by findId: column 1 is
# compared against the short name and column 0 is returned as the id.
teamIds = loadTeamIds(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv'
)
seasons = loadSeasons(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt'
)
TeamID2TeamShortNames = loadMatrixFromFile(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv'
)


def findId(shortName):
    """Look up a team id by short name.

    Scans TeamID2TeamShortNames in order and returns column 0 of the first
    row whose column 1 equals shortName; returns False if there is none.
    """
    found = False
    for entry in TeamID2TeamShortNames:
        if entry[1] != shortName:
            continue
        found = entry[0]
        break
    return found


def isHome(matchUpString):
    """Return 1 when matchUpString describes a home game, else 0.

    A matchup is written from the point of view of the team named first:
    'AAA vs. BBB' is a home game for AAA, 'AAA @ BBB' is AAA playing at BBB.
    The previous version returned 1 for '@', i.e. it flagged away games as
    home games.
    """
    return 1 if 'vs.' in matchUpString else 0
コード例 #43
0
ファイル: kmeans.py プロジェクト: csrgxtu/BRRS
# coding=utf8
#
# Author: Archer Reilly
# Date: 23/Feb/2016
# Desc: use k-means to cluster the books
# Usage: ./kmeans.py inputfile
#
# Produced By BR
import sys
from sklearn.cluster import KMeans

from Utility import loadMatrixFromFile

# exactly one argument is expected: the input file
if len(sys.argv) != 2:
    print('Usage: ./kmeans.py inputfile')
    sys.exit(1)

inputfile = sys.argv[1]

mat = loadMatrixFromFile(inputfile)
# the first column of every row is dropped before clustering
# NOTE(review): presumably an id/label column -- confirm against the input
nmat = [row[1:] for row in mat]

num_clusters = 120  # douban's book have 120 categories

km = KMeans(n_clusters=num_clusters)
km.fit(nmat)
# one cluster label per input row, as a plain list
clusters = km.labels_.tolist()
print(clusters)
コード例 #44
0
def insertPlayoff(teamIds, cur):
    """Insert the playoff games of every team into the TeamStats table.

    For each team in teamIds the file <basePath><team>.playoff.csv is loaded
    and every row (after none20) becomes one TeamStats record. The season id
    is looked up in the Season table by row[3] with Season_SeasonTypeID = 3.

    Both statements are executed with query parameters instead of being
    assembled with string formatting, so text coming from the csv files is
    never spliced into the SQL.

    Args:
        teamIds: iterable of team id strings used to build the file names.
        cur: database cursor providing execute(sql, params) and fetchone();
            '%s' placeholders are used, as expected by the MySQLdb module
            this file imports.

    Raises:
        TypeError: when the season lookup returns no row (fetchone() is None).
        ValueError: when a numeric column cannot be converted with int().
    """
    basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'

    seasonSql = ("select SeasonID from Season "
                 "where Season = %s and Season_SeasonTypeID = 3")

    # CreatedBy / CreatedTime stay the fixed literals of the original query
    insertSql = (
        "insert into TeamStats ("
        "TeamStats_TeamID, TeamStats_SeasonID, Result, Date, Home, "
        "Fgm, Fga, 3pm, 3pa, Ftm, Fta, Oreb, Dreb, Ast, Stl, Blk, Tov, Pf, "
        "CreatedBy, CreatedTime, Points, OpponentTeamID) "
        "value ("
        "%s, %s, %s, %s, %s, "
        "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
        "'archer', '2015-06-05 15:02:22', %s, %s)"
    )

    for team in teamIds:
        matrix = loadMatrixFromFile(basePath + team + '.playoff.csv')
        for row in matrix:
            row = none20(row)

            # row[6] is the matchup string: first three characters are the
            # team, last three the opponent
            # NOTE(review): findId returns False for an unknown short name,
            # which int() below silently turns into 0
            teamID = findId(row[6][0:3])
            opponentTeamID = findId(row[6][-3:])
            home = isHome(row[6])

            cur.execute(seasonSql, (row[3],))
            seasonID = cur.fetchone()[0]

            # row[7]..row[19] fill Fgm..Pf in column order, row[20] is Points
            stats = tuple(int(row[i]) for i in range(7, 21))
            params = (
                int(teamID),
                int(seasonID),
                row[0],
                row[1] + ' ' + row[2],
                int(home),
            ) + stats + (int(opponentTeamID),)

            cur.execute(insertSql, params)
コード例 #45
0
#!/usr/bin/env python
# coding=utf-8
#
# Author: Archer Reilly
# Date: 23/Dec/2015
# File: StaticMongo.py
# Desc: find which books aren't in the mongo database
#
# Produced By BR
from Utility import loadMatrixFromFile, appendMatrixToFileUtf
from IsbnCheckIn import IsbnCheckIn

filename = '/home/archer/Downloads/data.csv'
mat = loadMatrixFromFile(filename)
i = IsbnCheckIn('192.168.100.2', 27017)
Res = [] # store not ins

for row in mat:
    if not i.isIn(row[1]):
        print row[1], "not in database"
        Res.append(row)

appendMatrixToFileUtf('NotFound.csv', Res)
コード例 #46
0
ファイル: DisplayLeagueRank.py プロジェクト: ssj018/maxent
# Date: 21/Nov/2014
# File: DisplayLeagueRank.py
# Desc: display league rank info
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile
import operator

DATA_PATH = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'

NAME_FILE = DATA_PATH + 'teamidname-18-Nov-2014.csv'

# earlier runs pointed RANK_FILE at 'MAR 31:2014.l' and 'APR 16:2014.l'
RANK_FILE = DATA_PATH + '2013-14.l'
# team names come from column 1 of the name file, ranks from the first row
# of the rank file; both are matched up by position
names = [x[1] for x in loadMatrixFromFile(NAME_FILE)]
ranks = loadMatrixFromFile(RANK_FILE)[0]

res = {}
for i, name in enumerate(names):
  res[name] = ranks[i]

# order the (name, rank) pairs by rank value, ascending
# NOTE(review): if the loaded ranks are strings this ordering is
# lexicographic, not numeric -- confirm what loadMatrixFromFile returns
sorted_res = list(res.items())
sorted_res.sort(key=operator.itemgetter(1))
print('INFO: All Teams')
print("%22s    %-15s" % ('Team', 'LeagueRank'))
for item in sorted_res:
  print("%22s    %-15s" % (item[0], item[1]))
コード例 #47
0
#!/usr/bin/env python
# coding=utf-8
# Author: Archer Reilly
# Date: 21/Nov/2014
# File: BatchGenerateLeagueRank.py
# Desc: use LeagueRank to batch-generate the league rank for each .m
# file in the data dir
#
# Produced By CSRGXTU
from LeagueRank import LeagueRank
from Utility import loadMatrixFromFile, readmatricefromfile, appendlst2file

DATA_PATH = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
DATES_FILE = DATA_PATH + 'dates.csv'
# first LeagueRank argument: 30 identical entries, each 1/30
L = [1 / float(30)] * 30

# the date labels are the first row of dates.csv
dates = loadMatrixFromFile(DATES_FILE)[0]

# only the dates from index 3633 onwards are processed
# NOTE(review): presumably a resume point from an earlier run -- confirm
for d in dates[3633:]:
    print('INFO: generate LeagueRank for ' + d + '.m')
    matrix = readmatricefromfile(DATA_PATH + d + '.m')
    # an alternative run used 0.00000001 and 100000 as the last two arguments
    o = LeagueRank(L, matrix, 0.001, 10)
    # the computed ranks go to the .l file next to the .m input
    appendlst2file(o.rank(), DATA_PATH + d + '.l')
    print('    Done')