예제 #1
0
def createPlayerMap(gameids,currentMap):

    #use defaultdict to map playerids to game stats
    playerMap = defaultdict(OrderedDict)
    #playerMap = OrderedDict()

    print(sorted(gameids,key=lambda x: x[1]))

    #will keep track of playerids of injured players on both teams
    injuredIDMap = ReadWriteFiles.readInjuredIDMap()
    
    for gameid in sorted(gameids,key=lambda x: x[1]):
        gameid = gameid[0]
        #print(gameid)
        gameBoxScoreURL = "http://espn.go.com/nba/boxscore?gameId=" + gameid
        boxScorePage = requests.get(gameBoxScoreURL)
        boxScoreTree = html.fromstring(boxScorePage.content)

        gameInfoURL = "http://espn.go.com/nba/game?gameId=" + gameid
        gameInfoPage = requests.get(gameInfoURL)
        gameInfoTree = html.fromstring(gameInfoPage.content)

        #first get game data not specific to each player (time,date,score,team numbers etc)
        gameDataList = []
        #print(gameid)

        try:
            #game_time_info = boxScoreTree.xpath("//div[@class='game-time-location']/p/text()")[0] OLD ESPN
            game_time_info = gameInfoTree.xpath("//div[@class='game-date-time']/span/@data-date")[0]
            OTfinalStatus = gameInfoTree.xpath("//div[@class='game-status']/span/text()")[0]
            if("OT" in OTfinalStatus):

                [_,ot] = OTfinalStatus.split("/")
                if (ot == "OT"):
                    overtime = 1
                else:
                    overtime = int(ot[0])
            else:
                overtime = 0
            #print(OTfinalStatus)
            #print(game_time_info)

            #[awayName,awayScore] = boxScoreTree.xpath("//div[@class='team away']/div/h3/*/text()") OLD ESPN
            #[homeName,homeScore] = boxScoreTree.xpath("//div[@class='team home']/div/h3/*/text()") OLD ESPN

            awayName = boxScoreTree.xpath("//div[@class='team away']/div[@class='content']/div[@class='team-container']/div[@class='team-info']/a/span[@class='short-name']/text()")[0]
            homeName = boxScoreTree.xpath("//div[@class='team home']/div[@class='content']/div[@class='team-container']/div[@class='team-info']/a/span[@class='short-name']/text()")[0]

            awayScore = boxScoreTree.xpath("//div[@class='team away']/div[@class='content']/div[@class='score-container']/div/text()")[0]
            homeScore = boxScoreTree.xpath("//div[@class='team home']/div[@class='content']/div[@class='score-container']/div/text()")[0]

            #scoreDifference = int(awayScore) - int(homeScore)


            #print(scoreDifference)
            #print(awayName)
            #print(homeName)


            #keep track of which players were on the away team and which were on the home team
            #[awayTeam] = boxScoreTree.xpath("//table/thead[position()=1]/tr[@class='team-color-strip']/th/text()")
            awayTeamNum = Util.team_dict[awayName]
            #awayPlayeridList = boxScoreTree.xpath("//table/tbody[position()=1 or position()=2]/tr[contains(@class,'player-46')]/@class") OLD ESPN
            #awayPlayeridList = [x.split("player-46-")[1] for x in awayPlayeridList]
            
            #print(boxScoreTree.xpath("//tr/td/a/@href"))
            #print(boxScoreTree.xpath("//div[@class='col column-one gamepackage-away-wrap']/*"))
            #print(boxScoreTree.xpath("//div[@class='col column-one gamepackage-away-wrap']/div[@class='sub-module']/*"))
            #print(boxScoreTree.xpath("//div[@class='col column-one gamepackage-away-wrap']/div[@class='sub-module']/div/table/tbody/tr/td/a/@href"))
            awayPlayerURLList = boxScoreTree.xpath("//div[@class='col column-one gamepackage-away-wrap']/div[@class='sub-module']/div/table/tbody/tr/td/a/@href")
            awayPlayeridList = [x.split("_/id/")[1] for x in awayPlayerURLList]
            awayStarteridList = awayPlayeridList[0:5]
            awayBenchidList = awayPlayeridList[5:]
            #print(awayPlayeridList)



           # [homeTeam] = boxScoreTree.xpath("//table/thead[position()=4]/tr[@class='team-color-strip']/th/text()")
            homeTeamNum = Util.team_dict[homeName]
            #homePlayeridList = boxScoreTree.xpath("//table/tbody[position()=4 or position()=5]/tr[contains(@class,'player-46')]/@class")
            #homePlayeridList = [x.split("player-46-")[1] for x in homePlayeridList]

            homePlayerURLList = boxScoreTree.xpath("//div[@class='col column-two gamepackage-home-wrap']/div[@class='sub-module']/div/table/tbody/tr/td/a/@href")
            homePlayeridList = [x.split("_/id/")[1] for x in homePlayerURLList]
            homeStarteridList = homePlayeridList[0:5]
            homeBenchidList = homePlayeridList[5:]
            #print(homePlayeridList)





            awayInjuredIDList = []
            #gets player stats for away players and appends that to the game stats
            for playerid in awayStarteridList:
                xPathStatsString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/*/text()"
                xPathPositionString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/td/*/text()"
                #print(xPathPositionString)

                gameStatsList = []
                # stores own team's number and also the opposing team's number
                # 0 for away team then 1 for being a starter and then away score (own score) then home score (other score)
                
                gameStatsList += Util.data_date_convert(game_time_info) + [awayTeamNum, homeTeamNum] + [0, 1, int(awayScore),int(homeScore), overtime]      
 
                playerStatsList = [boxScoreTree.xpath(xPathPositionString)[1]] + boxScoreTree.xpath(xPathStatsString)
                
                #print(playerStatsList)

                if("DNP" not in playerStatsList[1] or "COACH'S DECISION" in playerStatsList[1] or len(playerStatsList) != 2):         
                    playerStatsList = Util.playerStatsConvert(playerStatsList)
                   # print(playerStatsList)
                   # playerMap[playerid].append(gameid)
                   # print(gameStatsList+playerStatsList)
        
                    playerMap[playerid][gameid]=(gameStatsList+playerStatsList)
                    #playerMap[playerid] = OrderedDict({gameid:(gameStatsList+playerStatsList)})

                else:
                    awayInjuredIDList.append(playerid)
                    print(playerid + " is injured, so stats from game will not count")



            #gets player stats for away players and appends that to the game stats
            for playerid in awayBenchidList:
                xPathStatsString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/*/text()"
                xPathPositionString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/td/*/text()"
                #print(xPathPositionString)

                gameStatsList = []
                # stores own team's number and also the opposing team's number
                # 0 for away team then 0 for coming off the benchand then away score (own score) then home score (other score)
                
                gameStatsList += Util.data_date_convert(game_time_info) + [awayTeamNum, homeTeamNum] + [0, 0,int(awayScore),int(homeScore), overtime]      
 
                playerStatsList = [boxScoreTree.xpath(xPathPositionString)[1]] + boxScoreTree.xpath(xPathStatsString)
                
                #print(playerStatsList)

                if("DNP" not in playerStatsList[1] or "COACH'S DECISION" in playerStatsList[1]):         
                    playerStatsList = Util.playerStatsConvert(playerStatsList)
                   # print(playerStatsList)
                   # playerMap[playerid].append(gameid)
                   # print(gameStatsList+playerStatsList)
        
                    playerMap[playerid][gameid]=(gameStatsList+playerStatsList)
                    #playerMap[playerid] = OrderedDict({gameid:(gameStatsList+playerStatsList)})

                else:
                    awayInjuredIDList.append(playerid)
                    print(playerid + " is injured, so stats from game will not count")
        



            homeInjuredIDList = []
            #gets player stats for home players and appends that to the game stats
            for playerid in homeStarteridList:
                xPathStatsString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/*/text()"
               

                xPathPositionString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/td/*/text()"
                #print(xPathPositionString)

                gameStatsList = []
        
                # stores own team's number and also the opposing team's number
                # 1 for home team, 1 for starter and score difference is calc away score - home score
                #
                gameStatsList += Util.data_date_convert(game_time_info) + [homeTeamNum, awayTeamNum] + [1, 1, int(homeScore), int(awayScore), overtime]

                playerStatsList = [boxScoreTree.xpath(xPathPositionString)[1]] + boxScoreTree.xpath(xPathStatsString)

                if("DNP" not in playerStatsList[1] or "COACH'S DECISION" in playerStatsList[1]):

                
                    playerStatsList = Util.playerStatsConvert(playerStatsList)
                   # print(playerStatsList)
                   # playerMap[playerid].append(gameid)

                    playerMap[playerid][gameid]=(gameStatsList+playerStatsList)
                    #playerMap[playerid] = OrderedDict({gameid:(gameStatsList+playerStatsList)})
                else:
                    homeInjuredIDList.append(playerid)
                    print(playerid + " is injured, so stats from game will not count")


                    #gets player stats for home players and appends that to the game stats
            for playerid in homeBenchidList:
                xPathStatsString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/*/text()"
               

                xPathPositionString = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']/td/*/text()"
                #print(xPathPositionString)

                gameStatsList = []
        
                # stores own team's number and also the opposing team's number
                # 1 for home team, 0 for bench and score difference is calc away score - home score
                # 
                gameStatsList += Util.data_date_convert(game_time_info) + [homeTeamNum, awayTeamNum] + [1, 0, int(homeScore), int(awayScore), overtime]

                playerStatsList = [boxScoreTree.xpath(xPathPositionString)[1]] + boxScoreTree.xpath(xPathStatsString)

                if("DNP" not in playerStatsList[1] or "COACH'S DECISION" in playerStatsList[1]):

                
                    playerStatsList = Util.playerStatsConvert(playerStatsList)
                   # print(playerStatsList)
                   # playerMap[playerid].append(gameid)

                    playerMap[playerid][gameid]=(gameStatsList+playerStatsList)
                    #playerMap[playerid] = OrderedDict({gameid:(gameStatsList+playerStatsList)})
                else:
                    homeInjuredIDList.append(playerid)
                    print(playerid + " is injured, so stats from game will not count")

            injuredIDMap[gameid] = (awayInjuredIDList,homeInjuredIDList)

        except (IndexError):
            print("Game " + gameid + " does not exist")
            #raise IndexError
    print(playerMap)

    #need to UNION currentMap(defaultdict in file) and playerMap(recent games)
    for playerid,orderedDict in playerMap.items():

        for gameid,statList in orderedDict.items():
            #TODO: look to make this more efficient
            #if(not gameid in currentMap[playerid] or not playerid in currentMap):
            if(not playerid in currentMap):
                currentMap[playerid] = OrderedDict()
            #print(type(gameid))
            currentMap[playerid][gameid] = statList



    return (currentMap,injuredIDMap)
예제 #2
0
# print("Reading previously stored player-stats map")
(lastModifiedDate,currentMap) = ReadWriteFiles.readPlayerStatsFile()
isUpdated = (lastModifiedDate == datetime.date.today())

print("Getting data about players playing today")
today_playerMap = Scraper.create_todays_playerMap()
projStarters = Scraper.getProjStarters()

today_playerMap = Util.addStarting(today_playerMap,projStarters)
print(json.dumps(today_playerMap))


(lastModifiedDate,currentMap) = ReadWriteFiles.readPlayerStatsFile()

injuredTodayMap = Scraper.getInjuredPlayers()
injuredIDMap = ReadWriteFiles.readInjuredIDMap()



if(not isUpdated):
    print("Creating Player Map")
    gameids = Scraper.getNewGameIDs(lastModifiedDate)
    #print(gameids)
    (currentMap,injuredIDMap) = Scraper.createPlayerMap(gameids,currentMap)
    #
    # ReadWriteFiles.writePlayerStats(currentMap)
    # ReadWriteFiles.writeInjuredIDMap(injuredIDMap)

    # print(currentMap)

    print("Done creating and writing Player Map")