def createPlayerMap(gameids, currentMap):
    """Scrape ESPN box scores for ``gameids`` and merge the rows into ``currentMap``.

    For every game the box-score page and the game-info page are downloaded.
    Each listed player who played gets one row stored under
    ``playerMap[playerid][gameid]``; the row is the game-level prefix
    (see ``_collectGameStats``) followed by ``Util.playerStatsConvert(...)``
    of the scraped stat cells.  Players whose row is a DNP other than
    "COACH'S DECISION" are recorded as injured for that game instead.

    Args:
        gameids: iterable of indexable entries; ``entry[0]`` is the ESPN game
            id (a string, it is concatenated into the URLs) and ``entry[1]``
            is the key the games are ordered by (presumably the game date --
            TODO confirm against ``Scraper.getNewGameIDs``).
        currentMap: existing mapping ``playerid -> {gameid: stat row}``.
            It is updated in place and also returned.

    Returns:
        ``(currentMap, injuredIDMap)`` where ``injuredIDMap`` is the mapping
        loaded by ``ReadWriteFiles.readInjuredIDMap()`` extended with
        ``gameid -> (awayInjuredIDList, homeInjuredIDList)`` for every game
        that was scraped successfully.

    Raises:
        Whatever ``requests.get`` raises (now including timeouts), ``KeyError``
        for a team name missing from ``Util.team_dict`` and ``ValueError`` for
        unparsable scores or overtime status.  Only ``IndexError`` (an expected
        element is missing from the page) is handled, by skipping the game.
    """
    # use defaultdict to map playerids to game stats
    playerMap = defaultdict(OrderedDict)

    # Sort once and reuse the result for both the debug print and the loop.
    orderedGames = sorted(gameids, key=lambda x: x[1])
    print(orderedGames)

    # will keep track of playerids of injured players on both teams
    injuredIDMap = ReadWriteFiles.readInjuredIDMap()

    for game in orderedGames:
        gameid = game[0]
        boxScoreTree = _fetchHtmlTree("http://espn.go.com/nba/boxscore?gameId=" + gameid)
        gameInfoTree = _fetchHtmlTree("http://espn.go.com/nba/game?gameId=" + gameid)
        try:
            injuredIDMap[gameid] = _collectGameStats(gameid, boxScoreTree, gameInfoTree, playerMap)
        except IndexError:
            # An xpath lookup came back empty (or a player href/row had an
            # unexpected shape).  Rows already stored for this game are kept,
            # exactly as before; only the injured-list entry is skipped.
            print("Game " + gameid + " does not exist")

    print(playerMap)

    # need to UNION currentMap (map from file) and playerMap (recent games)
    for playerid, recentGames in playerMap.items():
        if playerid not in currentMap:
            currentMap[playerid] = OrderedDict()
        storedGames = currentMap[playerid]
        for recentGameid, statList in recentGames.items():
            storedGames[recentGameid] = statList
    return (currentMap, injuredIDMap)


def _fetchHtmlTree(url, timeout=30):
    """Download ``url`` and parse the body with ``lxml.html``.

    ``timeout`` (seconds) is passed to ``requests.get`` so that a stalled
    connection raises instead of blocking the scrape indefinitely.
    """
    page = requests.get(url, timeout=timeout)
    return html.fromstring(page.content)


def _parseOvertimeCount(finalStatus):
    """Return the number of overtime periods encoded in a game-status string.

    A status without "OT" gives 0, ".../OT" gives 1, and otherwise the first
    character after the "/" is read as the count (e.g. ".../2OT" -> 2).
    Raises ``ValueError`` if an "OT" status does not contain exactly one "/".
    """
    if "OT" not in finalStatus:
        return 0
    [_, ot] = finalStatus.split("/")
    if ot == "OT":
        return 1
    return int(ot[0])


def _teamHeaderText(boxScoreTree, side, containerPath):
    """Return the first text node under the ``team <side>`` header block."""
    query = "//div[@class='team " + side + "']/div[@class='content']" + containerPath
    return boxScoreTree.xpath(query)[0]


def _teamPlayerIds(boxScoreTree, wrapClass):
    """Return the player ids linked from one team's box-score tables, in page order."""
    hrefs = boxScoreTree.xpath(
        "//div[@class='" + wrapClass + "']/div[@class='sub-module']/div/table/tbody/tr/td/a/@href")
    return [href.split("_/id/")[1] for href in hrefs]


def _collectGameStats(gameid, boxScoreTree, gameInfoTree, playerMap):
    """Store one game's player rows in ``playerMap`` and return the injured ids.

    Every stored row starts with the game-level prefix
    ``Util.data_date_convert(date) + [own team, opposing team,
    home flag (0 away / 1 home), starter flag (1 starter / 0 bench),
    own score, opposing score, overtime count]``.

    Returns:
        ``(awayInjuredIDList, homeInjuredIDList)``.

    Raises:
        IndexError: when an expected element is missing from either page.
    """
    # first get game data not specific to each player (time, score, teams, ...)
    game_time_info = gameInfoTree.xpath("//div[@class='game-date-time']/span/@data-date")[0]
    overtime = _parseOvertimeCount(
        gameInfoTree.xpath("//div[@class='game-status']/span/text()")[0])

    namePath = "/div[@class='team-container']/div[@class='team-info']/a/span[@class='short-name']/text()"
    scorePath = "/div[@class='score-container']/div/text()"
    awayName = _teamHeaderText(boxScoreTree, "away", namePath)
    homeName = _teamHeaderText(boxScoreTree, "home", namePath)
    awayScore = _teamHeaderText(boxScoreTree, "away", scorePath)
    homeScore = _teamHeaderText(boxScoreTree, "home", scorePath)

    # keep track of which players were on the away team and which were on the home team
    awayTeamNum = Util.team_dict[awayName]
    awayPlayeridList = _teamPlayerIds(boxScoreTree, "col column-one gamepackage-away-wrap")
    homeTeamNum = Util.team_dict[homeName]
    homePlayeridList = _teamPlayerIds(boxScoreTree, "col column-two gamepackage-home-wrap")

    awayInjuredIDList = []
    homeInjuredIDList = []

    # The first five ids of each team are taken as the starters, the rest as bench.
    # NOTE(review): in the original source the home-team prefix statement sat
    # directly after a bare `#`; it is treated as live code here so that home
    # rows have the same layout as away rows -- confirm.
    # (playerids, own team, opposing team, home flag, starter flag,
    #  own score, opposing score, injured list)
    playerGroups = (
        (awayPlayeridList[0:5], awayTeamNum, homeTeamNum, 0, 1, awayScore, homeScore, awayInjuredIDList),
        (awayPlayeridList[5:], awayTeamNum, homeTeamNum, 0, 0, awayScore, homeScore, awayInjuredIDList),
        (homePlayeridList[0:5], homeTeamNum, awayTeamNum, 1, 1, homeScore, awayScore, homeInjuredIDList),
        (homePlayeridList[5:], homeTeamNum, awayTeamNum, 1, 0, homeScore, awayScore, homeInjuredIDList),
    )
    for (playerids, ownTeamNum, otherTeamNum, homeFlag, starterFlag,
         ownScore, otherScore, injuredIDList) in playerGroups:
        for playerid in playerids:
            rowPath = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']"
            gameStatsList = (Util.data_date_convert(game_time_info)
                             + [ownTeamNum, otherTeamNum]
                             + [homeFlag, starterFlag, int(ownScore), int(otherScore), overtime])
            # Second text node of the name cell (used as the position by the
            # original code), followed by the direct text of every cell in the row.
            playerStatsList = ([boxScoreTree.xpath(rowPath + "/td/*/text()")[1]]
                               + boxScoreTree.xpath(rowPath + "/*/text()"))
            # The same test is now used for all four groups; previously the
            # away-starters copy had an extra `len(playerStatsList) != 2` clause.
            if "DNP" not in playerStatsList[1] or "COACH'S DECISION" in playerStatsList[1]:
                playerMap[playerid][gameid] = gameStatsList + Util.playerStatsConvert(playerStatsList)
            else:
                injuredIDList.append(playerid)
                print(playerid + " is injured, so stats from game will not count")

    return (awayInjuredIDList, homeInjuredIDList)
# print("Reading previously stored player-stats map") (lastModifiedDate,currentMap) = ReadWriteFiles.readPlayerStatsFile() isUpdated = (lastModifiedDate == datetime.date.today()) print("Getting data about players playing today") today_playerMap = Scraper.create_todays_playerMap() projStarters = Scraper.getProjStarters() today_playerMap = Util.addStarting(today_playerMap,projStarters) print(json.dumps(today_playerMap)) (lastModifiedDate,currentMap) = ReadWriteFiles.readPlayerStatsFile() injuredTodayMap = Scraper.getInjuredPlayers() injuredIDMap = ReadWriteFiles.readInjuredIDMap() if(not isUpdated): print("Creating Player Map") gameids = Scraper.getNewGameIDs(lastModifiedDate) #print(gameids) (currentMap,injuredIDMap) = Scraper.createPlayerMap(gameids,currentMap) # # ReadWriteFiles.writePlayerStats(currentMap) # ReadWriteFiles.writeInjuredIDMap(injuredIDMap) # print(currentMap) print("Done creating and writing Player Map")