def __init__(self, ids, teamname="Virginia Tech", season="2013"):
    """Collect every game of one team's season from ESPN's schedule page.

    Args:
        ids: Mapping keyed by team name. ``ids[teamname][0]`` is inserted
            after ``/id/`` in the schedule URL and ``ids[teamname][1]`` is
            appended as the final path segment; both must be strings.
        teamname: Team whose schedule is fetched. Defaults to the value
            that used to be hard-coded.
        season: Season year; converted with ``str()`` for the URL and
            passed unchanged to each ``gpbp.Game``.

    Populates ``self.gameidlist`` with the game ids found in recap and
    boxscore links, and ``self.gameinseason`` with one ``gpbp.Game`` per id.
    """
    recap_prefix = "/ncf/recap?id="        # e.g. /ncf/recap?id=292620259
    boxscore_prefix = "/ncf/boxscore?id="  # e.g. /ncf/boxscore?id=262990259

    self.gameidlist = []
    self.gameinseason = []
    self.teamname = teamname

    url = (
        "http://espn.go.com/college-football/team/schedule/_/id/"
        + ids[self.teamname][0]
        + "/year/"
        + str(season)
        + "/"
        + ids[self.teamname][1]
    )
    soup = muf.cleanhtml(url)

    for link in soup.find_all("a"):
        # Anchors without an href attribute yield None; treat them as
        # empty so they are skipped instead of raising TypeError.
        href = link.get("href") or ""
        if href.startswith(recap_prefix):
            self.gameidlist.append(href[len(recap_prefix):])
        elif href.startswith(boxscore_prefix):
            self.gameidlist.append(href[len(boxscore_prefix):])

    # Build one Game object per id, in the order the links appeared.
    for game_id in self.gameidlist:
        self.gameinseason.append(gpbp.Game(game_id, self.teamname, season))
def __init__(self, gameid, teamname, season):
    """Fetch one ESPN play-by-play page and parse it into play records.

    Args:
        gameid: ESPN game id as a string; appended to the play-by-play URL.
        teamname: Name of the team of interest. Every whitespace-separated
            word of it must occur in one side of the page title for that
            side to be taken as this team.
        season: Stored as ``self.season`` and included in the game-state
            dict handed to each play object.

    Attributes set:
        teamname, opponent: Full names taken from the page title ("" when
            the title does not match ``teamname``).
        home: True when the team is the second name in the title, False
            when it is the first, None when no match was found.
        game_title: Always "" (never filled in by this method).
        game_result: Scoreboard names and scores, each followed by a space.
        tmnmlist, oppnmlist: Full name plus, when it can be determined,
            the scoreboard abbreviation for the team / the opponent.
        pbps: Text of every ``<li>`` in the play-by-play container.
        plays: ``getplayinfo()`` of every rushing and passing play.
        ply: The ``plys.Rushing`` objects themselves.

    Prints diagnostic output for every play and for unrecognised entries.
    """
    # Older pages used '...playbyplay?gameId=<id>&period=0'; the apparent
    # newer format is
    # http://espn.go.com/college-football/playbyplay?gameId=242410259
    url = 'http://espn.go.com/ncf/playbyplay?gameId=' + gameid
    soup = muf.cleanhtml(url)

    self.teamname = ""
    self.opponent = ""
    self.season = season
    self.game_title = ""
    self.home = None

    # The page title typically holds both full team names plus the date:
    # <title>Western Michigan Broncos vs. Virginia Tech Hokies - Play By Play - September 27, 2014 - ESPN</title>
    name_words = teamname.split()
    for title in soup.find_all("title"):
        titletxt = title.get_text()
        team1 = titletxt[:titletxt.find(' v')]
        team2 = titletxt[titletxt.find('.') + 2:titletxt.find(' -')]
        if all(word in team1 for word in name_words):
            self.teamname = team1
            self.opponent = team2
            self.home = False
        elif all(word in team2 for word in name_words):
            self.teamname = team2
            self.opponent = team1
            self.home = True

    # Scoreboard: for each row, the team-name cells then the final-score
    # cells, every piece followed by a single space.
    result_parts = []
    for game in soup.find_all("div", {"class": "competitors"}):
        for team in game.find_all("tr"):
            for cell in team.find_all("td", {"class": "team-name"}):
                result_parts.append(cell.get_text())
            for cell in team.find_all("td", {"class": "final-score"}):
                result_parts.append(cell.get_text())
    self.game_result = "".join(part + " " for part in result_parts)

    # Lists of team and opponent names/abbreviations likely to be used.
    self.tmnmlist = [self.teamname]
    self.oppnmlist = [self.opponent]

    # Reduce the tokens towards [name, score, name, score] by gluing
    # multi-word names back together.  Slice assignment deletes by
    # position; list.remove() deleted the first *equal* token, which broke
    # inputs such as "Texas 21 North Texas 14".
    gameresultlist = self.game_result.split()
    while len(gameresultlist) > 4:
        if not gameresultlist[0].isdigit() and not gameresultlist[1].isdigit():
            gameresultlist[0:2] = [gameresultlist[0] + " " + gameresultlist[1]]
        elif not gameresultlist[2].isdigit() and not gameresultlist[3].isdigit():
            gameresultlist[2:4] = [gameresultlist[2] + " " + gameresultlist[3]]
        else:
            # Nothing left to merge; stop rather than loop forever.
            break

    # Decide which scoreboard name (index 0 or 2) belongs to our team.
    # NOTE(review): assumes muf.is_abbrev is a side-effect-free predicate.
    team_index = None
    if len(gameresultlist) >= 3:
        first, third = gameresultlist[0], gameresultlist[2]
        first_is_team = bool(muf.is_abbrev(first, self.teamname))
        first_is_opp = bool(muf.is_abbrev(first, self.opponent))
        third_is_team = bool(muf.is_abbrev(third, self.teamname))
        third_is_opp = bool(muf.is_abbrev(third, self.opponent))
        if first_is_team and first_is_opp and third_is_team and third_is_opp:
            # Fully ambiguous: fall back on position, treating the third
            # token as ours only when we were identified as the home side.
            team_index = 2 if self.home else 0
        elif third_is_team and first_is_opp:
            team_index = 2
        elif third_is_opp and first_is_team:
            team_index = 0
        elif third_is_team != third_is_opp:
            team_index = 2 if third_is_team else 0
        elif first_is_team != first_is_opp:
            team_index = 0 if first_is_team else 2

    if team_index is None:
        print('could not make sense of abbrevs')
    else:
        self.tmnmlist.append(gameresultlist[team_index])
        self.oppnmlist.append(gameresultlist[2 - team_index])

    # pbps holds each raw string from ESPN's play-by-play list, which
    # includes quarter and drive markers in addition to actual plays.
    self.pbps = []
    # plays stores the parsed play data, including the game state.
    self.plays = []
    self.ply = []

    # Local game-state trackers.  gameperiod is 1,2,3,4,5,... with 5
    # representing a first OT period, and so on.
    gameperiod = 1
    drive = 1
    possession = self.teamname
    gamestate = {
        'quarter': gameperiod,
        'driveno': drive,
        'teamname': self.tmnmlist,
        'opponent': self.oppnmlist,
        'season': self.season,
        'home': self.home,
        'offense': possession,
    }

    # Use the last matching container; tolerate pages that have none
    # instead of failing on a missing attribute.
    pbp_container = None
    for container in soup.find_all("div", {"id": "gamepackage-play-by-play"}):
        pbp_container = container
    play_items = pbp_container.find_all("li") if pbp_container is not None else []

    for play in play_items:
        print("play: ", play)
        text = play.get_text(separator=u'<>')
        self.pbps.append(text)

        if 'Quarter' in text:
            if str(gameperiod) not in text:
                # The last digit in the entry becomes the current period.
                for char in text:
                    if char.isdigit():
                        gameperiod = int(char)
                print(text, " ", gameperiod)
        elif 'OT ' in text:
            gameperiod = self.isDigitOT(text)
            print(text, " ", gameperiod)
        # Play-by-play entries that still need to be dealt with.
        elif any(word in text for word in ['coin toss', 'toss', 'kick', 'attempt failed', 'extra point', 'Extra Point', 'Extra point', 'Conversion', 'conversion', 'punt', 'FG', 'field goal', 'Field Goal', 'Penalty', 'PENALTY', 'penalty', 'Fumble']):
            pass
        # Play-by-play entries there are no plans for yet.
        elif any(word in text for word in ['DRIVE', 'ball on', 'End']):
            pass
        # Timeouts contain game clock info.
        elif any(word in text for word in ['Timeout', 'timeout']):
            pass
        elif any(word in text for word in ['rush ', 'run ', 'Run ', 'sacked ', 'scramble ']):
            currentplay = plys.Rushing(text, gamestate)
            self.plays.append(currentplay.getplayinfo())
            self.ply.append(currentplay)
        elif any(word in text for word in ['pass ', 'incompletion ', 'interception ', 'Interception ']):
            currentplay = plys.Passing(text, gamestate)
            self.plays.append(currentplay.getplayinfo())
            # NOTE(review): unlike rushing plays, passing plays are not
            # added to self.ply - confirm whether that is intentional.
        elif any(word in text for word in ['SAFETY', 'Safety', 'safety']):
            pass
        elif ':' in text:
            # Entry names the team currently with the ball (and likely the
            # best game clock info).  Collapse it to a one-element list:
            # leading non-numeric words are joined until 'at' or a token
            # starting with a digit; everything else is dropped.
            tokens = text.split()
            while len(tokens) > 1:
                if not tokens[0][0].isdigit() and not tokens[1][0].isdigit():
                    if tokens[1] == 'at':
                        del tokens[1:]
                    else:
                        tokens[0:2] = [tokens[0] + " " + tokens[1]]
                else:
                    del tokens[1]
            # NOTE(review): 'offense' starts as a str but becomes a list
            # here, as before - confirm what the plys classes expect.
            possession = tokens
        elif any(word in text for word in ['at', 'and']):
            # These might be empty plays due to inconsistencies and errors
            # in ESPN's play by play; may need extra checks.
            pass
        else:
            print("these are unhandled plays: ", text)

        # Refresh the shared state dict for the next entry.
        gamestate['quarter'] = gameperiod
        gamestate['driveno'] = drive
        gamestate['offense'] = possession