def _br_batting_slot(text):
    """Return the digit following 'batting ' in *text* as an int, or 0 if absent."""
    found = re.search('batting ([0-9])', text)
    return int(found.group(1)) if found is not None else 0


def ProcessBRPage(filename, con):
    """Parse one saved box-score HTML page and insert its contents via *con*.

    The page is fed through the project's HTML parsers to build a ``Game``,
    the starting lineups, the pitcher rosters and every play; per-player
    counters are then accumulated and each object is asked to insert itself.

    Args:
        filename: Path of the saved page.  The text after the last ``'_'``
            must begin with the game date as ``YYYYMMDD``.
        con: Handle passed unchanged to every project constructor and
            ``Insert*``/``UpdateStats`` call (presumably a DB connection --
            confirm against the model classes).

    Returns:
        The tuple ``(currentGame, pbp)``.

    Raises:
        AttributeError: if one of the weather/substitution regexes does not
            match (``re.search`` returns ``None``).
        KeyError: if a substitution names a player missing from the parsed
            batter/pitcher tables, or follows the final play.
    """
    dateStr = filename.split('_')[-1]
    gameDate = date(int(dateStr[:4]), int(dateStr[4:6]), int(dateStr[6:8]))
    # Close the file deterministically instead of leaking the handle.
    with open(filename) as page:
        html = page.read().decode('utf-8').replace('·','*')
    pbp = PlayByPlay()
    pbp.inning = ''
    playInd = 0

    #Game Stuff
    teamsParser = GameTeamsParser()
    teamsParser.feed(html)
    pbp.hAbb = ConvertTeamAbb('BR', teamsParser.homeTeamAbb)
    pbp.aAbb = ConvertTeamAbb('BR', teamsParser.awayTeamAbb)
    pbp.hTeam = GetTeamfromAbb(pbp.hAbb, con)
    pbp.aTeam = GetTeamfromAbb(pbp.aAbb, con)
    currentGame = Game(pbp.hTeam, pbp.aTeam, gameDate, con)

    timeParser = GameTimeParser()
    timeParser.feed(html)
    currentGame.time = timeParser.time
    # gamelen is "H:MM"; stored as minutes.
    lengthStr = timeParser.gamelen.split(':')
    currentGame.gameLength += int(lengthStr[0]) * 60 + int(lengthStr[1])

    weatherParser = GameWeatherParser()
    weatherParser.feed(html)
    weather = weatherParser.weather
    # NOTE(review): the optional '-' sits outside the capture group, so a
    # negative reading would be stored as positive, and only two-digit
    # temperatures match -- confirm whether that is intended.
    currentGame.temp = int(re.search(': -?([0-9]{2}) F', weather).group(1))
    currentGame.windSpeed = int(
        re.search('Wind ([0-9]{1,2})mph', weather, flags=re.IGNORECASE).group(1))
    windDir = re.search('mph ([^,]*),', weather, flags=re.IGNORECASE).group(1)
    windCodes = {
        'out to Centerfield': 'tocf',
        'out to Rightfield': 'torf',
        'out to Leftfield': 'tolf',
        'from Left to Right': 'ltor',
        'from Right to Left': 'rtol',
        'in from Centerfield': 'fromcf',
        'in from Rightfield': 'fromrf',
        'in from Leftfield': 'fromlf',
    }
    # NOTE(review): the mapped code is never stored on currentGame in this
    # function -- looks like a missing assignment; confirm the target field.
    windDir = windCodes.get(windDir, windDir)
    weatherStr = weather.split(',')
    if len(weatherStr) < 4:
        # Fewer than four comma-separated parts: no precipitation entry.
        precip = 'unknown'
        sky = weatherStr[-1].replace('.','').lower()
    else:
        precip = weatherStr[-1].replace('.','').lower()
        sky = weatherStr[-2].lower()
    field = weatherParser.field.lower() if (weatherParser.field != '') else 'unknown'
    currentGame.weather = 'Field = '+field+', Prec = '+precip+', Sky = '+sky

    umpParser = GameUmpParser()
    umpParser.feed(html)
    currentGame.homeUmp = umpParser.homeump

    #Lineups
    decisions = GameWinLossSaveParser()
    decisions.feed(html)
    winPitch = abb(decisions.winPitch)
    lossPitch = abb(decisions.lossPitch)
    savePitch = '' if len(decisions.savePitch) == 0 else abb(decisions.savePitch)

    lineupParser = BRLineupParser()
    lineupParser.feed(html)
    for line in lineupParser.lineup:
        if len(line) == 8 and line[0].isdigit():
            # Eight cells: away (slot, uid, name, pos) then home (same).
            aBatNum = int(line[0])
            aUID = str(line[1])
            aName = str(line[2])
            aPos = str(line[3]).strip()
            hBatNum = int(line[4])
            hUID = str(line[5])
            hName = str(line[6])
            hPos = str(line[7]).strip()
            if aPos == 'P':
                pbp.aPitcher = abb(aName)
            if hPos == 'P':
                pbp.hPitcher = abb(hName)
            pbp.lineup[abb(aName)] = Lineup(123, pbp.aTeam, aName, aBatNum, aPos, aUID, 'BR', con)
            pbp.lineup[abb(hName)] = Lineup(123, pbp.hTeam, hName, hBatNum, hPos, hUID, 'BR', con)
        elif len(line) == 6:
            # Six-cell row carries only the two pitchers' names.
            pbp.aPitcher = abb(str(line[1]))
            pbp.hPitcher = abb(str(line[4]))

    #Pitch Rosters
    bp = BRPitcherParser()
    bp.feed(html)
    pitchIDLookup = {}
    erLookup = {}
    for pitcher in bp.roster:
        uid = str(pitcher[0])
        name = str(pitcher[1])
        key = abb(name)
        pitchIDLookup[key] = uid
        erLookup[key] = int(pitcher[-2])
        team = pbp.aTeam if pitcher[-1] == 'A' else pbp.hTeam
        pbp.pitchers[key] = PitchRoster(123, team, name, uid, 'BR', con)
        pbp.pitchers[key].IP = 0
        pbp.pitchers[key].pitcherRole = 'Starter'

    #Hitter ID Lookup
    bb = BRBatterParser()
    bb.feed(html)
    batID = {}
    for x in bb.batID.keys():
        key = abb(x)
        batID[key] = bb.batID[x]
        # Iterate a snapshot: entries are re-keyed (insert + delete) in the loop.
        for y in list(pbp.lineup.keys()):
            if key == y:
                break
            elif key[:11] == y:
                # Lineup key was the first 11 characters of this batter's key.
                pbp.lineup[key] = pbp.lineup[y]
                del pbp.lineup[y]

    #Plays
    b = BRPlayParser()
    b.feed(html)
    for playNum in range(1, len(b.plays)+1):
        row = b.plays[playNum]
        pbp.plays[playInd] = Play()
        play = pbp.plays[playInd]
        play.hitterID = row[7]
        play.pitcherID = row[8]

        #Inning Stuff
        innPre = 'Top' if row[0][0] == 't' else 'Bot'
        prevInn = pbp.inning
        # NOTE(review): only element [1] of the inning code is used; if the
        # code is a string such as 't10', innings >= 10 are truncated here and
        # in the pitching-change IP maths below -- verify against BRPlayParser.
        pbp.inning = innPre + ' ' + str(row[0][1])
        play.inning = pbp.inning
        if pbp.inning != prevInn:
            # New half inning: clear outs and bases.
            pbp.outs = 0
            pbp.firstBase = None
            pbp.secondBase = None
            pbp.thirdBase = None
        play.startSit = pbp.ReturnSit()

        #Pitches
        pitchStr = row[4]
        pitchParts = pitchStr.split(' ')
        if len(pitchParts) > 1:
            # Sequence and count are read only when the column is populated,
            # so a blank pitch column no longer raises IndexError.
            play.pitchSeq = pitchParts[1]
            play.strikes = int(pitchStr.split('-')[1][0])
            play.balls = int(pitchStr.split('-')[0][-1])
        play.resultOuts = row[5].count('O')
        pbp.outs += row[5].count('O')
        play.runsScored = row[5].count('R')
        if pbp.ProcessBRPlay(row[11], playInd):
            pbp.lineup[row[7]].RBI += pbp.plays[playInd].runsScored
        pbp.plays[playInd].endSit = pbp.ReturnSit()
        pbp.plays[playInd].playNum = playInd + 1

        #Subs
        if playNum in b.subs:
            for rawSub in b.subs[playNum].split(';'):
                # Each ';'-separated entry is matched on its own.  Previously
                # the defensive-sub and pinch-runner branches searched the
                # whole joined string, producing wrong names and duplicate
                # plays when several substitutions shared one entry.
                sub = rawSub.strip()

                #Pitching Change
                pitchChange = re.search('(.*) replaces (.*) pitching', sub)
                if pitchChange is not None:
                    newP = pitchChange.group(1)
                    newKey = abb(newP)
                    nextPlay = b.plays[playNum+1]
                    bottomNext = nextPlay[0][0] == 'b'
                    p = pbp.aPitcher if bottomNext else pbp.hPitcher
                    pbp.pitchers[p].IP += int(nextPlay[0][-1])-1
                    for x in pbp.pitchers.keys():
                        if pbp.pitchers[x].team == pbp.pitchers[p].team and x != p:
                            pbp.pitchers[p].IP -= float(pbp.pitchers[x].IP)
                    if int(pbp.outs) in range(1,3):
                        pbp.pitchers[p].IP += int(pbp.outs) / 3.0
                    if bottomNext:
                        pbp.aPitcher = newKey
                        team = pbp.aTeam
                    else:
                        pbp.hPitcher = newKey
                        team = pbp.hTeam
                    pbp.pitchers[newKey] = PitchRoster(123, team, newP, pitchIDLookup[newKey], 'BR', con)
                    pbp.pitchers[newKey].pitcherRole = 'Reliever'
                    batnum = _br_batting_slot(sub)
                    pbp.lineup[newKey] = Lineup(123, team, newP, batnum, 'P', pitchIDLookup[newKey], 'BR', con)

                #Pinch Hitter
                pinchHit = re.search('(.*) pinch hits for (.*)', sub)
                if pinchHit is not None:
                    newP = pinchHit.group(1)
                    team = pbp.hTeam if b.plays[playNum+1][0][0] == 'b' else pbp.aTeam
                    batnum = _br_batting_slot(sub)
                    pbp.lineup[abb(newP)] = Lineup(123, team, newP, batnum, 'PH', bb.batID[newP], 'BR', con)

                #Defensive Sub
                defSub = re.search('(.*) replaces (.*) playing (.*) batting', sub)
                if defSub is not None:
                    newP = defSub.group(1)
                    team = pbp.aTeam if b.plays[playNum+1][0][0] == 'b' else pbp.hTeam
                    batnum = _br_batting_slot(sub)
                    pos = defSub.group(3)
                    pbp.lineup[abb(newP)] = Lineup(123, team, newP, batnum, pos, bb.batID[newP], 'BR', con)

                #Pinch Runner
                pinchRun = re.search('(.*) pinch runs for', sub)
                if pinchRun is not None:
                    newP = pinchRun.group(1)
                    team = pbp.hTeam if b.plays[playNum+1][0][0] == 'b' else pbp.aTeam
                    batnum = _br_batting_slot(sub)
                    pbp.lineup[abb(newP)] = Lineup(123, team, newP, batnum, 'PR', bb.batID[newP], 'BR', con)
                    # The runner swap is recorded as its own synthetic play.
                    playInd += 1
                    pbp.plays[playInd] = Play()
                    runDetail = re.search(r'(.*) pinch runs for (.*) \(', sub)
                    pbp.plays[playInd].hitterID = abb(runDetail.group(1))
                    pbp.plays[playInd].pitcherID = abb(pbp.aPitcher) if pbp.inning[0] == 'B' else abb(pbp.hPitcher)
                    pbp.plays[playInd].inning = pbp.inning
                    pbp.plays[playInd].startSit = pbp.plays[playInd].endSit = pbp.ReturnSit()
                    pbp.plays[playInd].playType = 'Pinch Runner'
                    replacee = runDetail.group(2)
                    # Base slots hold [play index, runner key]; swap in the new runner.
                    if pbp.firstBase is not None and pbp.firstBase[1] == abb(replacee):
                        pbp.firstBase = [playInd, abb(newP)]
                    elif pbp.secondBase is not None and pbp.secondBase[1] == abb(replacee):
                        pbp.secondBase = [playInd, abb(newP)]
                    elif pbp.thirdBase is not None and pbp.thirdBase[1] == abb(replacee):
                        pbp.thirdBase = [playInd, abb(newP)]
        playInd += 1

    #Calculate IP for Last Pitchers
    currentGame.InsertBlankGame(con)
    currentGame.totalInnings = int(pbp.inning.split(' ')[-1])
    pbp.pitchers[pbp.hPitcher].IP += currentGame.totalInnings
    pbp.pitchers[pbp.aPitcher].IP += currentGame.totalInnings
    if pbp.inning[:3] == 'Top':
        # Game ended in a top half: the away pitcher did not work that inning.
        pbp.pitchers[pbp.aPitcher].IP -= 1
    elif pbp.inning[:3] == 'Bot':
        # Game ended in a bottom half: remove the outs that were not recorded.
        pbp.pitchers[pbp.aPitcher].IP -= float(3 - int(pbp.outs)) / 3.0
    for x in pbp.pitchers.keys():
        # The last pitcher of each team gets whatever the teammates did not pitch.
        if pbp.pitchers[x].team == pbp.pitchers[pbp.hPitcher].team and x != pbp.hPitcher:
            pbp.pitchers[pbp.hPitcher].IP -= float(pbp.pitchers[x].IP)
        if pbp.pitchers[x].team == pbp.pitchers[pbp.aPitcher].team and x != pbp.aPitcher:
            pbp.pitchers[pbp.aPitcher].IP -= float(pbp.pitchers[x].IP)

    #Insert Game, Plays, Pitchers, and Lineups into DB
    # NOTE(review): synthetic 'Pinch Runner' plays are not in this exclusion
    # list, so the runner is credited with a PA below -- confirm intent.
    nonPlateAppearance = (
        'No Play', 'Stolen Base', 'Caught Stealing', 'Pick Off', 'Balk',
        'Passed Ball', 'Wild Pitch', 'Defensive Indifference', 'Error on Foul',
        'Unknown Runner Activity')
    atBatTypes = (
        'Strikeout', 'Out', 'Double Play', 'Triple Play', "Fielders Choice",
        'Reach On Error', 'Single', 'Double', 'Ground Rule Double', 'Triple',
        'Home Run')
    hitTypes = ('Single', 'Double', 'Ground Rule Double', 'Triple', 'Home Run')
    for x in pbp.plays.values():
        if x.playType not in nonPlateAppearance:
            x.CalcPitches()
            hitter = pbp.lineup[x.hitterID]
            hitter.PA += 1
            pitcher = pbp.pitchers[x.pitcherID]
            pitcher.ContactStrikes += x.contactX
            pitcher.SwingStrikes += x.swingX
            pitcher.LookStrikes += x.lookX
            pitcher.Strikes += x.lookX + x.swingX + x.contactX
            pitcher.Balls += x.balls
            pitcher.pitchCount += x.lookX + x.swingX + x.contactX + x.balls
            if x.ballType in ('Ground Ball', 'Bunt Ground Ball'):
                pitcher.GB += 1
            elif x.ballType in ('Line Drive', 'Bunt Line Drive'):
                pitcher.LD += 1
            elif x.ballType in ('Fly Ball', 'Pop Up', 'Bunt Pop'):
                pitcher.FB += 1
            if x.playType in atBatTypes:
                hitter.AB += 1
                if x.playType in hitTypes:
                    hitter.Hits += 1
                    pitcher.Hits += 1
                    if x.playType == 'Single':
                        hitter.Single += 1
                    elif x.playType in ('Double', 'Ground Rule Double'):
                        hitter.Double += 1
                    elif x.playType == 'Triple':
                        hitter.Triple += 1
                    elif x.playType == 'Home Run':
                        hitter.HR += 1
                elif x.playType == 'Strikeout':
                    pitcher.K += 1
            else:
                if x.playType in ('Walk', 'Intentional Walk'):
                    pitcher.BB += 1
                    hitter.BB += 1
                    pitcher.pitchCount += 1
                elif x.playType == 'Hit By Pitch':
                    pitcher.HBP += 1
                    hitter.HBP += 1
        if x.inning[:3] == 'Top':
            if x.hit:
                currentGame.awayHits += 1
            if x.runScored:
                pbp.lineup[x.hitterID].Runs += 1
            currentGame.awayRuns += x.runsScored
        else:
            if x.hit:
                currentGame.homeHits += 1
            if x.runScored:
                pbp.lineup[x.hitterID].Runs += 1
            currentGame.homeRuns += x.runsScored
        x.gameKey = currentGame.gameKey
        # Swap the in-memory name keys for the site's player ids before insert.
        x.hitterID = batID[x.hitterID]
        x.pitcherID = pitchIDLookup[x.pitcherID]
        x.InsertPlay('BR', con)

    if currentGame.homeRuns > currentGame.awayRuns:
        currentGame.homeTeamWin = True
    elif currentGame.homeRuns == currentGame.awayRuns:
        currentGame.tie = True
    currentGame.UpdateStats(con)

    # Go Through Pitchers
    for x in pbp.pitchers.keys():
        entry = pbp.pitchers[x]
        if x == winPitch:
            entry.Win = True
        elif x == lossPitch:
            entry.Loss = True
        elif x == savePitch:
            entry.Save = True
        if entry.IP == 9.0:
            entry.CG = True
            if entry.Runs == 0:
                entry.SO = True
            if entry.Hits == 0:
                entry.NH = True
        entry.gameKey = currentGame.gameKey
        entry.earnedRuns = erLookup[x]
        entry.InsertRosterRow(con)

    # Go Through Hitters
    for x in pbp.lineup.keys():
        pbp.lineup[x].game = currentGame.gameKey
        pbp.lineup[x].InsertLineupRow(con)

    return currentGame, pbp
from contextlib import closing
import urllib2

from BRParser import BRPitcherParser

# Ad-hoc check: download one box score page, run it through BRPitcherParser
# and print every roster row the parser collected.
b = BRPitcherParser()
url = "http://www.baseball-reference.com/boxes/DET/DET201504060.shtml"
# urllib2 responses are not context managers on Python 2; closing() makes sure
# the connection is released even if read/decode raises.
with closing(urllib2.urlopen(url)) as response:
    html = response.read().decode('utf-8').replace('·','*')
b.feed(html)
for x in b.roster:
    print(x)