def weather_hourly(teamID, gametime):
    """Average the hourly forecast over the first three hours of a game.

    Args:
        teamID: key into the ``Ugen.mlb_map`` lookups that supply the zip
            code and the batter-facing direction.
        gametime: start time such as ``'7:30 AM'`` or ``'7:00 pm'``. It is
            handed to ``convert_timezones`` as an ``'ET'`` time and shifted
            to the zone named in the forecast.

    Returns:
        ``None`` if ``gametime`` cannot be parsed. Otherwise a dict with
        ``temp``, ``humidity``, ``pop`` and ``wind`` (``wind_dir``,
        ``wind_speed``), each the mean over the matched hours rounded to two
        places. A field with no usable readings comes out as NaN.
    """
    team_map = Ugen.mlb_map(4, 8)
    batter_map = Ugen.mlb_map(4, 9)
    # weather_hist casts this value to float; do the same here so a value
    # stored as text cannot make every wind-direction calculation fail.
    batter_dir = float(batter_map[teamID])
    zipcode = str(team_map[teamID])
    try:
        clock, meridiem = gametime.split()[:2]
        hour_text, minute_text = clock.split(":")[:2]
        gametime_hour = convert_time_24(int(hour_text), int(minute_text), meridiem)
    except Exception:
        # convert_time_24 is defined elsewhere, so the exact errors it can
        # raise are not visible here; Exception keeps the best-effort
        # behaviour without trapping KeyboardInterrupt/SystemExit.
        print('gametime %s is not in proper format' % gametime)
        return None

    data = weather_response('hourly', zipcode)
    tzone = data['hourly_forecast'][1]['FCTTIME']['pretty'].split()[2]
    gametime_hour = convert_timezones(gametime_hour, 'ET', tzone)
    wanted_hours = (
        str(gametime_hour),
        str(int(gametime_hour) + 1),
        str(int(gametime_hour) + 2),
    )

    temp_list, hum_list, wspd_list, wdir_list, pop_list = [], [], [], [], []
    matched = 0  # stop after three hours so the next day's hours are not used
    for e in data['hourly_forecast']:  # each e is one hour of forecast
        forecast_hour = e['FCTTIME']['hour_padded']
        # Drop the zero padding: '00' -> '0', '07' -> '7'.
        if forecast_hour[0] == '0' and forecast_hour[1] == '0':
            forecast_hour = forecast_hour.replace("0", "", 1)
        elif forecast_hour[0] == '0':
            forecast_hour = forecast_hour.replace("0", "")
        if forecast_hour not in wanted_hours:
            continue
        matched += 1

        try:
            temp_list.append(float(e['temp']['english']))  # temp in degF
        except (KeyError, TypeError, ValueError):
            pass
        try:
            hum_list.append(float(e['humidity']))
        except (KeyError, TypeError, ValueError):
            pass
        try:
            pop_list.append(float(e['pop']))
        except (KeyError, TypeError, ValueError):
            pass

        wdir = e['wdir']['degrees']  # direction the wind is blowing from
        try:
            origin = int(wdir)
        except (TypeError, ValueError):
            origin = None
        if origin is not None:
            # Flip to the direction the wind blows toward, then take the
            # absolute offset from the batter direction.
            toward = origin + 180 if origin < 180 else origin - 180
            wdir_list.append(abs(float(toward - batter_dir)))

        try:
            wspd_list.append(float(e['wspd']['english']))  # mph
        except (KeyError, TypeError, ValueError):
            pass

        if matched == 3:
            break

    return {
        'temp': round(np.mean(temp_list), 2),
        'humidity': round(np.mean(hum_list), 2),
        'wind': {
            'wind_dir': round(np.mean(wdir_list), 2),
            'wind_speed': round(np.mean(wspd_list), 2),
        },
        'pop': round(np.mean(pop_list), 2),
    }
def weather_hist(teamID, weather_date, gametime):
    """Average the recorded weather for the first three hours of a past game.

    Args:
        teamID: key into the ``Ugen.mlb_map`` lookups that supply the zip
            code and the batter-facing direction.
        weather_date: date string; its dashes are removed before it is used
            in the ``history_<date>`` request.
        gametime: start time such as ``'7:30 AM'``; it is handed to
            ``convert_timezones`` as an ``'ET'`` time.

    Returns:
        ``None`` if ``gametime`` cannot be parsed. Otherwise a dict with
        ``temp``, ``humidity`` and ``wind`` (``wind_dir``, ``wind_speed``),
        each the mean over the matching observations rounded to two places.
        A field with no usable readings comes out as NaN.
    """
    team_map = Ugen.mlb_map(4, 8)
    batter_map = Ugen.mlb_map(4, 9)
    zipcode = str(team_map[teamID])
    batter_dir = float(batter_map[teamID])
    try:
        clock, meridiem = gametime.split()[:2]
        hour_text, minute_text = clock.split(":")[:2]
        gametime_hour = convert_time_24(int(hour_text), int(minute_text), meridiem)
    except Exception:
        # convert_time_24 lives elsewhere; Exception keeps the best-effort
        # behaviour without trapping KeyboardInterrupt/SystemExit.
        print("gametime %s is not in proper format" % gametime)
        return None

    data = weather_response("history" + "_" + weather_date.replace("-", ""), zipcode)
    observations = data["history"]["observations"]
    tzone = observations[1]["date"]["pretty"].split()[2]
    gametime_hour = convert_timezones(gametime_hour, "ET", tzone)
    wanted_hours = (
        str(gametime_hour),
        str(int(gametime_hour) + 1),
        str(int(gametime_hour) + 2),
    )

    def as_float(value):
        """Return ``value`` as a float, or None if it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    temp_list, hum_list, wspd_list, wdir_list = [], [], [], []
    for e in observations:
        forecast_hour = e["date"]["hour"]
        # Drop the zero padding: '00' -> '0', '07' -> '7'.
        if forecast_hour[0] == "0" and forecast_hour[1] == "0":
            forecast_hour = forecast_hour.replace("0", "", 1)
        elif forecast_hour[0] == "0":
            forecast_hour = forecast_hour.replace("0", "")
        if forecast_hour not in wanted_hours:
            continue

        temp = as_float(e.get("tempi"))  # temp in degF
        if temp is not None:
            temp_list.append(temp)
        humidity = as_float(e.get("hum"))
        if humidity is not None:
            hum_list.append(humidity)

        wdir = e["wdird"]
        try:
            origin = int(wdir)
        except (TypeError, ValueError):
            origin = None
        if origin is not None:
            # Flip the reported bearing by 180 degrees, then take the
            # absolute offset from the batter direction.
            toward = origin + 180 if origin < 180 else origin - 180
            wdir_list.append(abs(float(toward - batter_dir)))

        speed = as_float(e.get("wspdi"))  # mph
        if speed is not None:
            wspd_list.append(speed)

    return {
        "temp": round(np.mean(temp_list), 2),
        "humidity": round(np.mean(hum_list), 2),
        "wind": {
            "wind_dir": round(np.mean(wdir_list), 2),
            "wind_speed": round(np.mean(wspd_list), 2),
        },
    }
def weather_hist(teamID, weather_date, gametime):
    """Summarise historical weather observations around a game's start.

    ``gametime`` looks like ``'7:30 AM'``. Observations whose hour equals the
    converted start hour, or one or two hours after it, are averaged.
    Returns ``None`` when ``gametime`` cannot be parsed, otherwise a dict
    with ``temp``, ``humidity`` and a nested ``wind`` dict, each value
    rounded to two places.
    """
    zip_lookup = Ugen.mlb_map(4, 8)
    direction_lookup = Ugen.mlb_map(4, 9)
    zipcode = str(zip_lookup[teamID])
    batter_dir = float(direction_lookup[teamID])

    try:
        start_hour = convert_time_24(
            int(gametime.split()[0].split(":")[0]),
            int(gametime.split()[0].split(":")[1]),
            gametime.split()[1],
        )
    except:
        print('gametime %s is not in proper format' % gametime)
        return None

    request = 'history' + '_' + weather_date.replace('-', '')
    data = weather_response(request, zipcode)
    tzone = data['history']['observations'][1]['date']['pretty'].split()[2]
    start_hour = convert_timezones(start_hour, "ET", tzone)

    temps = []
    humidities = []
    speeds = []
    offsets = []

    def unpad(hour):
        # '00' -> '0'; any other leading-zero hour loses its zeros.
        if hour[0] == '0' and hour[1] == '0':
            return hour.replace("0", "", 1)
        if hour[0] == '0':
            return hour.replace("0", "")
        return hour

    def in_window(hour):
        return (
            hour == str(start_hour)
            or hour == str(int(start_hour) + 1)
            or hour == str(int(start_hour) + 2)
        )

    def keep(bucket, read):
        # Best effort: a reading that cannot be produced is skipped.
        try:
            bucket.append(read())
        except:
            pass

    for obs in data['history']['observations']:
        if not in_window(unpad(obs['date']['hour'])):
            continue
        keep(temps, lambda: float(obs['tempi']))  # temp in degF
        keep(humidities, lambda: float(obs['hum']))
        wdir = obs['wdird']

        def offset_from_batter():
            # right now do abs value
            if int(wdir) < 180:
                return abs(float((int(wdir) + 180) - batter_dir))
            return abs(float((int(wdir) - 180) - batter_dir))

        keep(offsets, offset_from_batter)
        keep(speeds, lambda: float(obs['wspdi']))  # mph or wspdm for metric

    weather_dict = {}
    weather_dict['temp'] = round(np.mean(temps), 2)
    weather_dict['humidity'] = round(np.mean(humidities), 2)
    wind = {}
    wind['wind_dir'] = round(np.mean(offsets), 2)
    wind['wind_speed'] = round(np.mean(speeds), 2)
    weather_dict['wind'] = wind
    return weather_dict
def vegas_odds_sportsbook(date):
    """Collect odds for each team's first completed game on ``date``.

    Args:
        date: ``'YYYY-MM-DD'`` or ``'YYYYMMDD'``; dashes are stripped before
            the date is placed in the request url.

    Returns:
        dict mapping each team key produced by ``get_gameday_odds`` to its
        odds. Only the first entry seen for a team is kept.
    """
    from contextlib import closing

    print('getting odds for %s' % date)
    url = ('http://www.sportsbookreview.com/betting-odds/mlb-baseball/merged/?date='
           + date.replace("-", ""))
    # Close the HTTP response explicitly instead of leaving it to the GC.
    with closing(urllib2.urlopen(url)) as response:
        content = response.read()
    soup = BeautifulSoup(content)
    team_map = Ugen.mlb_map(10, 4)

    odds_dict = {}
    for row in soup.findAll("div", {"class": "event-holder holder-complete"}):
        # Rows alternate between these two class strings.
        row_data = row.find("div", {"class": "eventLine odd status-complete "})
        if row_data is None:
            row_data = row.find("div", {"class": "eventLine status-complete "})
        hyperlink = None
        if row_data is not None:
            hyperlink = row_data.find("meta", {"itemprop": "url"})
        if hyperlink is None:
            # Previously this raised AttributeError and lost every game
            # already collected; skip just the unparseable row instead.
            print('skipping event without a completed odds line or url')
            continue
        matchup_dict = get_gameday_odds(hyperlink.get('content'), team_map)
        for team, odds in matchup_dict.items():
            # Ian: keep the first game so the second game of a double header
            # is not taken (FD usually does first).
            if team not in odds_dict:
                odds_dict[team] = odds
    return odds_dict
def pitcher_season_averages(self, hist_data, home_away, year):
    """Compute season means of K/9, innings pitched and earned runs.

    Args:
        hist_data: mapping of column name to per-game sequences. Columns
            read are 'strike_outs', 'innings_pitched', 'earned_runs',
            'Date', 'Team', 'home_team', 'away_team',
            'away_starting_lineup' and 'home_starting_lineup'.
        home_away: ``'home'`` or ``'away'`` to keep only games where the
            'Team' value equals the mapped home (or away) team; a falsy
            value selects the unrestricted branch.
        year: string compared with the text before the first '-' of each
            date.

    Returns:
        dict that may hold 'K9', 'IP' and 'ER'. Which keys are present
        depends on the branch taken and on whether the lineup-based
        calculation succeeded.

    NOTE(review): ``stat`` and ``player`` are not parameters or locals here.
    Unless they exist as module globals, ``stat`` raises NameError whenever
    ``home_away`` is falsy, and ``player`` raises NameError inside each try
    block, which silently selects the fallback. Confirm the intended source.
    """
    team_map = Ugen.mlb_map(11, 4)
    season_avg = {}

    def in_season(date):
        return str(date).split("-")[0] == year

    if not home_away and stat == 'innings_pitched':
        try:
            season_avg['K9'] = numpy.mean([
                # float() first: SO/IP would truncate for integer columns
                # under Python 2 division.
                float(so) / ip * 9
                for so, date, ip in zip(hist_data['strike_outs'],
                                        hist_data['Date'],
                                        hist_data['innings_pitched'])
                if in_season(date) and ip > 3
            ])
            season_avg['IP'] = numpy.mean([
                ip
                for at, ht, ip, date in zip(hist_data['away_starting_lineup'],
                                            hist_data['home_starting_lineup'],
                                            hist_data['innings_pitched'],
                                            hist_data['Date'])
                if in_season(date) and (player in at or player in ht)
            ])
        except Exception:
            # Best-effort fallback when the lineup columns cannot be used.
            # It now filters on ``year`` instead of a hard-coded '2015'.
            season_avg['IP'] = numpy.mean([
                ip
                for ip, date in zip(hist_data['innings_pitched'],
                                    hist_data['Date'])
                if in_season(date) and ip > 3
            ])
    elif home_away == 'home' or home_away == 'away':

        def on_side(team, home_team, away_team):
            """True when ``team`` is the mapped home/away team of the game."""
            side_team = home_team if home_away == 'home' else away_team
            return team == team_map[side_team]

        def lineup_mean(column):
            """Mean of ``column`` over games where ``player`` is in a lineup."""
            return numpy.mean([
                value
                for at, ht, value, date, team, home_team, away_team in zip(
                    hist_data['away_starting_lineup'],
                    hist_data['home_starting_lineup'],
                    hist_data[column],
                    hist_data['Date'],
                    hist_data['Team'],
                    hist_data['home_team'],
                    hist_data['away_team'])
                if in_season(date)
                and (player in at or player in ht)
                and on_side(team, home_team, away_team)
            ])

        def long_start_mean(column):
            """Mean of ``column`` over games with more than 3 innings pitched."""
            return numpy.mean([
                value
                for team, home_team, away_team, ip, date, value in zip(
                    hist_data['Team'],
                    hist_data['home_team'],
                    hist_data['away_team'],
                    hist_data['innings_pitched'],
                    hist_data['Date'],
                    hist_data[column])
                if in_season(date) and ip > 3
                and on_side(team, home_team, away_team)
            ])

        try:
            # The 'away' K9 used to filter on the home team (copy-paste
            # slip); it now uses the same side as IP and ER.
            season_avg['K9'] = numpy.mean([
                float(so) / ip * 9
                for so, date, ip, team, home_team, away_team in zip(
                    hist_data['strike_outs'],
                    hist_data['Date'],
                    hist_data['innings_pitched'],
                    hist_data['Team'],
                    hist_data['home_team'],
                    hist_data['away_team'])
                if in_season(date) and ip > 3
                and on_side(team, home_team, away_team)
            ])
            season_avg['IP'] = lineup_mean('innings_pitched')
            season_avg['ER'] = lineup_mean('earned_runs')
        except Exception:
            # Best-effort fallback; a K9 computed above is kept.
            season_avg['IP'] = long_start_mean('innings_pitched')
            season_avg['ER'] = long_start_mean('earned_runs')
    return season_avg
def vegas_odds_sportsbook(date):
    """Scrape the merged MLB odds page for ``date`` and return team odds.

    ``date`` may be 'YYYY-MM-DD' or 'YYYYMMDD'. The result maps each team
    key returned by ``get_gameday_odds`` to the odds of the first game found
    for that team.
    """
    print('getting odds for %s' % date)
    compact_date = date.replace("-", "")
    url = 'http://www.sportsbookreview.com/betting-odds/mlb-baseball/merged/?date=' + compact_date
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page)
    team_map = Ugen.mlb_map(10, 4)
    # Looked up by the original code but not used afterwards.
    table = soup.find("div", {"class": "eventGroup class-mlb-baseball"})

    odds_dict = {}
    completed_games = soup.findAll("div", {"class": "event-holder holder-complete"})
    for game in completed_games:
        event_line = game.find("div", {"class": "eventLine odd status-complete "})
        if not event_line:
            event_line = game.find("div", {"class": "eventLine status-complete "})
        game_url = event_line.find("meta", {"itemprop": "url"}).get('content')
        matchup = get_gameday_odds(game_url, team_map)
        for team, odds in matchup.items():
            # Ian: first entry wins, so the second game of a double header
            # is not taken (FD usually does first).
            odds_dict.setdefault(team, odds)
    return odds_dict
def mlb_starting_lineups(date=None):
    """Scrape baseballpress.com starting lineups for one day.

    Args:
        date: ``'YYYY-MM-DD'`` string. Defaults to today's local date,
            resolved on each call (the old default was frozen at import).

    Returns:
        ``(teamid_dict, playerid_dict)``.

        ``teamid_dict`` maps team id to a dict with 'start_time', 'date',
        'lineup', 'home_teamid', 'opponent' and 'weather_forecast'.
        'lineup' maps each player id to ``{'arm', 'batting_order'}``; the
        starting pitcher is the last entry.

        ``playerid_dict`` maps each player id to 'start_time',
        'weather_forecast', 'teamid', 'opposing_lineup', 'arm',
        'batting_order' and 'home_teamid'.
    """
    if date is None:
        date = time.strftime("%Y-%m-%d")
    print(date)
    url = 'http://www.baseballpress.com/lineups/' + date
    content = urllib2.urlopen(url).read()
    # Ian: added this html.parser option based on suggestion from OSX
    # terminal...may not be necessary on windows??
    soup = BeautifulSoup(content, "html.parser")
    team_map = Ugen.mlb_map(6, 4)
    player_map = Ugen.mlb_map(2, 0)

    team_list, pitcher_list, pitcher_arm_list = [], [], []
    lineups_list, player_arm_list = [], []

    # One game-time and one forecast per game; store each twice so both
    # lists can be indexed per team.
    gametime_list = []
    for event_date in soup.findAll("div", {"class": "game-time"}):
        gametime_list.extend([event_date.text] * 2)

    weather_list = []
    for forecast in soup.findAll("a", {"target": "forecast"}):
        printable = ''.join(ch for ch in forecast.text if ch in string.printable)
        forecast_string = (printable.split('Forecast: ')[1].split(' PoP')[0]
                           .replace(" ", "-").replace("--", "-").replace('F', 'degF'))
        parts = forecast_string.split('-')
        if len(parts) == 4:
            forecast_string = parts[0] + '-' + parts[1] + ' ' + parts[2] + '-' + parts[3]
        weather_list.extend([forecast_string] * 2)

    for team in soup.findAll("div", {"class": "team-data"}):
        team_name = team.find("div", {"class": "team-name"}).get_text()
        pitcher_name = team.find("a", {"class": "player-link"}).get_text()
        pitcher_arm = team.find('div', {"class": "text"}).get_text().split('(')[1].split(')')[0]
        if team_name in team_map:  # Ian: Check if team name has been listed
            team_name = team_map[team_name]
        if pitcher_name in player_map:
            pitcher_name = player_map[pitcher_name]
        team_list.append(team_name)
        pitcher_list.append(pitcher_name.replace("'", "") + '_' + 'pitcher')
        pitcher_arm_list.append(pitcher_arm)

    def read_slot(text, marker, start):
        """Return (name, handedness) for the first ``marker`` at/after ``start``."""
        fields = text[text.find(marker, start) + 3:].split(" (")
        return fields[0], fields[1].split(')')[0]

    for table in soup.findAll("div", {"class": "cssDialog clearfix"}):
        table_string = table.get_text()
        # Ian: rethink; checks that both teams lineups have been listed.
        # What if only one has been..
        if table_string.count("9. ") != 2:
            lineups_list.append(['no home_lineup listed'])
            lineups_list.append(['no away_lineup listed'])
            player_arm_list.append(['no hitting style listed'])
            player_arm_list.append(['no hitting style listed'])
            continue
        # The first lineup in the text lines up with the first team-data
        # entry, the second with the next one.
        first_lineup, second_lineup, first_arms, second_arms = [], [], [], []
        for slot in range(1, 10):
            marker = str(slot) + ". "
            second_start = table_string.find(marker) + 3
            for start, lineup, arms in ((0, first_lineup, first_arms),
                                        (second_start, second_lineup, second_arms)):
                player, arm = read_slot(table_string, marker, start)
                arms.append(arm)
                if player in player_map:
                    player = player_map[player]
                lineup.append(player.replace("'", "") + '_' + 'batter')
        lineups_list.append(first_lineup)
        lineups_list.append(second_lineup)
        player_arm_list.append(first_arms)
        player_arm_list.append(second_arms)

    # Teams come in pairs: the odd-indexed team is the home team.
    opponent_dict = {
        team_list[i]: {'home_team': team_list[i], 'opponent': team_list[i - 1]}
        for i in range(len(team_list)) if i % 2 != 0
    }
    opponent_dict.update({
        team_list[i]: {'home_team': team_list[i + 1], 'opponent': team_list[i + 1]}
        for i in range(len(team_list)) if team_list[i] not in opponent_dict
    })

    teamid_dict = {}
    for i in range(len(lineups_list)):
        lineups_list[i].append(pitcher_list[i])
        player_arm_list[i].append(pitcher_arm_list[i])
        # Number the slots after the pitcher is appended. Numbering first
        # made zip() stop one short and drop every pitcher from the result.
        player_arm_dict = {
            player: {'arm': arm, 'batting_order': order}
            for order, (player, arm)
            in enumerate(zip(lineups_list[i], player_arm_list[i]), 1)
        }
        matchup = opponent_dict[team_list[i]]
        teamid_dict[team_list[i]] = {
            'start_time': gametime_list[i],
            'date': date,
            'lineup': player_arm_dict,
            'home_teamid': matchup['home_team'],
            'opponent': matchup['opponent'],
            'weather_forecast': weather_list[i],
        }

    playerid_dict = {}
    for team_id, team_info in teamid_dict.items():
        opposing_lineup = teamid_dict[team_info['opponent']]['lineup']
        for player, slot in team_info['lineup'].items():
            playerid_dict[player] = {
                'start_time': team_info['start_time'],
                'weather_forecast': team_info['weather_forecast'],
                'teamid': team_id,
                'opposing_lineup': opposing_lineup,
                'arm': slot['arm'],
                'batting_order': slot['batting_order'],
                'home_teamid': team_info['home_teamid'],
            }
    return teamid_dict, playerid_dict
def batter_lineup_stats(self, date, lineup_data, player_arm):
    """Plate-appearance-weighted strikeout rate, OPS and SLG for a lineup.

    Args:
        date: game date; history is read from "20130301" up to the day
            before it, as returned by ``Ugen.previous_day``.
        lineup_data: ``lineup_data[0]`` maps player ids of the form
            ``'<name>_<role>'`` to dicts holding 'batting_order';
            ``lineup_data[1]`` is compared with 'home'/'away' to pick the
            split.
        player_arm: a past game is used when its opposing starter's 'arm'
            equals this value, or when this value is ``'S'``.

    Returns:
        dict with 'strikeout_rate', 'ops' and 'slg'. Each is the sum of
        (expected plate appearances * player mean) divided by the sum of
        expected plate appearances, over players with at least three
        qualifying games. A stat with no qualifying players is NaN.

    Side effects:
        Players missing from the database are written to the "Output"
        sheet (columns 7 and 8) through ``Cell``.
    """
    stat_names = ('strikeout_rate', 'ops', 'slg')
    weighted_totals = dict((name, []) for name in stat_names)
    weights = dict((name, []) for name in stat_names)
    team_map = Ugen.mlb_map(11, 4)

    for player in lineup_data[0]:
        if player.split("_")[1] == 'pitcher':
            continue
        splits = dict((name, []) for name in stat_names)
        end_date = Ugen.previous_day(str(date)).replace("-", "")
        # may need to play with how much data you use to get batter's K avg
        player_data = self.get_db_gamedata(player.split("_")[0], "20130301", end_date)
        try:
            player_data = player_data[player]
        except KeyError:
            print('player %s not in db, needs new player map' % player)
            # Record the player in the first empty row, unless already listed.
            rw = 2
            map_list = []
            while Cell("Output", rw, 7).value:
                map_list.append(Cell("Output", rw, 7).value)
                rw += 1
            if player not in map_list:
                Cell("Output", rw, 7).value = player
                Cell("Output", rw, 8).value = end_date
            continue

        # Walk the games from the last row to the first.
        for reverse_index in range(len(player_data['event_id']) - 1, -1, -1):
            try:
                team = player_data['Team'][reverse_index]
                # This tells us whether the batter facing the current
                # pitcher is at home or away. We want his K% splits for H/A
                home_away = lineup_data[1]
                home_team = team_map[player_data['home_team'][reverse_index]]
                try:
                    if team == home_team:
                        matchup = 'home'
                        opposing_lineup = ast.literal_eval(
                            player_data['away_starting_lineup'][reverse_index])
                    else:
                        matchup = 'away'
                        opposing_lineup = ast.literal_eval(
                            player_data['home_starting_lineup'][reverse_index])
                    opposing_pitcher_hand = [
                        info['arm'] for name, info in opposing_lineup.items()
                        if 'pitcher' in name
                    ][0]
                    if matchup == home_away and (
                            player_arm == opposing_pitcher_hand or player_arm == 'S'):
                        strike_outs = player_data['strike_outs'][reverse_index]
                        plate_appearances = player_data['plate_appearances'][reverse_index]
                        if plate_appearances > 0:
                            # Convert before dividing: float(a / b) truncates
                            # integer inputs under Python 2 division.
                            strikeout_rate = float(strike_outs) / float(plate_appearances)
                            if numpy.isnan(strikeout_rate):
                                print("isnan error for calculated strikeout rate %s,%s"
                                      % (player, player_data['GameID'][reverse_index]))
                                splits['strikeout_rate'].append(0.200)
                            else:
                                splits['strikeout_rate'].append(strikeout_rate)
                        # Values that fail the >= 0 test (NaN included)
                        # are recorded as 0.
                        for name in ('ops', 'slg'):
                            value = player_data[name][reverse_index]
                            splits[name].append(value if value >= 0 else 0)
                except ValueError:
                    # This is when there is no starting lineups data (usually)
                    pass
            except IndexError:
                print('index error')
                break

        batting_order = lineup_data[0][player]['batting_order']
        for name in stat_names:
            # say we need 3 min values to incorporate the player's split
            if len(splits[name]) > 2:
                expected_pas = self.avg_plate_appearances[batting_order]
                weighted_totals[name].append(
                    float(expected_pas * numpy.mean(splits[name])))
                weights[name].append(expected_pas)

    lineup_stats_dict = {}
    for name in stat_names:
        lineup_stats_dict[name] = numpy.sum(weighted_totals[name]) / numpy.sum(weights[name])
    return lineup_stats_dict
def mlb_starting_lineups(date=None):
    """Scrape baseballpress.com starting lineups for one day.

    Args:
        date: ``'YYYY-MM-DD'`` string. Defaults to today's local date,
            resolved on each call (the old default was frozen at import).

    Returns:
        ``(teamid_dict, playerid_dict)``.

        ``teamid_dict`` maps team id to 'start_time', 'weather_forecast',
        'date', 'lineup', 'home_teamid' and 'opponent'. 'lineup' maps each
        player id to ``{'arm', 'batting_order'}`` with the order stored as
        a string; the starting pitcher is the last entry.

        ``playerid_dict`` maps each player id to 'start_time',
        'weather_forecast', 'teamid', 'arm', 'home_teamid',
        'opposing_lineup' and 'batting_order'.
    """
    if date is None:
        date = time.strftime("%Y-%m-%d")
    print(date)
    url = 'http://www.baseballpress.com/lineups/' + date
    content = urllib2.urlopen(url).read()
    soup = BeautifulSoup(content)
    team_map = Ugen.mlb_map(6, 4)
    player_map = Ugen.mlb_map(2, 0)

    team_list, pitcher_list, pitcher_arm_list = [], [], []
    lineups_list, player_arm_list = [], []

    gametime_list = [node.text for node in soup.findAll("div", {"class": "game-time"})]

    weather_list = []
    for forecast in soup.findAll("a", {"target": "forecast"}):
        printable = ''.join(ch for ch in forecast.text if ch in string.printable)
        forecast_string = (printable.split('Forecast: ')[1].split(' PoP')[0]
                           .replace(" ", "-").replace("--", "-").replace('F', 'degF'))
        parts = forecast_string.split('-')
        if len(parts) == 4:
            forecast_string = parts[0] + '-' + parts[1] + ' ' + parts[2] + '-' + parts[3]
        weather_list.append(forecast_string)

    for team in soup.findAll("div", {"class": "team-data"}):
        # A missing element makes find() return None (AttributeError); a
        # label without "(...)" makes the split index fail (IndexError).
        try:
            team_name = team.find("div", {"class": "team-name"}).get_text()
        except AttributeError:
            team_name = ''
        try:
            pitcher_name = team.find("a", {"class": "player-link"}).get_text()
        except AttributeError:
            pitcher_name = ''
        try:
            pitcher_arm = team.find('div', {"class": "text"}).get_text().split('(')[1].split(')')[0]
        except (AttributeError, IndexError):
            pitcher_arm = ''
        # Ian: Check if team name has been listed
        team_list.append(team_map[team_name] if team_name in team_map else team_name)
        if pitcher_name in player_map:
            pitcher_name = player_map[pitcher_name]
        pitcher_list.append(pitcher_name.replace("'", "") + '_' + 'pitcher')
        pitcher_arm_list.append(pitcher_arm)

    def read_slot(text, marker, start):
        """Return (name, handedness) for the first ``marker`` at/after ``start``."""
        fields = text[text.find(marker, start) + 3:].split(" (")
        return fields[0], fields[1].split(')')[0]

    for table in soup.findAll("div", {"class": "cssDialog clearfix"}):
        table_string = table.get_text()
        # Ian: rethink-checks that both teams lineups have been listed.
        # What if only one has been..
        if table_string.count("9. ") != 2:
            lineups_list.append(['no home_lineup listed'])
            lineups_list.append(['no away_lineup listed'])
            player_arm_list.append(['no hitting style listed'])
            player_arm_list.append(['no hitting style listed'])
            continue
        first_lineup, second_lineup, first_arms, second_arms = [], [], [], []
        for slot in range(1, 10):
            marker = str(slot) + ". "
            second_start = table_string.find(marker) + 3
            for start, lineup, arms in ((0, first_lineup, first_arms),
                                        (second_start, second_lineup, second_arms)):
                player, arm = read_slot(table_string, marker, start)
                arms.append(arm)
                if player in player_map:
                    player = player_map[player]
                lineup.append(player.replace("'", "") + '_' + 'batter')
        lineups_list.append(first_lineup)
        lineups_list.append(second_lineup)
        player_arm_list.append(first_arms)
        player_arm_list.append(second_arms)

    teamid_dict = {}
    for i, lineup in enumerate(lineups_list):
        game = i // 2  # two consecutive teams share one game time/forecast
        lineup.append(pitcher_list[i])
        player_arm_list[i].append(pitcher_arm_list[i])
        if i % 2 != 0:
            # The odd-indexed team of each pair is the home team.
            home_teamid, opponent = team_list[i], team_list[i - 1]
        else:
            home_teamid, opponent = team_list[i + 1], team_list[i + 1]
        teamid_dict[team_list[i]] = {
            'start_time': gametime_list[game],
            'weather_forecast': weather_list[game],
            'date': date,
            'lineup': {
                player: {'arm': arm, 'batting_order': str(order)}
                for order, (player, arm)
                in enumerate(zip(lineup, player_arm_list[i]), 1)
            },
            'home_teamid': home_teamid,
            'opponent': opponent,
        }

    playerid_dict = {}
    for i, (lineup, arms) in enumerate(zip(lineups_list, player_arm_list)):
        game = i // 2
        team_id = team_list[i]
        for order, (player, arm) in enumerate(zip(lineup, arms), 1):
            playerid_dict[player] = {
                'start_time': gametime_list[game],
                'weather_forecast': weather_list[game],
                'teamid': team_id,
                'arm': arm,
                'home_teamid': teamid_dict[team_id]['home_teamid'],
                'opposing_lineup': teamid_dict[teamid_dict[team_id]['opponent']]['lineup'],
                'batting_order': str(order),
            }
    return teamid_dict, playerid_dict
def weather_hourly(teamID, gametime):
    """Summarise the hourly forecast around a game's start time.

    ``gametime`` is formatted as 7:30 AM or 7:00 pm. At most three forecast
    entries are used: those whose hour equals the converted start hour, or
    one or two hours after it. Returns ``None`` when ``gametime`` cannot be
    parsed, otherwise a dict with ``temp``, ``humidity``, ``pop`` and a
    nested ``wind`` dict, each value rounded to two places.
    """
    zip_lookup = Ugen.mlb_map(4, 8)
    direction_lookup = Ugen.mlb_map(4, 9)
    batter_dir = direction_lookup[teamID]
    zipcode = str(zip_lookup[teamID])

    try:
        start_hour = convert_time_24(
            int(gametime.split()[0].split(":")[0]),
            int(gametime.split()[0].split(":")[1]),
            gametime.split()[1],
        )
    except:
        print("gametime %s is not in proper format" % gametime)
        return None

    data = weather_response("hourly", zipcode)
    tzone = data["hourly_forecast"][1]["FCTTIME"]["pretty"].split()[2]
    start_hour = convert_timezones(start_hour, "ET", tzone)

    temps = []
    humidities = []
    speeds = []
    offsets = []
    rain_chances = []

    def unpad(hour):
        # '00' -> '0'; any other leading-zero hour loses its zeros.
        if hour[0] == "0" and hour[1] == "0":
            return hour.replace("0", "", 1)
        if hour[0] == "0":
            return hour.replace("0", "")
        return hour

    def keep(bucket, read):
        # Best effort: a reading that cannot be produced is skipped.
        try:
            bucket.append(read())
        except:
            pass

    taken = 1  # counter to make sure you don't take forecast from the next day
    for entry in data["hourly_forecast"]:  # each entry is one hour
        hour = unpad(entry["FCTTIME"]["hour_padded"])
        if taken >= 4:
            continue
        if not (
            hour == str(start_hour)
            or hour == str(int(start_hour) + 1)
            or hour == str(int(start_hour) + 2)
        ):
            continue

        keep(temps, lambda: float(entry["temp"]["english"]))  # temp in degF
        keep(humidities, lambda: float(entry["humidity"]))
        keep(rain_chances, lambda: float(entry["pop"]))
        wdir = entry["wdir"]["degrees"]

        def offset_from_batter():
            # wdir is the direction the wind is blowing from;
            # right now do abs value
            if int(wdir) < 180:
                return abs(float((int(wdir) + 180) - batter_dir))
            return abs(float((int(wdir) - 180) - batter_dir))

        keep(offsets, offset_from_batter)
        keep(speeds, lambda: float(entry["wspd"]["english"]))  # mph or wspdm for metric
        taken += 1

    weather_dict = {}
    weather_dict["temp"] = round(np.mean(temps), 2)
    weather_dict["humidity"] = round(np.mean(humidities), 2)
    wind = {}
    wind["wind_dir"] = round(np.mean(offsets), 2)
    wind["wind_speed"] = round(np.mean(speeds), 2)
    weather_dict["wind"] = wind
    weather_dict["pop"] = round(np.mean(rain_chances), 2)
    return weather_dict
def mlb_starting_lineups(date=time.strftime("%Y-%m-%d")):
    """Scrape one day's starting lineups from baseballpress.com.

    ``date`` is a 'YYYY-MM-DD' string. Returns ``(teamid_dict,
    playerid_dict)``: the first is keyed by team id and holds start time,
    forecast, date, lineup, home team id and opponent; the second is keyed
    by player id and holds start time, forecast, team id, arm, home team id,
    opposing lineup and batting order (as a string).
    [desperately] Needs refactoring.
    """
    print(date)
    url = 'http://www.baseballpress.com/lineups/' + date
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page)
    team_map = Ugen.mlb_map(6, 4)
    player_map = Ugen.mlb_map(2, 0)

    team_list = []
    pitcher_list = []
    pitcher_arm_list = []
    lineups_list = []
    player_arm_list = []
    teamid_dict = {}
    playerid_dict = {}

    gametime_list = [tag.text for tag in soup.findAll("div", {"class": "game-time"})]

    weather_list = []
    for link in soup.findAll("a", {"target": "forecast"}):
        cleaned = ''.join([ch for ch in link.text if ch in string.printable])
        summary = cleaned.split('Forecast: ')[1].split(' PoP')[0]
        summary = summary.replace(" ", "-").replace("--", "-").replace('F', 'degF')
        if len(summary.split('-')) == 4:
            summary = (summary.split('-')[0] + '-' + summary.split('-')[1] + ' '
                       + summary.split('-')[2] + '-' + summary.split('-')[3])
        weather_list.append(summary)

    def text_or_blank(read):
        # Any failure while reading a field yields an empty string.
        try:
            return read()
        except:
            return ''

    for block in soup.findAll("div", {"class": "team-data"}):
        team_name = text_or_blank(
            lambda: block.find("div", {"class": "team-name"}).get_text())
        pitcher_name = text_or_blank(
            lambda: block.find("a", {"class": "player-link"}).get_text())
        pitcher_arm = text_or_blank(
            lambda: block.find('div', {"class": "text"}).get_text().split('(')[1].split(')')[0])
        if team_name in team_map:  # Ian: Check if team name has been listed
            team_list.append(team_map[team_name])
        else:
            team_list.append(team_name)
        if pitcher_name in player_map:
            pitcher_name = player_map[pitcher_name]
        pitcher_list.append(pitcher_name.replace("'", "") + '_' + 'pitcher')
        pitcher_arm_list.append(pitcher_arm)

    for table in soup.findAll("div", {"class": "cssDialog clearfix"}):
        text = table.get_text()
        # Ian: rethink-checks that both teams lineups have been listed.
        # What if only one has been..
        if text.count("9. ") != 2:
            lineups_list.append(['no home_lineup listed'])
            lineups_list.append(['no away_lineup listed'])
            player_arm_list.append(['no hitting style listed'])
            player_arm_list.append(['no hitting style listed'])
            continue
        home_lineup = []
        away_lineup = []
        home_arms = []
        away_arms = []
        for slot in range(1, 10):
            label = str(slot) + ". "
            first_at = text.find(label)
            pieces = text[first_at + 3:].split(" (")
            batter = pieces[0]
            home_arms.append(pieces[1].split(')')[0])
            if batter in player_map:
                batter = player_map[batter]
            home_lineup.append(batter.replace("'", "") + '_' + 'batter')

            second_at = text.find(label, text.find(label) + 3)
            pieces = text[second_at + 3:].split(" (")
            batter = pieces[0]
            away_arms.append(pieces[1].split(')')[0])
            if batter in player_map:
                batter = player_map[batter]
            away_lineup.append(batter.replace("'", "") + '_' + 'batter')
        lineups_list.append(home_lineup)
        lineups_list.append(away_lineup)
        player_arm_list.append(home_arms)
        player_arm_list.append(away_arms)

    game = 0  # index into gametime_list/weather_list; advances every two teams
    for idx in range(len(lineups_list)):
        team_id = team_list[idx]
        lineups_list[idx].append(pitcher_list[idx])
        player_arm_list[idx].append(pitcher_arm_list[idx])
        entry = {}
        teamid_dict[team_id] = entry
        entry['start_time'] = gametime_list[game]
        entry['weather_forecast'] = weather_list[game]
        entry['date'] = date
        roster = {}
        for position, (player, arm) in enumerate(
                zip(lineups_list[idx], player_arm_list[idx]), 1):
            roster[player] = {'arm': arm, 'batting_order': str(position)}
        entry['lineup'] = roster
        if idx % 2 != 0:
            game += 1
            entry['home_teamid'] = team_list[idx]
            entry['opponent'] = team_list[idx - 1]
        else:
            entry['home_teamid'] = team_list[idx + 1]
            entry['opponent'] = team_list[idx + 1]

    game = 0
    for idx in range(len(lineups_list)):
        for position, (player, arm) in enumerate(
                zip(lineups_list[idx], player_arm_list[idx]), 1):
            record = {}
            playerid_dict[player] = record
            record['start_time'] = gametime_list[game]
            record['weather_forecast'] = weather_list[game]
            record['teamid'] = team_list[idx]
            record['arm'] = arm
            record['home_teamid'] = teamid_dict[record['teamid']]['home_teamid']
            record['opposing_lineup'] = teamid_dict[
                teamid_dict[record['teamid']]['opponent']]['lineup']
            record['batting_order'] = str(position)
        if idx % 2 != 0:
            game += 1
    return teamid_dict, playerid_dict