def test_mlb_teams_string_representation(self):
    expected = """Los Angeles Dodgers (LAD)
Cleveland Indians (CLE)
Houston Astros (HOU)
Washington Nationals (WSN)
Boston Red Sox (BOS)
Arizona Diamondbacks (ARI)
Chicago Cubs (CHC)
New York Yankees (NYY)
Colorado Rockies (COL)
Milwaukee Brewers (MIL)
Minnesota Twins (MIN)
St. Louis Cardinals (STL)
Los Angeles Angels (LAA)
Tampa Bay Rays (TBR)
Kansas City Royals (KCR)
Seattle Mariners (SEA)
Texas Rangers (TEX)
Miami Marlins (MIA)
Toronto Blue Jays (TOR)
Pittsburgh Pirates (PIT)
Baltimore Orioles (BAL)
Oakland Athletics (OAK)
Atlanta Braves (ATL)
San Diego Padres (SDP)
New York Mets (NYM)
Cincinnati Reds (CIN)
Chicago White Sox (CHW)
Philadelphia Phillies (PHI)
San Francisco Giants (SFG)
Detroit Tigers (DET)"""

    teams = Teams()

    assert teams.__repr__() == expected
from sportsreference.mlb.teams import Teams


def add_season_data(apps, schema_editor):
    Season = apps.get_model('blog', 'Season')
    summary_list = []
    for x in range(1970, 2015):
        season = str(x)
        teams = Teams(season)
        for team in teams:
            if team.abbreviation == 'ATL':
                summary_list.append(
                    (season,
                     team.wins,
                     team.losses,
                     team.earned_runs_against_plus,
                     team.batting_average,
                     team.on_base_plus_slugging_percentage_plus,
                     team.run_difference))
    summary = tuple(summary_list)
    Season.objects.bulk_create(
        Season(year=season, wins=wins, losses=losses, era_plus=era,
               bat_ave=batting_average, ops=ops, run_diff=run_diff)
        for season, wins, losses, era, batting_average, ops, run_diff
        in summary)
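# A minimal sketch (not from the original) of how add_season_data might be
# wired into a Django data migration with RunPython. The 'blog' app label
# comes from the function above; the '0001_initial' dependency is a
# placeholder and would need to match the project's actual previous migration.
from django.db import migrations


def remove_season_data(apps, schema_editor):
    # Reverse operation: delete the rows created by add_season_data.
    Season = apps.get_model('blog', 'Season')
    Season.objects.all().delete()


class Migration(migrations.Migration):

    dependencies = [
        ('blog', '0001_initial'),  # placeholder previous migration
    ]

    operations = [
        migrations.RunPython(add_season_data, remove_season_data),
    ]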
def test_mlb_integration_all_teams_dataframe_returns_dataframe(
        self, *args, **kwargs):
    teams = Teams()
    result = teams.dataframes.drop_duplicates(keep=False)

    assert len(result) == len(self.abbreviations)
    assert set(result.columns.values) == set(self.results.keys())
def test_mlb_integration_returns_correct_team_abbreviations(
        self, *args, **kwargs):
    teams = Teams()

    for team in teams:
        assert team.abbreviation in self.abbreviations
def test_mlb_integration_returns_correct_attributes_for_team(
        self, *args, **kwargs):
    teams = Teams()

    houston = teams('HOU')

    for attribute, value in self.results.items():
        assert getattr(houston, attribute) == value
def test_mlb_invalid_default_year_reverts_to_previous_year(
        self, *args, **kwargs):
    flexmock(utils) \
        .should_receive('_find_year_for_season') \
        .and_return(2018)

    teams = Teams()

    for team in teams:
        assert team._year == '2017'
def test_mlb_empty_page_returns_no_teams(self):
    flexmock(utils) \
        .should_receive('_no_data_found') \
        .once()
    flexmock(utils) \
        .should_receive('_get_stats_table') \
        .and_return(None)

    teams = Teams()

    assert len(teams) == 0
def test_mlb_integration_dataframe_returns_dataframe(
        self, *args, **kwargs):
    teams = Teams()
    df = pd.DataFrame([self.results], index=['HOU'])

    houston = teams('HOU')
    # Pandas doesn't natively allow comparisons of DataFrames.
    # Concatenating the two DataFrames (the one generated during the test
    # and the expected one above) and dropping duplicate rows leaves only
    # the rows that are unique between the two frames. This allows a quick
    # check of the DataFrame to see if it is empty - if so, all rows are
    # duplicates, and they are equal.
    frames = [df, houston.dataframe]
    df1 = pd.concat(frames).drop_duplicates(keep=False)

    assert df1.empty
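# As an aside, pandas ships an assertion helper that can replace the
# concat/drop_duplicates trick above. The method below is a hypothetical
# sketch, not part of the original suite; it assumes the two frames share the
# same index and dtypes, and check_like=True ignores column ordering.
def test_mlb_integration_dataframe_matches_expected_frame(
        self, *args, **kwargs):
    from pandas.testing import assert_frame_equal

    teams = Teams()
    df = pd.DataFrame([self.results], index=['HOU'])

    houston = teams('HOU')

    assert_frame_equal(df, houston.dataframe, check_like=True)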
def setup_method(self, *args, **kwargs):
    self.results = {
        'rank': 3,
        'abbreviation': 'HOU',
        'name': 'Houston Astros',
        'league': 'AL',
        'games': 162,
        'wins': 101,
        'losses': 61,
        'win_percentage': .624,
        'streak': None,
        'runs': 5.5,
        'runs_against': 4.3,
        'run_difference': 1.2,
        'strength_of_schedule': 0.0,
        'simple_rating_system': 1.2,
        'pythagorean_win_loss': '99-63',
        'luck': 2,
        'interleague_record': '15-5',
        'home_record': '48-33',
        'home_wins': 48,
        'home_losses': 33,
        'away_record': '53-28',
        'away_wins': 53,
        'away_losses': 28,
        'extra_inning_record': '4-4',
        'extra_inning_wins': 4,
        'extra_inning_losses': 4,
        'single_run_record': '19-13',
        'single_run_wins': 19,
        'single_run_losses': 13,
        'record_vs_right_handed_pitchers': '80-37',
        'wins_vs_right_handed_pitchers': 80,
        'losses_vs_right_handed_pitchers': 37,
        'record_vs_left_handed_pitchers': '21-24',
        'wins_vs_left_handed_pitchers': 21,
        'losses_vs_left_handed_pitchers': 24,
        'record_vs_teams_over_500': '18-15',
        'wins_vs_teams_over_500': 18,
        'losses_vs_teams_over_500': 15,
        'record_vs_teams_under_500': '83-46',
        'wins_vs_teams_under_500': 83,
        'losses_vs_teams_under_500': 46,
        'last_ten_games_record': None,
        'wins_last_ten_games': None,
        'losses_last_ten_games': None,
        'last_twenty_games_record': None,
        'wins_last_twenty_games': None,
        'losses_last_twenty_games': None,
        'last_thirty_games_record': None,
        'wins_last_thirty_games': None,
        'losses_last_thirty_games': None,
        'number_players_used': 46,
        'average_batter_age': 28.8,
        'plate_appearances': 6271,
        'at_bats': 5611,
        'total_runs': 896,
        'hits': 1581,
        'doubles': 346,
        'triples': 20,
        'home_runs': 238,
        'runs_batted_in': 854,
        'stolen_bases': 98,
        'times_caught_stealing': 42,
        'bases_on_balls': 509,
        'times_struck_out': 1087,
        'batting_average': .282,
        'on_base_percentage': .346,
        'slugging_percentage': .478,
        'on_base_plus_slugging_percentage': .823,
        'on_base_plus_slugging_percentage_plus': 127,
        'total_bases': 2681,
        'grounded_into_double_plays': 139,
        'times_hit_by_pitch': 70,
        'sacrifice_hits': 11,
        'sacrifice_flies': 61,
        'intentional_bases_on_balls': 27,
        'runners_left_on_base': 1094,
        'number_of_pitchers': 27,
        'average_pitcher_age': 28.5,
        'runs_allowed_per_game': 4.32,
        'earned_runs_against': 4.12,
        'games_finished': 161,
        'complete_games': 1,
        'shutouts': 9,
        'complete_game_shutouts': 0,
        'saves': 45,
        'innings_pitched': 1446.0,
        'hits_allowed': 1314,
        'home_runs_against': 192,
        'bases_on_walks_given': 522,
        'strikeouts': 1593,
        'hit_pitcher': 70,
        'balks': 4,
        'wild_pitches': 86,
        'batters_faced': 6111,
        'earned_runs_against_plus': 96,
        'fielding_independent_pitching': 3.91,
        'whip': 1.270,
        'hits_per_nine_innings': 8.2,
        'home_runs_per_nine_innings': 1.2,
        'bases_on_walks_given_per_nine_innings': 3.2,
        'strikeouts_per_nine_innings': 9.9,
        'strikeouts_per_base_on_balls': 3.05,
        'opposing_runners_left_on_base': 1073
    }
    self.abbreviations = [
        'NYY', 'BOS', 'ATL', 'LAA', 'HOU', 'MIL', 'PHI', 'ARI', 'STL',
        'PIT', 'SEA', 'WSN', 'CHC', 'COL', 'NYM', 'TOR', 'CLE', 'SFG',
        'OAK', 'MIN', 'DET', 'TBR', 'LAD', 'TEX', 'SDP', 'MIA', 'CIN',
        'KCR', 'BAL', 'CHW'
    ]

    flexmock(utils) \
        .should_receive('_todays_date') \
        .and_return(MockDateTime(YEAR, MONTH))

    self.teams = Teams()
def test_mlb_invalid_team_name_raises_value_error(self, *args, **kwargs):
    teams = Teams()

    with pytest.raises(ValueError):
        teams('INVALID_NAME')
from sportsreference.mlb.teams import Teams

year_start = 2000  # starting year
year_end = 2019  # ending year
sport = 'MLB'

while year_start <= year_end:
    teams = Teams(year_start)
    for team in teams:
        zscore = (team.wins - (0.5 * (team.wins + team.losses))) / (
            (team.wins + team.losses) * 0.5 * 0.5)**(0.5)
        zscoreabs = abs(zscore)
        print(
            f'{sport},{team.name},{team.wins},{team.losses},{year_start},'
            f'{zscore},{zscoreabs}'
        )
    year_start += 1
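# For reference, the z-score above is the normal approximation to a binomial
# test against a .500 team: with n = W + L games and p = 0.5,
# z = (W - n*p) / sqrt(n*p*(1-p)), which simplifies to (W - L) / sqrt(W + L).
# A quick sanity check of that simplification (values are illustrative only):
import math

wins, losses = 101, 61
n = wins + losses
z_full = (wins - 0.5 * n) / math.sqrt(n * 0.25)
z_simple = (wins - losses) / math.sqrt(n)
assert math.isclose(z_full, z_simple)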
def test_mlb_integration_returns_correct_number_of_teams(
        self, *args, **kwargs):
    teams = Teams()

    assert len(teams) == len(self.abbreviations)
from sportsreference.mlb.teams import Teams
from sportsreference.mlb.boxscore import Boxscore
from sportsreference.mlb.schedule import Schedule
import pandas as pd
import datetime

# Enter Opening Day to remove every team's first game because no data
opening_day = datetime.datetime(2017, 4, 4)
x = opening_day.strftime('%m/%d/%Y')

# Create dataframe of all unique games for a season
games = []
dates = []
team1 = []
team2 = []
for team in Teams('2017'):
    schedule = team.schedule
    for game in schedule:
        games.append(game.boxscore_index)
        dates.append(game.datetime)
        team1.append(team.abbreviation)
        team2.append(game.opponent_abbr)

sched_df = pd.DataFrame(list(zip(dates, games, team1, team2)),
                        columns=['Date', 'Game', 'Team1', 'Team2'])
sched_df = sched_df.drop_duplicates(subset='Game')
sched_df['Date'] = pd.to_datetime(sched_df.Date)
sched_df['Date'] = sched_df['Date'].dt.strftime('%m/%d/%Y')

# Filter out opening games
sched_df = sched_df[sched_df['Date'] != x]
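# The unused Boxscore import above suggests the next step is pulling per-game
# data. A minimal sketch (not from the original script) of looking up the
# boxscore for the first remaining game, assuming the boxscore_index values
# collected above are valid sportsreference boxscore URIs:
first_game = sched_df['Game'].iloc[0]
box = Boxscore(first_game)
print(box.dataframe)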
import streamlit as st
import pandas as pd
import pickle
from sportsreference.mlb.teams import Teams
from sportsreference.mlb.schedule import Schedule

teams = Teams('2021')
df = teams.dataframes
keys = list(df['name'].values)  # full_name
vals = list(df['abbreviation'].values)  # abbreviation
team_dict = dict(zip(keys, vals))


# @st.cache
def schedule_df(team_name):
    team_obj = Schedule(team_name, year=2019)
    df = team_obj.dataframe
    df.reset_index(inplace=True)
    df.rename(columns={"index": "team_name"}, inplace=True)
    df['team_name'] = team_name
    return df


team_list = list(team_dict.values())
schedule_dict = {team: schedule_df(team) for team in team_list}
teams_df = pd.concat(schedule_dict.values(), ignore_index=True)
pickle.dump(teams_df, open("pickle_jar/df_2019.p", "wb"))
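# A hedged sketch (not from the original) of how the pickled schedules might
# then be consumed inside the Streamlit app: the selectbox labels reuse the
# full-name -> abbreviation mapping built above, and the file path simply
# mirrors the dump call; both are assumptions about the app layout.
teams_df = pickle.load(open("pickle_jar/df_2019.p", "rb"))
selected_name = st.selectbox("Team", list(team_dict.keys()))
selected_abbr = team_dict[selected_name]
st.dataframe(teams_df[teams_df['team_name'] == selected_abbr])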
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(sys.path[0])))

from sportsreference.mlb.teams import Teams

for team in Teams():
    print(team.name)
    for player in team.roster.players:
        try:
            print(player.name)
        except UnicodeEncodeError:
            print(player.name.encode('utf-8'))
    for game in team.schedule:
        print(game.dataframe)
        print(game.dataframe_extended)
from datetime import date, datetime

import pandas as pd

from sportsreference.mlb.teams import Teams
from sportsreference.mlb.schedule import Schedule


def get_month(str):
    return datetime.strptime(str, " %B %d, %Y").month


def get_day(str):
    return datetime.strptime(str, " %B %d, %Y").day


def to_duration(str):
    t1 = datetime.strptime(str, '%H:%M')
    t2 = datetime(1900, 1, 1)
    return (t1 - t2).total_seconds() / 60.0


today = date.today()
teams = Teams()
for team in teams:
    abr = team.abbreviation
    sched = Schedule(abr)

    # open csv
    # get the last row month and day
    fname = abr + '.csv'
    old_df = pd.read_csv(fname)
    new_rows = []
    last_month = int(old_df.tail(1)['month'])
    last_day = int(old_df.tail(1)['day'])
    last_date = datetime(today.year, last_month, last_day)

    for game in sched:
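        # The original snippet ends at the loop header above. What follows is
        # a hedged sketch (not from the source) of one plausible continuation:
        # append rows only for games newer than the last date already in the
        # CSV. The attributes `game.boxscore.date` (e.g. 'Wednesday, April 4,
        # 2018') and `game.boxscore.duration` (e.g. '3:05') are assumptions
        # chosen to match the helper formats defined at the top of the script.
        date_part = game.boxscore.date.split(',', 1)[1]  # ' April 4, 2018'
        game_date = datetime(today.year, get_month(date_part),
                             get_day(date_part))
        if game_date > last_date:
            new_rows.append({'month': game_date.month,
                             'day': game_date.day,
                             'duration': to_duration(game.boxscore.duration)})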
import datetime

from urllib.request import urlopen

from bs4 import BeautifulSoup

from sportsreference.mlb.teams import Teams

baseBall = []
whosePlaying = []
weather = []
mlbTeams = ""
mlbAbbr = {"Team": "Abbr"}

# check for mlb team, if you find a match, look at the schedule to see if
# there is a game today
td = datetime.datetime.now()
# Weekday, Mon date
wD = td.strftime("%A")
mon = td.strftime("%b")
d = td.strftime("%d")
today = wD + ", " + mon + " " + d

for n, team in enumerate(Teams()):
    if n == 0:
        mlbTeams += "(" + team.name + ")"
    else:
        mlbTeams += "|(" + team.name + ")"
    mlbAbbr[team.name] = team.abbreviation

# loop through the wiki tags list (cTags, defined earlier in the script) and
# parse each wikipedia page for scraping
for count, cTag in enumerate(cTags):
    qpage = "https://en.wikipedia.org/wiki/" + cTag
    print(qpage)
    page = urlopen(qpage)
    soup = BeautifulSoup(page, 'html.parser')

    # general info table for cities
    info = soup.find('table', {"class": "infobox geography vcard"})

    # main contents
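    # A hedged sketch (not from the original) of how the mlbTeams alternation
    # pattern built above might be used: scan the page text for a team name
    # and record it alongside its abbreviation. Variable names below are
    # illustrative only.
    import re

    page_text = soup.get_text()
    match = re.search(mlbTeams, page_text)
    if match:
        team_name = match.group(0)
        whosePlaying.append((team_name, mlbAbbr.get(team_name)))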