def schedule_df(team_name, year=2019):
    """Return a team's schedule DataFrame tagged with the team name.

    Args:
        team_name: Team identifier passed straight through to ``Schedule``.
        year: Season passed to ``Schedule`` as its ``year`` keyword.
            Defaults to 2019, the value that was previously hard-coded,
            so existing single-argument calls behave as before.

    Returns:
        The ``dataframe`` of the requested schedule with its index reset
        and a ``team_name`` column in which every row holds ``team_name``.
    """
    frame = Schedule(team_name, year=year).dataframe
    # Move the index into a regular column; an unnamed index lands in a
    # column called "index", which is then reused as the team-name column.
    frame.reset_index(inplace=True)
    frame.rename(columns={"index": "team_name"}, inplace=True)
    # Overwrite (or create) the column so every row carries the team name.
    frame['team_name'] = team_name
    return frame
def setup_method(self, *args, **kwargs):
    """Prepare the expected values and stubs used by the schedule tests.

    Stores the expected attribute values for one game in ``self.results``,
    stubs ``Boxscore._parse_game_data``, ``Boxscore.dataframe`` and
    ``utils._todays_date``, then builds ``self.schedule`` for 'NYY'.
    """
    expected_game = {
        'game': 2,
        'boxscore_index': 'TBA/TBA201704040',
        'date': 'Tuesday, Apr 4',
        'datetime': datetime(2017, 4, 4),
        'game_number_for_day': 1,
        'location': AWAY,
        'opponent_abbr': 'TBR',
        'result': WIN,
        'runs_scored': 5,
        'runs_allowed': 0,
        'innings': 9,
        'record': '1-1',
        'rank': 3,
        'games_behind': 0.5,
        'winner': 'Sabathia',
        'loser': 'Odorizzi',
        'save': None,
        'game_duration': '3:07',
        'day_or_night': NIGHT,
        'attendance': 19366,
        'streak': '+',
    }
    self.results = expected_game

    # Stub Boxscore so that it returns canned values.
    flexmock(Boxscore).should_receive('_parse_game_data').and_return(None)
    placeholder_frame = pd.DataFrame([{'key': 'value'}])
    flexmock(Boxscore).should_receive('dataframe').and_return(placeholder_frame)

    # Pin the date reported by utils to the mocked YEAR / MONTH.
    mocked_today = MockDateTime(YEAR, MONTH)
    flexmock(utils).should_receive('_todays_date').and_return(mocked_today)

    self.schedule = Schedule('NYY')
def test_empty_page_return_no_games(self):
    """A schedule built when no stats table is returned has length zero."""
    # The "no data" hook is expected to be called exactly once.
    flexmock(utils).should_receive('_no_data_found').once()
    # Make the stats-table lookup return None.
    flexmock(utils).should_receive('_get_stats_table').and_return(None)

    empty_schedule = Schedule('NYY')

    assert len(empty_schedule) == 0
def test_no_dataframes_extended_returns_none(self):
    """``dataframe_extended`` is None when the game's extended frame is None."""
    # Stub _pull_schedule to return None while the schedule is constructed.
    flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
    schedule = Schedule('HOU')

    # A stand-in game whose extended DataFrame is None.
    game_stub = flexmock(dataframe_extended=None)
    # Install a PropertyMock returning the stub as __iter__ on the
    # schedule's class (note: this assigns on the type, not the instance).
    type(schedule).__iter__ = PropertyMock(return_value=game_stub)

    assert schedule.dataframe_extended is None
def test_invalid_dataframe_not_included_with_schedule_dataframes(self):
    """Invalid game DataFrames are left out of ``Schedule.dataframe``.

    When no valid DataFrames remain, the ``dataframe`` property should
    fall back to its default value of None.
    """
    # Stub _pull_schedule to return None while the schedule is constructed.
    flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
    schedule = Schedule('HOU')

    # A stand-in game with neither runs scored nor runs allowed set.
    game_stub = flexmock(_runs_scored=None, _runs_allowed=None)
    # Install a PropertyMock returning the stub as __iter__ on the
    # schedule's class (note: this assigns on the type, not the instance).
    type(schedule).__iter__ = PropertyMock(return_value=game_stub)

    assert schedule.dataframe is None
def test_invalid_dataframe_not_included_with_schedule_dataframes(self):
    """Invalid game DataFrames are left out of ``Schedule.dataframe``.

    When no valid DataFrames remain, reading the ``dataframe`` property
    should raise a ValueError.
    """
    # Stub _pull_schedule to return None while the schedule is constructed.
    flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
    schedule = Schedule('HOU')

    # A stand-in game with neither runs scored nor runs allowed set.
    game_stub = flexmock(_runs_scored=None, _runs_allowed=None)
    # Install a PropertyMock returning the stub as __iter__ on the
    # schedule's class (note: this assigns on the type, not the instance).
    type(schedule).__iter__ = PropertyMock(return_value=game_stub)

    with pytest.raises(ValueError):
        schedule.dataframe
def test_mlb_invalid_default_year_reverts_to_previous_year(self, *args, **kwargs):
    """Check the second scheduled game when the season lookup yields 2018.

    ``utils._find_year_for_season`` is stubbed to return 2018; every
    attribute listed in ``results`` is then compared against ``schedule[1]``.
    """
    results = {
        'game': 2,
        'boxscore_index': 'TBA/TBA201704040',
        'date': 'Tuesday, Apr 4',
        'datetime': datetime(2017, 4, 4),
        'game_number_for_day': 1,
        'location': AWAY,
        'opponent_abbr': 'TBR',
        'result': WIN,
        'runs_scored': 5,
        'runs_allowed': 0,
        'innings': 9,
        'record': '1-1',
        'rank': 3,
        'games_behind': 0.5,
        'winner': 'Sabathia',
        'loser': 'Odorizzi',
        'save': None,
        'game_duration': '3:07',
        'day_or_night': NIGHT,
        'attendance': 19366,
        'streak': '+',
    }

    # Stub Boxscore so that it returns canned values.
    flexmock(Boxscore).should_receive('_parse_game_data').and_return(None)
    placeholder_frame = pd.DataFrame([{'key': 'value'}])
    flexmock(Boxscore).should_receive('dataframe').and_return(placeholder_frame)

    # Force the season lookup to report 2018.
    flexmock(utils).should_receive('_find_year_for_season').and_return(2018)

    schedule = Schedule('NYY')

    for field_name, expected in results.items():
        actual = getattr(schedule[1], field_name)
        assert actual == expected
def get_day(str):
    """Return the day of the month parsed from a date string.

    The string is parsed with the format ``" %B %d, %Y"`` (note the
    leading space), e.g. ``" April 4, 2017"`` gives ``4``.

    NOTE: the parameter name shadows the ``str`` builtin; it is kept so
    existing keyword callers continue to work.
    """
    return datetime.strptime(str, " %B %d, %Y").day


def to_duration(str):
    """Convert an ``H:MM`` string into a number of minutes (float).

    ``strptime`` fills the missing date fields with 1900-01-01, so
    subtracting that date leaves only the parsed hours and minutes;
    e.g. ``'3:07'`` gives ``187.0``.

    NOTE: the parameter name shadows the ``str`` builtin; it is kept so
    existing keyword callers continue to work.
    """
    t1 = datetime.strptime(str, '%H:%M')
    t2 = datetime(1900, 1, 1)
    return (t1 - t2).total_seconds() / 60.0


today = date.today()
teams = Teams()
for team in teams:
    abr = team.abbreviation
    sched = Schedule(abr)

    # Load the rows already saved for this team from "<abbreviation>.csv".
    fname = abr + '.csv'
    old_df = pd.read_csv(fname)
    new_rows = []

    # Month and day of the last saved row. Read the scalars with .iloc[-1]:
    # calling int() on a one-element Series (as tail(1)[col] produces) is
    # deprecated in recent pandas releases.
    last_month = int(old_df['month'].iloc[-1])
    last_day = int(old_df['day'].iloc[-1])
    # The CSV stores no year here, so the current year is used.
    last_date = datetime(today.year, last_month, last_day)

    for game in sched:
        # Only games after the last saved date are fetched. The 16-character
        # check presumably keeps well-formed indexes such as
        # 'BOS/BOS201808020' -- TODO confirm.
        if game.datetime > last_date and len(game.boxscore_index) == 16:
            new_rows.append(Boxscore(game.boxscore_index).dataframe)
from sportsreference.mlb.teams import Teams
from sportsreference.mlb.boxscore import Boxscore
from sportsreference.mlb.schedule import Schedule
import pandas as pd

##### Single Team Work ######

# Fetch Seattle's extended schedule and rewrite the date column as
# MM/DD/YYYY strings.
sea_df = Schedule('SEA').dataframe_extended
sea_df['date'] = pd.to_datetime(sea_df.date).dt.strftime('%m/%d/%Y')

# Work out the home and away team abbreviation for every row: the winning
# abbreviation is used when the 'winner' value matches the side in
# question, otherwise the losing abbreviation is used.
home_team = []
away_team = []
for ind in sea_df.index:
    home_team.append(
        sea_df['winning_abbr'][ind]
        if sea_df['winner'][ind] == 'Home'
        else sea_df['losing_abbr'][ind]
    )
    away_team.append(
        sea_df['winning_abbr'][ind]
        if sea_df['winner'][ind] == 'Away'
        else sea_df['losing_abbr'][ind]
    )

# Insert the new columns at fixed positions 81 and 82 (duplicates allowed)
# and tag every row with the team.
sea_df.insert(81, 'home_team', home_team, True)
sea_df.insert(82, 'away_team', away_team, True)
sea_df['team'] = 'SEA'
# already integrated with pandas # print datafram for each team for team in Teams(): print(team.name) print(team.dataframe) # use .loc[] to access a single ron for team in Teams(): print(team.name) schedule = team.schedule # Request the current team's schedule for game in schedule: print(game.date, game.points_scored, game.points_allowed) # different way to access schedule houston_schedule = Schedule('HOU') for game in houston_schedule: print(game.date, game.points_scored, game.points_allowed) # get boxscores game_data = Boxscore('BOS/BOS201808020') print(game_data.away_runs, game_data.home_runs) print(game_data.dataframe) # different way to get boxscores houston_schedule = Schedule('HOU') for game in houston_schedule: print(game.boxscore_index) # Prints the boxscore URI for each game # Returns an instance of the Boxscore class for this specific game
elif re.search(sPattern2, tLinkText): teams.append(tLinkText) else: for tLink in tLinks: tLinkText = tLink.text if re.search(sPattern1, tLinkText): teams.append(tLinkText) #removing duplicates teams = list(dict.fromkeys(teams)) baseBall.append(teams) #loop through each baseball team, if any, and see if they are playing at home today if len(teams) > 0: for team in teams: sch = Schedule(mlbAbbr[team]) for game in sch: if game.date == today and game.location == 'Home': isPlaying.append(team) whosePlaying.append(isPlaying) #creating the data frame df = pd.DataFrame(list( zip(Names, States, Lat, Long, TZ, Pops, Area, Demonyms, featColl, baseBall, whosePlaying, weather)), columns=[ 'City Name', 'State', 'Latitude', 'Longitude', 'Time Zone', 'Population', 'Area', 'Demonyms', 'Notable Educational Insitiutiions', 'Baseball Team(s)', 'What team is playing at home today', 'the weather' ])