Example #1
0
def scrape_game(game_id, date, if_scrape_shifts):
    """
    Scrape the data for a single game.

    The pbp is always scraped. The shifts are only scraped when the caller asks for them
    and the pbp itself came back successfully.

    A game missing its roster, json pbp, teams or players is recorded in both
    broken_pbp_games and broken_shifts_games. A game whose pbp comes back as None is
    recorded in broken_pbp_games only.

    :param game_id: game to scrape
    :param date: ex: 2016-10-24
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts

    :return: DataFrame of pbp info (None when the game could not be scraped)
             (optional) the result of scrape_shifts, otherwise just None
    """
    print(' '.join(('Scraping Game ', game_id, date)))

    roster = playing_roster.scrape_roster(game_id)
    # The json holds both the player info (id's) and the plays
    game_json = json_pbp.get_pbp(game_id)
    players, teams = get_teams_and_players(game_json, roster, game_id)

    # Every one of these is required - give up on the game if any is missing
    if not (roster and game_json and teams and players):
        broken_pbp_games.append([game_id, date])
        broken_shifts_games.append([game_id, date])
        return None, None

    pbp_df = scrape_pbp(game_id, date, roster, game_json, players, teams)
    if pbp_df is None:
        # Shifts are never attempted when the pbp failed
        broken_pbp_games.append([game_id, date])
        return None, None

    shifts_df = scrape_shifts(game_id, players, date) if if_scrape_shifts else None
    return pbp_df, shifts_df
def test_combine_players_lists(players):
    """
    Check that the players from the json pbp and the html roster are combined correctly.

    :param players: expected combined players (supplied by the caller/test harness)
    """
    game_id = "2017020891"

    # Players as listed in the json pbp feed
    raw_json_players = json_pbp.get_pbp(game_id)['gameData']['players']
    json_players = game_scraper.get_players_json(raw_json_players)

    # Players as listed in the html roster
    html_players = playing_roster.scrape_roster(game_id)['players']

    combined = game_scraper.combine_players_lists(json_players, html_players, game_id)
    assert players == combined
Example #3
0
    def scrape_live_game(self, force=False):
        """
        Scrape the live info for this game and refresh the stored state.

        The previous statuses and DataFrames are rolled over first, then the api status is
        updated from the json feed. The pbp (and the shifts, when self.if_scrape_shifts is
        set) are only scraped if the game has started and player info is available.

        :param force: Whether to scrape no matter what (used for intermission here)

        :return: None
        """
        game_json = json_pbp.get_pbp(str(self.game_id))
        if game_json is None:
            # Nothing can be done without the json feed
            return

        # Roll the current game statuses over before anything gets updated
        self.prev_api_game_status = self.api_game_status
        self.prev_html_game_status = self.html_game_status

        # Same for the pbp & shift DataFrames
        self.prev_pbp_df = self.pbp_df
        self.prev_shifts_df = self.shifts_df

        # "Intermission" is a status of our own; outside of intermission the api's status is used as is.
        intermission_info = game_json['liveData']['linescore']['intermissionInfo']
        if not intermission_info['inIntermission']:
            self.api_game_status = game_json["gameData"]["status"]["abstractGameState"]
        else:
            self.api_game_status = "Intermission"
            self.intermission_time_remaining = intermission_info["intermissionTimeRemaining"]

            # When is_intermission() agrees we can skip scraping entirely, unless the caller forces it.
            # It won't agree while the HTML hasn't caught up to the intermission yet.
            if self.is_intermission() and not force:
                return

        # Game hasn't started yet - record the status for both feeds and leave
        abstract_state = game_json["gameData"]["status"]["abstractGameState"]
        if abstract_state == "Preview":
            self.html_game_status = abstract_state
            self.api_game_status = abstract_state
            return

        # Player info is fetched the first time it is needed (or first becomes available).
        # Fetching it any earlier risks it being missing or out of date.
        if not self.players:
            roster = playing_roster.scrape_roster(self.game_id)
            if roster is None:
                return  # Still no roster, so we leave - Termination Reason #2
            self.players, _ = game_scraper.get_teams_and_players(
                game_json, roster, self.game_id)
            self.head_coaches = roster['head_coaches']

        # Scraper warnings are just noise here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            # The live pbp scraper takes the roster and teams as dicts
            coaches_only_roster = {"head_coaches": self.head_coaches}
            teams = {"Home": self.home_team, "Away": self.away_team}
            self.pbp_df, self.html_game_status = game_scraper.scrape_pbp_live(
                self.game_id,
                self.date,
                coaches_only_roster,
                game_json,
                self.players,
                teams,
                espn_id=self.espn_id)

            # Shifts only when asked for
            if self.if_scrape_shifts:
                self.shifts_df = game_scraper.scrape_shifts(
                    self.game_id, self.players, self.date)
def scraped_roster():
    """
    Scrape the roster for game 2016020475.

    :return: whatever playing_roster.scrape_roster returns for that game
    """
    game_id = "2016020475"
    return playing_roster.scrape_roster(game_id)