コード例 #1
0
def test():
    """Fetch, parse and print the inning_all page for one hard-coded game.

    This is a manual smoke test: it prints a progress message before each
    step and finally prints whatever ``parse_inning_all`` returns.
    """
    from pygameday import scrape

    # Only the inning_all page is exercised here; the epg, players and
    # hit chart pages are not fetched by this test.
    gid_path = '/components/game/mlb/year_2014/month_04/day_04/gid_2014_04_04_anamlb_houmlb_1'

    print("Fetching innings_all page")
    page = scrape.fetch_inning_all(gid_path)

    print("Parsing innings_all page")
    print(parse_inning_all(page))
コード例 #2
0
    def process_game(self, game_xml_node):
        """Ingests a single game's GameDay data

        Fetches the game's hit chart, players and inning_all pages, parses
        them, attaches the parsed at-bats and hits in play to the game, and
        inserts the players and then the game into the database.

        The game is skipped (nothing is fetched or inserted) when its ID is
        already in ``self.gameday_ids``, when ``parse.parse_game`` returns
        ``None``, or when its game type is "S" or "E" and
        ``self.ingest_spring_training`` is false.

        Database errors are logged and rolled back rather than raised.
        Successfully inserted player and game IDs are recorded in
        ``self.player_ids`` and ``self.gameday_ids``.

        Parameters
        ----------
        game_xml_node : lxml node
            The XML node of the game to process
        """
        game_dir = game_xml_node.get("game_data_directory")
        gameday_id = game_xml_node.get("id")

        if gameday_id in self.gameday_ids:
            # The game has been processed and should already be in the database
            logger.warning("Skipping game: {}. It's already in the DB.".format(gameday_id))
            return

        # Parse the game
        db_game = parse.parse_game(game_xml_node)

        # If no data comes back, the game probably wasn't a Final. Abort.
        if db_game is None:
            msg = ("Skipping game: {}. It contained no data, "
                   "probably because its status isn't Final").format(gameday_id)
            logger.warning(msg)
            return

        # If the game is a spring training game, skip it if ingest_spring_training is False
        if not self.ingest_spring_training and db_game.game_type in ("S", "E"):
            msg = "Skipping game: {}. It's a spring training or exhibition game.".format(gameday_id)
            logger.warning(msg)
            return

        msg = "Processing game ID {}".format(gameday_id)
        logger.info(msg)

        # --------------------------------------------------------------------------------------------------------------
        # Fetch game data
        #
        hit_chart_page = scrape.fetch_hit_chart(game_dir)
        players_page = scrape.fetch_players(game_dir)
        inning_all_page = scrape.fetch_inning_all(game_dir)

        # Fetch failures are only logged; parsing below is still attempted.
        # NOTE(review): confirm the parse functions tolerate a None page.
        if hit_chart_page is None:
            msg = "Error fetching hit chart page for game {}".format(gameday_id)
            logger.error(msg)
        if players_page is None:
            msg = "Error fetching players page for game {}".format(gameday_id)
            logger.error(msg)
        if inning_all_page is None:
            msg = "Error fetching inning events page for game {}".format(gameday_id)
            logger.error(msg)

        # --------------------------------------------------------------------------------------------------------------
        # Parse AtBats (including Pitches), HitsInPlay, Players
        #
        db_at_bats = parse.parse_inning_all(inning_all_page)  # Appends Pitches to AtBats
        db_hips = parse.parse_hit_chart(hit_chart_page)
        db_players = parse.parse_players(players_page)

        # --------------------------------------------------------------------------------------------------------------
        # Append the AtBats to the Game. Note that Pitches are appended to AtBats
        # when the AtBats are parsed, so we don't have to do anything with Pitches.
        #
        for db_ab in db_at_bats:
            db_game.at_bats.append(db_ab)

        # --------------------------------------------------------------------------------------------------------------
        # Append the hits in play to the Game
        #
        for db_h in db_hips:
            db_game.hits_in_play.append(db_h)

        # The session is opened only once there is something to write, and is
        # closed in the ``finally`` below so that it is released on every path,
        # including unexpected exceptions.
        session = self.Session()
        try:
            # ----------------------------------------------------------------------------------------------------------
            # Add the players using the database session and commit
            # This has to be done one at a time (instead of using session.add_all)
            # because add_all will fail if ANY of the players in the list are
            # duplicated, which could lead to some players being excluded from
            # the database.
            #
            for player in db_players:
                # Normalize once so the membership test and the value stored
                # in self.player_ids always use the same (int) key.
                player_id = int(player.player_id)

                if player_id in self.player_ids:
                    # The player has been processed and should already be in the database
                    logger.debug("Skipping player: {} because it has already been processed.".format(player.player_id))
                    continue

                # We haven't inserted this player yet
                try:
                    session.add(player)
                    session.commit()

                except IntegrityError:
                    # If an IntegrityError occurs, it's probably because the data
                    # has already been inserted.
                    session.rollback()
                    msg = ("IntegrityError when inserting player: {}, "
                           "probably because it's already in the database".format(str(player)))
                    logger.error(msg)

                except Exception as ex:
                    # Just log other exceptions for now, and continue
                    session.rollback()
                    logger.error(str(ex))

                else:
                    # Only remember the player once the commit succeeded
                    self.player_ids.add(player_id)

            # ----------------------------------------------------------------------------------------------------------
            # Add the game data
            #
            if db_game.gameday_id in self.gameday_ids:
                # The game has been processed and should already be in the database
                logger.info("Skipping game: {} because it has already been ingested.".format(db_game.gameday_id))

            else:
                # We haven't inserted this game yet
                try:
                    session.add(db_game)
                    session.commit()

                except IntegrityError:
                    # If an IntegrityError occurs, it's probably because the data
                    # has already been inserted.
                    session.rollback()
                    msg = ("IntegrityError when inserting game: {}, "
                           "probably because it's already in the database".format(db_game.gameday_id))
                    logger.error(msg)

                except Exception as ex:
                    # Just log other exceptions for now, and continue
                    session.rollback()
                    logger.error(str(ex))

                else:
                    # Only remember the game once the commit succeeded
                    self.gameday_ids.add(db_game.gameday_id)

        finally:
            # We are done
            session.close()