コード例 #1
0
ファイル: mlbam.py プロジェクト: Shinichi-Nakagawa/pitchpx
    def _download(self, timestamp):
        """
        Download one day of MLBAM Gameday data and write it out as CSV files.

        Every game link found on the day's index page is read into game,
        player and inning datasets. A game that raises MlbAmHttpNotFound is
        logged as a warning and left out of the output.
        :param timestamp: day to download (its year, month and day are used)
        """
        date_parts = {
            'year': str(timestamp.year),
            'month': str(timestamp.month).zfill(2),
            'day': str(timestamp.day).zfill(2)
        }
        # One accumulator per output file, filled across all games of the day
        game_rows, roster_rows, coach_rows, umpire_rows = [], [], [], []
        atbat_rows, pitch_rows = [], []

        logging.info('->- Game data download start({year}/{month}/{day})'.format(**date_parts))

        # Index page for the day; each matching anchor points at one game
        day_url = self.DELIMITER.join([self.url, self.PAGE_URL_GAME_DAY.format(**date_parts)])
        index_page = MlbamUtil.find_xml(day_url, self.parser)

        link_pattern = re.compile(self.PAGE_URL_GAME_PREFIX.format(**date_parts))
        for anchor in index_page.find_all('a', href=link_pattern):
            # The anchor text (not its href) is used as the game's path segment
            game_dir = anchor.get_text().strip()
            game_url = self.DELIMITER.join([day_url, game_dir])
            try:
                game = Game.read_xml(game_url, self.parser, timestamp, MlbAm._get_game_number(game_dir))
                players = Players.read_xml(game_url, self.parser, game)
                innings = Inning.read_xml(game_url, self.parser, game, players)
            except MlbAmHttpNotFound as not_found:
                # Missing game data: report it and move on to the next game
                logging.warning(not_found.msg)
            else:
                # Only a game that was read completely contributes rows
                game_rows.append(game.row())
                roster_rows.extend([member.row() for member in players.rosters.values()])
                coach_rows.extend([member.row() for member in players.coaches.values()])
                umpire_rows.extend([member.row() for member in players.umpires.values()])
                atbat_rows.extend(innings.atbats)
                pitch_rows.extend(innings.pitches)

        # Write one CSV per dataset, named after the zero-padded date
        day = date_parts['year'] + date_parts['month'] + date_parts['day']
        outputs = (
            (game_rows, Game.DOWNLOAD_FILE_NAME),
            (roster_rows, Players.Player.DOWNLOAD_FILE_NAME),
            (coach_rows, Players.Coach.DOWNLOAD_FILE_NAME),
            (umpire_rows, Players.Umpire.DOWNLOAD_FILE_NAME),
            (atbat_rows, AtBat.DOWNLOAD_FILE_NAME),
            (pitch_rows, Pitch.DOWNLOAD_FILE_NAME),
        )
        for rows, name_template in outputs:
            self._write_csv(rows, name_template.format(day=day, extension=self.extension))
        # NOTE(review): fixed 2-second pause, presumably to throttle requests
        # between consecutive days - confirm with the caller
        time.sleep(2)

        logging.info('-<- Game data download end({year}/{month}/{day})'.format(**date_parts))
コード例 #2
0
    def _download(self, timestamp):
        """
        Download one day of MLBAM Gameday data and write it out as CSV files.

        Every game link found on the day's index page is read into game,
        player, inning and box score datasets. A game that raises
        MlbAmHttpNotFound is logged as a warning and left out of the output.
        :param timestamp: day to download (its year, month and day are used)
        """
        date_parts = {
            'year': str(timestamp.year),
            'month': str(timestamp.month).zfill(2),
            'day': str(timestamp.day).zfill(2)
        }
        # One accumulator per output file, filled across all games of the day
        game_rows, roster_rows, coach_rows, umpire_rows = [], [], [], []
        atbat_rows, pitch_rows, action_rows, boxscore_rows = [], [], [], []

        start_message = '->- Game data download start({year}/{month}/{day})'
        logging.info(start_message.format(**date_parts))

        # Index page for the day; each matching anchor points at one game
        day_path = self.PAGE_URL_GAME_DAY.format(**date_parts)
        day_url = self.DELIMITER.join([self.url, day_path])
        index_page = MlbamUtil.find_xml(day_url, self.parser)

        link_pattern = re.compile(
            self.PAGE_URL_GAME_PREFIX.format(**date_parts))
        for anchor in index_page.find_all('a', href=link_pattern):
            # The anchor text (not its href) is used as the game's path segment
            game_dir = anchor.get_text().strip()
            game_url = self.DELIMITER.join([day_url, game_dir])
            try:
                game_number = MlbAm._get_game_number(game_dir)
                game = Game.read_xml(
                    game_url, self.parser, timestamp, game_number)
                players = Players.read_xml(game_url, self.parser, game)
                innings = Inning.read_xml(game_url, self.parser, game, players)
                boxscore = BoxScore.read_xml(
                    game_url, self.parser, game, players)
            except MlbAmHttpNotFound as not_found:
                # Missing game data: report it and move on to the next game
                logging.warning(not_found.msg)
            else:
                # Only a game that was read completely contributes rows
                game_rows.append(game.row())
                roster_rows.extend(
                    [member.row() for member in players.rosters.values()])
                coach_rows.extend(
                    [member.row() for member in players.coaches.values()])
                umpire_rows.extend(
                    [member.row() for member in players.umpires.values()])
                atbat_rows.extend(innings.atbats)
                pitch_rows.extend(innings.pitches)
                action_rows.extend(innings.actions)
                boxscore_rows.append(boxscore.row())

        # Write one CSV per dataset, named after the zero-padded date
        day = date_parts['year'] + date_parts['month'] + date_parts['day']
        outputs = (
            (game_rows, Game.DOWNLOAD_FILE_NAME),
            (roster_rows, Players.Player.DOWNLOAD_FILE_NAME),
            (coach_rows, Players.Coach.DOWNLOAD_FILE_NAME),
            (umpire_rows, Players.Umpire.DOWNLOAD_FILE_NAME),
            (atbat_rows, AtBat.DOWNLOAD_FILE_NAME),
            (pitch_rows, Pitch.DOWNLOAD_FILE_NAME),
            (boxscore_rows, BoxScore.DOWNLOAD_FILE_NAME),
            (action_rows, InningAction.DOWNLOAD_FILE_NAME),
        )
        for rows, name_template in outputs:
            csv_name = name_template.format(
                day=day, extension=self.extension)
            self._write_csv(rows, csv_name)
        # NOTE(review): fixed 2-second pause, presumably to throttle requests
        # between consecutive days - confirm with the caller
        time.sleep(2)

        end_message = '-<- Game data download end({year}/{month}/{day})'
        logging.info(end_message.format(**date_parts))