def _download(self, timestamp):
    """
    Download one day of MLBAM Game Day data and write it out as CSV files
    :param timestamp: day to download (its year, month and day attributes are read)
    """
    date_parts = {
        'year': str(timestamp.year),
        'month': str(timestamp.month).zfill(2),
        'day': str(timestamp.day).zfill(2),
    }
    logging.info('->- Game data download start({year}/{month}/{day})'.format(**date_parts))

    # One accumulator per CSV file written at the end of the day
    games, rosters, coaches, umpires = [], [], [], []
    atbats, pitches = [], []

    # The day index page lists one link per game directory
    day_url = self.DELIMITER.join((self.url, self.PAGE_URL_GAME_DAY.format(**date_parts)))
    index_page = MlbamUtil.find_xml(day_url, self.parser)
    game_link = re.compile(self.PAGE_URL_GAME_PREFIX.format(**date_parts))

    for anchor in index_page.find_all('a', href=game_link):
        game_dir = anchor.get_text().strip()
        game_url = self.DELIMITER.join((day_url, game_dir))
        try:
            game = Game.read_xml(game_url, self.parser, timestamp, MlbAm._get_game_number(game_dir))
            players = Players.read_xml(game_url, self.parser, game)
            innings = Inning.read_xml(game_url, self.parser, game, players)
        except MlbAmHttpNotFound as err:
            # A missing resource skips this game only; the rest of the day continues
            logging.warning(err.msg)
        else:
            games.append(game.row())
            rosters.extend(player.row() for player in players.rosters.values())
            coaches.extend(coach.row() for coach in players.coaches.values())
            umpires.extend(umpire.row() for umpire in players.umpires.values())
            atbats.extend(innings.atbats)
            pitches.extend(innings.pitches)

    # File names are templates taking the compact YYYYMMDD day and the extension
    day = date_parts['year'] + date_parts['month'] + date_parts['day']
    outputs = (
        (games, Game.DOWNLOAD_FILE_NAME),
        (rosters, Players.Player.DOWNLOAD_FILE_NAME),
        (coaches, Players.Coach.DOWNLOAD_FILE_NAME),
        (umpires, Players.Umpire.DOWNLOAD_FILE_NAME),
        (atbats, AtBat.DOWNLOAD_FILE_NAME),
        (pitches, Pitch.DOWNLOAD_FILE_NAME),
    )
    for rows, name_template in outputs:
        self._write_csv(rows, name_template.format(day=day, extension=self.extension))

    # NOTE(review): indentation was lost in the source; the pause is taken to run
    # once per day, after all files are written -- confirm
    time.sleep(2)

    logging.info('-<- Game data download end({year}/{month}/{day})'.format(**date_parts))
def _download(self, timestamp):
    """
    Download one day of MLBAM Game Day data (including box scores and
    inning actions) and write it out as CSV files
    :param timestamp: day to download (its year, month and day attributes are read)
    """
    date_parts = {
        'year': str(timestamp.year),
        'month': str(timestamp.month).zfill(2),
        'day': str(timestamp.day).zfill(2),
    }
    logging.info('->- Game data download start({year}/{month}/{day})'.format(**date_parts))

    # One accumulator per CSV file written at the end of the day
    games, rosters, coaches, umpires = [], [], [], []
    atbats, pitches = [], []
    boxscores, actions = [], []

    # The day index page lists one link per game directory
    day_url = self.DELIMITER.join((self.url, self.PAGE_URL_GAME_DAY.format(**date_parts)))
    index_page = MlbamUtil.find_xml(day_url, self.parser)
    game_link = re.compile(self.PAGE_URL_GAME_PREFIX.format(**date_parts))

    for anchor in index_page.find_all('a', href=game_link):
        game_dir = anchor.get_text().strip()
        game_url = self.DELIMITER.join((day_url, game_dir))
        try:
            game = Game.read_xml(game_url, self.parser, timestamp, MlbAm._get_game_number(game_dir))
            players = Players.read_xml(game_url, self.parser, game)
            innings = Inning.read_xml(game_url, self.parser, game, players)
            boxscore = BoxScore.read_xml(game_url, self.parser, game, players)
        except MlbAmHttpNotFound as err:
            # A missing resource skips this game only; the rest of the day continues
            logging.warning(err.msg)
        else:
            games.append(game.row())
            rosters.extend(player.row() for player in players.rosters.values())
            coaches.extend(coach.row() for coach in players.coaches.values())
            umpires.extend(umpire.row() for umpire in players.umpires.values())
            atbats.extend(innings.atbats)
            pitches.extend(innings.pitches)
            actions.extend(innings.actions)
            boxscores.append(boxscore.row())

    # File names are templates taking the compact YYYYMMDD day and the extension
    day = date_parts['year'] + date_parts['month'] + date_parts['day']
    outputs = (
        (games, Game.DOWNLOAD_FILE_NAME),
        (rosters, Players.Player.DOWNLOAD_FILE_NAME),
        (coaches, Players.Coach.DOWNLOAD_FILE_NAME),
        (umpires, Players.Umpire.DOWNLOAD_FILE_NAME),
        (atbats, AtBat.DOWNLOAD_FILE_NAME),
        (pitches, Pitch.DOWNLOAD_FILE_NAME),
        (boxscores, BoxScore.DOWNLOAD_FILE_NAME),
        (actions, InningAction.DOWNLOAD_FILE_NAME),
    )
    for rows, name_template in outputs:
        self._write_csv(rows, name_template.format(day=day, extension=self.extension))

    # NOTE(review): indentation was lost in the source; the pause is taken to run
    # once per day, after all files are written -- confirm
    time.sleep(2)

    logging.info('-<- Game data download end({year}/{month}/{day})'.format(**date_parts))