예제 #1
0
    def read_xml(cls, url, markup, game, players):
        """
        Build an Inning dataset from the inning XML files of one game.

        The hit chart file is read once, then every inning file found by
        ``MlbamUtil.find_xml_all`` is parsed and passed, tag by tag, to
        ``_inning_events``.

        :param url: contents url
        :param markup: markup provider
        :param game: MLBAM Game object
        :param players: MLBAM Players object
        :return: the Inning object created here, filled by ``_inning_events``
        """
        result = Inning(game, players)
        inning_dir_url = "".join([url, cls.DIRECTORY])

        # hit location data (read once, handed to every _inning_events call)
        hit_chart_url = '/'.join([inning_dir_url, cls.FILENAME_INNING_HIT])
        hit_location = cls._read_hit_chart_data(MlbamUtil.find_xml(hit_chart_url, markup))

        # create for atbat & pitch data
        inning_links = MlbamUtil.find_xml_all(inning_dir_url, markup, cls.TAG, cls.FILENAME_PATTERN)
        for link in inning_links:
            # the link text is used as the file name of the inning document
            file_name = link.get_text().strip()
            soup = MlbamUtil.find_xml("/".join([inning_dir_url, file_name]), markup)
            inning_tag = soup.inning
            inning_number = int(inning_tag['num'])
            for tag_name, tag_value in cls.INNINGS.items():
                section = inning_tag.find(tag_name)
                if section is None:
                    # stop at the first INNINGS tag that this file lacks
                    break
                result._inning_events(section, inning_number, tag_value, hit_location)
        return result
예제 #2
0
    def read_xml(cls, url, markup, game, players):
        """
        Build an Inning dataset from the inning XML files of one game.

        The hit chart file is read once, then every inning file found by
        ``MlbamUtil.find_xml_all`` is parsed and passed, tag by tag, to
        ``_inning_events`` and ``_inning_actions``.

        :param url: contents url
        :param markup: markup provider
        :param game: MLBAM Game object
        :param players: MLBAM Players object
        :return: the Inning object created here, filled by the two helpers
        """
        result = Inning(game, players)
        inning_dir_url = "".join([url, cls.DIRECTORY])

        # hit location data (read once, handed to every _inning_events call)
        hit_chart_url = '/'.join([inning_dir_url, cls.FILENAME_INNING_HIT])
        hit_location = cls._read_hit_chart_data(
            MlbamUtil.find_xml(hit_chart_url, markup))

        # create for atbat & pitch data
        inning_links = MlbamUtil.find_xml_all(
            inning_dir_url, markup, cls.TAG, cls.FILENAME_PATTERN)
        for link in inning_links:
            # the link text is used as the file name of the inning document
            file_name = link.get_text().strip()
            soup = MlbamUtil.find_xml(
                "/".join([inning_dir_url, file_name]), markup)
            inning_tag = soup.inning
            inning_number = int(inning_tag['num'])
            for tag_name, tag_value in cls.INNINGS.items():
                section = inning_tag.find(tag_name)
                if section is None:
                    # stop at the first INNINGS tag that this file lacks
                    break
                # events first, then actions, for the same section
                result._inning_events(
                    section, inning_number, tag_value, hit_location)
                result._inning_actions(section, inning_number, tag_value)
        return result
예제 #3
0
 def read_xml(cls, url, markup, game):
     """
     Fetch the players XML document and convert it with ``Players._read_objects``.

     :param url: contents url
     :param markup: markup provider
     :param game: MLBAM Game object
     :return: pitchpx.game.players.Players object
     """
     # the file name is appended to the url with a "/" separator
     players_url = "/".join([url, cls.FILENAME])
     soup = MlbamUtil.find_xml(players_url, markup)
     return Players._read_objects(soup, game)
예제 #4
0
 def test_find_xml_200(self):
     """
     Get xml content(status:200, head:default)

     Passes when ``MlbamUtil.find_xml`` returns something other than None.
     """
     game_xml_url = 'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_lanmlb_sdnmlb_1/game.xml'
     content = MlbamUtil.find_xml(game_xml_url, 'lxml')
     self.assertIsNotNone(content)
예제 #5
0
 def test_find_xml_200(self):
     """
     Get xml content(status:200, head:default)

     Passes when ``MlbamUtil.find_xml`` returns something other than None.
     """
     game_xml_url = 'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_lanmlb_sdnmlb_1/game.xml'
     parser_name = 'lxml'
     content = MlbamUtil.find_xml(game_xml_url, parser_name)
     self.assertIsNotNone(content)
예제 #6
0
 def read_xml(cls, url, markup, game):
     """
     Fetch the players XML document and convert it with ``Players._read_objects``.

     :param url: contents url
     :param markup: markup provider
     :param game: MLBAM Game object
     :return: pitchpx.game.players.Players object
     """
     # url and file name are concatenated directly, with no separator added
     players_url = "".join([url, cls.FILENAME])
     soup = MlbamUtil.find_xml(players_url, markup)
     return Players._read_objects(soup, game)
예제 #7
0
 def read_xml(cls, url, features, timestamp, game_number):
     """
     Fetch the game XML document and turn it into a game object.

     :param url: contents url
     :param features: markup provider
     :param timestamp: game day
     :param game_number: game number
     :return: pitchpx.game.game.Game object
     """
     # url and file name are concatenated directly, with no separator added
     game_url = "".join([url, cls.FILENAME])
     return cls._generate_game_object(
         MlbamUtil.find_xml(game_url, features),
         timestamp,
         game_number,
     )
예제 #8
0
 def read_xml(cls, url, features, timestamp, game_number):
     """
     Fetch the game XML document and turn it into a game object.

     :param url: contents url
     :param features: markup provider
     :param timestamp: game day
     :param game_number: game number
     :return: pitchpx.game.game.Game object
     """
     # url and file name are concatenated directly, with no separator added
     document_url = "".join([url, cls.FILENAME])
     document = MlbamUtil.find_xml(document_url, features)
     game_object = cls._generate_game_object(document, timestamp, game_number)
     return game_object
예제 #9
0
 def read_xml(cls, url, features, game, players):
     """
     Fetch the box score XML document and turn it into a box score object.

     :param url: contents url
     :param features: markup provider
     :param game: MLBAM Game object
     :param players: MLBAM Players object
     :return: pitchpx.box_score.box_score.BoxScore object
     """
     # url and file name are concatenated directly, with no separator added
     box_score_url = "".join([url, cls.FILENAME])
     return cls._generate_object(
         MlbamUtil.find_xml(box_score_url, features),
         game,
         players,
     )
예제 #10
0
    def _download(self, timestamp):
        """
        Download one MLBAM Game Day and write its datasets through ``_write_csv``.

        Every game link on the day's index page is read into game, player and
        inning data. A game whose download raises ``MlbAmHttpNotFound`` is
        logged as a warning and skipped.

        :param timestamp: day
        """
        year = str(timestamp.year)
        month = str(timestamp.month).zfill(2)
        day_of_month = str(timestamp.day).zfill(2)
        date_parts = {'year': year, 'month': month, 'day': day_of_month}

        # one accumulator per output file
        games = []
        atbats = []
        pitches = []
        rosters = []
        coaches = []
        umpires = []

        logging.info('->- Game data download start({year}/{month}/{day})'.format(**date_parts))

        day_url = self.DELIMITER.join([self.url, self.PAGE_URL_GAME_DAY.format(**date_parts)])
        index_page = MlbamUtil.find_xml(day_url, self.parser)

        # game links are the anchors whose href matches the game prefix
        link_pattern = re.compile(self.PAGE_URL_GAME_PREFIX.format(**date_parts))
        for anchor in index_page.find_all('a', href=link_pattern):
            gid_path = anchor.get_text().strip()
            gid_url = self.DELIMITER.join([day_url, gid_path])
            # Read XML & create dataset
            try:
                game_number = MlbAm._get_game_number(gid_path)
                game = Game.read_xml(gid_url, self.parser, timestamp, game_number)
                players = Players.read_xml(gid_url, self.parser, game)
                innings = Inning.read_xml(gid_url, self.parser, game, players)
            except MlbAmHttpNotFound as not_found:
                # skip this game, keep going with the remaining links
                logging.warning(not_found.msg)
            else:
                # append a dataset
                games.append(game.row())
                rosters += [player.row() for player in players.rosters.values()]
                coaches += [coach.row() for coach in players.coaches.values()]
                umpires += [umpire.row() for umpire in players.umpires.values()]
                atbats += innings.atbats
                pitches += innings.pitches

        # writing csv
        day = year + month + day_of_month
        outputs = (
            (games, Game.DOWNLOAD_FILE_NAME),
            (rosters, Players.Player.DOWNLOAD_FILE_NAME),
            (coaches, Players.Coach.DOWNLOAD_FILE_NAME),
            (umpires, Players.Umpire.DOWNLOAD_FILE_NAME),
            (atbats, AtBat.DOWNLOAD_FILE_NAME),
            (pitches, Pitch.DOWNLOAD_FILE_NAME),
        )
        for datasets, filename_template in outputs:
            self._write_csv(datasets, filename_template.format(day=day, extension=self.extension))
        # fixed two-second pause after writing (presumably to throttle requests - confirm)
        time.sleep(2)

        logging.info('-<- Game data download end({year}/{month}/{day})'.format(**date_parts))
예제 #11
0
 def test_find_xml_404(self):
     """
     Get xml content(status:404, head:default)

     ``MlbamUtil.find_xml`` must raise ``MlbAmHttpNotFound`` for this url, and
     the exception's ``msg`` must contain the url and the 404 status.
     """
     # assertRaises fails the test when nothing is raised; the previous
     # try/except form passed silently in that case.
     with self.assertRaises(MlbAmHttpNotFound) as raised:
         MlbamUtil.find_xml(
             'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml',
             'lxml',
         )
     self.assertEqual(raised.exception.msg, (
         'HTTP Error '
         'url: http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml '
         'status: 404'))
예제 #12
0
 def test_find_xml_404(self):
     """
     Get xml content(status:404, head:default)

     ``MlbamUtil.find_xml`` must raise ``MlbAmHttpNotFound`` for this url, and
     the exception's ``msg`` must contain the url and the 404 status.
     """
     # assertRaises fails the test when nothing is raised; the previous
     # try/except form passed silently in that case.
     with self.assertRaises(MlbAmHttpNotFound) as raised:
         MlbamUtil.find_xml(
             'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml',
             'lxml',
         )
     self.assertEqual(
         raised.exception.msg,
         ('HTTP Error '
          'url: http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml '
          'status: 404'
          )
     )
예제 #13
0
    def _download(self, timestamp):
        """
        Download one MLBAM Game Day and write its datasets through ``_write_csv``.

        Every game link on the day's index page is read into game, player,
        inning and box score data. A game whose download raises
        ``MlbAmHttpNotFound`` is logged as a warning and skipped.

        :param timestamp: day
        """
        year = str(timestamp.year)
        month = str(timestamp.month).zfill(2)
        day_of_month = str(timestamp.day).zfill(2)
        date_parts = {'year': year, 'month': month, 'day': day_of_month}

        # one accumulator per output file
        games = []
        atbats = []
        pitches = []
        rosters = []
        coaches = []
        umpires = []
        boxscores = []
        actions = []

        logging.info('->- Game data download start({year}/{month}/{day})'.format(**date_parts))

        day_url = self.DELIMITER.join([self.url, self.PAGE_URL_GAME_DAY.format(**date_parts)])
        index_page = MlbamUtil.find_xml(day_url, self.parser)

        # game links are the anchors whose href matches the game prefix
        link_pattern = re.compile(self.PAGE_URL_GAME_PREFIX.format(**date_parts))
        for anchor in index_page.find_all('a', href=link_pattern):
            gid_path = anchor.get_text().strip()
            gid_url = self.DELIMITER.join([day_url, gid_path])
            # Read XML & create dataset
            try:
                game_number = MlbAm._get_game_number(gid_path)
                game = Game.read_xml(gid_url, self.parser, timestamp, game_number)
                players = Players.read_xml(gid_url, self.parser, game)
                innings = Inning.read_xml(gid_url, self.parser, game, players)
                boxscore = BoxScore.read_xml(gid_url, self.parser, game, players)
            except MlbAmHttpNotFound as not_found:
                # skip this game, keep going with the remaining links
                logging.warning(not_found.msg)
            else:
                # append a dataset
                games.append(game.row())
                rosters += [player.row() for player in players.rosters.values()]
                coaches += [coach.row() for coach in players.coaches.values()]
                umpires += [umpire.row() for umpire in players.umpires.values()]
                atbats += innings.atbats
                pitches += innings.pitches
                actions += innings.actions
                boxscores.append(boxscore.row())

        # writing csv
        day = year + month + day_of_month
        outputs = (
            (games, Game.DOWNLOAD_FILE_NAME),
            (rosters, Players.Player.DOWNLOAD_FILE_NAME),
            (coaches, Players.Coach.DOWNLOAD_FILE_NAME),
            (umpires, Players.Umpire.DOWNLOAD_FILE_NAME),
            (atbats, AtBat.DOWNLOAD_FILE_NAME),
            (pitches, Pitch.DOWNLOAD_FILE_NAME),
            (boxscores, BoxScore.DOWNLOAD_FILE_NAME),
            (actions, InningAction.DOWNLOAD_FILE_NAME),
        )
        for datasets, filename_template in outputs:
            self._write_csv(datasets, filename_template.format(day=day, extension=self.extension))
        # fixed two-second pause after writing (presumably to throttle requests - confirm)
        time.sleep(2)

        logging.info('-<- Game data download end({year}/{month}/{day})'.format(**date_parts))