Exemple #1
0
    def load(self, date):
        """
        Fetch game data from mlb.com and append a Game object to this
        container for every game listed for ``date``.

        Stream data for the day is loaded first (via ``loadStreams``) and
        attached to each game as ``game.streams``.  A game without an
        ``event_id``, or whose ``event_id`` has no entry in the loaded stream
        data, gets an empty stream list.

        Nothing is returned; games are added with ``self.append``.
        """
        streams = self.loadStreams(date)
        iphone_xml = XML.ElementFromURL(Util.DateURL(date, C["URL"]["GAMES"]),
                                        cacheTime=C["GAME_CACHE_TTL"],
                                        isHTML=True)
        # for some reason switching to the soup parser (isHTML=True) made every
        # game appear twice.  keep track of which games we've already listed.
        games_parsed = set()
        for xml in iphone_xml.xpath('game'):
            game = Game.fromXML(xml)
            if not game:
                continue

            # Identify a game by its two team names so the duplicated entry
            # can be recognised and skipped.
            try:
                pseudo_id = (game.home_team.fullName() +
                             game.away_team.fullName())
            except Exception:
                # NOTE: every game whose names cannot be computed shares the
                # None id, so only the first such game is kept.
                pseudo_id = None
            if pseudo_id in games_parsed:
                continue
            games_parsed.add(pseudo_id)

            if game.event_id:
                # loadStreams may have no entry for this event (row skipped or
                # unrecognised table layout); fall back to "no streams"
                # instead of aborting the whole load with a KeyError.
                game.streams = streams.get(game.event_id, [])
                # game.streams.game = game
            else:
                game.streams = []
            self.append(game)
Exemple #2
0
    def loadStreams(self, date):
        """
        Load stream data for a given day.  (A stream, for this purpose, is any
        game-specific media listed on http://mlb.mlb.com/mediacenter/)

        Returns a dict mapping each media-table row's ``id`` attribute (used
        as the event id) to a list of the streams parsed from that row.  The
        dict is empty when the page contains no media table, or when the
        table's column count matches none of the layouts in
        ``C["MEDIA_COLUMNS"]``.
        """
        events = {}
        page = XML.ElementFromURL(
            Util.DateURL(date, C["URL"]["MEDIA"]), True,
            encoding='UTF-8')
        tables = page.cssselect('.mmg_table tbody')
        if not tables:
            # No media table on the page: report "no streams" rather than
            # failing with an IndexError.
            return events
        table = tables[0]

        # how many columns in the table?  a cell with a colspan counts as
        # that many columns.
        num_columns = sum(
            int(cell.get('colspan')) if cell.get('colspan') else 1
            for cell in table.cssselect('tr:first-child td'))

        # Use the first known column layout whose length matches the table.
        for column_types in C["MEDIA_COLUMNS"]:
            if num_columns != len(column_types):
                continue

            # parse some HTML
            for row in table.cssselect('tr'):
                event_id = row.get('id')
                if not event_id:
                    continue

                cells = row.cssselect('td')
                if len(cells) < len(column_types):
                    continue

                # zip stops at the end of column_types, so any extra trailing
                # cells in the row are ignored.
                streams = []
                for column_type, cell in zip(column_types, cells):
                    stream = Stream.fromHTML(column_type, cell)
                    if stream:
                        streams.append(stream)

                events[event_id] = list(GameStreamList(streams))
            break

        return events