コード例 #1
0
ファイル: match.py プロジェクト: cmcdowell/match-thread-bot
    def __init__(self, row, gunners=False):
        """Populate the match from ``row`` and derive its Guardian URLs.

        ``row`` is read positionally: index 1 is parsed as a
        ``YYYY-MM-DD HH:MM:SS`` kick-off time, 2 and 3 are stored as the
        home and away team, 4 as the venue and 6 as ``played``.
        ``gunners`` is stored unchanged.

        ``stats_url`` and ``events_url`` are filled with the kick-off
        year, lower-cased abbreviated month name, zero-padded day and the
        two team names after they have been passed through
        ``list_replace`` with ``CHANGES``.
        """
        kick_off = datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S')

        self.kick_off = kick_off
        self.home_team = row[2]
        self.away_team = row[3]
        self.venue = row[4]
        self.played = row[6]
        self.gunners = gunners

        # list_replace works on lists, so wrap each name and unwrap the
        # single result.
        home_url, = list_replace([self.home_team], CHANGES)
        self.home_url = home_url
        away_url, = list_replace([self.away_team], CHANGES)
        self.away_url = away_url

        # Both URLs share the same five path components.
        url_parts = (
            kick_off.year,
            kick_off.strftime('%b').lower(),
            '%02d' % kick_off.day,
            home_url,
            away_url,
        )

        self.stats_url = 'http://www.guardian.co.uk/football/match/{0}/{1}/{2}/{3}-v-{4}'.format(*url_parts)
        self.events_url = 'http://www.guardian.co.uk/football/match-popup/{0}/{1}/{2}/{3}-v-{4}'.format(*url_parts)
コード例 #2
0
ファイル: match.py プロジェクト: cmcdowell/match-thread-bot
    def scrape_events(self):
        """
        Scrape the match events from ``self.events_url``.

        Returns the ``output`` namedtuple class itself (not an instance),
        used as a simple attribute container holding three parallel
        lists: ``minute``, ``event_type`` and ``event``. Position i in
        each list comes from the same table row; a field that cannot be
        read from a row is stored as ''. When the page cannot be fetched
        the three lists are returned empty.
        """

        url = self.events_url
        print('Scraping events from %s' % (url,))

        # A fresh class is created on every call, so the lists attached
        # below are not shared between calls.
        output = namedtuple('output', 'minute event_type event')
        output.minute, output.event_type, output.event = [], [], []

        try:
            page = urlopen(url).read()
        except (HTTPError, URLError) as e:
            print("Can't find events page %s" % (e,))
            return output

        soup = BeautifulSoup(page)

        # List of table rows with css class event
        table = soup.findAll('tr', {'class': 'event'})

        # Defines what's to be changed for /r/soccer's custom match thread
        # icons
        changes = [('SUB', '[](//#sub) Sub'),
                   ('RED CARD', '[](//#red) Red'),
                   ('YELLOW CARD', '[](//#yellow) Yellow'),
                   ('GOAL', '[](//#ball) **Goal**')]

        # A missing tag surfaces as AttributeError, a cell with fewer
        # children than expected as IndexError. Either way the field is
        # recorded as '' so the three lists stay the same length.
        unreadable = (AttributeError, IndexError)

        for row in table:
            try:
                output.minute.append(row.td.contents[0])
            except unreadable:
                output.minute.append('')
            try:
                output.event_type.append(row.td.next_sibling.next_sibling.contents[1].contents[0])
            except unreadable:
                output.event_type.append('')
            try:
                output.event.append(row.td.next_sibling.next_sibling.contents[2])
            except unreadable:
                output.event.append('')

        # Replace the event types with custom event types defined above
        output.event_type = list_replace(output.event_type, changes)

        return output