コード例 #1
0
ファイル: app.py プロジェクト: 312362115/toutiao_daily
def run():
    """Load configuration, crawl for the current date value, then mail it.

    The date comes from ``Tools.get_date()``. Mail is sent only when
    ``Crawler.start`` returns a truthy value for that date; otherwise the
    function returns without sending anything.
    """
    CONFIG.load()
    day = Tools.get_date()

    # Nothing to send unless the crawl reports success.
    if not Crawler().start(day):
        return

    Mail().send_mail(day)
コード例 #2
0
ファイル: crawler.py プロジェクト: 312362115/toutiao_daily
    def _parser(self, html):
        """Collect rendered markup for the date and every post of a digest.

        Args:
            html: Parsed document exposing ``find(class_=...)``; presumably a
                BeautifulSoup tree -- confirm against the caller.

        Returns:
            A list whose first item is the rendered ``date`` element found
            inside the ``daily`` element, followed, for each ``post``
            element, by ``'<br />'``, the rendered title link and the
            rendered ``subject-name`` element.

        Note:
            The subject link's ``href`` is rewritten in place on the parsed
            tree: ``self.base_url`` is prepended when the value does not
            already start with it.
        """
        def render(tag):
            # Every fragment is rendered with the same configured encoding.
            return tag.prettify(self.html_encode)

        digest = html.find(class_='daily')
        fragments = [render(digest.find(class_='date'))]

        for entry in digest.find_all(class_='post'):
            headline = entry.find(class_='title').a
            topic = entry.find(class_='subject-name')

            # Prefix the base URL before rendering so the emitted markup
            # carries the rewritten link.
            link = topic.a
            href = link['href']
            if not href.startswith(self.base_url):
                link['href'] = self.base_url + href

            fragments += ['<br />', render(headline), render(topic)]

        return fragments

    def _save(self, date, data):
        """Write the joined fragments to ``<self.path><date>.html``.

        Args:
            date: String used as the file name stem.
            data: Iterable of string fragments, concatenated without a
                separator before being written.

        Note:
            ``self.path`` is concatenated directly with the file name, so it
            is presumably expected to end with a path separator -- confirm
            against the configuration.
        """
        html_name = date + '.html'
        destination = self.path + html_name

        # The file is opened (and truncated) before the fragments are joined.
        with open(destination, 'w') as out_file:
            out_file.write(''.join(data))


# Script entry point: runs a single crawl when this file is executed directly.
if __name__ == '__main__':
    # Configuration is loaded before the date lookup and the crawl run.
    CONFIG.load()
    # The value returned by Tools.get_date() is passed to the crawler as-is.
    date = Tools.get_date()
    # The return value of start() is ignored here.
    Crawler().start(date)