Esempio n. 1
0
    def infomation_crew(self, total_count=20000):
        """Download and analyze pending information detail pages one by one.

        On every iteration one record is requested from
        ``self.db.select_unanalyzed_infomation()``. Its ``url`` is downloaded
        with ``Downloader.get_html``, the HTML is passed to
        ``Analyzer.get_info_detail`` and the extracted content is stored via
        ``self.db.update_infomationurl``. A falsy download or analysis result
        is recorded with ``self.db.insert_error`` and the loop moves on to the
        next iteration.

        Args:
            total_count: Maximum number of iterations to run. Defaults to
                20000, the cap that used to be hard-coded in the body.

        Returns:
            None. The method stops as soon as the database hands back
            ``None`` or once ``total_count`` iterations have run.
        """
        # NOTE(review): if select_unanalyzed_infomation() keeps returning a
        # record whose download/analysis failed, that record is retried on
        # every remaining iteration -- confirm the query (or insert_error)
        # excludes records that already failed.
        for i in range(1, total_count + 1):
            # Take records one at a time.
            info = self.db.select_unanalyzed_infomation()
            if info is None:
                # Nothing left to process.
                return

            url = info['url']
            html = Downloader.get_html(url, 'infomation')
            if not html:
                self.db.insert_error('download_info_detail_error', 6, 'reason', url)
                continue

            content = Analyzer.get_info_detail(html)
            if not content:
                self.db.insert_error('analyze_info_detail_error', 7, 'reason', url)
                continue

            self.db.update_infomationurl(url, content)
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print('%d/%d %s %s' % (i, total_count, info['title'], url))