Esempio n. 1
0
    def poem_detail_crew(self):
        """Crawl a batch of poem detail pages taken from the URL queue.

        At most ``total_count`` URLs are processed per call. Each one is
        fetched individually with ``self.db.select_unanalyzed_url(2)`` and
        handled as follows:

        * no URL returned: the batch stops early;
        * ``self.db.poem_exists(url)`` is true: only ``self.db.update_url(url)``
          is called;
        * the download returns nothing: a ``get_poem_detail_error`` record
          is written with ``self.db.insert_error``;
        * the analyzer returns nothing: an ``analyze_poem_detail_error``
          record is written with ``self.db.insert_error``;
        * otherwise the poem and its extra information are inserted, the
          URL is updated and a progress line is printed.

        Any exception raised while analyzing or storing a poem is reported
        on stdout and the batch moves on to the next URL (best effort).

        NOTE(review): ``update_url`` presumably marks the URL as analyzed,
        i.e. the equivalent of
        ``update urls set analyzed = 1 where url = '<url>'`` -- confirm
        against the db helper.
        """
        total_count = 10

        # Progress counter runs 1..total_count so the printed "c/total"
        # matches the number of URLs actually attempted.
        for c in range(1, total_count + 1):
            # Fetch the next poem detail-page URL, one row at a time.
            row = self.db.select_unanalyzed_url(2)
            if row is None:
                # Queue is empty: nothing left to do in this batch.
                break
            url = row['url']

            # Skip pages whose poem has already been stored.
            if self.db.poem_exists(url):
                self.db.update_url(url)
                continue

            content = Downloader.get_html(url, 'poem')
            if not content:
                self.db.insert_error('get_poem_detail_error', 4, 'reason', url)
                continue

            try:
                poem = Analyzer.get_poem_detail(content, url)
                if poem:
                    # Analysis succeeded: persist the poem and its metadata.
                    poem_content = poem[0]
                    poem_info = poem[1]

                    self.db.insert_poem(poem_content)
                    self.db.insert_infomations(url, 1, poem_info)

                    self.db.update_url(url)

                    print("%d/%d %s %s" % (c, total_count, poem_content['title'], poem_content['url']))
                else:
                    self.db.insert_error('analyze_poem_detail_error', 5, 'reason', url)
            except Exception as e:
                # Best effort: report the failure instead of hiding it,
                # then carry on with the next URL.
                print("poem_detail_crew failed for %s: %r" % (url, e))