def poem_detail_crew(self):
    """Crawl a small batch of poem detail pages and store the parsed results.

    Each iteration asks ``self.db.select_unanalyzed_url(2)`` for the next
    pending row and stops as soon as it returns ``None``.  For each URL:

    * if ``self.db.poem_exists(url)`` is true, only ``self.db.update_url(url)``
      is called;
    * otherwise the page is fetched with ``Downloader.get_html(url, 'poem')``
      and parsed with ``Analyzer.get_poem_detail(content, url)``.  A truthy
      result is indexed as ``(poem_content, poem_info)``; both parts are
      inserted, ``update_url`` is called and a progress line is printed;
    * an empty download or a falsy parse result is recorded through
      ``self.db.insert_error`` and the URL is left untouched.

    Any exception raised while parsing or storing a poem is logged with its
    traceback and the loop moves on to the next URL (best-effort crawl).

    Returns:
        None.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from this block, so the dependency is brought in locally.
    import logging

    # NOTE: SQL snippet left by the author, presumably for marking a URL as
    # analyzed by hand -- confirm before relying on it:
    #   update urls set analyzed = 1
    #   where url = 'http://www.haoshiwen.org/view.php?id=9510'

    total_count = 10

    # Fetch the URLs one at a time.
    # NOTE(review): the progress counter runs from 2 to total_count, so at
    # most total_count - 1 URLs are handled per call and the first progress
    # line reads "2/10".  Confirm whether that bound is intended.
    for c in range(2, total_count + 1):
        # Fetch the next poem detail-page URL (the meaning of the literal 2
        # is defined by the db layer -- presumably a URL type; TODO confirm).
        row = self.db.select_unanalyzed_url(2)
        if row is None:
            # Nothing left to process: stop instead of polling an empty queue.
            break
        url = row['url']

        # Already stored earlier: just call update_url and move on.
        if self.db.poem_exists(url):
            self.db.update_url(url)
            continue

        content = Downloader.get_html(url, 'poem')
        if not content:
            self.db.insert_error('get_poem_detail_error', 4, 'reason', url)
            continue

        try:
            poem = Analyzer.get_poem_detail(content, url)
            if poem:
                # Parsed successfully: store the poem and its extra info.
                poem_content = poem[0]
                poem_info = poem[1]
                self.db.insert_poem(poem_content)
                self.db.insert_infomations(url, 1, poem_info)
                self.db.update_url(url)
                # Single parenthesised argument: prints the same text on
                # Python 2 (print statement) and Python 3 (print function).
                print("%d/%d %s %s" % (c, total_count,
                                       poem_content['title'],
                                       poem_content['url']))
            else:
                self.db.insert_error('analyze_poem_detail_error', 5,
                                     'reason', url)
        except Exception:
            # Best-effort crawl: one bad page must not abort the batch, but
            # the failure is logged so it can be diagnosed later.
            logging.getLogger(__name__).exception(
                "poem_detail_crew: failed to process %s", url)