def poem_author_crew(self):
    """Crawl and store the detail page of every author not yet in the database.

    Makes a single pass over the rows returned by ``self.db.select_authors()``.
    The first element of each row is used as the author's detail-page URL.
    For each URL that is non-empty and not already stored (according to
    ``self.db.author_exists``), the page is downloaded, parsed, and the
    result written through ``self.db.insert_author`` and
    ``self.db.insert_infomations``.

    Download failures and empty parse results are recorded through
    ``self.db.insert_error``; an exception raised while parsing or storing
    is reported on stdout and the loop moves on to the next author.

    NOTE(review): the previous version wrapped this pass in a
    ``while i < 20000`` loop whose counter was overwritten by the progress
    counter on every pass, so the bound never took effect and the same list
    was re-scanned. The scan is now done exactly once. Rows with a blank URL
    are skipped rather than treated as end-of-data - confirm against callers
    if a blank URL was meant to be a sentinel.

    Returns:
        None.
    """
    authors = self.db.select_authors()
    total = len(authors)
    # 1-based position shown in the progress line; it only advances when an
    # author has actually been stored.
    progress = 1

    for author in authors:
        url = author[0]

        # A row without a URL cannot be crawled. Truthiness is used instead
        # of the former `is not ''` identity test, which depended on string
        # interning.
        if not url:
            continue

        # Skip authors that are already stored.
        if self.db.author_exists(url):
            continue

        html = Downloader.get_html(url, 'author')
        if not html:
            self.db.insert_error('download_author_detail_error', 8, 'reason', url)
            continue

        try:
            author_info = Analyzer.get_author_detail(html, url)
            if author_info:
                author_content = author_info[0]
                author_infos = author_info[1]
                self.db.insert_author(author_content)
                # NOTE(review): the literal 2 is passed through unchanged;
                # its meaning is defined by insert_infomations - confirm.
                self.db.insert_infomations(url, 2, author_infos)
                print('%d/%d %s %s' % (progress, total, author_content['name'], url))
                progress += 1
            else:
                self.db.insert_error('analyze_author_detail_error', 9, 'reason', url)
        except Exception:
            # Best-effort crawl: one bad page must not abort the whole run.
            # NOTE(review): the failure is only printed, not recorded via
            # insert_error - consider persisting it.
            print('error %s' % (url,))