Esempio n. 1
0
    def poem_author_crew(self):
        """Crawl and store the detail page of every author not yet saved.

        The author rows are read once from ``self.db.select_authors()``.  For
        each row whose first column is a non-empty URL that
        ``self.db.author_exists`` does not report as stored, the page is
        downloaded with ``Downloader.get_html``, parsed with
        ``Analyzer.get_author_detail`` and written through
        ``self.db.insert_author`` / ``self.db.insert_infomations``.

        An empty download or an empty parse result is recorded through
        ``self.db.insert_error``.  Any exception raised while parsing or
        storing one author is printed and the crawl moves on to the next row.

        Returns:
            None.
        """
        # A single pass is enough: the list is fetched once and every row in
        # it is visited.
        authors = self.db.select_authors()
        total = len(authors)
        # 1-based progress counter; only advances when an author is stored.
        stored = 1

        for author in authors:
            url = author[0]

            # Skip rows that carry no usable URL (None or empty string).
            if not url:
                continue
            # Skip authors whose detail page has already been stored.
            if self.db.author_exists(url):
                continue

            html = Downloader.get_html(url, 'author')
            if not html:
                self.db.insert_error('download_author_detail_error', 8, 'reason', url)
                continue

            try:
                author_info = Analyzer.get_author_detail(html, url)
                if not author_info:
                    self.db.insert_error('analyze_author_detail_error', 9, 'reason', url)
                    continue

                author_content = author_info[0]
                author_infos = author_info[1]

                self.db.insert_author(author_content)
                # NOTE(review): the literal 2 presumably tags these rows as
                # author information -- confirm against insert_infomations.
                self.db.insert_infomations(url, 2, author_infos)
                print('%d/%d %s %s' % (stored, total, author_content['name'], url))
                stored += 1
            except Exception as e:
                # Best effort: one bad page must not abort the whole crawl,
                # but keep the reason visible. %r avoids encoding errors
                # while formatting the exception itself.
                print('error %s %r' % (url, e))