Exemple #1
0
 def get_cookie(self):
     """
     Dump the cookie jar to a scratch file and return the file's text.

     The scratch file is created in the current working directory under the
     name produced by ``ExtraTools.md5(ExtraTools.get_time())`` and is always
     removed again, even when saving or reading fails.

     :return: whatever ``self.cookieJar.save`` wrote to the scratch file
     :rtype: str
     """
     filename = ExtraTools.md5(ExtraTools.get_time())
     try:
         # Create the (empty) file first, exactly as before, so it exists
         # by the time the cookie jar saves into it.
         with open(filename, 'w'):
             pass
         self.cookieJar.save(filename)
         with open(filename) as f:
             content = f.read()
     finally:
         # The file contains the saved cookies; do not leave it on disk when
         # an exception interrupts the save/read sequence.
         if os.path.exists(filename):
             os.remove(filename)
     return content
Exemple #2
0
 def get_cookie(self):
     """
     Return the cookie jar serialised as text.

     ``self.cookieJar.save`` only writes to a file, so the jar is saved to a
     scratch file (named by ``ExtraTools.md5(ExtraTools.get_time())`` in the
     current working directory), read back and the scratch file deleted.

     :return: the text found in the scratch file after the save
     :rtype: str
     """
     filename = ExtraTools.md5(ExtraTools.get_time())
     try:
         # Touch the file before handing its name to the cookie jar.
         with open(filename, 'w'):
             pass
         self.cookieJar.save(filename)
         with open(filename) as f:
             content = f.read()
     finally:
         # Clean up on every path: without this an exception from save() or
         # read() would leave the saved cookies lying in the working directory.
         if os.path.exists(filename):
             os.remove(filename)
     return content
Exemple #3
0
 def parse_date(date='1357-08-12'):
     """
     Turn a scraped date text into a date value.

     Text containing ":" is treated as relative: ExtraTools.get_yesterday()
     when it also contains u'昨天', otherwise ExtraTools.get_today().
     Text containing u'今天' also yields ExtraTools.get_today().
     Anything else is handed to ParserTools.match_content together with a
     ``YYYY-MM-DD`` pattern and '1357-08-12' as its third argument.

     :param date: raw date text
     :return: result of the helper selected above
     """
     has_clock = u":" in date
     if has_clock and u'昨天' in date:
         return ExtraTools.get_yesterday()
     if has_clock or u'今天' in date:
         return ExtraTools.get_today()
     # Mnemonic from the original author: months 1, 3, 5, 7, 8, 10 and 12
     # always have 31 days (presumably where the '1357-08-12' literal comes from).
     return ParserTools.match_content(r'\d{4}-\d{2}-\d{2}', date, '1357-08-12')
Exemple #4
0
 def parse_date(date='1357-08-12'):
     """
     Resolve a scraped date text.

     * contains ":" and u'昨天' -> ExtraTools.get_yesterday()
     * contains ":" (without u'昨天') or contains u'今天' -> ExtraTools.get_today()
     * otherwise -> ParserTools.match_content with a ``YYYY-MM-DD`` pattern,
       the text itself and '1357-08-12' as third argument

     :param date: raw date text
     :return: result of the helper selected above
     """
     if u":" in date:
         resolver = ExtraTools.get_yesterday if u'昨天' in date else ExtraTools.get_today
         return resolver()
     if u'今天' in date:
         return ExtraTools.get_today()
     # Mnemonic from the original author: months 1, 3, 5, 7, 8, 10 and 12
     # always have 31 days (presumably where the '1357-08-12' literal comes from).
     pattern = r'\d{4}-\d{2}-\d{2}'
     return ParserTools.match_content(pattern, date, '1357-08-12')
Exemple #5
0
    def generate_article_page(self, article):
        """
        Render one article into an html page stored in the html pool.

        The article is formatted with Template.answer, wrapped by
        Template.question (with an empty description) and written to
        Path.html_pool_path under a random file name.

        :type article: src.container.data.article.Article
        :return: path of the written page
        :rtype: str
        """
        answer_content = Template.answer.format(
            author_avatar_url=article.author_avatar_url,
            author_name=article.author_name,
            author_id=article.author_id,
            author_headline=article.author_headline,
            content=article.content,
            comment_count=article.comment_count,
            voteup_count=article.voteup_count,
            # NOTE(review): 'Y-m-d H:i:s' looks like a PHP date() pattern; if
            # ExtraTools.format_date forwards it to strftime it is rendered
            # literally -- confirm against the helper (TODO).
            updated_time=ExtraTools.format_date('Y-m-d H:i:s', article.updated_time),
        )

        filename = self.get_random_html_file_name()
        content = Template.question.format(
            title=article.title,
            description='',
            answer=answer_content,
        )
        uri = Path.html_pool_path + '/' + filename
        # Context manager so the handle is closed even when write() raises.
        with open(uri, 'w') as buf_file:
            buf_file.write(content)
        return uri
    def create(self):
        """
        Start the image download, write every page into the html pool and
        assemble the epub.

        The first page of each book is registered with ``createChapter``, the
        remaining pages with ``addHtml``; images, metadata and css are added
        afterwards and ``buildingEpub`` is called last.

        :return: None
        """
        self.image_container.set_save_path(Path.image_pool_path)
        self.image_container.start_download()

        title = "_".join([book.property.epub.title for book in self.book_list]).strip()
        Path.chdir(Path.base_path + u"/知乎电子书临时资源库/")
        epub = Book(title, 27149527)
        html_tmp_path = Path.html_pool_path + "/"
        image_tmp_path = Path.image_pool_path + "/"

        def dump(page):
            # Write one page into the html pool and hand back its path.
            target = html_tmp_path + page.filename
            with open(target, "w") as html:
                html.write(page.content)
            return target

        for book in self.book_list:
            first_page = book.page_list[0]
            first_path = dump(first_page)
            epub.createChapter(first_path, ExtraTools.get_time(), first_page.title)

            for later_page in book.page_list[1:]:
                epub.addHtml(dump(later_page), later_page.title)

        for image in self.book["image_list"]:
            epub.addImg(image_tmp_path + image["filename"])

        epub.addLanguage("zh-cn")
        epub.addCreator("ZhihuHelp1.7.0")
        epub.addDesc(u"该电子书由知乎助手生成,知乎助手是姚泽源为知友制作的仅供个人使用的简易电子书制作工具,源代码遵循WTFPL,希望大家能认真领会该协议的真谛,为飞面事业做出自己的贡献 XD")
        epub.addRight("CC")
        epub.addPublisher("ZhihuHelp")
        Debug.logger.debug(u"当前目录为")
        Path.pwd()
        for css_name in (u"/epubResource/markdown.css", u"/epubResource/front.css"):
            epub.addCss(Path.base_path + css_name)
        epub.buildingEpub()
Exemple #7
0
    def login(self, account, password, captcha=''):
        content = Http.get_content('https://www.zhihu.com/')
        xsrf = Match.xsrf(content)
        if not xsrf:
            Debug.logger.info(u'登陆失败')
            Debug.logger.info(u'敲击回车重新发送登陆请求')
            return False
        xsrf = xsrf.split('=')[1]
        # add xsrf as cookie into cookieJar,
        cookie = Http.make_cookie(name='_xsrf', value=xsrf, domain='www.zhihu.com')
        self.cookieJar.set_cookie(cookie)
        if captcha:
            post_data = {'_xsrf': xsrf, 'email': account, 'password': password, 'remember_me': True,
                         'captcha': captcha}
        else:
            post_data = {'_xsrf': xsrf, 'email': account, 'password': password, 'remember_me': True}

        header = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate',  # 主要属性,只要有此项知乎即认为来源非脚本
            'Accept-Language': 'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4',
            'Host': 'www.zhihu.com',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36(KHTML, like Gecko)Chrome/34.0.1847.116 Safari/537.36',
            'Connection': 'keep-alive',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.zhihu.com',
            'Referer': 'https://www.zhihu.com/',
        }
        result = Http.get_content(url=r'https://www.zhihu.com/login/email', data=post_data, extra_header=header)
        if not result:
            Debug.logger.info(u'登陆失败,请敲击回车重新登陆')
            return False
        response = json.loads(result)

        if response['r'] == 0:
            print u'登陆成功!'
            print u'登陆账号:', account
            print u'请问是否需要记住帐号密码?输入yes记住,输入其它任意字符跳过,回车确认'
            if raw_input() == 'yes':
                Config.account, Config.password, Config.remember_account = account, password, True
                print u'帐号密码已保存,可通过修改config.json修改设置'
            else:
                Config.account, Config.password, Config.remember_account = '', '', False
                print u'跳过保存环节,进入下一流程'
            Config._save()
            cookie = self.get_cookie()
            DB.execute('delete from LoginRecord')  # 登陆成功后清除数据库中原有的登录记录,避免下次登陆时取到旧记录
            data = {}
            data['account'] = account
            data['password'] = password
            data['recordDate'] = ExtraTools.get_today()
            data['cookieStr'] = cookie
            DB.save(data, 'LoginRecord')
            DB.commit()
            return True
        else:
            print u'登陆失败'
            Debug.print_dict(response)
            return False
Exemple #8
0
 def get_random_html_file_name(self):
     u"""
     Build a random file name for an html page.

     :return: ExtraTools.md5 of a fresh uuid4 string, followed by '.xhtml'
     :rtype:
     """
     random_token = str(uuid.uuid4())
     return ExtraTools.md5(random_token) + '.xhtml'
Exemple #9
0
 def get_random_html_file_name(self):
     u"""
     Produce a random '.xhtml' file name.

     :return: ExtraTools.md5(str(uuid.uuid4())) with the '.xhtml' suffix appended
     :rtype:
     """
     digest = ExtraTools.md5(str(uuid.uuid4()))
     return digest + '.xhtml'
Exemple #10
0
 def create_filename(self, href):
     u"""
     Build the md5-encoded file name for an href.

     :param href: value passed to ExtraTools.md5
     :return: ExtraTools.md5(href) followed by '.jpg'
     """
     digest = ExtraTools.md5(href)
     return digest + '.jpg'
Exemple #11
0
 def create_filename(self, href):
     u"""
     Derive a '.jpg' file name from the md5 of ``href``.

     :param href: value passed to ExtraTools.md5
     :return: the md5 result with '.jpg' appended
     """
     return ExtraTools.md5(href) + '.jpg'
Exemple #12
0
    def set_info(self, info):
        """
        Merge ``info`` into ``self.info`` and derive the epub title and id
        that belong to ``self.kind``.

        :param info: mapping merged into self.info; for the topic, collection,
            author and column kinds its 'title' / 'name' and '<kind>_id'
            entries are read directly
        :return: None
        """
        self.info.update(info)
        kind = self.kind
        epub = self.property.epub

        # Question / answer / article sets: title and id are both taken from
        # ExtraTools.get_time() (two separate calls, as before).
        generated_titles = (
            (Type.question, '知乎问题集锦({})'),
            (Type.answer, '知乎回答集锦({})'),
            (Type.article, '知乎专栏文章集锦({})'),
        )
        for candidate, pattern in generated_titles:
            if kind == candidate:
                epub.title = pattern.format(ExtraTools.get_time())
                epub.id = ExtraTools.get_time()
        if kind in [Type.answer, Type.question, Type.article]:
            self.info['title'] = epub.title

        # Remaining kinds: title and id come from entries of ``info``.
        keyed_titles = (
            (Type.topic, '话题_{}({})', 'title', 'topic_id'),
            (Type.collection, '收藏夹_{}({})', 'title', 'collection_id'),
            (Type.author, '作者_{}({})', 'name', 'author_id'),
            (Type.column, '专栏_{}({})', 'name', 'column_id'),
        )
        for candidate, pattern, name_key, id_key in keyed_titles:
            if kind == candidate:
                epub.title = pattern.format(info[name_key], info[id_key])
                epub.id = info[id_key]
Exemple #13
0
    def generate_question_page(self, question):
        """
        Render a question and all of its answers into one html page stored in
        the html pool.

        :type question: src.container.task_result.Question
        :return: path of the written page (Path.html_pool_path + '/' + a
            random file name)
        :rtype: str
        """
        # Render the answers first; join() builds the text in one pass instead
        # of re-concatenating the growing string for every answer.
        answer_content = u''.join(
            Template.answer.format(
                author_avatar_url=answer.author_avatar_url,
                author_name=answer.author_name,
                author_id=answer.author_id,
                author_headline=answer.author_headline,
                content=answer.content,
                comment_count=answer.comment_count,
                voteup_count=answer.voteup_count,
                updated_time=ExtraTools.format_date(u'%Y-%m-%d %H:%M:%S',
                                                    answer.updated_time),
            )
            for answer in question.answer_list
        )

        filename = self.get_random_html_file_name()
        content = Template.question.format(
            title=question.question_info.title,
            description=question.question_info.detail,
            answer=answer_content,
        )
        uri = Path.html_pool_path + '/' + filename
        # Context manager so the handle is closed even when write() raises.
        with open(uri, 'w') as buf_file:
            buf_file.write(content)
        return uri
 def create_single_html_book(self):
     """
     Write all pages of all books into one html file inside a directory
     named after the combined title, together with its images and css.

     Nothing is written when the cleaned title is empty: the directory
     operations below would then be applied to u'./', i.e. to the result
     directory itself.

     :return: None
     """
     title = '_'.join([book.epub.title for book in self.book_list])
     title = title.strip()[:128]  # stay clear of the Windows file name length limit
     title = ExtraTools.fix_filename(title)  # remove special characters
     Path.reset_path()
     if not title:
         # Same guard as create(): with an empty title Path.rmdir(u'./' + title)
         # would be called on the result directory instead of a sub-directory.
         return
     Path.chdir(Path.result_path)
     target_dir = u'./' + title
     Path.rmdir(target_dir)
     Path.mkdir(target_dir)
     Path.chdir(target_dir)
     page = []
     for book in self.book_list:
         page += book.page_list
     content = ' \r\n<hr /> \r\n '.join([Match.html_body(x.content) for x in page]).replace('../images/', './images/')
     with open(Path.base_path + '/src/template/content/single_html.html') as html:
         template = html.read().format(title=title, content=content)
     with open(title + u'.html', 'w') as html:
         html.write(template)
     # Images are copied from the OEBPS/images directory that sits next to the html pool.
     shutil.copytree(Path.html_pool_path + u'/../{}/OEBPS/images'.format(title), './images')
     shutil.copy(Path.www_css + '/front.css', './front.css')
     shutil.copy(Path.www_css + '/markdown.css', './markdown.css')
     Path.reset_path()
     return
Exemple #15
0
    def generate_article_page(self, article):
        """
        Render one article into an html page stored in the html pool.

        The article is formatted with Template.answer, wrapped by
        Template.question (with an empty description) and written to
        Path.html_pool_path under a random file name.

        :type article: src.container.data.article.Article
        :return: path of the written page
        :rtype: str
        """
        answer_content = Template.answer.format(
            author_avatar_url=article.author_avatar_url,
            author_name=article.author_name,
            author_id=article.author_id,
            author_headline=article.author_headline,
            content=article.content,
            comment_count=article.comment_count,
            voteup_count=article.voteup_count,
            updated_time=ExtraTools.format_date(u'%Y-%m-%d %H:%M:%S',
                                                article.updated_time),
        )

        filename = self.get_random_html_file_name()
        content = Template.question.format(
            title=article.title,
            description='',
            answer=answer_content,
        )
        uri = Path.html_pool_path + '/' + filename
        # Context manager so the handle is closed even when write() raises.
        with open(uri, 'w') as buf_file:
            buf_file.write(content)
        return uri
 def create(self):
     """
     Start the image download, write every page into the html pool and
     assemble the epub.

     The first page of each book is registered with ``addInfoPage``, the
     remaining pages with ``addHtml``. Returns early, before any directory
     change, when the cleaned title is empty.

     :return: None
     """
     self.image_container.set_save_path(Path.image_pool_path)
     self.image_container.start_download()

     raw_title = '_'.join([book.epub.title for book in self.book_list])
     # Truncate to stay clear of the Windows file name length limit, then
     # remove special characters.
     title = ExtraTools.fix_filename(raw_title.strip()[:128])
     if not title:
         # Skip books whose title ends up empty; the original author warns
         # that carrying on would end in an "rm -rf /" style disaster.
         return

     Path.chdir(Path.base_path + u'/知乎电子书临时资源库/')
     epub = Book(title, 27149527)
     html_tmp_path = Path.html_pool_path + '/'
     image_tmp_path = Path.image_pool_path + '/'

     def dump(page):
         # Write one page into the html pool and hand back its path.
         target = html_tmp_path + page.filename
         with open(target, 'w') as html:
             html.write(page.content)
         return target

     for book in self.book_list:
         first_page = book.page_list[0]
         # addInfoPage is used here; an earlier createChapter call was
         # commented out by the original author.
         epub.addInfoPage(dump(first_page), first_page.title)
         for later_page in book.page_list[1:]:
             epub.addHtml(dump(later_page), later_page.title)

     for image in self.book.image_list:
         epub.addImg(image_tmp_path + image['filename'])

     epub.addLanguage('zh-cn')
     epub.addCreator('ZhihuHelp1.7.0')
     epub.addDesc(u'该电子书由知乎助手生成,知乎助手是姚泽源为知友制作的仅供个人使用的简易电子书制作工具,源代码遵循WTFPL,希望大家能认真领会该协议的真谛,为飞面事业做出自己的贡献 XD')
     epub.addRight('CC')
     epub.addPublisher('ZhihuHelp')
     for css_name in (u'/www/css/markdown.css', u'/www/css/front.css'):
         epub.addCss(Path.base_path + css_name)
     epub.buildingEpub()
     Path.reset_path()
Exemple #17
0
    def generate_question_page(self, question):
        """
        Render a question and all of its answers into one html page stored in
        the html pool.

        :type question: src.container.task_result.Question
        :return: path of the written page (Path.html_pool_path + '/' + a
            random file name)
        :rtype: str
        """
        # Render the answers first; join() builds the text in one pass instead
        # of re-concatenating the growing string for every answer.
        answer_content = u''.join(
            Template.answer.format(
                author_avatar_url=answer.author_avatar_url,
                author_name=answer.author_name,
                author_id=answer.author_id,
                author_headline=answer.author_headline,
                content=answer.content,
                comment_count=answer.comment_count,
                voteup_count=answer.voteup_count,
                # NOTE(review): 'Y-m-d H:i:s' looks like a PHP date() pattern;
                # if ExtraTools.format_date forwards it to strftime it is
                # rendered literally -- confirm against the helper (TODO).
                updated_time=ExtraTools.format_date('Y-m-d H:i:s', answer.updated_time),
            )
            for answer in question.answer_list
        )

        filename = self.get_random_html_file_name()
        content = Template.question.format(
            title=question.question_info.title,
            description=question.question_info.detail,
            answer=answer_content,
        )
        uri = Path.html_pool_path + '/' + filename
        # Context manager so the handle is closed even when write() raises.
        with open(uri, 'w') as buf_file:
            buf_file.write(content)
        return uri
Exemple #18
0
    def login(self, account, password, captcha=''):
        content = Http.get_content('https://www.zhihu.com/')
        xsrf = Match.xsrf(content)
        if not xsrf:
            Debug.logger.info(u'登陆失败')
            Debug.logger.info(u'敲击回车重新发送登陆请求')
            return False
        xsrf = xsrf.split('=')[1]
        # add xsrf as cookie into cookieJar,
        cookie = Http.make_cookie(name='_xsrf',
                                  value=xsrf,
                                  domain='www.zhihu.com')
        self.cookieJar.set_cookie(cookie)
        if captcha:
            post_data = {
                '_xsrf': xsrf,
                'email': account,
                'password': password,
                'remember_me': True,
                'captcha': captcha
            }
        else:
            post_data = {
                '_xsrf': xsrf,
                'email': account,
                'password': password,
                'remember_me': True
            }

        header = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate',  # 主要属性,只要有此项知乎即认为来源非脚本
            'Accept-Language': 'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4',
            'Host': 'www.zhihu.com',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36(KHTML, like Gecko)Chrome/34.0.1847.116 Safari/537.36',
            'Connection': 'keep-alive',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.zhihu.com',
            'Referer': 'https://www.zhihu.com/',
        }
        result = Http.get_content(url=r'https://www.zhihu.com/login/email',
                                  data=post_data,
                                  extra_header=header)
        if not result:
            Debug.logger.info(u'登陆失败,请敲击回车重新登陆')
            return False
        response = json.loads(result)

        if response['r'] == 0:
            print u'登陆成功!'
            print u'登陆账号:', account
            print u'请问是否需要记住帐号密码?输入yes记住,输入其它任意字符跳过,回车确认'
            if raw_input() == 'yes':
                Config.account, Config.password, Config.remember_account = account, password, True
                print u'帐号密码已保存,可通过修改config.json修改设置'
            else:
                Config.account, Config.password, Config.remember_account = '', '', False
                print u'跳过保存环节,进入下一流程'
            Config._save()
            cookie = self.get_cookie()
            DB.execute(
                'delete from LoginRecord')  # 登陆成功后清除数据库中原有的登录记录,避免下次登陆时取到旧记录
            data = {}
            data['account'] = account
            data['password'] = password
            data['recordDate'] = ExtraTools.get_today()
            data['cookieStr'] = cookie
            DB.save(data, 'LoginRecord')
            DB.commit()
            return True
        else:
            print u'登陆失败'
            Debug.print_dict(response)
            return False
Exemple #19
0
 def create_filename(self, href):
     """
     Build a '.jpg' file name from the md5 of ``href``.

     :param href: value passed to ExtraTools.md5
     :return: ExtraTools.md5(href) followed by '.jpg'
     """
     return ExtraTools.md5(href) + '.jpg'
Exemple #20
0
 def create_filename(self, href):
     """
     Name a file after the md5 of ``href``.

     :param href: value passed to ExtraTools.md5
     :return: the md5 result with '.jpg' appended
     """
     digest = ExtraTools.md5(href)
     return digest + '.jpg'
Exemple #21
0
    def parse_article_id(self):
        """
        Store ExtraTools.md5 of the article title in ``self.info['article_id']``.

        :return: None
        """
        # Imported inside the method, as in the original code.
        from src.tools.extra_tools import ExtraTools

        self.info['article_id'] = ExtraTools.md5(self.info['title'])
 def parse_date(date="1357-08-12"):
     """
     Resolve a scraped date text.

     Text containing u"昨天" yields ExtraTools.get_yesterday(), text
     containing u"今天" yields ExtraTools.get_today(); anything else is passed
     to ParserTools.match_content with a ``YYYY-MM-DD`` pattern and the text
     itself as third argument.

     :param date: raw date text
     :return: result of the helper selected above
     """
     relative_markers = (
         (u"昨天", ExtraTools.get_yesterday),
         (u"今天", ExtraTools.get_today),
     )
     for marker, resolve in relative_markers:
         if marker in date:
             return resolve()
     # Mnemonic from the original author: months 1, 3, 5, 7, 8, 10 and 12
     # always have 31 days (presumably where the "1357-08-12" default comes from).
     return ParserTools.match_content(r"\d{4}-\d{2}-\d{2}", date, date)