Beispiel #1
0
def get_blog_info(directory, encoded_category, encoded_filename, matched_result):
    '''Collect the basic facts of one saved blog: title, category, id,
    comment count and read count.

    Args:
        directory: Root directory under which the blog files live.
        encoded_category: Name of the category sub-directory as stored on disk.
        encoded_filename: Name of the blog file inside that sub-directory.
        matched_result: Indexable match result; item 1 supplies the title
            (as stored on disk) and item 2 the blog id.

    Returns:
        A BlogInfo built from the category and title (both passed through
        recover_file_name), the integer blog id, the comment count and the
        read count. Both counts remain 0 when NUMBER_PATTERN finds nothing
        in the file content.
    '''
    # The id is converted up front so a malformed id fails before any file I/O.
    stored_title = matched_result[1]
    identifier = int(matched_result[2])

    blog_path = os.path.join(directory, encoded_category, encoded_filename)
    with open(blog_path, "r", encoding="utf-8") as source:
        content = source.read()

    counters = NUMBER_PATTERN.search(content)
    if counters:
        # Group 1 is the read count, group 2 the comment count.
        reads, comments = int(counters[1]), int(counters[2])
    else:
        reads, comments = 0, 0

    return BlogInfo(
        recover_file_name(encoded_category),
        recover_file_name(stored_title),
        identifier,
        comments,
        reads,
    )
Beispiel #2
0
    def _get_blog_data(self, *args, **kwargs):
        '''Fetch and export the blog data.

        Does nothing when ``self.can_access`` is falsy. Otherwise the blog
        list is requested page by page (up to 100 entries per request); each
        page is exported through BlogsInfo, and every listed blog is fetched
        individually and exported through BlogParser together with its read
        count. A warning is logged when the number of blogs seen differs from
        ``self._account_info.blog_num``.

        Args:
            *args: Accepted but unused.
            **kwargs: Accepted but unused.

        Returns:
            None.
        '''
        if not self.can_access:
            return

        # Fetch the blog count.
        self._get_main_page_data()

        account = self._account_info

        list_url = "https://h5.qzone.qq.com/proxy/domain/b.qzone.qq.com/cgi-bin/blognew/get_abs"
        list_params = {
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "format": "jsonp",
            "hostUin": account.target_uin,
            "uin": account.self_uin,
            "g_tk": account.g_tk,
            # Placeholders; both are overwritten for every page below.
            "pos": "0",
            "num": "0",
            "blogType": "0",
            "reqInfo": "1"
        }

        detail_url = "https://h5.qzone.qq.com/proxy/domain/b.qzone.qq.com/cgi-bin/blognew/blog_output_data"
        detail_params = {
            "uin": "%s" % account.target_uin,
            # Placeholder; set per blog below.
            "blogid": "",
            "numperpage": "15",
            "inCharset": "utf-8",
            # NOTE(review): the trailing space is kept exactly as found;
            # confirm whether the endpoint depends on it.
            "outCharset": "utf-8 ",
            "ref": "qzone"
        }

        # At most 100 blogs per request.
        page_size = 100
        page_count = math.ceil(account.blog_num / page_size)

        fetched = 0
        for page_index in range(page_count):
            offset = page_index * page_size
            # Only the final page may be shorter than a full page.
            on_last_page = page_index >= page_count - 1
            if on_last_page:
                batch_size = account.blog_num - page_index * page_size
            else:
                batch_size = page_size

            list_params["pos"] = "%d" % offset
            list_params["num"] = "%d" % batch_size
            list_response = account.get_url(list_url, params=list_params)
            page_json = get_json_data_from_response(list_response.text)

            BlogsInfo(page_json, offset, offset + batch_size,
                      self._directory).export()

            entries = page_json["data"]["list"]
            for entry in entries:
                title = entry["title"]
                print("process blog:", title)

                blog_id = entry["blogId"]
                category = entry["cate"]
                comment_num = entry["commentNum"]
                blog_info = BlogInfo(category, title, blog_id, comment_num)
                stats_json = self._get_blog_comment_data(blog_info)

                detail_params["blogid"] = "%s" % blog_id
                detail_response = account.get_url(detail_url,
                                                  params=detail_params)

                # The read count is best effort: any failure while digging it
                # out of the statistics is reported and the count stays 0.
                read_count = 0
                try:
                    current_stats = stats_json["data"][0]["current"]
                    new_data = current_stats["newdata"]
                    if new_data and len(new_data) > 0:
                        read_count = new_data["RZRD"]
                except Exception as err:
                    print("get read num error")
                    print(err)
                    logging.exception(blog_info)
                    logging.exception(err)

                BlogParser(self._directory, blog_info, detail_response.text,
                           read_count).export()

            fetched += len(entries)
            print("current get %d blog(s)" % fetched)

        if fetched != account.blog_num:
            logging.warning(
                "qq %s: not get encough blog, get: %d, should get: %d" %
                (account.target_uin, fetched, account.blog_num))