コード例 #1
0
    def _get_blog_comment_data(self, blog_info):
        '''Fetch the like data and every comment page of one blog post.

        The like statistics are requested first through
        ``self._get_like_data``. The comments are then requested in pages of
        at most 50, and each page is handed to ``BlogComment`` for export.
        A warning is logged when the number of comments received differs
        from ``blog_info.comment_num``.

        Args:
            blog_info: object providing ``blog_id``, ``title`` and
                ``comment_num`` for the post being processed.

        Returns:
            The value returned by ``self._get_like_data`` for this post, or
            ``None`` when ``self.can_access`` is false.
        '''

        if not self.can_access:
            return

        account = self._account_info
        like_key = "http://user.qzone.qq.com/%s/blog/%s" % (
            account.target_uin, blog_info.blog_id)
        statistical_json_data = self._get_like_data(like_key)

        expected_total = blog_info.comment_num
        print("process blog comment, [%s]\tid: %s\tcomment_num: %d" %
              (blog_info.title, blog_info.blog_id, expected_total))

        comment_url = "https://h5.qzone.qq.com/proxy/domain/b.qzone.qq.com/cgi-bin/blognew/get_comment_list"

        # At most 50 comments are requested per call.
        page_size = 50

        payload = {
            "inCharset": "gb2312",
            "outCharset": "gb2312",
            "format": "jsonp",
            "uin": account.target_uin,
            "g_tk": account.g_tk,
            "start": "0",
            "num": "%d" % page_size,
            "topicId": "%s_%s" % (account.target_uin, blog_info.blog_id),
        }

        page_count = math.ceil(expected_total / page_size)
        received_total = 0
        for page_index in range(page_count):
            offset = page_index * page_size
            # Every page is full except the last one, which takes the rest.
            if page_index < page_count - 1:
                batch_size = page_size
            else:
                batch_size = expected_total - offset
            page_end = offset + batch_size

            payload["start"] = "%d" % offset
            payload["num"] = "%d" % batch_size
            print("blog: %s, comment: [%d, %d)" %
                  (blog_info.title, offset, page_end))

            response = account.get_url(comment_url, params=payload)
            json_data = get_json_data_from_response(response.text)
            page_data = json_data["data"]
            if "comments" in page_data:
                received_total += len(page_data["comments"])

            BlogComment(json_data, offset, page_end, blog_info,
                        self._directory).export()
            random_sleep(0, 1)

        if received_total != expected_total:
            message = ("qq %s: not get correct blog comment, "
                       "blog: %s, comment get: %d, comment should get: %d" %
                       (account.target_uin, blog_info.title, received_total,
                        expected_total))
            logging.warning(message)

        return statistical_json_data
コード例 #2
0
    def export(self, need_download_media=False):
        '''Export the shuoshuo entries held in ``self.json_data``.

        For every entry of ``json_data["msglist"]`` this method:

        * appends the entry's ``tid`` to the tid file inside
          ``self.directory_path``;
        * re-fetches the entry through ``_parse_single_shuoshuo`` when the
          length of its embedded comment list differs from ``cmtnum``;
        * passes comments and the entry itself to ``export_media_url``;
        * requests the full picture data once when the entry lists exactly
          9 pictures but reports a total greater than 9;
        * calls ``_parse_full_content`` when ``has_more_con`` is set.

        Finally ``self.save(self._filename)`` is called, whether or not a
        ``msglist`` was present.

        Args:
            need_download_media: when ``False`` (the default) a comment is
                passed to ``export_media_url`` only if its ``uin`` differs
                from the logged-in uin, and an entry only if the target uin
                differs from the logged-in uin. When ``True`` both are
                always passed on.
        '''

        if "msglist" in self.json_data:
            msglist = self.json_data["msglist"]
            tid_file = os.path.join(self.directory_path,
                                    QzoneFileName.SHUOSHUO_TID)
            with open(tid_file, "a", encoding="utf-8") as f:
                for i, msg in enumerate(msglist):
                    print("%05d\t" % ShuoShuoParser._shuoshuo_count,
                          "process shuoshuo, tid:", msg["tid"])
                    f.write("%s\n" % msg["tid"])
                    need_sleep = False

                    comment_num = msg["cmtnum"]
                    if comment_num > 0 and "commentlist" in msg \
                            and msg["commentlist"]:
                        comment_list = msg["commentlist"]
                        if len(comment_list) != comment_num:
                            # The embedded list is incomplete: fetch the
                            # entry on its own and replace it in msglist so
                            # the saved JSON holds the re-fetched version.
                            need_sleep = True
                            msg = msglist[i] = self._parse_single_shuoshuo(
                                msg["tid"], comment_num)
                            comment_list = msg["commentlist"]

                        for comment in comment_list:
                            if comment["uin"] != self._account_info.self_uin \
                                    or need_download_media:
                                export_media_url(comment, self.directory_path)

                    # Entry has more than 9 pictures but only 9 are listed.
                    pictures = msg.get(QzoneType.PICTURE)
                    pic_total = msg.get(QzoneKey.PIC_TOTAL)
                    if pictures and pic_total and len(pictures) == 9 \
                            and pic_total > 9:
                        # Call _parse_all_picture exactly once; the earlier
                        # code called it twice and discarded the first
                        # result.
                        floatview_data = self._parse_all_picture(msg)
                        msg[QzoneKey.OPTION_DATA] = {
                            QzoneKey.SHUOSHUO_FLOATVIEW: floatview_data,
                        }

                    # The full text has to be fetched separately.
                    if msg.get("has_more_con"):
                        self._parse_full_content(msg)

                    if self._account_info.target_uin != self._account_info.self_uin \
                            or need_download_media:
                        export_media_url(msg, self.directory_path)

                    ShuoShuoParser._shuoshuo_count += 1

                    # Pause only when an extra request was made above.
                    if need_sleep:
                        random_sleep(0, 1)

        self.save(self._filename)
コード例 #3
0
    def export(self, need_download_media=False):
        '''Export the shuoshuo entries held in ``self.json_data``.

        Each entry of ``json_data["msglist"]`` has its ``tid`` appended to
        the tid file in ``self.directory_path``. When the length of its
        embedded comment list differs from ``cmtnum`` the entry is
        re-fetched with ``_parse_single_shuoshuo`` and replaced in the list.
        Comments go to ``export_comment_media_url`` and the entry's media
        (one list per type in ``config.MEDIA_TYPE``) go to
        ``export_content_media_url``. ``self.save(self._file_name)`` is
        always called at the end.

        Args:
            need_download_media: when ``False`` (the default) a comment is
                exported only if its ``uin`` differs from the logged-in uin,
                and an entry's media only if the target uin differs from
                the logged-in uin. When ``True`` both are always exported.
        '''

        if "msglist" in self.json_data:
            posts = self.json_data["msglist"]
            tid_path = os.path.join(self.directory_path,
                                    config.SHUOSHUO_TID_FILE)
            with open(tid_path, "a", encoding="utf-8") as tid_out:
                for index, post in enumerate(posts):
                    print("%05d\t" % ShuoShuoParser._shuoshuo_count,
                          "process shuoshuo, tid:", post["tid"])
                    tid_out.write("%s\n" % post["tid"])
                    refetched = False

                    expected_comments = post["cmtnum"]
                    if expected_comments > 0 and "commentlist" in post \
                            and post["commentlist"]:
                        comments = post["commentlist"]
                        if len(comments) != expected_comments:
                            # Embedded list is incomplete: fetch the entry
                            # on its own and swap it into the list.
                            refetched = True
                            post = self._parse_single_shuoshuo(
                                post["tid"], expected_comments)
                            posts[index] = post
                            comments = post["commentlist"]

                        for comment in comments:
                            if comment["uin"] != self._account_info.self_uin \
                                    or need_download_media:
                                export_comment_media_url(
                                    comment, self.directory_path)

                    if self._account_info.target_uin != self._account_info.self_uin \
                            or need_download_media:
                        for media_type in config.MEDIA_TYPE:
                            if media_type not in post:
                                continue
                            for media in post[media_type]:
                                export_content_media_url(
                                    media, media_type, self.directory_path)

                    ShuoShuoParser._shuoshuo_count += 1

                    # Pause only when an extra request was made above.
                    if refetched:
                        random_sleep(0, 1)

        self.save(self._file_name)
コード例 #4
0
    def _delete_all_shuoshuo(self):
        '''Delete every shuoshuo whose tid is listed in the tid file.

        Does nothing unless ``self._account_info.is_self()`` is true. Each
        line of the tid file is passed to ``self._delete_shuoshuo`` with its
        newline removed, followed by a short random sleep. The original
        author notes that deleting too frequently triggers a captcha.
        '''

        if not self._account_info.is_self():
            return

        tid_path = os.path.join(self._directory, QzonePath.SHUOSHUO,
                                QzoneFileName.SHUOSHUO_TID)
        with open(tid_path, "r", encoding="utf-8") as tid_lines:
            for count, raw_line in enumerate(tid_lines, start=1):
                shuoshuo_tid = raw_line.strip("\n")
                self._delete_shuoshuo(shuoshuo_tid)
                print(count, "delete tid:", shuoshuo_tid)
                random_sleep(0, 1)
コード例 #5
0
    def _get_shuoshuo_like_data(self):
        '''Request like data for every shuoshuo listed in the tid file.

        Does nothing when ``self.can_access`` is false. For each line of the
        tid file a unikey URL is built from the target uin and the tid and
        passed to ``self._get_like_data``, followed by a random sleep.
        '''

        if not self.can_access:
            return

        tid_path = os.path.join(self._directory, QzonePath.SHUOSHUO,
                                QzoneFileName.SHUOSHUO_TID)
        with open(tid_path, "r", encoding="utf-8") as tid_lines:
            for raw_line in tid_lines:
                like_key = "http://user.qzone.qq.com/%s/mood/%s" % (
                    self._account_info.target_uin, raw_line.strip("\n"))
                self._get_like_data(like_key)
                random_sleep(1, 2)
コード例 #6
0
    def _get_message_board(self, *args, **kwargs):
        '''Download and export every page of the target's message board.

        The first request returns up to 20 messages together with the total
        message count (``data.total``). The remaining pages are then
        requested 20 at a time. Every page that comes back with ``code == 0``
        is exported through ``MsgBoardParser``. A page that still fails after
        ``RETRY_TIMES`` attempts is skipped; if the first page fails, the
        method returns without exporting anything. A warning is logged when
        the number of messages received differs from the reported total.

        Args:
            *args: accepted but not used by this method.
            **kwargs: accepted but not used by this method.

        Returns:
            None.
        '''

        if not self.can_access:
            return

        url_pattern = "https://user.qzone.qq.com/proxy/domain/m.qzone.qq.com/cgi-bin/new/get_msgb"
        num = 20
        pos = 0

        payload = {
            "format": "jsonp",
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "uin": self._account_info.self_uin,
            "hostUin": self._account_info.target_uin,
            "start": "%d" % pos,
            "num": "%d" % num,
            "g_tk": self._account_info.g_tk,
        }

        def fetch_page():
            # Request the page currently described by ``payload``. Returns
            # the parsed JSON once its "code" is 0, or None when every one
            # of the RETRY_TIMES attempts failed (also when RETRY_TIMES is 0).
            for _ in range(RETRY_TIMES):
                r = self._account_info.get_url(url_pattern, params=payload)
                page = get_json_data_from_response(r.text)
                if page["code"] == 0:
                    return page
                random_sleep(1, 2)
            return None

        def count_messages(page):
            # Number of messages in a page; 0 when "commentList" is absent.
            if "commentList" in page["data"]:
                return len(page["data"]["commentList"])
            return 0

        # Fetch the first 20 messages and the total message count.
        json_data = fetch_page()
        if json_data is None:
            return

        current_num = count_messages(json_data)
        msg_parser = MsgBoardParser(json_data, pos, pos + current_num,
                                    self._directory)
        msg_parser.export()
        msg_num = json_data["data"]["total"]
        total_num = current_num
        print("current get msgboard num", total_num)

        # Process the remaining messages.
        loop_num = math.ceil(msg_num / num)
        for page_index in range(1, loop_num):
            pos = page_index * num
            # Every page is full except the last one, which takes the rest.
            if page_index < loop_num - 1:
                current_num = num
            else:
                current_num = msg_num - pos
            payload["start"] = "%d" % pos
            payload["num"] = "%d" % current_num

            json_data = fetch_page()
            if json_data is None:
                # Skip this page; the mismatch is reported by the final check.
                continue

            total_num += count_messages(json_data)
            msg_parser = MsgBoardParser(json_data, pos, pos + current_num,
                                        self._directory)
            msg_parser.export()

            print("current get msgboard num", total_num)
            random_sleep(0, 1)

        if total_num != msg_num:
            logging.warning(
                "qq %s: not get correct msg in msg_board, get: %d\t, should get: %d",
                self._account_info.target_uin, total_num, msg_num)
コード例 #7
0
    def _get_like_data(self, unikey):
        '''Fetch the like data for ``unikey`` and store it in the count file.

        The like counter for ``unikey`` is requested first. When the counter
        is positive, the detailed like list is requested page by page as
        well. The result is stored under ``unikey`` in the JSON file
        ``QzoneExporter.DATA_COUNT_FILE`` inside ``self._directory``.

        All network requests are made before the file is touched. The file
        update (create if missing, read, modify, write) then runs as one
        step while holding ``QzoneExporter._lock``, so callers that share
        this lock cannot overwrite each other's entries.

        Args:
            unikey: URL-like key identifying the item whose likes are
                requested.

        Returns:
            The parsed JSON of the like-counter request, or ``None`` when
            ``self.can_access`` is false.
        '''

        if not self.can_access:
            return

        print("process like data:", unikey)

        # Request the like counter.
        like_count_url = "https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/user/qz_opcnt2"
        payload = {
            "fupdate": "1",
            "unikey": unikey,
            "g_tk": self._account_info.g_tk
        }
        r = self._account_info.get_url(like_count_url, params=payload)
        unikey_json_data = get_json_data_from_response(r.text)
        try:
            like_count = unikey_json_data["data"][0]["current"]["likedata"][
                "cnt"]
        except (KeyError, IndexError, TypeError) as e:
            # The response does not have the expected shape; treat the item
            # as having no likes and log the traceback once.
            print(e)
            logging.exception("unikey: %s, response: %s", unikey,
                              unikey_json_data)
            like_count = 0

        like_entry = {QzoneExporter.LIKE_COUNT_KEY: unikey_json_data}

        if like_count <= 0:
            print(unikey, "has no like data")
            like_entry[QzoneExporter.LIKE_DETAILED_KEY] = "no like detailed data"
        else:
            # Request the detailed like list.
            like_data_url = "https://user.qzone.qq.com/proxy/domain/users.qzone.qq.com/cgi-bin/likes/get_like_list_app"
            begin_uin = "0"
            query_count = 60
            if_first_page = 1
            total_num = 0
            payload = {
                "uin": self._account_info.self_uin,
                "unikey": unikey,
                "begin_uin": begin_uin,
                "query_count": "%d" % query_count,
                "if_first_page": "%d" % if_first_page,
                "g_tk": self._account_info.g_tk
            }

            like_pages = []
            like_entry[QzoneExporter.LIKE_DETAILED_KEY] = like_pages
            while True:
                payload["begin_uin"] = begin_uin
                payload["if_first_page"] = "%d" % if_first_page
                r = self._account_info.get_url(like_data_url, params=payload)
                temp = r.text
                # Keep only the outermost {...} of the response text.
                temp = temp[temp.find("{"):temp.rfind("}") + 1]
                # Chinese text arrives garbled: re-encode as iso8859 first,
                # then decode with the real charset.
                temp_bytes = temp.encode("iso8859")
                try:
                    try:
                        temp = temp_bytes.decode("utf-8")
                    except UnicodeError:
                        temp = temp_bytes.decode("gb2312")
                except Exception:
                    logging.exception("=====\nerror: %s\n=====", temp)
                    print("decode error, break")
                    break

                like_json_data = json.loads(temp)
                like_pages.append(like_json_data)

                if "data" not in like_json_data:
                    break

                like_uin_info = like_json_data["data"]["like_uin_info"]
                if len(like_uin_info) == 0:
                    break
                # The next page starts after the last uin of this page.
                begin_uin = like_uin_info[-1]["fuin"]
                if_first_page = 0

                # NOTE(review): "total_number" is added once per page; if it
                # is the overall total rather than this page's size, the
                # loop stops after the first page - confirm against the API.
                current_get_num = like_json_data["data"]["total_number"]
                total_num += current_get_num
                if current_get_num <= 0 or total_num >= like_count:
                    break

                random_sleep(1, 2)

        data_count_file = os.path.join(self._directory,
                                       QzoneExporter.DATA_COUNT_FILE)
        # Read-modify-write as one step under the lock so that entries
        # written by other callers in the meantime are not lost.
        with QzoneExporter._lock:
            if os.path.exists(data_count_file):
                with open(data_count_file, "r", encoding="utf-8") as f:
                    json_data = json.load(f)
            else:
                json_data = {}
            json_data[unikey] = like_entry
            with open(data_count_file, "w", encoding="utf-8") as f:
                json.dump(json_data, f, ensure_ascii=False, indent=4)
        return unikey_json_data
コード例 #8
0
    def _get_album_photo_data(self, album_info, need_get_comment=False):
        '''Export the photo data of one album, optionally with comments.

        Photos are listed in pages of up to 500 and each page is exported
        through ``PhotoParser``. When a page contains photos, a second
        "floatview" request starting at the page's first photo is made and
        exported through ``PhotoParser`` with an extra ``True`` argument
        (the original comment says this yields original-image and video
        URLs).

        When ``need_get_comment`` is true, every photo whose ``forum`` count
        is non-zero gets its own floatview request. The returned comments
        are buffered and exported through ``PhotoComment`` whenever more
        than 100 have accumulated since the last export; the remainder is
        exported after the last page.

        Args:
            album_info: object providing ``name``, ``id``, ``photo_num`` and
                ``directory`` for the album.
            need_get_comment: whether per-photo comments are fetched too.

        Returns:
            None.
        '''

        if not self.can_access:
            return

        print("process album, name: %s\tid: %s" %
              (album_info.name, album_info.id))

        list_photo_url = "https://h5.qzone.qq.com/proxy/domain/photo.qzone.qq.com/fcgi-bin/cgi_list_photo"
        floatview_photo_list = "https://h5.qzone.qq.com/proxy/domain/photo.qzone.qq.com/fcgi-bin/cgi_floatview_photo_list_v2"
        page_size = 500

        list_photo_payload = {
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "g_tk": self._account_info.g_tk,
            "hostUin": self._account_info.target_uin,
            "uin": self._account_info.self_uin,
            "topicId": album_info.id,
            "pageStart": "0",
            "pageNum": "%d" % page_size,
        }
        floatview_photo_payload = {
            "g_tk": self._account_info.g_tk,
            "topicId": album_info.id,
            "hostUin": self._account_info.target_uin,
            "uin": self._account_info.self_uin,
            "fupdate": "1",
            "plat": "qzone",
            "source": "qzone",
            "cmtNum": "99",  # required
            "sortOrder": "1",
            "need_private_comment": "1",
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "appid": "4",
            "isFirst": "1",
            "picKey": "unknown",
            "postNum": "0"  # number of following photos to fetch
        }

        # Buffer for comments that have been fetched but not yet exported.
        pending_comments = {"data": {"comments": []}}
        exported_upto = 0
        fetched_total = 0

        page_count = math.ceil(album_info.photo_num / page_size)
        for page_index in range(page_count):
            start = page_index * page_size
            # Every page is full except the last one, which takes the rest.
            if page_index < page_count - 1:
                current_num = page_size
            else:
                current_num = album_info.photo_num - start
            end = start + current_num

            list_photo_payload["pageStart"] = "%d" % start
            list_photo_payload["pageNum"] = "%d" % current_num
            response = self._account_info.get_url(list_photo_url,
                                                  params=list_photo_payload)
            json_data = get_json_data_from_response(response.text)
            PhotoParser(json_data, start, end, self._directory,
                        album_info.directory).export()

            page_data = json_data["data"]
            photo_list = page_data["photoList"] if "photoList" in page_data \
                else None

            # Floatview request: original-image and video URLs.
            if photo_list:
                floatview_photo_payload["picKey"] = photo_list[0]["lloc"]
                floatview_photo_payload["postNum"] = "%d" % (current_num - 1)
                response = self._account_info.get_url(
                    floatview_photo_list, params=floatview_photo_payload)
                floatview_json_data = get_json_data_from_response(
                    response.text)
                PhotoParser(floatview_json_data, start, end, self._directory,
                            album_info.directory, True).export()

                # Per-photo comment data.
                if need_get_comment:
                    for photo in photo_list:
                        pic_comment_num = photo["forum"] or 0
                        if pic_comment_num == 0:
                            continue
                        print("find %d comment(s) in %s" %
                              (pic_comment_num, photo["lloc"]))
                        # The reported count may be wrong, so never ask for
                        # fewer than 99 comments.
                        floatview_photo_payload["cmtNum"] = "%d" % max(
                            pic_comment_num, 99)
                        floatview_photo_payload["picKey"] = photo["lloc"]
                        floatview_photo_payload["postNum"] = "0"

                        response = self._account_info.get_url(
                            floatview_photo_list,
                            params=floatview_photo_payload)
                        floatview_json_data = get_json_data_from_response(
                            response.text)
                        floatview_data = floatview_json_data["data"]
                        if "single" not in floatview_data \
                                or not floatview_data["single"]:
                            continue

                        comment_data = floatview_data["single"]["comments"]
                        pending_comments["data"]["comments"].extend(
                            comment_data)
                        fetched_total += len(comment_data)

                        # Flush once more than 100 comments are buffered.
                        if fetched_total > 100 + exported_upto:
                            PhotoComment(pending_comments, exported_upto,
                                         fetched_total, self._directory,
                                         album_info.directory,
                                         self._account_info).export()
                            # Rebind to a new list instead of clearing the
                            # old one in place.
                            pending_comments["data"]["comments"] = []
                            exported_upto = fetched_total
                        random_sleep(0, 1)

            random_sleep(1, 2)

        # Export whatever comments are still buffered.
        if need_get_comment:
            if exported_upto < fetched_total:
                PhotoComment(pending_comments, exported_upto, fetched_total,
                             self._directory, album_info.directory,
                             self._account_info).export()
            print("get %d comment(s) in %s" %
                  (fetched_total, album_info.name))

        print(str(album_info), "photo data done")
コード例 #9
0
    def _get_list_album_data(self, get_like_data=False, *args, **kwargs):
        '''Download the album list, export it, then process every album.

        The album list is requested in pages of 100 and merged into the
        first page's JSON, which is exported through ``AlbumListInfo``. For
        each album, ``_get_album_comment_data`` and ``_get_album_photo_data``
        are then called. Per-photo comments are requested only when the
        album comment call returned 0.

        A page that still fails after ``RETRY_TIMES`` attempts is skipped,
        as is a later page that lacks the album-list key. If the first page
        fails, or contains neither known album-list key, the method returns
        without exporting anything.

        Args:
            get_like_data: when true, ``_get_like_data`` is also called for
                every album.
            *args: accepted but not used by this method.
            **kwargs: accepted but not used by this method.

        Returns:
            None.
        '''

        if not self.can_access:
            return

        album_list_url = "https://h5.qzone.qq.com/proxy/domain/photo.qzone.qq.com/fcgi-bin/fcg_list_album_v3"
        pos = 0
        num = 100
        current_num = num

        payload = {
            "g_tk": self._account_info.g_tk,
            "uin": self._account_info.self_uin,
            "hostUin": self._account_info.target_uin,
            "inCharset": "utf-8",
            "outCharset": "utf-8",
            "source": "qzone",
            "plat": "qzone",
            "pageStart": "%d" % pos,
            "pageNum": "%d" % current_num
        }

        def fetch_page():
            # Request the page currently described by ``payload``. Returns
            # the parsed JSON once its "code" is 0, or None when every one
            # of the RETRY_TIMES attempts failed (also when RETRY_TIMES is 0).
            for _ in range(RETRY_TIMES):
                r = self._account_info.get_url(album_list_url, params=payload)
                page = get_json_data_from_response(r.text)
                if page["code"] == 0:
                    return page
                random_sleep(1, 2)
            return None

        json_data = fetch_page()
        if json_data is None:
            return

        # The album list sits under one of two keys; try the "sort" key
        # first and fall back to the "class" key.
        album_list_mode_key = QzoneKey.ALBUM_LIST_MODE_SORT_KEY
        if album_list_mode_key not in json_data["data"]:
            album_list_mode_key = QzoneKey.ALBUM_LIST_MODE_CLASS_KEY
            if album_list_mode_key not in json_data["data"]:
                logging.warning("album list data not found in %s",
                                json_data["data"])
                return
        if not json_data["data"][album_list_mode_key]:
            logging.warning("%s\nalbum list data is null", json_data["data"])

        self._album_num = albums_num = json_data["data"]["albumsInUser"]
        print("total album num", self._album_num)

        total_num = QzoneExporter._get_album_list_data_len(json_data["data"])
        loop_num = math.ceil(albums_num / num)
        for page_index in range(1, loop_num):
            pos = page_index * num
            # Every page is full except the last one, which takes the rest.
            if page_index < loop_num - 1:
                current_num = num
            else:
                current_num = albums_num - pos
            payload["pageStart"] = "%d" % pos
            payload["pageNum"] = "%d" % current_num

            temp_json_data = fetch_page()
            if temp_json_data is None:
                continue

            page_data = temp_json_data["data"]
            if album_list_mode_key not in page_data:
                # Nothing to merge from this page; skip it so the merge
                # below cannot raise KeyError.
                logging.warning("album list data not found in %s", page_data)
                continue
            if not page_data[album_list_mode_key]:
                print("album is null, break")
                break

            total_num += QzoneExporter._get_album_list_data_len(page_data)
            json_data["data"][album_list_mode_key] += page_data[
                album_list_mode_key]

            print("current get num", total_num)
            random_sleep(0, 1)

        album_list_info = AlbumListInfo(json_data, self._directory)
        album_list_info.export()

        if not album_list_info.json_data["data"][album_list_mode_key]:
            return

        album_list_data = get_album_list_data(
            album_list_info.json_data["data"])
        for album_data in album_list_data:
            album_info = AlbumInfo(album_data)
            print(str(album_info))

            album_comment_num = self._get_album_comment_data(album_info)
            # Per-photo comments are requested only when the album comment
            # call returned 0.
            self._get_album_photo_data(album_info, album_comment_num == 0)

            if get_like_data:
                unikey = "http://user.qzone.qq.com/%s/photo/%s" % (
                    self._account_info.target_uin, album_info.id)
                self._get_like_data(unikey)

            random_sleep(1, 2)