def load_review_list():
    review_data = {
        "can_review_lists": [],
        "dlc_in_game": {},
        "review_list": [],
    }
    if not os.path.exists(REVIEW_DATA_PATH):
        return review_data
    review_data = tool.json_decode(file.read_file(REVIEW_DATA_PATH), review_data)
    return review_data
def load_discount_list():
    discount_game_list = []
    if not os.path.exists(DISCOUNT_DATA_PATH):
        return discount_game_list
    week_day = int(time.strftime("%w"))
    # This week's API update time has already passed
    if (week_day > API_UPDATE_TIME_WEEKDAY) or (week_day == API_UPDATE_TIME_WEEKDAY and int(time.strftime("%H")) >= API_UPDATE_TIME_HOUR):
        last_api_update_day = (datetime.datetime.today() + datetime.timedelta(days=API_UPDATE_TIME_WEEKDAY - week_day)).timetuple()
    # Fall back to last week's API update time
    else:
        last_api_update_day = (datetime.datetime.today() + datetime.timedelta(days=API_UPDATE_TIME_WEEKDAY - week_day - 7)).timetuple()
    last_api_update_day = time.strptime(time.strftime("%Y-%m-%d " + "%02d" % API_UPDATE_TIME_HOUR + ":00:00", last_api_update_day), "%Y-%m-%d %H:%M:%S")
    last_api_update_time = time.mktime(last_api_update_day)
    # The cached file predates the latest API update, so treat it as expired
    if os.path.getmtime(DISCOUNT_DATA_PATH) < last_api_update_time < time.time():
        output.print_msg("discount game list expired")
        return discount_game_list
    discount_game_list = tool.json_decode(file.read_file(DISCOUNT_DATA_PATH), discount_game_list)
    return discount_game_list
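# A minimal sketch (not part of the project) of the expiry computation above, kept here
# for reference. It reuses the datetime module already imported by this file; the
# update_weekday / update_hour parameters stand in for API_UPDATE_TIME_WEEKDAY and
# API_UPDATE_TIME_HOUR, whose real values are defined elsewhere.
def _last_weekly_update_timestamp_sketch(update_weekday, update_hour):
    """Return the Unix timestamp of the most recent weekly update time."""
    now = datetime.datetime.today()
    week_day = int(now.strftime("%w"))  # Sunday == 0, matching time.strftime("%w")
    if week_day > update_weekday or (week_day == update_weekday and now.hour >= update_hour):
        day_offset = update_weekday - week_day  # this week's update time has passed
    else:
        day_offset = update_weekday - week_day - 7  # go back to last week's update day
    update_day = (now + datetime.timedelta(days=day_offset)).replace(hour=update_hour, minute=0, second=0, microsecond=0)
    return update_day.timestamp()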
def rename_file_extension(self, response):
    """Check whether the file extension is correct"""
    if self.recheck_file_extension:
        # Use Content-Type from the response headers as the file extension
        content_type = response.getheader("Content-Type")
        if content_type is not None:
            # Reset the flag so the path is not rewritten repeatedly
            self.recheck_file_extension = False
            if content_type != "octet-stream":
                global MIME_DICTIONARY
                if MIME_DICTIONARY is None:
                    MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in MIME_DICTIONARY:
                    new_file_extension = MIME_DICTIONARY[content_type]
                else:
                    new_file_extension = content_type.split("/")[-1]
                self.file_path = os.path.splitext(self.file_path)[0] + "." + new_file_extension
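# The contents of mime.json are not shown in this file; the code only assumes it decodes
# into a plain {Content-Type: extension} dictionary, so a hypothetical entry set would be:
#   {
#       "image/jpeg": "jpg",
#       "text/plain": "txt",
#       "video/quicktime": "mov"
#   }
# Any Content-Type missing from the dictionary falls back to the text after the "/".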
def save_net_file(file_url, file_path, need_content_type=False, head_check=False, **kwargs):
    """Visit web and save to local

    :param file_url: the remote resource URL which you want to save

    :param file_path: the local file path which you want to save the remote resource to

    :param need_content_type: whether to automatically rename the file according to the "Content-Type" response header

    :param head_check: send a "HEAD" request to check the response status and file size before downloading the file

    :return:
        status      0 download failure, 1 download successful
        code        failure reason
        file_path   final local file path (renamed when need_content_type is True)
    """
    # Make sure the save directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return {"status": 0, "code": -11}
    is_create_file = False
    is_multi_thread = False
    return_code = {"status": 0, "code": -3}
    for retry_count in range(0, NET_CONFIG["DOWNLOAD_RETRY_COUNT"]):
        if head_check and retry_count == 0:
            request_method = "HEAD"
        else:
            request_method = "GET"
        # Fetch the response headers
        response = http_request(file_url, request_method, connection_timeout=NET_CONFIG["HTTP_CONNECTION_TIMEOUT"], read_timeout=NET_CONFIG["HTTP_READ_TIMEOUT"], **kwargs)
        # Any other status: stop retrying
        if response.status != HTTP_RETURN_CODE_SUCCEED:
            # Malformed URL
            if response.status == HTTP_RETURN_CODE_URL_INVALID:
                return_code = {"status": 0, "code": -1}
            # Retry limit exceeded
            elif response.status == HTTP_RETURN_CODE_RETRY:
                return_code = {"status": 0, "code": -2}
            # Other HTTP status codes
            else:
                return_code = {"status": 0, "code": response.status}
            break
        # Check whether the file is too large
        content_length = response.getheader("Content-Length")
        if content_length is not None:
            content_length = int(content_length)
            # Larger than the download size limit
            if content_length > NET_CONFIG["DOWNLOAD_LIMIT_SIZE"]:
                return {"status": 0, "code": -4}
            # Large file: download with multiple threads (only meaningful when head_check is True, otherwise the whole body has already been returned)
            elif head_check and content_length > NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_SIZE"]:
                is_multi_thread = True
        # Use Content-Type from the response headers as the file extension
        if need_content_type:
            content_type = response.getheader("Content-Type")
            if content_type is not None and content_type != "octet-stream":
                global MIME_DICTIONARY
                if MIME_DICTIONARY is None:
                    MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in MIME_DICTIONARY:
                    new_file_type = MIME_DICTIONARY[content_type]
                else:
                    new_file_type = content_type.split("/")[-1]
                file_path = os.path.splitext(file_path)[0] + "." + new_file_type
        if not is_multi_thread:  # Single-threaded download
            # If a HEAD request was made first, fetch the full body now
            if head_check:
                response = http_request(file_url, method="GET", connection_timeout=NET_CONFIG["DOWNLOAD_CONNECTION_TIMEOUT"], read_timeout=NET_CONFIG["DOWNLOAD_READ_TIMEOUT"], **kwargs)
                if response.status != HTTP_RETURN_CODE_SUCCEED:
                    continue
            # Write the file
            with open(file_path, "wb") as file_handle:
                is_create_file = True
                try:
                    file_handle.write(response.data)
                except OSError as ose:
                    if str(ose).find("No space left on device") != -1:
                        global EXIT_FLAG
                        EXIT_FLAG = True
                    raise
        else:  # Multi-threaded download
            # Bytes downloaded by a single thread: one tenth of the file, rounded up to a whole MB, then clamped to the configured bounds
            multi_thread_block_size = int(math.ceil(content_length / 10 / SIZE_MB)) * SIZE_MB
            multi_thread_block_size = min(NET_CONFIG["DOWNLOAD_MULTI_THREAD_MAX_BLOCK_SIZE"], max(NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_BLOCK_SIZE"], multi_thread_block_size))
            # Create the file
            with open(file_path, "w"):
                is_create_file = True
            thread_list = []
            error_flag = []
            with open(file_path, "rb+") as file_handle:
                file_no = file_handle.fileno()
                end_pos = -1
                while end_pos < content_length - 1:
                    start_pos = end_pos + 1
                    end_pos = min(content_length - 1, start_pos + multi_thread_block_size - 1)
                    # Duplicate the file descriptor for this thread
                    fd_handle = os.fdopen(os.dup(file_no), "rb+", -1)
                    thread = MultiThreadDownload(file_url, start_pos, end_pos, fd_handle, error_flag)
                    thread.start()
                    thread_list.append(thread)
                # Wait for all download threads to finish
                for thread in thread_list:
                    thread.join()
            # Retry if any thread failed, or if the file contains a run of 1KB or more of null bytes
            if len(error_flag) > 0:
                continue
            if not _check_multi_thread_download_file(file_path):
                output.print_msg("invalid bytes found after multi-threaded download of %s" % file_url)
                continue
        if content_length is None:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Check that the downloaded file size matches Content-Length from the response
        file_size = os.path.getsize(file_path)
        if content_length == file_size:
            return {"status": 1, "code": 0, "file_path": file_path}
        else:
            output.print_msg("local file %s: %s does not match remote file %s: %s" % (file_path, file_size, file_url, content_length))
            time.sleep(10)
    if is_create_file:
        path.delete_dir_or_file(file_path)
    return return_code
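# Usage sketch; the URL and local path below are hypothetical. Callers should check the
# returned dict rather than assume success: "status" 0 with a negative "code" encodes the
# failure reason (-1 invalid URL, -2 retry limit reached, -3 default, -4 file larger than
# DOWNLOAD_LIMIT_SIZE).
#
#   result = save_net_file("https://example.com/image.jpg", "D:/image/image.jpg", need_content_type=True, head_check=True)
#   if result["status"] == 1:
#       output.print_msg("saved to %s" % result["file_path"])
#   else:
#       output.print_msg("download failed, code %s" % result["code"])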
SIZE_MB = 2 ** 20  # number of bytes in 1MB
SIZE_GB = 2 ** 30  # number of bytes in 1GB

# Network-related configuration
DEFAULT_NET_CONFIG = {
    "HTTP_CONNECTION_TIMEOUT": 10,  # connection timeout in seconds for normal requests
    "HTTP_READ_TIMEOUT": 30,  # read timeout in seconds for normal requests
    "HTTP_REQUEST_RETRY_COUNT": 10,  # automatic retry count for normal requests
    "DOWNLOAD_CONNECTION_TIMEOUT": 10,  # connection timeout in seconds for file downloads
    "DOWNLOAD_READ_TIMEOUT": 60,  # read timeout in seconds for file downloads
    "DOWNLOAD_RETRY_COUNT": 10,  # automatic retry count for file downloads
    "DOWNLOAD_LIMIT_SIZE": 1.5 * SIZE_GB,  # skip downloads larger than this many bytes
    "DOWNLOAD_MULTI_THREAD_MIN_SIZE": 50 * SIZE_MB,  # use multi-threaded download for files larger than this many bytes
    "DOWNLOAD_MULTI_THREAD_MIN_BLOCK_SIZE": 10 * SIZE_MB,  # lower bound on bytes downloaded by a single thread (thread count is at most file size / this value)
    "DOWNLOAD_MULTI_THREAD_MAX_BLOCK_SIZE": 100 * SIZE_MB,  # upper bound on bytes downloaded by a single thread (thread count is at least file size / this value)
}
NET_CONFIG = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "net_config.json")), DEFAULT_NET_CONFIG)

# Connection pools
HTTP_CONNECTION_POOL = None
PROXY_HTTP_CONNECTION_POOL = None
# Block/continue event for network access
thread_event = threading.Event()
thread_event.set()
# Exit flag
EXIT_FLAG = False
# MIME dictionary keyed by the Content-Type response header
MIME_DICTIONARY = None
# Network request return codes
HTTP_RETURN_CODE_RETRY = 0
HTTP_RETURN_CODE_URL_INVALID = -1  # URL is malformed (does not start with http:// or https://)
HTTP_RETURN_CODE_JSON_DECODE_ERROR = -2  # response data is not valid JSON although the status is 200
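# NET_CONFIG is decoded from a net_config.json placed next to this module, with
# DEFAULT_NET_CONFIG passed to tool.json_decode as the fallback value. A hypothetical
# net_config.json (values chosen only for illustration) would mirror the keys above:
#   {
#       "HTTP_CONNECTION_TIMEOUT": 5,
#       "HTTP_READ_TIMEOUT": 15,
#       "DOWNLOAD_RETRY_COUNT": 5
#   }
# Unless tool.json_decode merges partial files with the defaults, it is safest to repeat
# every key from DEFAULT_NET_CONFIG in the override file.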
def main():
    # Get login state
    steam_class = steam.Steam(need_login=True)
    # All discounted games
    discount_game_file_path = os.path.abspath(os.path.join(steam_class.cache_data_path, "discount.txt"))
    discount_game_list = tool.json_decode(file.read_file(discount_game_file_path), [])
    game_id_list = []
    for game_info in discount_game_list:
        if game_info["type"] == "game":
            game_id_list.append(game_info["app_id"])
        else:
            game_id_list += game_info["app_id"]
    # Games that have already been checked
    checked_apps_file_path = os.path.join(steam_class.cache_data_path, "discount_checked.txt")
    checked_apps_string = file.read_file(checked_apps_file_path)
    if checked_apps_string:
        checked_apps_list = checked_apps_string.split(",")
    else:
        checked_apps_list = []
    # Games that have been removed
    deleted_app_list = steam_class.load_deleted_app_list()
    # Games with restricted store pages
    restricted_app_list = steam_class.load_restricted_app_list()
    # DLC list of each game
    game_dlc_list = steam_class.load_game_dlc_list()
    while len(game_id_list) > 0:
        game_id = game_id_list.pop()
        game_id = str(game_id)
        if game_id[-1:] != "0":
            continue
        if game_id in deleted_app_list or game_id in restricted_app_list:
            continue
        if game_id in checked_apps_list:
            continue
        output.print_msg(f"game: {game_id}, remaining: {len(game_id_list)}")
        # Fetch the game's store page
        try:
            game_data = steam.get_game_store_index(game_id)
        except crawler.CrawlerException as e:
            output.print_msg(e.http_error(f"game {game_id}"))
            continue
        if len(game_data["dlc_list"]) > 0:
            output.print_msg(f"game: {game_id}, all DLC: {game_data['dlc_list']}")
            is_change = False
            for dlc_id in game_data["dlc_list"]:
                if dlc_id not in game_dlc_list:
                    is_change = True
                    game_dlc_list[dlc_id] = game_id
            # Save the DLC mapping
            if is_change:
                steam_class.save_game_dlc_list(game_dlc_list)
        # Store page is restricted
        if game_data["restricted"]:
            output.print_msg(f"game: {game_id} store page is restricted")
            restricted_app_list.append(game_id)
            # Save the restricted list
            steam_class.save_restricted_app_list(restricted_app_list)
        # Mark the game as checked
        checked_apps_list.append(game_id)
        file.write_file(",".join(checked_apps_list), checked_apps_file_path, file.WRITE_FILE_TYPE_REPLACE)
def download(file_url, file_path, recheck_file_extension=False, head_check=False, replace_if_exist: Optional[bool] = None, **kwargs):
    """Download a remote file to the local filesystem

    :Args:
    - file_url - the remote resource URL which you want to save
    - file_path - the local file path which you want to save the remote resource to
    - recheck_file_extension - whether to automatically rename the file according to the "Content-Type" response header
    - head_check - send a "HEAD" request to check the response status and file size before downloading the file

    :Returns:
    - status - 0 download failure, 1 download successful
    - code - failure reason
    - file_path - final local file path (renamed when recheck_file_extension is True)
    """
    if not isinstance(replace_if_exist, bool):
        replace_if_exist = net.DOWNLOAD_REPLACE_IF_EXIST
    if not replace_if_exist and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        output.print_msg(f"file {file_path} ({file_url}) already exists, skipped")
        return {"status": 1, "code": 0, "file_path": file_path}
    # Make sure the save directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return {"status": 0, "code": -11, "file_path": file_path}
    is_create_file = False
    is_multi_thread = False
    return_code = {"status": 0, "code": -3}
    for retry_count in range(0, net.NET_CONFIG["DOWNLOAD_RETRY_COUNT"]):
        if head_check and retry_count == 0:
            request_method = "HEAD"
        else:
            request_method = "GET"
        # Fetch the response headers
        response = net.request(file_url, request_method, is_check_qps=False, connection_timeout=net.NET_CONFIG["HTTP_CONNECTION_TIMEOUT"], read_timeout=net.NET_CONFIG["HTTP_READ_TIMEOUT"], **kwargs)
        # Any other status: stop retrying
        if response.status != net.HTTP_RETURN_CODE_SUCCEED:
            # Malformed URL
            if response.status == net.HTTP_RETURN_CODE_URL_INVALID:
                return_code = {"status": 0, "code": -1}
            # Retry limit exceeded
            elif response.status == net.HTTP_RETURN_CODE_RETRY:
                return_code = {"status": 0, "code": -2}
            # Other HTTP status codes
            else:
                return_code = {"status": 0, "code": response.status}
            break
        # Check whether the file is too large
        content_length = response.getheader("Content-Length")
        if content_length is not None:
            content_length = int(content_length)
            # Larger than the download size limit
            if content_length > net.NET_CONFIG["DOWNLOAD_LIMIT_SIZE"]:
                return {"status": 0, "code": -4}
            # Large file: download with multiple threads (only meaningful when head_check is True, otherwise the whole body has already been returned)
            elif head_check and content_length > net.NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_SIZE"]:
                is_multi_thread = True
        # Use Content-Type from the response headers as the file extension
        if recheck_file_extension:
            content_type = response.getheader("Content-Type")
            if content_type is not None and content_type != "octet-stream":
                if net.MIME_DICTIONARY is None:
                    net.MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in net.MIME_DICTIONARY:
                    new_file_extension = net.MIME_DICTIONARY[content_type]
                else:
                    new_file_extension = content_type.split("/")[-1]
                file_path = os.path.splitext(file_path)[0] + "." + new_file_extension
        if not is_multi_thread:  # Single-threaded download
            # If a HEAD request was made first, fetch the full body now
            if head_check:
                response = net.request(file_url, method="GET", connection_timeout=net.NET_CONFIG["DOWNLOAD_CONNECTION_TIMEOUT"], read_timeout=net.NET_CONFIG["DOWNLOAD_READ_TIMEOUT"], **kwargs)
                if response.status != net.HTTP_RETURN_CODE_SUCCEED:
                    continue
            # Write the file
            with open(file_path, "wb") as file_handle:
                is_create_file = True
                try:
                    file_handle.write(response.data)
                except OSError as ose:
                    if str(ose).find("No space left on device") != -1:
                        net.EXIT_FLAG = True
                    raise
        else:  # Multi-threaded download
            # Create the file
            with open(file_path, "w"):
                is_create_file = True
            thread_list = []
            error_flag = []
            with open(file_path, "rb+") as file_handle:
                file_no = file_handle.fileno()
                end_pos = -1
                while end_pos < content_length - 1:
                    start_pos = end_pos + 1
                    end_pos = min(content_length - 1, start_pos + net.NET_CONFIG["DOWNLOAD_MULTI_THREAD_BLOCK_SIZE"] - 1)
                    # Duplicate the file descriptor for this thread
                    fd_handle = os.fdopen(os.dup(file_no), "rb+", -1)
                    thread = MultiThreadDownload(file_url, start_pos, end_pos, fd_handle, error_flag)
                    thread.start()
                    thread_list.append(thread)
                # Wait for all download threads to finish
                for thread in thread_list:
                    thread.join()
            # Retry if any thread failed, or if the file contains a run of 1KB or more of null bytes
            if len(error_flag) > 0:
                continue
            if not _check_multi_thread_download_file(file_path):
                output.print_msg(f"invalid bytes found after multi-threaded download of {file_url}")
                continue
        if content_length is None:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Check that the downloaded file size matches Content-Length from the response
        file_size = os.path.getsize(file_path)
        if content_length == file_size:
            return {"status": 1, "code": 0, "file_path": file_path}
        else:
            output.print_msg(f"local file {file_path}: {file_size} does not match remote file {file_url}: {content_length}")
            time.sleep(net.NET_CONFIG["HTTP_REQUEST_RETRY_WAIT_TIME"])
    if is_create_file:
        path.delete_dir_or_file(file_path)
    return return_code