def load_review_list():
    review_data = {
        "can_review_lists": [],
        "dlc_in_game": {},
        "review_list": [],
    }
    if not os.path.exists(REVIEW_DATA_PATH):
        return review_data
    review_data = tool.json_decode(file.read_file(REVIEW_DATA_PATH), review_data)
    return review_data
def load_discount_list():
    discount_game_list = []
    if not os.path.exists(DISCOUNT_DATA_PATH):
        return discount_game_list
    week_day = int(time.strftime("%w"))
    # This week's API update time has already passed
    if (week_day > API_UPDATE_TIME_WEEKDAY) or (week_day == API_UPDATE_TIME_WEEKDAY and int(time.strftime("%H")) >= API_UPDATE_TIME_HOUR):
        last_api_update_day = (datetime.datetime.today() + datetime.timedelta(days=API_UPDATE_TIME_WEEKDAY - week_day)).timetuple()
    # Fall back to last week's API update time
    else:
        last_api_update_day = (datetime.datetime.today() + datetime.timedelta(days=API_UPDATE_TIME_WEEKDAY - week_day - 7)).timetuple()
    last_api_update_day = time.strptime(time.strftime("%Y-%m-%d " + "%02d" % API_UPDATE_TIME_HOUR + ":00:00", last_api_update_day), "%Y-%m-%d %H:%M:%S")
    last_api_update_time = time.mktime(last_api_update_day)
    # The cached file predates the latest API update, so treat it as expired
    if os.path.getmtime(DISCOUNT_DATA_PATH) < last_api_update_time < time.time():
        output.print_msg("discount game list expired")
        return discount_game_list
    discount_game_list = tool.json_decode(file.read_file(DISCOUNT_DATA_PATH), discount_game_list)
    return discount_game_list
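# A minimal sketch (not part of the project) of the expiry computation above, kept here
# for reference. It reuses the datetime module already imported by this file; the
# update_weekday / update_hour parameters stand in for API_UPDATE_TIME_WEEKDAY and
# API_UPDATE_TIME_HOUR, whose real values are defined elsewhere.
def _last_weekly_update_timestamp_sketch(update_weekday, update_hour):
    """Return the Unix timestamp of the most recent weekly update time."""
    now = datetime.datetime.today()
    week_day = int(now.strftime("%w"))  # Sunday == 0, matching time.strftime("%w")
    if week_day > update_weekday or (week_day == update_weekday and now.hour >= update_hour):
        day_offset = update_weekday - week_day  # this week's update time has passed
    else:
        day_offset = update_weekday - week_day - 7  # go back to last week's update day
    update_day = (now + datetime.timedelta(days=day_offset)).replace(hour=update_hour, minute=0, second=0, microsecond=0)
    return update_day.timestamp()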
def rename_file_extension(self, response):
    """Check whether the file extension is correct"""
    if self.recheck_file_extension:
        # Use Content-Type from the response headers as the file extension
        content_type = response.getheader("Content-Type")
        if content_type is not None:
            # Reset the flag so the path is not rewritten repeatedly
            self.recheck_file_extension = False
            if content_type != "octet-stream":
                global MIME_DICTIONARY
                if MIME_DICTIONARY is None:
                    MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in MIME_DICTIONARY:
                    new_file_extension = MIME_DICTIONARY[content_type]
                else:
                    new_file_extension = content_type.split("/")[-1]
                self.file_path = os.path.splitext(self.file_path)[0] + "." + new_file_extension
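# The contents of mime.json are not shown in this file; the code only assumes it decodes
# into a plain {Content-Type: extension} dictionary, so a hypothetical entry set would be:
#   {
#       "image/jpeg": "jpg",
#       "text/plain": "txt",
#       "video/quicktime": "mov"
#   }
# Any Content-Type missing from the dictionary falls back to the text after the "/".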
def save_net_file(file_url, file_path, need_content_type=False, head_check=False, **kwargs):
    """Visit web and save to local

    :param file_url: the remote resource URL which you want to save

    :param file_path: the local file path which you want to save the remote resource to

    :param need_content_type: whether to automatically rename the file according to the "Content-Type" response header

    :param head_check: send a "HEAD" request to check the response status and file size before downloading the file

    :return:
        status      0 download failure, 1 download successful
        code        failure reason
        file_path   final local file path (renamed when need_content_type is True)
    """
    # Make sure the save directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return {"status": 0, "code": -11}
    is_create_file = False
    is_multi_thread = False
    return_code = {"status": 0, "code": -3}
    for retry_count in range(0, NET_CONFIG["DOWNLOAD_RETRY_COUNT"]):
        if head_check and retry_count == 0:
            request_method = "HEAD"
        else:
            request_method = "GET"
        # Fetch the response headers
        response = http_request(file_url, request_method, connection_timeout=NET_CONFIG["HTTP_CONNECTION_TIMEOUT"], read_timeout=NET_CONFIG["HTTP_READ_TIMEOUT"], **kwargs)
        # Any other status: stop retrying
        if response.status != HTTP_RETURN_CODE_SUCCEED:
            # Malformed URL
            if response.status == HTTP_RETURN_CODE_URL_INVALID:
                return_code = {"status": 0, "code": -1}
            # Retry limit exceeded
            elif response.status == HTTP_RETURN_CODE_RETRY:
                return_code = {"status": 0, "code": -2}
            # Other HTTP status codes
            else:
                return_code = {"status": 0, "code": response.status}
            break
        # Check whether the file is too large
        content_length = response.getheader("Content-Length")
        if content_length is not None:
            content_length = int(content_length)
            # Larger than the download size limit
            if content_length > NET_CONFIG["DOWNLOAD_LIMIT_SIZE"]:
                return {"status": 0, "code": -4}
            # Large file: download with multiple threads (only meaningful when head_check is True, otherwise the whole body has already been returned)
            elif head_check and content_length > NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_SIZE"]:
                is_multi_thread = True
        # Use Content-Type from the response headers as the file extension
        if need_content_type:
            content_type = response.getheader("Content-Type")
            if content_type is not None and content_type != "octet-stream":
                global MIME_DICTIONARY
                if MIME_DICTIONARY is None:
                    MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in MIME_DICTIONARY:
                    new_file_type = MIME_DICTIONARY[content_type]
                else:
                    new_file_type = content_type.split("/")[-1]
                file_path = os.path.splitext(file_path)[0] + "." + new_file_type
        if not is_multi_thread:  # Single-threaded download
            # If a HEAD request was made first, fetch the full body now
            if head_check:
                response = http_request(file_url, method="GET", connection_timeout=NET_CONFIG["DOWNLOAD_CONNECTION_TIMEOUT"], read_timeout=NET_CONFIG["DOWNLOAD_READ_TIMEOUT"], **kwargs)
                if response.status != HTTP_RETURN_CODE_SUCCEED:
                    continue
            # Write the file
            with open(file_path, "wb") as file_handle:
                is_create_file = True
                try:
                    file_handle.write(response.data)
                except OSError as ose:
                    if str(ose).find("No space left on device") != -1:
                        global EXIT_FLAG
                        EXIT_FLAG = True
                    raise
        else:  # Multi-threaded download
            # Bytes downloaded by a single thread: one tenth of the file, rounded up to a whole MB, then clamped to the configured bounds
            multi_thread_block_size = int(math.ceil(content_length / 10 / SIZE_MB)) * SIZE_MB
            multi_thread_block_size = min(NET_CONFIG["DOWNLOAD_MULTI_THREAD_MAX_BLOCK_SIZE"], max(NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_BLOCK_SIZE"], multi_thread_block_size))
            # Create the file
            with open(file_path, "w"):
                is_create_file = True
            thread_list = []
            error_flag = []
            with open(file_path, "rb+") as file_handle:
                file_no = file_handle.fileno()
                end_pos = -1
                while end_pos < content_length - 1:
                    start_pos = end_pos + 1
                    end_pos = min(content_length - 1, start_pos + multi_thread_block_size - 1)
                    # Duplicate the file descriptor for this thread
                    fd_handle = os.fdopen(os.dup(file_no), "rb+", -1)
                    thread = MultiThreadDownload(file_url, start_pos, end_pos, fd_handle, error_flag)
                    thread.start()
                    thread_list.append(thread)
                # Wait for all download threads to finish
                for thread in thread_list:
                    thread.join()
            # Retry if any thread failed, or if the file contains a run of 1KB or more of null bytes
            if len(error_flag) > 0:
                continue
            if not _check_multi_thread_download_file(file_path):
                output.print_msg("invalid bytes found after multi-threaded download of %s" % file_url)
                continue
        if content_length is None:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Check that the downloaded file size matches Content-Length from the response
        file_size = os.path.getsize(file_path)
        if content_length == file_size:
            return {"status": 1, "code": 0, "file_path": file_path}
        else:
            output.print_msg("local file %s: %s does not match remote file %s: %s" % (file_path, file_size, file_url, content_length))
            time.sleep(10)
    if is_create_file:
        path.delete_dir_or_file(file_path)
    return return_code
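# Usage sketch; the URL and local path below are hypothetical. Callers should check the
# returned dict rather than assume success: "status" 0 with a negative "code" encodes the
# failure reason (-1 invalid URL, -2 retry limit reached, -3 default, -4 file larger than
# DOWNLOAD_LIMIT_SIZE).
#
#   result = save_net_file("https://example.com/image.jpg", "D:/image/image.jpg", need_content_type=True, head_check=True)
#   if result["status"] == 1:
#       output.print_msg("saved to %s" % result["file_path"])
#   else:
#       output.print_msg("download failed, code %s" % result["code"])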
SIZE_MB = 2 ** 20  # number of bytes in 1MB
SIZE_GB = 2 ** 30  # number of bytes in 1GB

# Network-related configuration
DEFAULT_NET_CONFIG = {
    "HTTP_CONNECTION_TIMEOUT": 10,  # connection timeout in seconds for normal requests
    "HTTP_READ_TIMEOUT": 30,  # read timeout in seconds for normal requests
    "HTTP_REQUEST_RETRY_COUNT": 10,  # automatic retry count for normal requests
    "DOWNLOAD_CONNECTION_TIMEOUT": 10,  # connection timeout in seconds for file downloads
    "DOWNLOAD_READ_TIMEOUT": 60,  # read timeout in seconds for file downloads
    "DOWNLOAD_RETRY_COUNT": 10,  # automatic retry count for file downloads
    "DOWNLOAD_LIMIT_SIZE": 1.5 * SIZE_GB,  # skip downloads larger than this many bytes
    "DOWNLOAD_MULTI_THREAD_MIN_SIZE": 50 * SIZE_MB,  # use multi-threaded download for files larger than this many bytes
    "DOWNLOAD_MULTI_THREAD_MIN_BLOCK_SIZE": 10 * SIZE_MB,  # lower bound on bytes downloaded by a single thread (thread count is at most file size / this value)
    "DOWNLOAD_MULTI_THREAD_MAX_BLOCK_SIZE": 100 * SIZE_MB,  # upper bound on bytes downloaded by a single thread (thread count is at least file size / this value)
}
NET_CONFIG = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "net_config.json")), DEFAULT_NET_CONFIG)

# Connection pools
HTTP_CONNECTION_POOL = None
PROXY_HTTP_CONNECTION_POOL = None
# Block/continue event for network access
thread_event = threading.Event()
thread_event.set()
# Exit flag
EXIT_FLAG = False
# MIME dictionary keyed by the Content-Type response header
MIME_DICTIONARY = None
# Network request return codes
HTTP_RETURN_CODE_RETRY = 0
HTTP_RETURN_CODE_URL_INVALID = -1  # URL is malformed (does not start with http:// or https://)
HTTP_RETURN_CODE_JSON_DECODE_ERROR = -2  # response data is not valid JSON although the status is 200
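# NET_CONFIG is decoded from a net_config.json placed next to this module, with
# DEFAULT_NET_CONFIG passed to tool.json_decode as the fallback value. A hypothetical
# net_config.json (values chosen only for illustration) would mirror the keys above:
#   {
#       "HTTP_CONNECTION_TIMEOUT": 5,
#       "HTTP_READ_TIMEOUT": 15,
#       "DOWNLOAD_RETRY_COUNT": 5
#   }
# Unless tool.json_decode merges partial files with the defaults, it is safest to repeat
# every key from DEFAULT_NET_CONFIG in the override file.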
def main():
    # Get login state
    steam_class = steam.Steam(need_login=True)
    # All discounted games
    discount_game_file_path = os.path.abspath(os.path.join(steam_class.cache_data_path, "discount.txt"))
    discount_game_list = tool.json_decode(file.read_file(discount_game_file_path), [])
    game_id_list = []
    for game_info in discount_game_list:
        if game_info["type"] == "game":
            game_id_list.append(game_info["app_id"])
        else:
            game_id_list += game_info["app_id"]
    # Games that have already been checked
    checked_apps_file_path = os.path.join(steam_class.cache_data_path, "discount_checked.txt")
    checked_apps_string = file.read_file(checked_apps_file_path)
    if checked_apps_string:
        checked_apps_list = checked_apps_string.split(",")
    else:
        checked_apps_list = []
    # Games that have been removed
    deleted_app_list = steam_class.load_deleted_app_list()
    # Games with restricted store pages
    restricted_app_list = steam_class.load_restricted_app_list()
    # DLC list of each game
    game_dlc_list = steam_class.load_game_dlc_list()
    while len(game_id_list) > 0:
        game_id = game_id_list.pop()
        game_id = str(game_id)
        if game_id[-1:] != "0":
            continue
        if game_id in deleted_app_list or game_id in restricted_app_list:
            continue
        if game_id in checked_apps_list:
            continue
        output.print_msg(f"game: {game_id}, remaining: {len(game_id_list)}")
        # Fetch the game's store page
        try:
            game_data = steam.get_game_store_index(game_id)
        except crawler.CrawlerException as e:
            output.print_msg(e.http_error(f"game {game_id}"))
            continue
        if len(game_data["dlc_list"]) > 0:
            output.print_msg(f"game: {game_id}, all DLC: {game_data['dlc_list']}")
            is_change = False
            for dlc_id in game_data["dlc_list"]:
                if dlc_id not in game_dlc_list:
                    is_change = True
                    game_dlc_list[dlc_id] = game_id
            # Save the DLC mapping
            if is_change:
                steam_class.save_game_dlc_list(game_dlc_list)
        # Store page is restricted
        if game_data["restricted"]:
            output.print_msg(f"game: {game_id} store page is restricted")
            restricted_app_list.append(game_id)
            # Save the restricted list
            steam_class.save_restricted_app_list(restricted_app_list)
        # Mark the game as checked
        checked_apps_list.append(game_id)
        file.write_file(",".join(checked_apps_list), checked_apps_file_path, file.WRITE_FILE_TYPE_REPLACE)
def download(file_url, file_path, recheck_file_extension=False, head_check=False, replace_if_exist: Optional[bool] = None, **kwargs):
    """Download a remote file to the local filesystem

    :Args:
    - file_url - the remote resource URL which you want to save
    - file_path - the local file path which you want to save the remote resource to
    - recheck_file_extension - whether to automatically rename the file according to the "Content-Type" response header
    - head_check - send a "HEAD" request to check the response status and file size before downloading the file

    :Returns:
    - status - 0 download failure, 1 download successful
    - code - failure reason
    - file_path - final local file path (renamed when recheck_file_extension is True)
    """
    if not isinstance(replace_if_exist, bool):
        replace_if_exist = net.DOWNLOAD_REPLACE_IF_EXIST
    if not replace_if_exist and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        output.print_msg(f"file {file_path} ({file_url}) already exists, skipped")
        return {"status": 1, "code": 0, "file_path": file_path}
    # Make sure the save directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return {"status": 0, "code": -11, "file_path": file_path}
    is_create_file = False
    is_multi_thread = False
    return_code = {"status": 0, "code": -3}
    for retry_count in range(0, net.NET_CONFIG["DOWNLOAD_RETRY_COUNT"]):
        if head_check and retry_count == 0:
            request_method = "HEAD"
        else:
            request_method = "GET"
        # Fetch the response headers
        response = net.request(file_url, request_method, is_check_qps=False, connection_timeout=net.NET_CONFIG["HTTP_CONNECTION_TIMEOUT"], read_timeout=net.NET_CONFIG["HTTP_READ_TIMEOUT"], **kwargs)
        # Any other status: stop retrying
        if response.status != net.HTTP_RETURN_CODE_SUCCEED:
            # Malformed URL
            if response.status == net.HTTP_RETURN_CODE_URL_INVALID:
                return_code = {"status": 0, "code": -1}
            # Retry limit exceeded
            elif response.status == net.HTTP_RETURN_CODE_RETRY:
                return_code = {"status": 0, "code": -2}
            # Other HTTP status codes
            else:
                return_code = {"status": 0, "code": response.status}
            break
        # Check whether the file is too large
        content_length = response.getheader("Content-Length")
        if content_length is not None:
            content_length = int(content_length)
            # Larger than the download size limit
            if content_length > net.NET_CONFIG["DOWNLOAD_LIMIT_SIZE"]:
                return {"status": 0, "code": -4}
            # Large file: download with multiple threads (only meaningful when head_check is True, otherwise the whole body has already been returned)
            elif head_check and content_length > net.NET_CONFIG["DOWNLOAD_MULTI_THREAD_MIN_SIZE"]:
                is_multi_thread = True
        # Use Content-Type from the response headers as the file extension
        if recheck_file_extension:
            content_type = response.getheader("Content-Type")
            if content_type is not None and content_type != "octet-stream":
                if net.MIME_DICTIONARY is None:
                    net.MIME_DICTIONARY = tool.json_decode(file.read_file(os.path.join(os.path.dirname(__file__), "mime.json")), {})
                if content_type in net.MIME_DICTIONARY:
                    new_file_extension = net.MIME_DICTIONARY[content_type]
                else:
                    new_file_extension = content_type.split("/")[-1]
                file_path = os.path.splitext(file_path)[0] + "." + new_file_extension
        if not is_multi_thread:  # Single-threaded download
            # If a HEAD request was made first, fetch the full body now
            if head_check:
                response = net.request(file_url, method="GET", connection_timeout=net.NET_CONFIG["DOWNLOAD_CONNECTION_TIMEOUT"], read_timeout=net.NET_CONFIG["DOWNLOAD_READ_TIMEOUT"], **kwargs)
                if response.status != net.HTTP_RETURN_CODE_SUCCEED:
                    continue
            # Write the file
            with open(file_path, "wb") as file_handle:
                is_create_file = True
                try:
                    file_handle.write(response.data)
                except OSError as ose:
                    if str(ose).find("No space left on device") != -1:
                        net.EXIT_FLAG = True
                    raise
        else:  # Multi-threaded download
            # Create the file
            with open(file_path, "w"):
                is_create_file = True
            thread_list = []
            error_flag = []
            with open(file_path, "rb+") as file_handle:
                file_no = file_handle.fileno()
                end_pos = -1
                while end_pos < content_length - 1:
                    start_pos = end_pos + 1
                    end_pos = min(content_length - 1, start_pos + net.NET_CONFIG["DOWNLOAD_MULTI_THREAD_BLOCK_SIZE"] - 1)
                    # Duplicate the file descriptor for this thread
                    fd_handle = os.fdopen(os.dup(file_no), "rb+", -1)
                    thread = MultiThreadDownload(file_url, start_pos, end_pos, fd_handle, error_flag)
                    thread.start()
                    thread_list.append(thread)
                # Wait for all download threads to finish
                for thread in thread_list:
                    thread.join()
            # Retry if any thread failed, or if the file contains a run of 1KB or more of null bytes
            if len(error_flag) > 0:
                continue
            if not _check_multi_thread_download_file(file_path):
                output.print_msg(f"invalid bytes found after multi-threaded download of {file_url}")
                continue
        if content_length is None:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Check that the downloaded file size matches Content-Length from the response
        file_size = os.path.getsize(file_path)
        if content_length == file_size:
            return {"status": 1, "code": 0, "file_path": file_path}
        else:
            output.print_msg(f"local file {file_path}: {file_size} does not match remote file {file_url}: {content_length}")
            time.sleep(net.NET_CONFIG["HTTP_REQUEST_RETRY_WAIT_TIME"])
    if is_create_file:
        path.delete_dir_or_file(file_path)
    return return_code