def read_file(file_path, read_type=READ_FILE_TYPE_FULL):
    """Read a local text file.

    :param file_path: path of the file to read
    :param read_type:
        1 (presumably READ_FILE_TYPE_FULL)  read the whole file at once
        anything else (READ_FILE_TYPE_LINE) read the file line by line
    :return:
        full read: a string without its single trailing newline
                   ("" when the file is missing or empty)
        line read: a list of lines, each without its trailing newline
                   ([] when the file is missing)
    """
    file_path = path.change_path_encoding(file_path)
    # NOTE(review): the literal 1 is assumed to equal READ_FILE_TYPE_FULL - confirm.
    is_full_read = read_type == 1
    if not os.path.exists(file_path):
        return "" if is_full_read else []
    with open(file_path, "r") as file_handle:
        if is_full_read:
            result = file_handle.read()
            # endswith() is safe on an empty string, unlike result[-1].
            if result.endswith("\n"):
                result = result[:-1]
        else:
            result = [line[:-1] if line.endswith("\n") else line for line in file_handle]
    return result
def read_save_data(save_data_path, key_index, default_value_list):
    """Load a tab-separated save-data file into a dict keyed by one column.

    :param save_data_path: path of the save-data file
    :param key_index: index of the column whose value is used as the dict key
    :param default_value_list: per-column default values; a default is applied
        when the column is missing or empty.  A default of the form "_N" means
        "use the value of column N of the same row", e.g. ["", "_0"] makes
        column 1 fall back to the value of column 0.
    :return: dict mapping the key column value to the list of column values of
        that row; an empty dict when the file does not exist
    """
    result_list = {}
    if not os.path.exists(path.change_path_encoding(save_data_path)):
        return result_list
    for single_save_data in tool.read_file(save_data_path, tool.READ_FILE_TYPE_LINE):
        # Drop a UTF-8 BOM and any stray line terminators.
        single_save_data = single_save_data.replace("\xef\xbb\xbf", "").replace("\n", "").replace("\r", "")
        if len(single_save_data) == 0:
            continue
        # Split into columns and trim surrounding whitespace of every column.
        single_save_list = [value.strip() for value in single_save_data.split("\t")]
        # Fill in defaults for missing or empty columns.
        for index, default_value in enumerate(default_value_list):
            # A leading "_" refers to another column of the same row.
            if default_value != "" and default_value[0] == "_":
                default_value = single_save_list[int(default_value.replace("_", ""))]
            if len(single_save_list) <= index:
                single_save_list.append(default_value)
            if single_save_list[index] == "":
                single_save_list[index] = default_value
        # Check for duplicates on the final (stripped, defaulted) key, i.e. on
        # exactly the value that is about to be stored, so rows whose keys
        # differ only by whitespace cannot silently overwrite each other.
        save_key = single_save_list[key_index]
        if save_key in result_list:
            output.print_msg("存档中存在重复行 %s" % save_key)
            tool.process_exit()
        result_list[save_key] = single_save_list
    return result_list
def read_config(config_path):
    """Read the "setting" section of a config file into a plain dict.

    The file is opened with the UTF-8-SIG codec; every option name and value
    of the "setting" section is returned encoded as UTF-8.

    :param config_path: path of the config file
    :return: dict of option name -> option value
    """
    encoded_path = path.change_path_encoding(config_path)
    with codecs.open(encoded_path, encoding="UTF-8-SIG") as config_handle:
        parser = ConfigParser.SafeConfigParser()
        parser.readfp(config_handle)
        setting_items = parser.items("setting")
        return dict(
            (option.encode("UTF-8"), option_value.encode("UTF-8"))
            for option, option_value in setting_items
        )
def get_file_md5(file_path):
    """Compute the MD5 hex digest of a local file.

    :param file_path: path of the file to hash
    :return: hexadecimal MD5 digest string, or None when the file does not exist
    """
    encoded_path = path.change_path_encoding(file_path)
    if not os.path.exists(encoded_path):
        return None
    chunk_size = 2 ** 20  # read 1M at a time
    digest = hashlib.md5()
    with open(encoded_path, "rb") as file_handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: file_handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def write_file(msg, file_path, append_type=WRITE_FILE_TYPE_APPEND):
    """Write a message followed by a newline to a local file.

    :param msg: the text to write; unicode text is encoded as UTF-8 first
    :param file_path: path of the file to write
    :param append_type:
        WRITE_FILE_TYPE_APPEND   append to the end of the file
        WRITE_FILE_TYPE_REPLACE  overwrite the file
    :return: True when the message was written; False when the parent
        directory could not be created or append_type is not recognised
    """
    file_path = path.change_path_encoding(file_path)
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    if append_type == WRITE_FILE_TYPE_APPEND:
        open_type = "a"
    elif append_type == WRITE_FILE_TYPE_REPLACE:
        open_type = "w"
    else:
        return False
    # Encode before opening, so an encoding failure cannot truncate the file.
    if isinstance(msg, unicode):
        msg = msg.encode("UTF-8")
    with open(file_path, open_type) as file_handle:
        file_handle.write(msg + "\n")
    # Report success explicitly; falling off the end returned a falsy None.
    return True
def save_net_file_list(file_url_list, file_path, header_list=None, cookies_list=None):
    """Visit web and save to local (multiple remote resources, single local file).

    The response bodies of all URLs are written, in order, into one file.

    :param file_url_list: the list of remote resource URLs to save
    :param file_path: the local file path the remote resources are saved to
    :param header_list: customize header dictionary
    :param cookies_list: customize cookies dictionary, will replace header_list["Cookie"]
    :return: False when the target directory could not be created, otherwise a dict:
        status  0 download failure, 1 download successful
        code    failure reason: -1 when a request returned HTTP_RETURN_CODE_RETRY,
                -2 when the download loop never ran, otherwise the HTTP status
                of the failing request (0 on success)
    """
    file_path = path.change_path_encoding(file_path)
    # Make sure the target directory exists.
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    # NOTE(review): every path through the loop body returns, so at most one
    # attempt is made regardless of HTTP_DOWNLOAD_RETRY_COUNT - confirm intent.
    for retry_count in range(0, HTTP_DOWNLOAD_RETRY_COUNT):
        failure = None
        # Download every part into the same file.
        with open(file_path, "wb") as file_handle:
            for file_url in file_url_list:
                response = http_request(
                    file_url, header_list=header_list, cookies_list=cookies_list,
                    connection_timeout=HTTP_DOWNLOAD_CONNECTION_TIMEOUT,
                    read_timeout=HTTP_DOWNLOAD_READ_TIMEOUT)
                if response.status == HTTP_RETURN_CODE_SUCCEED:
                    file_handle.write(response.data)
                    continue
                if response.status == HTTP_RETURN_CODE_RETRY:
                    # The request already exhausted its retries; give up.
                    failure = {"status": 0, "code": -1}
                else:
                    # Any other HTTP code; give up.
                    failure = {"status": 0, "code": response.status}
                break
        if failure is not None:
            # Remove the partial file only after the handle is closed:
            # deleting a file that is still open fails on Windows.
            path.delete_dir_or_file(file_path)
            return failure
        return {"status": 1, "code": 0}
    return {"status": 0, "code": -2}
"</div>").strip() item_introduction = item_introduction.replace("'", "’") output.print_msg("%s %s %s %s" % (item_position, item_name, special_attribute, item_introduction)) item_attribute_list[item_path].append( [item_name, special_attribute, item_introduction]) else: output.print_msg("error get" + item_url) else: output.print_msg("error get" + item_index_url) pagination = tool.find_sub_string(item_index_response.data, '<ul class="ui-pagination">', "</ul>") if pagination: pagination = re.findall('<a href="#page=([\d]*)">', pagination) max_page = 1 for page in pagination: max_page = max(max_page, int(page)) if page_count < max_page: page_count += 1 continue break path.create_dir("data") for item_path in item_attribute_list: with open(path.change_path_encoding("data\%s.txt" % item_list[item_path]), "w") as file_handle: for item in item_attribute_list[item_path]: file_handle.write("\t".join(item) + "\n")
def save_net_file(file_url, file_path, need_content_type=False, header_list=None, cookies_list=None, head_check=True):
    """Visit web and save to local.

    :param file_url: the remote resource URL to save
    :param file_path: the local file path the remote resource is saved to
    :param need_content_type: whether to rename the file according to the
        "Content-Type" response header
    :param header_list: customize header dictionary
    :param cookies_list: customize cookies dictionary, will replace header_list["Cookie"]
    :param head_check: send a "HEAD" request first to check the response status
        and file size before downloading the file
    :return: False when the target directory could not be created, otherwise a dict:
        status      0 download failure, 1 download successful
        code        failure reason: -1 HTTP_RETURN_CODE_URL_INVALID,
                    -2 HTTP_RETURN_CODE_RETRY, -3 all attempts failed,
                    -4 Content-Length exceeds HTTP_DOWNLOAD_MAX_SIZE,
                    otherwise the unexpected HTTP status (0 on success)
        file_path   final local file path, only present on success (differs
                    from the argument when need_content_type renamed it)
    """
    file_path = path.change_path_encoding(file_path)
    # Make sure the target directory exists.
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    create_file = False
    for retry_count in range(0, HTTP_DOWNLOAD_RETRY_COUNT):
        if head_check:
            request_method = "HEAD"
        else:
            request_method = "GET"
        # Fetch the headers (or, without head_check, the whole resource).
        response = http_request(file_url, request_method, header_list=header_list, cookies_list=cookies_list,
                                connection_timeout=HTTP_CONNECTION_TIMEOUT, read_timeout=HTTP_READ_TIMEOUT)
        if response.status == HTTP_RETURN_CODE_SUCCEED:
            # TODO: chunked download
            # Refuse files that are too large.
            content_length = response.getheader("Content-Length")
            if content_length is not None and int(content_length) > HTTP_DOWNLOAD_MAX_SIZE:
                return {"status": 0, "code": -4}
            # Use the Content-Type of the response as the file extension.
            if need_content_type:
                content_type = response.getheader("Content-Type")
                # NOTE(review): a real header would read "application/octet-stream",
                # which this comparison does not match - confirm intent.
                if content_type is not None and content_type != "octet-stream":
                    if content_type == "video/quicktime":
                        new_file_type = "mov"
                    else:
                        new_file_type = content_type.split("/")[-1]
                    file_path = os.path.splitext(file_path)[0] + "." + new_file_type
            # After a HEAD request the full data still has to be fetched.
            if head_check:
                response = http_request(file_url, method="GET", header_list=header_list, cookies_list=cookies_list,
                                        connection_timeout=HTTP_DOWNLOAD_CONNECTION_TIMEOUT,
                                        read_timeout=HTTP_DOWNLOAD_READ_TIMEOUT)
                if response.status != HTTP_RETURN_CODE_SUCCEED:
                    continue
            # Download.
            with open(file_path, "wb") as file_handle:
                file_handle.write(response.data)
            create_file = True
            # Compare the downloaded size with the Content-Length of the response.
            if content_length is None:
                return {"status": 1, "code": 0, "file_path": file_path}
            file_size = os.path.getsize(file_path)
            if int(content_length) == file_size:
                return {"status": 1, "code": 0, "file_path": file_path}
            else:
                # Message reads "local file <path>:<size> and network file
                # <url>:<size> differ": pair the local path with the local
                # size and the URL with the remote Content-Length.
                output.print_msg("本地文件%s:%s和网络文件%s:%s不一致" %
                                 (file_path.encode("UTF-8"), file_size, str(file_url), content_length))
        elif response.status == HTTP_RETURN_CODE_URL_INVALID:
            if create_file:
                path.delete_dir_or_file(file_path)
            return {"status": 0, "code": -1}
        # The request already exhausted its retries; give up.
        elif response.status == HTTP_RETURN_CODE_RETRY:
            if create_file:
                path.delete_dir_or_file(file_path)
            return {"status": 0, "code": -2}
        # Any other HTTP code; give up.
        else:
            if create_file:
                path.delete_dir_or_file(file_path)
            return {"status": 0, "code": response.status}
    # Every attempt failed; remove a file left behind by a size mismatch.
    if create_file:
        path.delete_dir_or_file(file_path)
    return {"status": 0, "code": -3}
def read_config(config_path):
    """Parse a config file and return the parser holding its contents.

    The file is opened with the UTF-8-SIG codec.

    :param config_path: path of the config file
    :return: the ConfigParser.SafeConfigParser instance the file was read into
    """
    parser = ConfigParser.SafeConfigParser()
    encoded_path = path.change_path_encoding(config_path)
    with codecs.open(encoded_path, encoding="UTF-8-SIG") as config_handle:
        parser.readfp(config_handle)
    return parser