def collect_sub_files(source_root_directory, move_target_directory):
    """
    Flatten a nested download folder by moving its files into one directory.

    Every path returned by ``u_file.get_all_sub_files`` for the source root is
    inspected; only files whose extension is exactly ``.jpg`` or ``.hdr`` are
    moved. A file is skipped (never overwritten) when a file with the same
    name already exists in the target directory.

    :param source_root_directory: directory whose sub files are inspected
    :param move_target_directory: directory that receives the moved files;
        created when it does not exist yet
    :return: None
    """
    accepted_suffixes = ('.jpg', '.hdr')

    # Make sure the destination exists before anything is moved into it.
    if not os.path.isdir(move_target_directory):
        os.makedirs(move_target_directory)

    for source_path in u_file.get_all_sub_files(source_root_directory):
        if os.path.isdir(source_path):
            log.info('The file is directory: {}'.format(source_path))
            continue

        file_name = os.path.basename(source_path)
        if os.path.splitext(file_name)[1] not in accepted_suffixes:
            log.info('The file is not hdr file: {}'.format(file_name))
            continue

        target_path = os.path.join(move_target_directory, file_name)
        if os.path.isfile(target_path):
            # Keep the existing file rather than clobbering it.
            log.warn('The move target file is exist: {}'.format(target_path))
            continue

        log.info('move file: {} --> file: {}'.format(source_path, target_path))
        os.replace(source_path, target_path)
def get_all_image_paths(image_directory: str, use_cache: bool = True, contain_dir: bool = False) -> list:
    """
    Recursively list the files (and optionally directories) under a folder.

    The result is persisted to a text cache file, one path per line, located
    relative to this module's directory. When ``use_cache`` is true and that
    cache file exists, its content is returned without scanning the disk.

    NOTE(review): the cache path is derived from ``image_directory`` only, so
    a cache written with one ``contain_dir`` value is also returned for the
    other value — confirm whether that is intended.

    :param image_directory: directory to scan
    :param use_cache: return the cached listing when a cache file exists
    :param contain_dir: whether the returned list also contains directories
    :return: list of paths; an empty list when ``image_directory`` is not a
        directory
    """
    log.info('begin get all image files from path: {}'.format(image_directory))
    if not os.path.isdir(image_directory):
        log.error('The image directory is not exist: {}'.format(image_directory))
        return []

    # Resolve the cache file relative to this module, not the working directory.
    cache_file_path = get_cache_path(image_directory, 'image_paths', 'txt')
    cache_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), cache_file_path)
    if use_cache and os.path.isfile(cache_file_path):
        log.info('read all image file from cache: {}'.format(cache_file_path))
        return u_file.read_file_as_list(cache_file_path)

    cache_directory = os.path.dirname(cache_file_path)
    if not os.path.isdir(cache_directory):
        log.info('create the cache directory: {}'.format(cache_directory))
        # exist_ok avoids failing if the directory appears between check and create.
        os.makedirs(cache_directory, exist_ok=True)

    all_files = u_file.get_all_sub_files(image_directory, contain_dir=contain_dir)

    # The context manager closes the cache file even when a write fails.
    with open(cache_file_path, 'w', encoding='utf-8') as cache_file:
        cache_file.writelines(file_path + '\n' for file_path in all_files)

    log.info('get_all_image_files finish. file size: {}'.format(len(all_files)))
    return all_files
def delete_file():
    """
    Log the numbered siblings in ``result`` of each file in ``result-delete``.

    For every file found under ``result-delete`` the ``-1`` in its name is
    replaced by ``-0``, ``-1``, ... ``-29`` in turn and the resulting name is
    looked up in ``result``. Existing files are logged; the scan for one base
    name stops at the first index whose file does not exist.

    Despite the name, nothing is deleted or moved here — the function only
    writes log lines.

    :return: None
    """
    max_index = 30
    for marked_path in u_file.get_all_sub_files(r'result-delete'):
        marked_name = os.path.basename(marked_path)
        for index in range(max_index):
            candidate_name = marked_name.replace('-1', '-' + str(index))
            candidate_path = os.path.join(r'result', candidate_name)
            if os.path.isfile(candidate_path):
                log.info('move file: {}'.format(candidate_path))
            else:
                # The numbered sequence is treated as contiguous: stop at the first gap.
                break
def arrange():
    """
    Sort the files under ``./result`` into folders according to ``PATH_MAP``.

    For each file, the first ``PATH_MAP`` key that occurs in the file path
    selects the mapping value; the target path is the source path with every
    ``result`` replaced by that value. Missing target directories are created
    and the file is moved there. Files matching no key are left untouched.

    :return: None
    """
    result_root = os.path.abspath(r'./result')
    sub_files = u_file.get_all_sub_files(result_root)
    log.info('The sub files size is :{}'.format(len(sub_files)))

    for sub_file in sub_files:
        for path_key, path_value in PATH_MAP.items():
            if path_key not in sub_file:
                continue

            move_target_file = sub_file.replace('result', path_value)
            log.info('move the file from: {} -> {}'.format(sub_file, move_target_file))

            target_directory = os.path.dirname(move_target_file)
            if not os.path.isdir(target_directory):
                os.makedirs(target_directory)
            os.replace(sub_file, move_target_file)
            # Only the first matching key applies to a file.
            break
def copy_first_picture():
    """
    Copy the first picture of every set from ``result`` into ``result-copy``.

    A "first picture" is any path under ``result`` that contains ``-1.jpg``.
    Each one is copied into ``result-copy`` under its own file name; a file
    that already exists there is left untouched. The ``result-copy``
    directory is created when there is something to copy and it is missing,
    so the copy no longer fails on a fresh checkout.

    :return: None
    """
    copy_directory = r'result-copy'
    first_picture_paths = [
        picture_path
        for picture_path in u_file.get_all_sub_files(r'result')
        if '-1.jpg' in picture_path
    ]
    log.info('first picture size: {}'.format(len(first_picture_paths)))

    if first_picture_paths:
        # shutil.copy raises when the destination directory does not exist.
        os.makedirs(copy_directory, exist_ok=True)

    for first_picture_path in first_picture_paths:
        copy_file_path = os.path.join(copy_directory, os.path.basename(first_picture_path))
        log.info('copy file: {}'.format(copy_file_path))
        if os.path.isfile(copy_file_path):
            log.info('The file is exist: {}'.format(copy_file_path))
            continue
        shutil.copy(first_picture_path, copy_file_path)
def replace_file_name(source_root_directory, replace_ad_str):
    """
    Strip an advertisement string from the names of downloaded files.

    Only the file name (last path component) is rewritten; parent directories
    are never touched, so the rename always stays inside the file's own
    directory. Files whose name does not contain the string are skipped
    silently. A file is not renamed when the cleaned name is empty or when a
    file with the cleaned name already exists.

    :param source_root_directory: directory whose sub files are processed
    :param replace_ad_str: advertisement text to remove from file names
    :return: None
    """
    for sub_file_path in u_file.get_all_sub_files(source_root_directory):
        parent_directory, sub_file_name = os.path.split(sub_file_path)
        cleaned_file_name = sub_file_name.replace(replace_ad_str, '')

        if cleaned_file_name == sub_file_name:
            # Nothing to strip (also covers an empty replace_ad_str).
            continue
        if not cleaned_file_name:
            # The whole name was the ad string; there is nothing to rename to.
            log.warn('The target file name is empty: {}'.format(sub_file_path))
            continue

        move_target_file_path = os.path.join(parent_directory, cleaned_file_name)
        if os.path.isfile(move_target_file_path):
            log.warn('The target file is exist: {}'.format(move_target_file_path))
            continue

        log.info('rename file: {} --> file: {}'.format(sub_file_path, move_target_file_path))
        os.replace(sub_file_path, move_target_file_path)
def generate_gitbook_summary(source_dir):
    """
    Build a GitBook summary (table of contents) by walking a directory tree.

    Directories become plain list items and markdown files become links; each
    entry is indented by one space per level of nesting below ``source_dir``.
    Any path containing one of the excluded names (``.git``, ``.idea``,
    ``assets``, ``temp``, ``node_modules``, ``_book``) as a substring is
    skipped, and so is every file that does not end with ``.md``. The result
    is written through ``u_file.write_content`` and printed.

    The platform path separator is used instead of a hard-coded backslash, so
    relative paths and nesting depth are also computed correctly off Windows.

    :param source_dir: root directory of the book
    :return: None
    """
    markdown_suffix = '.md'
    exclude_paths = [
        '.git', '.idea', 'assets', 'temp', 'node_modules', '_book'
    ]

    sub_file_paths = u_file.get_all_sub_files(source_dir, contain_dir=True)
    # Sorted order places a directory right before the entries it contains.
    sub_file_paths.sort()

    summary_lines = []
    for sub_file_path in sub_file_paths:
        if any(exclude_path in sub_file_path for exclude_path in exclude_paths):
            continue

        relative_path = sub_file_path.replace(source_dir + os.sep, '')
        indent = ' ' * relative_path.count(os.sep)

        if os.path.isfile(sub_file_path):
            file_name = os.path.split(relative_path)[1]
            # Match the real extension, not '.md' anywhere in the path.
            if not file_name.endswith(markdown_suffix):
                continue
            title = file_name[:-len(markdown_suffix)]
            summary_lines.append('{}- [{}]({})'.format(indent, title, relative_path))
        else:
            directory_name = relative_path.split(os.sep)[-1]
            summary_lines.append('{}- {}'.format(indent, directory_name))

    summary_content = ''.join(line + "\n" for line in summary_lines)
    u_file.write_content(r'cache\result.md', summary_content)
    print(summary_content)
def modify_picture_suffix(source_directory):
    """
    Rename large pictures so that they carry a ``.gif`` extension.

    Every file under ``source_directory`` that is at least 500 KiB has its
    extension replaced by ``.gif`` (a file without extension gets ``.gif``
    appended). Directories, smaller files and files already ending in
    ``.gif`` are left alone.

    Only the final extension of the file name is changed; other occurrences
    of the same text elsewhere in the path are not touched.

    :param source_directory: directory whose pictures are processed
    :return: None
    """
    min_gif_picture_size = 500 * 1024
    for sub_file_path in u_file.get_all_sub_files(source_directory):
        if os.path.isdir(sub_file_path):
            log.info('The file is directory: {}'.format(sub_file_path))
            continue

        file_size = os.path.getsize(sub_file_path)
        if file_size < min_gif_picture_size:
            log.info('The file size is small. file: {}, size: {}'.format(
                sub_file_path, file_size))
            continue

        # splitext only strips the last extension of the last path component,
        # and is safe when the file has no extension at all.
        move_target_file_path = os.path.splitext(sub_file_path)[0] + '.gif'
        if move_target_file_path == sub_file_path:
            # Already a .gif file; nothing to rename.
            continue

        log.info('move file: {} --> file: {}'.format(sub_file_path, move_target_file_path))
        os.replace(sub_file_path, move_target_file_path)
def test_get_all_sub_files():
    """
    Smoke check for ``u_file.get_all_sub_files`` against a local download folder.

    Lists the sub files of a hard-coded directory and logs how many were
    returned; nothing is asserted.

    :return: None
    """
    download_root = r'D:\BaiduNetdiskDownload\最新免费国外hdr环境高清贴图批量下载_By佐邦视觉'
    found_paths = u_file.get_all_sub_files(download_root)
    file_count = len(found_paths)
    u_log.info('file size: {}'.format(file_count))