import os


def down_pic_include_child(down_path):
    file_name_list = FileTool.get_file_list('txt')
    # down_path = down_param['down_file_path']
    for index, file_name in enumerate(file_name_list, 1):
        print('Reading file %i: %s' % (index, file_name))
        # Open the task file; each line is a gallery page URL
        with open(file_name) as file_obj:
            for num, value in enumerate(file_obj, 1):
                line = value.strip('\n')
                if line == '':
                    print('Line %i is empty, skipping' % num)
                    continue
                print('Line %i: -%s- ' % (num, line), end=';')
                # Also fetch image links from child pages (currently disabled)
                # child_img_url = get_img_child_url(line, pre_url)
                url_list = get_img_url_list(line)
                img_urls = url_list[0]
                # img_urls.extend(child_img_url)
                total = len(img_urls)
                print('Images after deduplication: %i ' % total)
                new_title = url_list[1]
                # Record every processed URL in the done log
                os.chdir(cur_dir)
                write_to_done_log(line, new_title)
                if len(img_urls) < 2:
                    os.chdir(cur_dir)
                    save_not_down_url(line, new_title, num)
                else:
                    path = down_path + cur_month + str(new_title.strip()) + '/'
                    common.create_file(path)
                    os.chdir(path)
                    for i, img in enumerate(img_urls, 1):
                        file_url = img.get('file')
                        # fileUrl = file_url.replace('http://pic.w26.rocks/', pre_url)
                        if not file_url.startswith('http'):
                            # Relative URL: prepend the site prefix
                            print('in:' + file_url)
                            file_url = pre_url + file_url
                        image_name = file_url.split("/")[-1]
                        if not os.path.exists(image_name):
                            print('Line %i: image %i / %i: %s' % (num, i, total, file_url), end=';')
                            common.down_img(file_url)
                    print('Line %i: %s downloaded, moving on to the next line' % (num, line))
        print('File %i: %s downloaded, deleting...' % (index, file_name))
        os.remove(file_name)
        print('File %i: %s deleted, reading the next file' % (index, file_name))
    print("---------------- All files downloaded ----------------")
def down_all_pic(down_param):
    path_ = down_param + cur_month
    if not os.path.exists(path_):
        os.makedirs(path_)
    file_name_list = FileTool.get_file_list('txt')
    for index, file_name in enumerate(file_name_list, 1):
        print('Reading file %i: %s' % (index, file_name))
        # Open the task file; each line is a gallery page URL
        with open(file_name) as file_obj:
            for num, value in enumerate(file_obj, 1):
                line = value.strip('\n')
                if line == '':
                    print('Line %i is empty, skipping' % num)
                    continue
                print('Line %i: -%s- ' % (num, line), end=' ;')
                # Collect all image links on the page
                url_list = get_img_url_list(line)
                img_urls = url_list[0]
                total = len(img_urls)
                print(' Image count: %i ' % total)
                new_title = url_list[1]
                if len(img_urls) < 2:
                    os.chdir(cur_dir)
                    save_not_down_url(line, new_title, num)
                else:
                    path = path_ + str(new_title.strip()) + os.sep
                    common.create_file(path)
                    os.chdir(path)
                    for i, img in enumerate(img_urls, 1):
                        file_url = img.get('file')
                        if not file_url.startswith('http'):
                            # Relative URL: prepend the site prefix
                            print('in:' + file_url)
                            file_url = pre_url + file_url
                        image_name = file_url.split("/")[-1]
                        if not os.path.exists(image_name):
                            print('Line %i: image %i / %i: %s' % (num, i, total, file_url), end=' ;')
                            common.down_img(file_url)
                    print('Line %i: %s downloaded ' % (num, line))
                # Record the processed URL in the done log
                os.chdir(cur_dir)
                write_to_done_log(line, new_title)
        print('File %i: %s downloaded, deleting...' % (index, file_name))
        os.remove(file_name)
        print('File %i: %s deleted, reading the next file' % (index, file_name), end=";")
    print("---------------- All files downloaded ----------------")
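# ---------------------------------------------------------------------------
# The functions above rely on module-level names defined elsewhere in the
# project: cur_dir, cur_month, pre_url, FileTool, common, get_img_url_list,
# write_to_done_log and save_not_down_url. The block below is a minimal,
# hypothetical sketch of those helpers, with signatures inferred only from
# the call sites above; the bodies are illustrative placeholders, not the
# project's real implementations.
# ---------------------------------------------------------------------------
import glob
import time

import requests  # assumed HTTP client; the real project may use another one

cur_dir = os.getcwd()                     # directory holding the .txt task files
cur_month = time.strftime('%Y-%m') + '/'  # per-month subfolder name (assumed format)
pre_url = 'https://example.com/'          # prefix for relative image URLs (placeholder)


class FileTool:
    @staticmethod
    def get_file_list(ext, file_dir='.'):
        # Return every file in file_dir with the given extension.
        return glob.glob(os.path.join(file_dir, '*.' + ext))


class common:
    @staticmethod
    def create_file(path):
        # Create the target directory if it does not already exist.
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def down_img(file_url):
        # Download one image into the current working directory.
        image_name = file_url.split('/')[-1]
        resp = requests.get(file_url, timeout=30)
        resp.raise_for_status()
        with open(image_name, 'wb') as f:
            f.write(resp.content)


def get_img_url_list(page_url):
    # Placeholder: the real version parses the gallery page and returns
    # ([{'file': <image url>}, ...], <page title>).
    return [], ''


def write_to_done_log(page_url, title):
    # Append the processed page URL and its title to a done log.
    with open(os.path.join(cur_dir, 'done.log'), 'a', encoding='utf-8') as f:
        f.write('%s\t%s\n' % (page_url, title))


def save_not_down_url(page_url, title, line_no):
    # Append pages that yielded too few images to a "not downloaded" log.
    with open(os.path.join(cur_dir, 'not_down.log'), 'a', encoding='utf-8') as f:
        f.write('%i\t%s\t%s\n' % (line_no, page_url, title))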
def get_down_file(self, arg='filter'):
    file_dir = self.get_file_dir(arg)
    # Note: this uses the 'text' extension, while the download functions above
    # pass 'txt' to FileTool.get_file_list.
    return FileTool.get_file_list('text', file_dir)
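# Hypothetical entry point, assuming the script is run from the directory
# that contains the .txt task files; 'pic_download' is a placeholder path,
# not the project's real configuration.
if __name__ == '__main__':
    down_path = os.path.join(os.path.expanduser('~'), 'pic_download') + os.sep
    down_all_pic(down_path)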