Example #1
def down_pic_include_child(down_path):
    file_name_list = FileTool.get_file_list('txt')
    # down_path = down_param['down_file_path']
    for index, file_name in enumerate(file_name_list, 1):
        print('Reading file %i: %s' % (index, file_name))
        # Open the file
        with open(file_name) as file_obj:
            for num, value in enumerate(file_obj, 1):
                line = value.strip('\n')
                if line == '':
                    print('Line %i is empty' % num)
                    continue
                print('Line %i: -%s-  ' % (num, line), end=';')
                # Fetch the child-page links (currently disabled)
                # child_img_url = get_img_child_url(line, pre_url)
                url_list = get_img_url_list(line)
                img_urls = url_list[0]
                # img_urls.extend(child_img_url)
                total = len(img_urls)
                print('Image count after deduplication: %i ' % total)
                new_title = url_list[1]
                # Save every download record
                os.chdir(cur_dir)
                write_to_done_log(line, new_title)
                if len(img_urls) < 2:
                    os.chdir(cur_dir)
                    save_not_down_url(line, new_title, num)
                else:
                    path = down_path + cur_month + str(new_title.strip()) + '/'
                    common.create_file(path)
                    os.chdir(path)
                    for i in range(0, len(img_urls)):
                        file_url = img_urls[i].get('file')
                        # print(file_url)
                        # fileUrl = file_url.replace('http://pic.w26.rocks/', pre_url)
                        if not file_url.startswith('http'):
                            # if not ('http://' in file_url or 'https://' in file_url):
                            print('in:' + file_url)
                            file_url = pre_url + file_url
                        image_name = file_url.split("/")[-1]
                        # print(file_url)
                        if not os.path.exists(image_name):
                            print('Line %i: image %i / %i: %s' %
                                  (num, i + 1, total, file_url),
                                  end=';')
                            common.down_img(file_url)
                        # else:
                        #     print('Line %i: image %i / %i already exists: %s' % (num, i + 1, total, file_url))
                # print("-----down over----------------")
                print('第 %i 行: %s 下载完毕,开始下载下一行文件  ' % (num, line))
        print('File %i: %s downloaded, deleting it...' % (index, file_name))
        os.remove(file_name)
        print('File %i: %s deleted, reading the next file' % (index, file_name))
    print("----------------所有文件下载完毕-------------------")
Example #2
def down_all_pic(down_param):
    path_ = down_param + cur_month
    if not (os.path.exists(path_)):
        os.makedirs(path_)
    file_name_list = FileTool.get_file_list('txt')
    for index, file_name in enumerate(file_name_list, 1):
        print('Reading file %i: %s' % (index, file_name))
        # Open the file
        with open(file_name) as file_obj:
            for num, value in enumerate(file_obj, 1):
                line = value.strip('\n')
                if line == '':
                    print('Line %i is empty' % num)
                    continue
                print('Line %i: -%s- ' % (num, line), end=' ;')
                # Collect all image links
                url_list = get_img_url_list(line)
                img_urls = url_list[0]
                print(' Image count: %i ' % len(img_urls))
                new_title = url_list[1]

                if len(img_urls) < 2:
                    os.chdir(cur_dir)
                    save_not_down_url(line, new_title, num)
                else:
                    path = path_ + str(new_title.strip()) + os.sep
                    common.create_file(path)
                    os.chdir(path)
                    for i in range(0, len(img_urls)):
                        file_url = img_urls[i].get('file')
                        # if not ('http://' in file_url or 'https://' in file_url):
                        if not file_url.startswith('http'):
                            print('in:' + file_url)
                            file_url = pre_url + file_url
                        # fileUrl = file_url.replace('http://pic.w26.rocks/', pre_url)
                        image_name = file_url.split("/")[-1]
                        if not os.path.exists(image_name):
                            print('Line %i: image %i / %i: %s' %
                                  (num, i + 1, len(img_urls), file_url),
                                  end=' ;')
                            common.down_img(file_url)
                # print("-----down over----------------")
                print('第 %i 行: %s 下载完毕 ' % (num, line))
                # Save every downloaded link
                os.chdir(cur_dir)
                write_to_done_log(line, new_title)
        print('File %i: %s downloaded, deleting it...' % (index, file_name))
        os.remove(file_name)
        print('File %i: %s deleted, reading the next file' % (index, file_name), end=";")
    # print("down all over----------------start delete old undown text-------------------")
    print("---------------- 所有文件下载完毕 -------------------")
Example #3
def get_down_file(self, arg='filter'):
    file_dir = self.get_file_dir(arg)
    return FileTool.get_file_list('text', file_dir)
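
FileTool.get_file_list() is used in all three examples but never shown. A minimal sketch, assuming it returns the file names matching an extension in a directory (the current directory when none is given):

import glob
import os

class FileTool:
    @staticmethod
    def get_file_list(ext, file_dir=None):
        # e.g. get_file_list('txt') -> ['a.txt', 'b.txt'] from the current directory
        pattern = '*.' + ext
        if file_dir:
            pattern = os.path.join(file_dir, pattern)
        return glob.glob(pattern)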