Python step Exemples, common.log.step Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : crawler.py Projet : yxw19870806/Py3Crawler

    def main(self):
        try:
            self.init()

            self._main()
        except (KeyboardInterrupt, SystemExit) as e:
            if self.download_thread and issubclass(self.download_thread,
                                                   DownloadThread):
                self.stop_process()
            else:
                if isinstance(
                        e,
                        SystemExit) and e.code == tool.PROCESS_EXIT_CODE_ERROR:
                    log.step("异常退出")
                else:
                    log.step("提前退出")
        except Exception as e:
            log.error("未知异常")
            log.error(str(e) + "\n" + traceback.format_exc())

        # 未完成的数据保存
        self.write_remaining_save_data()

        # 重新排序保存存档文件
        self.rewrite_save_file()

        # 其他结束操作
        self.done()

        # 结束日志
        self.end_message()

Exemple #2

0

Afficher le fichier

Fichier : crawler.py Projet : yxw19870806/Py3Crawler

 def step(self, message, include_display_name=True):
     """
     step log
     """
     if include_display_name and self.display_name is not None:
         message = self.display_name + " " + message
     log.step(message)

Exemple #3

0

Afficher le fichier

Fichier : checkInvalid.py Projet : yxw19870806/yxw19870806

def check_invalid():
    # 获取存放路径
    config_path = os.path.join(os.getcwd(), "..\\common\\config.ini")
    config = robot.read_config(config_path)
    save_data_path = robot.get_config(config, "SAVE_DATA_PATH", "info/save.data", 3)
    save_data_dir = os.path.dirname(save_data_path)
    fee_save_data_path = os.path.join(save_data_dir, "fee.data")
    # 读取村存档中的收费相册列表
    if not os.path.exists(fee_save_data_path):
        log.step("收费相册存档不存在")
        return
    fee_save_data_file = open(fee_save_data_path, "r")
    fee_save_data = fee_save_data_file.read()
    fee_save_data_file.close()
    fee_album_id_list = fee_save_data.strip().split(" ")
    new_fee_album_id_list = []
    # 循环访问，判断相册是否已经被删除
    for fee_album_id in fee_album_id_list:
        album_url = "http://meituzz.com/album/browse?albumID=%s" % fee_album_id
        album_page_return_code, album_page = tool.http_request(album_url)[:2]
        if album_page_return_code == 1:
            if album_page.find("<title>相册已被删除</title>") == -1:
                new_fee_album_id_list.append(fee_album_id)
            else:
                log.step("第%s页相册已被删除" % fee_album_id)
    # 重新保存
    fee_save_data_file = open(fee_save_data_path, "w")
    fee_save_data_file.write(" ".join(new_fee_album_id_list) + " ")
    fee_save_data_file.close()

Exemple #4

0

Afficher le fichier

Fichier : Abase.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        # 如果文件已经存在，则使用临时文件名保存
        if os.path.exists(self.file_path):
            is_exist = True
            file_path = self.file_temp_path
        else:
            is_exist = False
            file_path = self.file_path

        if tool.save_net_file(self.file_url, file_path):
            if check_invalid_image(file_path):
                os.remove(file_path)
                log.step("%s的封面图片无效，自动删除" % self.title)
            else:
                log.step("%s的封面图片下载成功" % self.title)
                if is_exist:
                    # 如果新下载图片比原来大，则替换原本的；否则删除新下载的图片
                    if os.path.getsize(self.file_temp_path) > os.path.getsize(self.file_path):
                        os.remove(self.file_path)
                        os.rename(self.file_temp_path, self.file_path)
                    else:
                        os.remove(self.file_temp_path)

                self.thread_lock.acquire()
                TOTAL_IMAGE_COUNT += 1
                self.thread_lock.release()
        else:
            log.error("%s的封面图片 %s 下载失败" % (self.title, self.file_url))

Exemple #5

0

Afficher le fichier

Fichier : crawler.py Projet : yxw19870806/Py3Crawler

 def end_message(self):
     message = f"全部下载完毕，耗时{self.get_run_time()}秒"
     download_result = []
     if self.is_download_photo:
         download_result.append(f"图片{self.total_photo_count}张")
     if self.is_download_video:
         download_result.append(f"视频{self.total_video_count}个")
     if self.is_download_audio:
         download_result.append(f"音频{self.total_audio_count}个")
     if download_result:
         message += "，共计下载" + "，".join(download_result)
     log.step(message)

Exemple #6

0

Afficher le fichier

Fichier : weibo.py Projet : yxw19870806/yxw19870806

    def main(self):
        global ACCOUNTS

        # 解析存档文件
        # account_id  image_count  last_image_time  video_count  last_video_url  (account_name)
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "0", "0", "0", ""])
        ACCOUNTS = account_list.keys()

        # 先访问下页面，产生cookies
        auto_redirect_visit("http://www.weibo.com/")
        time.sleep(2)

        # 循环下载每个id
        main_thread_count = threading.activeCount()
        for account_id in sorted(account_list.keys()):
            # 检查正在运行的线程数
            while threading.activeCount() >= self.thread_count + main_thread_count:
                if robot.is_process_end() == 0:
                    time.sleep(10)
                else:
                    break

            # 提前结束
            if robot.is_process_end() > 0:
                break

            # 开始下载
            thread = Download(account_list[account_id], self.thread_lock)
            thread.start()

            time.sleep(1)

        # 检查除主线程外的其他所有线程是不是全部结束了
        while threading.activeCount() > main_thread_count:
            time.sleep(10)

        # 未完成的数据保存
        if len(ACCOUNTS) > 0:
            new_save_data_file = open(NEW_SAVE_DATA_PATH, "a")
            for account_id in ACCOUNTS:
                new_save_data_file.write("\t".join(account_list[account_id]) + "\n")
            new_save_data_file.close()

        # 删除临时文件夹
        self.finish_task()

        # 重新排序保存存档文件
        robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path)

        log.step("全部下载完毕，耗时%s秒，共计图片%s张，视频%s个" % (self.get_run_time(), TOTAL_IMAGE_COUNT, TOTAL_VIDEO_COUNT))

Exemple #7

0

Afficher le fichier

Fichier : instagram.py Projet : yxw19870806/yxw19870806

    def main(self):
        global ACCOUNTS

        # 解析存档文件
        # account_name  image_count  video_count  last_created_time
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "0", "0", "0"])
        ACCOUNTS = account_list.keys()

        if not set_csrf_token():
            log.error("token和session获取查找失败")
            tool.process_exit()

        # 循环下载每个id
        main_thread_count = threading.activeCount()
        for account_name in sorted(account_list.keys()):
            # 检查正在运行的线程数
            while threading.activeCount() >= self.thread_count + main_thread_count:
                if robot.is_process_end() == 0:
                    time.sleep(10)
                else:
                    break

            # 提前结束
            if robot.is_process_end() > 0:
                break

            # 开始下载
            thread = Download(account_list[account_name], self.thread_lock)
            thread.start()

            time.sleep(1)

        # 检查除主线程外的其他所有线程是不是全部结束了
        while threading.activeCount() > main_thread_count:
            time.sleep(10)

        # 未完成的数据保存
        if len(ACCOUNTS) > 0:
            new_save_data_file = open(NEW_SAVE_DATA_PATH, "a")
            for account_name in ACCOUNTS:
                new_save_data_file.write("\t".join(account_list[account_name]) + "\n")
            new_save_data_file.close()

        # 删除临时文件夹
        self.finish_task()

        # 重新排序保存存档文件
        robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path)

        log.step("全部下载完毕，耗时%s秒，共计图片%s张，视频%s个" % (self.get_run_time(), TOTAL_IMAGE_COUNT, TOTAL_VIDEO_COUNT))

Exemple #8

0

Afficher le fichier

Fichier : template.py Projet : yxw19870806/yxw19870806

    def main(self):
        global ACCOUNTS

        # todo 存档文件格式
        # 解析存档文件
        # account_id
        account_list = robot.read_save_data(self.save_data_path, 0, ["", ])
        ACCOUNTS = account_list.keys()

        # 循环下载每个id
        main_thread_count = threading.activeCount()
        for account_id in sorted(account_list.keys()):
            # 检查正在运行的线程数
            while threading.activeCount() >= self.thread_count + main_thread_count:
                if robot.is_process_end() == 0:
                    time.sleep(10)
                else:
                    break

            # 提前结束
            if robot.is_process_end() > 0:
                break

            # 开始下载
            thread = Download(account_list[account_id], self.thread_lock)
            thread.start()

            time.sleep(1)

        # 检查除主线程外的其他所有线程是不是全部结束了
        while threading.activeCount() > main_thread_count:
            time.sleep(10)

        # 未完成的数据保存
        if len(ACCOUNTS) > 0:
            new_save_data_file = open(NEW_SAVE_DATA_PATH, "a")
            for account_id in ACCOUNTS:
                new_save_data_file.write("\t".join(account_list[account_id]) + "\n")
            new_save_data_file.close()

        # todo 是否需要下载图片或视频
        # 删除临时文件夹
        tool.remove_dir(IMAGE_TEMP_PATH)
        tool.remove_dir(VIDEO_TEMP_PATH)

        # 重新排序保存存档文件
        robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path)

        # todo 是否需要下载图片或视频
        log.step("全部下载完毕，耗时%s秒，共计图片%s张，视频%s个" % (self.get_run_time(), TOTAL_IMAGE_COUNT, TOTAL_VIDEO_COUNT))

Exemple #9

0

Afficher le fichier

Fichier : bcy.py Projet : yxw19870806/yxw19870806

    def main(self):
        global ACCOUNTS

        # 检测登录状态
        # 未登录时提示可能无法获取粉丝指定的作品
        check_login()

        # 解析存档文件
        # account_id  last_rp_id
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "0"])
        ACCOUNTS = account_list.keys()

        # 循环下载每个id
        main_thread_count = threading.activeCount()
        for account_id in sorted(account_list.keys()):
            # 检查正在运行的线程数
            while threading.activeCount() >= self.thread_count + main_thread_count:
                if robot.is_process_end() == 0:
                    time.sleep(10)
                else:
                    break

            # 提前结束
            if robot.is_process_end() > 0:
                break

            # 开始下载
            thread = Download(account_list[account_id], self.thread_lock)
            thread.start()

            time.sleep(1)

        # 检查除主线程外的其他所有线程是不是全部结束了
        while threading.activeCount() > main_thread_count:
            time.sleep(10)

        # 未完成的数据保存
        if len(ACCOUNTS) > 0:
            new_save_data_file = open(NEW_SAVE_DATA_PATH, "a")
            for account_id in ACCOUNTS:
                new_save_data_file.write("\t".join(account_list[account_id]) + "\n")
            new_save_data_file.close()

        # 重新排序保存存档文件
        robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path)

        log.step("全部下载完毕，耗时%s秒，共计图片%s张" % (self.get_run_time(), TOTAL_IMAGE_COUNT))

Exemple #10

0

Afficher le fichier

Fichier : Abase.py Projet : yxw19870806/yxw19870806

    def main(self):
        page_count = 1
        image_count = 0
        main_thread_count = threading.activeCount()
        # 多线程下载类型
        # 1 同时开始N个下载线程
        # 2 对一页中的所有图片开启多线程下载，下完一页中的所有图片后开始下一页
        thread_type = 2
        while True:
            # 获取一页页面
            page_data = get_one_page_data(page_count)
            if page_data is None:
                log.error("第%s页获取失败" % page_count)
                break

            # 获取页面中的所有图片信息列表
            image_info_list = re.findall('<img src="" data-original="([^"]*)" class="lazy img" title="([^"]*)">', page_data)
            # 获取页面中的影片数量
            page_data_count = page_data.count('<div class="item pull-left">')

            # 已经下载完毕了
            if page_data_count == 0:
                break
            log.step("第%s页，影片数量%s，获取到的封面图片数量%s" % (page_count, len(image_info_list), page_data_count))

            for small_image_url, title in image_info_list:
                # 达到线程上限，等待
                while thread_type == 1 and threading.activeCount() >= self.thread_count + main_thread_count:
                    time.sleep(5)

                title = robot.filter_text(str(title)).upper()
                image_url = get_large_image_url(small_image_url)
                if image_url is None:
                    log.trace("%s的封面图片大图地址获取失败" % title)
                    continue

                log.step("开始下载%s的封面图片 %s" % (title, image_url))

                file_type = image_url.split(".")[-1]
                file_path = os.path.join(self.image_download_path, "%s.%s" % (title, file_type))
                file_temp_path = os.path.join(self.image_download_path, "%s_temp.%s" % (title, file_type))

                # 开始下载
                thread = Download(self.thread_lock, title, file_path, file_temp_path, image_url)
                thread.start()
                time.sleep(0.1)

            # 还有未完成线程
            while thread_type == 2 and threading.activeCount() > main_thread_count:
                time.sleep(5)

            page_count += 1

        log.step("全部下载完毕，耗时%s秒，共计图片%s张" % (self.get_run_time(), TOTAL_IMAGE_COUNT))

Exemple #11

0

Afficher le fichier

Fichier : lofter.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_id = self.account_info[0]

        try:
            log.step(account_id + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_id)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_id)

            # 图片下载
            page_count = 1
            image_count = 1
            first_post_id = ""
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while not is_over:
                post_url_list = get_one_page_post_url_list(account_id, page_count)
                # 无法获取信息首页
                if post_url_list is None:
                    log.error(account_id + " 无法访问第%s页相册页" % page_count)
                    tool.process_exit()

                if len(post_url_list) == 0:
                    # 下载完毕了
                    break

                # 去重排序
                log.trace(account_id + " 相册第%s页获取的所有信息页：%s" % (page_count, post_url_list))
                post_url_list = sorted(list(set(post_url_list)), reverse=True)
                log.trace(account_id + " 相册第%s页去重排序后的信息页：%s" % (page_count, post_url_list))
                for post_url in post_url_list:
                    post_id = post_url.split("/")[-1].split("_")[-1]

                    # 检查是否已下载到前一次的图片
                    if post_id <= self.account_info[2]:
                        is_over = True
                        break

                    # 新增信息页导致的重复判断
                    if post_id in unique_list:
                        continue
                    else:
                        unique_list.append(post_id)
                    # 将第一个信息页的id做为新的存档记录
                    if first_post_id == "":
                        first_post_id = post_id

                    post_page_return_code, post_page = tool.http_request(post_url)[:2]
                    if post_page_return_code != 1:
                        log.error(account_id + " 第%s张图片，无法获取信息页 %s" % (image_count, post_url))
                        continue

                    image_url_list = get_image_url_list(post_page)
                    log.trace(account_id + " 信息页 %s 获取的所有图片：%s" % (post_url, image_url_list))
                    if len(image_url_list) == 0:
                        log.error(account_id + " 第%s张图片，信息页 %s 中没有找到图片" % (image_count, post_url))
                        continue
                    for image_url in image_url_list:
                        if image_url.rfind("?") > image_url.rfind("."):
                            image_url = image_url.split("?")[0]
                        log.step(account_id + " 开始下载第%s张图片 %s" % (image_count, image_url))

                        # 第一张图片，创建目录
                        if need_make_download_dir:
                            if not tool.make_dir(image_path, 0):
                                log.error(account_id + " 创建图片下载目录 %s 失败" % image_path)
                                tool.process_exit()
                            need_make_download_dir = False

                        file_type = image_url.split(".")[-1]
                        file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_id + " 第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error(account_id + " 第%s张图片 %s 下载失败" % (image_count, image_url))

                        # 达到配置文件中的下载数量，结束
                        if 0 < GET_IMAGE_COUNT < image_count:
                            is_over = True
                            break

                    if is_over:
                        break

                if not is_over:
                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_PAGE_COUNT <= page_count:
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_id + " 下载完毕，总共获得%s张图片" % (image_count - 1))

            # 排序
            if IS_SORT and image_count > 1:
                destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_id)
                if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_id + " 图片从下载目录移动到保存目录成功")
                else:
                    log.error(account_id + " 创建图片子目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_post_id != "":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_post_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_id + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_id + " 提前退出")
            else:
                log.error(account_id + " 异常退出")

Exemple #12

0

Afficher le fichier

Fichier : bcy.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        coser_id = self.account_info[0]
        if len(self.account_info) >= 3:
            cn = self.account_info[2]
        else:
            cn = self.account_info[0]

        try:
            log.step(cn + " 开始")

            image_path = os.path.join(IMAGE_DOWNLOAD_PATH, cn)

            # 图片下载
            this_cn_total_image_count = 0
            page_count = 1
            total_rp_count = 1
            first_rp_id = ""
            unique_list = []
            is_over = False
            need_make_download_dir = True  # 是否需要创建cn目录
            while not is_over:
                # 获取一页的作品信息
                post_page = get_one_page_post(coser_id, page_count)
                if post_page is None:
                    log.error(cn + " 无法访问第%s页作品" % page_count)
                    tool.process_exit()

                # 解析作品信息，获取所有的正片信息
                cp_id, rp_list = get_rp_list(post_page)
                if cp_id is None:
                    log.error(cn + " 第%s页作品解析异常" % page_count)
                    tool.process_exit()

                for rp_id, title in rp_list.iteritems():
                    # 检查是否已下载到前一次的图片
                    if int(rp_id) <= int(self.account_info[1]):
                        is_over = True
                        break

                    # 新增正片导致的重复判断
                    if rp_id in unique_list:
                        continue
                    else:
                        unique_list.append(rp_id)
                    # 将第一个作品的id做为新的存档记录
                    if first_rp_id == "":
                        first_rp_id = rp_id

                    log.trace("rp: " + rp_id)

                    if need_make_download_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(cn + " 创建CN目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_download_dir = False

                    # 过滤标题中不支持的字符
                    title = robot.filter_text(title)
                    if title:
                        rp_path = os.path.join(image_path, "%s %s" % (rp_id, title))
                    else:
                        rp_path = os.path.join(image_path, rp_id)
                    if not tool.make_dir(rp_path, 0):
                        # 目录出错，把title去掉后再试一次，如果还不行退出
                        log.error(cn + " 创建作品目录 %s 失败，尝试不使用title" % rp_path)
                        rp_path = os.path.join(image_path, rp_id)
                        if not tool.make_dir(rp_path, 0):
                            log.error(cn + " 创建作品目录 %s 失败" % rp_path)
                            tool.process_exit()

                    # 获取正片页面内的所有图片地址列表
                    image_url_list = get_image_url_list(cp_id, rp_id)
                    if image_url_list is None:
                        log.error(cn + " 无法访问正片：%s，cp_id：%s" % (rp_id, cp_id))
                        continue

                    if len(image_url_list) == 0 and IS_AUTO_FOLLOW:
                        log.step(cn + " 检测到可能有私密作品且账号不是ta的粉丝，自动关注")
                        if follow(coser_id):
                            # 重新获取下正片页面内的所有图片地址列表
                            image_url_list = get_image_url_list(cp_id, rp_id)

                    if len(image_url_list) == 0:
                        log.error(cn + " 正片：%s没有任何图片，可能是你使用的账号没有关注ta，所以无法访问只对粉丝开放的私密作品，cp_id：%s" % (rp_id, cp_id))
                        continue

                    image_count = 1
                    for image_url in list(image_url_list):
                        # 禁用指定分辨率
                        image_url = "/".join(image_url.split("/")[0:-1])
                        log.step(cn + " %s 开始下载第%s张图片 %s" % (rp_id, image_count, image_url))

                        if image_url.rfind("/") < image_url.rfind("."):
                            file_type = image_url.split(".")[-1]
                        else:
                            file_type = "jpg"
                        file_path = os.path.join(rp_path, "%03d.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, file_path):
                            image_count += 1
                            log.step(cn + " %s 第%s张图片下载成功" % (rp_id, image_count))
                        else:
                            log.error(cn + " %s 第%s张图片 %s 下载失败" % (rp_id, image_count, image_url))

                    this_cn_total_image_count += image_count - 1

                    if 0 < GET_PAGE_COUNT < total_rp_count:
                        is_over = True
                        break
                    else:
                        total_rp_count += 1

                if not is_over:
                    if page_count >= get_max_page_count(coser_id, post_page):
                        is_over = True
                    else:
                        page_count += 1

            log.step(cn + " 下载完毕，总共获得%s张图片" % this_cn_total_image_count)

            # 新的存档记录
            if first_rp_id != "":
                self.account_info[1] = first_rp_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += this_cn_total_image_count
            ACCOUNTS.remove(coser_id)
            self.thread_lock.release()

            log.step(cn + " 完成")
        except SystemExit:
            log.error(cn + " 异常退出")
        except Exception, e:
            log.error(cn + " 未知异常")
            log.error(str(e) + "\n" + str(traceback.format_exc()))

Exemple #13

0

Afficher le fichier

Fichier : instagram.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            account_id = get_account_id(account_name)
            if account_id is None:
                log.error(account_name + " account id 查找失败")
                tool.process_exit()

            image_count = 1
            video_count = 1
            cursor = INIT_CURSOR
            first_created_time = "0"
            is_over = False
            need_make_image_dir = True
            need_make_video_dir = True
            while not is_over:
                # 获取指定时间后的一页媒体信息
                media_data = get_one_page_media_data(account_id, cursor)
                if media_data is None:
                    log.error(account_name + " 媒体列表解析异常")
                    tool.process_exit()

                nodes_data = media_data["nodes"]
                for photo_info in nodes_data:
                    if not robot.check_sub_key(("is_video", "display_src", "date"), photo_info):
                        log.error(account_name + " 媒体信息解析异常")
                        break
                    if photo_info["is_video"] and not robot.check_sub_key(("code",), photo_info):
                        log.error(account_name + " 视频code解析异常")
                        break

                    # 检查是否已下载到前一次的图片
                    if int(photo_info["date"]) <= int(self.account_info[3]):
                        is_over = True
                        break

                    # 将第一张图片的上传时间做为新的存档记录
                    if first_created_time == "0":
                        first_created_time = str(int(photo_info["date"]))

                    # 图片
                    if IS_DOWNLOAD_IMAGE:
                        image_url = str(photo_info["display_src"].split("?")[0])
                        log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                        # 第一张图片，创建目录
                        if need_make_image_dir:
                            if not tool.make_dir(image_path, 0):
                                log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                                tool.process_exit()
                            need_make_image_dir = False

                        file_type = image_url.split(".")[-1]
                        image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, image_file_path):
                            log.step(account_name + " 第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))

                    # 视频
                    if IS_DOWNLOAD_VIDEO and photo_info["is_video"]:
                        # 根据日志ID获取视频下载地址
                        video_url = get_video_url(photo_info["code"])
                        if video_url is None:
                            log.error(account_name + " 第%s个视频code：%s 无法访问" % (video_count, photo_info["code"]))
                            continue
                        if not video_url:
                            log.error(account_name + " 第%s个视频code：%s 没有获取到下载地址" % (video_count, photo_info["code"]))
                            continue

                        log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                        # 第一个视频，创建目录
                        if need_make_video_dir:
                            if not tool.make_dir(video_path, 0):
                                log.error(account_name + " 创建视频下载目录 %s 失败" % video_path)
                                tool.process_exit()
                            need_make_video_dir = False

                        file_type = video_url.split(".")[-1]
                        video_file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                        if tool.save_net_file(video_url, video_file_path):
                            log.step(account_name + " 第%s个视频下载成功" % video_count)
                            video_count += 1
                        else:
                            log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_IMAGE_COUNT < image_count:
                        is_over = True
                        break

                if not is_over:
                    if media_data["page_info"]["has_next_page"]:
                        cursor = str(media_data["page_info"]["end_cursor"])
                    else:
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[2]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_created_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = str(int(self.account_info[2]) + video_count - 1)
                self.account_info[3] = first_created_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #14

0

Afficher le fichier

Fichier : changba.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 3 and self.account_info[2]:
            account_name = self.account_info[2]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            user_id = get_user_id(account_id)
            if user_id is None:
                log.error(account_name + " userid获取失败")
                tool.process_exit()

            page_count = 0
            video_count = 1
            first_audio_id = "0"
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while not is_over:
                # 获取指定一页的歌曲信息
                audio_list = get_one_page_audio_list(user_id, page_count)

                if audio_list is None:
                    log.step(account_name + " 第%s页歌曲列表获取失败" % page_count)
                    first_audio_id = "0"
                    break  # 存档恢复

                # 如果为空，表示已经取完了
                if len(audio_list) == 0:
                    break

                for audio_info in list(audio_list):
                    audio_id = audio_info[0]

                    # 检查是否歌曲id小于上次的记录
                    if int(audio_id) <= int(self.account_info[1]):
                        is_over = True
                        break

                    # 新增歌曲导致的重复判断
                    if audio_id in unique_list:
                        continue
                    else:
                        unique_list.append(audio_id)
                    # 将第一首歌曲id做为新的存档记录
                    if first_audio_id == "0":
                        first_audio_id = str(audio_id)

                    # 获取歌曲的下载地址
                    audio_url = get_audio_url(audio_info[2])
                    log.step(account_name + " 开始下载第%s首歌曲 %s" % (video_count, audio_url))

                    # 第一首歌曲，创建目录
                    if need_make_download_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建歌曲下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_download_dir = False

                    file_path = os.path.join(video_path, "%s - %s.mp3" % (audio_id, audio_info[1]))
                    if tool.save_net_file(audio_url, file_path):
                        log.step(account_name + " 第%s首歌曲下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s首歌曲 %s 下载失败" % (video_count, audio_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    # 获取的歌曲数量少于1页的上限，表示已经到结束了
                    # 如果歌曲数量正好是页数上限的倍数，则由下一页获取是否为空判断
                    if len(audio_list) < 20:
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s首歌曲" % (video_count - 1))

            # 新的存档记录
            if first_audio_id != "0":
                self.account_info[1] = first_audio_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #15

0

Afficher le fichier

Fichier : blog.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)

            # 图片
            image_count = 1
            page_count = 1
            first_blog_id = "0"
            need_make_image_dir = True
            is_over = False
            is_big_image_over = False
            while not is_over:
                # 获取一页日志信息
                blog_page = get_one_page_blog(account_id, page_count)
                if blog_page is None:
                    log.error(account_name + " 第%s页日志获取失败" % page_count)
                    tool.process_exit()
                if not blog_page:
                    log.error(account_name + " 第%s页日志解析失败" % page_count)
                    tool.process_exit()

                blog_data_list = get_blog_data_list(blog_page)
                if len(blog_data_list) == 0:
                    log.error(account_name + " 第%s页日志分组失败" % page_count)
                    tool.process_exit()

                for blog_data in blog_data_list:
                    # 获取日志id
                    blog_id = get_blog_id(account_id, blog_data)
                    if blog_id is None:
                        log.error(account_name + " 日志解析日志id失败，日志内容：%s" % blog_data)
                        tool.process_exit()

                    # 检查是否已下载到前一次的日志
                    if blog_id <= int(self.account_info[2]):
                        is_over = True
                        break

                    # 将第一个日志的ID做为新的存档记录
                    if first_blog_id == "0":
                        first_blog_id = str(blog_id)

                    # 获取该页日志的全部图片地址列表
                    image_url_list = get_image_url_list(blog_data)
                    if len(image_url_list) == 0:
                        continue

                    # 获取日志页面中存在的所有大图显示地址，以及对应的小图地址
                    big_2_small_list = get_big_image_url_list(blog_data)

                    # 下载图片
                    for image_url in image_url_list:
                        # 检查是否存在大图可以下载
                        if not is_big_image_over:
                            image_url, is_big_image_over = check_big_image(image_url, big_2_small_list)
                        log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                        # 第一张图片，创建目录
                        if need_make_image_dir:
                            if not tool.make_dir(image_path, 0):
                                log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                                tool.process_exit()
                            need_make_image_dir = False

                        file_type = image_url.split(".")[-1]
                        if file_type.find("?") != -1:
                            file_type = "jpeg"
                        file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_name + " 第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))

                        # 达到配置文件中的下载数量，结束
                        if 0 < GET_IMAGE_COUNT < image_count:
                            is_over = True
                            break

                if not is_over:
                    # 达到配置文件中的下载页数，结束
                    if 0 < GET_PAGE_COUNT <= page_count:
                        is_over = True
                    # 判断当前页数是否大等于总页数
                    elif page_count >= get_max_page_count(blog_page):
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s张图片" % (image_count - 1))

            # 排序
            if IS_SORT:
                if first_blog_id != "0":
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_blog_id != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_blog_id

            # 保存最后的信息
            self.thread_lock.acquire()
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            TOTAL_IMAGE_COUNT += image_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #16

0

Afficher le fichier

Fichier : flickr.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)

            # 获取user id
            api_info = get_api_info(account_name)
            if api_info is None:
                log.error(account_name + " API信息查找失败")
                tool.process_exit()
            if not api_info["user_id"]:
                log.error(account_name + " user_id解析失败")
                tool.process_exit()
            if not api_info["site_key"]:
                log.error(account_name + " site_key解析失败")
                tool.process_exit()
            # 生成一个随机的request id用作访问（使用原理暂时不明，只管模拟页面传入）
            request_id = tool.generate_random_string(8)

            # 图片
            image_count = 1
            page_count = 1
            first_image_time = "0"
            is_over = False
            need_make_image_dir = True
            while not is_over:
                # 获取一页图片信息
                page_data = get_one_page_image_data(api_info["user_id"], page_count, api_info["site_key"], request_id)
                if page_data is None:
                    log.error(account_name + " 第%s页图片信息获取失败" % page_count)
                    tool.process_exit()

                for photo_info in page_data["photos"]["photo"]:
                    if "dateupload" not in photo_info:
                        log.error(account_name + " 第%s张图片上传时间获取失败，图片信息：%s" % (image_count, photo_info))
                        continue

                    # 检查是否是上一次的最后视频
                    if int(self.account_info[2]) >= int(photo_info["dateupload"]):
                        is_over = True
                        break

                    # 将第一张图片的上传时间做为新的存档记录
                    if first_image_time == "0":
                        first_image_time = str(photo_info["dateupload"])

                    if "url_o_cdn" in photo_info:
                        image_url = str(photo_info["url_o_cdn"])
                    elif "url_o" in photo_info:
                        image_url = str(photo_info["url_o"])
                    else:
                        log.error(account_name + " 第%s张图片下载地址获取失败，图片信息：%s" % (image_count, photo_info))
                        continue
                    log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False

                    file_type = image_url.split(".")[-1]
                    file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    if tool.save_net_file(image_url, file_path):
                        log.step(account_name + " 第%s张图片下载成功" % image_count)
                        image_count += 1
                    else:
                        log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_IMAGE_COUNT < image_count:
                        is_over = True
                        break

                if not is_over:
                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_PAGE_COUNT <= page_count:
                        is_over = True
                    elif page_count >= int(page_data["photos"]["pages"]):
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s张图片" % (image_count - 1))

            # 排序
            if IS_SORT:
                if first_image_time != "0":
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_image_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_image_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #17

0

Afficher le fichier

Fichier : article.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 3 and self.account_info[2]:
            account_name = self.account_info[2]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 获取账号对应的page_id
            account_page_id = get_account_page_id(account_id)
            if account_page_id is None:
                log.error(account_name + " 微博主页没有获取到page_id")
                tool.process_exit()

            page_count = 1
            this_account_total_image_count = 0
            first_article_time = "0"
            is_over = False
            image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
            while not is_over:
                # 获取一页文章预览页面
                preview_article_page = get_one_page_preview_article_data(account_page_id, page_count)

                if preview_article_page is None:
                    log.error(account_name + " 第%s页文章获取失败" % page_count)
                    tool.process_exit()

                # 将文章预览页面内容分组
                preview_article_data_list = get_preview_article_data_list(preview_article_page)
                if len(preview_article_data_list) == 0:
                    log.error(account_name + " 第%s页文章解析失败，页面：%s" % (page_count, preview_article_page))
                    tool.process_exit()

                for preview_article_data in preview_article_data_list:
                    # 获取文章的发布时间
                    article_time = get_article_time(preview_article_data)
                    if article_time is None:
                        log.error(account_name + " 预览 %s 中的文章发布时间解析失败" % preview_article_data)
                        continue

                    # 检查是否是上一次的最后视频
                    if article_time <= int(self.account_info[1]):
                        is_over = True
                        break

                    # 将第一个视频的地址做为新的存档记录
                    if first_article_time == "0":
                        first_article_time = str(article_time)

                    # 获取文章地址
                    article_url = get_article_url(preview_article_data)
                    if article_url is None:
                        log.error(account_name + " 预览 %s 中的文章地址解析失败" % preview_article_data)
                        continue

                    # 获取文章id
                    article_id = get_article_id(article_url)
                    if article_id is None:
                        log.error(account_name + " 文章地址 %s 解析文章id失败" % article_url)
                        continue

                    # 获取文章页面内容
                    article_page = auto_redirect_visit(article_url)
                    if not article_page:
                        log.error(account_name + " 文章 %s 获取失败" % article_url)
                        continue

                    # 文章标题
                    title = get_article_title(article_page, article_id[0])
                    # 过滤标题中不支持的字符
                    title = robot.filter_text(title)
                    if title:
                        article_path = os.path.join(image_path, "%s %s" % (article_id, title))
                    else:
                        article_path = os.path.join(image_path, article_id)
                    if not tool.make_dir(article_path, 0):
                        # 目录出错，把title去掉后再试一次，如果还不行退出
                        log.error(account_name + " 创建文章目录 %s 失败，尝试不使用title" % article_path)
                        article_path = os.path.join(image_path, article_id)
                        if not tool.make_dir(article_path, 0):
                            log.error(account_name + " 创建文章目录 %s 失败" % article_path)
                            tool.process_exit()

                    # 文章顶部图片
                    top_picture_url = get_article_top_picture_url(article_page)
                    if top_picture_url:
                        log.step(account_name + " %s 开始下载顶部图片 %s" % (title, top_picture_url))

                        file_type = top_picture_url.split(".")[-1]
                        file_path = os.path.join(article_path, "0000.%s" % file_type)
                        if tool.save_net_file(top_picture_url, file_path):
                            log.step(account_name + " %s 顶部图片下载成功" % title)
                            this_account_total_image_count += 1
                        else:
                            log.error(account_name + " %s 顶部图片 %s 下载失败" % (title, top_picture_url))

                    # 获取文章正文的图片地址列表
                    image_url_list = get_article_image_url_list(article_page, article_id[0])
                    if image_url_list is None:
                        log.error(account_name + " 文章 %s 正文解析失败" % article_url)
                        continue

                    image_count = 1
                    for image_url in list(image_url_list):
                        if image_url.find("/p/e_weibo_com") >= 0 or image_url.find("e.weibo.com") >= 0:
                            continue
                        log.step(account_name + " %s 开始下载第%s张图片 %s" % (title, image_count, image_url))

                        file_type = image_url.split(".")[-1]
                        file_path = os.path.join(article_path, "%s.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_name + " %s 第%s张图片下载成功" % (title, image_count))
                            image_count += 1
                        else:
                            log.error(account_name + " %s 第%s张图片 %s 下载失败" % (title, image_count, image_url))

                    if image_count > 1:
                        this_account_total_image_count += image_count - 1

                if not is_over:
                    # 获取文章总页数
                    if page_count >= get_max_page_count(preview_article_page):
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s张图片" % this_account_total_image_count)

            # 新的存档记录
            if first_article_time != "0":
                self.account_info[1] = first_article_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += this_account_total_image_count
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #18

0

Afficher le fichier

Fichier : shinoda.py Projet : yxw19870806/yxw19870806

    def main(self):
        # 解析存档文件
        last_blog_id = ""
        image_start_index = 0
        if os.path.exists(self.save_data_path):
            save_file = open(self.save_data_path, "r")
            save_info = save_file.read()
            save_file.close()
            save_info = save_info.split("\t")
            if len(save_info) >= 2:
                image_start_index = int(save_info[0])
                last_blog_id = save_info[1]

        # 下载
        page_index = 1
        image_count = 1
        is_over = False
        new_last_blog_id = ""
        if self.is_sort:
            image_path = self.image_temp_path
        else:
            image_path = self.image_download_path
        while not is_over:
            index_url = "http://blog.mariko-shinoda.net/page%s.html" % (page_index - 1)
            index_page_return_code, index_page = tool.http_request(index_url)[:2]

            if index_page_return_code == 1:
                image_name_list = re.findall('data-original="./([^"]*)"', index_page)
                for image_name in image_name_list:
                    blog_id = image_name.split("-")[0]

                    # 检查是否已下载到前一次的图片
                    if blog_id == last_blog_id:
                        is_over = True
                        break

                    # 将第一个博客的id做为新的存档记录
                    if new_last_blog_id == "":
                        new_last_blog_id = blog_id

                    image_url = "http://blog.mariko-shinoda.net/%s" % image_name
                    # 文件类型
                    file_type = image_url.split(".")[-1].split(":")[0]
                    file_path = os.path.join(image_path, "%05d.%s" % (image_count, file_type))
                    log.step("开始下载第%s张图片 %s" % (image_count, image_url))
                    if tool.save_net_file(image_url, file_path):
                        log.step("第%s张图片下载成功" % image_count)
                        image_count += 1
                    else:
                        log.step("第%s张图片 %s 下载失败" % (image_count, image_url))
                page_index += 1
            else:
                log.error("无法访问博客页面 %s" % index_url)
                is_over = True

        log.step("下载完毕")

        # 排序复制到保存目录
        if self.is_sort:
            if robot.sort_file(self.image_temp_path, self.image_download_path, image_start_index, 5):
                log.step(" 图片从下载目录移动到保存目录成功")
            else:
                log.error(" 创建图片保存目录 %s 失败" % self.image_download_path)
                tool.process_exit()

        # 保存新的存档文件
        new_save_file_path = robot.get_new_save_file_path(self.save_data_path)
        log.step("保存新存档文件 %s" % new_save_file_path)
        new_save_file = open(new_save_file_path, "w")
        new_save_file.write(str(image_start_index) + "\t" + new_last_blog_id)
        new_save_file.close()

        log.step("全部下载完毕，耗时%s秒，共计图片%s张" % (self.get_run_time(), image_count - 1))

Exemple #19

0

Afficher le fichier

Fichier : meipai.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            page_count = 1
            video_count = 1
            first_video_id = "0"
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while not is_over:
                # 获取指定一页的视频信息
                medias_data = get_one_page_video_data(account_id, page_count)
                if medias_data is None:
                    log.error(account_name + " 视频列表获取失败")
                    tool.process_exit()

                for media in medias_data:
                    if not robot.check_sub_key(("video", "id"), media):
                        log.error(account_name + " 第%s个视频信：%s解析失败" % (video_count, media))
                        continue

                    video_id = str(media["id"])

                    # 检查是否图片时间小于上次的记录
                    if int(video_id) <= int(self.account_info[2]):
                        is_over = True
                        break

                    # 新增视频导致的重复判断
                    if video_id in unique_list:
                        continue
                    else:
                        unique_list.append(video_id)
                    # 将第一张图片的上传时间做为新的存档记录
                    if first_video_id == "0":
                        first_video_id = video_id

                    video_url = str(media["video"])
                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                    # 第一个视频，创建目录
                    if need_make_download_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建视频下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_download_dir = False
                        
                    file_path = os.path.join(video_path, "%04d.mp4" % video_count)
                    if tool.save_net_file(video_url, file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    if len(medias_data) >= VIDEO_COUNT_PER_PAGE:
                        page_count += 1
                    else:
                        # 获取的数量小于请求的数量，已经没有剩余视频了
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT and video_count > 1:
                destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                if robot.sort_file(video_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 视频从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_video_id != "":
                self.account_info[1] = str(int(self.account_info[1]) + video_count - 1)
                self.account_info[2] = first_video_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #20

0

Afficher le fichier

Fichier : googlePlus.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]
        if len(self.account_info) >= 5 and self.account_info[4]:
            account_file_path = self.account_info[4]
        else:
            account_file_path = ""

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_file_path, account_name)

            # 图片下载
            image_count = 1
            key = ""
            first_album_id = "0"
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while not is_over:
                # 获取一页相册
                album_page = get_one_page_album(account_id, key)
                if album_page is None:
                    log.error(account_name + " 无法访问相册页，token：%s" % key)
                    tool.process_exit()

                # 获取相册页中的所有picasweb地址列表
                picasaweb_url_list = get_picasaweb_url_list(album_page)

                log.trace(account_name + " 相册获取的所有picasaweb页：%s" % picasaweb_url_list)
                for picasaweb_url in picasaweb_url_list:
                    # 有可能拿到带authkey的，需要去掉
                    # https://picasaweb.google.com/116300481938868290370/2015092603?authkey\u003dGv1sRgCOGLq-jctf-7Ww#6198800191175756402
                    picasaweb_url = picasaweb_url.replace("\u003d", "=")

                    # 获取picasaweb页的album id
                    album_id = get_picasaweb_page_album_id(account_id, picasaweb_url)
                    if album_id is None:
                        log.error(account_name + " 第%s张图片，无法访问picasaweb页 %s" % (image_count, picasaweb_url))
                        continue
                    if not album_id:
                        log.error(account_name + " 第%s张图片，picasaweb页 %s 获取album id失败" % (image_count, picasaweb_url))
                        continue
                    log.trace(account_name + " picasaweb页 %s 的album id：%s" % (picasaweb_url, album_id))

                    # 检查是否已下载到前一次的图片
                    if int(album_id) <= int(self.account_info[2]):
                        is_over = True
                        break

                    # # 相同的album_id判断
                    if album_id in unique_list:
                        continue
                    else:
                        unique_list.append(album_id)
                    # 将第一个album_id做为新的存档记录
                    if first_album_id == "0":
                        first_album_id = album_id

                    # 获取album id对应相册存档页的全部图片地址列表
                    image_url_list = get_image_url_list(account_id, album_id)
                    if image_url_list is None:
                        log.error(account_name + " 第%s张图片，无法访问album id：%s 的相册存档页" % (image_count, album_id))
                        continue
                    if len(image_url_list) == 0:
                        log.error(account_name + " 第%s张图片，album id：%s 的相册存档页没有解析到图片" % (image_count, album_id))
                        continue

                    log.trace(account_name + " album id：%s 的相册存档页获取的所有图片：%s" % (album_id, image_url_list))
                    for image_url in list(image_url_list):
                        image_url = generate_max_resolution_image_url(image_url)

                        # 下载
                        log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                        # 第一张图片，创建目录
                        if need_make_download_dir:
                            if not tool.make_dir(image_path, 0):
                                log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                                tool.process_exit()
                            need_make_download_dir = False

                        if image_url.rfind("/") < image_url.rfind("."):
                            file_type = image_url.split(".")[-1]
                        else:
                            file_type = "jpg"
                        file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_name + " 第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))

                        # 达到配置文件中的下载数量，结束
                        if 0 < GET_IMAGE_COUNT < image_count:
                            is_over = True
                            break
                    if is_over:
                        break

                if not is_over:
                    # 查找下一页的token key
                    key_find = re.findall('"([.]?[a-zA-Z0-9-_]*)"', album_page)
                    if len(key_find) > 0 and len(key_find[0]) > 80:
                        key = key_find[0]
                    else:
                        # 不是第一次下载
                        if self.account_info[2] != "0":
                            log.error(account_name + " 没有找到下一页的token，将该页保存：")
                            log.error(album_page)
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s张图片" % (image_count - 1))

            # 排序
            if IS_SORT and image_count > 1:
                destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_file_path, account_name)
                if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 图片从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建图片子目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_album_id != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_album_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #21

0

Afficher le fichier

Fichier : weibo.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 6 and self.account_info[5]:
            account_name = self.account_info[5]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            # 视频
            video_count = 1
            account_page_id = None
            first_video_url = ""
            is_over = False
            need_make_video_dir = True
            since_id = INIT_SINCE_ID
            while IS_DOWNLOAD_VIDEO and (not is_over):
                # 获取page_id
                if account_page_id is None:
                    account_page_id = get_account_page_id(account_id)
                    if account_page_id is None:
                        log.error(account_name + " 微博主页没有获取到page_id")
                        break

                # 获取指定时间点后的一页视频信息
                video_page_data = get_one_page_video_data(account_page_id, since_id)
                if video_page_data is None:
                    log.error(account_name + " 视频列表解析异常")
                    first_video_url = ""  # 存档恢复
                    break

                # 匹配获取全部的视频页面
                video_play_url_list = get_video_play_url_list(video_page_data)
                log.trace(account_name + "since_id：%s中的全部视频：%s" % (since_id, video_play_url_list))
                for video_play_url in video_play_url_list:
                    # 检查是否是上一次的最后视频
                    if self.account_info[4] == video_play_url:
                        is_over = True
                        break

                    # 将第一个视频的地址做为新的存档记录
                    if first_video_url == "":
                        first_video_url = video_play_url

                    # 获取这个视频的下载地址
                    return_code, video_url_list = get_video_url(video_play_url)
                    if return_code != 1:
                        if return_code == -1:
                            log.error(account_name + " 第%s个视频 %s 没有获取到源地址" % (video_count, video_play_url))
                        elif return_code == -2:
                            log.error(account_name + " 第%s个视频 %s 无法访问" % (video_count, video_play_url))
                        elif return_code == -3:
                            log.error(account_name + " 第%s个视频 %s 暂不支持的视频源" % (video_count, video_play_url))
                        continue
                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_play_url))

                    # 第一个视频，创建目录
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    video_file_path = os.path.join(video_path, "%04d.mp4" % video_count)
                    for video_url in video_url_list:
                        if tool.save_net_file(video_url, video_file_path):
                            log.step(account_name + " 第%s个视频下载成功" % video_count)
                            video_count += 1
                        else:
                            log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    # 获取下一页的since_id
                    since_id = tool.find_sub_string(video_page_data, "type=video&owner_uid=&since_id=", '">')
                    if not since_id:
                        break

            # 有历史记录，并且此次没有获得正常结束的标记，说明历史最后的视频已经被删除了
            if self.account_info[4] != "" and video_count > 1 and not is_over:
                log.error(account_name + " 没有找到上次下载的最后一个视频地址")

            # 图片
            image_count = 1
            page_count = 1
            first_image_time = "0"
            unique_list = []
            is_over = False
            need_make_image_dir = True
            while IS_DOWNLOAD_IMAGE and (not is_over):
                # 获取指定一页图片的信息
                photo_page_data = get_one_page_photo_data(account_id, page_count)
                if photo_page_data is None:
                    log.error(account_name + " 图片列表获取失败")
                    first_image_time = "0"  # 存档恢复
                    break

                log.trace(account_name + "第%s页的全部图片信息：%s" % (page_count, photo_page_data))
                for image_info in photo_page_data["photo_list"]:
                    if not robot.check_sub_key(("pic_host", "pic_name", "timestamp"), image_info):
                        log.error(account_name + " 第%s张图片信息解析失败 %s" % (image_count, image_info))
                        continue

                    # 检查是否图片时间小于上次的记录
                    if int(image_info["timestamp"]) <= int(self.account_info[2]):
                        is_over = True
                        break

                    # 新增图片导致的重复判断
                    if image_info["pic_name"] in unique_list:
                        continue
                    else:
                        unique_list.append(image_info["pic_name"])
                    # 将第一张图片的上传时间做为新的存档记录
                    if first_image_time == "0":
                        first_image_time = str(image_info["timestamp"])

                    image_url = str(image_info["pic_host"]) + "/large/" + str(image_info["pic_name"])
                    log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                    # 获取图片的二进制数据，并且判断这个图片是否是可用的
                    image_status, image_byte = get_image_byte(image_url)
                    if image_status != 1:
                        if image_status == -1:
                            log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))
                        elif image_status == -2:
                            log.error(account_name + " 第%s张图片 %s 资源已被删除，跳过" % (image_count, image_url))
                        continue

                    # 第一张图片，创建目录
                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False

                    file_type = image_url.split(".")[-1]
                    if file_type.find("/") != -1:
                        file_type = "jpg"
                    image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    save_image(image_byte, image_file_path)
                    log.step(account_name + " 第%s张图片下载成功" % image_count)
                    image_count += 1

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_IMAGE_COUNT < image_count:
                        is_over = True
                        break

                if not is_over:
                    # 根据总的图片数量和每页显示的图片数量，计算是否还有下一页
                    if (photo_page_data["total"] / IMAGE_COUNT_PER_PAGE) > (page_count - 1):
                        page_count += 1
                    else:
                        # 全部图片下载完毕
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if first_image_time != "0":
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if first_video_url != "":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_image_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_image_time
            if first_video_url != "":
                self.account_info[3] = str(int(self.account_info[3]) + video_count - 1)
                self.account_info[4] = first_video_url

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #22

0

Afficher le fichier

Fichier : ameblo.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)

            image_count = 1
            page_count = 1
            first_blog_time = "0"
            is_over = False
            need_make_image_dir = True
            while not is_over:
                # 获取一页日志
                blog_data = get_blog_page_data(account_name, page_count)
                if blog_data is None:
                    log.error(account_name + " 第%s页日志无法获取" % page_count)
                    tool.process_exit()

                # 解析日志发布时间
                blog_time = get_blog_time(blog_data)
                if blog_time is None:
                    log.error(account_name + " 第%s页解析日志时间失败" % page_count)
                    tool.process_exit()

                # 检查是否是上一次的最后blog
                if blog_time <= int(self.account_info[2]):
                    break

                # 将第一个日志的时间做为新的存档记录
                if first_blog_time == "0":
                    first_blog_time = str(blog_time)

                # 从日志列表中获取全部的图片
                image_url_list = get_image_url_list(blog_data)
                for image_url in image_url_list:
                    # 使用默认图片的分辨率
                    image_url = image_url.split("?")[0]
                    # 过滤表情
                    if image_url.find("http://emoji.ameba.jp") >= 0:
                        continue
                    log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                    # 第一张图片，创建目录
                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False
                        
                    file_type = image_url.split(".")[-1]
                    file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    if tool.save_net_file(image_url, file_path):
                        log.step(account_name + " 第%s张图片下载成功" % image_count)
                        image_count += 1
                    else:
                        log.error(account_name + " 第%s张图片 %s 获取失败" % (image_count, image_url))

                # 达到配置文件中的下载数量，结束
                if 0 < GET_IMAGE_COUNT < image_count:
                    is_over = True

                if not is_over:
                    if 0 < GET_PAGE_COUNT < page_count:
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s张图片" % (image_count - 1))

            # 排序
            if IS_SORT and image_count > 1:
                destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 图片从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建图片子目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_blog_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_blog_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #23

0

Afficher le fichier

Fichier : twitter.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            image_count = 1
            video_count = 1
            data_tweet_id = INIT_MAX_ID
            first_tweet_id = "0"
            is_over = False
            is_download_image = IS_DOWNLOAD_IMAGE
            is_download_video = IS_DOWNLOAD_VIDEO
            need_make_image_dir = True
            need_make_video_dir = True
            while not is_over:
                # 获取指定时间点后的一页图片信息
                media_page = get_media_page_data(account_name, data_tweet_id)
                if media_page is None:
                    log.error(account_name + " 媒体列表解析异常")
                    tool.process_exit()

                # 上一页正好获取了全部的媒体信息，所以这一页没有任何内容，完成了，直接退出
                if media_page["new_latent_count"] == 0 and not media_page["has_more_items"]:
                    break

                tweet_list = get_tweet_list(media_page["items_html"])
                if len(tweet_list) == 0:
                    log.error(account_name + " 媒体列表拆分异常，items_html：%s" % media_page["items_html"])
                    tool.process_exit()

                if media_page["new_latent_count"] != len(tweet_list):
                    log.error(account_name + " 解析的媒体数量不等于new_latent_count的数值")
                    # tool.process_exit()

                for tweet_data in tweet_list:
                    tweet_id = tool.find_sub_string(tweet_data, 'data-tweet-id="', '"')
                    if not tweet_id:
                        log.error(account_name + " tweet id解析异常，tweet数据：%s" % tweet_data)
                        continue

                    # 检查是否tweet的id小于上次的记录
                    if int(tweet_id) <= int(self.account_info[3]):
                        is_over = True
                        break

                    # 将第一个tweet的id做为新的存档记录
                    if first_tweet_id == "0":
                        first_tweet_id = tweet_id

                    # 视频
                    if is_download_image:
                        # 这个tweet是否包含视频
                        if check_has_video(tweet_data):
                            video_file_type, video_url_list = get_video_url_list(tweet_id)
                            if len(video_url_list) > 0:
                                log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url_list))

                                # 第一个视频，创建目录
                                if need_make_video_dir:
                                    if not tool.make_dir(video_path, 0):
                                        log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                                        tool.process_exit()
                                    need_make_video_dir = False

                                video_file_path = os.path.join(video_path, "%04d.%s" % (video_count, video_file_type))
                                if save_video(video_url_list, video_file_path):
                                    log.step(account_name + " 第%s个视频下载成功" % video_count)
                                    video_count += 1
                                else:
                                    log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url_list))
                            else:
                                log.error(account_name + " 第%s个视频 没有获取到源地址，tweet id：%s" % (video_count, tweet_id))

                        # 达到配置文件中的下载数量，结束图片下载
                        if 0 < GET_IMAGE_COUNT < image_count:
                            is_download_image = False

                    # 图片
                    if is_download_video:
                        # 匹配获取全部的图片地址
                        image_url_list = get_image_url_list(tweet_data)
                        for image_url in image_url_list:
                            image_url = str(image_url)
                            log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                            image_return_code, image_byte = tool.http_request(image_url)[:2]
                            # 404，不算做错误，图片已经被删掉了
                            if image_return_code == -404:
                                log.error(account_name + " 第%s张图片 %s 已被删除，跳过" % (image_count, image_url))
                            elif image_return_code == 1:
                                # 第一张图片，创建目录
                                if need_make_image_dir:
                                    if not tool.make_dir(image_path, 0):
                                        log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                                        tool.process_exit()
                                    need_make_image_dir = False

                                file_type = image_url.split(".")[-1].split(":")[0]
                                image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                                save_image(image_byte, image_file_path)
                                log.step(account_name + " 第%s张图片下载成功" % image_count)
                                image_count += 1
                            else:
                                log.error(account_name + " 第%s张图片 %s 获取失败" % (image_count, image_url))

                        # 达到配置文件中的下载数量，结束视频下载
                        if 0 < GET_VIDEO_COUNT < video_count:
                            is_download_video = False

                    # 全部达到配置文件中的下载数量，结束
                    if not is_download_image and not is_download_video:
                        is_over = True
                        break

                if not is_over:
                    # 查找下一页的data_tweet_id
                    if media_page["has_more_items"]:
                        data_tweet_id = str(media_page["min_position"])
                    else:
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片子目录 %s 失败" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[2]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_tweet_id != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = str(int(self.account_info[2]) + video_count - 1)
                self.account_info[3] = first_tweet_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #24

0

Afficher le fichier

Fichier : tuchong.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            if account_name.isdigit():
                site_id = account_name
            else:
                site_id = get_site_id(account_name)
            if site_id is None:
                log.error(account_name + " 主页无法访问")
                tool.process_exit()

            if not site_id:
                log.error(account_name + " site id解析失败")
                tool.process_exit()

            image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)

            this_account_total_image_count = 0
            post_count = 0
            first_post_id = "0"
            post_time = "2016-11-16 14:12:00"
            is_over = False
            while not is_over:
                # 获取一页的相册信息列表
                post_info_list = get_one_page_post_info_list(site_id, post_time)
                if post_info_list is None:
                    log.error(account_name + " 相册信息列表无法访问")
                    tool.process_exit()

                # 如果为空，表示已经取完了
                if len(post_info_list) == 0:
                    break

                for post_info in post_info_list:
                    if not robot.check_sub_key(("title", "post_id", "published_at", "images"), post_info):
                        log.error(account_name + " 相册信息解析失败：%s" % post_info)
                        continue

                    post_id = str(post_info["post_id"])

                    # 检查信息页id是否小于上次的记录
                    if int(post_id) <= int(self.account_info[1]):
                        is_over = True
                        break

                    # 将第一个信息页的id做为新的存档记录
                    if first_post_id == "0":
                        first_post_id = post_id

                    # 过滤标题中不支持的字符
                    title = robot.filter_text(post_info["title"])
                    if title:
                        post_path = os.path.join(image_path, "%s %s" % (post_id, title))
                    else:
                        post_path = os.path.join(image_path, post_id)
                    if not tool.make_dir(post_path, 0):
                        # 目录出错，把title去掉后再试一次，如果还不行退出
                        log.error(account_name + " 创建相册目录 %s 失败，尝试不使用title" % post_path)
                        post_path = os.path.join(image_path, post_id)
                        if not tool.make_dir(post_path, 0):
                            log.error(account_name + " 创建相册目录 %s 失败" % post_path)
                            tool.process_exit()

                    image_count = 0
                    for image_info in post_info["images"]:
                        image_count += 1
                        if not robot.check_sub_key(("img_id",), image_info):
                            log.error(account_name + " 相册%s 第%s张图片解析失败" % (post_id, image_count))
                            continue
                        image_url = generate_large_image_url(site_id, image_info["img_id"])
                        log.step(account_name + " 相册%s 开始下载第%s张图片 %s" % (post_id, image_count, image_url))

                        file_path = os.path.join(post_path, "%s.jpg" % image_count)
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_name + " 相册%s 第%s张图片下载成功" % (post_id, image_count))
                        else:
                            log.error(account_name + " 相册%s 第%s张图片 %s 下载失败" % (post_info["post_id"], image_count, image_url))
                    this_account_total_image_count += image_count

                    if not is_over:
                        # 达到配置文件中的下载页数，结束
                        if 0 < GET_PAGE_COUNT < post_count:
                            is_over = True
                        else:
                            # 相册发布时间
                            post_time = post_info["published_at"]
                            post_count += 1

            log.step(account_name + " 下载完毕，总共获得%s张图片" % this_account_total_image_count)

            # 新的存档记录
            if first_post_id != "0":
                self.account_info[1] = first_post_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += this_account_total_image_count
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #25

0

Afficher le fichier

Fichier : nicoNico.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 3 and self.account_info[2]:
            account_name = self.account_info[2]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            # 获取视频信息列表
            video_info_list = get_video_info_list(account_id)
            if video_info_list is None:
                log.error(account_name + " 视频列表获取失败")
                tool.process_exit()

            video_count = 1
            first_video_id = "0"
            need_make_video_dir = True
            for video_info in video_info_list:
                if not robot.check_sub_key(("item_data",), video_info) or \
                        not robot.check_sub_key(("watch_id", "title"), video_info["item_data"]):
                    log.error(account_name + " 视频信息%s解析失败" % video_info)
                    tool.process_exit()

                # sm30043563
                video_id = str(video_info["item_data"]["watch_id"])

                # 过滤标题中不支持的字符
                video_title = robot.filter_text(video_info["item_data"]["title"])

                # 第一个视频，创建目录
                if need_make_video_dir:
                    if not tool.make_dir(video_path, 0):
                        log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                        tool.process_exit()
                    need_make_video_dir = False

                # 获取视频下载地址
                video_url = get_video_url(video_id)
                log.step(account_name + " 开始下载第%s个视频 %s %s" % (video_count, video_id, video_url))
                print video_title
                print "%s %s" % (video_id, video_title)
                file_path = os.path.join(video_path, "%s %s.mp4" % (video_id, video_title))
                if tool.save_net_file(video_url, file_path):
                    log.step(account_name + " 第%s个视频下载成功" % video_count)
                    video_count += 1
                else:
                    log.error(account_name + " 第%s个视频 %s %s 下载失败" % (video_count, video_id, video_url))

            log.step(account_name + " 下载完毕，总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT:
                if first_video_id != "0":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_video_id != "0":
                self.account_info[1] = first_video_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #26

0

Afficher le fichier

Fichier : meituzz.py Projet : yxw19870806/yxw19870806

    def main(self):
        # 解析存档文件，获取上一次的album id
        album_id = 1
        if os.path.exists(self.save_data_path):
            save_file = open(self.save_data_path, "r")
            save_info = save_file.read()
            save_file.close()
            album_id = int(save_info.strip())

        total_image_count = 0
        total_video_count = 0
        error_count = 0
        is_over = False
        while not is_over:
            album_url = "http://meituzz.com/album/browse?albumID=%s" % album_id
            try:
                album_page_return_code, album_page = tool.http_request(album_url)[:2]
            except SystemExit:
                log.step("提前退出")
                break

            if album_page_return_code == -500:
                log.error("第%s页相册内部错误" % album_id)
                album_id += 1
                continue
            elif album_page_return_code != 1:
                log.error("第%s页图片获取失败" % album_id)
                break

            if album_page.find("<title>相册已被删除</title>") >= 0:
                error_count += 1
                if error_count >= ERROR_PAGE_COUNT_CHECK:
                    log.error("连续%s页相册没有图片，退出程序" % ERROR_PAGE_COUNT_CHECK)
                    album_id -= error_count - 1
                    break
                else:
                    log.error("第%s页相册已被删除" % album_id)
                    album_id += 1
                    continue
            # 错误数量重置
            error_count = 0

            # 图片下载
            if self.is_download_image and album_page.find('<input type="hidden" id="imageList"') >= 0:
                total_photo_count = tool.find_sub_string(album_page, '<input type="hidden" id="totalPageNum" value=', ' />')
                if not total_photo_count:
                    log.error("第%s页图片数量解析失败" % album_id)
                    break
                total_photo_count = int(total_photo_count)

                # 获取页面全部图片地址列表
                image_url_list = get_image_url_list(album_page)
                if image_url_list is None:
                    log.error("第%s页图片地址列表解析失败" % album_id)
                    break

                if len(image_url_list) == 0:
                    log.error("第%s页没有获取到图片" % album_id)
                    break

                is_fee = False
                if len(image_url_list) != total_photo_count:
                    album_reward_find = re.findall('<input type="hidden" id="rewardAmount" value="(\d*)">', album_page)
                    if len(album_reward_find) == 1:
                        album_reward = int(album_reward_find[0])
                        if album_reward > 0 and total_photo_count - len(image_url_list) <= 1:
                            is_fee = True
                    if not is_fee:
                        log.error("第%s页解析获取的图片数量不符" % album_id)
                        # break

                image_path = os.path.join(self.image_download_path, "%04d" % album_id)
                if not tool.make_dir(image_path, 0):
                    log.error("创建图片下载目录 %s 失败" % image_path)
                    break

                image_count = 1
                for image_url in image_url_list:
                    # 去除模糊效果
                    image_url = str(image_url).split("@")[0]
                    log.step("开始下载第%s页第%s张图片 %s" % (album_id, image_count, image_url))

                    image_file_path = os.path.join(image_path, "%04d.jpg" % image_count)
                    try:
                        if tool.save_net_file(image_url, image_file_path, True):
                            log.step("第%s页第%s张图片下载成功" % (album_id, image_count))
                            image_count += 1
                        else:
                            log.error("第%s页第%s张图片 %s 下载失败" % (album_id, image_count, image_url))
                    except SystemExit:
                        log.step("提前退出")
                        tool.remove_dir(image_path)
                        is_over = True
                        break

                total_image_count += image_count - 1

            # 视频下载
            if self.is_download_image and album_page.find('<input type="hidden" id="VideoUrl"') >= 0:
                # 获取视频下载地址
                video_url = get_video_url(album_page)
                log.step("开始下载第%s页视频 %s" % (album_id, video_url))

                video_title = robot.filter_text(tool.find_sub_string(album_page, "<title>", "</title>"))
                file_type = video_url.split(".")[-1]
                video_file_path = os.path.join(self.video_download_path, "%s %s.%s" % (album_id, video_title, file_type))
                try:
                    if tool.save_net_file(video_url, video_file_path, True):
                        log.step("第%s页视频下载成功" % album_id)
                        total_video_count += 1
                    else:
                        log.error("第%s页视频 %s 下载失败" % (album_id, video_url))
                except SystemExit:
                    log.step("提前退出")
                    is_over = True

            if not is_over:
                album_id += 1

        # 重新保存存档文件
        save_data_dir = os.path.dirname(self.save_data_path)
        if not os.path.exists(save_data_dir):
            tool.make_dir(save_data_dir, 0)
        save_file = open(self.save_data_path, "w")
        save_file.write(str(album_id))
        save_file.close()

        log.step("全部下载完毕，耗时%s秒，共计图片%s张，视频%s个" % (self.get_run_time(), total_image_count, total_video_count))

Exemple #27

0

Afficher le fichier

Fichier : fiveSing.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT
        global GET_PAGE_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        # 原创、翻唱
        audio_type_to_index = {"yc": 1, "fc": 2}
        try:
            log.step(account_name + " 开始")

            video_count = 1
            for audio_type in audio_type_to_index.keys():
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name, audio_type)

                page_count = 1
                first_audio_id = "0"
                unique_list = []
                is_over = False
                need_make_download_dir = True
                while not is_over:
                    # 获取指定一页的歌曲信息列表
                    audio_list = get_one_page_audio_list(account_id, audio_type, page_count)

                    if audio_list is None:
                        log.step(account_name + " 第%s页%s歌曲页面获取失败" % (page_count, audio_type))
                        first_audio_id = "0"
                        break  # 存档恢复

                    # 如果为空，表示已经取完了
                    if len(audio_list) == 0:
                        break

                    for audio_info in list(audio_list):
                        audio_id = audio_info[0]
                        # 过滤标题中不支持的字符
                        audio_title = robot.filter_text(audio_info[1])

                        # 检查是否歌曲id小于上次的记录
                        if int(audio_id) <= int(self.account_info[audio_type_to_index[audio_type]]):
                            is_over = True
                            break

                        # 新增歌曲导致的重复判断
                        if audio_id in unique_list:
                            continue
                        else:
                            unique_list.append(audio_id)
                        # 将第一首歌曲id做为新的存档记录
                        if first_audio_id == "0":
                            first_audio_id = str(audio_id)

                        # 获取歌曲的下载地址
                        audio_url = get_audio_url(audio_id, audio_type_to_index[audio_type])
                        if audio_url is None:
                            log.step(account_name + " %s歌曲ID %s，下载地址获取失败" % (audio_type, audio_id))
                            continue
                        if not audio_url:
                            log.step(account_name + " %s歌曲ID %s，暂不提供下载地址" % (audio_type, audio_id))
                            continue

                        log.step(account_name + " 开始下载第%s首歌曲 %s" % (video_count, audio_url))

                        # 第一首歌曲，创建目录
                        if need_make_download_dir:
                            if not tool.make_dir(video_path, 0):
                                log.error(account_name + " 创建歌曲下载目录 %s 失败" % video_path)
                                tool.process_exit()
                            need_make_download_dir = False

                        file_path = os.path.join(video_path, "%s - %s.mp3" % (audio_id, audio_title))
                        if tool.save_net_file(audio_url, file_path):
                            log.step(account_name + " 第%s首歌曲下载成功" % video_count)
                            video_count += 1
                        else:
                            log.error(account_name + " 第%s首歌曲 %s 下载失败" % (video_count, audio_url))

                        # 达到配置文件中的下载数量，结束
                        if 0 < GET_VIDEO_COUNT < video_count:
                            is_over = True
                            break

                    if not is_over:
                        # 达到配置文件中的下载页数，结束
                        if 0 < GET_PAGE_COUNT <= page_count:
                            is_over = True
                        # 获取的歌曲数量少于1页的上限，表示已经到结束了
                        # 如果歌曲数量正好是页数上限的倍数，则由下一页获取是否为空判断
                        elif len(audio_list) < 20:
                            is_over = True
                        else:
                            page_count += 1

                # 新的存档记录
                if first_audio_id != "0":
                    self.account_info[audio_type_to_index[audio_type]] = first_audio_id

            log.step(account_name + " 下载完毕，总共获得%s首歌曲" % (video_count - 1))

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #28

0

Afficher le fichier

Fichier : template.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        # todo 是否有需要显示不同名字
        account_name = account_id

        try:
            log.step(account_name + " 开始")

            # todo 是否需要下载图片或视频
            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            # todo 图片下载逻辑
            # 图片
            image_count = 1
            first_image_time = "0"
            need_make_image_dir = True
            if IS_DOWNLOAD_IMAGE:
                pass

            # todo 视频下载逻辑
            # 视频
            video_count = 1
            first_video_time = "0"
            need_make_video_dir = True
            if IS_DOWNLOAD_VIDEO:
                pass

            log.step(account_name + " 下载完毕，总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                # todo 是否需要下载图片
                if first_image_time != "0":
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                # todo 是否需要下载视频
                if first_video_time != "0":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # todo 是否需要下载图片或视频
            # 新的存档记录
            if first_image_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_image_time
            if first_video_time != "0":
                self.account_info[3] = str(int(self.account_info[3]) + video_count - 1)
                self.account_info[4] = first_video_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            # todo 是否需要下载图片或视频
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #29

0

Afficher le fichier

Fichier : tumblr.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]

        try:
            log.step(account_id + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_id)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_id)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_id)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_id)

            page_count = 1
            image_count = 1
            video_count = 1
            first_post_id = ""
            unique_list = []
            is_over = False
            need_make_image_dir = True
            need_make_video_dir = True
            while not is_over:
                post_url_list = get_one_page_post_url_list(account_id, page_count)
                if post_url_list is None:
                    log.error(account_id + " 无法访问第%s页相册页" % page_count)
                    tool.process_exit()

                if len(post_url_list) == 0:
                    # 下载完毕了
                    break

                log.trace(account_id + " 相册第%s页获取的所有信息页：%s" % (page_count, post_url_list))
                post_url_list_group_by_post_id = filter_post_url(post_url_list)
                log.trace(account_id + " 相册第%s页去重排序后的信息页：%s" % (page_count, post_url_list_group_by_post_id))
                log.step(account_id + " 相册第%s页获取到%s页信息页" % (page_count, len(post_url_list_group_by_post_id)))
                for post_id in sorted(post_url_list_group_by_post_id.keys(), reverse=True):
                    # 检查信息页id是否小于上次的记录
                    if post_id <= self.account_info[3]:
                        is_over = True
                        break

                    # 将第一个信息页的id做为新的存档记录
                    if first_post_id == "":
                        first_post_id = post_id

                    # 获取信息页并截取head标签内的内容
                    post_url = "http://%s.tumblr.com/post/%s" % (account_id, post_id)
                    post_page_head = get_post_page_head(post_url, post_url_list_group_by_post_id[post_id])
                    if post_page_head is None:
                        log.error(account_id + " 无法访问信息页 %s" % post_url)
                        continue
                    if not post_page_head:
                        log.error(account_id + " 信息页 %s 截取head标签异常" % post_url)
                        continue

                    # 获取og_type（页面类型的是视频还是图片或其他）
                    og_type = tool.find_sub_string(post_page_head, '<meta property="og:type" content="', '" />')
                    if not og_type:
                        log.error(account_id + " 信息页 %s，'og:type'获取异常" % post_url)
                        continue

                    # 空、音频、引用，跳过
                    if og_type in ["tumblr-feed:entry", "tumblr-feed:audio", "tumblr-feed:quote", "tumblr-feed:link"]:
                        continue

                    # 新增信息页导致的重复判断
                    if post_id in unique_list:
                        continue
                    else:
                        unique_list.append(post_id)

                    # 视频下载
                    if IS_DOWNLOAD_VIDEO and og_type == "tumblr-feed:video":
                        video_list = get_video_list(account_id, post_id)
                        if video_list is None:
                            log.error(account_id + " 第%s个视频 日志id：%s无法访问播放页" % (video_count, post_id))
                        else:
                            if len(video_list) > 0:
                                for video_url, video_type in list(video_list):
                                    log.step(account_id + " 开始下载第%s个视频 %s" % (video_count, video_url))

                                    # 第一个视频，创建目录
                                    if need_make_video_dir:
                                        if not tool.make_dir(video_path, 0):
                                            log.error(account_id + " 创建视频下载目录 %s 失败" % video_path)
                                            tool.process_exit()
                                        need_make_video_dir = False

                                    file_type = video_type.split("/")[-1]
                                    video_file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                                    if tool.save_net_file(video_url, video_file_path):
                                        log.step(account_id + " 第%s个视频下载成功" % video_count)
                                        video_count += 1
                                    else:
                                        log.error(account_id + " 第%s个视频 %s 下载失败" % (video_count, video_url))
                            else:
                                log.error(account_id + " 第%s个视频 日志id：%s 中没有找到视频" % (video_count, post_id))

                    # 图片下载
                    if IS_DOWNLOAD_IMAGE:
                        if og_type == "tumblr-feed:video":
                            page_image_url_list = []
                            video_image_url = tool.find_sub_string(post_page_head, '<meta property="og:image" content="', '" />')
                            if video_image_url:
                                page_image_url_list.append(video_image_url)
                        else:
                            page_image_url_list = re.findall('"(http[s]?://\w*[.]?media.tumblr.com/[^"]*)"', post_page_head)
                            log.trace(account_id + " 信息页 %s 过滤前的所有图片：%s" % (post_url, page_image_url_list))
                            # 过滤头像以及页面上找到不同分辨率的同一张图
                            page_image_url_list = filter_different_resolution_images(page_image_url_list)
                        log.trace(account_id + " 信息页 %s 获取的的所有图片：%s" % (post_url, page_image_url_list))
                        if len(page_image_url_list) > 0:
                            for image_url in page_image_url_list:
                                log.step(account_id + " 开始下载第%s张图片 %s" % (image_count, image_url))

                                # 第一张图片，创建目录
                                if need_make_image_dir:
                                    if not tool.make_dir(image_path, 0):
                                        log.error(account_id + " 创建图片下载目录 %s 失败" % image_path)
                                        tool.process_exit()
                                    need_make_image_dir = False

                                file_type = image_url.split(".")[-1]
                                image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                                if tool.save_net_file(image_url, image_file_path):
                                    log.step(account_id + " 第%s张图片下载成功" % image_count)
                                    image_count += 1
                                else:
                                    log.error(account_id + " 第%s张图片 %s 下载失败" % (image_count, image_url))
                        else:
                            log.error(account_id + " 第%s张图片 信息页 %s 中没有找到图片" % (image_count, post_url))

                if not is_over:
                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_PAGE_COUNT <= page_count:
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_id + " 下载完毕，总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_id)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_id + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_id + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_id)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[2]), 4):
                        log.step(account_id + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_id + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_post_id != "":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = str(int(self.account_info[2]) + video_count - 1)
                self.account_info[3] = first_post_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_id + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_id + " 提前退出")
            else:
                log.error(account_id + " 异常退出")

Exemple #30

0

Afficher le fichier

Fichier : nanaGoGo.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            image_count = 1
            video_count = 1
            target_id = INIT_TARGET_ID
            first_post_id = "0"
            is_over = False
            need_make_image_dir = True
            need_make_video_dir = True

            while not is_over:
                # 获取一页日志信息
                message_page_data = get_message_page_data(account_name, target_id)
                if message_page_data is None:
                    log.error(account_name + " 媒体列表解析异常")
                    tool.process_exit()
                # 没有了
                if len(message_page_data) == 0:
                    break

                for message_info in message_page_data:
                    if not robot.check_sub_key(("post",), message_info):
                        log.error(account_name + " 媒体信息解析异常 %s" % message_info)
                        continue
                    if not robot.check_sub_key(("body", "postId"), message_info["post"]):
                        log.error(account_name + " 媒体信息解析异常 %s" % message_info)
                        continue

                    target_id = message_info["post"]["postId"]
                    # 检查是否已下载到前一次的记录
                    if int(target_id) <= int(self.account_info[3]):
                        is_over = True
                        break

                    # 将第一个媒体的postId做为新的存档记录
                    if first_post_id == "0":
                        first_post_id = str(target_id)

                    for media_info in message_info["post"]["body"]:
                        if not robot.check_sub_key(("bodyType",), media_info):
                            log.error(account_name + " 媒体列表bodyType解析异常")
                            continue

                        # bodyType = 1: text, bodyType = 3: image, bodyType = 8: video
                        body_type = int(media_info["bodyType"])
                        if body_type == 1:  # 文本
                            pass
                        elif body_type == 2:  # 表情
                            pass
                        elif body_type == 3:  # 图片
                            if IS_DOWNLOAD_IMAGE:
                                if not robot.check_sub_key(("image",), media_info):
                                    log.error(account_name + " 第%s张图片解析异常%s" % (image_count, media_info))
                                    continue

                                image_url = str(media_info["image"])
                                log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                                # 第一张图片，创建目录
                                if need_make_image_dir:
                                    if not tool.make_dir(image_path, 0):
                                        log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                                        tool.process_exit()
                                    need_make_image_dir = False

                                file_type = image_url.split(".")[-1]
                                image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                                if tool.save_net_file(image_url, image_file_path):
                                    log.step(account_name + " 第%s张图片下载成功" % image_count)
                                    image_count += 1
                                else:
                                    log.error(account_name + " 第%s张图片 %s 下载失败" % (image_count, image_url))
                        elif body_type == 8:  # video
                            if IS_DOWNLOAD_VIDEO:
                                if not robot.check_sub_key(("movieUrlHq",), media_info):
                                    log.error(account_name + " 第%s个视频解析异常%s" % (video_count, media_info))
                                    continue

                                video_url = str(media_info["movieUrlHq"])
                                log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                                # 第一个视频，创建目录
                                if need_make_video_dir:
                                    if not tool.make_dir(video_path, 0):
                                        log.error(account_name + " 创建视频下载目录 %s 失败" % video_path)
                                        tool.process_exit()
                                    need_make_video_dir = False

                                file_type = video_url.split(".")[-1]
                                video_file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                                if tool.save_net_file(video_url, video_file_path):
                                    log.step(account_name + " 第%s个视频下载成功" % video_count)
                                    video_count += 1
                                else:
                                    log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))
                        elif body_type == 7:  # 转发
                            pass
                        else:
                            log.error(account_name + " 第%s张图片、第%s个视频，未知bodytype %s, %s" % (image_count, video_count, body_type, media_info))

            # 排序
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[2]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_post_id != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = str(int(self.account_info[2]) + video_count - 1)
                self.account_info[3] = first_post_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #31

0

Afficher le fichier

Fichier : weishi.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            video_count = 1
            page_time = 0
            first_video_time = "0"
            need_make_video_dir = True
            is_over = False
            while not is_over:
                # 获取一页视频信息
                video_data = get_one_page_video_data(account_id, page_time)
                if video_data is None:
                    log.error(account_name + " 视频列表获取失败")
                    tool.process_exit()

                for video_info in video_data["info"]:
                    if not robot.check_sub_key(("newvideos", "id", "timestamp"), video_info):
                        log.error(account_name + " 视频信息 %s 解析失败" % video_info)
                        continue

                    page_time = int(video_info["timestamp"])
                    # 检查是否已下载到前一次的视频
                    if page_time <= int(self.account_info[2]):
                        is_over = True
                        break

                    # 将第一个视频的上传时间做为新的存档记录
                    if first_video_time == "0":
                        first_video_time = str(page_time)

                    # todo 处理如果有多个视频
                    if len(video_info["newvideos"]) != 1:
                        log.error(account_name + " 视频信息 %s 发现多个视频下载信息" % video_info)
                        continue
                    if not robot.check_sub_key(("vid",), video_info["newvideos"][0]):
                        log.error(account_name + " 视频信息 %s 解析vid失败" % video_info)
                        continue

                    # 获取视频下载地址
                    video_url = get_video_url(video_info["newvideos"][0]["vid"], video_info["id"])
                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                    # 第一个视频，创建目录
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    file_type = video_url.split(".")[-1].split("?")[0]
                    file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                    if tool.save_net_file(video_url, file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_VIDEO_COUNT < video_data:
                        is_over = True
                        break

                if not is_over:
                    if not video_data["hasNext"]:
                        is_over = True

            log.step(account_name + " 下载完毕，总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT:
                if first_video_time != "0":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_video_time != "0":
                self.account_info[3] = str(int(self.account_info[3]) + video_count - 1)
                self.account_info[4] = first_video_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #32

0

Afficher le fichier

Fichier : miaopai.py Projet : yxw19870806/yxw19870806

    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹，否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            suid = get_suid(account_id)
            if suid is None:
                log.error(account_name + " suid获取失败")

            page_count = 1
            video_count = 1
            first_video_scid = ""
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while suid != "" and (not is_over):
                # 获取指定一页的视频信息
                media_page = get_one_page_video_data(suid, page_count)
                if media_page is None:
                    log.error(account_name + " 视频列表获取失败")
                    tool.process_exit()

                # 获取视频scid列表
                scid_list = get_scid_list(media_page["msg"])
                if len(scid_list) == 0:
                    log.error(account_name + " 在视频列表：%s 中没有找到视频scid" % str(media_page["msg"]))
                    tool.process_exit()

                for scid in scid_list:
                    scid = str(scid)

                    # 检查是否已下载到前一次的图片
                    if first_video_scid == self.account_info[2]:
                        is_over = True
                        break

                    # 新增视频导致的重复判断
                    if scid in unique_list:
                        continue
                    else:
                        unique_list.append(scid)
                    # 将第一个视频的id做为新的存档记录
                    if first_video_scid == "":
                        first_video_scid = scid

                    # 获取视频下载地址
                    video_url = get_video_url_by_video_id(scid)
                    if video_url is None:
                        log.error(account_name + " 第%s个视频 %s 获取下载地址失败" % (video_count, scid))
                        continue

                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                    # 第一个视频，创建目录
                    if need_make_download_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建视频下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_download_dir = False

                    file_path = os.path.join(video_path, "%04d.mp4" % video_count)
                    if tool.save_net_file(video_url, file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量，结束
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    if media_page["isall"]:
                        is_over = True
                    else:
                        page_count += 1

            log.step(account_name + " 下载完毕，总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT and video_count > 1:
                destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                if robot.sort_file(video_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 视频从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_video_scid != "":
                self.account_info[1] = str(int(self.account_info[1]) + video_count - 1)
                self.account_info[2] = first_video_scid

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")

Exemple #33

0

Afficher le fichier

Fichier : fkoji.py Projet : yxw19870806/yxw19870806

    def main(self):
        # 解析存档文件
        # 寻找fkoji.save
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "", ""])

        # 这个key的内容为总数据
        if ALL_SIGN in account_list:
            image_start_index = int(account_list[ALL_SIGN][1])
            save_data_image_time = int(account_list[ALL_SIGN][2])
            account_list.pop(ALL_SIGN)
        else:
            image_start_index = 0
            save_data_image_time = 0

        if self.is_sort:
            image_path = self.image_temp_path
        else:
            image_path = self.image_download_path

        if not tool.make_dir(image_path, 0):
            # 图片保存目录创建失败
            self.print_msg("图片下载目录%s创建失败！" % self.image_download_path)
            tool.process_exit()

        # 下载
        page_index = 1
        image_count = 1
        first_image_time = 0
        unique_list = []
        is_over = False

        while not is_over:
            index_url = "http://jigadori.fkoji.com/?p=%s" % page_index
            index_page_return_code, index_page_response = tool.http_request(index_url)[:2]
            if index_page_return_code != 1:
                log.error("无法访问首页地址 %s" % index_url)
                tool.process_exit()

            index_page = BeautifulSoup.BeautifulSoup(index_page_response)
            photo_list = index_page.body.findAll("div", "photo")
            # 已经下载到最后一页
            if not photo_list:
                break
            for photo_info in photo_list:
                if isinstance(photo_info, BeautifulSoup.NavigableString):
                    continue

                # 从图片页面中解析获取推特发布时间的时间戳
                tweet_created_time = get_tweet_created_time(photo_info)
                if tweet_created_time is None:
                    log.error("第%s张图片，解析tweet-created-at失败" % image_count)
                    continue

                # 下载完毕
                if tweet_created_time <= save_data_image_time:
                    is_over = True
                    break

                # 将第一张图片的上传时间做为新的存档记录
                if first_image_time == 0:
                    first_image_time = tweet_created_time

                # 从图片页面中解析获取推特发布账号
                account_id = get_tweet_account_id(photo_info)
                if account_id is None:
                    log.error("第%s张图片，解析tweet账号失败" % image_count)
                    continue

                # 找图片
                img_tags = photo_info.findAll("img")
                for tag in img_tags:
                    tag_attr = dict(tag.attrs)
                    if robot.check_sub_key(("src", "alt"), tag_attr):
                        image_url = str(tag_attr["src"]).replace(" ", "")
                        # 新增图片导致的重复判断
                        if image_url in unique_list:
                            continue
                        else:
                            unique_list.append(image_url)

                        log.step("开始下载第%s张图片 %s" % (image_count, image_url))

                        file_type = image_url.split(".")[-1]
                        if file_type.find("/") != -1:
                            file_type = "jpg"
                        file_path = os.path.join(image_path, "%05d_%s.%s" % (image_count, account_id, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step("第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error("第%s张图片 %s，account_id：%s，下载失败" % (image_count, image_url, account_id))
                if is_over:
                    break

            if not is_over:
                page_index += 1

        log.step("下载完毕")

        # 排序复制到保存目录
        if self.is_sort:
            is_check_ok = False
            while not is_check_ok:
                # 等待手动检测所有图片结束
                input_str = raw_input(tool.get_time() + " 已经下载完毕，是否下一步操作？ (Y)es or (N)o: ")
                input_str = input_str.lower()
                if input_str in ["y", "yes"]:
                    is_check_ok = True
                elif input_str in ["n", "no"]:
                    tool.process_exit()

            all_path = os.path.join(self.image_download_path, "all")
            if not tool.make_dir(all_path, 0):
                log.error("创建目录 %s 失败" % all_path)
                tool.process_exit()

            file_list = tool.get_dir_files_name(self.image_temp_path, "desc")
            for file_name in file_list:
                image_path = os.path.join(self.image_temp_path, file_name)
                file_name_list = file_name.split(".")
                file_type = file_name_list[-1]
                account_id = "_".join(".".join(file_name_list[:-1]).split("_")[1:])

                # 所有
                image_start_index += 1
                destination_file_name = "%05d_%s.%s" % (image_start_index, account_id, file_type)
                destination_path = os.path.join(all_path, destination_file_name)
                tool.copy_files(image_path, destination_path)

                # 单个
                each_account_path = os.path.join(self.image_download_path, "single", account_id)
                if not os.path.exists(each_account_path):
                    if not tool.make_dir(each_account_path, 0):
                        log.error("创建目录 %s 失败" % each_account_path)
                        tool.process_exit()
                if account_id in account_list:
                    account_list[account_id][1] = int(account_list[account_id][1]) + 1
                else:
                    account_list[account_id] = [account_id, 1]
                destination_file_name = "%05d.%s" % (account_list[account_id][1], file_type)
                destination_path = os.path.join(each_account_path, destination_file_name)
                tool.copy_files(image_path, destination_path)

            log.step("图片从下载目录移动到保存目录成功")

            # 删除临时文件夹
            tool.remove_dir(self.image_temp_path)

        # 保存新的存档文件
        temp_list = [account_list[key] for key in sorted(account_list.keys())]
        # 把总数据插入列表头
        temp_list.insert(0, [ALL_SIGN, str(image_start_index), str(first_image_time)])
        tool.write_file(tool.list_to_string(temp_list), self.save_data_path, 2)

        log.step("全部下载完毕，耗时%s秒，共计图片%s张" % (self.get_run_time(), image_count - 1))