Ejemplo n.º 1
0
    def main(self):
        """Download every account listed in the save file using worker threads.

        Reads the save data, starts one ``Download`` thread per account while
        keeping at most ``self.thread_count`` workers alive, waits for all of
        them to finish, appends the records of every account still present in
        the global ``ACCOUNTS`` to the new save file, removes the temporary
        directories and rewrites the save file from the new one.
        """
        global ACCOUNTS

        # TODO: save file format
        # Parse the save file; records are keyed by account_id
        account_list = robot.read_save_data(self.save_data_path, 0, ["", ])
        # NOTE(review): presumably worker threads remove finished accounts from
        # ACCOUNTS, leaving only unfinished ones for the block below - confirm.
        ACCOUNTS = account_list.keys()

        # Download each account id in turn
        main_thread_count = threading.activeCount()
        for account_id in sorted(account_list.keys()):
            # Wait while the number of running worker threads is at the limit
            while threading.activeCount() >= self.thread_count + main_thread_count:
                if robot.is_process_end() == 0:
                    time.sleep(10)
                else:
                    break

            # Stop early when the process was asked to end
            if robot.is_process_end() > 0:
                break

            # Start the download
            thread = Download(account_list[account_id], self.thread_lock)
            thread.start()

            time.sleep(1)

        # Wait until every thread other than the pre-existing ones has finished
        while threading.activeCount() > main_thread_count:
            time.sleep(10)

        # Save the records of the accounts that were not completed
        if len(ACCOUNTS) > 0:
            # "with" guarantees the handle is closed even if a write fails
            with open(NEW_SAVE_DATA_PATH, "a") as new_save_data_file:
                for account_id in ACCOUNTS:
                    new_save_data_file.write("\t".join(account_list[account_id]) + "\n")

        # TODO: depend on whether images / videos were requested
        # Remove the temporary directories
        tool.remove_dir(IMAGE_TEMP_PATH)
        tool.remove_dir(VIDEO_TEMP_PATH)

        # Re-sort and store the save file
        robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path)

        # TODO: depend on whether images / videos were requested
        log.step("全部下载完毕,耗时%s秒,共计图片%s张,视频%s个" % (self.get_run_time(), TOTAL_IMAGE_COUNT, TOTAL_VIDEO_COUNT))
Ejemplo n.º 2
0
def sort_file(source_path, destination_path, start_count, file_name_length):
    """Copy the files of ``source_path`` into ``destination_path`` under numbered names.

    The files are listed through ``tool.get_dir_files_name`` with the "desc"
    flag. Each one is copied under a zero-padded sequence number (padded to
    ``file_name_length`` digits, counting up from ``start_count + 1``) followed
    by its original extension. Afterwards ``source_path`` is removed.

    Returns False when the destination directory cannot be created, True
    otherwise (including when there is nothing to copy).
    """
    pending_files = tool.get_dir_files_name(source_path, "desc")

    # Nothing to sort: leave both directories untouched
    if len(pending_files) < 1:
        return True

    # Make sure the destination directory exists
    if not tool.make_dir(destination_path, 0):
        return False

    # e.g. file_name_length == 4 gives "%04d"
    name_format = "%0" + str(file_name_length) + "d"
    sequence = start_count
    for original_name in pending_files:
        sequence += 1
        extension = os.path.splitext(original_name)[1]  # includes the leading dot
        target_name = str(name_format % sequence) + extension
        tool.copy_files(os.path.join(source_path, original_name), os.path.join(destination_path, target_name))

    # Remove the temporary source directory
    tool.remove_dir(source_path)
    return True
Ejemplo n.º 3
0
def create_exe(py_file_path, need_zip=False):
    """Build a console executable from ``py_file_path`` and package the result.

    Clears the ".\\build" and ".\\dist" directories, runs ``setup`` with the
    script as a console target, and copies the exe config file into the dist
    "data" directory. The dist directory is then either zipped to
    "<script name>.zip" (``need_zip`` true) or moved to ".\\<script name>".
    The build directory is removed at the end.

    NOTE(review): ``setup`` is presumably the distutils/py2exe entry point -
    confirm against the file's imports.
    """
    work_dir = os.path.realpath(".\\build")
    dist_dir = os.path.realpath(".\\dist")

    # Script file name with its last extension stripped
    script_name = os.path.basename(py_file_path)
    base_name = ".".join(script_name.split(".")[:-1])

    # Remove the output of previous builds
    for stale_dir in (work_dir, dist_dir):
        tool.remove_dir(stale_dir)

    # Build
    setup(console=[py_file_path])

    # Copy the other files the executable needs
    data_dir = os.path.join(dist_dir, "data\\")
    tool.make_dir(data_dir, 0)
    config_source = os.path.realpath("..\\common\\config_exe.ini")
    config_target = os.path.join(dist_dir, "data\\config.ini")
    tool.copy_files(config_source, config_target)

    if not need_zip:
        shutil.move(dist_dir, os.path.realpath(".\\%s" % base_name))
    else:
        archive_path = os.path.realpath("%s.zip" % base_name)
        tool.zip_dir(dist_dir, archive_path)
        tool.remove_dir(dist_dir)

    # Remove the temporary build directory
    tool.remove_dir(work_dir)
Ejemplo n.º 4
0
    def main(self):
        """Walk album pages by increasing album id and download their media.

        Starts from the album id stored in the save file (1 when the file does
        not exist), downloads the images and/or the video found on each album
        page, and finally writes the album id to resume from back to the save
        file.
        """
        # Parse the save file to get the album id reached by the previous run
        album_id = 1
        if os.path.exists(self.save_data_path):
            # "with" closes the handle even if the read fails
            with open(self.save_data_path, "r") as save_file:
                save_info = save_file.read()
            album_id = int(save_info.strip())

        total_image_count = 0
        total_video_count = 0
        error_count = 0  # consecutive "album deleted" pages seen so far
        is_over = False
        while not is_over:
            album_url = "http://meituzz.com/album/browse?albumID=%s" % album_id
            try:
                album_page_return_code, album_page = tool.http_request(album_url)[:2]
            except SystemExit:
                log.step("提前退出")
                break

            if album_page_return_code == -500:
                # Internal error on this album: skip it
                log.error("第%s页相册内部错误" % album_id)
                album_id += 1
                continue
            elif album_page_return_code != 1:
                log.error("第%s页图片获取失败" % album_id)
                break

            if album_page.find("<title>相册已被删除</title>") >= 0:
                error_count += 1
                if error_count >= ERROR_PAGE_COUNT_CHECK:
                    log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                    # Rewind to the first page of this run of deleted albums
                    album_id -= error_count - 1
                    break
                else:
                    log.error("第%s页相册已被删除" % album_id)
                    album_id += 1
                    continue
            # Reset the consecutive error counter
            error_count = 0

            # Image download
            if self.is_download_image and album_page.find('<input type="hidden" id="imageList"') >= 0:
                total_photo_count = tool.find_sub_string(album_page, '<input type="hidden" id="totalPageNum" value=', ' />')
                if not total_photo_count:
                    log.error("第%s页图片数量解析失败" % album_id)
                    break
                total_photo_count = int(total_photo_count)

                # Collect every image url of the page
                image_url_list = get_image_url_list(album_page)
                if image_url_list is None:
                    log.error("第%s页图片地址列表解析失败" % album_id)
                    break

                if len(image_url_list) == 0:
                    log.error("第%s页没有获取到图片" % album_id)
                    break

                # A count mismatch is tolerated (not logged) only when the album
                # has a positive reward amount and at most one image is missing
                is_fee = False
                if len(image_url_list) != total_photo_count:
                    album_reward_find = re.findall(r'<input type="hidden" id="rewardAmount" value="(\d*)">', album_page)
                    if len(album_reward_find) == 1:
                        album_reward = int(album_reward_find[0])
                        if album_reward > 0 and total_photo_count - len(image_url_list) <= 1:
                            is_fee = True
                    if not is_fee:
                        # Logged only; the download still proceeds
                        log.error("第%s页解析获取的图片数量不符" % album_id)

                image_path = os.path.join(self.image_download_path, "%04d" % album_id)
                if not tool.make_dir(image_path, 0):
                    log.error("创建图片下载目录 %s 失败" % image_path)
                    break

                image_count = 1
                for image_url in image_url_list:
                    # Drop the "@..." suffix to get the image without the blur effect
                    image_url = str(image_url).split("@")[0]
                    log.step("开始下载第%s页第%s张图片 %s" % (album_id, image_count, image_url))

                    image_file_path = os.path.join(image_path, "%04d.jpg" % image_count)
                    try:
                        if tool.save_net_file(image_url, image_file_path, True):
                            log.step("第%s页第%s张图片下载成功" % (album_id, image_count))
                            image_count += 1
                        else:
                            log.error("第%s页第%s张图片 %s 下载失败" % (album_id, image_count, image_url))
                    except SystemExit:
                        # Early exit: drop the partial album, it is redone on the next run
                        log.step("提前退出")
                        tool.remove_dir(image_path)
                        is_over = True
                        break

                total_image_count += image_count - 1

            # Video download; skipped once an early exit was requested during the
            # image stage, since this album id is processed again on the next run.
            # NOTE(review): gated on is_download_image like the image stage -
            # confirm whether a video-specific switch was intended.
            if not is_over and self.is_download_image and album_page.find('<input type="hidden" id="VideoUrl"') >= 0:
                # Resolve the video download url
                video_url = get_video_url(album_page)
                log.step("开始下载第%s页视频 %s" % (album_id, video_url))

                video_title = robot.filter_text(tool.find_sub_string(album_page, "<title>", "</title>"))
                file_type = video_url.split(".")[-1]
                video_file_path = os.path.join(self.video_download_path, "%s %s.%s" % (album_id, video_title, file_type))
                try:
                    if tool.save_net_file(video_url, video_file_path, True):
                        log.step("第%s页视频下载成功" % album_id)
                        total_video_count += 1
                    else:
                        log.error("第%s页视频 %s 下载失败" % (album_id, video_url))
                except SystemExit:
                    log.step("提前退出")
                    is_over = True

            if not is_over:
                album_id += 1

        # Write the save file again with the album id to resume from
        save_data_dir = os.path.dirname(self.save_data_path)
        if not os.path.exists(save_data_dir):
            tool.make_dir(save_data_dir, 0)
        with open(self.save_data_path, "w") as save_file:
            save_file.write(str(album_id))

        log.step("全部下载完毕,耗时%s秒,共计图片%s张,视频%s个" % (self.get_run_time(), total_image_count, total_video_count))
Ejemplo n.º 5
0
    def main(self):
        """Download new images from the index pages and update the save file.

        Reads the save data (the ``ALL_SIGN`` record holds the running image
        index and the timestamp of the newest image already downloaded), walks
        the index pages until an already-downloaded tweet or an empty page is
        reached, and saves every new image. When ``self.is_sort`` is set the
        images go to a temporary directory first and, after a manual
        confirmation, are copied into the "all" directory and into one
        "single/<account>" directory per account. Finally the save file is
        rewritten.
        """
        # Parse the save file
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "", ""])

        # The record stored under this key holds the overall totals
        if ALL_SIGN in account_list:
            image_start_index = int(account_list[ALL_SIGN][1])
            save_data_image_time = int(account_list[ALL_SIGN][2])
            account_list.pop(ALL_SIGN)
        else:
            image_start_index = 0
            save_data_image_time = 0

        if self.is_sort:
            image_path = self.image_temp_path
        else:
            image_path = self.image_download_path

        if not tool.make_dir(image_path, 0):
            # Could not create the directory images are saved to; report the
            # path that actually failed (it is the temp path when sorting)
            self.print_msg("图片下载目录%s创建失败!" % image_path)
            tool.process_exit()

        # Download
        page_index = 1
        image_count = 1
        first_image_time = 0
        unique_list = []
        is_over = False

        while not is_over:
            index_url = "http://jigadori.fkoji.com/?p=%s" % page_index
            index_page_return_code, index_page_response = tool.http_request(index_url)[:2]
            if index_page_return_code != 1:
                log.error("无法访问首页地址 %s" % index_url)
                tool.process_exit()

            index_page = BeautifulSoup.BeautifulSoup(index_page_response)
            photo_list = index_page.body.findAll("div", "photo")
            # The last page has been passed
            if not photo_list:
                break
            for photo_info in photo_list:
                if isinstance(photo_info, BeautifulSoup.NavigableString):
                    continue

                # Parse the timestamp of the tweet from the photo block
                tweet_created_time = get_tweet_created_time(photo_info)
                if tweet_created_time is None:
                    log.error("第%s张图片,解析tweet-created-at失败" % image_count)
                    continue

                # Reached a tweet that was already downloaded: finished
                if tweet_created_time <= save_data_image_time:
                    is_over = True
                    break

                # The time of the first image becomes the new save record
                if first_image_time == 0:
                    first_image_time = tweet_created_time

                # Parse the account that posted the tweet
                account_id = get_tweet_account_id(photo_info)
                if account_id is None:
                    log.error("第%s张图片,解析tweet账号失败" % image_count)
                    continue

                # Look for the images
                img_tags = photo_info.findAll("img")
                for tag in img_tags:
                    tag_attr = dict(tag.attrs)
                    if robot.check_sub_key(("src", "alt"), tag_attr):
                        image_url = str(tag_attr["src"]).replace(" ", "")
                        # Skip urls seen before (newly posted images shift the pages)
                        if image_url in unique_list:
                            continue
                        else:
                            unique_list.append(image_url)

                        log.step("开始下载第%s张图片 %s" % (image_count, image_url))

                        file_type = image_url.split(".")[-1]
                        # No usable extension after the last dot: fall back to jpg
                        if file_type.find("/") != -1:
                            file_type = "jpg"
                        file_path = os.path.join(image_path, "%05d_%s.%s" % (image_count, account_id, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step("第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error("第%s张图片 %s,account_id:%s,下载失败" % (image_count, image_url, account_id))
                if is_over:
                    break

            if not is_over:
                page_index += 1

        log.step("下载完毕")

        # Sort and copy into the final directories
        if self.is_sort:
            is_check_ok = False
            while not is_check_ok:
                # Wait until the images have been checked manually
                input_str = raw_input(tool.get_time() + " 已经下载完毕,是否下一步操作? (Y)es or (N)o: ")
                input_str = input_str.lower()
                if input_str in ["y", "yes"]:
                    is_check_ok = True
                elif input_str in ["n", "no"]:
                    tool.process_exit()

            all_path = os.path.join(self.image_download_path, "all")
            if not tool.make_dir(all_path, 0):
                log.error("创建目录 %s 失败" % all_path)
                tool.process_exit()

            file_list = tool.get_dir_files_name(self.image_temp_path, "desc")
            for file_name in file_list:
                source_file_path = os.path.join(self.image_temp_path, file_name)
                file_name_list = file_name.split(".")
                file_type = file_name_list[-1]
                # Temp names are "<index>_<account_id>.<type>": drop the index part
                account_id = "_".join(".".join(file_name_list[:-1]).split("_")[1:])

                # Copy into the directory holding every image
                image_start_index += 1
                destination_file_name = "%05d_%s.%s" % (image_start_index, account_id, file_type)
                destination_path = os.path.join(all_path, destination_file_name)
                tool.copy_files(source_file_path, destination_path)

                # Copy into the per-account directory
                each_account_path = os.path.join(self.image_download_path, "single", account_id)
                if not os.path.exists(each_account_path):
                    if not tool.make_dir(each_account_path, 0):
                        log.error("创建目录 %s 失败" % each_account_path)
                        tool.process_exit()
                # NOTE(review): the per-account counter is stored as an int here
                # while the totals record stores strings - confirm that
                # tool.list_to_string accepts both.
                if account_id in account_list:
                    account_list[account_id][1] = int(account_list[account_id][1]) + 1
                else:
                    account_list[account_id] = [account_id, 1]
                destination_file_name = "%05d.%s" % (account_list[account_id][1], file_type)
                destination_path = os.path.join(each_account_path, destination_file_name)
                tool.copy_files(source_file_path, destination_path)

            log.step("图片从下载目录移动到保存目录成功")

            # Remove the temporary directory
            tool.remove_dir(self.image_temp_path)

        # No new image was found in this run: keep the previously saved
        # timestamp, otherwise 0 would be stored and the next run would treat
        # every image as new again
        if first_image_time == 0:
            first_image_time = save_data_image_time

        # Store the new save file
        temp_list = [account_list[key] for key in sorted(account_list.keys())]
        # Put the totals record at the head of the list
        temp_list.insert(0, [ALL_SIGN, str(image_start_index), str(first_image_time)])
        tool.write_file(tool.list_to_string(temp_list), self.save_data_path, 2)

        log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), image_count - 1))
Ejemplo n.º 6
0
    def main(self):
        """Walk album pages by increasing page number and download their images.

        Starts from the page number stored in the save file (1 when the file
        does not exist), downloads every image of each album into its own
        directory and finally writes the page number to resume from back to the
        save file.
        """
        # Parse the save file to get the page reached by the previous run
        page_count = 1
        if os.path.exists(self.save_data_path):
            # "with" closes the handle even if the read fails
            with open(self.save_data_path, "r") as save_file:
                save_info = save_file.read()
            page_count = int(save_info.strip())

        total_image_count = 0
        error_count = 0  # consecutive "album deleted" pages seen so far
        is_over = False
        while not is_over:
            album_status, album_data = get_one_page_album_data(page_count)

            if album_status == -1:
                log.error("第%s页相册获取失败" % page_count)
                break
            elif album_status == -2:
                log.error("第%s页相册解析失败" % page_count)
                break
            elif album_status == 2:
                error_count += 1
                if error_count >= ERROR_PAGE_COUNT_CHECK:
                    log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                    # Rewind to the first page of this run of deleted albums
                    page_count -= error_count - 1
                    break
                else:
                    log.error("第%s页相册已被删除" % page_count)
                    page_count += 1
                    continue
            elif album_status == 3:
                # Song album: nothing to download, skip it
                log.error("第%s页歌曲相册" % page_count)
                page_count += 1
                continue
            elif album_status == 4:
                log.error("第%s页相册未知相册类型%s" % (page_count, album_data))
                break
            # Reset the consecutive error counter
            error_count = 0

            # Title used in the download directory name
            title = ""
            if album_data["title"]:
                # Filter out characters that are not supported in the title
                title = robot.filter_text(str(album_data["title"].encode("utf-8")))
            if title:
                image_path = os.path.join(self.image_download_path, "%04d %s" % (page_count, title))
            else:
                image_path = os.path.join(self.image_download_path, "%04d" % page_count)
            if not tool.make_dir(image_path, 0):
                # Directory creation failed: retry once without the title and
                # give up if that fails too
                log.error("第%s页创建相册目录 %s 失败,尝试不使用title" % (page_count, image_path))
                # Point image_path at the title-less directory so the downloads
                # below use the directory that was actually created
                image_path = os.path.join(self.image_download_path, "%04d" % page_count)
                if not tool.make_dir(image_path, 0):
                    log.error("第%s页创建相册目录 %s 失败" % (page_count, image_path))
                    tool.process_exit()

            image_count = 1
            for image_data in album_data["attr"]["img"]:
                image_url = "http://www.zunguang.com/%s" % str(image_data["url"])
                log.step("开始下载第%s页第%s张图片 %s" % (page_count, image_count, image_url))

                file_type = image_url.split(".")[-1]
                file_path = os.path.join(image_path, "%03d.%s" % (image_count, file_type))
                try:
                    if tool.save_net_file(image_url, file_path, True):
                        log.step("第%s页第%s张图片下载成功" % (page_count, image_count))
                        image_count += 1
                    else:
                        log.error("第%s页第%s张图片 %s 下载失败" % (page_count, image_count, image_url))
                except SystemExit:
                    # Early exit: drop the partial album, it is redone on the next run
                    log.step("提前退出")
                    tool.remove_dir(image_path)
                    is_over = True
                    break

            if not is_over:
                total_image_count += image_count - 1
                page_count += 1

        # Write the save file again with the page to resume from
        save_data_dir = os.path.dirname(self.save_data_path)
        if not os.path.exists(save_data_dir):
            tool.make_dir(save_data_dir, 0)
        with open(self.save_data_path, "w") as save_file:
            save_file.write(str(page_count))

        log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), total_image_count))