def main(self): global ACCOUNTS # todo 存档文件格式 # 解析存档文件 # account_id account_list = robot.read_save_data(self.save_data_path, 0, ["", ]) ACCOUNTS = account_list.keys() # 循环下载每个id main_thread_count = threading.activeCount() for account_id in sorted(account_list.keys()): # 检查正在运行的线程数 while threading.activeCount() >= self.thread_count + main_thread_count: if robot.is_process_end() == 0: time.sleep(10) else: break # 提前结束 if robot.is_process_end() > 0: break # 开始下载 thread = Download(account_list[account_id], self.thread_lock) thread.start() time.sleep(1) # 检查除主线程外的其他所有线程是不是全部结束了 while threading.activeCount() > main_thread_count: time.sleep(10) # 未完成的数据保存 if len(ACCOUNTS) > 0: new_save_data_file = open(NEW_SAVE_DATA_PATH, "a") for account_id in ACCOUNTS: new_save_data_file.write("\t".join(account_list[account_id]) + "\n") new_save_data_file.close() # todo 是否需要下载图片或视频 # 删除临时文件夹 tool.remove_dir(IMAGE_TEMP_PATH) tool.remove_dir(VIDEO_TEMP_PATH) # 重新排序保存存档文件 robot.rewrite_save_file(NEW_SAVE_DATA_PATH, self.save_data_path) # todo 是否需要下载图片或视频 log.step("全部下载完毕,耗时%s秒,共计图片%s张,视频%s个" % (self.get_run_time(), TOTAL_IMAGE_COUNT, TOTAL_VIDEO_COUNT))
def sort_file(source_path, destination_path, start_count, file_name_length):
    """Copy every file of source_path into destination_path under sequential,
    zero-padded numeric names (keeping each file's extension), starting at
    start_count + 1, then delete source_path.

    Returns False when the destination directory cannot be created,
    True otherwise.
    """
    name_format = "%0" + str(file_name_length) + "d"
    # file names sorted in descending order
    all_file_name = tool.get_dir_files_name(source_path, "desc")
    # make sure the destination directory exists before copying anything
    if len(all_file_name) >= 1:
        if not tool.make_dir(destination_path, 0):
            return False
    for offset, old_file_name in enumerate(all_file_name):
        extension = os.path.splitext(old_file_name)[1]  # includes the leading "."
        new_file_name = (name_format % (start_count + offset + 1)) + extension
        tool.copy_files(os.path.join(source_path, old_file_name), os.path.join(destination_path, new_file_name))
    # drop the now-redundant source directory
    tool.remove_dir(source_path)
    return True
def create_exe(py_file_path, need_zip=False):
    """Package the given python script into a stand-alone Windows exe via py2exe.

    The py2exe output goes to .\\dist; the shared config file is copied in as
    data\\config.ini. With need_zip the dist directory is zipped and removed,
    otherwise it is renamed after the script. The .\\build directory is always
    cleaned up at the end.
    """
    temp_build_path = os.path.realpath(".\\build")
    temp_dist_path = os.path.realpath(".\\dist")
    # script file name without its extension, used to name the output
    exe_name = ".".join(os.path.basename(py_file_path).split(".")[:-1])
    # clear leftovers from any previous build
    tool.remove_dir(temp_build_path)
    tool.remove_dir(temp_dist_path)
    # run the py2exe build
    setup(console=[py_file_path])
    # copy the runtime configuration next to the generated exe
    tool.make_dir(os.path.join(temp_dist_path, "data\\"), 0)
    tool.copy_files(os.path.realpath("..\\common\\config_exe.ini"), os.path.join(temp_dist_path, "data\\config.ini"))
    if need_zip:
        # compress the dist directory, then remove it
        tool.zip_dir(temp_dist_path, os.path.realpath("%s.zip" % exe_name))
        tool.remove_dir(temp_dist_path)
    else:
        # keep the dist directory, renamed after the script
        shutil.move(temp_dist_path, os.path.realpath(".\\%s" % exe_name))
    tool.remove_dir(temp_build_path)
def main(self):
    """Download albums (images and videos) from meituzz.com sequentially.

    Resumes from the album id stored in the save file, walks album ids one by
    one, and finally writes the next album id back to the save file.
    """
    # parse the save file to get the album id reached by the previous run
    album_id = 1
    if os.path.exists(self.save_data_path):
        save_file = open(self.save_data_path, "r")
        save_info = save_file.read()
        save_file.close()
        album_id = int(save_info.strip())
    total_image_count = 0
    total_video_count = 0
    error_count = 0
    is_over = False
    while not is_over:
        album_url = "http://meituzz.com/album/browse?albumID=%s" % album_id
        try:
            album_page_return_code, album_page = tool.http_request(album_url)[:2]
        except SystemExit:
            # user aborted during the request
            log.step("提前退出")
            break
        if album_page_return_code == -500:
            # server-side error for this album: skip it
            log.error("第%s页相册内部错误" % album_id)
            album_id += 1
            continue
        elif album_page_return_code != 1:
            log.error("第%s页图片获取失败" % album_id)
            break
        if album_page.find("<title>相册已被删除</title>") >= 0:
            # deleted album: tolerate a few in a row, then assume the end is reached
            error_count += 1
            if error_count >= ERROR_PAGE_COUNT_CHECK:
                log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                # roll back to the first album of the failed streak
                album_id -= error_count - 1
                break
            else:
                log.error("第%s页相册已被删除" % album_id)
                album_id += 1
                continue
        # reset the consecutive-error counter
        error_count = 0
        # image download
        if self.is_download_image and album_page.find('<input type="hidden" id="imageList"') >= 0:
            total_photo_count = tool.find_sub_string(album_page, '<input type="hidden" id="totalPageNum" value=', ' />')
            if not total_photo_count:
                log.error("第%s页图片数量解析失败" % album_id)
                break
            total_photo_count = int(total_photo_count)
            # get the full list of image URLs on the page
            image_url_list = get_image_url_list(album_page)
            if image_url_list is None:
                log.error("第%s页图片地址列表解析失败" % album_id)
                break
            if len(image_url_list) == 0:
                log.error("第%s页没有获取到图片" % album_id)
                break
            # a paid album may legitimately hide one image; only log a mismatch otherwise
            is_fee = False
            if len(image_url_list) != total_photo_count:
                album_reward_find = re.findall('<input type="hidden" id="rewardAmount" value="(\d*)">', album_page)
                if len(album_reward_find) == 1:
                    album_reward = int(album_reward_find[0])
                    if album_reward > 0 and total_photo_count - len(image_url_list) <= 1:
                        is_fee = True
                if not is_fee:
                    log.error("第%s页解析获取的图片数量不符" % album_id)
                    # break
            image_path = os.path.join(self.image_download_path, "%04d" % album_id)
            if not tool.make_dir(image_path, 0):
                log.error("创建图片下载目录 %s 失败" % image_path)
                break
            image_count = 1
            for image_url in image_url_list:
                # strip the "@..." suffix that applies the blur effect
                image_url = str(image_url).split("@")[0]
                log.step("开始下载第%s页第%s张图片 %s" % (album_id, image_count, image_url))
                image_file_path = os.path.join(image_path, "%04d.jpg" % image_count)
                try:
                    if tool.save_net_file(image_url, image_file_path, True):
                        log.step("第%s页第%s张图片下载成功" % (album_id, image_count))
                        image_count += 1
                    else:
                        log.error("第%s页第%s张图片 %s 下载失败" % (album_id, image_count, image_url))
                except SystemExit:
                    # user aborted: drop the partial album directory and stop
                    log.step("提前退出")
                    tool.remove_dir(image_path)
                    is_over = True
                    break
            total_image_count += image_count - 1
        # video download
        # NOTE(review): this gates on is_download_image, not is_download_video --
        # looks like a copy/paste typo; confirm against the class attributes
        if self.is_download_image and album_page.find('<input type="hidden" id="VideoUrl"') >= 0:
            # get the video download URL
            video_url = get_video_url(album_page)
            log.step("开始下载第%s页视频 %s" % (album_id, video_url))
            video_title = robot.filter_text(tool.find_sub_string(album_page, "<title>", "</title>"))
            file_type = video_url.split(".")[-1]
            video_file_path = os.path.join(self.video_download_path, "%s %s.%s" % (album_id, video_title, file_type))
            try:
                if tool.save_net_file(video_url, video_file_path, True):
                    log.step("第%s页视频下载成功" % album_id)
                    total_video_count += 1
                else:
                    log.error("第%s页视频 %s 下载失败" % (album_id, video_url))
            except SystemExit:
                log.step("提前退出")
                is_over = True
        if not is_over:
            album_id += 1
    # rewrite the save file with the next album id to resume from
    save_data_dir = os.path.dirname(self.save_data_path)
    if not os.path.exists(save_data_dir):
        tool.make_dir(save_data_dir, 0)
    save_file = open(self.save_data_path, "w")
    save_file.write(str(album_id))
    save_file.close()
    log.step("全部下载完毕,耗时%s秒,共计图片%s张,视频%s个" % (self.get_run_time(), total_image_count, total_video_count))
def main(self):
    """Download new images from jigadori.fkoji.com.

    Walks the paginated index newest-first until it reaches the timestamp
    recorded in the save file, optionally lets the user vet the downloads
    and sorts them into "all" and per-account directories, then rewrites
    the save file.
    """
    # parse the save file
    # looking for fkoji.save
    account_list = robot.read_save_data(self.save_data_path, 0, ["", "", ""])
    # this key holds the aggregate record (total index + newest timestamp)
    if ALL_SIGN in account_list:
        image_start_index = int(account_list[ALL_SIGN][1])
        save_data_image_time = int(account_list[ALL_SIGN][2])
        account_list.pop(ALL_SIGN)
    else:
        image_start_index = 0
        save_data_image_time = 0
    # when sorting, download to the temp directory first
    if self.is_sort:
        image_path = self.image_temp_path
    else:
        image_path = self.image_download_path
    if not tool.make_dir(image_path, 0):
        # failed to create the image download directory
        self.print_msg("图片下载目录%s创建失败!" % self.image_download_path)
        tool.process_exit()
    # download
    page_index = 1
    image_count = 1
    first_image_time = 0
    unique_list = []
    is_over = False
    while not is_over:
        index_url = "http://jigadori.fkoji.com/?p=%s" % page_index
        index_page_return_code, index_page_response = tool.http_request(index_url)[:2]
        if index_page_return_code != 1:
            log.error("无法访问首页地址 %s" % index_url)
            tool.process_exit()
        index_page = BeautifulSoup.BeautifulSoup(index_page_response)
        photo_list = index_page.body.findAll("div", "photo")
        # already past the last page
        if not photo_list:
            break
        for photo_info in photo_list:
            if isinstance(photo_info, BeautifulSoup.NavigableString):
                continue
            # parse the tweet's publication timestamp from the photo node
            tweet_created_time = get_tweet_created_time(photo_info)
            if tweet_created_time is None:
                log.error("第%s张图片,解析tweet-created-at失败" % image_count)
                continue
            # reached an image already covered by the previous run: done
            if tweet_created_time <= save_data_image_time:
                is_over = True
                break
            # the first (newest) image's timestamp becomes the new save record
            # NOTE(review): if no new image is found this stays 0 and is written
            # to the save file below -- confirm that is intended
            if first_image_time == 0:
                first_image_time = tweet_created_time
            # parse the posting account from the photo node
            account_id = get_tweet_account_id(photo_info)
            if account_id is None:
                log.error("第%s张图片,解析tweet账号失败" % image_count)
                continue
            # find the images
            img_tags = photo_info.findAll("img")
            for tag in img_tags:
                tag_attr = dict(tag.attrs)
                if robot.check_sub_key(("src", "alt"), tag_attr):
                    image_url = str(tag_attr["src"]).replace(" ", "")
                    # skip duplicates caused by images being added between pages
                    if image_url in unique_list:
                        continue
                    else:
                        unique_list.append(image_url)
                    log.step("开始下载第%s张图片 %s" % (image_count, image_url))
                    file_type = image_url.split(".")[-1]
                    # URL with no real extension: fall back to jpg
                    if file_type.find("/") != -1:
                        file_type = "jpg"
                    file_path = os.path.join(image_path, "%05d_%s.%s" % (image_count, account_id, file_type))
                    if tool.save_net_file(image_url, file_path):
                        log.step("第%s张图片下载成功" % image_count)
                        image_count += 1
                    else:
                        log.error("第%s张图片 %s,account_id:%s,下载失败" % (image_count, image_url, account_id))
            if is_over:
                break
        if not is_over:
            page_index += 1
    log.step("下载完毕")
    # sort and copy into the final save directories
    if self.is_sort:
        is_check_ok = False
        while not is_check_ok:
            # wait for the user to manually vet all downloaded images
            input_str = raw_input(tool.get_time() + " 已经下载完毕,是否下一步操作? (Y)es or (N)o: ")
            input_str = input_str.lower()
            if input_str in ["y", "yes"]:
                is_check_ok = True
            elif input_str in ["n", "no"]:
                tool.process_exit()
        all_path = os.path.join(self.image_download_path, "all")
        if not tool.make_dir(all_path, 0):
            log.error("创建目录 %s 失败" % all_path)
            tool.process_exit()
        file_list = tool.get_dir_files_name(self.image_temp_path, "desc")
        for file_name in file_list:
            # NOTE: image_path is reused here as the per-file source path
            image_path = os.path.join(self.image_temp_path, file_name)
            file_name_list = file_name.split(".")
            file_type = file_name_list[-1]
            # the part after the first "_" of the stem is the account id
            account_id = "_".join(".".join(file_name_list[:-1]).split("_")[1:])
            # "all" directory: global running index
            image_start_index += 1
            destination_file_name = "%05d_%s.%s" % (image_start_index, account_id, file_type)
            destination_path = os.path.join(all_path, destination_file_name)
            tool.copy_files(image_path, destination_path)
            # per-account directory
            each_account_path = os.path.join(self.image_download_path, "single", account_id)
            if not os.path.exists(each_account_path):
                if not tool.make_dir(each_account_path, 0):
                    log.error("创建目录 %s 失败" % each_account_path)
                    tool.process_exit()
            # bump this account's per-account counter
            if account_id in account_list:
                account_list[account_id][1] = int(account_list[account_id][1]) + 1
            else:
                account_list[account_id] = [account_id, 1]
            destination_file_name = "%05d.%s" % (account_list[account_id][1], file_type)
            destination_path = os.path.join(each_account_path, destination_file_name)
            tool.copy_files(image_path, destination_path)
        log.step("图片从下载目录移动到保存目录成功")
        # delete the temporary directory
        tool.remove_dir(self.image_temp_path)
    # write the new save file
    temp_list = [account_list[key] for key in sorted(account_list.keys())]
    # insert the aggregate record at the head of the list
    temp_list.insert(0, [ALL_SIGN, str(image_start_index), str(first_image_time)])
    tool.write_file(tool.list_to_string(temp_list), self.save_data_path, 2)
    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), image_count - 1))
def main(self):
    """Download albums from zunguang.com one id at a time.

    Resumes from the album id stored in the save file, downloads every image
    of each album into its own numbered directory and finally writes the next
    album id back to the save file.

    Fix: the fallback branch for a failed directory creation used
    ``os.path.join(image_path, page_count)`` -- joining a str with an int
    (TypeError on Python 2) and nesting the retry inside the path that had
    just failed, while later downloads still targeted the failing path.
    The retry now rebuilds image_path without the title.
    """
    # parse the save file to get the album id reached by the previous run
    page_count = 1
    if os.path.exists(self.save_data_path):
        save_file = open(self.save_data_path, "r")
        save_info = save_file.read()
        save_file.close()
        page_count = int(save_info.strip())
    total_image_count = 0
    error_count = 0
    is_over = False
    while not is_over:
        album_status, album_data = get_one_page_album_data(page_count)
        if album_status == -1:
            log.error("第%s页相册获取失败" % page_count)
            break
        elif album_status == -2:
            log.error("第%s页相册解析失败" % page_count)
            break
        elif album_status == 2:
            # deleted album: tolerate a few in a row, then assume the end is reached
            error_count += 1
            if error_count >= ERROR_PAGE_COUNT_CHECK:
                log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                # roll back to the first album of the failed streak
                page_count -= error_count - 1
                break
            else:
                log.error("第%s页相册已被删除" % page_count)
                page_count += 1
                continue
        elif album_status == 3:
            # music album, nothing to download
            log.error("第%s页歌曲相册" % page_count)
            page_count += 1
            continue
        elif album_status == 4:
            log.error("第%s页相册未知相册类型%s" % (page_count, album_data))
            break
        # reset the consecutive-error counter
        error_count = 0
        # build the download directory name from the album title
        title = ""
        if album_data["title"]:
            # strip characters not allowed in file names
            title = robot.filter_text(str(album_data["title"].encode("utf-8")))
        if title:
            image_path = os.path.join(self.image_download_path, "%04d %s" % (page_count, title))
        else:
            image_path = os.path.join(self.image_download_path, "%04d" % page_count)
        if not tool.make_dir(image_path, 0):
            # directory creation failed (the title may contain characters the
            # file system rejects): retry once without the title, then give up
            log.error("第%s页创建相册目录 %s 失败,尝试不使用title" % (page_count, image_path))
            image_path = os.path.join(self.image_download_path, "%04d" % page_count)
            if not tool.make_dir(image_path, 0):
                log.error("第%s页创建相册目录 %s 失败" % (page_count, image_path))
                tool.process_exit()
        image_count = 1
        for image_data in album_data["attr"]["img"]:
            image_url = "http://www.zunguang.com/%s" % str(image_data["url"])
            log.step("开始下载第%s页第%s张图片 %s" % (page_count, image_count, image_url))
            file_type = image_url.split(".")[-1]
            file_path = os.path.join(image_path, "%03d.%s" % (image_count, file_type))
            try:
                if tool.save_net_file(image_url, file_path, True):
                    log.step("第%s页第%s张图片下载成功" % (page_count, image_count))
                    image_count += 1
                else:
                    log.error("第%s页第%s张图片 %s 下载失败" % (page_count, image_count, image_url))
            except SystemExit:
                # user aborted: drop the partial album directory and stop
                log.step("提前退出")
                tool.remove_dir(image_path)
                is_over = True
                break
        if not is_over:
            total_image_count += image_count - 1
            page_count += 1
    # rewrite the save file with the next album id to resume from
    save_data_dir = os.path.dirname(self.save_data_path)
    if not os.path.exists(save_data_dir):
        tool.make_dir(save_data_dir, 0)
    save_file = open(self.save_data_path, "w")
    save_file.write(str(page_count))
    save_file.close()
    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), total_image_count))