def __init__(self):
    """Initialise the crawler and mirror its settings into module globals.

    Builds this crawler's ``sys_config``, hands it to
    ``robot.Robot.__init__`` and then copies the instance attributes that
    the worker code reads into module-level names.
    """
    global GET_IMAGE_COUNT, GET_VIDEO_COUNT
    global IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH
    global VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH
    global IS_SORT, IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO

    # TODO: configuration
    sys_config = {}
    sys_config[robot.SYS_DOWNLOAD_IMAGE] = True
    sys_config[robot.SYS_DOWNLOAD_VIDEO] = True
    sys_config[robot.SYS_SET_PROXY] = True
    sys_config[robot.SYS_NOT_CHECK_SAVE_DATA] = True
    sys_config[robot.SYS_SET_COOKIE] = ('example.com',)
    robot.Robot.__init__(self, sys_config)

    # Publish the settings as globals so that child threads can use them.
    # TODO: decide whether images or videos need to be downloaded
    GET_IMAGE_COUNT, GET_VIDEO_COUNT = self.get_image_count, self.get_video_count
    IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH = self.image_temp_path, self.image_download_path
    VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH = self.video_temp_path, self.video_download_path
    IS_SORT = self.is_sort
    IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO = self.is_download_image, self.is_download_video
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the crawler and mirror its settings into module globals.

    Passes a ``sys_config`` enabling the image and video download flags to
    ``robot.Robot.__init__``, then copies the resulting paths and switches
    into module-level names.
    """
    global IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH
    global VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH
    global IS_SORT, IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO

    robot.Robot.__init__(self, {
        robot.SYS_DOWNLOAD_IMAGE: True,
        robot.SYS_DOWNLOAD_VIDEO: True,
    })

    # Publish the settings as globals so that child threads can use them.
    IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH = self.image_temp_path, self.image_download_path
    VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH = self.video_temp_path, self.video_download_path
    IS_SORT = self.is_sort
    IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO = self.is_download_image, self.is_download_video
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self, extra_config=None):
    """Initialise the crawler and mirror its settings into module globals.

    :param extra_config: optional value forwarded unchanged as the second
        configuration argument of ``robot.Robot.__init__``.
    """
    global GET_IMAGE_COUNT, GET_VIDEO_COUNT
    global IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH
    global VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH
    global IS_SORT, IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO

    cookie_domains = ("weibo.com", ".sina.com.cn")
    sys_config = {}
    sys_config[robot.SYS_DOWNLOAD_IMAGE] = True
    sys_config[robot.SYS_DOWNLOAD_VIDEO] = True
    sys_config[robot.SYS_SET_COOKIE] = cookie_domains
    robot.Robot.__init__(self, sys_config, extra_config)

    # Publish the settings as globals so that child threads can use them.
    GET_IMAGE_COUNT, GET_VIDEO_COUNT = self.get_image_count, self.get_video_count
    IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH = self.image_temp_path, self.image_download_path
    VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH = self.video_temp_path, self.video_download_path
    IS_SORT = self.is_sort
    IS_DOWNLOAD_IMAGE, IS_DOWNLOAD_VIDEO = self.is_download_image, self.is_download_video
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the video crawler and mirror its settings into globals.

    Passes a ``sys_config`` with only the video download flag set to
    ``robot.Robot.__init__``, then copies the video count, the video
    download path and the new save-data path into module-level names.
    """
    global GET_VIDEO_COUNT, VIDEO_DOWNLOAD_PATH, NEW_SAVE_DATA_PATH

    robot.Robot.__init__(self, {robot.SYS_DOWNLOAD_VIDEO: True})

    # Publish the settings as globals so that child threads can use them.
    GET_VIDEO_COUNT, VIDEO_DOWNLOAD_PATH = self.get_video_count, self.video_download_path
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the image crawler and mirror its settings into globals.

    Passes a ``sys_config`` with the image download flag and the
    ``bcy.net`` cookie domain to ``robot.Robot.__init__``, then copies the
    page count, the image download path and the new save-data path into
    module-level names.
    """
    global GET_PAGE_COUNT, IMAGE_DOWNLOAD_PATH, NEW_SAVE_DATA_PATH

    sys_config = {}
    sys_config[robot.SYS_DOWNLOAD_IMAGE] = True
    sys_config[robot.SYS_SET_COOKIE] = ("bcy.net",)
    robot.Robot.__init__(self, sys_config)

    # Publish the settings as globals so that child threads can use them.
    GET_PAGE_COUNT, IMAGE_DOWNLOAD_PATH = self.get_page_count, self.image_download_path
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the image crawler and mirror its settings into globals.

    Passes a ``sys_config`` with the image download and proxy flags set to
    ``robot.Robot.__init__``, then copies the image settings, the sort
    switch and the new save-data path into module-level names.
    """
    global GET_IMAGE_COUNT
    global IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH, IS_SORT

    robot.Robot.__init__(self, {
        robot.SYS_DOWNLOAD_IMAGE: True,
        robot.SYS_SET_PROXY: True,
    })

    # Publish the settings as globals so that child threads can use them.
    GET_IMAGE_COUNT = self.get_image_count
    IMAGE_TEMP_PATH, IMAGE_DOWNLOAD_PATH = self.image_temp_path, self.image_download_path
    IS_SORT = self.is_sort
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the video crawler and mirror its settings into globals.

    Passes a ``sys_config`` with the video download and proxy flags set to
    ``robot.Robot.__init__``, then copies the video settings, the sort
    switch and the new save-data path into module-level names.
    """
    global GET_VIDEO_COUNT
    global VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH, IS_SORT

    robot.Robot.__init__(self, {
        robot.SYS_DOWNLOAD_VIDEO: True,
        robot.SYS_SET_PROXY: True,
    })

    # Publish the settings as globals so that child threads can use them.
    # TODO: decide whether images or videos need to be downloaded
    GET_VIDEO_COUNT = self.get_video_count
    VIDEO_TEMP_PATH, VIDEO_DOWNLOAD_PATH = self.video_temp_path, self.video_download_path
    IS_SORT = self.is_sort
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def __init__(self):
    """Initialise the article crawler and mirror its settings into globals.

    Passes ``sys_config`` (image download flag plus the weibo/sina cookie
    domains) and an ``extra_config`` with article-specific paths to
    ``robot.Robot.__init__``, then copies the image settings, the sort
    switch and the new save-data path into module-level names.

    The ``extra_config`` paths are built relative to the current working
    directory, one ``os.path.join`` component per directory level, so the
    same nested layout is produced on every platform instead of relying
    on hard-coded backslash separators.
    """
    global GET_IMAGE_COUNT
    global IMAGE_TEMP_PATH
    global IMAGE_DOWNLOAD_PATH
    global NEW_SAVE_DATA_PATH
    global IS_SORT

    sys_config = {
        robot.SYS_DOWNLOAD_IMAGE: True,
        robot.SYS_SET_COOKIE: ("weibo.com", ".sina.com.cn"),
    }
    # Resolve the working directory once; all custom paths hang off it.
    base_path = os.path.abspath("")
    extra_config = {
        "save_data_path": os.path.join(base_path, "info", "article.data"),
        "image_download_path": os.path.join(base_path, "photo", "article"),
        "image_temp_path": os.path.join(base_path, "photo", "article", "tempImage"),
    }
    robot.Robot.__init__(self, sys_config, extra_config)

    # Publish the settings as globals so that child threads can use them.
    GET_IMAGE_COUNT = self.get_image_count
    IMAGE_TEMP_PATH = self.image_temp_path
    IMAGE_DOWNLOAD_PATH = self.image_download_path
    IS_SORT = self.is_sort
    NEW_SAVE_DATA_PATH = robot.get_new_save_file_path(self.save_data_path)
def main(self):
    """Download new blog images and write a new save-data file.

    Reads ``self.save_data_path`` (tab-separated: image start index, then
    the blog id recorded by the previous run), walks the blog index pages
    starting at ``page0.html`` and downloads every image found until the
    recorded blog id is met or an index page cannot be fetched. When
    ``self.is_sort`` is set, images are first saved to
    ``self.image_temp_path`` and afterwards moved to
    ``self.image_download_path`` through ``robot.sort_file``; if that call
    reports failure, ``tool.process_exit()`` is called. Finally a new
    save-data file is written to the path returned by
    ``robot.get_new_save_file_path``.
    """
    # Parse the save-data file.
    last_blog_id = ""
    image_start_index = 0
    if os.path.exists(self.save_data_path):
        # "with" guarantees the handle is closed even if reading fails.
        with open(self.save_data_path, "r") as save_file:
            save_info = save_file.read().split("\t")
        if len(save_info) >= 2:
            image_start_index = int(save_info[0])
            last_blog_id = save_info[1]

    # Download.
    page_index = 1
    image_count = 1
    is_over = False
    new_last_blog_id = ""
    if self.is_sort:
        image_path = self.image_temp_path
    else:
        image_path = self.image_download_path
    while not is_over:
        index_url = "http://blog.mariko-shinoda.net/page%s.html" % (page_index - 1)
        index_page_return_code, index_page = tool.http_request(index_url)[:2]
        if index_page_return_code == 1:
            image_name_list = re.findall('data-original="./([^"]*)"', index_page)
            for image_name in image_name_list:
                blog_id = image_name.split("-")[0]
                # Stop once the blog recorded by the previous run is reached.
                if blog_id == last_blog_id:
                    is_over = True
                    break
                # The first blog id seen becomes the new save-data record.
                if new_last_blog_id == "":
                    new_last_blog_id = blog_id
                image_url = "http://blog.mariko-shinoda.net/%s" % image_name
                # File extension: text after the last "." and before any ":".
                file_type = image_url.split(".")[-1].split(":")[0]
                file_path = os.path.join(image_path, "%05d.%s" % (image_count, file_type))
                log.step("开始下载第%s张图片 %s" % (image_count, image_url))
                if tool.save_net_file(image_url, file_path):
                    log.step("第%s张图片下载成功" % image_count)
                    image_count += 1
                else:
                    log.step("第%s张图片 %s 下载失败" % (image_count, image_url))
            page_index += 1
        else:
            log.error("无法访问博客页面 %s" % index_url)
            is_over = True

    log.step("下载完毕")

    # Sort and copy into the final download directory.
    if self.is_sort:
        if robot.sort_file(self.image_temp_path, self.image_download_path, image_start_index, 5):
            log.step(" 图片从下载目录移动到保存目录成功")
            # Advance the persisted index by the number of images downloaded
            # in this run so the next run does not start from the same index.
            # NOTE(review): assumes robot.sort_file numbers the moved files
            # consecutively after image_start_index - confirm in robot.py.
            image_start_index += image_count - 1
        else:
            log.error(" 创建图片保存目录 %s 失败" % self.image_download_path)
            tool.process_exit()

    # Nothing new was seen in this run (already up to date, or the first
    # index page was unreachable): keep the previously recorded blog id
    # instead of overwriting it with an empty string, which would make the
    # next run download everything again.
    if new_last_blog_id == "":
        new_last_blog_id = last_blog_id

    # Write the new save-data file.
    new_save_file_path = robot.get_new_save_file_path(self.save_data_path)
    log.step("保存新存档文件 %s" % new_save_file_path)
    with open(new_save_file_path, "w") as new_save_file:
        new_save_file.write(str(image_start_index) + "\t" + new_last_blog_id)

    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), image_count - 1))