def get_account_info_from_console():
    """Interactively read an email/password pair from the console.

    After both values are entered the user is asked to confirm them.
    Answering "y"/"yes" returns the pair, answering "n"/"no" restarts the
    whole entry, and any other answer repeats the confirmation question.

    :return: tuple ``(email, password)`` as typed by the user.
    """
    accept_answers = ("y", "yes")
    retry_answers = ("n", "no")
    while True:
        email = raw_input(tool.get_time() + " 请输入邮箱: ")
        password = raw_input(tool.get_time() + " 请输入密码: ")
        answer = None
        # Keep asking until the user either accepts or asks to re-enter.
        while answer not in retry_answers:
            answer = raw_input(tool.get_time() + " 是否使用这些信息(Y)或重新输入(N): ").lower()
            if answer in accept_answers:
                return email, password
def trace(msg):
    """Emit a trace message to the console and/or the trace log file.

    The message is prefixed with the current time from ``tool.get_time()``.
    It is printed when ``IS_SHOW_TRACE`` is truthy and written to
    ``TRACE_LOG_PATH`` when that path is not the empty string.

    :param msg: object to log; converted with ``str()``.
    """
    msg = tool.get_time() + " " + str(msg)
    if IS_SHOW_TRACE:
        tool.print_msg(msg, False)
    if TRACE_LOG_PATH != "":
        # Hold the shared lock while writing; the context manager releases it
        # even if write_file raises, and the exception still propagates.
        with thread_lock:
            tool.write_file(msg, TRACE_LOG_PATH)
def step(msg):
    """Emit a step (progress) message to the console and/or the step log file.

    The message is prefixed with the current time from ``tool.get_time()``.
    It is printed when ``IS_SHOW_STEP`` is truthy and written to
    ``STEP_LOG_PATH`` when that path is not the empty string.

    :param msg: object to log; converted with ``str()``.
    """
    msg = tool.get_time() + " " + str(msg)
    if IS_SHOW_STEP:
        tool.print_msg(msg, False)
    if STEP_LOG_PATH != "":
        # Hold the shared lock while writing; the context manager releases it
        # even if write_file raises, and the exception still propagates.
        with thread_lock:
            tool.write_file(msg, STEP_LOG_PATH)
def error(msg):
    """Emit an error message to the console and/or the error log file.

    The message is prefixed with the current time from ``tool.get_time()``
    and the literal tag ``[Error]``. It is printed when ``IS_SHOW_ERROR`` is
    truthy and written to ``ERROR_LOG_PATH`` when that path is not the empty
    string.

    :param msg: object to log; converted with ``str()``.
    """
    msg = tool.get_time() + " [Error] " + str(msg)
    if IS_SHOW_ERROR:
        tool.print_msg(msg, False)
    if ERROR_LOG_PATH != "":
        # Hold the shared lock while writing; the context manager releases it
        # even if write_file raises, and the exception still propagates.
        with thread_lock:
            tool.write_file(msg, ERROR_LOG_PATH)
def check_login():
    """Probe the bcy.net user home page and warn when it redirects elsewhere.

    The home page is requested through ``tool.http_request``. Nothing happens
    unless the request reports status ``1``. When the final URL differs from
    the requested one, the user is asked whether to continue: "y"/"yes"
    returns normally, "n"/"no" calls ``tool.process_exit()``, and any other
    answer repeats the question.
    """
    home_page_url = "http://bcy.net/home/user/index"
    home_page_return = tool.http_request(home_page_url)
    if home_page_return[0] != 1:
        return

    real_url = home_page_return[2].geturl()
    # No prompt is needed only when we stayed on the requested page.
    if home_page_url == real_url and "http://bcy.net/start" != real_url:
        return

    while True:
        answer = raw_input(tool.get_time() + " 没有检测到您的账号信息,可能无法获取那些只对粉丝开放的隐藏作品,是否继续下一步操作? (Y)es or (N)o: ").lower()
        if answer in ("y", "yes"):
            break
        if answer in ("n", "no"):
            tool.process_exit()
def __init__(self, sys_config, **kwargs):
    """Initialise the crawler from the startup config and the config files.

    :Args:
    - sys_config - startup configuration dict; recognised entries:
        - download_photo - whether the program supports downloading photos, default: False
        - download_video - whether the program supports downloading videos, default: False
        - download_audio - whether the program supports downloading audio, default: False
        - download_content - whether the program supports downloading text content, default: False
        - set_proxy - whether the program needs a proxy by default, default: False
        - no_save_data - whether the program may start without a save-data file, default: False
        - no_download - the program performs no downloads at all, default: False
        - get_cookie - whether cookie values must be read from the browser's cookie store, default: False
        - app_config - extra application config; overrides other values for the same parameter
        - app_config_path - path of the app config file; when set, the default app.ini is not read
    - kwargs
        - extra_sys_config - startup config passed at instantiation time
        - extra_app_config - application config passed at instantiation time
    """
    self.start_time = time.time()

    # Startup configuration must be a dict.
    if not isinstance(sys_config, dict):
        output.print_msg("程序启动配置不存在,请检查代码!")
        tool.process_exit()
        return
    # Extra startup config passed at instantiation; it overrides the
    # sys_config handed over by a subclass __init__.
    if isinstance(kwargs.get("extra_sys_config"), dict):
        sys_config.update(kwargs["extra_sys_config"])
    sys_download_photo = sys_config.get(SYS_DOWNLOAD_PHOTO, False)
    sys_download_video = sys_config.get(SYS_DOWNLOAD_VIDEO, False)
    sys_download_audio = sys_config.get(SYS_DOWNLOAD_AUDIO, False)
    sys_download_content = sys_config.get(SYS_DOWNLOAD_CONTENT, False)
    sys_set_proxy = sys_config.get(SYS_SET_PROXY, False)
    sys_get_cookie = sys_config.get(SYS_GET_COOKIE, False)
    sys_not_check_save_data = sys_config.get(SYS_NOT_CHECK_SAVE_DATA, False)
    sys_not_download = sys_config.get(SYS_NOT_DOWNLOAD, False)

    # Packaged executable: work relative to the executable's directory.
    if tool.IS_EXECUTABLE:
        application_path = os.path.dirname(sys.executable)
        os.chdir(application_path)
        config_path = os.path.join(os.getcwd(), "data/config.ini")
    else:
        config_path = PROJECT_CONFIG_PATH

    # Program configuration.
    config = read_config(config_path)
    # Application configuration file (layered over the program config).
    if SYS_APP_CONFIG_PATH in sys_config:
        app_config_path = sys_config[SYS_APP_CONFIG_PATH]
    else:
        app_config_path = os.path.abspath(os.path.join(PROJECT_APP_PATH, "app.ini"))
    if os.path.exists(app_config_path):
        config.update(read_config(app_config_path))
    # Extra application config passed at instantiation; it overrides values
    # read from the config files.
    if isinstance(kwargs.get("extra_app_config"), dict):
        config.update(kwargs["extra_app_config"])

    # Application-specific options described by 3-item templates that are
    # forwarded as (key, default, mode) to analysis_config.
    self.app_config = {}
    if SYS_APP_CONFIG in sys_config:
        for app_config_template in sys_config[SYS_APP_CONFIG]:
            if len(app_config_template) == 3:
                self.app_config[app_config_template[0]] = analysis_config(
                    config, app_config_template[0], app_config_template[1], app_config_template[2])

    # A media type is downloaded only when both the config file and the
    # program's declared capability allow it.
    self.is_download_photo = analysis_config(
        config, "IS_DOWNLOAD_PHOTO", True, CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_photo
    self.is_download_video = analysis_config(
        config, "IS_DOWNLOAD_VIDEO", True, CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_video
    self.is_download_audio = analysis_config(
        config, "IS_DOWNLOAD_AUDIO", True, CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_audio
    self.is_download_content = analysis_config(
        config, "IS_DOWNLOAD_CONTENT", True, CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_content

    if not sys_not_download and not self.is_download_photo and not self.is_download_video \
            and not self.is_download_audio and not self.is_download_content:
        # The program supports at least one download type but the config
        # disabled all of them.
        if sys_download_photo or sys_download_video or sys_download_audio or sys_download_content:
            output.print_msg("所有支持的下载都没有开启,请检查配置!")
            tool.process_exit()
            return

    # Whether an existing file with the same name is overwritten on download.
    net.DOWNLOAD_REPLACE_IF_EXIST = analysis_config(
        config, "IS_DOWNLOAD_REPLACE_IF_EXIST", False, CONFIG_ANALYSIS_MODE_BOOLEAN)

    # Save data.
    self.save_data_path = analysis_config(
        config, "SAVE_DATA_PATH", "\\\\info/save.data", CONFIG_ANALYSIS_MODE_PATH)
    self.temp_save_data_path = ""
    if not sys_not_check_save_data:
        if not os.path.exists(self.save_data_path):
            # The save-data file does not exist.
            output.print_msg(f"存档文件{self.save_data_path}不存在!")
            tool.process_exit()
            return
        temp_file_name = tool.get_time("%m-%d_%H_%M_") + os.path.basename(self.save_data_path)
        self.temp_save_data_path = os.path.join(os.path.dirname(self.save_data_path), temp_file_name)
        if os.path.exists(self.temp_save_data_path):
            # The temporary save-data file already exists.
            output.print_msg(f"存档临时文件{self.temp_save_data_path}已存在!")
            tool.process_exit()
            return

    # Cache directory.
    self.cache_data_path = analysis_config(
        config, "CACHE_DATA_PATH", "\\\\cache", CONFIG_ANALYSIS_MODE_PATH)

    # Session file.
    self.session_data_path = analysis_config(
        config, "SESSION_DATA_PATH", "\\\\info/session.data", CONFIG_ANALYSIS_MODE_PATH)

    # Photo download directory (only resolved when photos are downloaded).
    if self.is_download_photo:
        self.photo_download_path = analysis_config(
            config, "PHOTO_DOWNLOAD_PATH", "\\\\photo", CONFIG_ANALYSIS_MODE_PATH)
    else:
        self.photo_download_path = ""
    # Video download directory (only resolved when videos are downloaded).
    if self.is_download_video:
        self.video_download_path = analysis_config(
            config, "VIDEO_DOWNLOAD_PATH", "\\\\video", CONFIG_ANALYSIS_MODE_PATH)
    else:
        self.video_download_path = ""
    # Audio download directory (only resolved when audio is downloaded).
    if self.is_download_audio:
        self.audio_download_path = analysis_config(
            config, "AUDIO_DOWNLOAD_PATH", "\\\\audio", CONFIG_ANALYSIS_MODE_PATH)
    else:
        self.audio_download_path = ""
    # Text content download directory (only resolved when content is downloaded).
    if self.is_download_content:
        self.content_download_path = analysis_config(
            config, "CONTENT_DOWNLOAD_PATH", "\\\\content", CONFIG_ANALYSIS_MODE_PATH)
    else:
        self.content_download_path = ""

    # Whether a worker thread stops after a download failure.
    # NOTE(review): the default "\\\\content" looks copy-pasted from the path
    # options above and is not a boolean; it is kept unchanged because the
    # effective default depends on how analysis_config coerces it - confirm
    # the intended default and replace it with True/False.
    self.is_thread_exit_after_download_failure = analysis_config(
        config, "IS_THREAD_EXIT_AFTER_DOWNLOAD_FAILURE", "\\\\content", CONFIG_ANALYSIS_MODE_BOOLEAN)

    # Proxy: 1 enables it unconditionally, 2 enables it only when the
    # program declares that it needs one.
    is_proxy = analysis_config(config, "IS_PROXY", 2, CONFIG_ANALYSIS_MODE_INTEGER)
    if is_proxy == 1 or (is_proxy == 2 and sys_set_proxy):
        proxy_ip = analysis_config(config, "PROXY_IP", "127.0.0.1")
        proxy_port = analysis_config(config, "PROXY_PORT", "8087")
        # Connection pool that goes through the proxy.
        net.set_proxy(proxy_ip, proxy_port)
    else:
        # Plain urllib3 connection pool.
        net.init_http_connection_pool()

    # Cookies.
    self.cookie_value = {}
    if sys_get_cookie:
        # Operating system & browser.
        browser_type = analysis_config(config, "BROWSER_TYPE", 2, CONFIG_ANALYSIS_MODE_INTEGER)
        # Cookie store location: read raw first to see whether a path was
        # configured at all, then resolve it as a path, otherwise use the
        # browser's default location.
        cookie_path = analysis_config(config, "COOKIE_PATH", "", CONFIG_ANALYSIS_MODE_RAW)
        if cookie_path:
            cookie_path = analysis_config(config, "COOKIE_PATH", "", CONFIG_ANALYSIS_MODE_PATH)
        else:
            cookie_path = browser.get_default_browser_cookie_path(browser_type)
        all_cookie_from_browser = browser.get_all_cookie_from_browser(browser_type, cookie_path)
        if browser_type == browser.BROWSER_TYPE_TEXT:
            if "DEFAULT" in all_cookie_from_browser:
                self.cookie_value.update(all_cookie_from_browser["DEFAULT"])
        else:
            for cookie_domain in sys_config[SYS_GET_COOKIE]:
                # Look the domain up both with and without a leading dot.
                check_domain_list = [cookie_domain]
                if cookie_domain.startswith("."):
                    check_domain_list.append(cookie_domain[1:])
                else:
                    check_domain_list.append("." + cookie_domain)
                for check_domain in check_domain_list:
                    if check_domain in all_cookie_from_browser:
                        self.cookie_value.update(all_cookie_from_browser[check_domain])

    # Threads.
    self.thread_count = analysis_config(config, "THREAD_COUNT", 10, CONFIG_ANALYSIS_MODE_INTEGER)
    self.thread_lock = threading.Lock()  # guards shared/global state
    self.thread_semaphore = threading.Semaphore(self.thread_count)  # caps the number of running threads

    # Optional port listener that can pause / resume / stop the process.
    if analysis_config(config, "IS_PORT_LISTENER_EVENT", False, CONFIG_ANALYSIS_MODE_BOOLEAN):
        listener_port = analysis_config(config, "LISTENER_PORT", 12345, CONFIG_ANALYSIS_MODE_INTEGER)
        listener_event_bind = {
            str(portListenerEvent.PROCESS_STATUS_PAUSE): net.pause_request,  # pause
            str(portListenerEvent.PROCESS_STATUS_RUN): net.resume_request,  # resume
            str(portListenerEvent.PROCESS_STATUS_STOP): self.stop_process,  # stop (cancel current threads, finish)
        }
        process_control_thread = portListenerEvent.PortListenerEvent(
            port=listener_port, event_list=listener_event_bind)
        # Thread.setDaemon() is deprecated since Python 3.10; set the attribute.
        process_control_thread.daemon = True
        process_control_thread.start()

    # Optional keyboard listener (Windows only).
    if platform.system() == "Windows" and analysis_config(
            config, "IS_KEYBOARD_EVENT", False, CONFIG_ANALYSIS_MODE_BOOLEAN):
        keyboard_event_bind = {}
        # Pause the process.
        pause_process_key = analysis_config(config, "PAUSE_PROCESS_KEYBOARD_KEY", "F9")
        if pause_process_key:
            keyboard_event_bind[pause_process_key] = self.pause_process
        # Resume the process.
        continue_process_key = analysis_config(config, "CONTINUE_PROCESS_KEYBOARD_KEY", "F10")
        if continue_process_key:
            keyboard_event_bind[continue_process_key] = self.resume_process
        # Stop the process (cancel current threads, finish).
        stop_process_key = analysis_config(config, "STOP_PROCESS_KEYBOARD_KEY", "CTRL + F12")
        if stop_process_key:
            keyboard_event_bind[stop_process_key] = self.stop_process

        if keyboard_event_bind:
            keyboard_control_thread = keyboardEvent.KeyboardEvent(keyboard_event_bind)
            # Thread.setDaemon() is deprecated since Python 3.10; set the attribute.
            keyboard_control_thread.daemon = True
            keyboard_control_thread.start()

    self.save_data = {}
    self.total_photo_count = 0
    self.total_video_count = 0
    self.total_audio_count = 0

    output.print_msg("初始化完成")
def main(self):
    """Download new images from jigadori.fkoji.com and update the save file.

    Pages are crawled from the first one until an image whose tweet time is
    not newer than the time stored in the save file is met, or a page has no
    photo blocks. When ``self.is_sort`` is set, images are first stored in
    ``self.image_temp_path`` and, after a manual confirmation on the console,
    copied into ``all/`` and ``single/<account_id>/`` under
    ``self.image_download_path`` with running numbers. Finally the save file
    is rewritten with the per-account counters and the overall record.
    """
    # Parse the save file.
    account_list = robot.read_save_data(self.save_data_path, 0, ["", "", ""])

    # The ALL_SIGN entry holds the overall record: running index and the
    # tweet time of the newest image downloaded so far.
    if ALL_SIGN in account_list:
        image_start_index = int(account_list[ALL_SIGN][1])
        save_data_image_time = int(account_list[ALL_SIGN][2])
        account_list.pop(ALL_SIGN)
    else:
        image_start_index = 0
        save_data_image_time = 0

    if self.is_sort:
        image_path = self.image_temp_path
    else:
        image_path = self.image_download_path

    if not tool.make_dir(image_path, 0):
        # Report the directory that actually failed (it may be the temp one).
        self.print_msg("图片下载目录%s创建失败!" % image_path)
        tool.process_exit()

    # Download.
    page_index = 1
    image_count = 1
    first_image_time = 0
    seen_image_urls = set()
    is_over = False
    while not is_over:
        index_url = "http://jigadori.fkoji.com/?p=%s" % page_index
        index_page_return_code, index_page_response = tool.http_request(index_url)[:2]
        if index_page_return_code != 1:
            log.error("无法访问首页地址 %s" % index_url)
            tool.process_exit()

        index_page = BeautifulSoup.BeautifulSoup(index_page_response)
        photo_list = index_page.body.findAll("div", "photo")
        # Past the last page.
        if not photo_list:
            break
        for photo_info in photo_list:
            if isinstance(photo_info, BeautifulSoup.NavigableString):
                continue

            # Timestamp of the tweet this photo block belongs to.
            tweet_created_time = get_tweet_created_time(photo_info)
            if tweet_created_time is None:
                log.error("第%s张图片,解析tweet-created-at失败" % image_count)
                continue

            # Everything from here on was handled by a previous run.
            if tweet_created_time <= save_data_image_time:
                is_over = True
                break

            # The first (newest) image's time becomes the new save record.
            if first_image_time == 0:
                first_image_time = tweet_created_time

            # Account that posted the tweet.
            account_id = get_tweet_account_id(photo_info)
            if account_id is None:
                log.error("第%s张图片,解析tweet账号失败" % image_count)
                continue

            # Find the images.
            for tag in photo_info.findAll("img"):
                tag_attr = dict(tag.attrs)
                if not robot.check_sub_key(("src", "alt"), tag_attr):
                    continue
                image_url = str(tag_attr["src"]).replace(" ", "")
                # Images added while crawling shift pages, so the same URL
                # can show up twice.
                if image_url in seen_image_urls:
                    continue
                seen_image_urls.add(image_url)

                log.step("开始下载第%s张图片 %s" % (image_count, image_url))

                file_type = image_url.split(".")[-1]
                if "/" in file_type:
                    # No usable extension in the URL; assume jpg.
                    file_type = "jpg"
                file_path = os.path.join(image_path, "%05d_%s.%s" % (image_count, account_id, file_type))
                if tool.save_net_file(image_url, file_path):
                    log.step("第%s张图片下载成功" % image_count)
                    image_count += 1
                else:
                    log.error("第%s张图片 %s,account_id:%s,下载失败" % (image_count, image_url, account_id))

        if not is_over:
            page_index += 1

    log.step("下载完毕")

    # Sort and copy into the final directories.
    if self.is_sort:
        # Wait until the user has manually checked the downloaded images.
        while True:
            input_str = raw_input(tool.get_time() + " 已经下载完毕,是否下一步操作? (Y)es or (N)o: ")
            input_str = input_str.lower()
            if input_str in ["y", "yes"]:
                break
            elif input_str in ["n", "no"]:
                tool.process_exit()

        all_path = os.path.join(self.image_download_path, "all")
        if not tool.make_dir(all_path, 0):
            log.error("创建目录 %s 失败" % all_path)
            tool.process_exit()

        file_list = tool.get_dir_files_name(self.image_temp_path, "desc")
        for file_name in file_list:
            temp_image_path = os.path.join(self.image_temp_path, file_name)
            file_name_list = file_name.split(".")
            file_type = file_name_list[-1]
            # Temp files are named "<count>_<account_id>.<ext>"; the account
            # id itself may contain "_" and ".".
            account_id = "_".join(".".join(file_name_list[:-1]).split("_")[1:])

            # Copy into the directory holding all images.
            image_start_index += 1
            destination_file_name = "%05d_%s.%s" % (image_start_index, account_id, file_type)
            destination_path = os.path.join(all_path, destination_file_name)
            tool.copy_files(temp_image_path, destination_path)

            # Copy into the per-account directory.
            each_account_path = os.path.join(self.image_download_path, "single", account_id)
            if not os.path.exists(each_account_path):
                if not tool.make_dir(each_account_path, 0):
                    log.error("创建目录 %s 失败" % each_account_path)
                    tool.process_exit()
            if account_id in account_list:
                account_list[account_id][1] = int(account_list[account_id][1]) + 1
            else:
                account_list[account_id] = [account_id, 1]
            destination_file_name = "%05d.%s" % (account_list[account_id][1], file_type)
            destination_path = os.path.join(each_account_path, destination_file_name)
            tool.copy_files(temp_image_path, destination_path)

        log.step("图片从下载目录移动到保存目录成功")

        # Remove the temporary directory.
        tool.remove_dir(self.image_temp_path)

    # Write the new save file.
    temp_list = [account_list[key] for key in sorted(account_list)]
    # When nothing new was found, keep the previous timestamp; writing 0
    # would make the next run download everything again.
    if first_image_time == 0:
        new_save_image_time = save_data_image_time
    else:
        new_save_image_time = first_image_time
    # The overall record goes first.
    temp_list.insert(0, [ALL_SIGN, str(image_start_index), str(new_save_image_time)])
    tool.write_file(tool.list_to_string(temp_list), self.save_data_path, 2)

    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), image_count - 1))