Exemple #1
0
def quickly_set_proxy(config=None, is_auto=True):
    """Set up the proxy from the project configuration.

    :Args:
    - config
        An already-loaded ``ConfigParser.SafeConfigParser``; any other value
        is replaced by the configuration read from
        ``tool.PROJECT_CONFIG_PATH``.
    - is_auto
        True    do nothing when IS_PROXY resolves to 0
        False   always set the proxy
    """
    if not isinstance(config, ConfigParser.SafeConfigParser):
        config = read_config(tool.PROJECT_CONFIG_PATH)

    # In automatic mode, an IS_PROXY value of 0 means "no proxy".
    if is_auto and analysis_config(
            config, "IS_PROXY", 2, CONFIG_ANALYSIS_MODE_INTEGER) == 0:
        return

    host = analysis_config(config, "PROXY_IP", "127.0.0.1")
    port = analysis_config(config, "PROXY_PORT", "8087")
    # Hand the proxy address over to the net module.
    net.set_proxy(host, port)
Exemple #2
0
def quickly_set_proxy(config: Optional[dict] = None, is_auto: bool = True):
    """
    Read the configuration and set up the proxy in one call.

    :Args:
    - config
        An already-loaded configuration (a dict, or a configparser parser
        instance); any other value is replaced by the result of _get_config()
    - is_auto
        False   always use the proxy
        True    use the proxy unless the configuration disables it
                (IS_PROXY = 1 or 2)
    """
    # configparser.SafeConfigParser was removed in Python 3.12, so merely
    # referencing it raised AttributeError on every call. RawConfigParser is
    # its base class, so legacy parser instances are still recognised, and
    # dict matches the declared parameter type (previously a caller-supplied
    # dict was silently thrown away).
    if not isinstance(config, (dict, configparser.RawConfigParser)):
        config = _get_config()
    # In automatic mode, an IS_PROXY value of 0 means "no proxy".
    if is_auto:
        is_proxy = crawler.analysis_config(
            config, "IS_PROXY", 2, crawler.CONFIG_ANALYSIS_MODE_INTEGER)
        if is_proxy == 0:
            return
    proxy_ip = crawler.analysis_config(config, "PROXY_IP", "127.0.0.1")
    proxy_port = crawler.analysis_config(config, "PROXY_PORT", "8087")
    # Hand the proxy address over to the net module.
    net.set_proxy(proxy_ip, proxy_port)
Exemple #3
0
    def __init__(self, sys_config, **kwargs):
        """
        Initialise the crawler from startup options and configuration files.

        :Args:
        - sys_config - dict of startup options; initialisation is aborted via
            tool.process_exit() when it is not a dict
            - download_photo - whether the program supports downloading photos, default: False
            - download_video - whether the program supports downloading videos, default: False
            - download_audio - whether the program supports downloading audio, default: False
            - download_content - whether the program supports downloading text content, default: False
            - set_proxy - whether the program needs a proxy by default, default: False
            - no_save_data - whether the program can start without a save data file, default: False
            - no_download - the program performs no downloads at all, default: False
            - get_cookie - cookie domains whose cookies are read from the browser's cookie store, default: False
            - app_config - extra application config templates; every 3-item template
                (key, default, mode) is resolved into self.app_config
            - app_config_path - path of the application config file; when given,
                the default app.ini is not read
        - kwargs
            - extra_sys_config - startup options passed at instantiation; they override sys_config
            - extra_app_config - application config passed at instantiation; it overrides
                the values read from the config files
        """
        self.start_time = time.time()

        # Startup options
        if not isinstance(sys_config, dict):
            output.print_msg("程序启动配置不存在,请检查代码!")
            tool.process_exit()
            return
        # Merge into a copy so the caller's dict (possibly shared between
        # instances) is never mutated by the per-instance overrides.
        sys_config = dict(sys_config)
        extra_sys_config = kwargs.get("extra_sys_config")
        if isinstance(extra_sys_config, dict):
            sys_config.update(extra_sys_config)
        sys_download_photo = sys_config.get(SYS_DOWNLOAD_PHOTO, False)
        sys_download_video = sys_config.get(SYS_DOWNLOAD_VIDEO, False)
        sys_download_audio = sys_config.get(SYS_DOWNLOAD_AUDIO, False)
        sys_download_content = sys_config.get(SYS_DOWNLOAD_CONTENT, False)
        sys_set_proxy = sys_config.get(SYS_SET_PROXY, False)
        sys_get_cookie = sys_config.get(SYS_GET_COOKIE, False)
        sys_not_check_save_data = sys_config.get(SYS_NOT_CHECK_SAVE_DATA, False)
        sys_not_download = sys_config.get(SYS_NOT_DOWNLOAD, False)

        # Frozen executable: work from the executable's directory and read
        # the config file shipped next to it.
        if tool.IS_EXECUTABLE:
            application_path = os.path.dirname(sys.executable)
            os.chdir(application_path)
            config_path = os.path.join(os.getcwd(), "data/config.ini")
        else:
            config_path = PROJECT_CONFIG_PATH

        # Program config
        config = read_config(config_path)
        # Application config file (overrides the program config)
        if SYS_APP_CONFIG_PATH in sys_config:
            app_config_path = sys_config[SYS_APP_CONFIG_PATH]
        else:
            app_config_path = os.path.abspath(
                os.path.join(PROJECT_APP_PATH, "app.ini"))
        if os.path.exists(app_config_path):
            config.update(read_config(app_config_path))
        # Extra application config passed at instantiation (overrides the files)
        extra_app_config = kwargs.get("extra_app_config")
        if isinstance(extra_app_config, dict):
            config.update(extra_app_config)

        # Application-specific settings; templates that are not exactly
        # (key, default, mode) are ignored.
        self.app_config = {}
        for app_config_template in sys_config.get(SYS_APP_CONFIG) or ():
            if len(app_config_template) == 3:
                key, default_value, mode = app_config_template
                self.app_config[key] = analysis_config(
                    config, key, default_value, mode)

        # What to download: enabled in the config AND supported by the program
        self.is_download_photo = analysis_config(
            config, "IS_DOWNLOAD_PHOTO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_photo
        self.is_download_video = analysis_config(
            config, "IS_DOWNLOAD_VIDEO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_video
        self.is_download_audio = analysis_config(
            config, "IS_DOWNLOAD_AUDIO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_audio
        self.is_download_content = analysis_config(
            config, "IS_DOWNLOAD_CONTENT", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_content

        # The program supports at least one download type, but the config
        # switched every supported one off.
        nothing_enabled = not (self.is_download_photo
                               or self.is_download_video
                               or self.is_download_audio
                               or self.is_download_content)
        if not sys_not_download and nothing_enabled:
            if (sys_download_photo or sys_download_video
                    or sys_download_audio or sys_download_content):
                output.print_msg("所有支持的下载都没有开启,请检查配置!")
                tool.process_exit()
                return

        # Whether a download overwrites an existing file of the same name
        net.DOWNLOAD_REPLACE_IF_EXIST = analysis_config(
            config, "IS_DOWNLOAD_REPLACE_IF_EXIST", False,
            CONFIG_ANALYSIS_MODE_BOOLEAN)

        # Save data
        self.save_data_path = analysis_config(config, "SAVE_DATA_PATH",
                                              "\\\\info/save.data",
                                              CONFIG_ANALYSIS_MODE_PATH)
        self.temp_save_data_path = ""
        if not sys_not_check_save_data:
            if not os.path.exists(self.save_data_path):
                # The save data file does not exist
                output.print_msg(f"存档文件{self.save_data_path}不存在!")
                tool.process_exit()
                return
            temp_file_name = tool.get_time("%m-%d_%H_%M_") + os.path.basename(
                self.save_data_path)
            self.temp_save_data_path = os.path.join(
                os.path.dirname(self.save_data_path), temp_file_name)
            if os.path.exists(self.temp_save_data_path):
                # The temporary save data file already exists
                output.print_msg(f"存档临时文件{self.temp_save_data_path}已存在!")
                tool.process_exit()
                return

        # Cache
        self.cache_data_path = analysis_config(config, "CACHE_DATA_PATH",
                                               "\\\\cache",
                                               CONFIG_ANALYSIS_MODE_PATH)

        # Session
        self.session_data_path = analysis_config(config, "SESSION_DATA_PATH",
                                                 "\\\\info/session.data",
                                                 CONFIG_ANALYSIS_MODE_PATH)

        # Photo download directory (only resolved when photos are downloaded)
        if self.is_download_photo:
            self.photo_download_path = analysis_config(
                config, "PHOTO_DOWNLOAD_PATH", "\\\\photo",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.photo_download_path = ""
        # Video download directory (only resolved when videos are downloaded)
        if self.is_download_video:
            self.video_download_path = analysis_config(
                config, "VIDEO_DOWNLOAD_PATH", "\\\\video",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.video_download_path = ""
        # Audio download directory (only resolved when audio is downloaded)
        if self.is_download_audio:
            self.audio_download_path = analysis_config(
                config, "AUDIO_DOWNLOAD_PATH", "\\\\audio",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.audio_download_path = ""
        # Text content download directory (only resolved when content is downloaded)
        if self.is_download_content:
            self.content_download_path = analysis_config(
                config, "CONTENT_DOWNLOAD_PATH", "\\\\content",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.content_download_path = ""

        # Whether a thread stops running after a download failure.
        # The default used to be the path string "\\\\content", copy-pasted
        # from the block above. True is assumed to mirror how that non-empty
        # string was interpreted in boolean mode.
        # NOTE(review): confirm against analysis_config's boolean parsing.
        self.is_thread_exit_after_download_failure = analysis_config(
            config, "IS_THREAD_EXIT_AFTER_DOWNLOAD_FAILURE", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN)

        # Proxy: 1 forces it on, 2 defers to the program's own default
        is_proxy = analysis_config(config, "IS_PROXY", 2,
                                   CONFIG_ANALYSIS_MODE_INTEGER)
        if is_proxy == 1 or (is_proxy == 2 and sys_set_proxy):
            proxy_ip = analysis_config(config, "PROXY_IP", "127.0.0.1")
            proxy_port = analysis_config(config, "PROXY_PORT", "8087")
            # Connection pool that goes through the proxy
            net.set_proxy(proxy_ip, proxy_port)
        else:
            # Plain urllib3 connection pool
            net.init_http_connection_pool()

        # Cookies
        self.cookie_value = {}
        if sys_get_cookie:
            # Operating system & browser
            browser_type = analysis_config(config, "BROWSER_TYPE", 2,
                                           CONFIG_ANALYSIS_MODE_INTEGER)
            # Use the configured cookie path when there is one, otherwise
            # the browser's default location.
            cookie_path = analysis_config(config, "COOKIE_PATH", "",
                                          CONFIG_ANALYSIS_MODE_RAW)
            if cookie_path:
                cookie_path = analysis_config(config, "COOKIE_PATH", "",
                                              CONFIG_ANALYSIS_MODE_PATH)
            else:
                cookie_path = browser.get_default_browser_cookie_path(
                    browser_type)
            all_cookie_from_browser = browser.get_all_cookie_from_browser(
                browser_type, cookie_path)
            if browser_type == browser.BROWSER_TYPE_TEXT:
                if "DEFAULT" in all_cookie_from_browser:
                    self.cookie_value.update(
                        all_cookie_from_browser["DEFAULT"])
            else:
                for cookie_domain in sys_config[SYS_GET_COOKIE]:
                    # Look the domain up both with and without a leading dot.
                    if cookie_domain.startswith("."):
                        alternate_domain = cookie_domain[1:]
                    else:
                        alternate_domain = "." + cookie_domain
                    for check_domain in (cookie_domain, alternate_domain):
                        if check_domain in all_cookie_from_browser:
                            self.cookie_value.update(
                                all_cookie_from_browser[check_domain])

        # Thread count
        self.thread_count = analysis_config(config, "THREAD_COUNT", 10,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
        # Lock guarding access to shared state
        self.thread_lock = threading.Lock()
        # Semaphore capping the total number of threads
        self.thread_semaphore = threading.Semaphore(self.thread_count)

        # Optional port listener that can pause / resume / stop the process
        if analysis_config(config, "IS_PORT_LISTENER_EVENT", False,
                           CONFIG_ANALYSIS_MODE_BOOLEAN):
            listener_port = analysis_config(config, "LISTENER_PORT", 12345,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
            listener_event_bind = {
                # Pause the process
                str(portListenerEvent.PROCESS_STATUS_PAUSE):
                net.pause_request,
                # Resume the process
                str(portListenerEvent.PROCESS_STATUS_RUN):
                net.resume_request,
                # Stop the process (cancel the current threads, finish the task)
                str(portListenerEvent.PROCESS_STATUS_STOP):
                self.stop_process,
            }
            process_control_thread = portListenerEvent.PortListenerEvent(
                port=listener_port, event_list=listener_event_bind)
            # NOTE(review): if this is a threading.Thread subclass, setDaemon()
            # is deprecated since Python 3.10 in favour of `.daemon = True`.
            process_control_thread.setDaemon(True)
            process_control_thread.start()

        # Keyboard listener thread (Windows only)
        if platform.system() == "Windows" and analysis_config(
                config, "IS_KEYBOARD_EVENT", False,
                CONFIG_ANALYSIS_MODE_BOOLEAN):
            keyboard_event_bind = {}
            # Pause the process
            pause_process_key = analysis_config(config,
                                                "PAUSE_PROCESS_KEYBOARD_KEY",
                                                "F9")
            if pause_process_key:
                keyboard_event_bind[pause_process_key] = self.pause_process
            # Resume the process
            continue_process_key = analysis_config(
                config, "CONTINUE_PROCESS_KEYBOARD_KEY", "F10")
            if continue_process_key:
                keyboard_event_bind[continue_process_key] = self.resume_process
            # Stop the process (cancel the current threads, finish the task)
            stop_process_key = analysis_config(config,
                                               "STOP_PROCESS_KEYBOARD_KEY",
                                               "CTRL + F12")
            if stop_process_key:
                keyboard_event_bind[stop_process_key] = self.stop_process

            if keyboard_event_bind:
                keyboard_control_thread = keyboardEvent.KeyboardEvent(
                    keyboard_event_bind)
                keyboard_control_thread.setDaemon(True)
                keyboard_control_thread.start()

        self.save_data = {}
        self.total_photo_count = 0
        self.total_video_count = 0
        self.total_audio_count = 0

        output.print_msg("初始化完成")
Exemple #4
0
    def __init__(self, sys_config, extra_config=None):
        """
        Initialise the crawler from startup options and configuration files.

        Besides setting instance attributes, this configures module-level
        settings on ``log`` and ``net`` and may start listener threads.
        Every failure path prints a message, calls tool.process_exit() and
        returns early, leaving the instance only partially initialised.

        :Args:
        - sys_config - dict of startup options; a feature is switched on by
            the mere presence of its key (the value is only read for
            SYS_APP_CONFIG and SYS_GET_COOKIE)
        - extra_config - optional dict merged over the values read from the
            config files
        """
        self.start_time = time.time()

        # Startup options
        if not isinstance(sys_config, dict):
            self.print_msg("程序启动配置不存在,请检查代码!")
            tool.process_exit()
            return
        sys_download_image = SYS_DOWNLOAD_IMAGE in sys_config
        sys_download_video = SYS_DOWNLOAD_VIDEO in sys_config
        sys_set_proxy = SYS_SET_PROXY in sys_config
        sys_get_cookie = SYS_GET_COOKIE in sys_config
        sys_not_check_save_data = SYS_NOT_CHECK_SAVE_DATA in sys_config
        sys_not_download = SYS_NOT_DOWNLOAD in sys_config

        # Frozen executable: work from the executable's directory and read
        # the config file located under it
        if tool.IS_EXECUTABLE:
            application_path = os.path.dirname(sys.executable)
            os.chdir(application_path)
            config_path = os.path.join(os.getcwd(), "data/config.ini")
        else:
            config_path = tool.PROJECT_CONFIG_PATH

        # Program config
        config = read_config(config_path)
        # Application config file (overrides the program config)
        app_config_path = os.path.abspath(
            os.path.join(tool.PROJECT_APP_PATH, "app.ini"))
        if os.path.exists(app_config_path):
            config.update(read_config(app_config_path))
        # Extra config passed by the caller (overrides the files)
        if isinstance(extra_config, dict):
            config.update(extra_config)

        # Application-specific settings; only 3-item templates are used, in
        # the order (key, default, mode) expected by analysis_config
        self.app_config = {}
        if SYS_APP_CONFIG in sys_config and len(
                sys_config[SYS_APP_CONFIG]) > 0:
            for app_config_template in sys_config[SYS_APP_CONFIG]:
                if len(app_config_template) == 3:
                    self.app_config[app_config_template[0]] = analysis_config(
                        config, app_config_template[0], app_config_template[1],
                        app_config_template[2])

        # Logging: each setting is mirrored onto the log module
        log.IS_SHOW_ERROR = self.is_show_error = analysis_config(
            config, "IS_SHOW_ERROR", True, CONFIG_ANALYSIS_MODE_BOOLEAN)
        log.IS_SHOW_STEP = self.is_show_step = analysis_config(
            config, "IS_SHOW_STEP", True, CONFIG_ANALYSIS_MODE_BOOLEAN)
        log.IS_SHOW_TRACE = self.is_show_trace = analysis_config(
            config, "IS_SHOW_TRACE", False, CONFIG_ANALYSIS_MODE_BOOLEAN)
        error_log_path = analysis_config(config, "ERROR_LOG_PATH",
                                         "\\log/errorLog.txt",
                                         CONFIG_ANALYSIS_MODE_PATH)
        log.ERROR_LOG_PATH = self.error_log_path = replace_path(error_log_path)
        error_log_dir = os.path.dirname(self.error_log_path)
        if not path.create_dir(error_log_dir):
            self.print_msg("创建错误日志目录 %s 失败" % error_log_dir)
            tool.process_exit()
            return
        is_log_step = analysis_config(config, "IS_LOG_STEP", True,
                                      CONFIG_ANALYSIS_MODE_BOOLEAN)
        if not is_log_step:
            # An empty path is stored when step logging is switched off
            log.STEP_LOG_PATH = self.step_log_path = ""
        else:
            step_log_path = analysis_config(config, "STEP_LOG_PATH",
                                            "\\log/stepLog.txt",
                                            CONFIG_ANALYSIS_MODE_PATH)
            log.STEP_LOG_PATH = self.step_log_path = replace_path(
                step_log_path)
            # Directory holding the step log file
            step_log_dir = os.path.dirname(self.step_log_path)
            if not path.create_dir(step_log_dir):
                self.print_msg("创建步骤日志目录 %s 失败" % step_log_dir)
                tool.process_exit()
                return
        is_log_trace = analysis_config(config, "IS_LOG_TRACE", True,
                                       CONFIG_ANALYSIS_MODE_BOOLEAN)
        if not is_log_trace:
            # An empty path is stored when trace logging is switched off
            log.TRACE_LOG_PATH = self.trace_log_path = ""
        else:
            trace_log_path = analysis_config(config, "TRACE_LOG_PATH",
                                             "\\log/traceLog.txt",
                                             CONFIG_ANALYSIS_MODE_PATH)
            log.TRACE_LOG_PATH = self.trace_log_path = replace_path(
                trace_log_path)
            # Directory holding the trace log file
            trace_log_dir = os.path.dirname(self.trace_log_path)
            if not path.create_dir(trace_log_dir):
                self.print_msg("创建调试日志目录 %s 失败" % trace_log_dir)
                tool.process_exit()
                return

        # What to download: enabled in the config AND supported by the program
        self.is_download_image = analysis_config(
            config, "IS_DOWNLOAD_IMAGE", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_image
        self.is_download_video = analysis_config(
            config, "IS_DOWNLOAD_VIDEO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_video

        # The program supports at least one download type, but the config
        # switched every supported one off
        if not sys_not_download and not self.is_download_image and not self.is_download_video:
            if sys_download_image or sys_download_video:
                self.print_msg("所有支持的下载都没有开启,请检查配置!")
                tool.process_exit()
                return

        # Save data
        self.save_data_path = analysis_config(config, "SAVE_DATA_PATH",
                                              "\\\\info/save.data",
                                              CONFIG_ANALYSIS_MODE_PATH)
        if not sys_not_check_save_data and not os.path.exists(
                self.save_data_path):
            # The save data file does not exist
            self.print_msg("存档文件%s不存在!" % self.save_data_path)
            tool.process_exit()
            return
        # Temporary save data file: a month-day_hour_minute_ prefix on the
        # save data file name, in the same directory.
        # NOTE(review): this check also runs when SYS_NOT_CHECK_SAVE_DATA is
        # set - confirm that is intended.
        temp_file_name = time.strftime(
            "%m-%d_%H_%M_", time.localtime(time.time())) + os.path.basename(
                self.save_data_path)
        self.temp_save_data_path = os.path.join(
            os.path.dirname(self.save_data_path), temp_file_name)
        if os.path.exists(self.temp_save_data_path):
            # The temporary save data file already exists
            self.print_msg("存档临时文件%s已存在!" % self.temp_save_data_path)
            tool.process_exit()
            return

        # Session
        self.session_data_path = analysis_config(config, "SESSION_DATA_PATH",
                                                 "\\\\info/session.data",
                                                 CONFIG_ANALYSIS_MODE_PATH)

        # Image downloads
        if self.is_download_image:
            # Image download directory
            self.image_download_path = analysis_config(
                config, "IMAGE_DOWNLOAD_PATH", "\\\\photo",
                CONFIG_ANALYSIS_MODE_PATH)
            if not path.create_dir(self.image_download_path):
                # Creating the image download directory failed
                self.print_msg("图片保存目录%s创建失败!" % self.image_download_path)
                tool.process_exit()
                return
        else:
            self.image_download_path = ""
        # Video downloads
        if self.is_download_video:
            # Video download directory
            self.video_download_path = analysis_config(
                config, "VIDEO_DOWNLOAD_PATH", "\\\\video",
                CONFIG_ANALYSIS_MODE_PATH)
            if not path.create_dir(self.video_download_path):
                # Creating the video download directory failed
                self.print_msg("视频保存目录%s创建失败!" % self.video_download_path)
                tool.process_exit()
                return
        else:
            self.video_download_path = ""

        # Proxy: 1 forces it on, 2 enables it only when SYS_SET_PROXY is set
        is_proxy = analysis_config(config, "IS_PROXY", 2,
                                   CONFIG_ANALYSIS_MODE_INTEGER)
        if is_proxy == 1 or (is_proxy == 2 and sys_set_proxy):
            proxy_ip = analysis_config(config, "PROXY_IP", "127.0.0.1")
            proxy_port = analysis_config(config, "PROXY_PORT", "8087")
            # Connection pool that goes through the proxy
            net.set_proxy(proxy_ip, proxy_port)
        else:
            # Initialise the plain urllib3 connection pool
            net.init_http_connection_pool()

        # Cookies
        self.cookie_value = {}
        if sys_get_cookie:
            # Operating system & browser
            browser_type = analysis_config(config, "BROWSER_TYPE", 2,
                                           CONFIG_ANALYSIS_MODE_INTEGER)
            # Cookie file: the browser default, or the configured path
            is_auto_get_cookie = analysis_config(config, "IS_AUTO_GET_COOKIE",
                                                 True,
                                                 CONFIG_ANALYSIS_MODE_BOOLEAN)
            if is_auto_get_cookie:
                cookie_path = browser.get_default_browser_cookie_path(
                    browser_type)
            else:
                cookie_path = analysis_config(config, "COOKIE_PATH", "")
            all_cookie_from_browser = browser.get_all_cookie_from_browser(
                browser_type, cookie_path)
            for cookie_domain in sys_config[SYS_GET_COOKIE]:
                # Specific cookie keys were requested for this domain
                if sys_config[SYS_GET_COOKIE][cookie_domain]:
                    # Every requested key is (re)set to "" first
                    for cookie_key in sys_config[SYS_GET_COOKIE][
                            cookie_domain]:
                        self.cookie_value[cookie_key] = ""
                    if cookie_domain in all_cookie_from_browser:
                        # NOTE(review): this walks every name collected so
                        # far, including names added for earlier domains
                        for cookie_name in self.cookie_value:
                            if cookie_name in all_cookie_from_browser[
                                    cookie_domain]:
                                self.cookie_value[
                                    cookie_name] = all_cookie_from_browser[
                                        cookie_domain][cookie_name]
                # No keys requested: take every cookie of the domain
                else:
                    if cookie_domain in all_cookie_from_browser:
                        for cookie_name in all_cookie_from_browser[
                                cookie_domain]:
                            self.cookie_value[
                                cookie_name] = all_cookie_from_browser[
                                    cookie_domain][cookie_name]

        # HTTP settings, stored on the net module
        net.HTTP_CONNECTION_TIMEOUT = analysis_config(
            config, "HTTP_CONNECTION_TIMEOUT", 10,
            CONFIG_ANALYSIS_MODE_INTEGER)
        net.HTTP_READ_TIMEOUT = analysis_config(config, "HTTP_READ_TIMEOUT",
                                                10,
                                                CONFIG_ANALYSIS_MODE_INTEGER)
        # NOTE(review): the next two keys are spelled "DOWLOAD" (missing N);
        # presumably the config file uses the same spelling - confirm before
        # renaming.
        net.HTTP_DOWNLOAD_CONNECTION_TIMEOUT = analysis_config(
            config, "HTTP_DOWLOAD_CONNECTION_TIMEOUT", 10,
            CONFIG_ANALYSIS_MODE_INTEGER)
        net.HTTP_DOWNLOAD_READ_TIMEOUT = analysis_config(
            config, "HTTP_DOWLOAD_READ_TIMEOUT", 60,
            CONFIG_ANALYSIS_MODE_INTEGER)
        net.HTTP_REQUEST_RETRY_COUNT = analysis_config(
            config, "HTTP_REQUEST_RETRY_COUNT", 10,
            CONFIG_ANALYSIS_MODE_INTEGER)

        # Thread count
        self.thread_count = analysis_config(config, "THREAD_COUNT", 10,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
        self.thread_lock = threading.Lock()  # lock guarding access to shared global state
        self.thread_condition = threading.Condition(
        )  # wait() when the thread limit is reached, until any thread calls notify()

        # Optional port listener that can pause / resume / stop the process
        # NOTE(review): the key is spelled "ENVET" (presumably "EVENT");
        # confirm the spelling used in the config file before renaming.
        if analysis_config(config, "IS_PORT_LISTENER_ENVET", True,
                           CONFIG_ANALYSIS_MODE_BOOLEAN):
            listener_event_bind = {}
            # Pause the process
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_PAUSE)] = net.pause_request
            # Resume the process
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_RUN)] = net.resume_request
            # Stop the process (cancel the current threads, finish the task)
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_STOP)] = self.stop_process

            listener_port = analysis_config(config, "LISTENER_PORT", 12345,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
            process_control_thread = portListenerEvent.PortListenerEvent(
                port=listener_port, event_list=listener_event_bind)
            process_control_thread.setDaemon(True)
            process_control_thread.start()

        # Keyboard listener thread (Windows only)
        if platform.system() == "Windows" and analysis_config(
                config, "IS_KEYBOARD_EVENT", True,
                CONFIG_ANALYSIS_MODE_BOOLEAN):
            keyboard_event_bind = {}
            pause_process_key = analysis_config(config,
                                                "PAUSE_PROCESS_KEYBOARD_KEY",
                                                "F9")
            # Pause the process
            if pause_process_key:
                keyboard_event_bind[pause_process_key] = net.pause_request
            # Resume the process
            continue_process_key = analysis_config(
                config, "CONTINUE_PROCESS_KEYBOARD_KEY", "F10")
            if continue_process_key:
                keyboard_event_bind[continue_process_key] = net.resume_request
            # Stop the process (cancel the current threads, finish the task)
            stop_process_key = analysis_config(config,
                                               "STOP_PROCESS_KEYBOARD_KEY",
                                               "CTRL + F12")
            if stop_process_key:
                keyboard_event_bind[stop_process_key] = self.stop_process

            # Only start the listener when at least one key is bound
            if keyboard_event_bind:
                keyboard_control_thread = keyboardEvent.KeyboardEvent(
                    keyboard_event_bind)
                keyboard_control_thread.setDaemon(True)
                keyboard_control_thread.start()

        self.total_image_count = 0
        self.total_video_count = 0

        self.print_msg("初始化完成")
Exemple #5
0
    def __init__(self, sys_config, **kwargs):
        self.start_time = time.time()

        # 程序启动配置
        if not isinstance(sys_config, dict):
            output.print_msg("程序启动配置不存在,请检查代码!")
            tool.process_exit()
            return
        # 额外初始化配置(直接通过实例化中传入,可覆盖子类__init__方法传递的sys_config参数)
        if "extra_sys_config" in kwargs and isinstance(
                kwargs["extra_sys_config"], dict):
            sys_config.update(kwargs["extra_sys_config"])
        sys_download_photo = SYS_DOWNLOAD_PHOTO in sys_config
        sys_download_video = SYS_DOWNLOAD_VIDEO in sys_config
        sys_download_audio = SYS_DOWNLOAD_AUDIO in sys_config
        sys_set_proxy = SYS_SET_PROXY in sys_config
        sys_get_cookie = SYS_GET_COOKIE in sys_config
        sys_not_check_save_data = SYS_NOT_CHECK_SAVE_DATA in sys_config
        sys_not_download = SYS_NOT_DOWNLOAD in sys_config

        # exe程序
        if tool.IS_EXECUTABLE:
            application_path = os.path.dirname(sys.executable)
            os.chdir(application_path)
            config_path = os.path.join(os.getcwd(), "data/config.ini")
        else:
            config_path = PROJECT_CONFIG_PATH

        # 程序配置
        config = read_config(config_path)
        # 应用配置
        app_config_path = os.path.abspath(
            os.path.join(PROJECT_APP_PATH, "app.ini"))
        if os.path.exists(app_config_path):
            config.update(read_config(app_config_path))
        # 额外应用配置(直接通过实例化中传入,可覆盖配置文件中参数)
        if "extra_app_config" in kwargs and isinstance(
                kwargs["extra_app_config"], dict):
            config.update(kwargs["extra_app_config"])

        # 应用配置
        self.app_config = {}
        if SYS_APP_CONFIG in sys_config and len(
                sys_config[SYS_APP_CONFIG]) > 0:
            for app_config_template in sys_config[SYS_APP_CONFIG]:
                if len(app_config_template) == 3:
                    self.app_config[app_config_template[0]] = analysis_config(
                        config, app_config_template[0], app_config_template[1],
                        app_config_template[2])

        # 是否下载
        self.is_download_photo = analysis_config(
            config, "IS_DOWNLOAD_PHOTO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_photo
        self.is_download_video = analysis_config(
            config, "IS_DOWNLOAD_VIDEO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_video
        self.is_download_audio = analysis_config(
            config, "IS_DOWNLOAD_AUDIO", True,
            CONFIG_ANALYSIS_MODE_BOOLEAN) and sys_download_audio

        if not sys_not_download and not self.is_download_photo and not self.is_download_video and not self.is_download_audio:
            if sys_download_photo or sys_download_video or sys_download_audio:
                output.print_msg("所有支持的下载都没有开启,请检查配置!")
                tool.process_exit()
                return

        # 存档
        self.save_data_path = analysis_config(config, "SAVE_DATA_PATH",
                                              "\\\\info/save.data",
                                              CONFIG_ANALYSIS_MODE_PATH)
        if not sys_not_check_save_data and not os.path.exists(
                self.save_data_path):
            # 存档文件不存在
            output.print_msg("存档文件%s不存在!" % self.save_data_path)
            tool.process_exit()
            return
        temp_file_name = time.strftime(
            "%m-%d_%H_%M_", time.localtime(time.time())) + os.path.basename(
                self.save_data_path)
        self.temp_save_data_path = os.path.join(
            os.path.dirname(self.save_data_path), temp_file_name)
        if os.path.exists(self.temp_save_data_path):
            # 临时文件已存在
            output.print_msg("存档临时文件%s已存在!" % self.temp_save_data_path)
            tool.process_exit()
            return

        # cache
        self.cache_data_path = analysis_config(config, "CACHE_DATA_PATH",
                                               "\\\\cache",
                                               CONFIG_ANALYSIS_MODE_PATH)

        # session
        self.session_data_path = analysis_config(config, "SESSION_DATA_PATH",
                                                 "\\\\info/session.data",
                                                 CONFIG_ANALYSIS_MODE_PATH)

        # 是否需要下载图片
        if self.is_download_photo:
            # 图片保存目录
            self.photo_download_path = analysis_config(
                config, "PHOTO_DOWNLOAD_PATH", "\\\\photo",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.photo_download_path = ""
        # 是否需要下载视频
        if self.is_download_video:
            # 视频保存目录
            self.video_download_path = analysis_config(
                config, "VIDEO_DOWNLOAD_PATH", "\\\\video",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.video_download_path = ""
        # 是否需要下载音频
        if self.is_download_audio:
            # 音频保存目录
            self.audio_download_path = analysis_config(
                config, "AUDIO_DOWNLOAD_PATH", "\\\\audio",
                CONFIG_ANALYSIS_MODE_PATH)
        else:
            self.audio_download_path = ""

        # 代理
        is_proxy = analysis_config(config, "IS_PROXY", 2,
                                   CONFIG_ANALYSIS_MODE_INTEGER)
        if is_proxy == 1 or (is_proxy == 2 and sys_set_proxy):
            proxy_ip = analysis_config(config, "PROXY_IP", "127.0.0.1")
            proxy_port = analysis_config(config, "PROXY_PORT", "8087")
            # 使用代理的线程池
            net.set_proxy(proxy_ip, proxy_port)
        else:
            # 初始化urllib3的线程池
            net.init_http_connection_pool()

        # cookies
        self.cookie_value = {}
        if sys_get_cookie:
            # 操作系统&浏览器
            browser_type = analysis_config(config, "BROWSER_TYPE", 2,
                                           CONFIG_ANALYSIS_MODE_INTEGER)
            # cookie
            is_auto_get_cookie = analysis_config(config, "IS_AUTO_GET_COOKIE",
                                                 True,
                                                 CONFIG_ANALYSIS_MODE_BOOLEAN)
            if is_auto_get_cookie:
                cookie_path = browser.get_default_browser_cookie_path(
                    browser_type)
            else:
                cookie_path = analysis_config(config, "COOKIE_PATH", "")
            all_cookie_from_browser = browser.get_all_cookie_from_browser(
                browser_type, cookie_path)
            for cookie_domain in sys_config[SYS_GET_COOKIE]:
                check_domain_list = [cookie_domain]
                if cookie_domain[0] != ".":
                    check_domain_list.append("." + cookie_domain)
                elif cookie_domain[0] == ".":
                    check_domain_list.append(cookie_domain[1:])
                for check_domain in check_domain_list:
                    if check_domain in all_cookie_from_browser:
                        self.cookie_value.update(
                            all_cookie_from_browser[check_domain])

        # 线程数
        self.thread_count = analysis_config(config, "THREAD_COUNT", 10,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
        self.thread_lock = threading.Lock()  # 线程锁,避免操作一些全局参数
        self.thread_semaphore = threading.Semaphore(
            self.thread_count)  # 线程总数信号量

        # 启用线程监控是否需要暂停其他下载线程
        if analysis_config(config, "IS_PORT_LISTENER_ENVET", True,
                           CONFIG_ANALYSIS_MODE_BOOLEAN):
            listener_event_bind = {}
            # 暂停进程
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_PAUSE)] = net.pause_request
            # 继续进程
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_RUN)] = net.resume_request
            # 结束进程(取消当前的线程,完成任务)
            listener_event_bind[str(
                portListenerEvent.PROCESS_STATUS_STOP)] = self.stop_process

            listener_port = analysis_config(config, "LISTENER_PORT", 12345,
                                            CONFIG_ANALYSIS_MODE_INTEGER)
            process_control_thread = portListenerEvent.PortListenerEvent(
                port=listener_port, event_list=listener_event_bind)
            process_control_thread.setDaemon(True)
            process_control_thread.start()

        # 键盘监控线程(仅支持windows)
        if platform.system() == "Windows" and analysis_config(
                config, "IS_KEYBOARD_EVENT", True,
                CONFIG_ANALYSIS_MODE_BOOLEAN):
            keyboard_event_bind = {}
            pause_process_key = analysis_config(config,
                                                "PAUSE_PROCESS_KEYBOARD_KEY",
                                                "F9")
            # 暂停进程
            if pause_process_key:
                keyboard_event_bind[pause_process_key] = self.pause_process
            # 继续进程
            continue_process_key = analysis_config(
                config, "CONTINUE_PROCESS_KEYBOARD_KEY", "F10")
            if continue_process_key:
                keyboard_event_bind[continue_process_key] = self.resume_process
            # 结束进程(取消当前的线程,完成任务)
            stop_process_key = analysis_config(config,
                                               "STOP_PROCESS_KEYBOARD_KEY",
                                               "CTRL + F12")
            if stop_process_key:
                keyboard_event_bind[stop_process_key] = self.stop_process

            if keyboard_event_bind:
                keyboard_control_thread = keyboardEvent.KeyboardEvent(
                    keyboard_event_bind)
                keyboard_control_thread.setDaemon(True)
                keyboard_control_thread.start()

        self.total_photo_count = 0
        self.total_video_count = 0
        self.total_audio_count = 0

        output.print_msg("初始化完成")
Exemple #6
0
    def __init__(self, sys_config, extra_config=None):
        """Initialise the crawler from the start-up config and the config file.

        Reads the program configuration file, sets up logging paths, download
        directories, the network layer (proxy or plain connection pool),
        browser cookies, and starts the process-control and keyboard-listener
        daemon threads. On any fatal configuration problem a message is
        printed and ``tool.process_exit()`` is called.

        :Args:
        - sys_config - dict describing what the calling program supports.
            Only the *presence* of the following keys is tested:
            SYS_DOWNLOAD_IMAGE, SYS_DOWNLOAD_VIDEO, SYS_SET_PROXY,
            SYS_NOT_CHECK_SAVE_DATA.
            SYS_GET_COOKIE, when present, maps a cookie domain to the cookie
            keys wanted from that domain (an empty value means "all cookies
            of that domain").
            SYS_APP_CONFIG, when present with at least two items, holds the
            app config file path followed by 3-item templates that are
            forwarded to ``get_config`` as (key, default, mode).
        - extra_config - optional dict of overrides; the keys read here are
            "save_data_path", "image_download_path" and "video_download_path".
            Anything that is not a dict is treated as empty.
        """
        global IS_INIT
        self.start_time = time.time()

        # Start-up configuration supplied by the calling program
        if not isinstance(sys_config, dict):
            self.print_msg("程序启动配置不存在,请检查代码!")
            tool.process_exit()
            # NOTE(review): the return after process_exit() looks defensive,
            # in case process_exit() does not terminate - confirm.
            return
        # Feature flags are switched on by key presence, not by key value
        sys_download_image = SYS_DOWNLOAD_IMAGE in sys_config
        sys_download_video = SYS_DOWNLOAD_VIDEO in sys_config
        sys_set_proxy = SYS_SET_PROXY in sys_config
        sys_get_cookie = SYS_GET_COOKIE in sys_config
        sys_not_check_save_data = SYS_NOT_CHECK_SAVE_DATA in sys_config

        # When running as a packaged executable, work relative to the
        # executable's directory and read the config file from there
        if tool.IS_EXECUTABLE:
            application_path = os.path.dirname(sys.executable)
            os.chdir(application_path)
            config_path = os.path.join(os.getcwd(), "data/config.ini")
        else:
            config_path = tool.PROJECT_CONFIG_PATH

        # Program configuration
        config = read_config(config_path)
        if not isinstance(extra_config, dict):
            extra_config = {}

        # Application configuration: first item is the config file path,
        # every following 3-item entry is (key, default value, mode)
        self.app_config = {}
        if SYS_APP_CONFIG in sys_config and len(sys_config[SYS_APP_CONFIG]) >= 2:
            app_config = read_config(sys_config[SYS_APP_CONFIG][0])
            for app_config_template in sys_config[SYS_APP_CONFIG][1:]:
                if len(app_config_template) == 3:
                    config_key, default_value, mode = app_config_template
                    self.app_config[config_key] = get_config(app_config, config_key, default_value, mode)

        # Logging
        # NOTE(review): the last get_config argument appears to select the
        # value type (0 raw string, 1 integer, 2 boolean, 3 path) - confirm
        # against get_config.
        self.is_show_error = get_config(config, "IS_SHOW_ERROR", True, 2)
        self.is_show_step = get_config(config, "IS_SHOW_STEP", True, 2)
        self.is_show_trace = get_config(config, "IS_SHOW_TRACE", False, 2)
        error_log_path = get_config(config, "ERROR_LOG_PATH", "\\log/errorLog.txt", 3)
        self.error_log_path = replace_path(error_log_path)
        error_log_dir = os.path.dirname(self.error_log_path)

        if not tool.make_dir(error_log_dir, 0):
            self.print_msg("创建错误日志目录 %s 失败" % error_log_dir)
            tool.process_exit()
            return
        is_log_step = get_config(config, "IS_LOG_STEP", True, 2)
        if not is_log_step:
            # An empty path is stored when step logging to file is disabled
            self.step_log_path = ""
        else:
            step_log_path = get_config(config, "STEP_LOG_PATH", "\\log/stepLog.txt", 3)
            self.step_log_path = replace_path(step_log_path)
            # Directory that holds the step log file
            step_log_dir = os.path.dirname(self.step_log_path)
            if not tool.make_dir(step_log_dir, 0):
                self.print_msg("创建步骤日志目录 %s 失败" % step_log_dir)
                tool.process_exit()
                return
        is_log_trace = get_config(config, "IS_LOG_TRACE", True, 2)
        if not is_log_trace:
            # An empty path is stored when trace logging to file is disabled
            self.trace_log_path = ""
        else:
            trace_log_path = get_config(config, "TRACE_LOG_PATH", "\\log/traceLog.txt", 3)
            self.trace_log_path = replace_path(trace_log_path)
            # Directory that holds the trace log file
            trace_log_dir = os.path.dirname(self.trace_log_path)
            if not tool.make_dir(trace_log_dir, 0):
                self.print_msg("创建调试日志目录 %s 失败" % trace_log_dir)
                tool.process_exit()
                return

        # The module-level log settings are only written by the first
        # instance; later instances leave them untouched
        if not IS_INIT:
            log.IS_SHOW_ERROR = self.is_show_error
            log.IS_SHOW_STEP = self.is_show_step
            log.IS_SHOW_TRACE = self.is_show_trace
            log.ERROR_LOG_PATH = self.error_log_path
            log.STEP_LOG_PATH = self.step_log_path
            log.TRACE_LOG_PATH = self.trace_log_path
            IS_INIT = True

        # A download type is enabled only when both the config file and the
        # calling program allow it
        self.is_download_image = get_config(config, "IS_DOWNLOAD_IMAGE", True, 2) and sys_download_image
        self.is_download_video = get_config(config, "IS_DOWNLOAD_VIDEO", True, 2) and sys_download_video

        if not self.is_download_image and not self.is_download_video:
            # Only an error when the program supports at least one download
            # type but the config file switched all of them off
            if sys_download_image or sys_download_video:
                self.print_msg("所有支持的下载都没有开启,请检查配置!")
                tool.process_exit()
                return

        # Save data (archive) file
        if "save_data_path" in extra_config:
            self.save_data_path = os.path.realpath(extra_config["save_data_path"])
        else:
            self.save_data_path = get_config(config, "SAVE_DATA_PATH", "\\\\info/save.data", 3)
        if not sys_not_check_save_data and not os.path.exists(self.save_data_path):
            # The save data file does not exist
            self.print_msg("存档文件%s不存在!" % self.save_data_path)
            tool.process_exit()
            return

        # Image download directory (only when image download is enabled)
        if self.is_download_image:
            if "image_download_path" in extra_config:
                self.image_download_path = os.path.realpath(extra_config["image_download_path"])
            else:
                self.image_download_path = get_config(config, "IMAGE_DOWNLOAD_PATH", "\\\\photo", 3)
            if not tool.make_dir(self.image_download_path, 0):
                # Failed to create the image download directory
                self.print_msg("图片保存目录%s创建失败!" % self.image_download_path)
                tool.process_exit()
                return
        else:
            self.image_download_path = ""
        # Video download directory (only when video download is enabled)
        if self.is_download_video:
            if "video_download_path" in extra_config:
                self.video_download_path = os.path.realpath(extra_config["video_download_path"])
            else:
                self.video_download_path = get_config(config, "VIDEO_DOWNLOAD_PATH", "\\\\video", 3)
            if not tool.make_dir(self.video_download_path, 0):
                # Failed to create the video download directory
                self.print_msg("视频保存目录%s创建失败!" % self.video_download_path)
                tool.process_exit()
                return
        else:
            self.video_download_path = ""

        # Proxy: IS_PROXY == 1 always uses the proxy, IS_PROXY == 2 uses it
        # only when the calling program asked for it; any other value
        # falls back to the plain connection pool
        is_proxy = get_config(config, "IS_PROXY", 2, 1)
        if is_proxy == 1 or (is_proxy == 2 and sys_set_proxy):
            proxy_ip = get_config(config, "PROXY_IP", "127.0.0.1", 0)
            proxy_port = get_config(config, "PROXY_PORT", "8087", 0)
            # Connection pool that goes through the proxy
            net.set_proxy(proxy_ip, proxy_port)
        else:
            # Initialise the urllib3 connection pool
            net.init_http_connection_pool()

        # Cookies
        self.cookie_value = {}
        if sys_get_cookie:
            # Operating system & browser
            browser_type = get_config(config, "BROWSER_TYPE", 2, 1)
            # Cookie storage location: auto-detected or taken from the config
            is_auto_get_cookie = get_config(config, "IS_AUTO_GET_COOKIE", True, 2)
            if is_auto_get_cookie:
                cookie_path = tool.get_default_browser_cookie_path(browser_type)
            else:
                cookie_path = get_config(config, "COOKIE_PATH", "", 0)
            all_cookie_from_browser = tool.get_all_cookie_from_browser(browser_type, cookie_path)
            for cookie_domain in sys_config[SYS_GET_COOKIE]:
                wanted_keys = sys_config[SYS_GET_COOKIE][cookie_domain]
                if cookie_domain in all_cookie_from_browser:
                    domain_cookies = all_cookie_from_browser[cookie_domain]
                else:
                    domain_cookies = {}
                if wanted_keys:
                    # Specific cookie keys were requested for this domain.
                    # Only those keys are filled from this domain, so a key
                    # requested from another domain is never overwritten by
                    # a same-named cookie of this one. A requested key that
                    # the browser does not hold is stored as "".
                    for cookie_key in wanted_keys:
                        if cookie_key in domain_cookies:
                            self.cookie_value[cookie_key] = domain_cookies[cookie_key]
                        else:
                            self.cookie_value[cookie_key] = ""
                else:
                    # No keys specified: take every cookie of this domain
                    for cookie_name in domain_cookies:
                        self.cookie_value[cookie_name] = domain_cookies[cookie_name]

        # HTTP settings
        net.HTTP_CONNECTION_TIMEOUT = get_config(config, "HTTP_CONNECTION_TIMEOUT", 10, 1)
        net.HTTP_REQUEST_RETRY_COUNT = get_config(config, "HTTP_REQUEST_RETRY_COUNT", 10, 1)

        # Thread count and the lock shared by the worker threads
        self.thread_count = get_config(config, "THREAD_COUNT", 10, 1)
        self.thread_lock = threading.Lock()

        # Daemon thread that watches whether the download threads need to
        # be paused
        process_control_thread = process.ProcessControl()
        process_control_thread.setDaemon(True)
        process_control_thread.start()

        # Keyboard listener thread
        if get_config(config, "IS_KEYBOARD_EVENT", True, 2):
            keyboard_event_bind = {}
            pause_process_key = get_config(config, "PAUSE_PROCESS_KEYBOARD_KEY", "F9", 0)
            # Pause the process
            if pause_process_key:
                keyboard_event_bind[pause_process_key] = process.pause_process
            # Continue the process
            continue_process_key = get_config(config, "CONTINUE_PROCESS_KEYBOARD_KEY", "F10", 0)
            if continue_process_key:
                keyboard_event_bind[continue_process_key] = process.continue_process
            # Stop the process (cancel the current threads, finish the task)
            stop_process_key = get_config(config, "STOP_PROCESS_KEYBOARD_KEY", "CTRL + F12", 0)
            if stop_process_key:
                keyboard_event_bind[stop_process_key] = process.stop_process

            # The listener is only started when at least one key is bound
            if keyboard_event_bind:
                keyboard_control_thread = keyboardEvent.KeyboardEvent(keyboard_event_bind)
                keyboard_control_thread.setDaemon(True)
                keyboard_control_thread.start()

        self.print_msg("初始化完成")