Example #1
0
def get_project_settings():
    """Assemble and return the Settings object for the current project.

    Steps, in order:
      1. If the ENVVAR environment variable is absent, call ``init_env`` with
         the project name taken from ``SCRAPY_PROJECT`` (``'default'`` when
         that variable is unset).
      2. Create a ``Settings`` instance and, when ENVVAR now holds a module
         path, load that module at ``'project'`` priority.
      3. Apply pickled overrides from ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE``.
      4. Apply every ``SCRAPY_``-prefixed environment variable (prefix
         stripped) as a ``'project'``-priority setting.

    Returns:
        The populated ``Settings`` instance.
    """
    # NOTE(review): the original author's note says ENVVAR is
    # 'SCRAPY_SETTINGS_MODULE' and that init_env() fills it in from
    # scrapy.cfg and extends sys.path -- neither is visible here; confirm.
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    # NOTE: unpickling data taken from the environment executes whatever the
    # pickle encodes; this is only acceptable if the environment is trusted.
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.setdict(pickle.loads(pickled), priority='project')

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    overrides = {}
    for name, value in os.environ.items():
        if name.startswith(prefix):
            overrides[name[len(prefix):]] = value
    if overrides:
        settings.setdict(overrides, priority='project')

    return settings
Example #2
0
def get_project_settings():
    """Return a Settings object populated from the project and environment.

    The settings module named by the ENVVAR environment variable is loaded at
    ``'project'`` priority (``init_env`` is called first when ENVVAR is not
    set). Pickled overrides and ``SCRAPY_``-prefixed environment variables are
    then layered on top, also at ``'project'`` priority. Using the pickled
    overrides variable emits a ``ScrapyDeprecationWarning``.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    project_settings = Settings()

    module_path = os.environ.get(ENVVAR)
    if module_path:
        project_settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        message = (
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated."
        )
        warnings.warn(message, ScrapyDeprecationWarning)
        project_settings.setdict(pickle.loads(pickled), priority='project')

    # XXX: deprecate and remove this functionality
    # Every SCRAPY_* variable becomes a setting named after the part that
    # follows the 7-character prefix.
    from_env = dict(
        (key[7:], value)
        for key, value in os.environ.items()
        if key.startswith('SCRAPY_')
    )
    if from_env:
        project_settings.setdict(from_env, priority='project')

    return project_settings
Example #3
0
def get_project_settings():
    """Build the project Settings, warning about SCRAPY_* setting overrides.

    If the ENVVAR environment variable is missing, ``init_env`` is called with
    the ``SCRAPY_PROJECT`` value (``'default'`` when unset). The module named
    by ENVVAR, if any, is loaded at ``'project'`` priority. All
    ``SCRAPY_``-prefixed environment variables are then applied as settings;
    those outside the known set (CHECK, PROJECT, PYTHON_SHELL,
    SETTINGS_MODULE) trigger a ``ScrapyDeprecationWarning`` listing them.

    Returns:
        The populated ``Settings`` instance.
    """
    # Only initialise the environment when the settings-module variable has
    # not been provided already.
    if ENVVAR not in os.environ:
        # os.environ.get works like dict.get, but reads process environment.
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    # Load the project's settings module when the environment names one.
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    prefix = 'SCRAPY_'
    scrapy_envvars = {}
    for env_name, env_value in os.environ.items():
        if env_name.startswith(prefix):
            scrapy_envvars[env_name[len(prefix):]] = env_value

    known_names = {
        'CHECK',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    # Anything else with the SCRAPY_ prefix is being used to override a
    # setting, which is deprecated -- report the names in sorted order.
    override_names = sorted(set(scrapy_envvars) - known_names)
    if override_names:
        listing = ', '.join(override_names)
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {listing}',
            ScrapyDeprecationWarning)

    # All prefixed variables (known ones included) are still applied.
    settings.setdict(scrapy_envvars, priority='project')

    return settings
Example #4
0
def get_project_settings():
    """Return the Settings for the current project.

    Loads the module named by the ENVVAR environment variable (calling
    ``init_env`` first when ENVVAR is unset), then applies every
    ``SCRAPY_``-prefixed environment variable as a ``'project'``-priority
    setting. A ``ScrapyDeprecationWarning`` is issued when any such variable
    is not one of CHECK, PROJECT, PYTHON_SHELL or SETTINGS_MODULE.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # Map "SCRAPY_FOO" -> "FOO" for every prefixed environment variable.
    scrapy_envvars = dict(
        (name[7:], value)
        for name, value in os.environ.items()
        if name.startswith('SCRAPY_')
    )

    valid_envvars = {'CHECK', 'PROJECT', 'PYTHON_SHELL', 'SETTINGS_MODULE'}
    deprecated_names = [
        name for name in scrapy_envvars if name not in valid_envvars
    ]
    if deprecated_names:
        deprecated_names.sort()
        message = (
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            'currently defined: ' + ', '.join(deprecated_names)
        )
        warnings.warn(message, ScrapyDeprecationWarning)

    # Applied unconditionally, including the valid (non-deprecated) names.
    settings.setdict(scrapy_envvars, priority='project')

    return settings
Example #5
0
def get_project_dirs():
    """Return ``(outer_dir, inner_dir)`` for the current project.

    ``outer_dir`` is the directory holding the closest ``scrapy.cfg`` (as
    reported by ``closest_scrapy_cfg``); ``inner_dir`` is the value of the
    ``SCRAPY_PROJECT`` environment variable. Whatever is still missing is
    derived from ``SCRAPY_SETTINGS_MODULE`` after calling ``init_env()``:
    ``inner_dir`` becomes the first dotted component of the module path, and
    ``outer_dir`` becomes the grandparent directory of the imported module's
    file.

    Returns:
        tuple: ``(outer_dir, inner_dir)``.

    Raises:
        Exception: with the message "Project configuration awry" when the
            settings module is needed but is not configured or cannot be
            imported.
    """
    outer_dir = ""
    closest_cfg = closest_scrapy_cfg()
    if closest_cfg:
        outer_dir = os.path.dirname(closest_cfg)
    inner_dir = os.environ.get('SCRAPY_PROJECT') or ""
    if outer_dir and inner_dir:
        return (outer_dir, inner_dir)

    init_env()
    scrapy_module = os.environ.get('SCRAPY_SETTINGS_MODULE')
    # From here on at least one directory has to be derived from the settings
    # module, so a missing/empty value can never succeed. Previously a found
    # scrapy.cfg combined with an unset variable crashed with AttributeError
    # on ``None.split``; report the documented configuration error instead.
    if not scrapy_module:
        raise Exception("Project configuration awry")
    if not inner_dir:
        inner_dir = scrapy_module.split('.')[0]
    if outer_dir and inner_dir:
        return (outer_dir, inner_dir)

    try:
        module = import_module(scrapy_module)
    except ImportError:
        raise Exception("Project configuration awry")
    # <outer>/<package>/settings.py -> two levels up from the module file.
    outer_dir = os.path.dirname(os.path.dirname(module.__file__))
    return (outer_dir, inner_dir)
Example #6
0
def get_project_settings():
    """Create the Settings object for the current project.

    Loads the settings module named by the ENVVAR environment variable at
    ``'project'`` priority, then applies pickled overrides and all
    ``SCRAPY_``-prefixed environment variables at the same priority.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        # Project name from the environment, 'default' when not given.
        # NOTE(review): per the original author's note, init_env() copies the
        # matching entry of the [settings] section of the project's
        # scrapy.cfg into the SCRAPY_SETTINGS_MODULE variable -- confirm.
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    # Settings module path, as configured through the environment.
    module_path = os.environ.get(ENVVAR)
    if module_path:
        # Project settings are layered over what Settings() started with.
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        unpickled = pickle.loads(pickled)
        settings.setdict(unpickled, priority='project')

    # XXX: deprecate and remove this functionality
    overrides = {}
    for key, value in os.environ.items():
        if not key.startswith('SCRAPY_'):
            continue
        overrides[key[7:]] = value
    if overrides:
        # Environment-provided values override the loaded settings.
        settings.setdict(overrides, priority='project')

    return settings
Example #7
0
def get_project_settings():
    """Return a CrawlerSettings object for the current project.

    The module named by the ENVVAR environment variable is imported and
    wrapped in ``CrawlerSettings`` (``None`` is passed when no module is
    configured). ``settings.overrides`` is then filled from the pickled
    overrides variable and from every ``SCRAPY_``-prefixed environment
    variable (prefix stripped).

    Returns:
        The ``CrawlerSettings`` instance.
    """
    # Designating the settings (translated from the original author's notes):
    #   ENVVAR = 'SCRAPY_SETTINGS_MODULE'
    #   When using Scrapy you have to declare which settings you are using,
    #   which can be done through the SCRAPY_SETTINGS_MODULE environment
    #   variable. Its value must use Python path syntax, e.g.
    #   myproject.settings, and the settings module has to be on the Python
    #   import search path.
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    module_path = os.environ.get(ENVVAR)
    settings_module = import_module(module_path) if module_path else None
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.overrides = pickle.loads(pickled)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    for name, value in os.environ.items():
        if not name.startswith('SCRAPY_'):
            continue
        settings.overrides[name[7:]] = value

    return settings
Example #8
0
def spider_project(path_or_spider):
    """Run inside a project directory and yield that project's settings.

    This is a generator: it changes the working directory, yields the result
    of ``get_project_settings()`` once, and restores the previous working
    directory when it is resumed or closed.
    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` outside
    this block -- the decorator is not visible here; confirm.

    Args:
        path_or_spider: a directory path, or a ``ScrapyBaseSpider`` instance.
            For a spider, the directory of the module defining the spider's
            class is used.

    Yields:
        The object returned by ``get_project_settings()``.
    """
    cur_dir = os.path.abspath(os.curdir)
    if isinstance(path_or_spider, ScrapyBaseSpider):
        spider_module = sys.modules[path_or_spider.__class__.__module__]
        path = os.path.dirname(spider_module.__file__)
    else:
        path = path_or_spider

    os.chdir(path)
    try:
        # init_env() is inside the try so that a failure while initialising
        # the environment still restores the original working directory.
        init_env()
        yield get_project_settings()
    finally:
        os.chdir(cur_dir)
Example #9
0
def spider_project(path_or_spider):
    """Yield the project settings while the cwd is the project directory.

    The working directory is switched to the target directory, the
    environment is initialised with ``init_env()``, and the result of
    ``get_project_settings()`` is yielded. The original working directory is
    restored afterwards, including when initialisation or the consumer fails.
    NOTE(review): looks intended for use with ``contextlib.contextmanager``;
    no decorator is visible in this block -- confirm.

    Args:
        path_or_spider: either a directory path or a ``ScrapyBaseSpider``
            instance; a spider resolves to the directory containing the
            module in which its class is defined.

    Yields:
        The object returned by ``get_project_settings()``.
    """
    previous_dir = os.path.abspath(os.curdir)

    path = path_or_spider
    if isinstance(path_or_spider, ScrapyBaseSpider):
        module_name = path_or_spider.__class__.__module__
        path = os.path.dirname(sys.modules[module_name].__file__)

    os.chdir(path)
    try:
        # Kept inside the try: before this fix an exception from init_env()
        # left the process stranded in ``path``.
        init_env()
        yield get_project_settings()
    finally:
        os.chdir(previous_dir)
Example #10
0
def get_project_settings():
    """Build the project Settings and verify the process runs in the project.

    Steps, in order:
      1. If the ENVVAR environment variable is unset, call ``init_env`` with
         the ``SCRAPY_PROJECT`` value (``'default'`` when unset).
      2. When ENVVAR names a settings module: apply ``default_settings`` via
         ``_update_settings``, load the module at ``'project'`` priority, and
         apply ``online_settings`` when ``ENV_FLAG_DDIY`` equals ``'online'``.
      3. Apply pickled overrides (deprecated, warns).
      4. Apply all ``SCRAPY_``-prefixed environment variables; names outside
         the valid set trigger a ``ScrapyDeprecationWarning``.
      5. Check that the current working directory is the project directory
         (two levels above this file) or lies beneath it.

    Returns:
        The populated ``Settings`` instance.

    Raises:
        EnvironmentError: if the current working directory is outside the
            project directory.
    """
    if ENVVAR not in os.environ:
        project = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project)

    settings = Settings()
    settings_module_path = os.environ.get(ENVVAR)
    if settings_module_path:
        _update_settings(settings, default_settings)
        settings.setmodule(settings_module_path, priority='project')
        if os.environ.get('ENV_FLAG_DDIY') == 'online':
            _update_settings(settings, online_settings)

    pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_settings:
        warnings.warn(
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated.", ScrapyDeprecationWarning)
        # NOTE: unpickling environment data is only safe if the environment
        # is trusted.
        settings.setdict(pickle.loads(pickled_settings), priority='project')

    scrapy_envvars = {
        k[7:]: v
        for k, v in os.environ.items() if k.startswith('SCRAPY_')
    }
    valid_envvars = {
        'CHECK',
        'PICKLED_SETTINGS_TO_OVERRIDE',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    setting_envvars = {k for k in scrapy_envvars if k not in valid_envvars}
    if setting_envvars:
        setting_envvar_list = ', '.join(sorted(setting_envvars))
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            'currently defined: {}'.format(setting_envvar_list),
            ScrapyDeprecationWarning)
    settings.setdict(scrapy_envvars, priority='project')

    project_path = os.path.normpath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../..'))
    current_path = os.getcwd()
    # Compare on a path-component boundary: a bare string prefix test would
    # wrongly accept a sibling such as "<project_path>_backup".
    # os.path.join(p, '') appends exactly one separator (and leaves a root
    # path unchanged).
    project_prefix = os.path.join(project_path, '')
    inside_project = (current_path == project_path
                      or current_path.startswith(project_prefix))
    if not inside_project:
        raise EnvironmentError(
            f'The program runs in a non-project path (current_path:{current_path} => project_path:{project_path})'
        )
    return settings
Example #11
0
def get_project_settings():
    """Return an ``AioSettings`` object populated for the current project.

    All dependencies are imported locally. The settings module named by the
    ``SCRAPY_SETTINGS_MODULE`` environment variable is loaded at
    ``'project'`` priority (``init_env`` is called first when the variable is
    unset). Pickled overrides (deprecated, warns) and ``SCRAPY_``-prefixed
    environment variables are then applied; prefixed names outside the valid
    set trigger a ``ScrapyDeprecationWarning``.

    Returns:
        The populated ``AioSettings`` instance.
    """
    import os
    import pickle
    import warnings

    from scrapy.utils.conf import init_env
    from aioscrapy.settings import AioSettings
    from scrapy.exceptions import ScrapyDeprecationWarning

    settings_envvar = 'SCRAPY_SETTINGS_MODULE'
    prefix = 'SCRAPY_'

    if settings_envvar not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = AioSettings()
    module_path = os.environ.get(settings_envvar)
    if module_path:
        settings.setmodule(module_path, priority='project')

    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        deprecation_text = (
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated."
        )
        warnings.warn(deprecation_text, ScrapyDeprecationWarning)
        settings.setdict(pickle.loads(pickled), priority='project')

    # Collect SCRAPY_* variables keyed by the name after the prefix.
    scrapy_envvars = {}
    for env_name, env_value in os.environ.items():
        if env_name.startswith(prefix):
            scrapy_envvars[env_name[len(prefix):]] = env_value

    valid_envvars = {
        'CHECK',
        'PICKLED_SETTINGS_TO_OVERRIDE',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    override_names = sorted(set(scrapy_envvars).difference(valid_envvars))
    if override_names:
        listing = ', '.join(override_names)
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {listing}',
            ScrapyDeprecationWarning)

    # Applied regardless of whether a warning was issued.
    settings.setdict(scrapy_envvars, priority='project')

    return settings
Example #12
0
def get_project_dir():
    """Locate the project directory, or return ``None`` if it is unknown.

    The directory containing the closest ``scrapy.cfg`` (as reported by
    ``closest_scrapy_cfg``) wins. Otherwise ``init_env()`` is called and the
    module named by ``SCRAPY_SETTINGS_MODULE`` is imported; the result is the
    directory two levels above that module's file.

    Returns:
        pathlib.Path | None: the project directory, or ``None`` when the
        variable is unset or the module cannot be imported.
    """
    cfg_path = closest_scrapy_cfg()
    if cfg_path:
        return Path(cfg_path).parent

    init_env()
    module_name = os.environ.get('SCRAPY_SETTINGS_MODULE')
    if module_name is None:
        return None

    try:
        settings_module = import_module(module_name)
    except ImportError:
        return None
    else:
        module_file = Path(settings_module.__file__)
        return module_file.parent.parent
Example #13
0
def get_project_settings():
    """Return a ``CrawlerSettings`` wrapping the project's settings module.

    When the ENVVAR environment variable is unset, ``init_env`` is called
    with the ``SCRAPY_PROJECT`` value (``'default'`` when unset). The module
    named by ENVVAR is imported, or ``None`` is used when no path is
    configured. ``settings.overrides`` starts from the pickled overrides (an
    empty dict when there are none) and then receives every
    ``SCRAPY_``-prefixed environment variable under its unprefixed name.

    Returns:
        The ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    module_path = os.environ.get(ENVVAR)
    settings_module = None
    if module_path:
        settings_module = import_module(module_path)
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.overrides = pickle.loads(pickled)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    for env_name, env_value in os.environ.items():
        if env_name.startswith(prefix):
            settings.overrides[env_name[len(prefix):]] = env_value

    return settings
Example #14
0
def get_project_settings():
    """Return a ``CrawlerSettings`` for the project, tolerating import errors.

    The settings module path comes from the ENVVAR environment variable and
    falls back to ``"scrapy_settings"``. If importing it raises
    ``ImportError``, ``CrawlerSettings`` is built with ``None``.
    ``settings.overrides`` is filled from the pickled overrides variable and
    from ``SCRAPY_``-prefixed environment variables (prefix stripped).

    Returns:
        The ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get("SCRAPY_PROJECT", "default"))

    module_path = os.environ.get(ENVVAR, "scrapy_settings")
    try:
        # A non-empty fromlist makes __import__ return the named submodule
        # instead of the top-level package.
        loaded_module = __import__(module_path, {}, {}, [""])
    except ImportError:
        loaded_module = None
    settings = CrawlerSettings(loaded_module)

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.overrides = pickle.loads(pickled)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    for env_name, env_value in os.environ.items():
        if not env_name.startswith("SCRAPY_"):
            continue
        settings.overrides[env_name[7:]] = env_value

    return settings
Example #15
0
def get_project_settings():
    """Return the Settings object for the current project.

    The module named by the ENVVAR environment variable is loaded at
    ``'project'`` priority; when ENVVAR is unset, ``init_env`` is called
    first with the ``SCRAPY_PROJECT`` value (``'default'`` when unset).
    Pickled overrides and ``SCRAPY_``-prefixed environment variables are then
    applied at the same priority.

    Returns:
        The populated ``Settings`` instance.
    """
    environ = os.environ

    if ENVVAR not in environ:
        init_env(environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    module_path = environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    pickled = environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.setdict(pickle.loads(pickled), priority='project')

    # XXX: deprecate and remove this functionality
    prefixed = [
        (name, value)
        for name, value in environ.items()
        if name.startswith('SCRAPY_')
    ]
    if prefixed:
        # Strip the 7-character "SCRAPY_" prefix to get the setting name.
        env_overrides = {name[7:]: value for name, value in prefixed}
        settings.setdict(env_overrides, priority='project')

    return settings
Example #16
0
def get_project_settings():
    """Build a ``CrawlerSettings`` from the configured settings module.

    The module path is read from the ENVVAR environment variable, defaulting
    to ``'scrapy_settings'``; an ``ImportError`` while importing it results
    in ``CrawlerSettings(None)``. Overrides come from the pickled overrides
    variable and from ``SCRAPY_``-prefixed environment variables.

    Returns:
        The ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project_name)

    try:
        settings_module = __import__(
            os.environ.get(ENVVAR, 'scrapy_settings'), {}, {}, [''])
    except ImportError:
        settings_module = None

    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if not pickled:
        settings.overrides = {}
    else:
        settings.overrides = pickle.loads(pickled)

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    for key, value in os.environ.items():
        if key.startswith(prefix):
            settings.overrides[key[len(prefix):]] = value

    return settings
Example #17
0
def get_project_settings():
    """Return the Settings object for the current project.

    Combines, at ``'project'`` priority: the settings module named by the
    ENVVAR environment variable, pickled overrides from
    ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE``, and every ``SCRAPY_``-prefixed
    environment variable (prefix stripped).

    Returns:
        The populated ``Settings`` instance.
    """
    ## Is the settings-module variable already present in the environment?
    if ENVVAR not in os.environ:
        ## Project name from SCRAPY_PROJECT, 'default' when it is not set.
        ## NOTE(review): per the original author's notes, init_env() uses
        ## scrapy.cfg to find the user's settings module, stores it in
        ## SCRAPY_SETTINGS_MODULE and adds the project base path to the
        ## module search path -- not visible here; confirm.
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    ## NOTE(review): the original notes say Settings() starts out holding the
    ## built-in defaults at 'default' priority -- confirm against Settings.
    settings = Settings()

    ## Path of the user's settings module, if one is configured.
    user_module_path = os.environ.get(ENVVAR)
    if user_module_path:
        ## User settings are applied with 'project' priority.
        settings.setmodule(user_module_path, priority='project')

    # XXX: remove this hack
    pickled = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled:
        settings.setdict(pickle.loads(pickled), priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = {}
    for env_name, env_value in os.environ.items():
        if env_name.startswith('SCRAPY_'):
            env_overrides[env_name[7:]] = env_value
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    ## Hand back the assembled settings object.
    return settings
Example #18
0
    @staticmethod
    def _save_redis_rule(rule_dict):
        """Store ``rule_dict`` through ``conn`` and register its name.

        The mapping is written with ``hmset`` under the key
        ``'Rule:' + rule_dict['name']`` and the name is added to the
        ``'Rules'`` set.
        NOTE(review): ``conn`` is defined outside this block; presumably a
        Redis client -- confirm.
        """
        rule_name = rule_dict['name']
        conn.hmset('Rule:' + rule_name, rule_dict)
        conn.sadd('Rules', rule_name)

    @staticmethod
    def _save_csv_rule(rule_dict):
        """Placeholder for saving a rule to CSV; always raises.

        Raises:
            NotImplementedError: unconditionally.
        """
        raise NotImplementedError()

    def save(self, dst='redis'):
        """
        Save this rule to the destination named by ``dst``.

        The values are checked with ``self.__check_vals()`` first. The saver
        is then looked up by name on the instance as ``_save_<dst>_rule`` and
        called with ``self.rule_dict``; a ``dst`` without a matching
        attribute makes the lookup raise ``AttributeError``.

        Author: David
        """
        self.__check_vals()
        saver_name = '_save_{}_rule'.format(dst)
        saver = getattr(self, saver_name)
        saver(self.rule_dict)


if __name__ == '__main__':
    # Script entry point: initialise the 'default' environment, load the
    # rules from the 'csv' source and save each one with the default
    # destination of Rule.save().
    # Alternative kept from the original: rule = Rule.load('xici')
    init_env('default')
    rules = Rule.loads('csv')
    for rule in rules:
        rule.save()
    # Print the first loaded rule as a quick visual check.
    print(rules[0])
    # Alternative kept from the original: rule.save_rule()