Exemple #1
0
def load_custom_settings(config):
    """Apply the user's settings file to ``config``, or create one and exit.

    The file named SCRAPYDWEB_SETTINGS_PY is looked up with
    find_scrapydweb_settings_py(), which is given the current working
    directory. When a path is returned it is recorded in
    ``config['SCRAPYDWEB_SETTINGS_PY_PATH']`` and loaded through
    ``config.from_pyfile``.

    When nothing is found, the file at ``config['DEFAULT_SETTINGS_PY_PATH']``
    is copied to ``config['SCRAPYDWEB_SETTINGS_PY_PATH']`` and the process
    exits with instructions, whether or not the copy succeeded.

    :param config: config object supporting item access and ``from_pyfile``.
    :raises SystemExit: always, when no custom settings file was found.
    """
    path = find_scrapydweb_settings_py(SCRAPYDWEB_SETTINGS_PY, os.getcwd())

    if path:
        config['SCRAPYDWEB_SETTINGS_PY_PATH'] = path
        print(STAR)
        printf("Overriding custom settings from %s" % path, warn=True)
        print(STAR)
        config.from_pyfile(path)
        return

    # Best-effort cleanup: the file may simply not exist. Only filesystem
    # errors are ignored so that Ctrl-C and programming errors still surface.
    try:
        os.remove(LAST_CHECK_UPDATE_PATH)
    except OSError:
        pass

    # Any failure while copying (missing source, permissions, missing config
    # key) falls back to asking the user to copy the file manually.
    # ``Exception`` rather than a bare ``except`` keeps KeyboardInterrupt and
    # SystemExit propagating.
    try:
        copyfile(config['DEFAULT_SETTINGS_PY_PATH'],
                 config['SCRAPYDWEB_SETTINGS_PY_PATH'])
    except Exception:
        sys.exit(
            "\n{alert}\nPlease copy the 'default_settings.py' file from the path above "
            "to current working directory,\nand rename it to '{file}'.\n"
            "Then add your SCRAPYD_SERVERS in the config file and restart scrapydweb.\n{alert}"
            .format(alert=ALERT, file=SCRAPYDWEB_SETTINGS_PY))
    else:
        sys.exit(
            "\n{alert}\nThe config file '{file}' has been copied to current working directory.\n"
            "Please add your SCRAPYD_SERVERS in the config file and restart scrapydweb.\n{alert}"
            .format(alert=ALERT, file=SCRAPYDWEB_SETTINGS_PY))
Exemple #2
0
def load_custom_config(config):
    """Apply the user's settings file to ``config``, or create one and exit.

    The file named SCRAPYDWEB_SETTINGS_PY is looked up with
    find_scrapydweb_settings_py(), which is given the current working
    directory. When a path is returned it is stored in the module-level
    ``scrapydweb_settings_py_path`` and loaded through ``config.from_pyfile``.

    When nothing is found, 'default_settings.py' under CWD is copied to
    ``scrapydweb_settings_py_path`` and the process exits with instructions,
    whether or not the copy succeeded.

    NOTE(review): the copy destination is whatever the module-level
    ``scrapydweb_settings_py_path`` holds before this call; its initial value
    is defined outside this function -- confirm it is set at import time.

    :param config: config object providing ``from_pyfile``.
    :raises SystemExit: always, when no custom settings file was found.
    """
    global scrapydweb_settings_py_path

    path = find_scrapydweb_settings_py(SCRAPYDWEB_SETTINGS_PY, os.getcwd())

    print('*' * 100)
    if path:
        scrapydweb_settings_py_path = path
        printf("Overriding custom settings from %s" %
               scrapydweb_settings_py_path,
               warn=True)
        config.from_pyfile(scrapydweb_settings_py_path)
        return

    # Best-effort cleanup: the file may simply not exist. Only filesystem
    # errors are ignored so that Ctrl-C and programming errors still surface.
    try:
        os.remove(LAST_CHECK_UPDATE)
    except OSError:
        pass

    # Any failure while copying falls back to asking the user to copy the
    # file manually. ``Exception`` rather than a bare ``except`` keeps
    # KeyboardInterrupt and SystemExit propagating.
    try:
        copyfile(os.path.join(CWD, 'default_settings.py'),
                 scrapydweb_settings_py_path)
    except Exception:
        sys.exit(
            "Please copy the 'default_settings.py' file from above path to current working directory,\n"
            "and rename it to '%s'.\n"
            "Then add your SCRAPYD_SERVERS in the file and restart scrapydweb."
            % SCRAPYDWEB_SETTINGS_PY)
    else:
        sys.exit(
            "The config file '%s' has been copied to current working directory.\n"
            "Please add your SCRAPYD_SERVERS in the file and restart scrapydweb."
            % SCRAPYDWEB_SETTINGS_PY)
Exemple #3
0
def update_app_config(config, args):
    """Merge command-line arguments into ``config`` and normalise servers.

    Bind address and port are always taken from ``args``. The Scrapyd server
    list comes from ``args.scrapyd_server``, else ``config['SCRAPYD_SERVERS']``,
    else a single local default. Each entry is either a 5-tuple
    ``(usr, psw, ip, port, group)`` or a string parsed with
    ``pattern_scrapyd_server``. The de-duplicated, sorted result is passed to
    check_scrapyd_connectivity() and then written back as three parallel
    lists: SCRAPYD_SERVERS, SCRAPYD_SERVERS_GROUPS and SCRAPYD_SERVERS_AUTHS.

    :param config: dict-like application config, modified in place.
    :param args: parsed command-line arguments.
    """
    printf("Reading settings from command line: %s" % args)

    config.update({
        'SCRAPYDWEB_BIND': args.bind,
        'SCRAPYDWEB_PORT': args.port,
    })

    def stripped_or(value, fallback):
        # Empty, missing or whitespace-only values fall back to the default.
        text = value.strip() if value else ''
        return text or fallback

    def normalize(entry):
        # Turn one raw server entry into a (group, ip, port, auth) tuple.
        if isinstance(entry, tuple):
            assert len(entry) == 5, (
                "Scrapyd server should be a tuple with 5 elements, "
                "current value: %s" % str(entry))
            usr, psw, ip, port, group = entry
        else:
            matched = pattern_scrapyd_server.search(entry.strip())
            usr, psw, ip, port, group = matched.groups()
        credentials = (usr, psw) if usr and psw else None
        return (stripped_or(group, ''),
                stripped_or(ip, '127.0.0.1'),
                stripped_or(port, '6800'),
                credentials)

    def sort_key(server):
        # Left-pad each dot-separated part to 3 chars so that the string
        # comparison orders e.g. '9' before '10'.
        group, ip, port, _auth = server
        padded_ip = '.'.join(part.rjust(3, '0') for part in ip.split('.'))
        return [group, padded_ip, int(port)]

    # scrapyd_server would be None if -ss not passed in
    raw_servers = (args.scrapyd_server
                   or config.get('SCRAPYD_SERVERS', [])
                   or ['127.0.0.1:6800'])
    servers = [normalize(entry) for entry in raw_servers]
    servers = sorted(set(servers), key=sort_key)
    check_scrapyd_connectivity(servers)

    config['SCRAPYD_SERVERS'] = ['%s:%s' % (ip, port) for _, ip, port, _ in servers]
    config['SCRAPYD_SERVERS_GROUPS'] = [server[0] for server in servers]
    config['SCRAPYD_SERVERS_AUTHS'] = [server[3] for server in servers]

    # All of these flags are action='store_true' (default False), so a
    # config value is only overridden when the flag was actually given.
    flag_overrides = (
        ('disable_auth', 'ENABLE_AUTH', False),
        ('disable_cache', 'ENABLE_CACHE', False),
        ('delete_cache', 'DELETE_CACHE', True),
        ('disable_email', 'ENABLE_EMAIL', False),
        ('debug', 'DEBUG', True),
        ('verbose', 'VERBOSE', True),
    )
    for flag, option, value in flag_overrides:
        if getattr(args, flag):
            config[option] = value
Exemple #4
0
def load_custom_config(config):
    """Apply the user's settings file to ``config``, creating it if missing.

    The file named SCRAPYDWEB_SETTINGS_PY is looked up with
    find_scrapydweb_settings_py(), which is given the current working
    directory. When a path is returned it is stored in the module-level
    ``scrapydweb_settings_py_path`` and loaded through ``config.from_pyfile``.

    When nothing is found, 'default_settings.py' under CWD is copied to
    ``scrapydweb_settings_py_path``. On success the function returns
    normally (nothing is loaded from the fresh copy here); on failure the
    process exits with instructions.

    NOTE(review): the copy destination is whatever the module-level
    ``scrapydweb_settings_py_path`` holds before this call; its initial value
    is defined outside this function -- confirm it is set at import time.

    :param config: config object providing ``from_pyfile``.
    :raises SystemExit: when the default settings file cannot be copied.
    """
    global scrapydweb_settings_py_path

    path = find_scrapydweb_settings_py(SCRAPYDWEB_SETTINGS_PY, os.getcwd())

    print('')
    if path:
        scrapydweb_settings_py_path = path
        printf("Overriding custom settings from %s" % scrapydweb_settings_py_path, warn=True)
        config.from_pyfile(scrapydweb_settings_py_path)
    else:
        # Only the copy itself is guarded, and with ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        try:
            copyfile(os.path.join(CWD, 'default_settings.py'), scrapydweb_settings_py_path)
        except Exception:
            sys.exit("!!! Please copy the file 'default_settings.py' from above path to current working directory, "
                     "and rename it to '%s' to custom settings" % SCRAPYDWEB_SETTINGS_PY)
        else:
            printf("The config file '%s' is copied to current working directory, "
                   "and you can custom settings in it" % SCRAPYDWEB_SETTINGS_PY, warn=True)
    print('')
Exemple #5
0
def check_scrapyd_connectivity(servers):
    """Probe every Scrapyd server concurrently and print a result table.

    Each server is requested at ``http://<ip>:<port>`` with a 3 second
    timeout; it counts as connected only when the response status is 200.
    Any error raised by the request counts as not connected.

    :param servers: sequence of ``(group, ip, port, auth)`` tuples, where
        ``auth`` is forwarded unchanged to ``requests.get``.
    :raises SystemExit: when none of the servers could be connected
        (including when ``servers`` is empty).
    """
    printf("Checking connectivity of SCRAPYD_SERVERS")

    def check_connectivity(server):
        _group, ip, port, auth = server
        try:
            r = requests.get('http://%s:%s' % (ip, port), auth=auth, timeout=3)
        except Exception:
            # Unreachable host, timeout, malformed address, ...: all of these
            # simply mean "not connected" for the purpose of this check.
            return False
        # Explicit comparison instead of ``assert``, which is stripped
        # when Python runs with -O.
        return r.status_code == 200

    # ThreadPool rejects a size of 0, so keep at least one worker even for an
    # empty server list; cap at 10 concurrent probes.
    pool = ThreadPool(max(1, min(len(servers), 10)))
    try:
        results = pool.map(check_connectivity, servers)
    finally:
        # Always release the worker threads, even if map() raised.
        pool.close()
        pool.join()

    print("Index {group:<20} {server:<21} Connectivity Auth".format(
        group='Group', server='Scrapyd IP:Port'))
    print('#' * 100)
    for idx, ((group, ip, port, auth),
              result) in enumerate(zip(servers, results), 1):
        print(
            "{idx:_<5} {group:_<20} {server:_<22} {result:_<11} {auth}".format(
                idx=idx,
                group=group or 'None',
                server='%s:%s' % (ip, port),
                auth=auth,
                result=str(result)))
    print('#' * 100)

    if not any(results):
        sys.exit("\n!!! None of your SCRAPYD_SERVERS could be connected.\n"
                 "Check and update the SCRAPYD_SERVERS item in: %s" %
                 scrapydweb_settings_py_path)
Exemple #6
0
def update_app_config(config, args):
    """Overlay command-line arguments onto the application config.

    Bind address and port are always taken from ``args``. The Scrapyd server
    list and the boolean switches only override ``config`` when they were
    actually supplied on the command line.

    :param config: dict-like application config, modified in place.
    :param args: parsed command-line arguments.
    """
    printf("Reading settings from command line: %s" % args)

    config.update({
        'SCRAPYDWEB_BIND': args.bind,
        'SCRAPYDWEB_PORT': args.port,
    })

    # scrapyd_server would be None if the -ss argument is not passed in
    cli_servers = args.scrapyd_server
    if cli_servers:
        config['SCRAPYD_SERVERS'] = cli_servers

    # All of these flags are action='store_true' (default False), so a
    # config value is only overridden when the flag was actually given.
    flag_overrides = (
        ('disable_auth', 'ENABLE_AUTH', False),
        ('disable_logparser', 'ENABLE_LOGPARSER', False),
        ('disable_email', 'ENABLE_EMAIL', False),
        ('debug', 'DEBUG', True),
        ('verbose', 'VERBOSE', True),
    )
    for flag, option, value in flag_overrides:
        if getattr(args, flag):
            config[option] = value
Exemple #7
0
def main():
    """Start ScrapydWeb: load settings, register request hooks, run the app.

    Steps, in order: print startup info, create the app, load the custom
    config, apply command-line arguments, validate the config (exiting with
    a message on AssertionError), optionally start caching, register the
    login check and template variables, then run the server.
    """
    main_pid = os.getpid()
    printf("Main pid: %s" % main_pid)
    printf("ScrapydWeb version: %s" % __version__)
    printf("Use the 'scrapydweb -h' command to get help")
    default_settings_py = os.path.join(CWD, 'default_settings.py')
    printf("Loading default settings from %s" % default_settings_py)

    app = create_app()
    settings = app.config
    load_custom_config(settings)

    cli_args = parse_args(settings)
    # "scrapydweb -h" ends up here
    update_app_config(settings, cli_args)
    try:
        check_app_config(settings)
    except AssertionError as err:
        message = "\n!!! %s\nCheck and update your settings in: %s" % (
            err, scrapydweb_settings_py_path)
        sys.exit(message)

    caching_pid = (init_caching(settings, main_pid)
                   if settings.get('ENABLE_CACHE', True) else None)

    # Runs before every request, so all routes are covered at once:
    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        if not settings.get('ENABLE_AUTH', False):
            return None
        auth = request.authorization
        # str(): the values may be e.g. 0 when read from the config file
        expected_username = str(settings.get('USERNAME', ''))
        expected_password = str(settings.get('PASSWORD', ''))
        if (auth and auth.username == expected_username
                and auth.password == expected_password):
            return None
        return authenticate()

    # Development aid only (must stay disabled in released versions): an
    # after_request hook setting no-cache headers, see
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask

    @app.context_processor
    def inject_variable():
        # Extra variables made available to templates.
        return {
            'main_pid': main_pid,
            'caching_pid': caching_pid,
            'CHECK_LATEST_VERSION_FREQ': 100,
            'scrapydweb_settings_py_path': scrapydweb_settings_py_path,
        }

    port = settings['SCRAPYDWEB_PORT']
    visit_hint = "Visit ScrapydWeb at http://127.0.0.1:{port} or http://{bind}:{port}".format(
        bind='IP-OF-CURRENT-HOST', port=port)
    printf(visit_hint)

    # flask/app.py: run(self, host=None, port=None, debug=None, load_dotenv=True, **options)
    # Threaded mode is enabled by default.
    app.run(host=settings['SCRAPYDWEB_BIND'], port=port)
Exemple #8
0
def main():
    """Start ScrapydWeb: load settings, register request hooks, run the app.

    Steps, in order: print startup info, create the app and seed its config
    with the main pid and settings paths, load the custom settings, apply
    command-line arguments, validate the config (exiting with a message on
    AssertionError), register the login check and template variables, set
    FLASK_DEBUG from the DEBUG option, then run the server over HTTP or
    HTTPS depending on ENABLE_HTTPS.
    """
    main_pid = os.getpid()
    printf("ScrapydWeb version: %s" % __version__)
    printf("Use 'scrapydweb -h' to get help")
    printf("Main pid: %s" % main_pid)
    printf("Loading default settings from %s" % DEFAULT_SETTINGS_PY_PATH)

    app = create_app()
    settings = app.config
    settings['MAIN_PID'] = main_pid
    settings['DEFAULT_SETTINGS_PY_PATH'] = DEFAULT_SETTINGS_PY_PATH
    settings['SCRAPYDWEB_SETTINGS_PY_PATH'] = os.path.join(
        os.getcwd(), SCRAPYDWEB_SETTINGS_PY)
    load_custom_settings(settings)

    cli_args = parse_args(settings)
    # "scrapydweb -h" ends up here
    update_app_config(settings, cli_args)
    try:
        check_app_config(settings)
    except AssertionError as err:
        message = "\n{alert}\n{err}\nCheck and update your settings in {path}\n{alert}".format(
            alert=ALERT, err=err, path=settings['SCRAPYDWEB_SETTINGS_PY_PATH'])
        sys.exit(message)

    # Runs before every request, so all routes are covered at once:
    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        if not settings.get('ENABLE_AUTH', False):
            return None
        auth = request.authorization
        # str(): the values may be e.g. 0 when read from the config file
        expected_username = str(settings.get('USERNAME', ''))
        expected_password = str(settings.get('PASSWORD', ''))
        if (auth and auth.username == expected_username
                and auth.password == expected_password):
            return None
        return authenticate()

    # Development aid only (MUST stay disabled in released versions): an
    # after_request hook setting no-cache headers, see
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask

    @app.context_processor
    def inject_variable():
        # Extra variables made available to templates; missing or empty
        # server settings fall back to a single local Scrapyd server.
        servers = settings.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        groups = settings.get('SCRAPYD_SERVERS_GROUPS', []) or ['']
        auths = settings.get('SCRAPYD_SERVERS_AUTHS', []) or [None]
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS': groups,
            'SCRAPYD_SERVERS_AUTHS': auths,
            'DAEMONSTATUS_REFRESH_INTERVAL': settings.get(
                'DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': settings.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': settings.get('SHOW_SCRAPYD_ITEMS', True),
        }

    # FLASK_ENV is deliberately left untouched (flask/helpers.py get_env()
    # defaults to 'production'); background in
    # https://github.com/my8100/scrapydweb/issues/17 and
    # http://flask.pocoo.org/docs/1.0/cli/?highlight=flask_debug#environments

    # http://flask.pocoo.org/docs/1.0/config/?highlight=flask_debug#environment-and-debug-features
    debug_enabled = settings.get('DEBUG', False)
    os.environ['FLASK_DEBUG'] = '1' if debug_enabled else '0'
    if debug_enabled:
        printf(
            "It's not recommended to run ScrapydWeb in debug mode, set 'DEBUG = False' instead.",
            warn=True)

    # Passing a (certificate, private key) pair as ssl_context serves HTTPS,
    # None serves plain HTTP: https://stackoverflow.com/a/28590266/10517783
    # (see also site-packages/werkzeug/serving.py)
    https_enabled = settings.get('ENABLE_HTTPS', False)
    protocol = 'https' if https_enabled else 'http'
    context = ((settings['CERTIFICATE_FILEPATH'], settings['PRIVATEKEY_FILEPATH'])
               if https_enabled else None)

    print(STAR)
    visit_hint = (
        "Visit ScrapydWeb at {protocol}://127.0.0.1:{port} or {protocol}://IP-OF-THE-CURRENT-HOST:{port}"
        .format(protocol=protocol, port=settings['SCRAPYDWEB_PORT']))
    printf(visit_hint)
    printf(
        "For running Flask in production, check out http://flask.pocoo.org/docs/1.0/deploying/",
        warn=True)
    print(STAR)

    # site-packages/flask/app.py: threaded mode is enabled by default.
    app.run(host=settings['SCRAPYDWEB_BIND'],
            port=settings['SCRAPYDWEB_PORT'],
            ssl_context=context)
Exemple #9
0
def main():
    """Start ScrapydWeb: load settings, register request hooks, run the app.

    Steps, in order: print startup info, create the app, load the custom
    config, apply command-line arguments, validate the config (exiting with
    a message on AssertionError), start caching unless DISABLE_CACHE is set,
    register the login check and template variables, then run the server.
    """
    main_pid = os.getpid()
    printf("Main pid: %s" % main_pid)
    printf("scrapydweb version: %s" % __version__)
    printf("Run 'scrapydweb -h' to get help")
    default_settings_py = os.path.join(CWD, 'default_settings.py')
    printf("Loading default settings from %s" % default_settings_py)

    app = create_app()
    settings = app.config
    load_custom_config(settings)

    cli_args = parse_args(settings)
    # "scrapydweb -h" would end up here
    update_app_config(settings, cli_args)
    try:
        check_app_config(settings)
    except AssertionError as err:
        message = "\n!!! %s\nCheck out your settings in: %s" % (err, scrapydweb_settings_py_path)
        sys.exit(message)

    caching_pid = (None if settings.get('DISABLE_CACHE', False)
                   else init_caching(settings, main_pid))

    # Login is required only when DISABLE_AUTH is explicitly set to a falsy
    # value (a missing DISABLE_AUTH counts as disabled). The credentials are
    # read once, here at startup.
    REQUIRE_LOGIN = not settings.get('DISABLE_AUTH', True)
    USERNAME = str(settings.get('USERNAME', ''))  # May be 0 from config file
    PASSWORD = str(settings.get('PASSWORD', ''))

    # Runs before every request, so all routes are covered at once:
    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        if not REQUIRE_LOGIN:
            return None
        auth = request.authorization
        if auth and auth.username == USERNAME and auth.password == PASSWORD:
            return None
        return authenticate()

    @app.context_processor
    def inject_variable():
        # Extra variables made available to templates.
        servers = settings['SCRAPYD_SERVERS']
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS': settings['SCRAPYD_SERVERS_GROUPS'],
            'SCRAPYD_SERVERS_AUTHS': settings['SCRAPYD_SERVERS_AUTHS'],
            'PYTHON_VERSION': '.'.join(map(str, sys.version_info[:3])),
            'SCRAPYDWEB_VERSION': __version__,
            'CHECK_LATEST_VERSION_FREQ': 30,
            'DEFAULT_LATEST_VERSION': DEFAULT_LATEST_VERSION,
            'GITHUB_URL': __url__,
            'SHOW_SCRAPYD_ITEMS': settings.get('SHOW_SCRAPYD_ITEMS', True),
            'DAEMONSTATUS_REFRESH_INTERVAL': int(settings.get('DAEMONSTATUS_REFRESH_INTERVAL', 10)),
            'REQUIRE_LOGIN': REQUIRE_LOGIN,
            'scrapydweb_settings_py_path': scrapydweb_settings_py_path,
            'main_pid': main_pid,
            'caching_pid': caching_pid,
        }

    port = settings['SCRAPYDWEB_PORT']
    visit_hint = "Visit ScrapydWeb at http://{bind}:{port} or http://127.0.0.1:{port}".format(
        bind='IP-OF-CURRENT-HOST', port=port)
    printf(visit_hint)

    # flask/app.py: run(self, host=None, port=None, debug=None, load_dotenv=True, **options)
    # Threaded mode is enabled by default.
    app.run(host=settings['SCRAPYDWEB_BIND'], port=port)