Ejemplo n.º 1
0
def load_custom_settings(config):
    """Load the user's settings file into ``config``, or create one and exit.

    Asks find_scrapydweb_settings_py() to locate SCRAPYDWEB_SETTINGS_PY,
    passing the current working directory. When a path is returned it is
    stored in ``config['SCRAPYDWEB_SETTINGS_PY_PATH']`` and loaded with
    ``config.from_pyfile()``.

    When no settings file is found, the file at
    ``config['DEFAULT_SETTINGS_PY_PATH']`` is copied to
    ``config['SCRAPYDWEB_SETTINGS_PY_PATH']`` and the process exits in either
    case, with a message telling the user what to do next.

    :param config: config object supporting item access and ``from_pyfile()``
                   (main() passes ``app.config``).
    :raises SystemExit: always, when no settings file could be found.
    """
    path = find_scrapydweb_settings_py(SCRAPYDWEB_SETTINGS_PY, os.getcwd())

    if path:
        config['SCRAPYDWEB_SETTINGS_PY_PATH'] = path
        print(u"{star}Overriding custom settings from {path}{star}".format(
            star=STAR, path=handle_slash(path)))
        config.from_pyfile(path)
        return

    logger.error("%s not found: ", SCRAPYDWEB_SETTINGS_PY)
    try:
        # The key lookups stay inside the try so that a missing key is handled
        # the same way as a failed copy.
        copyfile(config['DEFAULT_SETTINGS_PY_PATH'],
                 config['SCRAPYDWEB_SETTINGS_PY_PATH'])
    except Exception as err:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not mistaken for a failed copy.
        # Log the cause so the user can tell why the copy did not work.
        logger.error("Failed to create %s automatically: %s", SCRAPYDWEB_SETTINGS_PY, err)
        sys.exit(
            "\nPlease copy the 'default_settings.py' file from the path above to current working directory,\n"
            "and rename it to '{file}'.\n"
            "Then add your SCRAPYD_SERVERS in the config file and restart scrapydweb.\n"
            .format(file=SCRAPYDWEB_SETTINGS_PY))
    else:
        # The copy succeeded: still exit so the user can edit the new file
        # before the application starts.
        sys.exit(
            "\nATTENTION:\nYou may encounter ERROR if there are any timer tasks added in v1.2.0,\n"
            "and you have to restart scrapydweb and manually restart the stopped tasks.\n"
            "\nThe config file '{file}' has been copied to current working directory.\n"
            "Please add your SCRAPYD_SERVERS in the config file and restart scrapydweb.\n"
            .format(file=SCRAPYDWEB_SETTINGS_PY))
Ejemplo n.º 2
0
def main():
    """Entry point: create the app, load and validate settings, then serve.

    Steps, in order:
      1. Log version/pid information and create the Flask app.
      2. Record the main pid and the settings file paths in ``app.config``,
         then load the custom settings file and the command-line arguments.
      3. Validate the resulting config; exit with a hint on AssertionError.
      4. Register the HTTP basic-auth hook and the template context processor.
      5. Set FLASK_DEBUG, pick HTTP or HTTPS and run the server.
    """
    # Hide warning logging in scheduler.py until app.run()
    apscheduler_logger.setLevel(logging.ERROR)

    pid = os.getpid()
    logger.info("ScrapydWeb version: %s", __version__)
    logger.info("Use 'scrapydweb -h' to get help")
    logger.info("Main pid: %s", pid)
    logger.debug("Loading default settings from %s", handle_slash(DEFAULT_SETTINGS_PY_PATH))

    app = create_app()
    # In handle_metadata(): with db.app.app_context():
    handle_metadata('main_pid', pid)

    config = app.config
    config['MAIN_PID'] = pid
    config['DEFAULT_SETTINGS_PY_PATH'] = DEFAULT_SETTINGS_PY_PATH
    config['SCRAPYDWEB_SETTINGS_PY_PATH'] = os.path.join(os.getcwd(), SCRAPYDWEB_SETTINGS_PY)
    load_custom_settings(config)

    # "scrapydweb -h" ends up inside parse_args()
    args = parse_args(config)
    update_app_config(config, args)
    try:
        check_app_config(config)
    except AssertionError as err:
        logger.error("Check app config fail: ")
        settings_path = handle_slash(config['SCRAPYDWEB_SETTINGS_PY_PATH'])
        sys.exit(u"\n{err}\n\nCheck and update your settings in {path}\n".format(
            err=err, path=settings_path))

    # Guard every route at once, see
    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        if not app.config.get('ENABLE_AUTH', False):
            return None
        credentials = request.authorization
        # str() because the configured values may be numbers (e.g. 0) in the config file
        expected_username = str(app.config.get('USERNAME', ''))
        expected_password = str(app.config.get('PASSWORD', ''))
        if (credentials
                and credentials.username == expected_username
                and credentials.password == expected_password):
            return None
        return authenticate()

    # Development-only hook that disables caching; it MUST stay commented out
    # for released versions.
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask
    # @app.after_request
    # def add_header(r):
        # r.headers['Pragma'] = 'no-cache'
        # r.headers['Expires'] = '0'
        # r.headers['Cache-Control'] = 'public, max-age=0'
        # return r

    @app.context_processor
    def inject_variable():
        # Values made available to every template; empty settings fall back
        # to single-element defaults.
        servers = app.config.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        public_urls = app.config.get('SCRAPYD_SERVERS_PUBLIC_URLS', None)
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS': app.config.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            'SCRAPYD_SERVERS_AUTHS': app.config.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            'SCRAPYD_SERVERS_PUBLIC_URLS': public_urls or [None] * len(servers),

            'DAEMONSTATUS_REFRESH_INTERVAL': app.config.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': app.config.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': app.config.get('SHOW_SCRAPYD_ITEMS', True),
        }

    # Related to https://github.com/my8100/scrapydweb/issues/17
    # http://flask.pocoo.org/docs/1.0/cli/?highlight=flask_debug#environments
    # flask/helpers.py: get_env() defaults to 'production'.
    # Forcing FLASK_ENV to 'development' here is deliberately left disabled:
    # if not os.environ.get('FLASK_ENV'):
        # os.environ['FLASK_ENV'] = 'development'

    # http://flask.pocoo.org/docs/1.0/config/?highlight=flask_debug#environment-and-debug-features
    debug_enabled = config.get('DEBUG', False)
    os.environ['FLASK_DEBUG'] = '1' if debug_enabled else '0'
    if debug_enabled:
        logger.info("Note that use_reloader is set to False in run.py")

    # Threaded mode is enabled by default (site-packages/flask/app.py).
    # HTTP vs HTTPS: https://stackoverflow.com/a/28590266/10517783
    # See also site-packages/werkzeug/serving.py and
    # https://stackoverflow.com/questions/13895176/sqlalchemy-and-sqlite-database-is-locked
    https_enabled = config.get('ENABLE_HTTPS', False)
    scheme = 'https' if https_enabled else 'http'
    # The certificate settings are only read when HTTPS is enabled.
    ssl_context = (
        (config['CERTIFICATE_FILEPATH'], config['PRIVATEKEY_FILEPATH'])
        if https_enabled else None
    )

    port = config['SCRAPYDWEB_PORT']
    banner = ("{star}Visit ScrapydWeb at {protocol}://127.0.0.1:{port} "
              "or {protocol}://IP-OF-THE-CURRENT-HOST:{port}{star}\n")
    print(banner.format(star=STAR, protocol=scheme, port=port))
    logger.info("For running Flask in production, check out http://flask.pocoo.org/docs/1.0/deploying/")

    # Restore scheduler logging right before the server starts.
    apscheduler_logger.setLevel(logging.DEBUG)
    app.run(host=config['SCRAPYDWEB_BIND'], port=port,
            ssl_context=ssl_context, use_reloader=False)