Example #1
0
def update_app_config(config, args):
    """Overlay the parsed command-line arguments onto the app config.

    :param config: mutable mapping of application settings; updated in place.
    :param args: namespace of parsed command-line options.
    :return: None
    """
    logger.debug("Reading settings from command line: %s", args)

    def apply_flags(flag_settings):
        # For every (option name, config key, value) triple, write the value
        # into config only when the option is truthy on args.
        for flag, key, value in flag_settings:
            if getattr(args, flag):
                config[key] = value

    # Bind address and port are always copied over.
    config.update({
        'SCRAPYDWEB_BIND': args.bind,
        'SCRAPYDWEB_PORT': args.port,
    })

    # scrapyd_server stays None when -ss is not passed in, in which case
    # the SCRAPYD_SERVERS entry is left untouched.
    servers = args.scrapyd_server
    if servers:
        config['SCRAPYD_SERVERS'] = servers

    # The remaining options are action='store_true' flags (default False).
    apply_flags([
        ('disable_auth', 'ENABLE_AUTH', False),
        ('disable_logparser', 'ENABLE_LOGPARSER', False),
    ])

    if args.switch_scheduler_state:
        # Toggle the stored scheduler state; a missing entry counts as running.
        stored_state = handle_metadata().get('scheduler_state', STATE_RUNNING)
        toggled_state = STATE_PAUSED if stored_state == STATE_RUNNING else STATE_RUNNING
        handle_metadata('scheduler_state', toggled_state)

    apply_flags([
        ('disable_monitor', 'ENABLE_MONITOR', False),
        ('debug', 'DEBUG', True),
        ('verbose', 'VERBOSE', True),
    ])
Example #2
0
def main():
    """Command-line entry point: build the Flask app, apply settings and start the server.

    Steps, in order:

    1. Create the app and record the current process id in the metadata
       store and in ``app.config['MAIN_PID']``.
    2. Load the custom settings file, then overlay the command-line arguments.
    3. Validate the resulting config; on ``AssertionError`` log an error and
       exit with a message pointing at the settings file.
    4. Register the basic-auth ``before_request`` hook and the template
       context processor.
    5. Set ``FLASK_DEBUG``, choose HTTP or HTTPS, and call ``app.run()``.

    The order of these steps matters (e.g. settings must be loaded before the
    arguments are parsed because the parser reads its defaults from the config).
    """
    apscheduler_logger.setLevel(logging.ERROR)  # To hide warning logging in scheduler.py until app.run()
    main_pid = os.getpid()
    logger.info("ScrapydWeb version: %s", __version__)
    logger.info("Use 'scrapydweb -h' to get help")
    logger.info("Main pid: %s", main_pid)
    logger.debug("Loading default settings from %s", handle_slash(DEFAULT_SETTINGS_PY_PATH))
    app = create_app()
    handle_metadata('main_pid', main_pid)  # In handle_metadata(): with db.app.app_context():
    app.config['MAIN_PID'] = main_pid
    app.config['DEFAULT_SETTINGS_PY_PATH'] = DEFAULT_SETTINGS_PY_PATH
    # The custom settings file is looked up in the current working directory
    app.config['SCRAPYDWEB_SETTINGS_PY_PATH'] = os.path.join(os.getcwd(), SCRAPYDWEB_SETTINGS_PY)
    load_custom_settings(app.config)

    args = parse_args(app.config)
    # "scrapydweb -h" ends up here
    update_app_config(app.config, args)
    try:
        check_app_config(app.config)
    except AssertionError as err:
        # A failed check is fatal: report it and point the user at the settings file
        logger.error("Check app config fail: ")
        sys.exit(u"\n{err}\n\nCheck and update your settings in {path}\n".format(
                 err=err, path=handle_slash(app.config['SCRAPYDWEB_SETTINGS_PY_PATH'])))

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Enforce basic auth on every request when ENABLE_AUTH is set.

        Returns the result of ``authenticate()`` (presumably a 401 challenge
        response -- confirm against its definition) when credentials are
        missing or do not match; otherwise returns None implicitly.
        """
        if app.config.get('ENABLE_AUTH', False):
            auth = request.authorization
            USERNAME = str(app.config.get('USERNAME', ''))  # May be 0 from config file
            PASSWORD = str(app.config.get('PASSWORD', ''))
            if not auth or not (auth.username == USERNAME and auth.password == PASSWORD):
                return authenticate()

    # MUST be commented out for released version
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask
    # @app.after_request
    # def add_header(r):
        # r.headers['Pragma'] = 'no-cache'
        # r.headers['Expires'] = '0'
        # r.headers['Cache-Control'] = 'public, max-age=0'
        # return r

    @app.context_processor
    def inject_variable():
        """Return the settings-derived variables made available to templates.

        Empty or missing server-related settings fall back to single-element
        placeholder lists; the public URL list falls back to one None per
        Scrapyd server.
        """
        SCRAPYD_SERVERS = app.config.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        SCRAPYD_SERVERS_PUBLIC_URLS = app.config.get('SCRAPYD_SERVERS_PUBLIC_URLS', None)
        return dict(
            SCRAPYD_SERVERS=SCRAPYD_SERVERS,
            SCRAPYD_SERVERS_AMOUNT=len(SCRAPYD_SERVERS),
            SCRAPYD_SERVERS_GROUPS=app.config.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            SCRAPYD_SERVERS_AUTHS=app.config.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            SCRAPYD_SERVERS_PUBLIC_URLS=SCRAPYD_SERVERS_PUBLIC_URLS or [None] * len(SCRAPYD_SERVERS),

            DAEMONSTATUS_REFRESH_INTERVAL=app.config.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            ENABLE_AUTH=app.config.get('ENABLE_AUTH', False),
            SHOW_SCRAPYD_ITEMS=app.config.get('SHOW_SCRAPYD_ITEMS', True),
        )

    # To solve https://github.com/my8100/scrapydweb/issues/17
    # http://flask.pocoo.org/docs/1.0/cli/?highlight=flask_debug#environments
    # flask/helpers.py: get_env() The default is 'production'
    # On Windows, get/set/delete: set FLASK_ENV, set FLASK_ENV=production, set FLASK_ENV=
    # if not os.environ.get('FLASK_ENV'):
        # os.environ['FLASK_ENV'] = 'development'
        # print("The environment variable 'FLASK_ENV' has been set to 'development'")
        # print("WARNING: Do not use the development server in a production. "
               # "Check out http://flask.pocoo.org/docs/1.0/deploying/")

    # http://flask.pocoo.org/docs/1.0/config/?highlight=flask_debug#environment-and-debug-features
    # Mirror the DEBUG setting into the FLASK_DEBUG environment variable
    if app.config.get('DEBUG', False):
        os.environ['FLASK_DEBUG'] = '1'
        logger.info("Note that use_reloader is set to False in run.py")
    else:
        os.environ['FLASK_DEBUG'] = '0'

    # site-packages/flask/app.py
    # Threaded mode is enabled by default.
    # https://stackoverflow.com/a/28590266/10517783 to run in HTTP or HTTPS mode
    # site-packages/werkzeug/serving.py
    # https://stackoverflow.com/questions/13895176/sqlalchemy-and-sqlite-database-is-locked
    if app.config.get('ENABLE_HTTPS', False):
        protocol = 'https'
        # (certificate file, private key file) pair passed on as ssl_context
        context = (app.config['CERTIFICATE_FILEPATH'], app.config['PRIVATEKEY_FILEPATH'])
    else:
        protocol = 'http'
        context = None

    print("{star}Visit ScrapydWeb at {protocol}://127.0.0.1:{port} "
          "or {protocol}://IP-OF-THE-CURRENT-HOST:{port}{star}\n".format(
           star=STAR, protocol=protocol, port=app.config['SCRAPYDWEB_PORT']))
    logger.info("For running Flask in production, check out http://flask.pocoo.org/docs/1.0/deploying/")
    # Restore scheduler logging that was reduced to ERROR at the top of main()
    apscheduler_logger.setLevel(logging.DEBUG)
    app.run(host=app.config['SCRAPYDWEB_BIND'], port=app.config['SCRAPYDWEB_PORT'],
            ssl_context=context, use_reloader=False)
Example #3
0
def parse_args(config):
    """Build the ScrapydWeb command-line parser and parse ``sys.argv``.

    Values currently held in *config* are used as the defaults for ``--bind``
    and ``--port`` and are echoed in every option's help text.

    :param config: mapping of application settings; only read here.
    :return: the ``argparse.Namespace`` returned by ``parser.parse_args()``.
             ``port`` is always an int, whether it comes from the config
             default or from the command line.
    """
    def current(value):
        # argparse applies %-formatting to help strings itself, so a literal
        # '%' coming from a config value (e.g. inside a password) has to be
        # doubled, otherwise printing the help fails or comes out garbled.
        # Converting to str first also keeps tuple values from being
        # unpacked by the '%' operator below.
        return str(value).replace('%', '%%')

    parser = argparse.ArgumentParser(description='ScrapydWeb -- %s' % __description__)

    SCRAPYDWEB_BIND = config.get('SCRAPYDWEB_BIND', '0.0.0.0')
    parser.add_argument(
        '-b', '--bind',
        default=SCRAPYDWEB_BIND,
        help=("current: %s, note that setting to 0.0.0.0 or IP-OF-THE-CURRENT-HOST would make ScrapydWeb server "
              "visible externally, otherwise, type '-b 127.0.0.1'") % current(SCRAPYDWEB_BIND)
    )

    SCRAPYDWEB_PORT = config.get('SCRAPYDWEB_PORT', 5000)
    parser.add_argument(
        '-p', '--port',
        default=SCRAPYDWEB_PORT,
        # Without a type, a port given on the command line would be a str
        # while the config default is an int.
        type=int,
        help="current: %s, accept connections on the specified port" % current(SCRAPYDWEB_PORT)
    )

    SCRAPYD_SERVERS = config.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
    # No default here: the option stays None unless -ss is given at least once.
    parser.add_argument(
        '-ss', '--scrapyd_server',
        action='append',
        help=("current: %s, type '-ss 127.0.0.1 -ss username:password@192.168.123.123:6801#group' "
              "to set up more than one Scrapyd server to manage. ") % current(SCRAPYD_SERVERS)
    )

    ENABLE_AUTH = config.get('ENABLE_AUTH', False)
    parser.add_argument(
        '-da', '--disable_auth',
        action='store_true',
        help=("current: ENABLE_AUTH = %s, append '--disable_auth' to disable basic auth "
              "for web UI") % current(ENABLE_AUTH)
    )

    ENABLE_LOGPARSER = config.get('ENABLE_LOGPARSER', False)
    parser.add_argument(
        '-dlp', '--disable_logparser',
        action='store_true',
        help=("current: ENABLE_LOGPARSER = %s, append '--disable_logparser' to disable running LogParser "
              "as a subprocess at startup") % current(ENABLE_LOGPARSER)
    )

    # The scheduler state is read from the metadata store, not from config;
    # a missing entry is treated as running.
    SCHEDULER_STATE = SCHEDULER_STATE_DICT[handle_metadata().get('scheduler_state', STATE_RUNNING)]
    parser.add_argument(
        '-sw', '--switch_scheduler_state',
        action='store_true',
        help=("current: %s, append '--switch_scheduler_state' to switch the state of scheduler "
              "for timer tasks") % current(SCHEDULER_STATE)
    )

    ENABLE_MONITOR = config.get('ENABLE_MONITOR', False)
    parser.add_argument(
        '-dm', '--disable_monitor',
        action='store_true',
        help=("current: ENABLE_MONITOR = %s, append '--disable_monitor' to disable "
              "monitor") % current(ENABLE_MONITOR)
    )

    DEBUG = config.get('DEBUG', False)
    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help=("current: DEBUG = %s, append '--debug' to enable debug mode "
              "and the debugger would be available in the browser") % current(DEBUG)
    )

    VERBOSE = config.get('VERBOSE', False)
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help=("current: VERBOSE = %s, append '--verbose' to set the logging level to DEBUG "
              "for getting more information about how ScrapydWeb works") % current(VERBOSE)
    )

    return parser.parse_args()
Example #4
0
# Module-level setup: the app object is created at import time
# (presumably so a WSGI server can import `application` -- confirm with the deployment).
application = create_app()
# Both settings paths are set to the same default_settings.py file.
application.config.update(
    DEFAULT_SETTINGS_PY_PATH='/var/www/html/scrapydweb/default_settings.py',
    SCRAPYDWEB_SETTINGS_PY_PATH='/var/www/html/scrapydweb/default_settings.py',
)

# All three *_PID entries are given the id of the current process.
main_pid = os.getpid()
application.config.update(
    MAIN_PID=main_pid,
    LOGPARSER_PID=main_pid,
    POLL_PID=main_pid,
)

logger = logging.getLogger(__name__)
apscheduler_logger = logging.getLogger('apscheduler')

# Record the process id in the metadata store as well.
handle_metadata('main_pid', main_pid)


@application.context_processor
def inject_variable():
    SCRAPYD_SERVERS = application.config.get('SCRAPYD_SERVERS',
                                             []) or ['127.0.0.1:6800']
    SCRAPYD_SERVERS_PUBLIC_URLS = application.config.get(
        'SCRAPYD_SERVERS_PUBLIC_URLS', None)
    return dict(
        SCRAPYD_SERVERS=SCRAPYD_SERVERS,
        SCRAPYD_SERVERS_AMOUNT=len(SCRAPYD_SERVERS),
        SCRAPYD_SERVERS_GROUPS=application.config.get('SCRAPYD_SERVERS_GROUPS',
                                                      []) or [''],
        SCRAPYD_SERVERS_AUTHS=application.config.get('SCRAPYD_SERVERS_AUTHS',
                                                     []) or [None],