Exemple #1
0
def app():
    """Build a ScrapydWeb app for testing and yield it.

    Generator function (presumably registered as a pytest fixture; the
    decorator is not visible here). The base test configuration is
    overridden by every key found in ``custom_settings`` before the app
    is created.
    """
    local_server = custom_settings['_SCRAPYD_SERVER']
    local_auth = custom_settings['_SCRAPYD_SERVER_AUTH']

    config = {
        'TESTING': True,
        # Second server/auth pair is a deliberately non-existent host.
        'SCRAPYD_SERVERS': [local_server, 'not-exist:6801'],
        'SCRAPYD_SERVERS_AUTHS': [local_auth, ('username', 'password')],
        'SCRAPYD_SERVERS_GROUPS': ['', 'Scrapyd-group'],
        'SCRAPY_PROJECTS_DIR': os.path.join(CWD, 'data'),
        'SCRAPYD_LOGS_DIR': '',
        'EMAIL_WORKING_DAYS': list(range(1, 8)),
        'EMAIL_WORKING_HOURS': list(range(24)),
        'ON_JOB_FINISHED': True,
        'LOG_CRITICAL_THRESHOLD': 1,
        'LOG_CRITICAL_TRIGGER_FORCESTOP': True,
    }
    # Custom settings take precedence over the defaults above.
    config.update(custom_settings)

    flask_app = create_app(config)

    @flask_app.context_processor
    def inject_variable():
        """Expose CHECK_LATEST_VERSION_FREQ to every rendered template."""
        return {'CHECK_LATEST_VERSION_FREQ': 100}

    yield flask_app
Exemple #2
0
def app():
    """Build a single-server ScrapydWeb test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here). ``email_settings`` is merged on top of the base
    configuration before the app is created.
    """
    config = {
        'TESTING': True,
        'SCRAPYD_SERVERS': ['127.0.0.1:6800'],
        'SCRAPYD_SERVERS_AUTHS': [None],
        'SCRAPY_PROJECTS_DIR': os.path.join(CWD, 'data'),
        'SCRAPYD_LOGS_DIR': '',
    }

    # For test_a_factory.py and test_email() in test_log.py
    config.update(email_settings)

    flask_app = create_app(config)

    @flask_app.context_processor
    def inject_variable():
        """Provide template variables sized to the configured server list."""
        servers = flask_app.config['SCRAPYD_SERVERS']
        amount = len(servers)
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_GROUPS': ['Scrapyd-group'] * amount,
            'SCRAPYD_SERVERS_AUTHS':
                [('Scrapyd-username', 'Scrapyd-password')] * amount,
            'DEFAULT_LATEST_VERSION': DEFAULT_LATEST_VERSION,
            'DAEMONSTATUS_REFRESH_INTERVAL': 0,
            'SCRAPYD_SERVERS_AMOUNT': amount,
        }

    yield flask_app
Exemple #3
0
def app():
    """Build a two-server ScrapydWeb test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here). The first server comes from ``custom_settings``; the
    second is a fake domain. When an auth pair is configured for the real
    server, ``_SCRAPYD_SERVERS`` carries it in ``user:pass@host`` form.
    """
    fake_server = 'scrapydweb-fake-domain.com:443'
    real_server = custom_settings['_SCRAPYD_SERVER']
    SCRAPYD_SERVERS = [real_server, fake_server]

    real_auth = custom_settings['_SCRAPYD_SERVER_AUTH']
    if real_auth:
        username, password = real_auth
        authed_server = '%s:%s@%s' % (username, password, real_server)
        _SCRAPYD_SERVERS = [authed_server, fake_server]
    else:
        _SCRAPYD_SERVERS = SCRAPYD_SERVERS

    config = {
        'TESTING': True,
        # SERVER_NAME='127.0.0.1:5000',  # http://flask.pocoo.org/docs/0.12/config/#builtin-configuration-values
        'DEFAULT_SETTINGS_PY_PATH': '',
        'SCRAPYDWEB_SETTINGS_PY_PATH': '',
        'MAIN_PID': os.getpid(),
        'LOGPARSER_PID': 0,
        'POLL_PID': 0,
        'SCRAPYD_SERVERS': SCRAPYD_SERVERS,
        '_SCRAPYD_SERVERS': _SCRAPYD_SERVERS,
        'LOCAL_SCRAPYD_SERVER': real_server,
        'SCRAPYD_SERVERS_AUTHS': [real_auth, ('username', '123456abcdef')],
        'SCRAPYD_SERVERS_GROUPS': ['', 'Scrapyd-group'],
        'SCRAPY_PROJECTS_DIR': os.path.join(cst.ROOT_DIR, 'data'),
        'ENABLE_LOGPARSER': False,
        'ALERT_WORKING_DAYS': list(range(1, 8)),
        'ALERT_WORKING_HOURS': list(range(24)),
        'VERBOSE': True,
    }

    # Custom settings take precedence over the defaults above.
    config.update(custom_settings)

    flask_app = create_app(config)

    @flask_app.context_processor
    def inject_variable():
        """Provide template variables, falling back to defaults when unset."""
        cfg = flask_app.config
        servers = cfg.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS':
                cfg.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            'SCRAPYD_SERVERS_AUTHS':
                cfg.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            'SCRAPYD_SERVERS_PUBLIC_URLS': [''] * len(servers),
            'DAEMONSTATUS_REFRESH_INTERVAL':
                cfg.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': cfg.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': cfg.get('SHOW_SCRAPYD_ITEMS', True),
        }

    yield flask_app
Exemple #4
0
def app():
    """Build a minimal single-server test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here).
    """
    settings = {'TESTING': True, 'SCRAPYD_SERVERS': ['127.0.0.1:6800']}
    flask_app = create_app(settings)

    @flask_app.context_processor
    def inject_variable():
        """Provide the server list plus one empty group name per server."""
        servers = flask_app.config['SCRAPYD_SERVERS']
        return dict(
            SCRAPYD_SERVERS=servers,
            SCRAPYD_SERVERS_GROUP=[""] * len(servers),
            DEFAULT_LATEST_VERSION=DEFAULT_LATEST_VERSION,
        )

    yield flask_app
Exemple #5
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Steps, in order:
      1. Print version and usage hints.
      2. Create the app with its default settings.
      3. Overlay the settings file located by
         ``find_scrapydweb_settings_py()``; if none is found, try to copy
         ``default_settings.py`` into the working directory.
      4. Parse, check and apply command-line arguments.
      5. Start background caching unless ``DISABLE_CACHE`` is set.
      6. Register the template context processor and call ``app.run``.
    """
    print(">>> scrapydweb version: %s" % __version__)
    print(">>> Run 'scrapydweb -h' to get help")
    print(">>> Run ScrapydWeb with argument '-ss 127.0.0.1 -ss 192.168.0.101:12345@group1' "
          "to set any number of Scrapyd servers to control.")
    print(">>> Run ScrapydWeb with argument '--scrapyd_logs_dir SCRAPYD_LOGS_DIR' to speed up loading utf8 and stats html")
    print(">>> Run ScrapydWeb with argument '--disable_cache' to disable caching utf8 and stats files in the background periodically")

    print(">>> Using default settings from %s" % os.path.join(CWD, 'default_settings.py'))
    app = create_app()

    scrapydweb_settings_py = find_scrapydweb_settings_py()
    if scrapydweb_settings_py:
        print(">>> Overriding custom settings from %s" % scrapydweb_settings_py)
        app.config.from_pyfile(scrapydweb_settings_py)
    else:
        # Best effort: a failed copy only changes the hint that is printed.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            copyfile(os.path.join(CWD, 'default_settings.py'), os.path.join('.', 'scrapydweb_settings.py'))
        except Exception:
            print(">>> You may copy the file 'default_settings.py' from above path to your working directory, "
                  "and rename it as 'scrapydweb_settings.py' to custom settings")
        else:
            print(">>> The config file 'scrapydweb_settings.py' is copied to your working directory, "
                  "and you may custom settings with it")

    args = parse_args(app.config)
    check_args(args)
    update_app_config(app.config, args)
    # print(app.config)

    if not app.config['DISABLE_CACHE']:
        start_caching(app.config)

    print('>>> Visit ScrapydWeb at http://{host}:{port} or http://127.0.0.1:{port}'.format(
        host="IP-OF-THE-HOST-WHERE-SCRAPYDWEB-RUNS-ON", port=app.config['SCRAPYDWEB_PORT']))

    @app.context_processor
    def inject_variable():
        """Expose server settings and the default version to templates."""
        return {
            'SCRAPYD_SERVERS': app.config['SCRAPYD_SERVERS'],
            'SCRAPYD_SERVERS_GROUP': app.config['SCRAPYD_SERVERS_GROUP'],
            'DEFAULT_LATEST_VERSION': DEFAULT_LATEST_VERSION,
        }

    # /site-packages/flask/app.py
    # run(host=None, port=None, debug=None, load_dotenv=True, **options)
    # Threaded mode is enabled by default.
    app.run(host=app.config['SCRAPYDWEB_HOST'], port=app.config['SCRAPYDWEB_PORT'])  # , debug=True)
Exemple #6
0
def app():
    """Build a minimal single-server test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here).
    """
    flask_app = create_app(dict(TESTING=True,
                                SCRAPYD_SERVERS=['127.0.0.1:6800']))

    @flask_app.context_processor
    def inject_variable():
        """Provide fake group and auth entries, one per configured server."""
        servers = flask_app.config['SCRAPYD_SERVERS']
        count = len(servers)
        return dict(
            SCRAPYD_SERVERS=servers,
            SCRAPYD_SERVERS_GROUPS=['fakegroup'] * count,
            SCRAPYD_SERVERS_AUTHS=[('fakeusername', 'fakepassword')] * count,
            DEFAULT_LATEST_VERSION=DEFAULT_LATEST_VERSION,
        )

    yield flask_app
Exemple #7
0
def app():
    """Build a single-server test app with a projects dir and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here).
    """
    settings = dict(
        TESTING=True,
        SCRAPYD_SERVERS=['127.0.0.1:6800'],
        SCRAPY_PROJECTS_DIR=os.path.join(CWD, 'data'),
    )
    flask_app = create_app(settings)

    @flask_app.context_processor
    def inject_variable():
        """Provide fake group and auth entries, one per configured server."""
        servers = flask_app.config['SCRAPYD_SERVERS']
        fake_auth = ('fakeusername', 'fakepassword')
        return dict(
            SCRAPYD_SERVERS=servers,
            SCRAPYD_SERVERS_GROUPS=['fakegroup'] * len(servers),
            SCRAPYD_SERVERS_AUTHS=[fake_auth] * len(servers),
            DEFAULT_LATEST_VERSION=DEFAULT_LATEST_VERSION,
            DAEMONSTATUS_REFRESH_INTERVAL=0,
        )

    yield flask_app
Exemple #8
0
def app():
    """Build a two-server ScrapydWeb test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here). Every key in ``custom_settings`` overrides the base
    configuration before the app is created.
    """
    local_server = custom_settings['_SCRAPYD_SERVER']

    config = {
        'TESTING': True,
        # SERVER_NAME='127.0.0.1:5000',  # http://flask.pocoo.org/docs/0.12/config/#builtin-configuration-values
        'DEFAULT_SETTINGS_PY_PATH': '',
        'SCRAPYDWEB_SETTINGS_PY_PATH': '',
        'MAIN_PID': os.getpid(),
        'LOGPARSER_PID': 0,
        'POLL_PID': 0,
        # Second server/auth pair is a deliberately non-existent host.
        'SCRAPYD_SERVERS': [local_server, 'not-exist:6801'],
        'LOCAL_SCRAPYD_SERVER': local_server,
        'SCRAPYD_SERVERS_AUTHS': [
            custom_settings['_SCRAPYD_SERVER_AUTH'],
            ('username', 'password'),
        ],
        'SCRAPYD_SERVERS_GROUPS': ['', 'Scrapyd-group'],
        'SCRAPY_PROJECTS_DIR': os.path.join(cst.CWD, 'data'),
        'ENABLE_LOGPARSER': False,
        'EMAIL_WORKING_DAYS': list(range(1, 8)),
        'EMAIL_WORKING_HOURS': list(range(24)),
        'VERBOSE': True,
    }

    config.update(custom_settings)

    flask_app = create_app(config)

    @flask_app.context_processor
    def inject_variable():
        """Provide template variables, falling back to defaults when unset."""
        cfg = flask_app.config
        servers = cfg.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS':
                cfg.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            'SCRAPYD_SERVERS_AUTHS':
                cfg.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            'DAEMONSTATUS_REFRESH_INTERVAL':
                cfg.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': cfg.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': cfg.get('SHOW_SCRAPYD_ITEMS', True),
        }

    yield flask_app
Exemple #9
0
def app():
    """Build a two-server ScrapydWeb test app and yield it.

    Generator function (presumably a pytest fixture; the decorator is not
    visible here). Every key in ``custom_settings`` overrides the base
    configuration before the app is created.
    """
    config = {
        'TESTING': True,
        # Second server/auth pair is a deliberately non-existent host.
        'SCRAPYD_SERVERS': [
            custom_settings['_SCRAPYD_SERVER'],
            'not-exist:6801',
        ],
        'SCRAPYD_SERVERS_AUTHS': [
            custom_settings['_SCRAPYD_SERVER_AUTH'],
            ('username', 'password'),
        ],
        'SCRAPYD_SERVERS_GROUPS': ['', 'Scrapyd-group'],
        'SCRAPY_PROJECTS_DIR': os.path.join(CWD, 'data'),
        'SCRAPYD_LOGS_DIR': '',
        'VERBOSE': True,
        'EMAIL_WORKING_DAYS': list(range(1, 8)),
        'EMAIL_WORKING_HOURS': list(range(24)),
        # ON_JOB_FINISHED=True,
        # LOG_CRITICAL_THRESHOLD=1,
        # LOG_CRITICAL_TRIGGER_FORCESTOP=True
    }

    config.update(custom_settings)

    flask_app = create_app(config)

    @flask_app.context_processor
    def inject_variable():
        """Provide template variables, falling back to defaults when unset."""
        lookup = flask_app.config.get
        servers = lookup('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        groups = lookup('SCRAPYD_SERVERS_GROUPS', []) or ['']
        auths = lookup('SCRAPYD_SERVERS_AUTHS', []) or [None]
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS': groups,
            'SCRAPYD_SERVERS_AUTHS': auths,
            'DAEMONSTATUS_REFRESH_INTERVAL':
                lookup('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': lookup('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': lookup('SHOW_SCRAPYD_ITEMS', True),
        }

    yield flask_app
def test_config():
    """Check that ``testing`` is off by default and on when TESTING is set."""
    default_app = create_app()
    assert not default_app.testing

    testing_app = create_app({'TESTING': True})
    assert testing_app.testing
Exemple #11
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Steps, in order:
      1. Print version and usage hints and create the app.
      2. Overlay the settings file located by
         ``find_scrapydweb_settings_py()``; if none is found, try to copy
         ``default_settings.py`` into the working directory.
      3. Parse, check and apply command-line arguments.
      4. Unless ``DISABLE_CACHE`` is set, start the caching subprocess and
         register ``kill_child`` for it at interpreter exit.
      5. Register the template context processor and the basic-auth
         ``before_request`` hook, then call ``app.run``.
    """
    print(">>> scrapydweb version: %s" % __version__)
    print(">>> Run 'scrapydweb -h' to get help")
    print(">>> Loading default settings from %s" %
          os.path.join(CWD, 'default_settings.py'))
    app = create_app()

    scrapydweb_settings_py = find_scrapydweb_settings_py()
    if scrapydweb_settings_py:
        print(">>> Overriding custom settings from %s" %
              scrapydweb_settings_py)
        app.config.from_pyfile(scrapydweb_settings_py)
    else:
        # Best effort: a failed copy only changes the hint that is printed.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            copyfile(os.path.join(CWD, 'default_settings.py'),
                     os.path.join('.', SCRAPYDWEB_SETTINGS_PY))
        except Exception:
            print(
                ">>> You may copy the file 'default_settings.py' from above path to your working directory, "
                "and rename it to '%s' to custom settings" %
                SCRAPYDWEB_SETTINGS_PY)
        else:
            print(
                ">>> The config file '%s' is copied to your working directory, "
                "and you may custom settings with it" % SCRAPYDWEB_SETTINGS_PY)

    args = parse_args(app.config)
    check_args(args)
    update_app_config(app.config, args)
    # print(app.config)

    if not app.config['DISABLE_CACHE']:
        import atexit
        proc = start_caching(app.config)
        print(
            ">>> Caching utf8 and stats files in the background with pid: %s" %
            proc.pid)
        # Make sure the caching subprocess is handled when this process exits.
        atexit.register(kill_child, proc)

    print(
        ">>> Visit ScrapydWeb at http://{host}:{port} or http://127.0.0.1:{port}"
        .format(host='IP-OF-THE-HOST-WHERE-SCRAPYDWEB-RUNS-ON',
                port=app.config['SCRAPYDWEB_PORT']))

    # Credentials are read once here; login is required only if both are set.
    username = app.config.get('USERNAME', '')
    password = app.config.get('PASSWORD', '')

    @app.context_processor
    def inject_variable():
        """Expose server settings and UI flags to every rendered template."""
        return {
            'SCRAPYD_SERVERS': app.config['SCRAPYD_SERVERS'],
            'SCRAPYD_SERVERS_GROUPS': app.config['SCRAPYD_SERVERS_GROUPS'],
            'SCRAPYD_SERVERS_AUTHS': app.config['SCRAPYD_SERVERS_AUTHS'],
            'DEFAULT_LATEST_VERSION': DEFAULT_LATEST_VERSION,
            'GITHUB_URL': __url__,
            'REQUIRE_LOGIN': bool(username and password),
            'HIDE_SCRAPYD_ITEMS': app.config.get('HIDE_SCRAPYD_ITEMS', False),
        }

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Log submitted form data and enforce basic auth when configured."""
        if request.form:
            app.logger.debug(json_dumps(request.form))
        if username and password:
            auth = request.authorization
            if not auth or not (auth.username == username
                                and auth.password == password):
                return authenticate()

    # /site-packages/flask/app.py
    # run(host=None, port=None, debug=None, load_dotenv=True, **options)
    # Threaded mode is enabled by default.
    app.run(host=app.config['SCRAPYDWEB_HOST'],
            port=app.config['SCRAPYDWEB_PORT'])  # , debug=True)
Exemple #12
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Creates the app, records process and settings paths in its config,
    loads custom settings and command-line arguments, validates the
    resulting config (exiting with a message on ``AssertionError``),
    registers the auth hook and template context processor, and finally
    runs the Flask server over HTTP or HTTPS with the reloader disabled.
    """
    # To hide warning logging in scheduler.py until app.run()
    apscheduler_logger.setLevel(logging.ERROR)
    main_pid = os.getpid()
    logger.info("ScrapydWeb version: %s", __version__)
    logger.info("Use 'scrapydweb -h' to get help")
    logger.info("Main pid: %s", main_pid)
    logger.debug("Loading default settings from %s",
                 handle_slash(DEFAULT_SETTINGS_PY_PATH))

    app = create_app()
    # In handle_metadata(): with db.app.app_context():
    handle_metadata('main_pid', main_pid)
    app.config['MAIN_PID'] = main_pid
    app.config['DEFAULT_SETTINGS_PY_PATH'] = DEFAULT_SETTINGS_PY_PATH
    app.config['SCRAPYDWEB_SETTINGS_PY_PATH'] = os.path.join(
        os.getcwd(), SCRAPYDWEB_SETTINGS_PY)
    load_custom_settings(app.config)

    # "scrapydweb -h" ends up here
    args = parse_args(app.config)
    update_app_config(app.config, args)
    try:
        check_app_config(app.config)
    except AssertionError as err:
        logger.error("Check app config fail: ")
        settings_path = handle_slash(app.config['SCRAPYDWEB_SETTINGS_PY_PATH'])
        sys.exit(u"\n{err}\n\nCheck and update your settings in {path}\n".format(
            err=err, path=settings_path))

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Enforce basic auth on every request when ENABLE_AUTH is set."""
        if not app.config.get('ENABLE_AUTH', False):
            return None
        auth = request.authorization
        # str(): the configured values may be 0 from the config file
        USERNAME = str(app.config.get('USERNAME', ''))
        PASSWORD = str(app.config.get('PASSWORD', ''))
        if auth and auth.username == USERNAME and auth.password == PASSWORD:
            return None
        return authenticate()

    # MUST be commented out for released version
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask
    # @app.after_request
    # def add_header(r):
        # r.headers['Pragma'] = 'no-cache'
        # r.headers['Expires'] = '0'
        # r.headers['Cache-Control'] = 'public, max-age=0'
        # return r

    @app.context_processor
    def inject_variable():
        """Provide template variables, falling back to defaults when unset."""
        cfg = app.config
        servers = cfg.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        public_urls = cfg.get('SCRAPYD_SERVERS_PUBLIC_URLS', None)
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS':
                cfg.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            'SCRAPYD_SERVERS_AUTHS':
                cfg.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            'SCRAPYD_SERVERS_PUBLIC_URLS':
                public_urls or [None] * len(servers),
            'DAEMONSTATUS_REFRESH_INTERVAL':
                cfg.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': cfg.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': cfg.get('SHOW_SCRAPYD_ITEMS', True),
        }

    # To solve https://github.com/my8100/scrapydweb/issues/17
    # http://flask.pocoo.org/docs/1.0/cli/?highlight=flask_debug#environments
    # flask/helpers.py: get_env() The default is 'production'
    # On Windows, get/set/delete: set FLASK_ENV, set FLASK_ENV=production, set set FLASK_ENV=
    # if not os.environ.get('FLASK_ENV'):
        # os.environ['FLASK_ENV'] = 'development'
        # print("The environment variable 'FLASK_ENV' has been set to 'development'")
        # print("WARNING: Do not use the development server in a production. "
               # "Check out http://flask.pocoo.org/docs/1.0/deploying/")

    # http://flask.pocoo.org/docs/1.0/config/?highlight=flask_debug#environment-and-debug-features
    debug_enabled = app.config.get('DEBUG', False)
    os.environ['FLASK_DEBUG'] = '1' if debug_enabled else '0'
    if debug_enabled:
        logger.info("Note that use_reloader is set to False in run.py")

    # site-packages/flask/app.py
    # Threaded mode is enabled by default.
    # https://stackoverflow.com/a/28590266/10517783 to run in HTTP or HTTPS mode
    # site-packages/werkzeug/serving.py
    # https://stackoverflow.com/questions/13895176/sqlalchemy-and-sqlite-database-is-locked
    protocol = 'http'
    context = None
    if app.config.get('ENABLE_HTTPS', False):
        protocol = 'https'
        context = (app.config['CERTIFICATE_FILEPATH'],
                   app.config['PRIVATEKEY_FILEPATH'])

    port = app.config['SCRAPYDWEB_PORT']
    print("{star}Visit ScrapydWeb at {protocol}://127.0.0.1:{port} "
          "or {protocol}://IP-OF-THE-CURRENT-HOST:{port}{star}\n".format(
              star=STAR, protocol=protocol, port=port))
    logger.info("For running Flask in production, check out http://flask.pocoo.org/docs/1.0/deploying/")
    apscheduler_logger.setLevel(logging.DEBUG)
    app.run(host=app.config['SCRAPYDWEB_BIND'], port=app.config['SCRAPYDWEB_PORT'],
            ssl_context=context, use_reloader=False)
Exemple #13
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Creates the app, loads custom config and command-line arguments,
    validates the config (exiting with a message on ``AssertionError``),
    optionally initialises caching, registers the auth hook and template
    context processor, and runs the Flask server.
    """
    main_pid = os.getpid()
    default_settings_path = os.path.join(CWD, 'default_settings.py')
    printf("Main pid: %s" % main_pid)
    printf("ScrapydWeb version: %s" % __version__)
    printf("Use the 'scrapydweb -h' command to get help")
    printf("Loading default settings from %s" % default_settings_path)

    app = create_app()
    load_custom_config(app.config)

    # "scrapydweb -h" ends up here
    args = parse_args(app.config)
    update_app_config(app.config, args)
    # from pprint import pprint
    # pprint(app.config)
    try:
        check_app_config(app.config)
    except AssertionError as err:
        message = "\n!!! %s\nCheck and update your settings in: %s" % (
            err, scrapydweb_settings_py_path)
        sys.exit(message)

    # Caching is on unless ENABLE_CACHE is explicitly falsy.
    caching_enabled = app.config.get('ENABLE_CACHE', True)
    caching_pid = init_caching(app.config, main_pid) if caching_enabled else None

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Enforce basic auth on every request when ENABLE_AUTH is set."""
        if not app.config.get('ENABLE_AUTH', False):
            return None
        auth = request.authorization
        # str(): the configured values may be 0 from the config file
        USERNAME = str(app.config.get('USERNAME', ''))
        PASSWORD = str(app.config.get('PASSWORD', ''))
        if auth and auth.username == USERNAME and auth.password == PASSWORD:
            return None
        return authenticate()

    # Should be commented out for released version
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask
    # @app.after_request
    # def add_header(r):
    # r.headers['Pragma'] = 'no-cache'
    # r.headers['Expires'] = '0'
    # r.headers['Cache-Control'] = 'public, max-age=0'
    # return r

    @app.context_processor
    def inject_variable():
        """Expose process ids and the settings path to templates."""
        return {
            'main_pid': main_pid,
            'caching_pid': caching_pid,
            'CHECK_LATEST_VERSION_FREQ': 100,
            'scrapydweb_settings_py_path': scrapydweb_settings_py_path,
        }

    visit_hint = "Visit ScrapydWeb at http://127.0.0.1:{port} or http://{bind}:{port}"
    printf(visit_hint.format(bind='IP-OF-CURRENT-HOST',
                             port=app.config['SCRAPYDWEB_PORT']))

    # site-packages/flask/app.py
    # def run(self, host=None, port=None, debug=None, load_dotenv=True, **options):
    # Threaded mode is enabled by default.
    app.run(host=app.config['SCRAPYDWEB_BIND'],
            port=app.config['SCRAPYDWEB_PORT'])  # , debug=True)
Exemple #14
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Creates the app, records the main pid and settings paths in its
    config, loads custom settings and command-line arguments, validates
    the config (exiting with a message on ``AssertionError``), registers
    the auth hook and template context processor, and runs the Flask
    server over HTTP or HTTPS.
    """
    main_pid = os.getpid()
    printf("ScrapydWeb version: %s" % __version__)
    printf("Use 'scrapydweb -h' to get help")
    printf("Main pid: %s" % main_pid)
    printf("Loading default settings from %s" % DEFAULT_SETTINGS_PY_PATH)

    app = create_app()
    app.config['MAIN_PID'] = main_pid
    app.config['DEFAULT_SETTINGS_PY_PATH'] = DEFAULT_SETTINGS_PY_PATH
    app.config['SCRAPYDWEB_SETTINGS_PY_PATH'] = os.path.join(
        os.getcwd(), SCRAPYDWEB_SETTINGS_PY)
    load_custom_settings(app.config)

    # "scrapydweb -h" ends up here
    args = parse_args(app.config)
    update_app_config(app.config, args)
    try:
        check_app_config(app.config)
    except AssertionError as err:
        exit_template = (
            "\n{alert}\n{err}\nCheck and update your settings in {path}\n{alert}")
        sys.exit(exit_template.format(
            alert=ALERT,
            err=err,
            path=app.config['SCRAPYDWEB_SETTINGS_PY_PATH']))

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Enforce basic auth on every request when ENABLE_AUTH is set."""
        if not app.config.get('ENABLE_AUTH', False):
            return None
        auth = request.authorization
        # str(): the configured values may be 0 from the config file
        USERNAME = str(app.config.get('USERNAME', ''))
        PASSWORD = str(app.config.get('PASSWORD', ''))
        if auth and auth.username == USERNAME and auth.password == PASSWORD:
            return None
        return authenticate()

    # MUST be commented out for released version
    # https://stackoverflow.com/questions/34066804/disabling-caching-in-flask
    # @app.after_request
    # def add_header(r):
    # r.headers['Pragma'] = 'no-cache'
    # r.headers['Expires'] = '0'
    # r.headers['Cache-Control'] = 'public, max-age=0'
    # return r

    @app.context_processor
    def inject_variable():
        """Provide template variables, falling back to defaults when unset."""
        cfg = app.config
        servers = cfg.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS':
                cfg.get('SCRAPYD_SERVERS_GROUPS', []) or [''],
            'SCRAPYD_SERVERS_AUTHS':
                cfg.get('SCRAPYD_SERVERS_AUTHS', []) or [None],
            'DAEMONSTATUS_REFRESH_INTERVAL':
                cfg.get('DAEMONSTATUS_REFRESH_INTERVAL', 10),
            'ENABLE_AUTH': cfg.get('ENABLE_AUTH', False),
            'SHOW_SCRAPYD_ITEMS': cfg.get('SHOW_SCRAPYD_ITEMS', True),
        }

    # To solve https://github.com/my8100/scrapydweb/issues/17
    # http://flask.pocoo.org/docs/1.0/cli/?highlight=flask_debug#environments
    # flask/helpers.py: get_env() The default is 'production'
    # On Windows, get/set/delete: set FLASK_ENV, set FLASK_ENV=production, set set FLASK_ENV=
    # if not os.environ.get('FLASK_ENV'):
    # os.environ['FLASK_ENV'] = 'development'
    # printf("The environment variable 'FLASK_ENV' has been set to 'development'", warn=True)
    # printf("WARNING: Do not use the development server in a production. "
    # "Check out http://flask.pocoo.org/docs/1.0/deploying/", warn=True)

    # http://flask.pocoo.org/docs/1.0/config/?highlight=flask_debug#environment-and-debug-features
    debug_enabled = app.config.get('DEBUG', False)
    os.environ['FLASK_DEBUG'] = '1' if debug_enabled else '0'
    if debug_enabled:
        printf(
            "It's not recommended to run ScrapydWeb in debug mode, set 'DEBUG = False' instead.",
            warn=True)

    # site-packages/flask/app.py
    # Threaded mode is enabled by default.
    # https://stackoverflow.com/a/28590266/10517783 to run in HTTP or HTTPS mode
    # site-packages/werkzeug/serving.py
    protocol = 'http'
    context = None
    if app.config.get('ENABLE_HTTPS', False):
        protocol = 'https'
        context = (app.config['CERTIFICATE_FILEPATH'],
                   app.config['PRIVATEKEY_FILEPATH'])

    print(STAR)
    visit_hint = (
        "Visit ScrapydWeb at {protocol}://127.0.0.1:{port} or {protocol}://IP-OF-THE-CURRENT-HOST:{port}")
    printf(visit_hint.format(protocol=protocol,
                             port=app.config['SCRAPYDWEB_PORT']))
    printf(
        "For running Flask in production, check out http://flask.pocoo.org/docs/1.0/deploying/",
        warn=True)
    print(STAR)
    app.run(host=app.config['SCRAPYDWEB_BIND'],
            port=app.config['SCRAPYDWEB_PORT'],
            ssl_context=context)
Exemple #15
0
def main():
    """Command-line entry point: configure and run the ScrapydWeb app.

    Creates the app, loads custom config and command-line arguments,
    validates the config (exiting with a message on ``AssertionError``),
    optionally initialises caching, registers the auth hook and template
    context processor, and runs the Flask server.
    """
    main_pid = os.getpid()
    default_settings_path = os.path.join(CWD, 'default_settings.py')
    printf("Main pid: %s" % main_pid)
    printf("scrapydweb version: %s" % __version__)
    printf("Run 'scrapydweb -h' to get help")
    printf("Loading default settings from %s" % default_settings_path)

    app = create_app()
    load_custom_config(app.config)

    # "scrapydweb -h" would end up here
    args = parse_args(app.config)
    update_app_config(app.config, args)
    # from pprint import pprint
    # pprint(app.config)
    try:
        check_app_config(app.config)
    except AssertionError as err:
        message = "\n!!! %s\nCheck out your settings in: %s" % (
            err, scrapydweb_settings_py_path)
        sys.exit(message)

    # Caching runs unless DISABLE_CACHE is truthy.
    if app.config.get('DISABLE_CACHE', False):
        caching_pid = None
    else:
        caching_pid = init_caching(app.config, main_pid)

    # Auth settings are read once here; DISABLE_AUTH defaults to True.
    REQUIRE_LOGIN = not app.config.get('DISABLE_AUTH', True)
    USERNAME = str(app.config.get('USERNAME', ''))  # May be 0 from config file
    PASSWORD = str(app.config.get('PASSWORD', ''))

    # https://stackoverflow.com/questions/34164464/flask-decorate-every-route-at-once
    @app.before_request
    def require_login():
        """Enforce basic auth on every request when login is required."""
        if not REQUIRE_LOGIN:
            return None
        auth = request.authorization
        if auth and auth.username == USERNAME and auth.password == PASSWORD:
            return None
        return authenticate()

    @app.context_processor
    def inject_variable():
        """Expose server settings, version info and pids to templates."""
        cfg = app.config
        servers = cfg['SCRAPYD_SERVERS']
        return {
            'SCRAPYD_SERVERS': servers,
            'SCRAPYD_SERVERS_AMOUNT': len(servers),
            'SCRAPYD_SERVERS_GROUPS': cfg['SCRAPYD_SERVERS_GROUPS'],
            'SCRAPYD_SERVERS_AUTHS': cfg['SCRAPYD_SERVERS_AUTHS'],
            'PYTHON_VERSION': '.'.join(map(str, sys.version_info[:3])),
            'SCRAPYDWEB_VERSION': __version__,
            'CHECK_LATEST_VERSION_FREQ': 30,
            'DEFAULT_LATEST_VERSION': DEFAULT_LATEST_VERSION,
            'GITHUB_URL': __url__,
            'SHOW_SCRAPYD_ITEMS': cfg.get('SHOW_SCRAPYD_ITEMS', True),
            'DAEMONSTATUS_REFRESH_INTERVAL':
                int(cfg.get('DAEMONSTATUS_REFRESH_INTERVAL', 10)),
            'REQUIRE_LOGIN': REQUIRE_LOGIN,
            'scrapydweb_settings_py_path': scrapydweb_settings_py_path,
            'main_pid': main_pid,
            'caching_pid': caching_pid,
        }

    visit_hint = "Visit ScrapydWeb at http://{bind}:{port} or http://127.0.0.1:{port}"
    printf(visit_hint.format(bind='IP-OF-CURRENT-HOST',
                             port=app.config['SCRAPYDWEB_PORT']))

    # /site-packages/flask/app.py
    # def run(self, host=None, port=None, debug=None, load_dotenv=True, **options):
    # Threaded mode is enabled by default.
    app.run(host=app.config['SCRAPYDWEB_BIND'], port=app.config['SCRAPYDWEB_PORT'])  # , debug=True)
Exemple #16
0
# Module-level setup that builds a ScrapydWeb app object named `application`.
# NOTE(review): the name `application` and the /var/www paths suggest a WSGI
# deployment script (e.g. mod_wsgi) — confirm against the server config.
import sys
import os
import logging

# Extend the module search path BEFORE the scrapydweb/flask imports below, so
# the checkout under /var/www/html and its virtualenv site-packages are found.
sys.path.insert(0, '/var/www/html/scrapydweb')
sys.path.append('/var/www/html/scrapydweb/my_env/lib/python3.8/site-packages')
sys.path.append('/var/www/html')

from scrapydweb import create_app
from scrapydweb.common import handle_metadata, authenticate
from flask import request

# Create the app with create_app()'s defaults, then record the settings paths.
application = create_app()
application.config[
    'DEFAULT_SETTINGS_PY_PATH'] = '/var/www/html/scrapydweb/default_settings.py'
# NOTE(review): this points at the same default_settings.py file as
# DEFAULT_SETTINGS_PY_PATH above — confirm that is intended.
application.config[
    'SCRAPYDWEB_SETTINGS_PY_PATH'] = '/var/www/html/scrapydweb/default_settings.py'

# The current process id is stored for MAIN_PID, LOGPARSER_PID and POLL_PID
# alike; no separate processes are started in this visible section.
main_pid = os.getpid()
application.config['MAIN_PID'] = main_pid
application.config['LOGPARSER_PID'] = main_pid
application.config['POLL_PID'] = main_pid

logger = logging.getLogger(__name__)
apscheduler_logger = logging.getLogger('apscheduler')

# Record the main pid via scrapydweb's metadata helper.
handle_metadata('main_pid', main_pid)


@application.context_processor
def inject_variable():