Example #1
    def test_e2e_gives_experimental_warning(
        self,
        moodbot_domain_path: Path,
        e2e_bot_config_file: Path,
        e2e_stories_path: Text,
        nlu_data_path: Text,
        caplog: LogCaptureFixture,
        tmp_path: Path,
    ):
        with caplog.at_level(logging.WARNING):
            rasa.train(
                str(moodbot_domain_path),
                str(e2e_bot_config_file),
                [e2e_stories_path, nlu_data_path],
                output=str(tmp_path),
                dry_run=True,
            )

        assert any(
            "The end-to-end training is currently experimental"
            in record.message for record in caplog.records
        )
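Most of the examples that follow differ mainly in how they read the captured output back. As a quick reference, here is a minimal self-contained sketch (the `demo` logger and the test itself are illustrative, not taken from any of the quoted projects) contrasting the four accessors used throughout: `caplog.records`, `caplog.messages`, `caplog.text`, and `caplog.record_tuples`.

import logging

from _pytest.logging import LogCaptureFixture

logger = logging.getLogger("demo")


def test_caplog_accessors(caplog: LogCaptureFixture) -> None:
    # at_level() temporarily lowers the "demo" logger's level so the INFO
    # record is not filtered out before caplog can capture it.
    with caplog.at_level(logging.INFO, logger="demo"):
        logger.info("value is %s", 42)

    # Raw logging.LogRecord objects.
    assert caplog.records[0].levelno == logging.INFO
    # Interpolated message strings.
    assert "value is 42" in caplog.messages
    # The whole captured output, formatted as one string.
    assert "value is 42" in caplog.text
    # (logger_name, level, message) triples.
    assert caplog.record_tuples == [("demo", logging.INFO, "value is 42")]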
Example #2
def test_not_store_backward_in_database(
    data_loader: DataLoader, caplog: LogCaptureFixture, monkeypatch: MonkeyPatch, tmp_path: Path,
) -> None:
    internal_database = Database(DatabaseType.internal)
    dsx_write_database = Database(DatabaseType.dsx_write)
    data_output = DataOutput(RUNTIME_CONFIG, internal_database, dsx_write_database)

    model_config_account_1 = ModelConfigAccount1(runtime_config=RUNTIME_CONFIG, data_loader=data_loader)
    account_data = data_loader.load_account_data(model_config_account_1, -1)
    model_run = ForecastModelRun()
    model_config_account_1.forecast_path = tmp_path
    with caplog.at_level(logging.DEBUG):
        returned_model_run = data_output.store_forecast(
            model_config=model_config_account_1,
            model_run=model_run,
            account_data=account_data,
            forecast_raw=TEST_ACCOUNT_1_RAW_DATA,
            forecast_post=TEST_ACCOUNT_1_POST_DATA,
            actuals_newest_month=datetime(2019, 10, 1, 0, 0),
        )
    assert RUNTIME_CONFIG.engine_run_type == EngineRunType.backward
    assert returned_model_run is model_run
    assert "Skip storing forecast in internal database for backward run." in caplog.messages
Example #3
def test_signed(
    caplog: LogCaptureFixture,
    clirunner,
    docker_registry_secure: DockerRegistrySecure,
    gpgsigner: GPGSigner,
    known_good_image: TypingKnownGoodImage,
    runner,
):
    """Test docker-verify can handle signed images."""
    caplog.clear()
    caplog.set_level(logging.DEBUG)

    source = known_good_image["image_name"]
    destination = source.clone()
    destination.digest = None
    destination.tag += "_signed"

    with ca_trust_store(docker_registry_secure.cacerts), registry_credentials(
            docker_registry_secure):
        result = clirunner.invoke(
            cli_signer,
            args=[
                "registry",
                "--keyid",
                gpgsigner.keyid,
                str(source),
                str(destination),
            ],
            env={"DSV_GPG_DATASTORE": str(gpgsigner.homedir)},
            input=f"{gpgsigner.passphrase}\n",
        )
        assert not result.exception
        assert "Integrity check passed." in caplog.text
        assert "Created new image" in caplog.text
        assert str(destination) in caplog.text

        caplog.clear()

        with gpg_datastore(gpgsigner.homedir):
            result = runner.invoke(cli, args=["registry", str(destination)])
            assert not result.exception
            assert "Integrity check passed." in caplog.text
            assert "Signature check passed." in caplog.text
            assert " is consistent; 1 signature(s) verified." in caplog.text
            assert str(destination) in caplog.text
Example #4
    def test_retry_and_fail(
        self, database_type: DatabaseType, monkeypatch: MonkeyPatch, caplog: LogCaptureFixture
    ) -> None:
        monkeypatch.setattr(master_config, "db_connection_attempts", 3)
        monkeypatch.setattr(master_config, "db_connection_retry_sleep_seconds", 0.01)

        monkeypatch.setattr(pyodbc, "connect", Mock(side_effect=pyodbc.OperationalError("Always fails!")))

        with pytest.raises(DatabaseConnectionFailure):
            with caplog.at_level(logging.DEBUG):
                database = Database(database_type)
                database.get_existing_table_names()  # Run something that explicitly connects to the database

        assert caplog.messages == [
            f"Trying to connect to {database_type.name} database, attempt #1",
            f"Unsuccessful attempt #1 to connect to {database_type.name} database: Always fails!",
            "Waiting 0.01 seconds before next connection attempt",
            f"Trying to connect to {database_type.name} database, attempt #2",
            f"Unsuccessful attempt #2 to connect to {database_type.name} database: Always fails!",
            "Waiting 0.01 seconds before next connection attempt",
            f"Trying to connect to {database_type.name} database, attempt #3",
            f"Unsuccessful attempt #3 to connect to {database_type.name} database: Always fails!",
            f"Unable to connect to {database_type.name} database after 3 attempt(s).",
        ]
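The exact-match assertion above pins down the retry loop's logging contract. For illustration, a generic loop that would emit this message sequence could look like the following sketch (`connect_with_retry` is hypothetical; the project's actual `Database` class is not shown in this listing):

import logging
import time

logger = logging.getLogger(__name__)


def connect_with_retry(connect, name: str, attempts: int, sleep_seconds: float):
    # Sketch of a retry loop that produces the log sequence asserted above.
    for attempt in range(1, attempts + 1):
        logger.debug("Trying to connect to %s database, attempt #%s", name, attempt)
        try:
            return connect()
        except Exception as exc:
            logger.debug("Unsuccessful attempt #%s to connect to %s database: %s",
                         attempt, name, exc)
            if attempt < attempts:
                logger.debug("Waiting %s seconds before next connection attempt",
                             sleep_seconds)
                time.sleep(sleep_seconds)
    logger.debug("Unable to connect to %s database after %s attempt(s).",
                 name, attempts)
    # The project raises its own DatabaseConnectionFailure here.
    raise RuntimeError(f"could not connect to {name} database")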
Example #5
def test_for_signature(caplog: LogCaptureFixture):
    """Tests subclass instantiation."""
    caplog.set_level(logging.FATAL, logger="gnupg")
    result = Signer.for_signature("PGP SIGNATURE")
    assert result
    assert isinstance(result, GPGSigner)
Example #6
@pytest.fixture
def caplog_info_level(
        caplog: LogCaptureFixture) -> Iterable[LogCaptureFixture]:
    # Yield-based fixture: capture at INFO for the duration of the test.
    with caplog.at_level(logging.INFO):
        yield caplog
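A hypothetical test consuming the wrapper then gets INFO-level capture without any per-test setup:

def test_something_logs_at_info(caplog_info_level: LogCaptureFixture) -> None:
    logging.getLogger(__name__).info("work started")
    # The wrapper fixture already raised the capture level to INFO.
    assert "work started" in caplog_info_level.text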
Example #7
async def test_callback_error(
    setup: SetupTest, caplog: LogCaptureFixture
) -> None:
    """Test an error return from the OIDC token endpoint."""
    setup.configure("oidc")
    assert setup.config.oidc
    return_url = "https://example.com/foo"

    r = await setup.client.get(
        "/login", params={"rd": return_url}, allow_redirects=False
    )
    assert r.status_code == 307
    url = urlparse(r.headers["Location"])
    query = parse_qs(url.query)

    # Build an error response to return from the OIDC token URL and register
    # it as a result.
    response = {
        "error": "error_code",
        "error_description": "description",
    }
    setup.httpx_mock.add_response(
        url=setup.config.oidc.token_url,
        method="POST",
        json=response,
        status_code=400,
    )

    # Simulate the return from the OpenID Connect provider.
    caplog.clear()
    r = await setup.client.get(
        "/oauth2/callback",
        params={"code": "some-code", "state": query["state"][0]},
        allow_redirects=False,
    )
    assert r.status_code == 500
    assert "error_code: description" in r.text
    data = json.loads(caplog.record_tuples[-1][2])
    assert data == {
        "error": "error_code: description",
        "event": "Provider authentication failed",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/oauth2/callback",
        "return_url": return_url,
        "remote": "127.0.0.1",
        "request_id": ANY,
        "user_agent": ANY,
    }

    # Change the mock error response to not contain an error.  We should then
    # internally raise the exception for the return status, which should
    # translate into an internal server error.
    setup.httpx_mock.add_response(
        url=setup.config.oidc.token_url,
        method="POST",
        json={"foo": "bar"},
        status_code=400,
    )
    r = await setup.client.get(
        "/login", params={"rd": return_url}, allow_redirects=False
    )
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await setup.client.get(
        "/oauth2/callback",
        params={"code": "some-code", "state": query["state"][0]},
        allow_redirects=False,
    )
    assert r.status_code == 500
    assert "Cannot contact authentication provider" in r.text

    # Now try a reply that returns 200 but doesn't have the field we
    # need.
    setup.httpx_mock.add_response(
        url=setup.config.oidc.token_url,
        method="POST",
        json={"foo": "bar"},
        status_code=200,
    )
    r = await setup.client.get(
        "/login", params={"rd": return_url}, allow_redirects=False
    )
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await setup.client.get(
        "/oauth2/callback",
        params={"code": "some-code", "state": query["state"][0]},
        allow_redirects=False,
    )
    assert r.status_code == 500
    assert "No id_token in token reply" in r.text

    # Return invalid JSON, which should raise an error during JSON decoding.
    setup.httpx_mock.add_response(
        url=setup.config.oidc.token_url,
        method="POST",
        data="foo",
        status_code=200,
    )
    r = await setup.client.get(
        "/login", params={"rd": return_url}, allow_redirects=False
    )
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await setup.client.get(
        "/oauth2/callback",
        params={"code": "some-code", "state": query["state"][0]},
        allow_redirects=False,
    )
    assert r.status_code == 500
    assert "not valid JSON" in r.text

    # Finally, return invalid JSON and an error reply.
    setup.httpx_mock.add_response(
        url=setup.config.oidc.token_url,
        method="POST",
        data="foo",
        status_code=400,
    )
    r = await setup.client.get(
        "/login", params={"rd": return_url}, allow_redirects=False
    )
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await setup.client.get(
        "/oauth2/callback",
        params={"code": "some-code", "state": query["state"][0]},
        allow_redirects=False,
    )
    assert r.status_code == 500
    assert "Cannot contact authentication provider" in r.text
Example #8
def test_subprocess_trace(datadog_tracer: ddtrace.Tracer,
                          caplog: LogCaptureFixture):
    """Verify that spans created in subprocesses are written to the queue and then flushed to the server,
    when wrapped in the SubprocessTracer"""

    # Enable log output for this logger for duration of this test
    caplog.set_level(logging.DEBUG, DatadogLoggingTraceFilter._log.name)
    test = f"{inspect.stack()[0][3]}"
    # And also send its output through a multiprocessing queue to surface logs from the subprocess
    log_queue = mp.Queue()
    DatadogLoggingTraceFilter._log.addHandler(QueueHandler(log_queue))
    DatadogLoggingTraceFilter.activate()

    subproc_test_msg = f"a test message was logged in a subprocess of {test}"
    state = mp.Queue()
    stop_sentinel = "-->STOP<--"

    with ddtrace.tracer.trace(
            name=f"{test}_operation",
            service=f"{test}_service",
            resource=f"{test}_resource",
            span_type=SpanTypes.TEST,
    ) as span:
        trace_id = span.trace_id
        logger = logging.getLogger(f"{test}_logger")
        test_msg = f"a test message was logged during {test}"
        logger.warning(test_msg)
        ctx = mp.get_context("fork")
        worker = ctx.Process(
            name=f"{test}_subproc",
            target=_do_things_in_subproc,
            args=(
                subproc_test_msg,
                state,
            ),
        )
        worker.start()
        worker.join(timeout=10)
        if worker.is_alive():
            worker.terminate()
            try:
                _drain_captured_log_queue(log_queue,
                                          stop_sentinel,
                                          caplog,
                                          force_immediate_stop=True)
            except Exception:
                print("Error draining captured log queue when handling "
                      "subproc TimeoutError")
            raise mp.TimeoutError(
                f"subprocess {worker.name} did not complete in timeout")
        DatadogLoggingTraceFilter._log.warning(stop_sentinel)

    subproc_trace_id, subproc_span_id = state.get(block=True, timeout=10)
    assert test_msg in caplog.text, "caplog.text did not seem to capture logging output during test"
    assert f"SPAN#{trace_id}" in caplog.text, "span marker not found in logging output"
    assert f"TRACE#{trace_id}" in caplog.text, "trace marker not found in logging output"
    assert f"resource {test}_resource" in caplog.text, "traced resource not found in logging output"
    assert subproc_trace_id == trace_id  # subprocess tracing should be a continuation of the trace in parent process

    _drain_captured_log_queue(log_queue, stop_sentinel, caplog)

    assert f"{subproc_span_id}" in caplog.text, "subproc span id not found in logging output"
    assert (
        f"resource {_do_things_in_subproc.__name__}_resource"
        in caplog.text), "subproc traced resource not found in logging output"
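The helper `_drain_captured_log_queue` used above is not shown in this listing. The underlying pattern, though, is the standard one for surfacing subprocess logging to `caplog`: attach a `logging.handlers.QueueHandler` in the parent before forking, then pull records off the queue in the main process and re-emit them. A minimal sketch of such a drain step (the name and sentinel handling are illustrative, not the project's actual helper):

import logging


def drain_log_queue(log_queue, stop_sentinel: str,
                    target_logger: logging.Logger) -> None:
    # Re-emit queued LogRecords in the main process so that pytest's
    # capture handler (and therefore caplog) sees them.
    while True:
        record = log_queue.get(block=True, timeout=10)
        if stop_sentinel in record.getMessage():
            break
        target_logger.handle(record)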
Example #9
async def test_callback_error(
    tmp_path: Path,
    client: AsyncClient,
    respx_mock: respx.Router,
    caplog: LogCaptureFixture,
) -> None:
    """Test an error return from the OIDC token endpoint."""
    config = await configure(tmp_path, "oidc")
    assert config.oidc
    return_url = "https://example.com/foo"

    r = await client.get("/login", params={"rd": return_url})
    assert r.status_code == 307
    url = urlparse(r.headers["Location"])
    query = parse_qs(url.query)

    # Build an error response to return from the OIDC token URL and register
    # it as a result.
    response = {
        "error": "error_code",
        "error_description": "description",
    }
    respx_mock.post(config.oidc.token_url).respond(400, json=response)

    # Simulate the return from the OpenID Connect provider.
    caplog.clear()
    r = await client.get(
        "/oauth2/callback",
        params={
            "code": "some-code",
            "state": query["state"][0]
        },
    )
    assert r.status_code == 403
    assert "error_code: description" in r.text
    assert parse_log(caplog) == [
        {
            "event": f"Retrieving ID token from {config.oidc.token_url}",
            "httpRequest": {
                "requestMethod": "GET",
                "requestUrl": ANY,
                "remoteIp": "127.0.0.1",
            },
            "return_url": return_url,
            "severity": "info",
        },
        {
            "error": "error_code: description",
            "event": "Authentication provider failed",
            "httpRequest": {
                "requestMethod": "GET",
                "requestUrl": ANY,
                "remoteIp": "127.0.0.1",
            },
            "return_url": return_url,
            "severity": "warning",
        },
    ]

    # Change the mock error response to not contain an error.  We should then
    # internally raise the exception for the return status, which should
    # translate into an internal server error.
    respx_mock.post(config.oidc.token_url).respond(400, json={"foo": "bar"})
    r = await client.get("/login", params={"rd": return_url})
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await client.get(
        "/oauth2/callback",
        params={
            "code": "some-code",
            "state": query["state"][0]
        },
    )
    assert r.status_code == 403
    assert "Cannot contact authentication provider" in r.text

    # Now try a reply that returns 200 but doesn't have the field we
    # need.
    respx_mock.post(config.oidc.token_url).respond(json={"foo": "bar"})
    r = await client.get("/login", params={"rd": return_url})
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await client.get(
        "/oauth2/callback",
        params={
            "code": "some-code",
            "state": query["state"][0]
        },
    )
    assert r.status_code == 403
    assert "No id_token in token reply" in r.text

    # Return invalid JSON, which should raise an error during JSON decoding.
    respx_mock.post(config.oidc.token_url).respond(content=b"foo")
    r = await client.get("/login", params={"rd": return_url})
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await client.get(
        "/oauth2/callback",
        params={
            "code": "some-code",
            "state": query["state"][0]
        },
    )
    assert r.status_code == 403
    assert "not valid JSON" in r.text

    # Finally, return invalid JSON and an error reply.
    respx_mock.post(config.oidc.token_url).respond(400, content=b"foo")
    r = await client.get("/login", params={"rd": return_url})
    query = parse_qs(urlparse(r.headers["Location"]).query)
    r = await client.get(
        "/oauth2/callback",
        params={
            "code": "some-code",
            "state": query["state"][0]
        },
    )
    assert r.status_code == 403
    assert "Cannot contact authentication provider" in r.text
Example #10
def set_log_level_debug(caplog: LogCaptureFixture) -> None:
    # Set the post-test log level to DEBUG for failing tests.  For all tests
    # (failing and successful), the live log level can be additionally set in
    # `setup.cfg`. It should be set to WARNING.
    caplog.set_level(logging.DEBUG)
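With no return value and no yield, the function's only effect is the level change, which caplog undoes after each test; that and the comment suggest an autouse fixture. A sketch of how it would plausibly be registered in `conftest.py` (the autouse registration is an assumption, not visible in the snippet):

# conftest.py (sketch)
import logging

import pytest
from _pytest.logging import LogCaptureFixture


@pytest.fixture(autouse=True)
def set_log_level_debug(caplog: LogCaptureFixture) -> None:
    # caplog restores the previous level automatically after each test.
    caplog.set_level(logging.DEBUG)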
Example #11
async def test_invalid(setup: SetupTest, caplog: LogCaptureFixture) -> None:
    token_data = await setup.create_session_token()
    issuer = setup.factory.create_token_issuer()
    oidc_token = issuer.issue_token(token_data, jti="some-jti")

    caplog.clear()
    r = await setup.client.get(
        "/auth/userinfo",
        headers={"Authorization": f"token {oidc_token.encoded}"},
    )

    assert r.status_code == 400
    authenticate = parse_www_authenticate(r.headers["WWW-Authenticate"])
    assert isinstance(authenticate, AuthErrorChallenge)
    assert authenticate.auth_type == AuthType.Bearer
    assert authenticate.realm == setup.config.realm
    assert authenticate.error == AuthError.invalid_request
    assert authenticate.error_description == "Unknown Authorization type token"

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "Unknown Authorization type token",
        "event": "Invalid request",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/auth/userinfo",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "user_agent": ANY,
    }

    r = await setup.client.get(
        "/auth/userinfo",
        headers={"Authorization": f"bearer{oidc_token.encoded}"},
    )

    assert r.status_code == 400
    authenticate = parse_www_authenticate(r.headers["WWW-Authenticate"])
    assert isinstance(authenticate, AuthErrorChallenge)
    assert authenticate.auth_type == AuthType.Bearer
    assert authenticate.realm == setup.config.realm
    assert authenticate.error == AuthError.invalid_request
    assert authenticate.error_description == "Malformed Authorization header"

    caplog.clear()
    r = await setup.client.get(
        "/auth/userinfo",
        headers={"Authorization": f"bearer XXX{oidc_token.encoded}"},
    )

    assert r.status_code == 401
    authenticate = parse_www_authenticate(r.headers["WWW-Authenticate"])
    assert isinstance(authenticate, AuthErrorChallenge)
    assert authenticate.auth_type == AuthType.Bearer
    assert authenticate.realm == setup.config.realm
    assert authenticate.error == AuthError.invalid_token
    assert authenticate.error_description

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": ANY,
        "event": "Invalid token",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/auth/userinfo",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "token_source": "bearer",
        "user_agent": ANY,
    }
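Here the structured (JSON) log line is decoded by hand from `caplog.record_tuples[0][2]`. Later examples (#15, #17, #21) call a project helper `parse_log(caplog)` instead; its JSON-decoding core presumably reduces to something like this sketch (the real helper evidently also normalizes fields, which is not reproduced here):

import json
from typing import Any, Dict, List

from _pytest.logging import LogCaptureFixture


def parse_log(caplog: LogCaptureFixture) -> List[Dict[str, Any]]:
    # Decode each captured JSON log message into a dict.
    return [json.loads(record.getMessage()) for record in caplog.records]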
Example #12
async def test_token_errors(
    setup: SetupTest, caplog: LogCaptureFixture
) -> None:
    clients = [
        OIDCClient(client_id="some-id", client_secret="some-secret"),
        OIDCClient(client_id="other-id", client_secret="other-secret"),
    ]
    setup.configure(oidc_clients=clients)
    token_data = await setup.create_session_token()
    token = token_data.token
    oidc_service = setup.factory.create_oidc_service()
    redirect_uri = f"https://{TEST_HOSTNAME}/app"
    code = await oidc_service.issue_code("some-id", redirect_uri, token)

    # Missing parameters.
    request: Dict[str, str] = {}
    caplog.clear()
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_request",
        "error_description": "Invalid token request",
    }

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "Invalid token request",
        "event": "Invalid request",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "POST",
        "path": "/auth/openid/token",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "user_agent": ANY,
    }

    # Invalid grant type.
    request = {
        "grant_type": "bogus",
        "client_id": "other-client",
        "code": "nonsense",
        "redirect_uri": f"https://{TEST_HOSTNAME}/",
    }
    caplog.clear()
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "unsupported_grant_type",
        "error_description": "Invalid grant type bogus",
    }

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "Invalid grant type bogus",
        "event": "Unsupported grant type",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "POST",
        "path": "/auth/openid/token",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "user_agent": ANY,
    }

    # Invalid code.
    request["grant_type"] = "authorization_code"
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }

    # No client_secret.
    request["code"] = str(OIDCAuthorizationCode())
    caplog.clear()
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_client",
        "error_description": "No client_secret provided",
    }

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "No client_secret provided",
        "event": "Unauthorized client",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "POST",
        "path": "/auth/openid/token",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "user_agent": ANY,
    }

    # Incorrect client_id.
    request["client_secret"] = "other-secret"
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_client",
        "error_description": "Unknown client ID other-client",
    }

    # Incorrect client_secret.
    request["client_id"] = "some-id"
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_client",
        "error_description": "Invalid secret for some-id",
    }

    # No stored data.
    request["client_secret"] = "some-secret"
    bogus_code = OIDCAuthorizationCode()
    request["code"] = str(bogus_code)
    caplog.clear()
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }
    log = json.loads(caplog.record_tuples[0][2])
    assert log["event"] == "Invalid authorization code"
    assert log["error"] == f"Unknown authorization code {bogus_code.key}"

    # Corrupt stored data.
    await setup.redis.set(bogus_code.key, "XXXXXXX")
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }

    # Correct code, but invalid client_id for that code.
    bogus_code = await oidc_service.issue_code("other-id", redirect_uri, token)
    request["code"] = str(bogus_code)
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }

    # Correct code and client_id but invalid redirect_uri.
    request["code"] = str(code)
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }

    # Delete the underlying token.
    token_service = setup.factory.create_token_service()
    await token_service.delete_token(
        token.key, token_data, token_data.username, ip_address="127.0.0.1"
    )
    request["redirect_uri"] = redirect_uri
    r = await setup.client.post("/auth/openid/token", data=request)
    assert r.status_code == 400
    assert r.json() == {
        "error": "invalid_grant",
        "error_description": "Invalid authorization code",
    }
Example #13
async def test_login(setup: SetupTest, caplog: LogCaptureFixture) -> None:
    clients = [OIDCClient(client_id="some-id", client_secret="some-secret")]
    setup.configure(oidc_clients=clients)
    token_data = await setup.create_session_token()
    await setup.login(token_data.token)
    return_url = f"https://{TEST_HOSTNAME}:4444/foo?a=bar&b=baz"

    # Log in
    caplog.clear()
    r = await setup.client.get(
        "/auth/openid/login",
        params={
            "response_type": "code",
            "scope": "openid",
            "client_id": "some-id",
            "state": "random-state",
            "redirect_uri": return_url,
        },
        allow_redirects=False,
    )
    assert r.status_code == 307
    url = urlparse(r.headers["Location"])
    assert url.scheme == "https"
    assert url.netloc == f"{TEST_HOSTNAME}:4444"
    assert url.path == "/foo"
    assert url.query
    query = parse_qs(url.query)
    assert query == {
        "a": ["bar"],
        "b": ["baz"],
        "code": [ANY],
        "state": ["random-state"],
    }
    code = query["code"][0]

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "event": "Returned OpenID Connect authorization code",
        "level": "info",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/auth/openid/login",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "return_url": return_url,
        "scope": "user:token",
        "token": token_data.token.key,
        "token_source": "cookie",
        "user": token_data.username,
        "user_agent": ANY,
    }

    # Redeem the code for a token and check the result.
    caplog.clear()
    r = await setup.client.post(
        "/auth/openid/token",
        data={
            "grant_type": "authorization_code",
            "client_id": "some-id",
            "client_secret": "some-secret",
            "code": code,
            "redirect_uri": return_url,
        },
    )
    assert r.status_code == 200
    assert r.headers["Cache-Control"] == "no-store"
    assert r.headers["Pragma"] == "no-cache"
    data = r.json()
    assert data == {
        "access_token": ANY,
        "token_type": "Bearer",
        "expires_in": ANY,
        "id_token": ANY,
    }
    assert isinstance(data["expires_in"], int)
    exp_seconds = setup.config.issuer.exp_minutes * 60
    assert exp_seconds - 5 <= data["expires_in"] <= exp_seconds

    assert data["access_token"] == data["id_token"]
    verifier = setup.factory.create_token_verifier()
    token = verifier.verify_internal_token(OIDCToken(encoded=data["id_token"]))
    assert token.claims == {
        "aud": setup.config.issuer.aud,
        "exp": ANY,
        "iat": ANY,
        "iss": setup.config.issuer.iss,
        "jti": OIDCAuthorizationCode.from_str(code).key,
        "name": token_data.name,
        "preferred_username": token_data.username,
        "scope": "openid",
        "sub": token_data.username,
        setup.config.issuer.username_claim: token_data.username,
        setup.config.issuer.uid_claim: token_data.uid,
    }
    now = time.time()
    expected_exp = now + setup.config.issuer.exp_minutes * 60
    assert expected_exp - 5 <= token.claims["exp"] <= expected_exp
    assert now - 5 <= token.claims["iat"] <= now

    log = json.loads(caplog.record_tuples[0][2])
    username = token_data.username
    assert log == {
        "event": f"Retrieved token for user {username} via OpenID Connect",
        "level": "info",
        "logger": "gafaelfawr",
        "method": "POST",
        "path": "/auth/openid/token",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "token": OIDCAuthorizationCode.from_str(code).key,
        "user": username,
        "user_agent": ANY,
    }
Example #14
async def test_login_errors(
    setup: SetupTest, caplog: LogCaptureFixture
) -> None:
    clients = [OIDCClient(client_id="some-id", client_secret="some-secret")]
    setup.configure(oidc_clients=clients)
    token_data = await setup.create_session_token()
    await setup.login(token_data.token)

    # No parameters at all.
    caplog.clear()
    r = await setup.client.get("/auth/openid/login", allow_redirects=False)
    assert r.status_code == 422

    # Good client ID but missing redirect_uri.
    login_params = {"client_id": "some-id"}
    caplog.clear()
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 422

    # Bad client ID.
    caplog.clear()
    login_params = {
        "client_id": "bad-client",
        "redirect_uri": f"https://{TEST_HOSTNAME}/",
    }
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 400
    assert "Unknown client_id bad-client" in r.text

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "Unknown client_id bad-client in OpenID Connect request",
        "event": "Invalid request",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/auth/openid/login",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "return_url": f"https://{TEST_HOSTNAME}/",
        "scope": "user:token",
        "token": ANY,
        "token_source": "cookie",
        "user": token_data.username,
        "user_agent": ANY,
    }

    # Bad redirect_uri.
    login_params["client_id"] = "some-id"
    login_params["redirect_uri"] = "https://foo.example.com/"
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 422
    assert "URL is not at" in r.text

    # Valid redirect_uri but missing response_type.
    login_params["redirect_uri"] = f"https://{TEST_HOSTNAME}/app"
    caplog.clear()
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 307
    url = urlparse(r.headers["Location"])
    assert url.scheme == "https"
    assert url.netloc == TEST_HOSTNAME
    assert url.path == "/app"
    assert url.query
    query = parse_qs(url.query)
    assert query == {
        "error": ["invalid_request"],
        "error_description": ["Missing response_type parameter"],
    }

    log = json.loads(caplog.record_tuples[0][2])
    assert log == {
        "error": "Missing response_type parameter",
        "event": "Invalid request",
        "level": "warning",
        "logger": "gafaelfawr",
        "method": "GET",
        "path": "/auth/openid/login",
        "remote": "127.0.0.1",
        "request_id": ANY,
        "return_url": login_params["redirect_uri"],
        "scope": "user:token",
        "token": ANY,
        "token_source": "cookie",
        "user": token_data.username,
        "user_agent": ANY,
    }

    # Invalid response_type.
    login_params["response_type"] = "bogus"
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 307
    assert query_from_url(r.headers["Location"]) == {
        "error": ["invalid_request"],
        "error_description": ["code is the only supported response_type"],
    }

    # Valid response_type but missing scope.
    login_params["response_type"] = "code"
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 307
    assert query_from_url(r.headers["Location"]) == {
        "error": ["invalid_request"],
        "error_description": ["Missing scope parameter"],
    }

    # Invalid scope.
    login_params["scope"] = "user:email"
    r = await setup.client.get(
        "/auth/openid/login", params=login_params, allow_redirects=False
    )
    assert r.status_code == 307
    assert query_from_url(r.headers["Location"]) == {
        "error": ["invalid_request"],
        "error_description": ["openid is the only supported scope"],
    }
Example #15
async def test_create(
    factory: ComponentFactory,
    mock_kubernetes: MockKubernetesApi,
    caplog: LogCaptureFixture,
) -> None:
    await create_test_service_tokens(mock_kubernetes)
    kubernetes_service = factory.create_kubernetes_service(MagicMock())
    await kubernetes_service.update_service_tokens()
    await assert_kubernetes_secrets_are_correct(factory, mock_kubernetes)

    service_token = await mock_kubernetes.get_namespaced_custom_object(
        "gafaelfawr.lsst.io",
        "v1alpha1",
        "mobu",
        "gafaelfawrservicetokens",
        "gafaelfawr-secret",
    )
    assert service_token["status"]["conditions"] == [
        {
            "lastTransitionTime": ANY,
            "message": "Secret was created",
            "observedGeneration": 1,
            "reason": StatusReason.Created.value,
            "status": "True",
            "type": "SecretCreated",
        }
    ]
    service_token = await mock_kubernetes.get_namespaced_custom_object(
        "gafaelfawr.lsst.io",
        "v1alpha1",
        "nublado2",
        "gafaelfawrservicetokens",
        "gafaelfawr",
    )
    assert service_token["status"]["conditions"] == [
        {
            "lastTransitionTime": ANY,
            "message": "Secret was created",
            "observedGeneration": 45,
            "reason": StatusReason.Created.value,
            "status": "True",
            "type": "SecretCreated",
        }
    ]

    assert parse_log(caplog) == [
        {
            "event": "Created new service token",
            "key": ANY,
            "severity": "info",
            "token_scope": "admin:token",
            "token_username": "******",
        },
        {
            "event": "Created mobu/gafaelfawr-secret secret",
            "scopes": ["admin:token"],
            "severity": "info",
            "service": "mobu",
        },
        {
            "event": "Created new service token",
            "key": ANY,
            "severity": "info",
            "token_scope": "",
            "token_username": "******",
        },
        {
            "event": "Created nublado2/gafaelfawr secret",
            "scopes": [],
            "severity": "info",
            "service": "nublado-hub",
        },
    ]

    # Running creation again should not change anything.
    caplog.clear()
    objects = mock_kubernetes.get_all_objects_for_test("Secret")
    await kubernetes_service.update_service_tokens()
    assert mock_kubernetes.get_all_objects_for_test("Secret") == objects
    assert caplog.record_tuples == []
Example #16
def test_validate_with_none_if_default_is_valid(caplog: LogCaptureFixture,
                                                tmp_path: pathlib.Path):
    with caplog.at_level(logging.WARNING, rasa.cli.utils.logger.name):
        assert get_validated_path(None, "out", str(tmp_path)) == str(tmp_path)

    assert caplog.records == []
Example #17
async def test_create_delete_modify(
    client: AsyncClient, factory: ComponentFactory, caplog: LogCaptureFixture
) -> None:
    user_info = TokenUserInfo(
        username="******",
        name="Example Person",
        email="*****@*****.**",
        uid=45613,
        groups=[TokenGroup(name="foo", id=12313)],
    )
    token_service = factory.create_token_service()
    async with factory.session.begin():
        session_token = await token_service.create_session_token(
            user_info,
            scopes=["read:all", "exec:admin", "user:token"],
            ip_address="127.0.0.1",
        )
    csrf = await set_session_cookie(client, session_token)

    expires = current_datetime() + timedelta(days=100)
    r = await client.post(
        "/auth/api/v1/users/example/tokens",
        headers={"X-CSRF-Token": csrf},
        json={
            "token_name": "some token",
            "scopes": ["read:all"],
            "expires": int(expires.timestamp()),
        },
    )
    assert r.status_code == 201
    assert r.json() == {"token": ANY}
    user_token = Token.from_str(r.json()["token"])
    token_url = r.headers["Location"]
    assert token_url == f"/auth/api/v1/users/example/tokens/{user_token.key}"

    r = await client.get(token_url)
    assert r.status_code == 200
    info = r.json()
    assert info == {
        "token": user_token.key,
        "username": "******",
        "token_name": "some token",
        "token_type": "user",
        "scopes": ["read:all"],
        "created": ANY,
        "expires": int(expires.timestamp()),
    }

    # Check that this is the same information as is returned by the token-info
    # route.  This is a bit tricky to do since the cookie will take precedence
    # over the Authorization header, but we can't just delete the cookie since
    # we'll lose the CSRF token.  Save the cookie and delete it, and then
    # later restore it.
    cookie = client.cookies.pop(COOKIE_NAME)
    r = await client.get(
        "/auth/api/v1/token-info",
        headers={"Authorization": f"bearer {user_token}"},
    )
    assert r.status_code == 200
    assert r.json() == info
    client.cookies.set(COOKIE_NAME, cookie, domain=TEST_HOSTNAME)

    # Listing all tokens for this user should return the user token and a
    # session token.
    r = await client.get("/auth/api/v1/users/example/tokens")
    assert r.status_code == 200
    data = r.json()

    # Adjust for sorting, which will be by creation date and then token.
    assert len(data) == 2
    if data[0] == info:
        session_info = data[1]
    else:
        assert data[1] == info
        session_info = data[0]
    assert session_info == {
        "token": session_token.key,
        "username": "******",
        "token_type": "session",
        "scopes": ["exec:admin", "read:all", "user:token"],
        "created": ANY,
        "expires": ANY,
    }

    # Change the name, scope, and expiration of the token.
    caplog.clear()
    new_expires = current_datetime() + timedelta(days=200)
    r = await client.patch(
        token_url,
        headers={"X-CSRF-Token": csrf},
        json={
            "token_name": "happy token",
            "scopes": ["exec:admin"],
            "expires": int(new_expires.timestamp()),
        },
    )
    assert r.status_code == 201
    assert r.json() == {
        "token": user_token.key,
        "username": "******",
        "token_name": "happy token",
        "token_type": "user",
        "scopes": ["exec:admin"],
        "created": ANY,
        "expires": int(new_expires.timestamp()),
    }

    # Check the logging.  Regression test for a bug where new expirations
    # would be logged as raw datetime objects instead of timestamps.
    assert parse_log(caplog) == [
        {
            "expires": int(new_expires.timestamp()),
            "event": "Modified token",
            "httpRequest": {
                "requestMethod": "PATCH",
                "requestUrl": f"https://{TEST_HOSTNAME}{token_url}",
                "remoteIp": "127.0.0.1",
            },
            "key": user_token.key,
            "scope": "exec:admin read:all user:token",
            "severity": "info",
            "token": session_token.key,
            "token_name": "happy token",
            "token_scope": "exec:admin",
            "token_source": "cookie",
            "user": "******",
        }
    ]

    # Delete the token.
    r = await client.delete(token_url, headers={"X-CSRF-Token": csrf})
    assert r.status_code == 204
    r = await client.get(token_url)
    assert r.status_code == 404

    # Deleting again should return 404.
    r = await client.delete(token_url, headers={"X-CSRF-Token": csrf})
    assert r.status_code == 404

    # This user should now have only one token.
    r = await client.get("/auth/api/v1/users/example/tokens")
    assert r.status_code == 200
    assert len(r.json()) == 1

    # We should be able to see the change history for the token.
    r = await client.get(token_url + "/change-history")
    assert r.status_code == 200
    assert r.json() == [
        {
            "token": user_token.key,
            "username": "******",
            "token_type": "user",
            "token_name": "happy token",
            "scopes": ["exec:admin"],
            "expires": int(new_expires.timestamp()),
            "actor": "example",
            "action": "revoke",
            "ip_address": "127.0.0.1",
            "event_time": ANY,
        },
        {
            "token": user_token.key,
            "username": "******",
            "token_type": "user",
            "token_name": "happy token",
            "scopes": ["exec:admin"],
            "expires": int(new_expires.timestamp()),
            "actor": "example",
            "action": "edit",
            "old_token_name": "some token",
            "old_scopes": ["read:all"],
            "old_expires": int(expires.timestamp()),
            "ip_address": "127.0.0.1",
            "event_time": ANY,
        },
        {
            "token": user_token.key,
            "username": "******",
            "token_type": "user",
            "token_name": "some token",
            "scopes": ["read:all"],
            "expires": int(expires.timestamp()),
            "actor": "example",
            "action": "create",
            "ip_address": "127.0.0.1",
            "event_time": ANY,
        },
    ]
Example #18
def test_load_exogenous_feature_with_database_disabled(
        feature_name: str, tmp_path: Path, monkeypatch: MonkeyPatch,
        caplog: LogCaptureFixture) -> None:
    caplog.set_level(logging.INFO)

    test_data = pd.DataFrame(
        [
            [
                "EF", "Project_1", 0, 0, "Contract_1", "EF", "Airframe_1",
                None, "Build Rate", "2015-01-01", 25.60
            ],
            [
                "EF", "Project_1", 0, 0, "Contract_1", "EF", "Airframe_1",
                None, "Build Rate 202001", "2015-01-01", 25.60
            ],
            [
                "EF", "Project_1", 0, 0, "Contract_1", "EF", "Airframe_1",
                None, "Build Rate", "2016-01-01", 25.60
            ],
            [
                "EF", "Project_1", 0, 0, "Contract_1", "EF", "Airframe_1",
                None, "Build Rate 202001", "2016-01-01", 25.60
            ],
        ],
        columns=[
            "binid",
            "projectid",
            "ship to",
            "branch",
            "contractid",
            "custmiscinfo",
            "airframe",
            "program",
            "periodicdatastream",
            "perioddate",
            "value",
        ],
    )

    data_loader = DataLoader(internal_database=Mock(spec=Database,
                                                    is_disabled=lambda: True),
                             dsx_read_database=Mock(spec=Database))

    dsx_exogenous_data_path = "01 Raw data/Test_DSX_exogenous_features.csv"
    absolute_dsx_exogenous_data_path = tmp_path / dsx_exogenous_data_path
    absolute_dsx_exogenous_data_path.parent.mkdir(parents=True)
    pd.DataFrame(test_data).to_csv(absolute_dsx_exogenous_data_path,
                                   index=False)

    monkeypatch.setattr(master_config, "dsx_exogenous_data_path",
                        dsx_exogenous_data_path)
    monkeypatch.setattr(master_config, "default_data_loader_location",
                        tmp_path)

    exogenous_data = data_loader.load_exogenous_feature(
        feature_name=feature_name, run_id=-1)

    assert_frame_equal(generate_exogenous_feature_data(feature_name),
                       exogenous_data)

    assert (
        f'Loaded exogenous feature for Periodic_Data_Stream "{feature_name}" '
        f'with 2 lines from "{dsx_exogenous_data_path}"' in caplog.messages)
Example #19
async def test_queue_monitor(producer: InternalProducer,
                             caplog: LogCaptureFixture, fake_coroutine):
    """Ensure the queue monitor logs as we expect

    Note that something we implicitly test for here is that the monitor
    does not log lots of duplicate lines. Rather it only logs when
    something changes.
    """
    producer.size_warning = 3
    producer.monitor_interval = 0.01
    caplog.set_level(logging.WARNING)

    # Start the producer running
    producer.start()

    # No logging yet
    assert not caplog.records

    # Add a couple of items to the queue (still under size_warning)
    producer.queue.put_nowait(None)
    producer.queue.put_nowait(None)
    await asyncio.sleep(0.05)

    # Still no logging yet
    assert not caplog.records

    # One more gets us up to the warning level
    producer.queue.put_nowait(None)
    await asyncio.sleep(0.05)

    # Now we have logging
    assert len(caplog.records) == 1
    assert (caplog.records[0].getMessage() ==
            "Queue in InternalProducer now has 3 commands.")
    caplog.clear()  # Clear the log messages

    # Let's check we get another message when the queue gets bigger again
    producer.queue.put_nowait(None)
    await asyncio.sleep(0.05)

    assert len(caplog.records) == 1
    assert (caplog.records[0].getMessage() ==
            "Queue in InternalProducer now has 4 commands.")
    caplog.clear()  # Clear the log messages

    # Now check we get logging when the queue shrinks, but is still above the warning level
    producer.queue.get_nowait()
    await asyncio.sleep(0.05)

    assert len(caplog.records) == 1
    assert caplog.records[0].getMessage() == (
        "Queue in InternalProducer has shrunk back down to 3 commands.")
    caplog.clear()  # Clear the log messages

    # Now check we get logging when the queue shrinks to BELOW the warning level
    producer.queue.get_nowait()
    await asyncio.sleep(0.05)

    assert len(caplog.records) == 1
    assert caplog.records[0].getMessage() == (
        "Queue in InternalProducer has shrunk back down to 2 commands. "
        "Queue is now at an OK size again.")
    caplog.clear()  # Clear the log messages
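The docstring's point, that the monitor logs only when something changes rather than on every polling interval, corresponds to a de-duplicating loop along these lines (a sketch consistent with the messages asserted above; the real `InternalProducer` implementation is not shown in this listing):

import asyncio
import logging

logger = logging.getLogger(__name__)


async def monitor_queue(queue: asyncio.Queue, size_warning: int,
                        interval: float) -> None:
    # Log queue pressure, but only when the size actually changes.
    last_reported = 0
    while True:
        size = queue.qsize()
        if size != last_reported:
            if size >= size_warning:
                verb = "now has" if size > last_reported else "has shrunk back down to"
                logger.warning("Queue in InternalProducer %s %s commands.", verb, size)
            elif last_reported >= size_warning:
                logger.warning(
                    "Queue in InternalProducer has shrunk back down to %s "
                    "commands. Queue is now at an OK size again.", size)
            last_reported = size
        await asyncio.sleep(interval)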
Example #20
def test_load_cleaning_input_data(caplog: LogCaptureFixture,
                                  monkeypatch: MonkeyPatch) -> None:
    caplog.set_level(logging.WARNING)

    def mock_load_csv(*args: Any, **kwargs: Any) -> pd.DataFrame:
        return pd.DataFrame(
            [
                # this one will be kept after cleaning
                [
                    "Account_18", "Project_0", "Contract_0", 1,
                    pd.Timestamp("2020-02-01"), 1.0, 1.0, "mn0"
                ],
                # this one will be removed due to adhoc contract in dsx data workaround in TODO FSC-371
                [
                    "Account_18", "Project_0", "Contract_730", 1,
                    pd.Timestamp("2020-02-01"), 1.0, 1.0, "mn0"
                ],
                # this one will be removed due to wrong days in dsx data workaround in TODO FSC-318
                [
                    "Account_18", "Project_0", "Contract_0", 1,
                    pd.Timestamp("2020-02-28"), 1.0, 1.0, "mn0"
                ],
            ],
            columns=[
                "lowlevelcust",
                "projectid",
                "contractid",
                "shortid",
                "perioddate",
                "Cost",
                "Adjusted History",
                "masterpart",
            ],
        )

    monkeypatch.setattr(DataLoader, "load_csv", mock_load_csv)
    internal_database = Database(DatabaseType.internal)
    dsx_read_database = Database(DatabaseType.dsx_read)
    dsx_read_database.is_disabled = lambda: True  # type: ignore
    data_loader = DataLoader(internal_database, dsx_read_database)

    assert data_loader.load_cleaning_input_data().equals(
        pd.DataFrame(
            [
                # this one will be kept after cleaning
                [
                    "Account_18", "Project_0", "Contract_0", 1,
                    pd.Timestamp("2020-02-01"), 1.0, 1.0, "mn0"
                ]
            ],
            columns=[
                "lowlevelcust",
                "projectid",
                "contractid",
                "shortid",
                "perioddate",
                "Cost",
                "Adjusted History",
                "masterpart",
            ],
        ))
    assert "Removed 1 rows with invalid dates." in caplog.messages
    assert "Removed 1 unexpected rows of adhoc contract." in caplog.messages
Example #21
async def test_login(
    tmp_path: Path,
    client: AsyncClient,
    respx_mock: respx.Router,
    caplog: LogCaptureFixture,
) -> None:
    config = await configure(tmp_path, "oidc")
    token = await create_upstream_oidc_token(groups=["admin"],
                                             name="Some Person",
                                             email="*****@*****.**")
    await mock_oidc_provider_config(respx_mock, config.issuer.keypair)
    await mock_oidc_provider_token(respx_mock, "some-code", token)
    assert config.oidc
    return_url = "https://example.com:4444/foo?a=bar&b=baz"

    caplog.clear()
    r = await client.get("/login", params={"rd": return_url})
    assert r.status_code == 307
    assert r.headers["Location"].startswith(config.oidc.login_url)
    url = urlparse(r.headers["Location"])
    assert url.query
    query = parse_qs(url.query)
    login_params = {p: [v] for p, v in config.oidc.login_params.items()}
    assert query == {
        "client_id": [config.oidc.client_id],
        "redirect_uri": [config.oidc.redirect_url],
        "response_type": ["code"],
        "scope": ["openid " + " ".join(config.oidc.scopes)],
        "state": [ANY],
        **login_params,
    }

    # Verify the logging.
    login_url = config.oidc.login_url
    assert parse_log(caplog) == [{
        "event": f"Redirecting user to {login_url} for authentication",
        "httpRequest": {
            "requestMethod": "GET",
            "requestUrl": ANY,
            "remoteIp": "127.0.0.1",
        },
        "return_url": return_url,
        "severity": "info",
    }]

    # Simulate the return from the provider.
    caplog.clear()
    r = await client.get("/login",
                         params={
                             "code": "some-code",
                             "state": query["state"][0]
                         })
    assert r.status_code == 307
    assert r.headers["Location"] == return_url

    # Verify the logging.
    expected_scopes_set = set(config.issuer.group_mapping["admin"])
    expected_scopes_set.add("user:token")
    expected_scopes = " ".join(sorted(expected_scopes_set))
    event = f"Successfully authenticated user {token.username} ({token.uid})"
    assert parse_log(caplog) == [
        {
            "event": f"Retrieving ID token from {config.oidc.token_url}",
            "httpRequest": {
                "requestMethod": "GET",
                "requestUrl": ANY,
                "remoteIp": "127.0.0.1",
            },
            "return_url": return_url,
            "severity": "info",
        },
        {
            "event": event,
            "httpRequest": {
                "requestMethod": "GET",
                "requestUrl": ANY,
                "remoteIp": "127.0.0.1",
            },
            "return_url": return_url,
            "scope": expected_scopes,
            "severity": "info",
            "token": ANY,
            "user": token.username,
        },
    ]

    # Check that the /auth route works and finds our token.
    r = await client.get("/auth", params={"scope": "exec:admin"})
    assert r.status_code == 200
    assert r.headers["X-Auth-Request-Token-Scopes"] == expected_scopes
    assert r.headers["X-Auth-Request-Scopes-Accepted"] == "exec:admin"
    assert r.headers["X-Auth-Request-Scopes-Satisfy"] == "all"
    assert r.headers["X-Auth-Request-User"] == token.username
    assert r.headers["X-Auth-Request-Email"] == "*****@*****.**"
    assert r.headers["X-Auth-Request-Uid"] == str(token.uid)
    assert r.headers["X-Auth-Request-Groups"] == "admin"
Example #22
def test_jieba_load_and_persist_dictionary(
    tmp_path_factory: TempPathFactory,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    caplog: LogCaptureFixture,
):
    dictionary_directory = tmp_path_factory.mktemp("dictionaries")
    dictionary_path = dictionary_directory / "dictionary_1"

    dictionary_contents = """
创新办 3 i
云计算 5
凱特琳 nz
台中
        """
    dictionary_path.write_text(dictionary_contents, encoding="utf-8")

    component_config = {"dictionary_path": dictionary_directory}

    resource = Resource("jieba")
    tk = JiebaTokenizerGraphComponent.create(
        {
            **JiebaTokenizerGraphComponent.get_default_config(),
            **component_config
        },
        default_model_storage,
        resource,
        default_execution_context,
    )

    tk.process_training_data(TrainingData([Message(data={TEXT: ""})]))

    # The dictionary has not been persisted yet.
    with caplog.at_level(logging.WARN):
        JiebaTokenizerGraphComponent.load(
            {
                **JiebaTokenizerGraphComponent.get_default_config(),
                **component_config
            },
            default_model_storage,
            resource,
            default_execution_context,
        )
        assert any(
            "Failed to load JiebaTokenizerGraphComponent from model storage."
            in message for message in caplog.messages)

    tk.persist()

    # Check the persisted dictionary matches the original file.
    with default_model_storage.read_from(resource) as resource_dir:
        contents = (resource_dir / "dictionary_1").read_text(encoding="utf-8")
        assert contents == dictionary_contents

    # Delete original files to show that we read from the model storage.
    dictionary_path.unlink()
    dictionary_directory.rmdir()

    JiebaTokenizerGraphComponent.load(
        {
            **JiebaTokenizerGraphComponent.get_default_config(),
            **component_config
        },
        default_model_storage,
        resource,
        default_execution_context,
    )

    tk.process([Message(data={TEXT: ""})])