def test_gcs_init(self, google_client):
    """Check constructor defaults and that the client is only built on request."""
    result_handler = GCSResultHandler(bucket="bob")

    # Construction alone must leave the mocked Google client untouched.
    assert result_handler.bucket == "bob"
    assert result_handler.credentials_secret == "GOOGLE_APPLICATION_CREDENTIALS"
    assert google_client.called is False

    # Explicit initialization asks the client for the configured bucket.
    result_handler.initialize_client()
    bucket_lookup = google_client.return_value.bucket
    assert bucket_lookup.call_args[0][0] == "bob"
def test_gcs_writes_binary_string(self, google_client):
    """Check that ``write`` uploads its payload to the blob as a ``str``."""
    blob = MagicMock()
    # Mocked chain: client.bucket(...) -> bucket, bucket.blob(...) -> blob.
    fake_bucket = MagicMock(blob=MagicMock(return_value=blob))
    google_client.return_value.bucket = MagicMock(return_value=fake_bucket)

    result_handler = GCSResultHandler(bucket="foo")
    result_handler.write(None)

    upload = blob.upload_from_string
    assert upload.called
    uploaded_payload = upload.call_args[0][0]
    assert isinstance(uploaded_payload, str)
def test_gcs_uses_custom_secret_name(self, google_client):
    """Check that the secret named by ``credentials_secret`` reaches the client."""
    result_handler = GCSResultHandler(
        bucket="foo", credentials_secret="TEST_SECRET"
    )

    # Provide the secret through the local context so no backend is needed.
    with prefect.context(secrets=dict(TEST_SECRET=94611)), \
            set_temporary_config({"cloud.use_local_secrets": True}):
        result_handler.initialize_client()

    client_kwargs = google_client.call_args[1]
    assert client_kwargs["credentials"] == 94611
def test_gcs_writes_to_blob_prefixed_by_date_suffixed_by_prefect(
        self, google_client):
    """Check the blob name starts with the UTC date and ends with ``prefect_result``.

    The date is sampled both before and after the write and either value is
    accepted, so the test cannot fail spuriously when the UTC date rolls over
    between the write and the assertion.
    """
    bucket = MagicMock()
    google_client.return_value.bucket = MagicMock(return_value=bucket)
    handler = GCSResultHandler(bucket="foo")

    date_before = pendulum.now("utc").format("Y/M/D")
    handler.write("so-much-data")
    date_after = pendulum.now("utc").format("Y/M/D")

    assert bucket.blob.called
    blob_name = bucket.blob.call_args[0][0]
    # str.startswith accepts a tuple of candidate prefixes.
    assert blob_name.startswith((date_before, date_after))
    assert blob_name.endswith("prefect_result")
def test_serialize(self):
    """Check that the schema dump carries the type, bucket and secret name."""
    result_handler = GCSResultHandler(
        bucket="my-bucket", credentials_secret="FOO"
    )
    dumped = ResultHandlerSchema().dump(result_handler)

    assert dumped["type"] == "GCSResultHandler"
    assert dumped["bucket"] == "my-bucket"
    assert dumped["credentials_secret"] == "FOO"
def test_gcs_uses_custom_secret_name(self):
    """Check that the custom secret's value is used to build service-account credentials."""
    service_account = MagicMock()
    result_handler = GCSResultHandler(
        bucket="foo", credentials_secret="TEST_SECRET"
    )

    # Replace the Google modules so initialization resolves to mocks.
    fake_modules = {
        "google.cloud": MagicMock(),
        "google.oauth2.service_account": service_account,
    }

    with prefect.context(secrets=dict(TEST_SECRET=94611)), \
            set_temporary_config({"cloud.use_local_secrets": True}), \
            patch.dict("sys.modules", fake_modules):
        result_handler.initialize_client()

    from_info = service_account.Credentials.from_service_account_info
    assert from_info.call_args[0][0] == 94611
def test_gcs_handler_is_pickleable(self, google_client, monkeypatch):
    """Check that a handler survives a cloudpickle round trip even with an unpicklable bucket.

    The stand-in bucket class raises when pickled. It is installed on the
    mocked client and the client is initialized before pickling, so the round
    trip only succeeds if the handler keeps the bucket object out of its
    pickled state. Previously the class was defined but never used, so the
    test never exercised that situation.
    """

    class gcs_bucket:
        def __init__(self, *args, **kwargs):
            pass

        def __getstate__(self):
            raise ValueError("I cannot be pickled.")

    # client.bucket(<name>) now builds an instance of the unpicklable class.
    google_client.return_value.bucket = gcs_bucket

    handler = GCSResultHandler("foo")
    handler.initialize_client()

    res = cloudpickle.loads(cloudpickle.dumps(handler))
    assert isinstance(res, GCSResultHandler)
def test_roundtrip_never_loads_client(self, monkeypatch):
    """Check that a schema dump/load round trip never calls ``initialize_client``."""
    schema = ResultHandlerSchema()

    def raise_me(*args, **kwargs):
        raise SyntaxError("oops")

    # Any attempt to initialize the client during the round trip now raises.
    monkeypatch.setattr(GCSResultHandler, "initialize_client", raise_me)

    source_handler = GCSResultHandler(
        bucket="bucket3", credentials_secret="FOO"
    )
    dumped = schema.dump(source_handler)
    handler = schema.load(dumped)

    assert isinstance(handler, GCSResultHandler)
    assert handler.bucket == "bucket3"
    assert handler.credentials_secret == "FOO"
def __init__(self, bucket: str, key: str = None, project: str = None) -> None:
    """Store the given settings and register a GCS result handler with the base class.

    Args:
        bucket: Stored on the instance and also passed to the
            ``GCSResultHandler`` that is handed to the parent initializer.
        key: Stored on the instance unchanged; defaults to ``None``.
        project: Stored on the instance unchanged; defaults to ``None``.
    """
    # Both registries start out empty.
    self.flows = {}  # type: Dict[str, str]
    self._flows = {}  # type: Dict[str, "Flow"]

    self.bucket = bucket
    self.key = key
    self.project = project

    super().__init__(result_handler=GCSResultHandler(bucket=bucket))
def test_gcs_handler_is_pickleable(self, monkeypatch):
    """Check that a handler built against an unpicklable bucket survives a cloudpickle round trip."""

    class gcs_bucket:
        def __init__(self, *args, **kwargs):
            pass

        def __getstate__(self):
            raise ValueError("I cannot be pickled.")

    # storage.Client(...) -> client, whose ``bucket`` is the unpicklable class.
    client = MagicMock(bucket=gcs_bucket)
    storage = MagicMock(Client=MagicMock(return_value=client))
    fake_modules = {
        "google.cloud": MagicMock(storage=storage),
        "google.oauth2.service_account": MagicMock(),
    }

    with patch.dict("sys.modules", fake_modules):
        handler = GCSResultHandler("foo")
        pickled = cloudpickle.dumps(handler)
        res = cloudpickle.loads(pickled)
        assert isinstance(res, GCSResultHandler)
def test_roundtrip(self):
    """Check that a schema dump/load round trip returns a handler with the same bucket."""
    schema = ResultHandlerSchema()
    dumped = schema.dump(GCSResultHandler(bucket="bucket3"))
    restored = schema.load(dumped)

    assert isinstance(restored, GCSResultHandler)
    assert restored.bucket == "bucket3"
def test_serialize(self, google_client):
    """Check that the schema dump carries the handler type and bucket name."""
    result_handler = GCSResultHandler(bucket="my-bucket")
    dumped = ResultHandlerSchema().dump(result_handler)

    assert dumped["type"] == "GCSResultHandler"
    assert dumped["bucket"] == "my-bucket"
parameter_defaults=dict( url='http://www.insidethex.co.uk/')), ]), storage=Docker( # TODO: change to your docker registry: # https://docs.prefect.io/cloud/recipes/configuring_storage.html registry_url='szelenka', # TODO: 'pin' the exact versions you used on your development machine python_dependencies=[ 'requests==2.23.0', 'beautifulsoup4==4.8.2', 'sqlalchemy==1.3.15' ], ), # TODO: specify how you want to handle results # https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers result_handler=GCSResultHandler(bucket='prefect_results')) as flow: _url = Parameter("url", default='http://www.insidethex.co.uk/') _bypass = Parameter("bypass", default=False, required=False) _db_file = Parameter("db_file", default='xfiles_db.sqlite', required=False) # scrape the website _home_page = retrieve_url(_url) _episodes = create_episode_list(base_url=_url, main_html=_home_page, bypass=_bypass) _episode = retrieve_url.map(_episodes) _dialogue = scrape_dialogue.map(_episode) # insert into SQLite table _db = create_db(filename=_db_file) _final = insert_episode.map(episode=_dialogue, tbl=unmapped(_db))
def test_gcs_init(self, google_client):
    """Check that construction records the bucket name and requests that bucket from the client."""
    result_handler = GCSResultHandler(bucket="bob")
    assert result_handler.bucket == "bob"

    bucket_lookup = google_client.return_value.bucket
    assert bucket_lookup.call_args[0][0] == "bob"
def test_basic_conversion_gcs_result():
    """Check that converting a GCS result handler yields a ``GCSResult`` with the same bucket."""
    converted = ResultHandlerResult.from_result_handler(
        GCSResultHandler(bucket="foo")
    )

    assert isinstance(converted, GCSResult)
    assert converted.bucket == "foo"