def test_add_duplicate_flow_to_gcs(self):
    """Adding the same flow to GCS storage twice raises ValueError."""
    storage = GCS(bucket="awesome-bucket")
    flow = Flow("awesome-flow")

    # The first add succeeds and returns a truthy key.
    assert storage.add_flow(flow)

    # A second add of the same flow must be rejected.
    with pytest.raises(ValueError):
        storage.add_flow(flow)
def test_upload_multiple_flows_to_gcs(self, google_client):
    """build() uploads every added flow as a pickled blob to the bucket."""
    blob_mock = MagicMock()
    bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    storage = GCS(bucket="awesome-bucket")
    flows = (Flow("awesome-flow-1"), Flow("awesome-flow-2"))
    for flow in flows:
        storage.add_flow(flow)

    assert storage.build()
    assert bucket_mock.blob.call_count == 2
    assert blob_mock.upload_from_string.call_count == 2

    expected_blob_calls = [call(blob_name=storage.flows[flow.name]) for flow in flows]
    expected_upload_calls = [call(cloudpickle.dumps(flow)) for flow in flows]

    # Nothing is uploaded until build() runs, and build() iterates a
    # dictionary — which is unordered on older Pythons — so accept the
    # calls in any order.
    bucket_mock.blob.assert_has_calls(expected_blob_calls, any_order=True)
    blob_mock.upload_from_string.assert_has_calls(expected_upload_calls, any_order=True)
def test_add_flow_to_gcs(self):
    """add_flow() registers the flow and returns an auto-generated key."""
    storage = GCS(bucket="awesome-bucket")
    flow = Flow("awesome-flow")
    assert flow.name not in storage

    key = storage.add_flow(flow)

    # The auto-generated key must use "/" (not a platform separator) so
    # keys built on Windows remain valid GCS object names.
    assert key.startswith("awesome-flow/")
    assert flow.name in storage
def test_add_multiple_flows_to_gcs(self):
    """Several distinct flows can be added and are all tracked."""
    storage = GCS(bucket="awesome-bucket")
    flows = (Flow("awesome-flow-1"), Flow("awesome-flow-2"))

    for flow in flows:
        # Each flow is absent before the add and present afterwards.
        assert flow.name not in storage
        assert storage.add_flow(flow)
        assert flow.name in storage

    # Both the key map and the flow-object map track every flow.
    assert len(storage.flows) == 2
    assert len(storage._flows) == 2
def test_upload_single_flow_with_custom_key_to_gcs(self, google_client):
    """A user-supplied key overrides the auto-generated blob name."""
    blob_mock = MagicMock()
    bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    storage = GCS(bucket="awesome-bucket", key="the-best-key")
    flow = Flow("awesome-flow")
    assert flow.name not in storage
    assert storage.add_flow(flow)
    assert flow.name in storage
    assert storage.build()

    # The custom key is used verbatim, and the flow is uploaded pickled.
    bucket_mock.blob.assert_called_with(blob_name="the-best-key")
    blob_mock.upload_from_string.assert_called_with(cloudpickle.dumps(flow))
def test_create_gcs_client_no_project(self, google_client):
    """A client can be created with project=None, which is passed through."""
    storage = GCS(bucket="bucket", project=None)

    client = storage._gcs_client
    assert client
    # The explicit None must reach the underlying google client.
    google_client.assert_called_with(project=None)
def test_get_flow_from_gcs(self, google_client):
    """get_flow() downloads the blob and unpickles it into a Flow."""
    flow = Flow("awesome-flow")
    pickled = cloudpickle.dumps(flow)

    blob_mock = MagicMock(download_as_string=MagicMock(return_value=pickled))
    bucket_mock = MagicMock(get_blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    storage = GCS(bucket="awesome-bucket", key="a-place")
    storage.add_flow(flow)

    fetched = storage.get_flow("a-place")

    assert fetched.name == flow.name
    # Exactly one blob lookup and one download.
    bucket_mock.get_blob.assert_called_with("a-place")
    assert blob_mock.download_as_string.call_count == 1
def test_get_flow_from_gcs_as_file(self, google_client):
    """With stored_as_script, get_flow() extracts the Flow from source text."""
    flow = Flow("awesome-flow")
    # The downloaded content is Python source, not a pickle.
    script_source = """from prefect import Flow\nf=Flow('awesome-flow')"""

    blob_mock = MagicMock(download_as_string=MagicMock(return_value=script_source))
    bucket_mock = MagicMock(get_blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    storage = GCS(bucket="awesome-bucket", key="a-place", stored_as_script=True)
    storage.add_flow(flow)

    fetched = storage.get_flow("a-place")

    assert fetched.name == flow.name
    bucket_mock.get_blob.assert_called_with("a-place")
    assert blob_mock.download_as_string.call_count == 1
def test_build_no_upload_if_file_and_no_local_script_path(self, google_client):
    """stored_as_script without a key fails; with a key build() is a no-op."""
    # No key and no local script path: build() cannot locate the script.
    storage = GCS(bucket="awesome-bucket", stored_as_script=True)
    with pytest.raises(ValueError):
        storage.build()

    # With a key, build() succeeds without uploading and returns itself.
    storage = GCS(bucket="awesome-bucket", stored_as_script=True, key="myflow.py")
    assert storage == storage.build()
def test_put_get_and_run_single_flow_to_gcs(self, google_client):
    """A flow survives the upload round-trip and still runs successfully."""
    blob_mock = MagicMock()
    bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    storage = GCS(bucket="awesome-bucket")
    flow = Flow("awesome-flow")
    assert flow.name not in storage
    assert storage.add_flow(flow)
    assert flow.name in storage
    assert storage.build()

    # Recover the exact bytes that were "uploaded" and unpickle them.
    uploaded_bytes = blob_mock.upload_from_string.call_args[0][0]
    restored_flow = cloudpickle.loads(uploaded_bytes)

    assert restored_flow.name == "awesome-flow"
    state = restored_flow.run()
    assert state.is_successful()
def test_create_gcs_storage(self):
    """Constructor arguments are stored on the instance unchanged."""
    storage = GCS(
        bucket="awesome-bucket",
        key="the-best-key",
        project="mayhem",
        secrets=["boo"],
    )

    assert storage
    # A fresh storage object has no flows registered yet.
    assert len(storage._flows) == 0
    assert storage.bucket == "awesome-bucket"
    assert storage.key == "the-best-key"
    assert storage.project == "mayhem"
    assert storage.secrets == ["boo"]
def test_local_agent_deploy_processes_gcs_storage(monkeypatch, runner_token):
    """The local agent spawns a subprocess for a GCS-stored flow run."""
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    agent = LocalAgent()
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult({"storage": GCS(bucket="test").serialize()}),
            "id": "id",
        }
    )
    agent.deploy_flow(flow_run=flow_run)

    assert popen.called
    assert len(agent.processes) == 1
def test_upload_script_if_path(self, google_client, tmpdir):
    """With stored_as_script + local_script_path, build() uploads the script file.

    Args:
        google_client: fixture patching the google storage client.
        tmpdir: pytest temporary-directory fixture used for the script file.
    """
    blob_mock = MagicMock()
    bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
    google_client.return_value.get_bucket = MagicMock(return_value=bucket_mock)

    # A real on-disk script for upload_from_file to read.
    with open(f"{tmpdir}/flow.py", "w") as tmpfile:
        tmpfile.write("foo")

    storage = GCS(
        bucket="awesome-bucket",
        stored_as_script=True,
        local_script_path=f"{tmpdir}/flow.py",
        key="key",
    )
    f = Flow("awesome-flow")
    assert f.name not in storage
    assert storage.add_flow(f)
    assert f.name in storage
    assert storage.build()

    bucket_mock.blob.assert_called_with(blob_name=storage.flows[f.name])
    # BUG FIX: the original evaluated `blob_mock.upload_from_file.called`
    # as a bare expression without `assert`, so a missing upload would
    # never fail the test.
    assert blob_mock.upload_from_file.called
    assert blob_mock.upload_from_file.call_args[0]
def test_local_agent_deploy_processes_azure_storage(monkeypatch, cloud_api):
    """The local agent spawns a subprocess for a serialized-storage flow run.

    NOTE(review): the name says "azure" but the body serializes GCS
    storage — looks like a copy-paste leftover; confirm the intended
    storage backend.
    """
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    agent = LocalAgent()
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": GCS(bucket="test").serialize(),
                    "id": "foo",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
        }
    )
    agent.deploy_flow(flow_run=flow_run)

    assert popen.called
    assert len(agent.processes) == 1
from prefect import Flow, task
from prefect.environments.storage import GCS


@task
def extract():
    """Produce the raw input data."""
    return [1, 2, 3]


@task
def transform(data):
    """Scale every element by ten."""
    return [i * 10 for i in data]


@task
def load(data):
    """Emit the final data to stdout."""
    print("Here's your data: {}".format(data))


# Example ETL flow whose source is stored on GCS as a script file.
with Flow(
    "ETL-gcs-script",
    storage=GCS(
        bucket="prefect-flows-josh",
        stored_as_script=True,
    ),
) as flow:
    extracted = extract()
    transformed = transform(extracted)
    loaded = load(transformed)

# flow_id = flow.register(project_name="Demo")
# print(flow_id)
"run_config": run.serialize() }, }), run, ) assert env_vars["KEY1"] == "VAL1" assert env_vars["KEY2"] == "OVERRIDE" assert env_vars["PREFECT__LOGGING__LEVEL"] == "TEST" assert working_dir in env_vars["PYTHONPATH"] @pytest.mark.parametrize( "storage", [ Local(directory="test"), GCS(bucket="test"), S3(bucket="test"), Azure(container="test"), GitLab("test/repo", path="path/to/flow.py"), Bitbucket(project="PROJECT", repo="test-repo", path="test-flow.py"), CodeCommit("test/repo", path="path/to/flow.py"), Webhook( build_request_kwargs={"url": "test-service/upload"}, build_request_http_method="POST", get_flow_request_kwargs={"url": "test-service/download"}, get_flow_request_http_method="GET", ), ], ) def test_local_agent_deploy_processes_valid_storage(storage, monkeypatch): popen = MagicMock()