# Shared imports for the S3 storage tests below (Prefect 0.x import paths).
import io
from unittest.mock import MagicMock

import cloudpickle
import pytest
from botocore.exceptions import ClientError

from prefect import Flow, context
from prefect.agent.local import LocalAgent
from prefect.environments.storage import S3
from prefect.utilities.graphql import GraphQLResult


def test_create_s3_storage_init_args():
    storage = S3(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        bucket="bucket",
        key="key",
    )
    assert storage
    assert storage.flows == dict()
    assert storage.aws_access_key_id == "id"
    assert storage.aws_secret_access_key == "secret"
    assert storage.aws_session_token == "session"
    assert storage.bucket == "bucket"
    assert storage.key == "key"
def test_get_flow_s3_from_init_key_run(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")
    downloaded_flow = storage.get_flow()
    assert isinstance(downloaded_flow, Flow)

    state = downloaded_flow.run()
    assert state.is_successful()
def test_get_flow_s3_not_in_storage(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="test")
    assert f.name not in storage
    with pytest.raises(ValueError):
        storage.get_flow("test/test")
def test_upload_flow_to_s3_client_error(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    boto3.upload_fileobj.side_effect = ClientError({}, None)
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")
    f = Flow("test")
    assert f.name not in storage
    assert storage.add_flow(f)

    with pytest.raises(ClientError):
        storage.build()
    assert boto3.upload_fileobj.called
def test_upload_flow_to_s3_key_format(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")
    f = Flow("test")
    assert storage.add_flow(f)
    assert storage.build()

    assert boto3.upload_fileobj.call_args[1]["Bucket"] == "bucket"
    key = boto3.upload_fileobj.call_args[1]["Key"].split("/")
    assert key[0] == "test"
    assert key[1]
def test_create_s3_storage_init_args():
    storage = S3(
        bucket="bucket",
        key="key",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False},
        secrets=["auth"],
    )
    assert storage
    assert storage.flows == dict()
    assert storage.bucket == "bucket"
    assert storage.key == "key"
    assert storage.client_options == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
    assert storage.secrets == ["auth"]
def test_serialize_s3_storage():
    storage = S3(
        bucket="bucket",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False},
    )
    serialized_storage = storage.serialize()

    assert serialized_storage["type"] == "S3"
    assert serialized_storage["bucket"] == "bucket"
    assert serialized_storage["client_options"] == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
def test_get_flow_s3_bucket_key(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")
    assert f.name not in storage
    flow_location = storage.add_flow(f)

    assert storage.get_flow(flow_location)
    assert boto3.download_fileobj.call_args[1]["Bucket"] == "bucket"
    assert boto3.download_fileobj.call_args[1]["Key"] == "key"
def test_get_flow_s3_from_init_key(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")
    assert len(storage.flows) == 0
    assert len(storage._flows) == 0

    downloaded_flow = storage.get_flow()
    assert downloaded_flow.name == "test"
def test_local_agent_deploy_processes_s3_storage(monkeypatch, runner_token):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    agent = LocalAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult({"storage": S3(bucket="test").serialize()}),
                "id": "id",
            }
        )
    )

    assert popen.called
    assert len(agent.processes) == 1
def test_upload_flow_to_s3_flow_byte_stream(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")
    f = Flow("test")
    assert storage.add_flow(f)
    assert storage.build()

    flow_as_bytes = boto3.upload_fileobj.call_args[0][0]
    assert isinstance(flow_as_bytes, io.BytesIO)

    new_flow = cloudpickle.loads(flow_as_bytes.read())
    assert new_flow.name == "test"

    state = new_flow.run()
    assert state.is_successful()
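# Hedged sketch (not Prefect's verbatim source): the upload path the test
# above exercises plausibly pickles the flow into an in-memory stream and
# hands it to boto3's upload_fileobj, which is why the first positional
# argument captured from the mock is an io.BytesIO. The helper name and
# argument layout here are illustrative assumptions.
import io

import cloudpickle


def upload_flow_sketch(s3_client, flow, bucket: str, key: str) -> None:
    # Serialize the flow with cloudpickle and wrap it in a byte stream.
    stream = io.BytesIO(cloudpickle.dumps(flow))
    # upload_fileobj(Fileobj, Bucket=..., Key=...) is boto3's real streaming
    # upload API; Bucket and Key are passed as keyword arguments, matching
    # the call_args[1] lookups in the tests.
    s3_client.upload_fileobj(stream, Bucket=bucket, Key=key)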
def test_get_flow_s3(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket")
    with pytest.raises(ValueError):
        storage.get_flow()

    assert f.name not in storage
    flow_location = storage.add_flow(f)

    assert storage.get_flow(flow_location)
    assert boto3.download_fileobj.called
    assert f.name in storage
def test_boto3_client_property(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(client=MagicMock(return_value=client))
    monkeypatch.setattr("boto3.client", boto3)

    storage = S3(
        bucket="bucket",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )
    boto3_client = storage._boto3_client
    assert boto3_client
    boto3.assert_called_with(
        "s3",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )
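# Hedged sketch of a _boto3_client property consistent with the assertions in
# test_boto3_client_property above — a plausible shape, not necessarily
# Prefect's actual implementation. boto3.client("s3", ...) and its credential
# keyword arguments are boto3's real API; the S3Sketch class is illustrative.
import boto3


class S3Sketch:
    def __init__(
        self,
        bucket,
        aws_access_key_id=None,
        aws_secret_access_key=None,
        aws_session_token=None,
    ):
        self.bucket = bucket
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.aws_session_token = aws_session_token

    @property
    def _boto3_client(self):
        # Build an S3 client from the credentials stored on the storage
        # object, mirroring the kwargs the test asserts were passed.
        return boto3.client(
            "s3",
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_session_token=self.aws_session_token,
        )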
def test_create_s3_storage_init_args():
    storage = S3(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        bucket="bucket",
        key="key",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False},
    )
    assert storage
    assert storage.flows == dict()
    assert storage.aws_access_key_id == "id"
    assert storage.aws_secret_access_key == "secret"
    assert storage.aws_session_token == "session"
    assert storage.bucket == "bucket"
    assert storage.key == "key"
    assert storage.client_options == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
def test_build_script_upload(monkeypatch):
    client = MagicMock()
    # The script-upload path goes through upload_file, so configure that
    # method on the mock (the original configured upload_fileobj, which this
    # test never asserts on).
    boto3 = MagicMock(upload_file=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(
        bucket="bucket", stored_as_script=True, local_script_path="local.py", key="key"
    )
    f = Flow("test")
    assert f.name not in storage
    assert storage.add_flow(f)
    assert storage.build()
    assert f.name in storage

    assert boto3.upload_file.called
    assert boto3.upload_file.call_args[0] == ("local.py", "bucket", "key")

    boto3.upload_file.side_effect = ClientError({}, None)
    with pytest.raises(ClientError):
        storage.build()
def test_get_flow_s3_runs(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")
    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket")
    assert f.name not in storage
    flow_location = storage.add_flow(f)

    new_flow = storage.get_flow(flow_location)
    assert boto3.download_fileobj.called
    assert f.name in storage

    assert isinstance(new_flow, Flow)
    assert new_flow.name == "test"
    assert len(new_flow.tasks) == 0

    state = new_flow.run()
    assert state.is_successful()
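# Hedged sketch of the download path the get_flow tests exercise (a plausible
# shape, not Prefect's verbatim source): stream the S3 object into memory,
# then cloudpickle-load it back into a Flow. The function name is an
# illustrative assumption.
import io

import cloudpickle


def get_flow_sketch(s3_client, bucket: str, key: str):
    stream = io.BytesIO()
    # download_fileobj(Bucket=..., Key=..., Fileobj=...) is boto3's real
    # streaming download API, matching the Bucket/Key kwargs the tests assert.
    s3_client.download_fileobj(Bucket=bucket, Key=key, Fileobj=stream)
    stream.seek(0)
    return cloudpickle.loads(stream.read())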
def test_boto3_client_property(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(client=MagicMock(return_value=client))
    monkeypatch.setattr("boto3.client", boto3)

    storage = S3(
        bucket="bucket",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False},
    )
    credentials = dict(
        ACCESS_KEY="id", SECRET_ACCESS_KEY="secret", SESSION_TOKEN="session"
    )
    with context(secrets=dict(AWS_CREDENTIALS=credentials)):
        boto3_client = storage._boto3_client
    assert boto3_client
    boto3.assert_called_with(
        "s3",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        endpoint_url="http://some-endpoint",
        use_ssl=False,
    )
def test_serialize_s3_storage():
    storage = S3(bucket="bucket")
    serialized_storage = storage.serialize()

    assert serialized_storage["type"] == "S3"
    # (snippet truncated above: the opening of this call is not shown)
    save_to_neo=True, writers={})
    try:
        search = search_term
        job_name = "qanon"
        limit = 10000000
        for df in fh.search_time_range(
            tp=tp,
            Search=search,
            Since=datetime.strftime(start, "%Y-%m-%d %H:%M:%S"),
            Until=datetime.strftime(current, "%Y-%m-%d %H:%M:%S"),
            job_name=job_name,
            Limit=limit,
            stride_sec=30,
        ):
            logger.info("got: %s", len(df) if df is not None else "None")
            logger.info("proceed to next df")
    except Exception as e:
        logger.error("job exception", exc_info=True)
        raise e
    logger.info("job finished")


schedule = IntervalSchedule(interval=timedelta(seconds=30))
storage = S3(bucket=S3_BUCKET)

# with Flow("covid-19 stream-single") as flow:
# with Flow("covid-19 stream", storage=storage, schedule=schedule) as flow:
with Flow("qanon stream", schedule=schedule) as flow:
    run_stream()

flow.run()
import os

import boto3

from prefect import Parameter, Flow, task
from prefect.environments import LocalEnvironment
from prefect.environments.storage import S3

# testing boto directly
s3 = boto3.resource("s3")
for bucket in s3.buckets.all():
    print(bucket.name)


# testing boto via Prefect
@task
def say_hello(person: str) -> None:
    print("Hello, {}!".format(person))


with Flow("Say hi!") as flow:
    name = Parameter("name")
    say_hello(name)

storage = S3(bucket="my-prefect-flows")
# also tried, after setting AWS_CREDENTIALS:
# storage = S3(bucket="REDACTED", secrets=["AWS_CREDENTIALS"])
flow.storage = storage
flow.register(project_name="Demo")
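# Hedged note: if the direct boto3 calls above succeed but registration with
# S3 storage fails on credentials, one option is boto3's standard environment
# variables (these names are boto3's own; the values below are placeholders,
# set before the flow is built):
import os

os.environ.setdefault("AWS_ACCESS_KEY_ID", "<access-key-id>")  # placeholder
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "<secret-key>")  # placeholder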
from prefect import Flow, task
from prefect.environments.storage import S3


@task
def extract():
    return [1, 2, 3]


@task
def transform(data):
    return [i * 10 for i in data]


@task
def load(data):
    print("Here's your data: {}".format(data))


with Flow(
    "ETL-s3-reg-demo",
    storage=S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"]),
) as flow:
    e = extract()
    t = transform(e)
    l = load(t)

flow.register(project_name="Demo")
# print(flow_id)
def test_create_s3_storage():
    storage = S3(bucket="test")
    assert storage
    assert storage.logger
# from prefect.environments import RemoteEnvironment
from prefect import Flow, task
from prefect.environments.storage import S3


@task
def extract():
    """Get a list of data"""
    return [1, 2, 3]


@task
def transform(data):
    """Multiply the input by 10"""
    return [i * 10 for i in data]


@task
def load(data):
    """Print the data to indicate it was received"""
    print("Here's your data: {}".format(data))


with Flow("s3-storage", storage=S3(bucket="my-prefect-flows")) as flow:
    e = extract()
    t = transform(e)
    l = load(t)

flow.register(project_name="Demo")
            # (snippet truncated above: the opening of this call is not shown)
            },
        }),
        run,
    )
    assert env_vars["KEY1"] == "VAL1"
    assert env_vars["KEY2"] == "OVERRIDE"
    assert env_vars["PREFECT__LOGGING__LEVEL"] == "TEST"
    assert working_dir in env_vars["PYTHONPATH"]


@pytest.mark.parametrize(
    "storage",
    [
        Local(directory="test"),
        GCS(bucket="test"),
        S3(bucket="test"),
        Azure(container="test"),
        GitLab("test/repo", path="path/to/flow.py"),
        Bitbucket(project="PROJECT", repo="test-repo", path="test-flow.py"),
        CodeCommit("test/repo", path="path/to/flow.py"),
        Webhook(
            build_request_kwargs={"url": "test-service/upload"},
            build_request_http_method="POST",
            get_flow_request_kwargs={"url": "test-service/download"},
            get_flow_request_http_method="GET",
        ),
    ],
)
def test_local_agent_deploy_processes_valid_storage(storage, monkeypatch):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)