Example #2
def test_azure_empty_serialize():
    azure = storage.Azure(container="container")
    serialized = AzureSchema().dump(azure)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["container"] == "container"
    assert serialized["blob_name"] is None
    assert serialized["secrets"] == []
Example #4
def test_azure_creds_not_serialized():
    azure = storage.Azure(container="container",
                          connection_string="conn",
                          blob_name="name")
    serialized = AzureSchema().dump(azure)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["container"] == "container"
    assert serialized["blob_name"] == "name"
    assert serialized.get("connection_string") is None
Example #5
def test_azure_full_serialize():
    azure = storage.Azure(
        container="container",
        connection_string="conn",
        blob_name="name",
        secrets=["foo"],
        labels=["bar", "baz"],
        add_default_labels=False,
    )
    serialized = AzureSchema().dump(azure)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["container"] == "container"
    assert serialized["blob_name"] == "name"
    assert serialized["secrets"] == ["foo"]
Example #6
def configure_flow_storage(cluster: Cluster, secrets):
    if cluster.flow_storage_protocol == S3_PROTOCOL:
        key = secrets[cluster.flow_storage_options.key]
        secret = secrets[cluster.flow_storage_options.secret]
        flow_storage = storage.S3(
            bucket=cluster.flow_storage,
            client_options={
                "aws_access_key_id": key,
                "aws_secret_access_key": secret
            },
        )
        return flow_storage
    elif cluster.flow_storage_protocol == ABFS_PROTOCOL:
        secret = secrets[cluster.flow_storage_options.secret]
        flow_storage = storage.Azure(container=cluster.flow_storage,
                                     connection_string=secret)
        return flow_storage
    else:
        raise UnsupportedFlowStorage
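
A hypothetical call site for configure_flow_storage; the real Cluster type is not shown in the listing, so a SimpleNamespace stands in for it and the secret names are invented purely for illustration:

from types import SimpleNamespace

# Illustrative only: SimpleNamespace mimics the Cluster fields the function
# reads; ABFS_PROTOCOL and configure_flow_storage come from the snippet above.
cluster = SimpleNamespace(
    flow_storage_protocol=ABFS_PROTOCOL,
    flow_storage="flows-container",
    flow_storage_options=SimpleNamespace(secret="AZURE_CONN_STRING"),
)
secrets = {"AZURE_CONN_STRING": "DefaultEndpointsProtocol=https;..."}
flow_storage = configure_flow_storage(cluster, secrets)
assert isinstance(flow_storage, storage.Azure)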
Example #7
def test_azure_serialize_with_flows():
    azure = storage.Azure(
        container="container",
        connection_string="conn",
        blob_name="name",
        secrets=["foo"],
    )
    f = prefect.Flow("test")
    azure.flows["test"] = "key"
    serialized = AzureSchema().dump(azure)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["container"] == "container"
    assert serialized["blob_name"] == "name"
    assert serialized.get("connection_string") is None
    assert serialized["flows"] == {"test": "key"}

    deserialized = AzureSchema().load(serialized)
    assert f.name in deserialized
    assert deserialized.secrets == ["foo"]
def register_recipe(recipe: BaseRecipe):
    fs_remote = AzureBlobFileSystem(
        connection_string=os.environ["FLOW_STORAGE_CONNECTION_STRING"]
    )
    target = FSSpecTarget(
        fs_remote,
        root_path=f"abfs://{os.environ['FLOW_CACHE_CONTAINER']}/azurerecipetest/",
    )
    recipe.target = target
    recipe.input_cache = CacheFSSpecTarget(
        fs_remote,
        root_path=(
            f"abfs://{os.environ['FLOW_CACHE_CONTAINER']}/azurerecipetestcache/"
        ),
    )
    recipe.metadata_cache = target

    executor = PrefectPipelineExecutor()
    pipeline = recipe.to_pipelines()
    flow = executor.pipelines_to_plan(pipeline)

    job_template = yaml.safe_load(
        """
        apiVersion: batch/v1
        kind: Job
        metadata:
          annotations:
            "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
        spec:
          template:
            spec:
              containers:
                - name: flow
        """
    )

    flow_name = "test-noaa-flow"
    flow.storage = storage.Azure(
        container=os.environ["FLOW_STORAGE_CONTAINER"],
        connection_string=os.environ["FLOW_STORAGE_CONNECTION_STRING"],
    )
    flow.run_config = KubernetesRun(
        job_template=job_template,
        image=os.environ["BAKERY_IMAGE"],
        env={
            "AZURE_STORAGE_CONNECTION_STRING": os.environ[
                "FLOW_STORAGE_CONNECTION_STRING"
            ],
        },
        labels=json.loads(os.environ["PREFECT__CLOUD__AGENT__LABELS"]),
        cpu_request="1000m",
        memory_request="3Gi",
    )
    worker_spec = make_pod_spec(
        image=os.environ["BAKERY_IMAGE"],
        labels={"flow": flow_name},
        memory_limit="1Gi",
        memory_request="500Mi",
        cpu_limit="512m",
        cpu_request="256m",
        env={
            "AZURE_STORAGE_CONNECTION_STRING": os.environ[
                "FLOW_STORAGE_CONNECTION_STRING"
            ]
        },
    )

    scheduler_spec = make_pod_spec(
        image=os.environ["BAKERY_IMAGE"],
        labels={"flow": flow_name},
        memory_request="500Mi",
        cpu_request="256m",
    )
    scheduler_spec.spec.containers[0].args = ["dask-scheduler"]
    scheduler_spec = clean_pod_template(scheduler_spec, pod_type="scheduler")

    flow.executor = DaskExecutor(
        cluster_class="dask_kubernetes.KubeCluster",
        cluster_kwargs={
            "pod_template": worker_spec,
            "scheduler_pod_template": scheduler_spec,
        },
        adapt_kwargs={"maximum": 10},
    )

    for flow_task in flow.tasks:
        flow_task.run = set_log_level(flow_task.run)

    flow.name = flow_name
    project_name = os.environ["PREFECT_PROJECT"]
    flow_id = flow.register(project_name=project_name)
    return flow_id
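
register_recipe likewise omits its imports; a plausible reconstruction, with every module path an assumption inferred from the names used (pangeo-forge-recipes, adlfs, dask-kubernetes, and Prefect 1.x):

# Assumed imports for register_recipe; paths are inferred, not confirmed by
# the listing. set_log_level is presumably a project-local helper.
import json
import os

import yaml
from adlfs import AzureBlobFileSystem
from dask_kubernetes import make_pod_spec
from dask_kubernetes.objects import clean_pod_template
from pangeo_forge_recipes.executors import PrefectPipelineExecutor  # path uncertain
from pangeo_forge_recipes.recipes import BaseRecipe
from pangeo_forge_recipes.storage import CacheFSSpecTarget, FSSpecTarget
from prefect import storage
from prefect.executors import DaskExecutor
from prefect.run_configs import KubernetesRun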