Example #1
0
def test_populate_env_vars_from_run_config(tmpdir):
    """Run-config env vars override agent defaults; working_dir lands on PYTHONPATH."""
    working_dir = str(tmpdir)
    agent = LocalAgent(env_vars={"KEY1": "VAL1", "KEY2": "VAL2"})

    run_config = LocalRun(
        env={"KEY2": "OVERRIDE", "PREFECT__LOGGING__LEVEL": "TEST"},
        working_dir=working_dir,
    )

    flow_run = GraphQLResult(
        {
            "id": "id",
            "name": "name",
            "flow": {"id": "foo"},
            "run_config": run_config.serialize(),
        }
    )
    env_vars = agent.populate_env_vars(flow_run, run_config)

    # Agent-level values survive unless the run config overrides them.
    assert env_vars["KEY1"] == "VAL1"
    assert env_vars["KEY2"] == "OVERRIDE"
    assert env_vars["PREFECT__LOGGING__LEVEL"] == "TEST"
    assert working_dir in env_vars["PYTHONPATH"]
Example #2
0
def test_local_agent_deploy_run_config_missing_working_dir(monkeypatch, tmpdir):
    """Deploying with a nonexistent working_dir raises and never spawns a process."""
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    missing_dir = str(tmpdir.join("missing"))
    agent = LocalAgent()

    flow_run = GraphQLResult(
        {
            "id": "id",
            "flow": {
                "storage": Local().serialize(),
                "id": "foo",
                "core_version": "0.13.0",
            },
            "run_config": LocalRun(working_dir=missing_dir).serialize(),
        }
    )

    with pytest.raises(ValueError, match="nonexistent `working_dir`"):
        agent.deploy_flow(flow_run=flow_run)

    # Nothing should have been launched or tracked.
    assert not popen.called
    assert not agent.processes
Example #3
0
 def test_generate_job_spec_errors_if_non_kubernetesrun_run_config(self):
     """A non-KubernetesRun run config is rejected with a TypeError."""
     expected = "`run_config` of type `LocalRun`, only `KubernetesRun` is supported"
     with pytest.raises(TypeError, match=expected):
         self.agent.generate_job_spec(self.build_flow_run(LocalRun()))
Example #4
0
 def test_deploy_flow_errors_if_not_ecs_run_config(self):
     """Deploying a LocalRun to the ECS agent is rejected with a TypeError."""
     expected = "`run_config` of type `LocalRun`, only `ECSRun` is supported"
     with pytest.raises(TypeError, match=expected):
         self.deploy_flow(LocalRun())
Example #5
0
 def test_deploy_flow_errors_if_not_vertex_run_config(self, agent):
     """Deploying a LocalRun to the Vertex agent is rejected with a TypeError."""
     expected = "`run_config` of type `LocalRun`, only `VertexRun` is supported"
     with pytest.raises(TypeError, match=expected):
         self.deploy_flow(agent, LocalRun())
def test_docker_agent_deploy_flow_unsupported_run_config(api):
    """The Docker agent rejects a non-DockerRun run config without pulling any image."""
    agent = DockerAgent()

    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "id": "foo",
                    "name": "flow-name",
                    "core_version": "0.13.0",
                }
            ),
            "run_config": LocalRun().serialize(),
            "id": "id",
            "name": "name",
            "version": "version",
        }
    )

    with pytest.raises(
        TypeError,
        match="`run_config` of type `LocalRun`, only `DockerRun` is supported",
    ):
        agent.deploy_flow(flow_run=flow_run)

    # The agent must bail out before touching the Docker API.
    assert not api.pull.called
Example #7
0
def test_local_agent_deploy_run_config_working_dir(monkeypatch, working_dir, tmpdir):
    """deploy_flow forwards the run config's working_dir (or None) as Popen's cwd."""
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    # The parametrized value only signals whether a working_dir is used;
    # substitute a real temporary directory when one is requested.
    if working_dir is not None:
        working_dir = str(tmpdir)

    agent = LocalAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "id": "id",
                "flow": {
                    "storage": Local().serialize(),
                    "id": "foo",
                    "core_version": "0.13.0",
                },
                "run_config": LocalRun(working_dir=working_dir).serialize(),
            },
        )
    )

    assert popen.called
    assert len(agent.processes) == 1
    assert popen.call_args[1]["cwd"] == working_dir
Example #8
0
def test_client_register_flow_id_output(
    patch_post, use_run_config, compressed, monkeypatch, capsys, cloud_api, tmpdir
):
    """Registering a flow returns its id and prints the flow URL and labels."""
    # The GraphQL mutation name differs depending on compression; everything
    # else in the mocked response is identical.
    create_key = "create_flow_from_compressed_string" if compressed else "create_flow"
    patch_post(
        {
            "data": {
                "project": [{"id": "proj-id"}],
                create_key: {"id": "long-id"},
                "flow_by_pk": {"flow_group_id": "fg-id"},
            }
        }
    )

    monkeypatch.setattr(
        "prefect.client.Client.get_default_tenant_slug", MagicMock(return_value="tslug")
    )

    with set_temporary_config(
        {
            "cloud.api": "http://my-cloud.foo",
            "cloud.auth_token": "secret_token",
            "backend": "cloud",
        }
    ):
        client = Client()

    labels = ["test1", "test2"]
    storage = Local(tmpdir)
    # Build the flow with either the run-config style or the legacy
    # environment style, both carrying the same labels.
    if use_run_config:
        flow = prefect.Flow(
            name="test", storage=storage, run_config=LocalRun(labels=labels)
        )
        flow.environment = None
    else:
        flow = prefect.Flow(
            name="test", storage=storage, environment=LocalEnvironment(labels=labels)
        )
    flow.result = flow.storage.result

    flow_id = client.register(
        flow,
        project_name="my-default-project",
        compressed=compressed,
        version_group_id=str(uuid.uuid4()),
    )
    assert flow_id == "long-id"

    captured = capsys.readouterr()
    assert "Flow URL: https://cloud.prefect.io/tslug/flow/fg-id\n" in captured.out
    assert f"Labels: {labels}" in captured.out
Example #9
0
def test_prefect_logging_level_override_logic(config, agent_env_vars,
                                              run_config_env_vars,
                                              expected_logging_level, tmpdir):
    """Logging level precedence: run-config env over agent env over global config."""
    with set_temporary_config(config):
        run_config = LocalRun(working_dir=str(tmpdir), env=run_config_env_vars)
        flow_run = GraphQLResult(
            {
                "id": "id",
                "name": "name",
                "flow": {"id": "foo"},
                "run_config": run_config.serialize(),
            }
        )
        agent = LocalAgent(env_vars=agent_env_vars)
        env_vars = agent.populate_env_vars(flow_run, run_config)
        assert env_vars["PREFECT__LOGGING__LEVEL"] == expected_logging_level
Example #10
0
def test_all_args(tmpdir):
    """All LocalRun constructor arguments are stored; labels become a set."""
    path = str(tmpdir)
    config = LocalRun(env={"hello": "world"}, working_dir=path, labels=["a", "b"])

    assert config.env == {"hello": "world"}
    assert config.working_dir == path
    assert config.labels == {"a", "b"}
Example #11
0
def get_local_run_config() -> LocalRun:
    """
    Return a LocalRun configuration to attach to a flow.

    Returns:
       - prefect.run_configs.LocalRun: The local run configuration to be applied to a flow
    """
    # Point the run at the project root and expose the project's config/path.
    env = {
        "PREFECT__USER_CONFIG_PATH": MFP_CONFIG_PATH,
        "PYTHONPATH": PYTHONPATH,
    }
    return LocalRun(working_dir=ROOT_DIR, env=env)
Example #12
0
def create_flow() -> Flow:
    """Create and return the covid modeling flow (runs with a plain LocalRun)."""
    # Haven't used different executors enough to know the difference
    with Flow(FLOW_NAME, run_config=LocalRun()) as flow:
        country = Parameter("country", default=DEFAULT_COUNTRY)

        # raw_df = extract_whole_covid_data()
        raw_df = extract_covid_data_from_file()
        country_df = filter_data(raw_df, country)

        # Only for whole data, not latest
        full_df = extract_full_country_data(country_df)

        # Label column: new case counts, with NaNs cleaned out.
        raw_y = extract_label_column(full_df, 'new_cases')
        y = clean_NaN(raw_y)
        # print_head(y)

        # Feature matrix: drop overfit-prone columns, clean, select, scale.
        raw_X = remove_overfit_columns(full_df, DROP_COLUMNS)
        cleaned_X = clean_NaN(raw_X)
        best_X = optimize_feature_columns(cleaned_X, 10, y)
        scaled_X = scale_data(best_X)
        # print_head(scaled_X)

        train_test_data = split_data(scaled_X, y)
        check_data(train_test_data)
        # check_for_infinity(train_test_data)

        # Some issue with my data's format & type while being processed
        # within the model.
        # Going back to Kaggle. Maybe I'm using the wrong model?
        # Just don't know enough yet.
        # train_model = grid_search(train_test_data)

        # save_data(raw_df, 'raw', 'whole')

    return flow
Example #13
0
 def test_deploy_flow_errors_if_not_ecs_run_config(self):
     """A LocalRun is an unsupported RunConfig for this agent."""
     expected = "Unsupported RunConfig"
     with pytest.raises(TypeError, match=expected):
         self.deploy_flow(LocalRun())
Example #14
0
import pytest

from prefect.storage import Docker, Local
from prefect.run_configs import KubernetesRun, LocalRun
from prefect.utilities.agent import get_flow_image, get_flow_run_command
from prefect.utilities.graphql import GraphQLResult


@pytest.mark.parametrize("run_config", [KubernetesRun(), LocalRun(), None])
def test_get_flow_image_run_config_docker_storage(run_config):
    """With Docker storage, get_flow_image uses the storage image for any run config."""
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": storage.serialize(),
                    "id": "id",
                }
            ),
            "run_config": None if run_config is None else run_config.serialize(),
            "id": "id",
        }
    )
    assert get_flow_image(flow_run) == "test/name:tag"


@pytest.mark.parametrize("run_config", [KubernetesRun(), LocalRun(), None])
@pytest.mark.parametrize("version",
                         ["0.13.0", "0.10.0+182.g385a32514.dirty", None])
Example #15
0
        "cpu_limit",
        "cpu_request",
        "memory_limit",
        "memory_request",
        "service_account_name",
        "image_pull_secrets",
        "image_pull_policy",
    ]
    for field in fields:
        assert getattr(config, field) == getattr(config2, field)


@pytest.mark.parametrize(
    "config",
    [
        LocalRun(),
        LocalRun(
            env={"test": "foo"},
            working_dir="/path/to/dir",
            labels=["a", "b"],
        ),
    ],
)
def test_serialize_local_run(config):
    """A LocalRun survives a serialize/deserialize round trip unchanged."""
    roundtripped = RunConfigSchema().load(RunConfigSchema().dump(config))
    # Labels are an unordered collection, so compare them sorted.
    assert sorted(config.labels) == sorted(roundtripped.labels)
    for attr in ("env", "working_dir"):
        assert getattr(config, attr) == getattr(roundtripped, attr)
Example #16
0
                                      table_name='example_data')

    data: dd.DataFrame = ML.feature_engineering(data=data)
    data: dd.DataFrame = ML.inference(data=data)

    IO.write_to_S3(bucket_name=bucket_name,
                   folder_name='out',
                   table_name='example_data')

flow.storage = GitHub(  # prefect register -f _flow.py
    repo="Brontomerus/ml-workflows",
    ref="master",
    path="/workflows/ml/flow.py",
    secrets=["GITHUB_ACCESS_TOKEN"])

flow.run_config = LocalRun(labels=['dev'])

flow.executor = DaskExecutor(
    cluster_class="dask_cloudprovider.aws.FargateCluster",
    cluster_kwargs={
        "image":
        "daskdev/dask:2021.4.1",
        "fargate_use_private_ip":
        True,
        "n_workers":
        2,
        "scheduler_timeout":
        "4 minutes",
        "worker_cpu":
        2048,  #2048
        "worker_mem":
Example #17
0

# Build a flow that repeats the values() -> do_something.map() pattern four
# times; presumably a mapping stress/demo flow — the commented-out Docker
# storage suggests it previously ran from a container image.
with Flow(
        "map_100_docker",
        # storage=Docker(
        #     registry_url="joshmeek18",
        #     image_name="flows",
        # ),
) as flow:
    # Each values() call adds its own task instance to the flow graph.
    v1 = values()
    do_something.map(v1)

    v2 = values()
    do_something.map(v2)

    v3 = values()
    do_something.map(v3)

    v4 = values()
    do_something.map(v4)

# Run as a plain local process (no custom env or working dir).
flow.run_config = LocalRun()

# from prefect.environments import LocalEnvironment
from prefect.engine.executors import LocalDaskExecutor

# Execute mapped tasks in parallel with a local Dask scheduler.
flow.executor = LocalDaskExecutor()

# flow.environment
# Register the flow under the "Demo" project on the configured backend.
flow.register(project_name="Demo")
Example #18
0
import os

from prefect import Flow, task, Parameter
from prefect.storage import GitHub
from prefect.run_configs import LocalRun


@task(log_stdout=True)
def greet(name):
    """Print a greeting for *name*, using the GREETING env var ('Hello' default)."""
    salutation = os.environ.get("GREETING", "Hello")
    print(f"{salutation}, {name}!")


# Wire up a one-task flow, then configure its GitHub storage and local run.
with Flow("test-github") as flow:
    name = Parameter("name")
    greet(name)

flow.run_config = LocalRun(env={"GREETING": "Hello"})
flow.storage = GitHub("jcrist/prefect-hacking", path="test_github.py")
Example #19
0
                          name="SQL-stuff"
                          # commit: bool = False,
                          )
#--------------------------------------------------------------
# Flow context
#--------------------------------------------------------------
# Assemble the flow graph: a parameterized dog task plus a SQL query and viewer.
with Flow("github_flow") as f:

    sql_password = EnvVarSecret(prefect.config.sql_server.password_var)

    logger = prefect.context.get("logger")
    thing = Parameter("thing", default=["Thing 1"])
    dog_result = dog(thing)

    query_result = sql_task(password=sql_password)

    viewed = view_sql(query_result)

#--------------------------------------------------------------
# Closing Details
#--------------------------------------------------------------
# Point the local run at the author's user config file.
f.run_config = LocalRun(
    env={"PREFECT__USER_CONFIG_PATH": '/Users/peytonrunyan/TRP/prefect/config.toml'}
)

f.storage = GitHub(
    repo="peyton-trp/prefect-test",
    path="simple_flow.py",
    secrets=["GITHUB_ACCESS_TOKEN"],
)

f.register("cat_flow")
Example #20
0
def test_no_args():
    """Default LocalRun: no env, no working_dir, empty label set."""
    config = LocalRun()
    assert config.working_dir is None
    assert config.env is None
    assert config.labels == set()
Example #21
0
from prefect import Flow, task
from prefect.executors import DaskExecutor
from prefect.run_configs import LocalRun
import time


@task
def get_vals():
    """Return the integers 1 through 10."""
    return list(range(1, 11))


@task
def print_val(v):
    """Sleep ten seconds (simulated work), then print the value."""
    delay_seconds = 10
    time.sleep(delay_seconds)
    print(v)


# Map a slow print task over ten values on a Dask executor, then register.
with Flow("dask-test", executor=DaskExecutor(), run_config=LocalRun()) as f:
    vals = get_vals()
    print_val.map(vals)

f.register("Demo")
Example #22
0
 def run_config(self) -> RunConfig:
     """Build a LocalRun carrying this object's generated environment."""
     env = self._generate_env()
     return LocalRun(env=env)
Example #23
0
@task(log_stdout=True)
def extract(input_string):
    """Echo the input string and return the source numbers."""
    print(input_string)
    return list(range(1, 7))


@task
def transform(number):
    """Double a single number."""
    return 2 * number


@task
def load(numbers):
    """Report the (pretend) upload of the numbers to Snowflake."""
    print(f"Uploaded {numbers} to Snowflake")


# ETL flow: extract -> (mapped) transform -> load, stored on GitHub and
# executed with a threaded local Dask scheduler.
with Flow(
        "ETL - Local",
        storage=GitHub(
            repo="dylanbhughes/pgr_examples_3",
            path="local_flow.py",
            secrets=["GITHUB_ACCESS_TOKEN"],
        ),
        run_config=LocalRun(labels=["pgr local"]),
        executor=LocalDaskExecutor(scheduler="threads", num_workers=3),
) as flow:
    input_string = Parameter(name="input_string", required=True)
    numbers = extract(input_string=input_string)
    doubled = transform.map(numbers)
    result = load(numbers=doubled)
Example #24
0
    interval = randrange(0, 60)
    logger.info(interval)
    time.sleep(interval)
    if interval > 50:
        logger.info("Failing flow...")
        raise signals.FAIL()

# Chain three filler tasks sequentially and register under "data-warehouse".
with Flow(
    "Data Warehouse ETL",
    storage=GitHub(
        repo="kmoonwright/utility_flows",
        path="enterprise_demo/filler_flows.py",
        access_token_secret="GITHUB_ACCESS_TOKEN"
    ),
    # schedule=Schedule(clocks=[IntervalClock(timedelta(minutes=2))]),
    run_config=LocalRun(labels=["local"])
) as flow1:
    first = task_1()
    second = task_2()
    third = task_3()
    # Force a strict 1 -> 2 -> 3 ordering via upstream dependencies.
    second.set_upstream(first)
    third.set_upstream(second)
flow1.register(project_name="data-warehouse")

with Flow(
    "Dev Environment ML Training",
    storage=GitHub(
        repo="kmoonwright/utility_flows", 
        path="enterprise_demo/filler_flows.py",
        access_token_secret="GITHUB_ACCESS_TOKEN"
    ),
Example #25
0
def test_working_dir_relpath_to_abspath():
    """A relative working_dir is normalized to an absolute path."""
    rel = os.path.join("local", "path")
    config = LocalRun(working_dir=rel)
    assert config.working_dir == os.path.abspath(rel)