Esempio n. 1
0
def test_create_github_storage_init_args():
    """A GitHub storage built with explicit arguments exposes them unchanged."""
    storage = GitHub(
        repo="test/repo",
        path="flow.py",
        secrets=["auth"],
    )

    # A fresh storage is truthy and has no flows registered yet.
    assert storage
    assert storage.flows == {}

    # Every constructor argument is readable back as an attribute.
    assert (storage.repo, storage.path) == ("test/repo", "flow.py")
    assert storage.secrets == ["auth"]
Esempio n. 2
0
def test_add_flow_to_github_storage():
    """Adding a flow returns the storage path and makes the flow name a member."""
    flow = Flow("test")
    storage = GitHub(repo="test/repo", path="flow.py")

    assert flow.name not in storage

    location = storage.add_flow(flow)

    assert location == "flow.py"
    assert flow.name in storage
Esempio n. 3
0
def test_serialize_github_storage():
    """Serializing a GitHub storage yields its type tag and constructor fields."""
    payload = GitHub(repo="test/repo", path="flow.py", secrets=["auth"]).serialize()

    expected_fields = {
        "type": "GitHub",
        "repo": "test/repo",
        "path": "flow.py",
        "secrets": ["auth"],
    }
    for field, expected in expected_fields.items():
        assert payload[field] == expected
Esempio n. 4
0
def test_add_flow_to_github_already_added():
    """Adding the same flow a second time is rejected with a ValueError."""
    flow = Flow("test")
    storage = GitHub(repo="test/repo", path="flow.py")

    # First registration succeeds and reports the storage path.
    assert flow.name not in storage
    first_location = storage.add_flow(flow)
    assert first_location == "flow.py"
    assert flow.name in storage

    # Second registration of the same flow must fail.
    with pytest.raises(ValueError):
        storage.add_flow(flow)
Esempio n. 5
0
def test_github_client_property(monkeypatch):
    """Reading ``_github_client`` calls the patched ``Github`` class with the token."""
    github_cls = MagicMock()
    monkeypatch.setattr("prefect.utilities.git.Github", github_cls)

    token = "ACCESS_TOKEN"
    storage = GitHub(repo="test/repo")

    # The token is supplied through the Prefect context secrets.
    with context(secrets={"GITHUB_ACCESS_TOKEN": token}):
        client = storage._github_client

    assert client
    github_cls.assert_called_with("ACCESS_TOKEN")
Esempio n. 6
0
def test_get_flow_github(monkeypatch):
    """``get_flow`` raises without a location and otherwise returns a runnable flow."""
    flow = Flow("test")

    # Stub the GitHub client class and the flow extractor; the extractor
    # always hands back ``flow``.
    monkeypatch.setattr("prefect.utilities.git.Github", MagicMock())
    monkeypatch.setattr(
        "prefect.environments.storage.github.extract_flow_from_file",
        MagicMock(return_value=flow),
    )

    # No path on the storage and no location passed to get_flow.
    with pytest.raises(ValueError):
        GitHub(repo="test/repo").get_flow()

    storage = GitHub(repo="test/repo", path="flow")
    assert flow.name not in storage

    location = storage.add_flow(flow)
    retrieved = storage.get_flow(location)
    assert retrieved.run()
Esempio n. 7
0
import random

from prefect import task, Flow
from prefect.environments.storage import GitHub


@task
def random_number():
    """Return a pseudo-random integer between 0 and 100, both ends included."""
    lower, upper = 0, 100
    return random.randint(lower, upper)


# Build a one-task flow; calling the task inside the context adds it to the flow.
with Flow("test-flow") as flow:
    random_number()

# GitHub storage identifies the repository as "org/repo", not as a full
# https://github.com/... URL; a URL cannot be resolved to a repository.
flow.storage = GitHub(repo="benson-w/prefect-test", path="/flows/flow.py")

# Register the flow under the "syncs" project.
flow.register(project_name="syncs")
Esempio n. 8
0
from prefect import task, Flow, Parameter
from prefect.environments.storage import GitHub
import time

@task
def sleep(x):
    """Pause for ``x`` seconds."""
    duration = x
    time.sleep(duration)

@task
def hello(y):
    """Print the greeting ``hello there`` followed by ``y``."""
    greeting = "hello there "
    print(greeting, y)

# Two parameters with defaults; each one is passed by keyword to its task.
with Flow(name="parameters flow") as flow:
    x = Parameter("time", default=5)
    y = Parameter("name", default="allyson")
    sleep(x=x)
    hello(y=y)

# The flow file is read from this repository path; the listed secret name is
# passed to the storage.
flow.storage = GitHub(
    repo="whimsicallyson/example-flows",
    path="flows/parameters-flow.py",
    secrets=["ALLYSON_GITHUB_ACCESS_TOKEN"],
)
Esempio n. 9
0
from prefect import task, Flow
from prefect.environments.storage import GitHub
from prefect.engine.results import LocalResult


@task
def extract():
    """Return the fixed input list ``[0, 1, 2]``."""
    return list(range(3))


@task
def transform(data):
    """Return a new list holding every element of ``data`` multiplied by 100."""
    scaled = []
    for value in data:
        scaled.append(100 * value)
    return scaled


@task(log_stdout=True)
def load(data):
    """Print ``data`` to stdout after a short fixed prefix."""
    message = "Here's your data: {}".format(data)
    print(message)


# Chain the three tasks: the output of extract feeds transform, whose output
# feeds load.
with Flow("GitHub AZ Test") as flow:
    e = extract()
    t = transform(e)
    l = load(t)

# Attach GitHub storage: repository, path of the flow file, and the secret
# name listed for GitHub access.
flow.storage = GitHub(
    repo="joshmeek/storage_test",
    path="/flows/azflow.py",
    secrets=["GITHUB_ACCESS_TOKEN"],
)
Esempio n. 10
0
def test_create_github_storage():
    """A GitHub storage built from only a repo name is truthy and has a logger."""
    minimal_storage = GitHub(repo="test/repo")

    assert minimal_storage
    assert minimal_storage.logger
from prefect import Task, Flow
from prefect.environments.storage import GitHub

# Add 2000 bare tasks to one flow, each named after its loop index.
with Flow("hUGe fLow") as flow:
    for i in range(2000):
        flow.add_task(Task(name=str(i)))

# Attach GitHub storage pointing at the flow file in the repository.
flow.storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/hUGe_fLow.py",
    secrets=["GITHUB_AUTH_TOKEN"],
)

flow.register(project_name="Dev Straining")

class Node(Task):
    """Task that sleeps 1-3 seconds, then either fails at random or returns a list."""

    def run(self):
        """Log, sleep, and return ``[0, 1, 2, 3, 4]``.

        Raises:
            ValueError: when the random draw exceeds 0.98.
        """
        self.logger.info(f"{self.name} running...")

        pause_seconds = random.randint(1, 3)
        time.sleep(pause_seconds)

        # Guard clause: fail on the rare high draw, otherwise fall through.
        failed = random.random() > 0.98
        if failed:
            raise ValueError(f"{self.name} failed :(")

        self.logger.info(f"{self.name} complete.")
        return [0, 1, 2, 3, 4]


# GitHub storage: repository, path of the flow file, the secret name listed for
# GitHub access, and the ref ("master") passed to the storage.
storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/CaptureProductMetrics.py",
    secrets=["GITHUB_AUTH_TOKEN"],
    ref="master",
)

# Local environment with no labels, using a LocalDaskExecutor configured with
# the "threads" scheduler and 6 workers.
environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)

# Schedule built from a single 5-minute interval.
schedule = IntervalSchedule(interval=timedelta(minutes=5))
with Flow(
        "Capture Product Metrics",
        schedule=schedule,
        storage=storage,
        environment=environment,
) as flow:
Esempio n. 13
0
from prefect import task, Flow
from prefect.environments.storage import GitHub

@task
def t():
    """Always fail by raising a generic ``Exception``."""
    error = Exception("NONONONO")
    raise error

# Build the flow directly from a task list containing only `t`.
f = Flow("filetest", tasks=[t])

# The storage object is still configured on the flow itself.
# Idea: extend storage.add_flow to accept a file path as well as a Flow object.
f.storage = GitHub(repo="joshmeek/flow_storage_test")

# f.serialize(build=True)

# we might want a way to register a file from the command line
# prefect register -f file.py
#   Load flow
#   Register it
# would avoid having to run script directly

# idea: secondary storage where you say "flow is here"
# no need in this case to do the file magic

# possible idea for extra commands, check a flag in env
# which would be set during run and they wouldn't run again

# things that could be parameterized:
# name of file, where it's currently stored,
# where we want to move it to
Esempio n. 14
0
    node4_2 = Sleeper(name="Module 4_2", checkpoint=False).map(
        i=node3_1, upstream_tasks=[node3_1, node3_2]
    )
    node4_3 = Sleeper(name="Module 4_3", checkpoint=False).map(
        i=node3_2, upstream_tasks=[node3_1, node3_2]
    )
    node4_4 = Sleeper(name="Module 4_4", checkpoint=False).map(
        i=node3_2, upstream_tasks=[node3_1, node3_2]
    )
    node4_5 = Sleeper(name="Module 4_5", checkpoint=False).map(
        i=node3_2, upstream_tasks=[node3_1, node3_2]
    )
    node4_6 = Sleeper(name="Module 4_6", checkpoint=False).map(
        i=node3_2, upstream_tasks=[node3_1, node3_2]
    )

# Local environment with no labels, using a LocalDaskExecutor configured with
# the "threads" scheduler and 6 workers.
flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)


# GitHub storage: repository, path of the flow file (the file name contains a
# space), the secret name listed for GitHub access, and the ref "master".
flow.storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/0.13.18 Sleeper.py",
    secrets=["GITHUB_AUTH_TOKEN"],
    ref="master",
)

# Register the flow under the "PROJECT: Schematics" project.
flow.register(project_name="PROJECT: Schematics")
Esempio n. 15
0
from prefect.environments.storage import GitHub
from prefect.run_configs.local import LocalRun


@task()
def get_data():
    """Return the fixed sample list ``[1, 2, 3, 4, 5]``."""
    return list(range(1, 6))


@task()
def print_data(data):
    """Write ``data`` to stdout."""
    payload = data
    print(payload)


# GitHub storage: repository, path of the flow file, and the secret name listed
# for GitHub access.
storage = GitHub(
    repo="pnd-dkuda/prefect_github_flow",
    path="flows/my_flow.py",
    secrets=["GITHUB_ACCESS_TOKEN"],
)

# Environment variables handed to the local run.
# NOTE(review): the "x" values look like placeholders - confirm real tokens are
# injected from outside the source file rather than committed here.
run_config = LocalRun(
    env={
        "GITHUB_ACCESS_TOKEN": "x",
        "PREFECT__CONTEXT__SECRETS__GITHUB_ACCESS_TOKEN": "x",
    }
)

# The output of get_data is passed straight to print_data.
with Flow("file-based-flow", storage=storage, run_config=run_config) as flow:
    data = get_data()
    print_data(data)

if __name__ == '__main__':

    api_client = api_client_register_flow.RegisterFlow(
Esempio n. 16
0
from prefect.environments.storage import GitHub


class GenerateArtifact(Task):
    """Task that creates a link artifact from its input."""

    def run(self, data):
        """Pass ``data`` to ``artifacts.create_link`` and return the result."""
        return artifacts.create_link(data)


with Flow("Relative Link Artifacts") as flow:
    # Configure a StartFlowRun task, then call it with the flow name and run
    # name; `a` is the result of that call.
    a = StartFlowRun(
        project_name="PROJECT: Schematics",
        parameters={"input": "¡Hola, mundo!"},
        wait=True,
    )(flow_name="Orchestration Dependency A", run_name="ODEP-A")

    # Map GenerateArtifact over `a`; each task run name is built from the
    # `data` keyword it receives.
    GenerateArtifact(task_run_name=lambda **kwargs: f"Artifact {kwargs['data']}").map(
        data=a
    )


# GitHub storage: repository, path of the flow file, the ref "master", and the
# secret name listed for GitHub access.
flow.storage = GitHub(
    repo="znicholasbrown/project-artifacts",
    path="relative-link-artifacts.py",
    ref="master",
    secrets=["GITHUB_AUTH_TOKEN"],
)


# Register the flow under the "Artifacts" project.
flow.register(project_name="Artifacts")
import prefect
from prefect import Flow, task, Parameter
from prefect.environments.storage import GitHub
from prefect.environments import LocalEnvironment


@task
def print_param(param):
    """Log the value of ``param`` at INFO level using the logger from the Prefect context."""
    prefect.context.get("logger").info(
        f"This is the value of the parameter that was passed: {param}"
    )


with Flow("Testing Default Parameters") as flow:
    # Parameter declared without a default value.
    param_with_no_default = Parameter("config")

    print_param(param_with_no_default)

# GitHub storage: repository, path of the flow file, and the secret name listed
# for GitHub access.
flow.storage = GitHub(
    repo="znicholasbrown/demo-flows",
    path="TestingDefaultParams.py",
    secrets=["NICHOLAS_GITHUB_ACCESS"],
)
        b = base64.b64decode(readme.content)

        return b.decode("utf-8")


class GenerateArtifact(Task):
    """Task that creates a markdown artifact from README text."""

    def run(self, readme, ref):
        """Pass ``readme`` to ``artifacts.create_markdown`` and return the result.

        ``ref`` is not used in the body; it is accepted as a run argument.
        """
        return artifacts.create_markdown(readme)


with Flow("GitHub README Artifacts") as flow:
    # Parameter "repo" with a default list of three repositories.
    repos = Parameter(
        "repo",
        ["PrefectHQ/prefect", "PrefectHQ/ui", "PrefectHQ/server"],
    )

    # Map GetReadMe over the repositories; each run is named after its ref.
    readme = GetReadMe(task_run_name=lambda **kwargs: f"Fetch {kwargs['ref']}").map(
        ref=repos
    )

    # Map GenerateArtifact over the README results and the repositories.
    GenerateArtifact(task_run_name=lambda **kwargs: f"Render {kwargs['ref']}").map(
        readme=readme,
        ref=repos,
    )

# GitHub storage; this snippet passes the token secret name via
# `access_token_secret`.
flow.storage = GitHub(
    repo="znicholasbrown/project-artifacts",
    path="github-readme-artifacts.py",
    ref="master",
    access_token_secret="GITHUB_AUTH_TOKEN",
)

flow.register(project_name="Artifacts")
Esempio n. 19
0
# Schedule built from a single 1-minute interval.
schedule = IntervalSchedule(interval=timedelta(minutes=1))

with Flow("Mapped - Local Dask Executor", schedule=schedule) as flow:
    version = Version()

    root = Root(checkpoint=False)(upstream_tasks=[version])

    # Each Node below is mapped with the listed tasks as its upstream
    # dependencies.
    node1_1 = Node(name="Node 1_1", checkpoint=False).map(upstream_tasks=[root])
    node1_2 = Node(name="Node 1_2", checkpoint=False).map(upstream_tasks=[root])
    node2_1 = Node(name="Node 2_1", checkpoint=False).map(
        upstream_tasks=[node1_1, node1_2]
    )
    node3_1 = Node(name="Node 3_1", checkpoint=False).map(upstream_tasks=[node2_1])
    node3_2 = Node(name="Node 3_2", checkpoint=False).map(upstream_tasks=[node2_1])
    node4_1 = Node(name="Node 4_1", checkpoint=False).map(
        upstream_tasks=[node3_1, node3_2]
    )

# Local environment with no labels, using a LocalDaskExecutor configured with
# the "threads" scheduler and 6 workers.
flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)

flow.storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/LocalDaskExecutor.py",
    secrets=["GITHUB_AUTH_TOKEN"],
)

flow.register(project_name="PROJECT: Schematics")
Esempio n. 20
0
    node3_4 = Node(name="Node 3_4", checkpoint=False)(upstream_tasks=[node2_1])
    node3_5 = Node(name="Node 3_5", checkpoint=False)(upstream_tasks=[node2_1])
    node3_6 = Node(name="Node 3_6", checkpoint=False)(upstream_tasks=[node2_1])

    node4_1 = Node(name="Node 4_1",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])
    node4_2 = Node(name="Node 4_2",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])
    node4_3 = Node(name="Node 4_3",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])
    node4_4 = Node(name="Node 4_4",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])
    node4_5 = Node(name="Node 4_5",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])
    node4_6 = Node(name="Node 4_6",
                   checkpoint=False)(upstream_tasks=[node3_1, node3_2])

# Local environment with no labels, using a LocalDaskExecutor configured with
# the "threads" scheduler and 6 workers.
flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)

# GitHub storage: repository, path of the flow file (the file name contains
# spaces), the secret name listed for GitHub access, and the ref "master".
flow.storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/6 Parallel Tasks.py",
    secrets=["GITHUB_AUTH_TOKEN"],
    ref="master",
)

# Register the flow under the "PROJECT: Schematics" project.
flow.register(project_name="PROJECT: Schematics")

@task(
    name="Ping YNAB to Import Transactions",
    max_retries=10,
    retry_delay=timedelta(seconds=30),
    log_stdout=True,
)
def ping_ynab_to_import_transactions(api_key):
    """POST to the YNAB transaction-import endpoint of the last-used budget.

    Args:
        api_key: token sent as ``Authorization: Bearer <api_key>``.

    Returns:
        The ``requests`` response object, which is also printed to stdout.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the timeout.
    """
    response = requests.post(
        url="https://api.youneedabudget.com/v1/budgets/last-used/transactions/import",
        headers={"Authorization": f"Bearer {api_key}"},
        # requests has no default timeout; without one a stalled connection
        # would block forever and the task's retries would never run.
        timeout=30,
    )
    print(response)
    return response


# GitHub storage: repository, path of the flow file, and the secret name listed
# for GitHub access.
storage = GitHub(
    repo="dylanbhughes/pull-my-ynab-transactions",
    path="flow.py",
    secrets=["YNAB_GITHUB_ACCESS_TOKEN"],
)

with Flow(name="Pull My YNAB Transactions", storage=storage) as flow:
    # The API key comes from the Prefect secret "YNAB_API_KEY" and is passed
    # to the ping task.
    API_KEY = PrefectSecret("YNAB_API_KEY")
    result = ping_ynab_to_import_transactions(api_key=API_KEY)

# Kubernetes run configuration: image, CPU request, and memory request.
flow.run_config = KubernetesRun(
    image="prefecthq/prefect:all_extras", cpu_request=1, memory_request="2Gi"
)
Esempio n. 22
0
@task
def get_todays_star_count(repo):
    """Fetch ``repo`` over HTTP and return its ``stargazers_count`` field.

    Args:
        repo: URL whose JSON response contains a ``stargazers_count`` key.

    Returns:
        The value stored under ``stargazers_count`` in the decoded JSON body.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the timeout.
        KeyError: if the JSON body has no ``stargazers_count`` key.
    """
    # requests has no default timeout; set one so a stalled connection cannot
    # hang the task indefinitely.
    response = requests.get(repo, timeout=30)
    data = response.json()
    return data["stargazers_count"]

@task
def print_response(prefect_slack, dagster_slack, core_stars, server_stars, ui_stars, yesterdays_metrics):
  """Print today's counts together with their difference from stored counts.

  Each difference subtracts ``yesterdays_metrics["records"][i]["fields"]["Count"]``
  from today's value, using index 2 for prefect_slack, 3 for dagster_slack,
  4 for core_stars, 1 for server_stars and 0 for ui_stars.

  NOTE(review): the result depends on the order of the records list - confirm
  that the source returns records in a stable order.
  """
  print(f"""
  Today, the Prefect slack has {prefect_slack} users, an increase of {prefect_slack - yesterdays_metrics["records"][2]["fields"]["Count"]} from the previous business day. Dagster has {dagster_slack} in their Slack, an increase of {dagster_slack - yesterdays_metrics["records"][3]["fields"]["Count"]}. 
  The core repo has {core_stars} stars ({core_stars - yesterdays_metrics["records"][4]["fields"]["Count"]} increase), Server has {server_stars} ({server_stars - yesterdays_metrics["records"][1]["fields"]["Count"]} increase), and UI has {ui_stars} ({ui_stars - yesterdays_metrics["records"][0]["fields"]["Count"]} increase). 
  
  Have a great day!
  """)


with Flow("Metrics Reporting Flow") as flow:
    yesterdays_metrics = get_yesterdays_metrics()

    # Today's Slack member counts.
    prefect_slack = get_todays_slack_stats("https://prefect-slackin.herokuapp.com/data")
    dagster_slack = get_todays_slack_stats("https://dagster-slackin.herokuapp.com/data")

    # Today's GitHub star counts.
    core_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/prefect")
    server_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/server")
    ui_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/ui")

    # One entry per record id, each carrying the new count for that record.
    todays_metrics = [
        {"id": "recWlYBY5Bx0RNBr1", "fields": {"Count": server_stars}},
        {"id": "recenhEgeiQY83FrL", "fields": {"Count": ui_stars}},
        {"id": "rechq8AKCsgu5dj1b", "fields": {"Count": dagster_slack}},
        {"id": "recnOekrsOmEr2n0g", "fields": {"Count": core_stars}},
        {"id": "recrhgFxXbHn7BNDv", "fields": {"Count": prefect_slack}},
    ]
    update_metrics(todays_metrics)

    print_response(
        prefect_slack,
        dagster_slack,
        core_stars,
        server_stars,
        ui_stars,
        yesterdays_metrics,
    )

flow.storage = GitHub(
    repo="whimsicallyson/example-flows",
    path="flows/marketing_daily_metrics.py",
    secrets=["ALLYSON_GITHUB_ACCESS_TOKEN"],
)
Esempio n. 23
0
        ShouldNotify
        NotificationMessage
        Notify [SlackTask]
    """
    repository = Parameter("repository", default="prefect")
    owner = Parameter("owner", default="PrefectHQ")

    stars = GetStars(name="Get Stars",
                     max_retries=2,
                     retry_delay=timedelta(minutes=1))(repository=repository,
                                                       owner=owner)

    should_notify = ShouldNotify()(stars=stars)

    with case(should_notify, True):
        message = NotificationMessage()(repository=repository,
                                        owner=owner,
                                        stars=stars)

        notification = SlackTask(
            webhook_secret="STARGAZERS_SLACK_WEBHOOK_TOKEN")(message=message)

# GitHub storage: repository, path of the flow file, and the secret name listed
# for GitHub access.
flow.storage = GitHub(
    repo="znicholasbrown/stargazers",
    path="/stargazers.flow.py",
    secrets=["GITHUB_AUTH_TOKEN"],
)

# Register the flow under the "PROJECT: Nicholas" project.
flow.register(project_name="PROJECT: Nicholas")
# flow.run()
Esempio n. 24
0
import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import clocks, Schedule
from prefect.environments.storage import GitHub


@task
def hello_world():
    """Print a fixed greeting to stdout."""
    message = "Hello, World!"
    print(message)


# Cron expression "0 0 * * *": minute 0 of hour 0, every day.
clock = clocks.CronClock("0 0 * * *")
schedule = Schedule(clocks=[clock])

with Flow("Star GitHub Repositories", schedule=schedule) as flow:
    hello_world()

flow.storage = GitHub(
    repo="znicholasbrown/star-repos",
    path="app.py",
    # Change this to your own GitHub auth token secret
    secrets=["GITHUB_AUTH_TOKEN"],
)

flow.register(project_name="SOME PROJECT")
# flow.run()
Esempio n. 25
0
# Two interval clocks, each starting 10 seconds after it is created and each
# carrying its own default for the "length" parameter.
clock1 = IntervalClock(
    start_date=datetime.utcnow() + timedelta(seconds=10),
    interval=timedelta(hours=12),
    parameter_defaults={"length": 15},
)
clock2 = IntervalClock(
    start_date=datetime.utcnow() + timedelta(seconds=10),
    interval=timedelta(hours=24),
    parameter_defaults={"length": 20},
)

schedule = Schedule(clocks=[clock1, clock2])

# Deployment
# Storage of code retrieved from GitHub repository at runtime
from prefect.environments.storage import GitHub, Docker, S3, GCS, Local

storage = GitHub(
    repo="amazing_flows",
    path="flows/evolving_etl.py",
    secrets=["GITHUB_ACCESS_TOKEN"],
)

# Environment configuration to dynamically spawn Dask clusters on Kubernetes for FlowRun
from prefect.environments import DaskKubernetesEnvironment

environment = DaskKubernetesEnvironment(
    worker_spec_file="worker_spec.yaml",
    labels=["Evolving", "ETL"],
)

# Define Tasks in a Flow Context
with Flow('Evolving ETL',
          result=S3Result(bucket="flow-result-storage"),
          state_handlers=[my_state_handler],
          schedule=schedule,
          storage=storage,
          environment=environment) as flow: