def test_create_github_storage_init_args():
    """Constructor arguments are exposed unchanged as attributes of the storage."""
    github_storage = GitHub(repo="test/repo", path="flow.py", secrets=["auth"])

    assert github_storage
    # A freshly created storage has no flows registered yet.
    assert github_storage.flows == {}
    assert github_storage.repo == "test/repo"
    assert github_storage.path == "flow.py"
    assert github_storage.secrets == ["auth"]
def test_add_flow_to_github_storage():
    """Adding a flow returns the configured path and makes the flow a member."""
    github_storage = GitHub(repo="test/repo", path="flow.py")
    flow = Flow("test")

    assert flow.name not in github_storage

    location = github_storage.add_flow(flow)

    assert location == "flow.py"
    assert flow.name in github_storage
def test_serialize_github_storage():
    """The serialized form carries the type name plus repo, path and secrets."""
    github_storage = GitHub(repo="test/repo", path="flow.py", secrets=["auth"])

    payload = github_storage.serialize()

    expected_fields = {
        "type": "GitHub",
        "repo": "test/repo",
        "path": "flow.py",
        "secrets": ["auth"],
    }
    for field_name, expected_value in expected_fields.items():
        assert payload[field_name] == expected_value
def test_add_flow_to_github_already_added():
    """Adding the same flow a second time raises ``ValueError``."""
    github_storage = GitHub(repo="test/repo", path="flow.py")
    flow = Flow("test")

    assert flow.name not in github_storage

    # First registration succeeds and reports the configured path.
    location = github_storage.add_flow(flow)
    assert location == "flow.py"
    assert flow.name in github_storage

    # Second registration of the same flow is rejected.
    with pytest.raises(ValueError):
        github_storage.add_flow(flow)
def test_github_client_property(monkeypatch):
    """``_github_client`` builds the client from the ``GITHUB_ACCESS_TOKEN`` secret."""
    client_factory = MagicMock()
    monkeypatch.setattr("prefect.utilities.git.Github", client_factory)

    github_storage = GitHub(repo="test/repo")
    token = "ACCESS_TOKEN"

    # The secret is supplied through the Prefect context for the lookup.
    with context(secrets={"GITHUB_ACCESS_TOKEN": token}):
        client = github_storage._github_client

    assert client
    client_factory.assert_called_with("ACCESS_TOKEN")
def test_get_flow_github(monkeypatch):
    """``get_flow`` rejects a missing location and returns the extracted flow.

    The GitHub client class and ``extract_flow_from_file`` are both replaced
    with mocks; the extractor mock always returns the flow built here.
    """
    f = Flow("test")

    github = MagicMock()
    monkeypatch.setattr("prefect.utilities.git.Github", github)
    monkeypatch.setattr(
        "prefect.environments.storage.github.extract_flow_from_file",
        MagicMock(return_value=f),
    )

    # Build the storage *outside* the ``raises`` block: only ``get_flow()`` is
    # expected to raise, and a ValueError from the constructor must not be
    # able to satisfy the expectation by accident.
    storage = GitHub(repo="test/repo")
    with pytest.raises(ValueError):
        storage.get_flow()

    storage = GitHub(repo="test/repo", path="flow")

    assert f.name not in storage
    flow_location = storage.add_flow(f)

    new_flow = storage.get_flow(flow_location)
    assert new_flow.run()
import random

from prefect import task, Flow
from prefect.environments.storage import GitHub


@task
def random_number():
    """Return a random integer between 0 and 100, inclusive."""
    return random.randint(0, 100)


with Flow("test-flow") as flow:
    random_number()

# GitHub storage identifies the repository by its "<owner>/<repo>" name, not by
# its browser URL, so the "https://github.com/" prefix must not be included.
flow.storage = GitHub(repo="benson-w/prefect-test", path="/flows/flow.py")
flow.register(project_name="syncs")
from prefect import task, Flow, Parameter
from prefect.environments.storage import GitHub
import time


@task
def sleep(x):
    """Block for ``x`` seconds."""
    time.sleep(x)


@task
def hello(y):
    """Print a greeting addressed to ``y``."""
    print("hello there ", y)


with Flow(name="parameters flow") as flow:
    # Both parameters have defaults, so the flow can run without any input.
    pause_seconds = Parameter("time", default=5)
    greeted_name = Parameter("name", default="allyson")

    sleep(x=pause_seconds)
    hello(y=greeted_name)

# The flow source is read from this file in the GitHub repository.
flow.storage = GitHub(
    secrets=["ALLYSON_GITHUB_ACCESS_TOKEN"],
    path="flows/parameters-flow.py",
    repo="whimsicallyson/example-flows",
)
from prefect import task, Flow
from prefect.environments.storage import GitHub
from prefect.engine.results import LocalResult


@task
def extract():
    """Return the fixed sample data set."""
    return [0, 1, 2]


@task
def transform(data):
    """Return a new list with every element of ``data`` multiplied by 100."""
    return [100 * item for item in data]


@task(log_stdout=True)
def load(data):
    """Print the transformed data; stdout is captured by the task's logging."""
    print("Here's your data: {}".format(data))


with Flow("GitHub AZ Test") as flow:
    # extract -> transform -> load, chained through their return values.
    e = extract()
    t = transform(e)
    l = load(t)

flow.storage = GitHub(
    secrets=["GITHUB_ACCESS_TOKEN"],
    path="/flows/azflow.py",
    repo="joshmeek/storage_test",
)
def test_create_github_storage():
    """A storage built from just a repo name is truthy and has a logger."""
    github_storage = GitHub(repo="test/repo")

    assert github_storage
    assert github_storage.logger
from prefect import Task, Flow
from prefect.environments.storage import GitHub

with Flow("hUGe fLow") as flow:
    # 2000 independent no-op tasks, named "0" through "1999".
    for task_index in range(2000):
        flow.add_task(Task(name=str(task_index)))

flow.storage = GitHub(
    secrets=["GITHUB_AUTH_TOKEN"],
    path="flows/hUGe_fLow.py",
    repo="znicholasbrown/project-schematics",
)

flow.register(project_name="Dev Straining")
class Node(Task): def run(self): self.logger.info(f"{self.name} running...") time.sleep(random.randint(1, 3)) if random.random() > 0.98: raise ValueError(f"{self.name} failed :(") else: self.logger.info(f"{self.name} complete.") return list(range(5)) storage = GitHub( repo="znicholasbrown/project-schematics", path="flows/CaptureProductMetrics.py", secrets=["GITHUB_AUTH_TOKEN"], ref="master", ) environment = LocalEnvironment( labels=[], executor=LocalDaskExecutor(scheduler="threads", num_workers=6), ) schedule = IntervalSchedule(interval=timedelta(minutes=5)) with Flow( "Capture Product Metrics", schedule=schedule, storage=storage, environment=environment, ) as flow:
from prefect import task, Flow
from prefect.environments.storage import GitHub


@task
def t():
    """Always fail by raising an exception."""
    raise Exception("NONONONO")


f = Flow(name="filetest", tasks=[t])

# The storage object is still configured on the flow itself.
# Open question: should storage.add_flow accept a file path as well as a
# flow object?
f.storage = GitHub(repo="joshmeek/flow_storage_test")

# f.serialize(build=True)

# Design notes:
# - We might want a way to register a file from the command line, e.g.
#     prefect register -f file.py
#   which would load the flow and register it, so the script would not have
#   to be run directly.
# - Idea: a secondary storage where you just say "the flow is here"; in that
#   case none of the file magic is needed.
# - Possible idea for extra commands: check a flag in the environment that is
#   set during a run, so those commands do not run again.
# - Things that could be parameterized: the name of the file, where it is
#   currently stored, and where we want to move it to.
node4_2 = Sleeper(name="Module 4_2", checkpoint=False).map( i=node3_1, upstream_tasks=[node3_1, node3_2] ) node4_3 = Sleeper(name="Module 4_3", checkpoint=False).map( i=node3_2, upstream_tasks=[node3_1, node3_2] ) node4_4 = Sleeper(name="Module 4_4", checkpoint=False).map( i=node3_2, upstream_tasks=[node3_1, node3_2] ) node4_5 = Sleeper(name="Module 4_5", checkpoint=False).map( i=node3_2, upstream_tasks=[node3_1, node3_2] ) node4_6 = Sleeper(name="Module 4_6", checkpoint=False).map( i=node3_2, upstream_tasks=[node3_1, node3_2] ) flow.environment = LocalEnvironment( labels=[], executor=LocalDaskExecutor(scheduler="threads", num_workers=6), ) flow.storage = GitHub( repo="znicholasbrown/project-schematics", path="flows/0.13.18 Sleeper.py", secrets=["GITHUB_AUTH_TOKEN"], ref="master", ) flow.register(project_name="PROJECT: Schematics")
from prefect.environments.storage import GitHub from prefect.run_configs.local import LocalRun @task() def get_data(): return [1, 2, 3, 4, 5] @task() def print_data(data): print(data) storage = GitHub(repo="pnd-dkuda/prefect_github_flow", path="flows/my_flow.py", secrets=["GITHUB_ACCESS_TOKEN"]) run_config = LocalRun( env={ 'GITHUB_ACCESS_TOKEN': 'x', 'PREFECT__CONTEXT__SECRETS__GITHUB_ACCESS_TOKEN': 'x' }) with Flow("file-based-flow", storage=storage, run_config=run_config) as flow: data = get_data() print_data(data) if __name__ == '__main__': api_client = api_client_register_flow.RegisterFlow(
from prefect.environments.storage import GitHub


class GenerateArtifact(Task):
    """Task that creates a link artifact from the value it is given."""

    def run(self, data):
        """Create a link artifact for ``data`` and return the artifact's id."""
        return artifacts.create_link(data)


def _artifact_run_name(**kwargs):
    """Build the task-run name from the ``data`` input of the run."""
    return f"Artifact {kwargs['data']}"


with Flow("Relative Link Artifacts") as flow:
    start_dependency = StartFlowRun(
        project_name="PROJECT: Schematics",
        parameters={"input": "¡Hola, mundo!"},
        wait=True,
    )
    a = start_dependency(flow_name="Orchestration Dependency A", run_name="ODEP-A")

    # Map the artifact task over the result of the started flow run.
    GenerateArtifact(task_run_name=_artifact_run_name).map(data=a)

flow.storage = GitHub(
    secrets=["GITHUB_AUTH_TOKEN"],
    ref="master",
    path="relative-link-artifacts.py",
    repo="znicholasbrown/project-artifacts",
)

flow.register(project_name="Artifacts")
import prefect
from prefect import Flow, task, Parameter
from prefect.environments.storage import GitHub
from prefect.environments import LocalEnvironment


@task
def print_param(param):
    """Log the received parameter value through the logger in the Prefect context."""
    prefect.context.get("logger").info(
        f"This is the value of the parameter that was passed: {param}"
    )


with Flow("Testing Default Parameters") as flow:
    # "config" is declared without a default value.
    param_with_no_default = Parameter("config")
    print_param(param_with_no_default)

flow.storage = GitHub(
    secrets=["NICHOLAS_GITHUB_ACCESS"],
    path="TestingDefaultParams.py",
    repo="znicholasbrown/demo-flows",
)
b = base64.b64decode(readme.content) return b.decode("utf-8") class GenerateArtifact(Task): def run(self, readme, ref): artifact_id = artifacts.create_markdown(readme) return artifact_id with Flow("GitHub README Artifacts") as flow: repos = Parameter( "repo", ["PrefectHQ/prefect", "PrefectHQ/ui", "PrefectHQ/server"]) readme = GetReadMe( task_run_name=lambda **kwargs: f"Fetch {kwargs['ref']}").map(ref=repos) GenerateArtifact( task_run_name=lambda **kwargs: f"Render {kwargs['ref']}").map( readme=readme, ref=repos) flow.storage = GitHub( repo="znicholasbrown/project-artifacts", path="github-readme-artifacts.py", ref="master", access_token_secret="GITHUB_AUTH_TOKEN", ) flow.register(project_name="Artifacts")
# Schedule with a one-minute interval.
schedule = IntervalSchedule(interval=timedelta(minutes=1))

with Flow("Mapped - Local Dask Executor", schedule=schedule) as flow:
    version = Version()
    root = Root(checkpoint=False)(upstream_tasks=[version])

    # Layer 1: two mapped nodes downstream of the root.
    node1_1 = Node(checkpoint=False, name="Node 1_1").map(upstream_tasks=[root])
    node1_2 = Node(checkpoint=False, name="Node 1_2").map(upstream_tasks=[root])

    # Layer 2: one node that depends on both layer-1 nodes.
    node2_1 = Node(checkpoint=False, name="Node 2_1").map(
        upstream_tasks=[node1_1, node1_2]
    )

    # Layer 3: two nodes downstream of layer 2.
    node3_1 = Node(checkpoint=False, name="Node 3_1").map(upstream_tasks=[node2_1])
    node3_2 = Node(checkpoint=False, name="Node 3_2").map(upstream_tasks=[node2_1])

    # Layer 4: one node that depends on both layer-3 nodes.
    node4_1 = Node(checkpoint=False, name="Node 4_1").map(
        upstream_tasks=[node3_1, node3_2]
    )

flow.environment = LocalEnvironment(
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
    labels=[],
)

flow.storage = GitHub(
    secrets=["GITHUB_AUTH_TOKEN"],
    path="flows/LocalDaskExecutor.py",
    repo="znicholasbrown/project-schematics",
)

flow.register(project_name="PROJECT: Schematics")
node3_4 = Node(name="Node 3_4", checkpoint=False)(upstream_tasks=[node2_1]) node3_5 = Node(name="Node 3_5", checkpoint=False)(upstream_tasks=[node2_1]) node3_6 = Node(name="Node 3_6", checkpoint=False)(upstream_tasks=[node2_1]) node4_1 = Node(name="Node 4_1", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) node4_2 = Node(name="Node 4_2", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) node4_3 = Node(name="Node 4_3", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) node4_4 = Node(name="Node 4_4", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) node4_5 = Node(name="Node 4_5", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) node4_6 = Node(name="Node 4_6", checkpoint=False)(upstream_tasks=[node3_1, node3_2]) flow.environment = LocalEnvironment( labels=[], executor=LocalDaskExecutor(scheduler="threads", num_workers=6), ) flow.storage = GitHub( repo="znicholasbrown/project-schematics", path="flows/6 Parallel Tasks.py", secrets=["GITHUB_AUTH_TOKEN"], ref="master", ) flow.register(project_name="PROJECT: Schematics")
@task(
    name="Ping YNAB to Import Transactions",
    max_retries=10,
    retry_delay=timedelta(seconds=30),
    log_stdout=True,
)
def ping_ynab_to_import_transactions(api_key):
    """POST to the YNAB transaction-import endpoint of the last-used budget.

    Args:
        api_key: YNAB API token, sent as a bearer token in the
            ``Authorization`` header.

    Returns:
        The ``requests.Response`` returned by the import call.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within the timeout.
    """
    response = requests.post(
        url="https://api.youneedabudget.com/v1/budgets/last-used/transactions/import",
        headers={"Authorization": f"Bearer {api_key}"},
        # Without a timeout a stalled connection would block the task forever
        # and the retry policy above could never take over.
        timeout=60,
    )
    print(response)
    # An error response does not raise on its own; without this check the task
    # would finish successfully and the configured retries would never fire.
    response.raise_for_status()
    return response


storage = GitHub(
    repo="dylanbhughes/pull-my-ynab-transactions",
    path="flow.py",
    secrets=["YNAB_GITHUB_ACCESS_TOKEN"],
)

with Flow(name="Pull My YNAB Transactions", storage=storage) as flow:
    # The API key is resolved from the Prefect secret at run time.
    API_KEY = PrefectSecret("YNAB_API_KEY")
    result = ping_ynab_to_import_transactions(api_key=API_KEY)

flow.run_config = KubernetesRun(
    image="prefecthq/prefect:all_extras", cpu_request=1, memory_request="2Gi"
)
@task
def get_todays_star_count(repo):
    """Fetch the JSON document at URL ``repo`` and return its ``stargazers_count``."""
    return requests.get(repo).json()["stargazers_count"]


@task
def print_response(prefect_slack, dagster_slack, core_stars, server_stars, ui_stars, yesterdays_metrics):
    """Print today's Slack and star counts with their change since the stored counts.

    Each change is today's value minus the ``Count`` field of a record in
    ``yesterdays_metrics["records"]``, selected by position.
    """
    # NOTE(review): records are addressed by fixed index (0-4); confirm that the
    # order of ``yesterdays_metrics["records"]`` is stable.
    message = f""" Today, the Prefect slack has {prefect_slack} users, an increase of {prefect_slack - yesterdays_metrics["records"][2]["fields"]["Count"]} from the previous business day. Dagster has {dagster_slack} in their Slack, an increase of {dagster_slack - yesterdays_metrics["records"][3]["fields"]["Count"]}. The core repo has {core_stars} stars ({core_stars - yesterdays_metrics["records"][4]["fields"]["Count"]} increase), Server has {server_stars} ({server_stars - yesterdays_metrics["records"][1]["fields"]["Count"]} increase), and UI has {ui_stars} ({ui_stars - yesterdays_metrics["records"][0]["fields"]["Count"]} increase). Have a great day! """
    print(message)


with Flow("Metrics Reporting Flow") as flow:
    yesterdays_metrics = get_yesterdays_metrics()

    # Slack member counts.
    prefect_slack = get_todays_slack_stats("https://prefect-slackin.herokuapp.com/data")
    dagster_slack = get_todays_slack_stats("https://dagster-slackin.herokuapp.com/data")

    # GitHub star counts.
    core_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/prefect")
    server_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/server")
    ui_stars = get_todays_star_count("https://api.github.com/repos/prefecthq/ui")

    # One update entry per metric, keyed by its record id.
    todays_metrics = [
        {"id": "recWlYBY5Bx0RNBr1", "fields": {"Count": server_stars}},
        {"id": "recenhEgeiQY83FrL", "fields": {"Count": ui_stars}},
        {"id": "rechq8AKCsgu5dj1b", "fields": {"Count": dagster_slack}},
        {"id": "recnOekrsOmEr2n0g", "fields": {"Count": core_stars}},
        {"id": "recrhgFxXbHn7BNDv", "fields": {"Count": prefect_slack}},
    ]
    update_metrics(todays_metrics)

    print_response(
        prefect_slack,
        dagster_slack,
        core_stars,
        server_stars,
        ui_stars,
        yesterdays_metrics,
    )

flow.storage = GitHub(
    secrets=["ALLYSON_GITHUB_ACCESS_TOKEN"],
    path="flows/marketing_daily_metrics.py",
    repo="whimsicallyson/example-flows",
)
ShouldNotify NotificationMessage Notify [SlackTask] """ repository = Parameter("repository", default="prefect") owner = Parameter("owner", default="PrefectHQ") stars = GetStars(name="Get Stars", max_retries=2, retry_delay=timedelta(minutes=1))(repository=repository, owner=owner) should_notify = ShouldNotify()(stars=stars) with case(should_notify, True): message = NotificationMessage()(repository=repository, owner=owner, stars=stars) notification = SlackTask( webhook_secret="STARGAZERS_SLACK_WEBHOOK_TOKEN")(message=message) flow.storage = GitHub( repo="znicholasbrown/stargazers", path="/stargazers.flow.py", secrets=["GITHUB_AUTH_TOKEN"], ) flow.register(project_name="PROJECT: Nicholas") # flow.run()
import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import clocks, Schedule
from prefect.environments.storage import GitHub


@task
def hello_world():
    """Print a fixed greeting."""
    print("Hello, World!")


# Cron expression "0 0 * * *": minute 0 of hour 0, every day.
clock = clocks.CronClock("0 0 * * *")
schedule = Schedule(clocks=[clock])

with Flow("Star GitHub Repositories", schedule=schedule) as flow:
    hello_world()

flow.storage = GitHub(
    # Change this to your own GitHub auth token secret
    secrets=["GITHUB_AUTH_TOKEN"],
    path="app.py",
    repo="znicholasbrown/star-repos",
)

flow.register(project_name="SOME PROJECT")
# To execute the flow directly instead, call flow.run().
clock1, clock2 = IntervalClock( start_date=datetime.utcnow() + timedelta(seconds=10), interval=timedelta(hours=12), parameter_defaults={"length": 15}), IntervalClock( start_date=datetime.utcnow() + timedelta(seconds=10), interval=timedelta(hours=24), parameter_defaults={"length": 20}) schedule = Schedule(clocks=[clock1, clock2]) # Deployment # Storage of code retrieved from GitHub repository at runtime from prefect.environments.storage import GitHub, Docker, S3, GCS, Local storage = GitHub(repo="amazing_flows", path="flows/evolving_etl.py", secrets=["GITHUB_ACCESS_TOKEN"]) # Environment configuration to dynamically spawn Dask clusters on Kubernetes for FlowRun from prefect.environments import DaskKubernetesEnvironment environment = DaskKubernetesEnvironment(worker_spec_file="worker_spec.yaml", labels=["Evolving", "ETL"]) # Define Tasks in a Flow Context with Flow('Evolving ETL', result=S3Result(bucket="flow-result-storage"), state_handlers=[my_state_handler], schedule=schedule, storage=storage, environment=environment) as flow: