from prefect import task, Flow
from prefect.engine.results import LocalResult
from prefect.engine.executors import DaskExecutor
from prefect.environments import LocalEnvironment
from prefect.environments.storage import Local


@task
def generate_list():
    return [1, 2, 3]


@task
def do_something(n):
    return n


@task
def fail(x):
    print(x)
    raise ValueError()


result = LocalResult(location="{task_full_name}.pb")

with Flow(
    "Restart Me",
    storage=Local(
        stored_as_script=True,
        path="/Users/josh/Desktop/code/Dummy-Flows/restartme.py",
    ),
    result=result,
) as flow:
    lst = generate_list()
    d = do_something.map(lst)
    fail(d)

environment = LocalEnvironment(executor=DaskExecutor())
flow.environment = environment
def test_shell_initializes_and_multiline_output_optionally_returns_all_lines():
    with Flow(name="test") as f:
        task = ShellTask(return_all=True)(command="echo -n 'hello world\n42'")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == ["hello world", "42"]
def test_shell_runs_other_shells():
    with Flow(name="test") as f:
        task = ShellTask(shell="zsh")(command="echo -n $ZSH_NAME")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "zsh"
def test_shell_task_raises_fail_if_cmd_fails():
    with Flow(name="test") as f:
        task = ShellTask()(command="ls surely_a_dir_that_doesnt_exist")
    out = f.run()
    assert out.is_failed()
    assert "Command failed with exit code" in str(out.result[task].message)
def test_shell_initializes_and_multiline_output_returns_last_line():
    with Flow(name="test") as f:
        task = ShellTask()(command="echo -n 'hello world\n42'")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "42"
# dictionary values: https://docs.prefect.io/core/tutorials/task-guide.html#adding-tasks-to-flows
# Cron clock generator: https://crontab-generator.org/
# 0 6 * * * means run every day at 6am
schedule = Schedule(clocks=[CronClock("0 6 * * *")])
# alternatively: crontab -l to list all crontabs, or see the generator to generate the crontab
# there's also launchctl and using plist files.
# there's also using the calendar.

# View the Python command with `ps`
# Rename the `ps` command: https://stackoverflow.com/a/49097964/2138773
# https://github.com/dvarrazzo/py-setproctitle
from setproctitle import setproctitle
setproctitle('prefect: ds arxiv')

# using the Imperative API: https://docs.prefect.io/core/concepts/flows.html#imperative-api
with Flow('Build Arxiv', state_handlers=[slack_handler]) as flow:
    # Dates
    date_today = datetime.now().strftime('%Y-%m-%d')

    # Begin the flow. Will fail if len(df) = 0
    # FIXME: date_query is actually overwritten within the function to just be the max date. See function for details.
    df_full = df_get_arxiv(Constant(arx_list), Constant(arx_dict))
    filter_to_date = determine_filter_date(df_full)
    df = filter_df_arxiv(df=df_full, filter_to_date=filter_to_date)

    # Create the Post folder, save the dataframe there, and build the Rmd
    dir_post = create_dir_post(date_published=date_today)
    dir_post.set_dependencies(upstream_tasks=[df])
    written_df = write_df_to_csv(df=df, dir_post=dir_post)
    fp_post = copy_rmd_template(dir_post)
def test_no_raise_on_remote_env(self):
    flow = Flow("THIS IS A TEST", environment=RemoteEnvironment())
    assert healthchecks.environment_dependency_check([flow]) is None
def storage_flow_runner(service: str):
    with Flow('Install Storage') as flow:
        installer = PrefectAddServiceStorage()
        installer.create_service_bucket(service)
    return flow.run()
from prefect import Flow, task, context
from schemas.customer import CustomerList
from converters import csv_converter


@task
def generate(schema, start, iterations):
    return schema.generate_list(iterations=iterations)


@task
def convert_to_csv(data, header):
    return csv_converter(data, header)


@task
def printer(data):
    print(data)


with Flow("customer-list") as flow:
    customer_list = CustomerList(seed="PATRICK")
    customer_list2 = CustomerList(seed="PATRICK")
    data = convert_to_csv(generate(customer_list, 0, 2), True)
    printer(data)
    data = convert_to_csv(generate(customer_list2, 0, 2), False)
    printer(data)

flow.run()
import prefect
from prefect import task, Flow


@task
def hello_task():
    logger = prefect.context.get("logger")
    logger.info("Hello, Cloud!")


with Flow("Hello World") as flow:
    hello_task()

flow.register(project_name='Hello World')
flow.run_agent()
from prefect import task, Flow
from prefect.environments.execution import DaskKubernetesEnvironment
from prefect.environments.storage import Docker


@task
def first_task():
    return [100] * 100


@task
def compute(x):
    return x * 100


with Flow(
    "dktest",
    environment=DaskKubernetesEnvironment(min_workers=1, max_workers=3),
    storage=Docker(registry_url="joshmeek18", image_name="flows"),
) as flow:
    one = first_task()
    result = compute.map(one)
    result2 = compute.map(one)
    result3 = compute.map(one)

# flow.deploy(project_name="Demo")
flow.visualize()
    default_shap = run_default_shap_impl(model_state, data_to_explain)
    match = compare_results_impl(default_shap, my_shap_distributed)
    if match is True:
        print("Results match!")
    else:
        print("Results don't match!")
    print('done')


distributed = True

if __name__ == '__main__':
    cluster = LocalCluster(n_workers=5)
    serv_address = cluster.scheduler.address
    # test()
    with Flow("shap pipeline") as flow:
        name = Parameter('name')
        # load data from CSV and get a dataframe
        df = etl(name)
        # Train random forest model
        model_state = create_model(df)
        # get data to explain: returns test dataframe rows from start to end index
        data_to_explain = get_data_to_explain(model_state, 0, 5)
        # Run my serial (non-distributed) implementation of shap
        my_shap = run_my_shap(model_state, data_to_explain)
        # Run the distributed version
        my_shap_distributed = run_distributed_shap(model_state, data_to_explain)
        # my_shap_distributed = run_distributed_shap(model_state, data_to_explain)
        # Run the default shap python library implementation
        default_shap = run_default_shap(model_state, data_to_explain)
        return list(range(random.randint(1, 10)))


class Node(Task):
    def run(self):
        self.logger.info(f'{self.name} running...')
        time.sleep(5)
        if random.random() > 0.99:
            raise ValueError(f'{self.name} failed :(')
        else:
            self.logger.info(f'{self.name} complete.')
            return list(range(random.randint(1, 10)))


schedule = IntervalSchedule(interval=timedelta(minutes=30))

with Flow("Long Flow Run", schedule=schedule) as Long_Flow_Run:
    root = Root()
    version = Version()(upstream_tasks=[root])
    node1_1 = Node(name="Node 1_1").map(upstream_tasks=[version])
    node1_2 = Node(name="Node 1_2").map(upstream_tasks=[node1_1])
    node1_3 = Node(name="Node 1_3").map(upstream_tasks=[node1_2])
    node1_4 = Node(name="Node 1_4").map(upstream_tasks=[node1_3])
    node1_5 = Node(name="Node 1_5").map(upstream_tasks=[node1_4])
    node1_6 = Node(name="Node 1_6").map(upstream_tasks=[node1_5])
    node1_7 = Node(name="Node 1_7").map(upstream_tasks=[node1_6])
    node1_8 = Node(name="Node 1_8").map(upstream_tasks=[node1_7])
    node1_9 = Node(name="Node 1_9").map(upstream_tasks=[node1_8])
    node1_10 = Node(name="Node 1_10").map(upstream_tasks=[node1_9])
    node1_11 = Node(name="Node 1_11").map(upstream_tasks=[node1_10])
    node1_12 = Node(name="Node 1_12").map(upstream_tasks=[node1_11])
    node1_13 = Node(name="Node 1_13").map(upstream_tasks=[node1_12])
from prefect import task, Flow, Parameter
from prefect.tasks.control_flow import ifelse


@task
def check_if_even(value):
    return value % 2 == 0


@task
def print_odd(value):
    print("{} is odd!".format(value))


@task
def print_even(value):
    print("{} is even!".format(value))


with Flow("Check Even/Odd") as f:
    value = Parameter("value")
    is_even = check_if_even(value)
    even = print_even(value)
    odd = print_odd(value)
    ifelse(is_even, even, odd)

# Prints '2 is even!'
f.run(value=2)

# Prints '1 is odd!'
f.run(value=1)

f.visualize()
            amt_metrics['AMT_ANALYST_HOLD'].get(date, pd.NA),
            'INS_ANALYST_SELL': ins_metrics['INS_ANALYST_SELL'].get(date, pd.NA),
            'INS_ANALYST_BUY': ins_metrics['INS_ANALYST_BUY'].get(date, pd.NA),
            'INS_ANALYST_HOLD': ins_metrics['INS_ANALYST_HOLD'].get(date, pd.NA)
        })
    return result


@task
def load(result_dict):
    """
    This function prints the results: two tables, INS metrics and AMT metrics.

    :param result_dict: dict with results
    :return: None
    """
    print(result_dict)


if __name__ == '__main__':
    with Flow('ms-etl') as flow:
        url = ('https://www.marketbeat.com/stocks/'
               'NASDAQ/MSFT/price-target/?MostRecent=0')
        soup = extract(url)
        res = transform(soup)
        load(res)
    flow.run()
from prefect import task, Flow, Parameter
from prefect.engine.result import NoResult


@task()
def vals():
    return [1, 2, 3]


@task()
def ret(x):
    return 1


with Flow('a') as f:
    p = Parameter('p')
    v = vals()
    a = ret.map(v)
    b = ret.map(p)

f.register(project_name="Demo")
from prefect import Flow, task, unmapped, Parameter
from prefect.engine.results import LocalResult
from prefect.engine.executors import LocalDaskExecutor, DaskExecutor
from prefect.engine.cache_validators import all_parameters

lr = LocalResult(location="{flow_name}-{task_name}-{x}-{y}.pkl", validators=all_parameters)


@task(log_stdout=True, checkpoint=True)
def add(x, y):
    print(f"add ran with {x} {y}")
    try:
        return sum(x) + y
    except TypeError:
        return x + y


with Flow("iterated map", result=lr) as flow:
    y = unmapped(Parameter("y", default=7))
    x = Parameter("x", default=[1, 2, 3])
    mapped_result = add.map(x, y=y)
    out = add(mapped_result, y)


if __name__ == "__main__":
    flow.run(executor=DaskExecutor())
import prefect
from prefect import Flow, task
import time
from datetime import timedelta


@task(timeout=11)
def log_me():
    logger = prefect.context.get("logger")
    logger.info("LOGGED")
    return "LOGGER"


with Flow("loggin") as flow:
    log_me()

from prefect.environments import LocalEnvironment
from prefect.engine.executors import DaskExecutor

flow.environment = LocalEnvironment(executor=DaskExecutor())

flow.register(project_name="Demo")
def test_no_raise_on_normal_flow(self):
    flow = Flow("THIS IS A TEST")
    assert healthchecks.environment_dependency_check([flow]) is None
"""This module holds the Prefect flow definition. Description ----------- This flow will pull Twitter trends and put them on a picture for NFT sale. Author ------ Viral NFT <*****@*****.**> Created ------- March 30, 2021, 16:41:15 """ from prefect import Flow # NOTE: It is highly advised not to import `src.config` in this module. from src.tasks import Trends, Tweets ############################################################################### # Initialize flow. flow = Flow(name="Trend grabber and image generator") trends = Trends() tweets = Tweets() with flow: trends() ###############################################################################
from prefect import task, Flow
from datetime import timedelta
from prefect.schedules import IntervalSchedule
import pendulum


@task
def say_hello():
    print("Hello, world!")


schedule = IntervalSchedule(interval=timedelta(days=1), start_date=pendulum.datetime(2010, 1, 1))

with Flow("interval-schedule", schedule) as flow:
    say_hello()

flow.run(run_on_schedule=True)
# flow.register(project_name="Demo", version_group_id="custom_int")

pd = pendulum.datetime(2010, 1, 1)
pd.add(days=1)
else: append_write = "w" # make a new file if not with open("./results/" + FILENAME_RESULTS, append_write) as f: f.write(f"TYPE: {INSTRUCTION_TYPE} \n") f.write(f"ACC DEV: {output['dev']['score']} \n") f.write(f"ACC TEST: {output['test']['score']} \n") f.write("=========================== \n \n") logger.info(f"TYPE: {INSTRUCTION_TYPE} \n") logger.info(f"ACC DEV: {output['dev']['score']} \n") logger.info(f"ACC TEST: {output['test']['score']} \n") logger.info("=========================== \n \n") with Flow("Running the Transformers for Pair Classification") as flow1: with tags("train"): train_input = prepare_rico_task(train_path, type_instructions=INSTRUCTION_TYPE) train_dataset = prepare_rico_layout_lm_task(train_input["data"]) with tags("dev"): dev_input = prepare_rico_task(dev_path, type_instructions=INSTRUCTION_TYPE) dev_dataset = prepare_rico_layout_lm_task(dev_input["data"]) with tags("test"): test_input = prepare_rico_task(test_path, type_instructions=INSTRUCTION_TYPE) test_dataset = prepare_rico_layout_lm_task(test_input["data"]) outputs = layout_lm_trainer_task( train_dataset=train_dataset, dev_dataset=dev_dataset,
def test_shell_initializes_with_basic_cmd():
    with Flow(name="test") as f:
        task = ShellTask(command="echo -n 'hello world'")()
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "hello world"
from prefect import task, Flow, Parameter
import prefect

logger = prefect.utilities.logging.get_logger()


@task
def print_plus_one(x):
    print(x + 1)
    logger.warning(x + 1)


with Flow('default-param') as flow:
    x = Parameter('x', default=2)
    print_plus_one(x=x)

flow.register(project_name="Demo")
def test_shell_returns_none_if_empty_output():
    with Flow(name="test") as f:
        task = ShellTask()(command="ls > /dev/null")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result is None
from prefect import task, Flow


@task
def say_hello():
    print("Hello, world!")


with Flow("Hello world flow") as flow:
    say_hello()

state = flow.run()
def test_shell_raises_if_no_command_provided():
    with Flow(name="test") as f:
        ShellTask()()
    with pytest.raises(TypeError):
        with raise_on_exception():
            assert f.run()
                               f'~/github/ds-arxiv/python/resources/rmd_template.Rmd')
    fp_post = os.path.join(dir_post, 'news.Rmd')
    shutil.copy(fp_template, fp_post)
    return fp_post


@task
def knit_rmd_to_html(fp_post, written_df: bool):
    """Renders to HTML"""
    if written_df:
        cmd = f'Rscript -e \'rmarkdown::render(\"{fp_post}\")\''
        os.system(cmd)


if __name__ == '__main__':
    with Flow('parse_arxiv') as flow:
        # Default is to filter to yesterday's publications
        df = df_get_arxiv(arx_list, arx_dict, '2019-12-24')
        today = datetime.now().strftime('%Y-%m-%d')

        # Create the Post folder, save the dataframe there, and build the Rmd
        dir_post = create_dir_post()
        written_df = write_df_to_csv(df=df, dir_post=dir_post)
        fp_post = copy_rmd_template(dir_post)
        knit = knit_rmd_to_html(fp_post=fp_post, written_df=written_df)
        gcp = git_commit_push()

    flow.run()
        'C': 'Amarela',
        'D': 'Parda',
        'E': 'Indigena',
        'F': "",
        ' ': ""
    })
    return filtro[['cor']]


@task
def join_data(df, idadecent, idadequadrado, cor, estcivil):
    final = pd.concat([df, idadecent, idadequadrado, cor, estcivil], axis=1)
    final = final[[
        'CO_GRUPO', 'TP_SEXO', 'cor', 'estcivil', 'idadecent', 'idade2'
    ]]
    logger = prefect.context.get("logger")
    logger.info(final.head().to_json())
    final.to_csv('enade_tratado.csv', index=False)


with Flow("Enade", schedule) as flow:
    path = get_raw_data()
    filtro = aplica_filtros(path)
    idadecent = constroi_idade_centralizada(filtro)
    idadequadrado = constroi_idade_cent_quad(idadecent)
    estcivil = constroi_est_civil(filtro)
    cor = constroi_cor(filtro)
    j = join_data(filtro, idadecent, idadequadrado, cor, estcivil)

flow.register(project_name="igti", idempotency_key=flow.serialized_hash())
flow.run_agent(token="htoyS1CWdSn8PmX3ZoW8wA")
from datetime import timedelta

import prefect
import requests
from prefect import Flow, Parameter, task
from prefect.engine.signals import LOOP


@task(max_retries=5, retry_delay=timedelta(seconds=2))
def compute_large_fibonacci(M):
    # we extract the accumulated task loop result from context
    loop_payload = prefect.context.get("task_loop_result", {})

    n = loop_payload.get("n", 1)
    fib = loop_payload.get("fib", 1)

    next_fib = requests.post(
        "https://nemo.api.stdlib.com/[email protected]/", data={"nth": n}
    ).json()

    if next_fib > M:
        return fib  # return statements end the loop

    raise LOOP(message=f"Fib {n}={next_fib}", result=dict(n=n + 1, fib=next_fib))


with Flow("fibonacci") as flow:
    M = Parameter("M")
    fib_num = compute_large_fibonacci(M)

flow_state = flow.run(M=100)
print(flow_state.result[fib_num].result)  # 89