def test_is_pickleable_after_start(self):
    """A started executor must still survive a cloudpickle round-trip."""
    executor = LocalDaskExecutor()
    with executor.start():
        payload = cloudpickle.dumps(executor)
        restored = cloudpickle.loads(payload)
        assert isinstance(restored, LocalDaskExecutor)
raise ValueError(f"{self.name} failed :(") else: self.logger.info(f"{self.name} complete.") return list(range(5)) storage = GitHub( repo="znicholasbrown/project-schematics", path="flows/CaptureProductMetrics.py", secrets=["GITHUB_AUTH_TOKEN"], ref="master", ) environment = LocalEnvironment( labels=[], executor=LocalDaskExecutor(scheduler="threads", num_workers=6), ) schedule = IntervalSchedule(interval=timedelta(minutes=5)) with Flow( "Capture Product Metrics", schedule=schedule, storage=storage, environment=environment, ) as flow: version = Version() root = Root(checkpoint=False)(upstream_tasks=[version]) node1_1 = Node(name="Fetch Users")(upstream_tasks=[root]) node1_2 = Node(name="Fetch Extra Params")(upstream_tasks=[root])
def test_wait(self):
    """wait() resolves plain values, module objects, and submitted callables."""
    executor = LocalDaskExecutor()
    with executor.start():
        # Non-future inputs pass straight through unchanged.
        assert executor.wait(1) == 1
        assert executor.wait(prefect) is prefect
        # Submitted callables resolve to their return values, whether the
        # argument is positional, keyword, or captured in the closure.
        assert executor.wait(executor.submit(lambda: 1)) == 1
        assert executor.wait(executor.submit(lambda x: x, 1)) == 1
        assert executor.wait(executor.submit(lambda x: x, x=1)) == 1
        assert executor.wait(executor.submit(lambda: prefect)) is prefect
def test_is_pickleable(self):
    """A fresh (unstarted) executor round-trips through cloudpickle."""
    executor = LocalDaskExecutor()
    restored = cloudpickle.loads(cloudpickle.dumps(executor))
    assert isinstance(restored, LocalDaskExecutor)
def test_responds_to_kwargs(self):
    """The scheduler keyword argument is stored on the executor."""
    threaded = LocalDaskExecutor(scheduler="threads")
    assert threaded.scheduler == "threads"
def test_start_yields_cfg(self):
    """start() yields the dask configuration mapping in effect."""
    executor = LocalDaskExecutor(scheduler="threads")
    with executor.start() as cfg:
        assert cfg["scheduler"] == "threads"
def mproc_local():
    """Multiprocessing executor using local dask (not distributed cluster).

    Generator form (presumably a pytest fixture — the decorator is not
    visible here; confirm at the call site) yielding a LocalDaskExecutor
    configured for the "processes" scheduler.
    """
    yield LocalDaskExecutor(scheduler="processes")
fastqs.append(os.path.join(dir_, f)) result = " ".join(fastqs) print(result) return result def partition_strategy(): pass if __name__ == '__main__': env = os.environ.copy() env["PATH"] = env["PATH"] + ":/home/kevin/anaconda3/envs/albacore/bin" runner = ShellRunner(env=env) cluster = LocalCluster() executor = LocalDaskExecutor(address=cluster.scheduler_address) # Data albacore_input = [ "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/2", "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/3", "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/4", "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/5" ] albacore_output = [ "/home/kevin/bin/hydra_nanopore/tests/test_data/output/2", "/home/kevin/bin/hydra_nanopore/tests/test_data/output/3", "/home/kevin/bin/hydra_nanopore/tests/test_data/output/4", "/home/kevin/bin/hydra_nanopore/tests/test_data/output/5" ]
def test_create_fargate_task_environment_with_executor():
    """FargateTaskEnvironment keeps the exact executor instance it is given."""
    local_dask = LocalDaskExecutor()
    env = FargateTaskEnvironment(executor=local_dask)
    assert env.executor is local_dask
load = postgres.load_datafile.map(datafile=downloads) # commit new data to database and clean up complete = postgres.complete_load() # make sure prep runs before load flow.add_edge(upstream_task=prep, downstream_task=load) # make sure load runs before complete flow.add_edge(upstream_task=load, downstream_task=complete) if __name__ == "__main__": logger = prefect.context.get("logger") dask = prefect.config.dask mode = prefect.config.mode reset_db = prefect.config.reset_db all_datasets = dict(prefect.config.socrata.datasets) years = list(prefect.config.data.years) # use only year datasets if in full mode otherwise use all w/since if mode == 'full': run_datasets = dict((k, all_datasets[k]) for k in years) else: run_datasets = all_datasets logger.info( f"Starting \"{mode}\" flow for {', '.join(run_datasets.keys())}" f" {'and resetting db' if reset_db else ''}") state = flow.run(datasets=list(run_datasets.values()), executor=LocalDaskExecutor() if dask else LocalExecutor())
import prefect
from prefect import Flow, task
from prefect.engine.executors import LocalDaskExecutor, DaskExecutor
from prefect.engine.state import Failed
from prefect.environments import LocalEnvironment
from prefect.utilities.notifications import slack_notifier

# Name under which the flow is registered with the Prefect backend.
flow_name = "logger-test"


@task(timeout=60 * 60 * 2)  # 2-hour timeout for the task run
def test():
    """Emit one log line via the logger taken from the Prefect run context."""
    from prefect import context

    logger = context.get("logger")
    logger.info("this is a test")


with Flow(
    flow_name,
    # LocalEnvironment's first positional argument is the executor.
    environment=LocalEnvironment(LocalDaskExecutor()),
    # state_handlers=[slack_notifier(only_states=[Failed])],
) as flow:
    test()

# Registers the flow in the "Demo" project on import/run of this module.
flow.register("Demo")
@task
def dec(x):
    """Decrement x by one after a short randomized delay (simulated work)."""
    sleep(random.random() / 10)
    return x - 1


@task
def add(x, y):
    """Add x and y after a short randomized delay (simulated work)."""
    sleep(random.random() / 10)
    return x + y


@task(name="sum")
def list_sum(arr):
    """Reduce a list of mapped task results to a single total."""
    return sum(arr)


# executor = DaskExecutor(address="localhost:8786")
executor = LocalDaskExecutor()

# NOTE(review): ``inc`` is a sibling task defined elsewhere in this file.
with Flow("dask-example", environment=LocalEnvironment(executor=executor)) as flow:
    incs = inc.map(x=range(100))
    decs = dec.map(x=range(100))
    # Pairwise sums of the two mapped streams, then a final reduction.
    adds = add.map(x=incs, y=decs)
    total = list_sum(adds)

# executor = DaskExecutor(address="tcp://10.254.248.214:8786")
# flow.run(executor=executor)
flow.register("Demo")
# flow.run_agent()
from prefect.engine.executors import LocalDaskExecutor from prefect.engine.results import LocalResult from prefect.tasks.secrets import PrefectSecret import tasks with Flow(name="CommonLit SQL-to-S3", storage=Docker( registry_url='.../prefect-flows/', base_image='.../prefect-flows/prefect:0.13.15-python3.8', python_dependencies=list( map(str.strip, (Path(__file__).parent / 'requirements.txt').open().readlines())), env_vars={'PYTHONPATH': '/opt:${PYTHONPATH}'}, files={Path(__file__).parent / 'tasks.py': '/opt/tasks.py'}), environment=LocalEnvironment(executor=LocalDaskExecutor( scheduler='threads', num_workers=4), labels=["cae"]), result=LocalResult(dir='./results')) as flow: prefect_secrets = PrefectSecret('COMMON_LIT_SECRETS') destination_directory = Parameter('destination_directory', default=None) tables = Parameter('tables', required=True) indexed_field = Parameter('indexed_field', default='id', required=True) starting_index = Parameter('starting_index', default=0, required=True) total_records_to_move = Parameter('total_records_to_move', default=0, required=True) number_of_records_in_batch = Parameter('number_of_records_in_batch', default=100000, required=False) max_concurrent_connections = Parameter('max_concurrent_connections', default=50)
def test_prefect_executors(train_data, grid_search, parallel_columns):
    """Run model selection under each prefect executor flavor and check results.

    Skips silently (via print) when prefect/dask are not installed. For each
    executor, runs the flow-based ``run_model_selection`` and the direct
    ``select_model_general`` path, asserting success and one result per
    distinct partition.
    """
    try:
        from prefect.engine.executors import DaskExecutor
        from prefect.engine.executors import LocalDaskExecutor
        from prefect.engine.executors import LocalExecutor
        from dask.distributed import Client
    except Exception:
        # Best-effort skip when optional dependencies are missing.
        print("`prefect` not installed, skipping the test...")
        pass
    else:
        client = Client()

        executors = {
            "dask_already_running": DaskExecutor(address=client.scheduler.address),
            "local": LocalExecutor(),
            "local_dask": LocalDaskExecutor(),
            # this spins up LocalDaskExecutor, but just to check the interface
            "dask_create_on_call": DaskExecutor(),
        }

        for executor_name, executor in executors.items():
            flow, state = run_model_selection(
                df=train_data,
                grid_search=grid_search,
                target_col_name="Quantity",
                frequency="D",
                partition_columns=["Product"],
                parallel_over_columns=parallel_columns,
                include_rules=None,
                exclude_rules=None,
                country_code_column="Holidays_code",
                output_path="",
                persist_cv_results=False,
                persist_cv_data=False,
                persist_model_reprs=False,
                persist_best_model=False,
                persist_partition=False,
                persist_model_selector_results=False,
                visualize_success=False,
                executor=executor,
            )
            assert state.is_successful()

            results = select_model_general(
                df=train_data,
                grid_search=grid_search,
                target_col_name="Quantity",
                frequency="D",
                partition_columns=["Product"],
                parallel_over_columns=parallel_columns,
                executor=executor,
                include_rules=None,
                exclude_rules=None,
                country_code_column="Holidays_code",
                output_path="",
                persist_cv_results=False,
                persist_cv_data=False,
                persist_model_reprs=False,
                persist_best_model=False,
                persist_partition=False,
                persist_model_selector_results=False,
            )

            # One ModelSelectorResult per distinct partition combination.
            assert len(results) == len(
                train_data[parallel_columns + ["Product"]].drop_duplicates())
            assert isinstance(results[0], ModelSelectorResult)

            # The shared client must be torn down before DaskExecutor() in the
            # next iteration spins up its own cluster.
            if executor_name == "dask_already_running":
                client.shutdown()

        # Final safety net: make sure the client is closed either way.
        if client.status != "closed":
            client.shutdown()
def test_scheduler_defaults_to_threads(self):
    """With no arguments, the executor selects the "threads" scheduler."""
    default_executor = LocalDaskExecutor()
    assert default_executor.scheduler == "threads"
def sync():
    """Synchronous dask (not dask.distributed) executor.

    Generator form (presumably a pytest fixture — the decorator is not
    visible here; confirm at the call site) yielding a default
    LocalDaskExecutor.
    """
    yield LocalDaskExecutor()
from prefect import task, Flow
from prefect.environments import LocalEnvironment
from prefect.engine.executors import LocalDaskExecutor


@task
def vals():
    """Return a fixed list of ten integers to map over."""
    return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


@task
def printv(v):
    """Print one mapped value."""
    print(v)


with Flow(
    "local-dask",
    # Fix: LocalDaskExecutor sizes its threaded scheduler with ``num_workers``
    # (the kwarg used consistently elsewhere in this codebase), not ``nthreads``.
    environment=LocalEnvironment(executor=LocalDaskExecutor(num_workers=4)),
) as f:
    v = vals()
    # Fan out: one printv task per element of the list returned by vals().
    printv.map(v)

# Registers the flow in the "Demo" project when this module is executed.
f.register(project_name="Demo")