args.get("total-executor-cores"), row.job_id), dag=dag, on_failure_callback=failure_callback_wrapper( row.owner.split(",") if row.owner != '' else None)) md = md + "[{}:{}] ".format(task_id, row.job_desc) job_dict[str(row.job_num)] = task_id for _, row in rows[(rows.schedule_id == d.schedule_id) & (rows.dependent_jobs != '')].iterrows(): dependent_jobs = row.dependent_jobs for dep_job_id in dependent_jobs.split(","): if "." in dep_job_id: ext_dag_id, ext_task_id, execution_delta = dep_job_id.split( ".") try: ext_task = dag.get_task('wait_for_{}_{}'.format( ext_dag_id, ext_task_id)) dummy = dag.get_task('{}_{}_finish'.format( ext_dag_id, ext_task_id)) except: ext_task = ExternalTaskSensor( task_id='wait_for_{}_{}'.format( ext_dag_id, ext_task_id), external_dag_id=ext_dag_id, external_task_id=ext_task_id, execution_delta=datetime.timedelta( minutes=int(execution_delta)), dag=dag) dummy = DummyOperator(task_id='{}_{}_finish'.format( ext_dag_id, ext_task_id), dag=dag) ext_task >> dummy >> dag.get_task(
import json
from pathlib import Path

from airflow import DAG
from airflow.providers.docker.operators.docker import DockerOperator
from airflow.utils.dates import days_ago

default_args = {
    'start_date': days_ago(0),
}

dag = DAG(
    dag_id='{{project_name}}',
    default_args=default_args,
    description='Ploomber DAG ({{project_name}})',
    schedule_interval=None,
)

path_to_spec = Path(__file__).parent / '{{project_name}}.json'
spec = json.loads(path_to_spec.read_text())

# register one DockerOperator per task in the spec
for task in spec['tasks']:
    DockerOperator(image=spec['image'],
                   command=task['command'],
                   dag=dag,
                   task_id=task['name'])

# wire dependencies in a second pass, once every task exists on the DAG
for task in spec['tasks']:
    t = dag.get_task(task['name'])
    for upstream in task['upstream']:
        t.set_upstream(dag.get_task(upstream))
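# Hypothetical shape of the {{project_name}}.json spec the loader above
# reads: one image shared by all tasks, plus each task's name, command,
# and upstream task names. The keys come straight from the loops above;
# the values here are made up for illustration.
spec_example = {
    "image": "my-project:latest",
    "tasks": [
        {"name": "load", "command": "python load.py", "upstream": []},
        {"name": "clean", "command": "python clean.py", "upstream": ["load"]},
    ],
}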
def assertDagDictEqual(self, source: dict, dag: DAG):
    """Assert that `dag` has exactly the tasks and downstream edges in `source`."""
    assert dag.task_dict.keys() == source.keys()
    for task_id, downstream_list in source.items():
        assert dag.has_task(task_id)
        task = dag.get_task(task_id)
        assert task.downstream_task_ids == set(downstream_list)
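# A usage sketch for the helper above, assuming Airflow 2.x import
# paths; the DAG shape and the task ids t1/t2 are hypothetical.
import datetime
import unittest

from airflow import DAG
from airflow.operators.dummy import DummyOperator


class TestDagShape(unittest.TestCase):
    assertDagDictEqual = assertDagDictEqual  # reuse the helper as a method

    def test_shape(self):
        with DAG(dag_id='shape_test',
                 start_date=datetime.datetime(2021, 1, 1),
                 schedule_interval=None) as dag:
            t1 = DummyOperator(task_id='t1')
            t2 = DummyOperator(task_id='t2')
            t1 >> t2
        # t1 flows into t2; t2 has no downstream tasks
        self.assertDagDictEqual({'t1': ['t2'], 't2': []}, dag)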
                      dag=dag)
        batchops[0].set_upstream(startop)
    # if we are in batch two and onwards, set the previous joiner as an upstream
    else:
        if len(batchops) > 0:
            batchops[-1].set_upstream(batchjoins[-1])

    # init the "real" worker that does the heavy lifting
    workerop = PythonOperator(task_id='dyn_task_{}'.format(thing_identifyer),
                              python_callable=process_thing,
                              dag=dag,
                              params={
                                  'thing_identifyer': thing_identifyer,
                                  'thing_action': thing_action
                              })
    # set the batch group as the upstream
    workerop.set_upstream(batchops[-1])
    j += 1

    # finish batch group by joining the tasks in the group into a dummy joiner
    if j == max_tasks:
        batchjoins.append(
            DummyOperator(task_id='join_{}'.format(len(batchjoins)), dag=dag))
        for task in batchops[-1].downstream_list:
            dag.get_task(task.task_id).set_downstream(batchjoins[-1])
        j = 0
        c += 1
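# Condensed, self-contained sketch of the batching pattern above,
# assuming Airflow 2.x import paths and with all names hypothetical:
# workers attach to a shared anchor in fixed-size groups, and each full
# group is funnelled into a Dummy joiner that anchors the next group,
# capping parallelism at max_tasks.
import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator

max_tasks = 3
with DAG(dag_id='batched_example',
         start_date=datetime.datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    anchor = DummyOperator(task_id='start')
    for i, name in enumerate(['a', 'b', 'c', 'd', 'e']):
        worker = DummyOperator(task_id='dyn_task_{}'.format(name))
        anchor >> worker
        if (i + 1) % max_tasks == 0:
            # close the batch: every worker in it feeds the joiner
            join = DummyOperator(task_id='join_{}'.format(i // max_tasks))
            for t in list(anchor.downstream_list):
                t >> join
            anchor = join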
from datetime import datetime

import pytest

from airflow import DAG
from airflow.models import Variable
from eai_graph_tools.airflow_data.dag_components.prediction_unb2017 import create_grid_prediction_dag

cfg_name = "unit_test_deep_graph_embeddings_agg_gs_fext_deg_dim10_interval10"

dag = DAG(dag_id=cfg_name,
          default_args={'start_date': datetime.utcfromtimestamp(0)},
          start_date=datetime.utcfromtimestamp(0),
          schedule_interval=None)

create_grid_prediction_dag(dag,
                           cfg_file="eai_graph_tools/airflow_data/configs/configs_unb2017.ini",
                           cfg_name=cfg_name)

create_training_dataset = dag.get_task("create_training_dataset")
create_inference_dataset = dag.get_task("create_inference_dataset")
train_graph_model = dag.get_task("train_graph_model")
create_graph_model_node_embeddings = dag.get_task("create_graph_model_node_embeddings")
infer_predictions = dag.get_task("predict")
create_interval_metrics = dag.get_task("create_interval_metrics")


def overwrite_out_dir_param(path, cfg_name=""):
    Variable.set(cfg_name + 'out_dir', path)


def get_out_dir(cfg_name=""):
    return Variable.get(cfg_name + 'out_dir', default_var='')
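# Sketch of how the Variable-backed helpers above might be used in a
# test; the path is hypothetical, and this requires an initialized
# Airflow metadata database, since Variable.set/get go through it.
overwrite_out_dir_param('/tmp/unit_test_out', cfg_name=cfg_name)
assert get_out_dir(cfg_name=cfg_name) == '/tmp/unit_test_out'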