def create_dag(dag_id, value):
    def run_print_var():
        return "go_fail"

    default_args = {
        'owner': 'kwas',
        'start_date': datetime(2018, 9, 6),
        'var': 'default'
    }
    dag = DAG(dag_id, default_args=default_args)

    print_date = BashOperator(task_id='print_date', bash_command='date', dag=dag)

    branch = BranchPythonOperator(task_id='branch', python_callable=run_print_var, dag=dag)
    branch.set_upstream(print_date)

    fail = BashOperator(
        task_id='go_fail',
        bash_command='if [ ! -f /tmp/kwas-fail ]; then exit 1; fi',
        dag=dag)
    fail.set_upstream(branch)

    finish = BashOperator(
        task_id='final_task',
        bash_command='echo finish',
        trigger_rule='all_success',
        dag=dag)
    finish.set_upstream(fail)

    return dag
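A factory like create_dag only takes effect once the returned DAG is bound to a module-level name the scheduler can discover. A minimal registration sketch follows; the dag_id and value used here are illustrative placeholders, not taken from the original source.

# Hedged sketch: register dynamically created DAGs in the module namespace
# so the Airflow scheduler picks them up. Ids/values below are hypothetical.
for dag_id, value in [("branch_fail_demo", "default")]:
    globals()[dag_id] = create_dag(dag_id, value)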
def deploy_tasks(model, parent_dag_name, child_dag_name, default_args,
                 PROJECT_ID, MODEL_NAME, MODEL_VERSION, MODEL_LOCATION):
    # Create inner dag
    dag = DAG("{0}.{1}".format(parent_dag_name, child_dag_name),
              default_args=default_args,
              schedule_interval=None)

    # Constants
    OTHER_VERSION_NAME = "v_{0}".format(
        datetime.datetime.now().strftime("%Y%m%d%H%M%S")[0:12])

    # Create model on ML-Engine
    bash_ml_engine_models_list_op = BashOperator(
        task_id="bash_ml_engine_models_list_{}_task".format(
            model.replace(".", "_")),
        xcom_push=True,
        bash_command="gcloud ml-engine models list --filter='name:{0}'".format(
            MODEL_NAME + model.replace(".", "_")),
        dag=dag)

    def check_if_model_already_exists(templates_dict, **kwargs):
        cur_model = templates_dict["model"].replace(".", "_")
        ml_engine_models_list = kwargs["ti"].xcom_pull(
            task_ids="bash_ml_engine_models_list_{}_task".format(cur_model))
        logging.info(
            "check_if_model_already_exists: {}: ml_engine_models_list = \n{}".format(
                cur_model, ml_engine_models_list))
        create_model_task = "ml_engine_create_model_{}_task".format(cur_model)
        dont_create_model_task = "dont_create_model_dummy_branch_{}_task".format(
            cur_model)
        if len(ml_engine_models_list) == 0 or ml_engine_models_list == "Listed 0 items.":
            return create_model_task
        return dont_create_model_task

    check_if_model_already_exists_op = BranchPythonOperator(
        task_id="check_if_model_already_exists_{}_task".format(
            model.replace(".", "_")),
        templates_dict={"model": model.replace(".", "_")},
        python_callable=check_if_model_already_exists,
        provide_context=True,
        dag=dag)

    ml_engine_create_model_op = MLEngineModelOperator(
        task_id="ml_engine_create_model_{}_task".format(
            model.replace(".", "_")),
        project_id=PROJECT_ID,
        model={"name": MODEL_NAME + model.replace(".", "_")},
        operation="create",
        dag=dag)

    create_model_dummy_op = DummyOperator(
        task_id="create_model_dummy_{}_task".format(model.replace(".", "_")),
        trigger_rule="all_done",
        dag=dag)

    dont_create_model_dummy_branch_op = DummyOperator(
        task_id="dont_create_model_dummy_branch_{}_task".format(
            model.replace(".", "_")),
        dag=dag)

    dont_create_model_dummy_op = DummyOperator(
        task_id="dont_create_model_dummy_{}_task".format(
            model.replace(".", "_")),
        trigger_rule="all_done",
        dag=dag)

    # Create version of model on ML-Engine
    bash_ml_engine_versions_list_op = BashOperator(
        task_id="bash_ml_engine_versions_list_{}_task".format(
            model.replace(".", "_")),
        xcom_push=True,
        bash_command="gcloud ml-engine versions list --model {0} --filter='name:{1}'".format(
            MODEL_NAME + model.replace(".", "_"), MODEL_VERSION),
        dag=dag)

    def check_if_model_version_already_exists(templates_dict, **kwargs):
        cur_model = templates_dict["model"].replace(".", "_")
        ml_engine_versions_list = kwargs["ti"].xcom_pull(
            task_ids="bash_ml_engine_versions_list_{}_task".format(cur_model))
        logging.info(
            "check_if_model_version_already_exists: {}: ml_engine_versions_list = \n{}".format(
                cur_model, ml_engine_versions_list))
        create_version_task = "ml_engine_create_version_{}_task".format(cur_model)
        create_other_version_task = "ml_engine_create_other_version_{}_task".format(
            cur_model)
        if len(ml_engine_versions_list) == 0 or ml_engine_versions_list == "Listed 0 items.":
            return create_version_task
        return create_other_version_task

    check_if_model_version_already_exists_op = BranchPythonOperator(
        task_id="check_if_model_version_already_exists_{}_task".format(
            model.replace(".", "_")),
        templates_dict={"model": model.replace(".", "_")},
        python_callable=check_if_model_version_already_exists,
        provide_context=True,
        dag=dag)

    ml_engine_create_version_op = MLEngineVersionOperator(
        task_id="ml_engine_create_version_{}_task".format(
            model.replace(".", "_")),
        project_id=PROJECT_ID,
        model_name=MODEL_NAME + model.replace(".", "_"),
        version_name=MODEL_VERSION,
        version={
            "name": MODEL_VERSION,
            "deploymentUri": MODEL_LOCATION + model.replace(".", "_"),
            "runtimeVersion": "1.13",
            "framework": "TENSORFLOW",
            "pythonVersion": "3.5",
        },
        operation="create",
        dag=dag)

    ml_engine_create_other_version_op = MLEngineVersionOperator(
        task_id="ml_engine_create_other_version_{}_task".format(
            model.replace(".", "_")),
        project_id=PROJECT_ID,
        model_name=MODEL_NAME + model.replace(".", "_"),
        version_name=OTHER_VERSION_NAME,
        version={
            "name": OTHER_VERSION_NAME,
            "deploymentUri": MODEL_LOCATION + model.replace(".", "_"),
            "runtimeVersion": "1.13",
            "framework": "TENSORFLOW",
            "pythonVersion": "3.5",
        },
        operation="create",
        dag=dag)

    ml_engine_set_default_version_op = MLEngineVersionOperator(
        task_id="ml_engine_set_default_version_{}_task".format(
            model.replace(".", "_")),
        project_id=PROJECT_ID,
        model_name=MODEL_NAME + model.replace(".", "_"),
        version_name=MODEL_VERSION,
        version={"name": MODEL_VERSION},
        operation="set_default",
        dag=dag)

    ml_engine_set_default_other_version_op = MLEngineVersionOperator(
        task_id="ml_engine_set_default_other_version_{}_task".format(
            model.replace(".", "_")),
        project_id=PROJECT_ID,
        model_name=MODEL_NAME + model.replace(".", "_"),
        version_name=OTHER_VERSION_NAME,
        version={"name": OTHER_VERSION_NAME},
        operation="set_default",
        dag=dag)

    # Build dependency graph, set_upstream dependencies for all tasks
    check_if_model_already_exists_op.set_upstream(bash_ml_engine_models_list_op)
    ml_engine_create_model_op.set_upstream(check_if_model_already_exists_op)
    create_model_dummy_op.set_upstream(ml_engine_create_model_op)
    dont_create_model_dummy_branch_op.set_upstream(check_if_model_already_exists_op)
    dont_create_model_dummy_op.set_upstream(dont_create_model_dummy_branch_op)
    bash_ml_engine_versions_list_op.set_upstream(
        [dont_create_model_dummy_op, create_model_dummy_op])
    check_if_model_version_already_exists_op.set_upstream(
        bash_ml_engine_versions_list_op)
    ml_engine_create_version_op.set_upstream(check_if_model_version_already_exists_op)
    ml_engine_create_other_version_op.set_upstream(
        check_if_model_version_already_exists_op)
    ml_engine_set_default_version_op.set_upstream(ml_engine_create_version_op)
    ml_engine_set_default_other_version_op.set_upstream(
        ml_engine_create_other_version_op)

    return dag
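deploy_tasks names its inner DAG "{parent}.{child}", which is the id format SubDagOperator expects, so the factory is presumably mounted from a parent DAG roughly as sketched below; the parent DAG, model identifier, and argument values here are illustrative assumptions, not taken from the original source.

# Hedged sketch: mounting deploy_tasks as a subDAG of a hypothetical parent DAG.
deploy_subdag_op = SubDagOperator(
    task_id="deploy_tasks",                      # must match child_dag_name below
    subdag=deploy_tasks(
        model="estimator.v1",                    # placeholder model identifier
        parent_dag_name="ml_pipeline",           # must equal the parent DAG's dag_id
        child_dag_name="deploy_tasks",
        default_args=default_args,
        PROJECT_ID=PROJECT_ID,
        MODEL_NAME=MODEL_NAME,
        MODEL_VERSION=MODEL_VERSION,
        MODEL_LOCATION=MODEL_LOCATION),
    dag=parent_dag)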
}
dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily")

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
branching.set_upstream(run_this_first)

join = DummyOperator(
    task_id='join',
    trigger_rule='one_success',
    dag=dag
)

for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)

    dummy_follow = DummyOperator(task_id='follow_' + option, dag=dag)
    t.set_downstream(dummy_follow)
    dummy_follow.set_downstream(join)
    f"{BASE_PACKAGE}.transactional-tables",
    "OutletsByDate",
    dag,
    RETAIL_ID,
    schema_name,
    ENV_TYPE,
)
items_by_date_task = bash_operator_for_spark_submit(
    f"{BASE_PACKAGE}.transactional-tables",
    "ItemsByDate",
    dag,
    RETAIL_ID,
    schema_name,
    ENV_TYPE,
)

push_instruments.set_downstream(push_server_details)
branch_task.set_upstream(push_server_details)
branch_task.set_downstream(master_tables_load)
branch_task.set_downstream(history_load_done)
master_tables_load.set_downstream(create_table_structure)
history_load_done.set_downstream(create_table_structure)
create_table_structure.set_downstream(unix_chmod_task)
unix_chmod_task.set_downstream(market_baskets_task)
market_baskets_task.set_downstream(
    [transaction_line_item_task, outlets_by_date_task, items_by_date_task])
data_load_done.set_upstream(
    [transaction_line_item_task, outlets_by_date_task, items_by_date_task])
create_constraint_task.set_upstream(data_load_done)
bq_check_eval_data_op.set_upstream(bq_eval_data_op)
bash_remove_old_data_op.set_upstream(
    [bq_check_train_data_op, bq_check_eval_data_op])
bq_export_gcs_train_csv_op.set_upstream([bash_remove_old_data_op])
bq_export_gcs_eval_csv_op.set_upstream([bash_remove_old_data_op])
ml_engine_training_op.set_upstream(
    [bq_export_gcs_train_csv_op, bq_export_gcs_eval_csv_op])
bash_remove_old_saved_model_op.set_upstream(ml_engine_training_op)
bash_copy_new_saved_model_op.set_upstream(bash_remove_old_saved_model_op)
bash_ml_engine_models_list_op.set_upstream(ml_engine_training_op)
check_if_model_already_exists_op.set_upstream(bash_ml_engine_models_list_op)
ml_engine_create_model_op.set_upstream(check_if_model_already_exists_op)
create_model_dummy_op.set_upstream(ml_engine_create_model_op)
dont_create_model_dummy_branch_op.set_upstream(check_if_model_already_exists_op)
dont_create_model_dummy_op.set_upstream(dont_create_model_dummy_branch_op)
bash_ml_engine_versions_list_op.set_upstream(
    [dont_create_model_dummy_op, create_model_dummy_op])
check_if_model_version_already_exists_op.set_upstream(
    bash_ml_engine_versions_list_op)
ml_engine_create_version_op.set_upstream([
    bash_copy_new_saved_model_op,
    check_if_model_version_already_exists_op
])
class MLTaskSubDag(LoggingMixin):
    """ Class for Epi Tasks subDAGs """

    def __init__(self,
                 args: Dict,
                 parent_dag_id: str,
                 child_dag_id: str,
                 repository_class: TypeVar(TaskRepositoryMixin),
                 engine: Engine = None):
        """ Defines subDAG tasks """
        self._parent_dag_id = parent_dag_id
        self._child_dag_id = child_dag_id
        self._repository_class = repository_class
        self._engine = engine

        self._subdag = DAG(
            dag_id=f'{self._parent_dag_id}.{self._child_dag_id}',
            default_args=args,
            schedule_interval=None)

        self._initialize_task_operator = PythonOperator(
            task_id=f'initialize_{self._child_dag_id}',
            provide_context=True,
            python_callable=self._initialize_task,
            dag=self._subdag)

        self._conditional_operator = BranchPythonOperator(
            task_id=f'conditional_{self._child_dag_id}',
            provide_context=True,
            python_callable=self._execute_or_skip_task,
            dag=self._subdag)

        self._dummy_operator = DummyOperator(
            task_id=f'skip_{self._child_dag_id}',
            dag=self._subdag)

        self._start_task_in_db_operator = PythonOperator(
            task_id=f'start_task_in_db_{self._child_dag_id}',
            provide_context=True,
            python_callable=self._start_task,
            dag=self._subdag)

        self._parametrized_bash_operator = ParametrizedBashOperator(
            task_id=f'bash_{self._child_dag_id}',
            parameters_provider=self._parameters_provider,
            bash_command='echo',
            dag=self._subdag)

        self._finish_task_in_db_operator = PythonOperator(
            task_id=f'finish_task_in_db_{self._child_dag_id}',
            provide_context=True,
            python_callable=self._finish_task,
            dag=self._subdag)

        self._join_operator = DummyOperator(
            task_id=f'join_{self._child_dag_id}',
            trigger_rule='one_success',
            dag=self._subdag)

    def _initialize_task(self, **kwargs) -> None:
        """ Inserts a task with ml_dag_id into the DB, if it doesn't already exist there

        Args:
            **kwargs: Airflow context
        """
        self.log.debug(f'kwargs: {kwargs}')
        ml_dag_id = dag_utils.get_ml_dag_id(parent_dag_id=self._parent_dag_id, **kwargs)

        try:
            self._repository_class(engine=self._engine).insert_task_with_ml_dag_id(ml_dag_id=ml_dag_id)
        except DBException:
            pass

    def _execute_or_skip_task(self, **kwargs) -> str:
        """ Conditional that chooses the task to execute after branching, based on the
        presence of datetime_finished in the repository for the task (based on repository_class).

        Args:
            **kwargs: Airflow context

        Returns:
            Name of the task that should be executed after branching
        """
        self.log.debug(f'kwargs: {kwargs}')
        ml_dag_id = dag_utils.get_ml_dag_id(parent_dag_id=self._parent_dag_id, **kwargs)

        if self._repository_class(engine=self._engine).is_task_finished(ml_dag_id=ml_dag_id):
            return 'skip_{}'.format(self._child_dag_id)
        else:
            return 'start_task_in_db_{}'.format(self._child_dag_id)

    def _start_task(self, **kwargs) -> None:
        """ Writes datetime_started to the task table (based on repository_class) for ml_dag_id

        Args:
            **kwargs: Airflow context
        """
        self.log.debug(f'kwargs: {kwargs}')
        ml_dag_id = dag_utils.get_ml_dag_id(parent_dag_id=self._parent_dag_id, **kwargs)

        self._repository_class(engine=self._engine).start_task(ml_dag_id=ml_dag_id)

    def _finish_task(self, **kwargs) -> None:
        """ Writes datetime_finished to the task table (based on repository_class) for ml_dag_id

        Args:
            **kwargs: Airflow context
        """
        self.log.debug(f'kwargs: {kwargs}')
        ml_dag_id = dag_utils.get_ml_dag_id(parent_dag_id=self._parent_dag_id, **kwargs)

        self._repository_class(engine=self._engine).finish_task(ml_dag_id=ml_dag_id)

    @abc.abstractmethod
    def _parameters_provider(self, **kwargs) -> str:
        """ Abstract callable that provides additional parameters for Bash calls.

        Returns:
            An empty string if not overridden
        """
        return ''

    def build(self) -> DAG:
        """ Constructs and returns the initialized subDAG """
        # DAG edge definitions
        self._conditional_operator.set_upstream(self._initialize_task_operator)
        self._start_task_in_db_operator.set_upstream(self._conditional_operator)
        self._parametrized_bash_operator.set_upstream(self._start_task_in_db_operator)
        self._finish_task_in_db_operator.set_upstream(self._parametrized_bash_operator)
        self._dummy_operator.set_upstream(self._conditional_operator)
        self._join_operator.set_upstream([self._dummy_operator, self._finish_task_in_db_operator])

        return self._subdag
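Because _parameters_provider is abstract, MLTaskSubDag is presumably subclassed once per task and attached to the parent DAG through a SubDagOperator. The subclass, repository class, and names below are illustrative assumptions, not taken from the original source.

# Hedged usage sketch with a hypothetical concrete subclass and repository.
class FeatureExtractionSubDag(MLTaskSubDag):
    def _parameters_provider(self, **kwargs) -> str:
        # Additional parameters handed to ParametrizedBashOperator
        # (exact use depends on that custom operator).
        return '--stage feature_extraction'

feature_extraction_op = SubDagOperator(
    task_id='feature_extraction',                # must match child_dag_id
    subdag=FeatureExtractionSubDag(
        args=default_args,
        parent_dag_id=parent_dag.dag_id,
        child_dag_id='feature_extraction',
        repository_class=FeatureExtractionTaskRepository).build(),
    dag=parent_dag)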
dag = DAG("test_branch",
          default_args=default_args,
          schedule_interval=timedelta(minutes=5),
          catchup=False)

t1 = BashOperator(
    task_id="init",
    bash_command="echo lol",
    params={"my_param": "Parameter I passed in"},
    dag=dag,
)

options = ["wowww", "wowww2"]

t2 = BranchPythonOperator(task_id='branching',
                          python_callable=lambda: random.choice(options),
                          dag=dag)

t3 = BashOperator(
    task_id="wowww",
    bash_command="echo wowwww",
    params={"my_param": "Parameter I passed in"},
    dag=dag,
)

t4 = DummyOperator(task_id='wowww2', trigger_rule='one_success', dag=dag)

t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t2)
    channel=slack_channel,
    username='******',
    text='Cluster has been *restarted!*\n'
         'It\'s all fine move forward with your ETLs and Crawlers!\n'
         'Message datetime: {{params.curr_date}}',
    params={'curr_date': str(datetime.now(pytz.timezone('America/Sao_Paulo')))},
    dag=dag
)

run_etl_crawler_cluster_up = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_up', dag.schedule_interval),
    task_id='crawler_dag_cluster_up',
    dag=dag,
)

run_etl_crawler_cluster_restarted = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_restarted', dag.schedule_interval),
    task_id='crawler_dag_cluster_restarted',
    dag=dag,
)

branch1.set_upstream(check_cluster)
send_slack_cluster_ok.set_upstream(branch1)
send_slack_cluster_start.set_upstream(branch1)
start_cluster.set_upstream(send_slack_cluster_start)
branch2.set_upstream(start_cluster)
send_slack_cluster_down.set_upstream(branch2)
send_slack_cluster_restarted_ok.set_upstream(branch2)
run_etl_crawler_cluster_up.set_upstream(send_slack_cluster_ok)
run_etl_crawler_cluster_restarted.set_upstream(send_slack_cluster_restarted_ok)
        hdfs_path='/data/mydata/{{ ds }}'),
    schema='my_hive_db',
    provide_context=True,
    dag=dag
)
hdfs_to_hive_trasfer.set_upstream(create_hive_db)

count_data_rows = BranchPythonOperator(
    task_id='count_data_rows',
    python_callable=tasks.count_data_rows,
    templates_dict={'schema': 'my_hive_db'},
    provide_context=True,
    dag=dag
)
count_data_rows.set_upstream(hdfs_to_hive_trasfer)

stop_flow = DummyOperator(
    task_id='stop_flow',
    dag=dag
)

create_source_id = PythonOperator(
    task_id='create_source_id',
    python_callable=tasks.create_source_id,
    templates_dict={'source': 'mydata'},
    provide_context=True,
    dag=dag
)
create_source_id.set_upstream(source_data_sensor)
    files=["{}/latest_links.txt".format(RAW_TWEET_DIR)],
    dag=dag,
)

sub = SubDagOperator(subdag=subdag,
                     task_id="insert_and_id_pop",
                     trigger_rule="one_success",
                     dag=dag)

clear_latest = BashOperator(
    bash_command="rm -rf {}/latest_links.txt".format(RAW_TWEET_DIR),
    task_id="clear_latest",
    dag=dag,
)

gen_search_terms.set_upstream(fill_search_terms)

for term in SEARCH_TERMS:
    term_without_punctuation = re.sub(r"\W+", "", term)
    simple_search = PythonOperator(
        task_id="search_{}_twitter".format(term_without_punctuation),
        provide_context=True,
        python_callable=search_twitter,
        dag=dag,
        params={"query": term},
    )
    simple_search.set_upstream(gen_search_terms)
    simple_search.set_downstream(sub)

sub.set_downstream(email_links)
email_links.set_downstream(clear_latest)
    task_id='UpdateWarehouse',
    python_callable=update_data_warehouse,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)

Join = DummyOperator(task_id='Join', dag=dag, trigger_rule='all_done')
Skip1 = DummyOperator(task_id='Skip1', dag=dag, trigger_rule='all_done')
Skip2 = DummyOperator(task_id='Skip2', dag=dag, trigger_rule='all_done')
Skip3 = DummyOperator(task_id='Skip3', dag=dag, trigger_rule='all_done')

TrainModel.set_upstream(CheckTrainApi)
Skip1.set_upstream(CheckTrainApi)
CheckPredictProfile.set_upstream(Skip1)
CheckPredictProfile.set_upstream(TrainModel)
PredictProfile.set_upstream(CheckPredictProfile)
Skip2.set_upstream(CheckPredictProfile)
CheckUpdateWarehouse.set_upstream(Skip2)
CheckUpdateWarehouse.set_upstream(PredictProfile)
UpdateWarehouse.set_upstream(CheckUpdateWarehouse)
Skip3.set_upstream(CheckUpdateWarehouse)
Join.set_upstream(Skip3)
Join.set_upstream(UpdateWarehouse)

# PredictProfile.set_upstream(TrainModel)
# UpdateWarehouse.set_upstream(PredictProfile)
curl = BashOperator(
    bash_command=r"""curl -H "Content-Type: application/json" -d '{"status":"passing", "time":"{{ ts }}"}' mock-server.default.svc.cluster.local""",
    task_id="curl-task",
    dag=dag,
)

branch = BranchPythonOperator(
    task_id='branch',
    python_callable=return_branch,
    dag=dag,
)

python_print = PythonOperator(
    task_id='python-print',
    provide_context=True,
    python_callable=print_context,
    dag=dag,
)

python_fail = PythonOperator(
    task_id='python-fail',
    python_callable=exit_failure,
    dag=dag,
)

curl.set_upstream(start)
branch.set_upstream(start)
python_print.set_upstream(branch)
python_fail.set_upstream(branch)
    task_id='compare_result',
    provide_context=True,
    python_callable=compare_result,
    trigger_rule="all_done",
    dag=dag)
t3.set_upstream(t1)
t3.set_upstream(t2)

options = ['hadoop_jar_cmd', 'presto_cmd', 'db_query', 'spark_cmd']

branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
branching.set_upstream(t3)

join = DummyOperator(
    task_id='join',
    trigger_rule='one_success',
    dag=dag
)

t4 = QuboleOperator(
    task_id='hadoop_jar_cmd',
    command_type='hadoopcmd',
    sub_command='jar s3://paid-qubole/HadoopAPIExamples/jars/hadoop-0.20.1-dev-streaming.jar '
                '-mapper wc '
                '-numReduceTasks 0 -input s3://paid-qubole/HadoopAPITests/data/3.tsv '
                '-output s3://paid-qubole/HadoopAPITests/data/3_wc',
    cluster_label='default',
    fetch_logs=True,
    dag=dag)
    provide_context=True,
    python_callable=clear_export_folder,
    dag=dag
)

export_athena_scifi_table = AWSAthenaOperator(
    task_id="export_athena_scifi_table",
    # query=export_athena_scifi_table_query,
    query=export_athena_scifi_table_query2,
    workgroup="devday-demo",
    database=athena_db,
    sleep_time=60,
    output_location='s3://' + s3_dlake + "/" + athena_output + 'export_athena_scifi_table'
)

export_scifi_tofile = PythonOperator(
    task_id='export_scifi_tofile',
    provide_context=True,
    python_callable=export_scifi_tofile,
    dag=dag
)

check_athena_export_table.set_upstream(disp_variables)
drop_athena_export_table.set_upstream(check_athena_export_table)
check_athena_export_table_done.set_upstream(check_athena_export_table)
check_athena_export_table_pass.set_upstream(drop_athena_export_table)
check_athena_export_table_pass.set_upstream(check_athena_export_table_done)
export_athena_scifi_table.set_upstream(clear_export_folder)
clear_export_folder.set_upstream(check_athena_export_table_pass)
export_scifi_tofile.set_upstream(export_athena_scifi_table)
from airflow.models import DAG

args = {'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(12)}

dag = DAG(dag_id='example_branch_operator_further_back',
          default_args=args,
          schedule_interval="@daily")

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']

def return_current_day(**context):
    return options[context["execution_date"].weekday()]

branching = BranchPythonOperator(task_id='branching',
                                 python_callable=return_current_day,
                                 provide_context=True,
                                 dag=dag)
branching.set_upstream(run_this_first)

join = DummyOperator(task_id='join', trigger_rule='one_success', dag=dag)

for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    t.set_downstream(join)
    task_id='CalculateProbability',
    python_callable=calculate_probability,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)

Join = DummyOperator(task_id='Join', dag=dag, trigger_rule='all_done')
Skip1 = DummyOperator(task_id='Skip1', dag=dag, trigger_rule='all_done')
Skip2 = DummyOperator(task_id='Skip2', dag=dag, trigger_rule='all_done')
Skip3 = DummyOperator(task_id='Skip3', dag=dag, trigger_rule='all_done')

CallDividendApi.set_upstream(CheckCallApi)
Skip1.set_upstream(CheckCallApi)
CheckCsvLoad.set_upstream(Skip1)
CheckCsvLoad.set_upstream(CallDividendApi)
CsvLoad.set_upstream(CheckCsvLoad)
Skip2.set_upstream(CheckCsvLoad)
CheckCalculateProbability.set_upstream(Skip2)
CheckCalculateProbability.set_upstream(CsvLoad)
CalculateProbability.set_upstream(CheckCalculateProbability)
Skip3.set_upstream(CheckCalculateProbability)
Join.set_upstream(Skip3)
Join.set_upstream(CalculateProbability)

# CsvLoad.set_upstream(CallDividendApi)
# CalculateProbability.set_upstream(CsvLoad)