def get_dag_runs(dag_id, state=None):
    """
    Returns a list of Dag Runs for a specific DAG ID.

    :param dag_id: String identifier of a DAG
    :param state: queued|running|success...
    :return: List of DAG runs of a DAG with requested state,
        or all runs if the state is not specified
    """
    dagbag = DagBag()

    # Check DAG exists.
    if dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(dag_id)
        raise AirflowException(error_message)

    dag_runs = list()
    state = state.lower() if state else None
    for run in DagRun.find(dag_id=dag_id, state=state):
        dag_runs.append({
            'id': run.id,
            'run_id': run.run_id,
            'state': run.state,
            'dag_id': run.dag_id,
            'execution_date': run.execution_date.isoformat(),
            'start_date': ((run.start_date or '') and
                           run.start_date.isoformat()),
            'dag_run_url': url_for('Airflow.graph', dag_id=run.dag_id,
                                   execution_date=run.execution_date)
        })

    return dag_runs
def trigger_dag(args):
    dag = get_dag(args)

    if not dag:
        logging.error("Cannot find dag {}".format(args.dag_id))
        sys.exit(1)

    execution_date = datetime.now()
    run_id = args.run_id or "manual__{0}".format(execution_date.isoformat())

    dr = DagRun.find(dag_id=args.dag_id, run_id=run_id)
    if dr:
        logging.error("This run_id {} already exists".format(run_id))
        raise AirflowException()

    run_conf = {}
    if args.conf:
        run_conf = json.loads(args.conf)

    trigger = dag.create_dagrun(
        run_id=run_id,
        execution_date=execution_date,
        state=State.RUNNING,
        conf=run_conf,
        external_trigger=True
    )
    logging.info("Created {}".format(trigger))
def set_dag_run_state(dag, execution_date, state=State.SUCCESS, commit=False):
    """
    Set the state of a dag run and all task instances associated with the dag
    run for a specific execution date.

    :param dag: the DAG of which to alter state
    :param execution_date: the execution date from which to start looking
    :param state: the state to which the DAG needs to be set
    :param commit: commit DAG and tasks to be altered to the database
    :return: list of tasks that have been created and updated
    :raises: AssertionError if dag or execution_date is invalid
    """
    res = []

    if not dag or not execution_date:
        return res

    # Mark all task instances in the dag run
    for task in dag.tasks:
        task.dag = dag
        new_state = set_state(task=task, execution_date=execution_date,
                              state=state, commit=commit)
        res.extend(new_state)

    # Mark the dag run
    if commit:
        drs = DagRun.find(dag.dag_id, execution_date=execution_date)
        for dr in drs:
            dr.dag = dag
            dr.update_state()

    return res
def _get_dep_statuses(self, ti, session, dep_context):
    dag = ti.task.dag
    dagrun = ti.get_dagrun(session)
    if not dagrun:
        # The import is needed here to avoid a circular dependency
        from airflow.models import DagRun
        running_dagruns = DagRun.find(
            dag_id=dag.dag_id,
            state=State.RUNNING,
            external_trigger=False,
            session=session
        )

        if len(running_dagruns) >= dag.max_active_runs:
            reason = ("The maximum number of active dag runs ({0}) for this task "
                      "instance's DAG '{1}' has been reached.".format(
                          dag.max_active_runs, ti.dag_id))
        else:
            reason = "Unknown reason"
        yield self._failing_status(
            reason="Task instance's dagrun did not exist: {0}.".format(reason))
    else:
        if dagrun.state != State.RUNNING:
            yield self._failing_status(
                reason="Task instance's dagrun was not in the 'running' state but in "
                       "the state '{}'.".format(dagrun.state))
def trigger_dag(dag_id, run_id=None, conf=None, execution_date=None):
    dagbag = DagBag()

    if dag_id not in dagbag.dags:
        raise AirflowException("Dag id {} not found".format(dag_id))

    dag = dagbag.get_dag(dag_id)

    if not execution_date:
        execution_date = datetime.now()

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dr = DagRun.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise AirflowException("Run id {} already exists for dag id {}".format(
            run_id, dag_id
        ))

    run_conf = None
    if conf:
        run_conf = json.loads(conf)

    trigger = dag.create_dagrun(
        run_id=run_id,
        execution_date=execution_date,
        state=State.RUNNING,
        conf=run_conf,
        external_trigger=True
    )

    return trigger
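# Illustrative usage sketch for the trigger_dag() helper above; it is not part of the
# original snippet. It assumes an Airflow environment where a DAG with the hypothetical
# dag_id "example_bash_operator" is loadable by DagBag(). The conf argument is passed as
# a JSON string because the helper decodes it with json.loads().
new_run = trigger_dag(
    dag_id="example_bash_operator",
    conf='{"key": "value"}',
)
print(new_run.run_id, new_run.state)  # prints the new run's id and its state ('running')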
def dag_state(args):
    """
    Returns the state of a DagRun at the command line.

    >>> airflow dag_state tutorial 2015-01-01T00:00:00.000000
    running
    """
    dag = get_dag(args)
    dr = DagRun.find(dag.dag_id, execution_date=args.execution_date)
    print(dr[0].state if len(dr) > 0 else None)
def wes_cancel_run(self, run_id):
    logger.debug(f"""Call wes_cancel_run with run_id={run_id}""")
    try:
        dag_run = DagRun.find(dag_id=run_id, state=None)[0]
        self.stop_tasks(dag_run)
        self.remove_tmp_data(dag_run)
        return {"run_id": run_id}
    except Exception as err:
        logger.debug(f"""Failed to cancel dag run {run_id}, {err}""")
        return connexion.problem(500,
                                 f"""Failed to cancel dag run {run_id}""",
                                 str(err))
def get_dag_run(dag_run_id):
    session = settings.Session()
    runs = DagRun.find(run_id=dag_run_id, session=session)

    if len(runs) == 0:
        return ApiResponse.not_found('Dag run not found')

    dag_run = runs[0]
    session.close()
    return ApiResponse.success({'dag_run': format_dag_run(dag_run)})
def verify_dag_run_integrity(dag, dates):
    """
    Verify the integrity of the dag runs in case a task was added or removed,
    and set the confirmed execution dates, as they might be different from
    what was provided.
    """
    confirmed_dates = []
    dag_runs = DagRun.find(dag_id=dag.dag_id, execution_date=dates)
    for dag_run in dag_runs:
        dag_run.dag = dag
        dag_run.verify_integrity()
        confirmed_dates.append(dag_run.execution_date)
    return confirmed_dates
def get_dag_runs(context, dag_id=defaults.KARAJAN_ID, state=State.RUNNING, external_trigger=True):
    if hasattr(context, 'dag_runs'):
        return context.dag_runs

    drs = DagRun.find(
        dag_id=dag_id,
        state=state,
        external_trigger=external_trigger,
    )
    setattr(context, 'dag_runs', drs)
    return drs
def get_dag_runs(self, dag_id=None, run_id=None, execution_date=None, state=None):
    logging.info(
        f"Call get_dag_runs with dag_id={dag_id}, run_id={run_id}, "
        f"execution_date={execution_date}, state={state}"
    )
    try:
        dag_runs = []
        dag_ids = [dag_id] if dag_id else self.list_dags()
        logging.debug(f"Found dags {dag_ids}")
        for d_id in dag_ids:
            logging.info(f"Process dag {d_id}")
            for dag_run in DagRun.find(dag_id=d_id, state=state):
                logging.info(
                    f"Process dag_run {dag_run.run_id}, {dag_run.execution_date.isoformat()}"
                )
                if (run_id and run_id != dag_run.run_id
                        or execution_date and execution_date != dag_run.execution_date.isoformat()):
                    logging.info(
                        f"Skip dag_run {dag_run.run_id}, {dag_run.execution_date.isoformat()} "
                        f"(run_id or execution_date doesn't match)"
                    )
                    continue
                response_item = {
                    "dag_id": d_id,
                    "run_id": dag_run.run_id,
                    "execution_date": dag_run.execution_date.isoformat(),
                    "start_date": dag_run.start_date.isoformat(),
                    "state": dag_run.state,
                    "tasks": [
                        {"id": ti.task_id, "state": ti.state}
                        for ti in dag_run.get_task_instances()
                    ],
                    "progress": int(
                        len([ti for ti in dag_run.get_task_instances(State.SUCCESS)]) /
                        len(dag_run.get_task_instances()) * 100
                    )
                }
                dag_runs.append(response_item)
        return {"dag_runs": dag_runs}
    except Exception as err:
        logging.error(f"Failed to call get_dag_runs {err}")
        return {"dag_runs": []}
def list_dag_runs(self, dag_id, state):
    dag_runs = []
    for dag_run in DagRun.find(dag_id=dag_id, state=state):
        dag_runs.append({
            "run_id": dag_run.run_id,
            "state": dag_run.state,
            "execution_date": dag_run.execution_date.isoformat(),
            "start_date": ((dag_run.start_date or '') and
                           dag_run.start_date.isoformat())
        })
    return dag_runs
def dag_list_dag_runs(args, dag=None):
    """Lists dag runs for a given DAG"""
    if dag:
        args.dag_id = dag.dag_id

    dagbag = DagBag()

    if args.dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(args.dag_id)
        raise AirflowException(error_message)

    dag_runs = []
    state = args.state.lower() if args.state else None
    for dag_run in DagRun.find(dag_id=args.dag_id, state=state,
                               no_backfills=args.no_backfill):
        dag_runs.append({
            'id': dag_run.id,
            'run_id': dag_run.run_id,
            'state': dag_run.state,
            'dag_id': dag_run.dag_id,
            'execution_date': dag_run.execution_date.isoformat(),
            'start_date': ((dag_run.start_date or '') and
                           dag_run.start_date.isoformat()),
        })

    if not dag_runs:
        print('No dag runs for {dag_id}'.format(dag_id=args.dag_id))

    header_template = textwrap.dedent("""\n
    {line}
    DAG RUNS
    {line}
    {dag_run_header}
    """)

    dag_runs.sort(key=lambda x: x['execution_date'], reverse=True)
    dag_run_header = '%-3s | %-20s | %-10s | %-20s | %-20s |' % (
        'id', 'run_id', 'state', 'execution_date', 'start_date')
    print(header_template.format(dag_run_header=dag_run_header,
                                 line='-' * 120))
    for dag_run in dag_runs:
        record = '%-3s | %-20s | %-10s | %-20s | %-20s |' % (
            dag_run['id'], dag_run['run_id'], dag_run['state'],
            dag_run['execution_date'], dag_run['start_date'])
        print(record)
def evaluate_dagrun(
        self,
        dag_id,
        expected_task_states,  # dict of task_id: state
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS.
    This is hackish: a dag run is created but its tasks are
    run by a backfill.
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob()
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dr = scheduler.schedule_dag(dag)

    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dr = scheduler.schedule_dag(dag)
    ex_date = dr.execution_date

    try:
        dag.run(start_date=ex_date, end_date=ex_date, **run_kwargs)
    except AirflowException:
        pass

    # test tasks
    for task_id, expected_state in expected_task_states.items():
        task = dag.get_task(task_id)
        ti = TI(task, ex_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)

    # load dagrun
    dr = DagRun.find(dag_id=dag_id, execution_date=ex_date)
    dr = dr[0]
    dr.dag = dag

    # dagrun is running
    self.assertEqual(dr.state, State.RUNNING)

    dr.update_state()

    # dagrun should now be in the expected state
    self.assertEqual(dr.state, dagrun_state)
def evaluate_dagrun(
        self,
        dag_id,
        expected_task_states,  # dict of task_id: state
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS.
    This is hackish: a dag run is created but its tasks are
    run by a backfill.
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob(**self.default_scheduler_args)
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dr = scheduler.create_dag_run(dag)

    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dr = scheduler.create_dag_run(dag)
    ex_date = dr.execution_date

    try:
        dag.run(start_date=ex_date, end_date=ex_date, **run_kwargs)
    except AirflowException:
        pass

    # test tasks
    for task_id, expected_state in expected_task_states.items():
        task = dag.get_task(task_id)
        ti = TI(task, ex_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)

    # load dagrun
    dr = DagRun.find(dag_id=dag_id, execution_date=ex_date)
    dr = dr[0]
    dr.dag = dag

    # dagrun is running
    self.assertEqual(dr.state, State.RUNNING)

    dr.update_state()

    # dagrun should now be in the expected state
    self.assertEqual(dr.state, dagrun_state)
def test_backfill_max_limit_check_complete_loop(self):
    dag = self._get_dag_test_max_active_limits(
        'test_backfill_max_limit_check_complete_loop')
    start_date = DEFAULT_DATE - datetime.timedelta(hours=1)
    end_date = DEFAULT_DATE

    # Given the max limit to be 1 in active dag runs, we need to run the
    # backfill job 3 times
    success_expected = 2
    executor = MockExecutor()
    job = BackfillJob(dag=dag,
                      start_date=start_date,
                      end_date=end_date,
                      executor=executor,
                      donot_pickle=True)
    job.run()

    success_dagruns = len(DagRun.find(dag_id=dag.dag_id, state=State.SUCCESS))
    running_dagruns = len(DagRun.find(dag_id=dag.dag_id, state=State.RUNNING))
    self.assertEqual(success_expected, success_dagruns)
    self.assertEqual(0, running_dagruns)  # no dag_runs in running state are left
def get_latest_log(dag_id, task_id="JobCleanup", state=None):
    log_base = os.path.expanduser(configuration.get('core', 'BASE_LOG_FOLDER'))
    dag_run = sorted(DagRun.find(dag_id, state=state),
                     reverse=True,
                     key=lambda x: x.execution_date)[0]
    for task in dag_run.get_task_instances():
        if task.task_id == task_id:
            kwargs = {
                "log_base": log_base,
                "dag_id": task.dag_id,
                "task_id": task.task_id,
                "execution_date": task.execution_date.isoformat(),
                "try_number": task._try_number
            }
            return "{log_base}/{dag_id}/{task_id}/{execution_date}/{try_number}.log".format(**kwargs)
def test_backfill_conf(self):
    dag = self._get_dummy_dag('test_backfill_conf')

    executor = MockExecutor()

    conf = json.loads("""{"key": "value"}""")
    job = BackfillJob(dag=dag,
                      executor=executor,
                      start_date=DEFAULT_DATE,
                      end_date=DEFAULT_DATE + datetime.timedelta(days=2),
                      conf=conf)
    job.run()

    dr = DagRun.find(dag_id='test_backfill_conf')

    self.assertEqual(conf, dr[0].conf)
def remove_outdated_dags(cwl_id, dags_folder):
    """
    Iterates over DAG files from the dags_folder (excluding Airflow examples).
    Assuming that the dag_id written inside a Python file is equal to its rootname
    and follows the naming rule "cwlid-commitsha", we check if there are any
    files that have the target cwl_id in the rootname (aka in the dag_id).
    For all collected DAGs (based on cwl_id) we save the modified timestamp and
    location, then sort them by timestamp excluding the newest one, thus forming
    a list of outdated DAGs for the same cwl_id (the same workflow). Then we
    iterate over the list of outdated DAGs and check whether we can safely
    remove each of them (both from the DB and from disk). The only condition
    under which we don't delete an outdated DAG is when there is at least one
    running DagRun for it.
    """
    logging.info(
        f"Searching for dags based on cwl_id: {cwl_id} in order to remove the old ones"
    )
    dags = {}
    for location in list_py_file_paths(dags_folder, include_examples=False):
        dag_id = get_rootname(location)
        if cwl_id not in dag_id:
            continue
        dags[dag_id] = {
            "location": location,
            "modified": datetime.fromtimestamp(os.path.getmtime(location))
        }
        logging.info(
            f"Found dag_id: {dag_id}, modified: {dags[dag_id]['modified']}")
    for dag_id, dag_metadata in sorted(dags.items(),
                                       key=lambda i: i[1]["modified"])[:-1]:
        logging.info(f"Cleaning dag_id: {dag_id}")
        if len(DagRun.find(dag_id=dag_id, state=State.RUNNING)) == 0:
            try:
                delete_dag.delete_dag(dag_id)
            except Exception as ex:
                logging.error(f"Failed to delete DAG\n {ex}")
            for f in [
                    dag_metadata["location"],
                    os.path.splitext(dag_metadata["location"])[0] + ".cwl"
            ]:
                try:
                    logging.info(f"Deleting DAG file: {f}")
                    os.remove(f)
                except Exception as ex:
                    logging.error(f"Failed to delete file {f}\n {ex}")
        else:
            logging.info("Skipping, DAG has running DagRuns")
def test_sub_set_subdag(self):
    dag = DAG(
        'test_sub_set_subdag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})

    with dag:
        op1 = DummyOperator(task_id='leave1')
        op2 = DummyOperator(task_id='leave2')
        op3 = DummyOperator(task_id='upstream_level_1')
        op4 = DummyOperator(task_id='upstream_level_2')
        op5 = DummyOperator(task_id='upstream_level_3')
        # order randomly
        op2.set_downstream(op3)
        op1.set_downstream(op3)
        op4.set_downstream(op5)
        op3.set_downstream(op4)

    dag.clear()
    dr = dag.create_dagrun(run_id="test",
                           state=State.RUNNING,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE)

    executor = MockExecutor()
    sub_dag = dag.sub_dag(task_regex="leave*",
                          include_downstream=False,
                          include_upstream=False)
    job = BackfillJob(dag=sub_dag,
                      start_date=DEFAULT_DATE,
                      end_date=DEFAULT_DATE,
                      executor=executor)
    job.run()

    self.assertRaises(sqlalchemy.orm.exc.NoResultFound, dr.refresh_from_db)
    # the run_id should have changed, so a refresh won't work
    drs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
    dr = drs[0]

    self.assertEqual(
        BackfillJob.ID_FORMAT_PREFIX.format(DEFAULT_DATE.isoformat()),
        dr.run_id)
    for ti in dr.get_task_instances():
        if ti.task_id == 'leave1' or ti.task_id == 'leave2':
            self.assertEqual(State.SUCCESS, ti.state)
        else:
            self.assertEqual(State.NONE, ti.state)
def test_backfill_max_limit_check_within_limit(self):
    dag = self._get_dag_test_max_active_limits(
        'test_backfill_max_limit_check_within_limit',
        max_active_runs=16)
    start_date = DEFAULT_DATE - datetime.timedelta(hours=1)
    end_date = DEFAULT_DATE

    executor = MockExecutor()
    job = BackfillJob(dag=dag,
                      start_date=start_date,
                      end_date=end_date,
                      executor=executor,
                      donot_pickle=True)
    job.run()

    dagruns = DagRun.find(dag_id=dag.dag_id)
    self.assertEqual(2, len(dagruns))
    self.assertTrue(all([run.state == State.SUCCESS for run in dagruns]))
def dag_state(args):
    """
    Returns the state (and conf if exists) of a DagRun at the command line.
    >>> airflow dags state tutorial 2015-01-01T00:00:00.000000
    running
    >>> airflow dags state a_dag_with_conf_passed 2015-01-01T00:00:00.000000
    failed, {"name": "bob", "age": "42"}
    """
    if args.subdir:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        dag = get_dag_by_file_location(args.dag_id)
    dr = DagRun.find(dag.dag_id, execution_date=args.execution_date)
    out = dr[0].state if dr else None
    confout = ''
    if out and dr[0].conf:
        confout = ', ' + json.dumps(dr[0].conf)
    print(str(out) + confout)
def punish(self, task_id=None):
    dag = self.airflowDal.get_dag_by_prefix(prefix=self.dag_prefix)
    dag_to_tasks_list = {}
    self.get_tasks_to_kill(dag=dag, dag_to_tasks_list=dag_to_tasks_list)
    killed_tasks = []
    filtered_task_id = {}
    if task_id:
        for dag, tasks in dag_to_tasks_list.iteritems():
            for task in tasks:
                if task_id == task.task_id:
                    filtered_task_array = []
                    filtered_task_array.append(task_id)
                    filtered_task_id[dag] = filtered_task_array
        if filtered_task_id:
            dag_to_tasks_list = filtered_task_id

    # while we haven't killed them all
    while killed_tasks.__len__() != dag_to_tasks_list.values().__len__():
        for dag, tasks in dag_to_tasks_list.iteritems():
            # refresh dag run state; we can have multiple runs at the same time
            running_dags = DagRun.find(state=State.RUNNING, dag_id=dag)
            for running_dag in running_dags:
                # don't try to kill tasks that are already killed once
                for task in [item for item in tasks if item not in killed_tasks]:
                    if isinstance(task, str):
                        task_instance = running_dag.get_task_instance(task_id=task)
                    else:
                        task_instance = running_dag.get_task_instance(task_id=task.task_id)
                    if task_instance.state == State.RUNNING:
                        if ProcessUtils.check_pid(task_instance.pid):
                            logging.info("poking for task_id=%s", task)
                            if ProcessUtils.kill_pid(task_instance.pid):
                                logging.info("task_id=%s successfully killed at %s",
                                             task, str(running_dag.execution_date))
                                killed_tasks.append(task)
        logging.info("going to sleep for %d seconds", SLEEP_DURATION)
        time.sleep(SLEEP_DURATION)
def wes_get_run_log(self, run_id):
    logging.debug(f"Call wes_get_run_log with {run_id}")
    try:
        dag_run_info = self.get_dag_runs(dag_id=run_id, run_id=run_id)["dag_runs"][0]
        dag_run = DagRun.find(dag_id=run_id, state=None)[0]
        workflow_params = dag_run.conf["job"]
        del workflow_params["id"]
        workflow_outputs = {}
        try:
            results_location = dag_run.get_task_instance(task_id="CWLJobGatherer").xcom_pull()
            with open(results_location, "r") as input_stream:
                workflow_outputs = json.load(input_stream)
        except Exception as err:
            logging.debug(f"Failed to read workflow results from file. \n {err}")
        return {
            "run_id": run_id,
            "request": {"workflow_params": workflow_params},
            "state": self.wes_state_conversion[dag_run_info["state"]],
            "run_log": {
                "name": run_id,
                "cmd": [""],
                "start_time": dag_run_info["start_date"],
                "end_time": "",
                "stdout": "",
                "stderr": "",
                "exit_code": ""
            },
            "task_logs": [{"name": task["id"]} for task in dag_run_info["tasks"]],
            "outputs": workflow_outputs
        }
    except Exception as err:
        logging.debug(f"Failed to fetch information for {run_id}")
        return {}
def get_active_jobs(jobs_folder, limit=10):
    """
    :param jobs_folder: abs path to the folder with job json files
    :param limit: max number of jobs to return
    :return:
    """
    all_jobs = []
    for job_path in list_files(abs_path=jobs_folder, ext=[".json", ".yml", ".yaml"]):
        dag_id = gen_dag_id(job_path)
        dag_runs = DagRun.find(dag_id)
        all_jobs.append({
            "path": job_path,
            "creation_date": datetime.fromtimestamp(os.path.getctime(job_path)),
            "content": load_job(job_path),
            "dag_id": dag_id,
            "state": dag_runs[0].state if len(dag_runs) > 0 else State.NONE
        })
    success_jobs = sorted([j for j in all_jobs if j["state"] == State.SUCCESS],
                          key=lambda k: k["creation_date"], reverse=True)[:limit]
    running_jobs = sorted([j for j in all_jobs if j["state"] == State.RUNNING],
                          key=lambda k: k["creation_date"], reverse=True)[:limit]
    failed_jobs = sorted([j for j in all_jobs if j["state"] == State.FAILED],
                         key=lambda k: k["creation_date"], reverse=True)[:limit]
    unknown_jobs = sorted([j for j in all_jobs if j["state"] == State.NONE],
                          key=lambda k: k["creation_date"], reverse=True)[:limit]
    return success_jobs + running_jobs + failed_jobs + unknown_jobs
def _trigger_dag(self, dag_id: str, dag_bag: DagBag, dag_run: DagRun):
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    execution_date = timezone.utcnow()

    run_id = f"rb_status_manual__{execution_date.isoformat()}"
    dag_run_id = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dag_run_id:
        raise DagRunAlreadyExists(
            f"Run id {run_id} already exists for dag id {dag_id}")

    dag.create_dagrun(
        run_id=run_id,
        execution_date=execution_date,
        state=State.RUNNING,
        external_trigger=True,
    )
def clean_up_dag_run(dag_id, run_id, dags_folder=None, kill_timeout=None):
    """
    For the provided dag_id and run_id fetches a list of dag_runs (should always
    be a list of 1 item). For each dag_run stops all running tasks, removes
    temporary data and the corresponding records in the DB. Then removes outdated
    DAGs for the same workflow. For that, dag_id should follow the naming rule
    "cwlid-commitsha". If dags_folder was not provided, reads dags_folder from
    airflow.cfg. If kill_timeout was not provided, uses an interval twice as long
    as the one from airflow.cfg. This function should never raise any exceptions.
    """
    logging.info(f"Cleaning up dag_id: {dag_id}, run_id: {run_id}")
    dags_folder = conf.get("core", "dags_folder") if dags_folder is None else dags_folder
    kill_timeout = (2 * conf.getint("core", "KILLED_TASK_CLEANUP_TIME")
                    if kill_timeout is None else kill_timeout)
    for dag_run in DagRun.find(dag_id=dag_id, run_id=run_id):
        stop_dag_run_tasks(dag_run, kill_timeout)
        remove_dag_run_tmp_data(dag_run)
        clean_dag_run_db(dag_run)
    remove_outdated_dags(dag_id.split("-")[0], dags_folder)
def test_quarantine_branch(
    airflow_local_pipeline_run, wait_for_completion, task_to_fail, expected_task_states
):
    """
    Tests that correct tasks run, with correct end state, when ETL is not
    successful. We fail each of the tasks init, extract, transform and load.
    """
    end_state = "failed"
    fail_state = "success"
    dag_type = "testing"
    airflow_local_pipeline_run({"TASK_TO_FAIL": task_to_fail})
    final_etl_state = wait_for_completion(
        end_state=end_state, fail_state=fail_state, dag_id=f"etl_{dag_type}"
    )
    assert final_etl_state == end_state

    etl_dag = DagRun.find(f"etl_{dag_type}", state=end_state)[0]
    task_states = {task.task_id: task.state for task in etl_dag.get_task_instances()}
    assert task_states == expected_task_states
def wes_get_run_log(self, run_id):
    logger.debug(f"""Call wes_get_run_log with {run_id}""")
    try:
        dag_run_info = self.get_dag_runs(dag_id=run_id, run_id=run_id)["dag_runs"][0]
        dag_run = DagRun.find(dag_id=run_id, state=None)[0]
        workflow_params = dag_run.conf["job"]
        del workflow_params["id"]
        try:
            workflow_outputs = dag_run.get_task_instance(task_id="CWLJobGatherer").xcom_pull()[0]
        except Exception:
            workflow_outputs = {}
        return {
            "run_id": run_id,
            "request": {"workflow_params": workflow_params},
            "state": self.wes_state_conversion[dag_run_info["state"]],
            "run_log": {
                "name": run_id,
                "cmd": [""],
                "start_time": dag_run_info["start_date"],
                "end_time": "",
                "stdout": "",
                "stderr": "",
                "exit_code": ""
            },
            "task_logs": [{"name": task["id"]} for task in dag_run_info["tasks"]],
            "outputs": workflow_outputs
        }
    except Exception as err:
        logger.debug(f"""Failed to fetch information for {run_id}""")
        return {}
def __init__(self,
             mailbox: Mailbox,
             dag_run_id: DagRunId,
             max_num_event: int,
             poll_timeout: int = 0) -> None:
    """
    :param mailbox: where the EventHandleResult is sent to.
    :type mailbox: Mailbox
    :param dag_run_id: the run id of the dag run
    :type dag_run_id: str
    :param max_num_event: max number of events that can be handled before exit
    :type max_num_event: int
    :param poll_timeout: poll timeout in seconds for an event before exit
    :type poll_timeout: int
    """
    super().__init__()
    self._mailbox = mailbox
    self._dag_run_id = dag_run_id
    self._event_queue = Queue()
    self._max_num_event = max_num_event
    self._poll_timeout = poll_timeout

    dag_runs = DagRun.find(dag_id=dag_run_id.dag_id, run_id=dag_run_id.run_id)
    if len(dag_runs) < 1:
        raise RuntimeError(
            "no dag_run found with dag_run_id: {}".format(dag_run_id))
    elif len(dag_runs) > 1:
        raise RuntimeError(
            "more than one dag_run found with dag_run_id: {}".format(dag_run_id))
    self._dag_run = dag_runs[0]

    dag_id = self._dag_run.dag_id
    self._serialized_dag_model = SerializedDagModel.get(dag_id)
    if self._serialized_dag_model is None:
        raise RuntimeError(
            "no serialized dag is found with dag_id: {}".format(dag_id))
def trigger_dag(dag_id, run_id=None, conf=None, execution_date=None,
                replace_microseconds=True):
    dagbag = DagBag()

    if dag_id not in dagbag.dags:
        raise AirflowException("Dag id {} not found".format(dag_id))

    dag = dagbag.get_dag(dag_id)

    if not execution_date:
        execution_date = timezone.utcnow()

    assert timezone.is_localized(execution_date)

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dr = DagRun.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise AirflowException("Run id {} already exists for dag id {}".format(
            run_id, dag_id))

    run_conf = None
    if conf:
        run_conf = conf

    trigger = dag.create_dagrun(run_id=run_id,
                                execution_date=execution_date,
                                state=State.RUNNING,
                                conf=run_conf,
                                external_trigger=True)

    return trigger
def dag_list_dag_runs(args, dag=None):
    """Lists dag runs for a given DAG"""
    if dag:
        args.dag_id = dag.dag_id

    dagbag = DagBag()

    if args.dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(args.dag_id)
        raise AirflowException(error_message)

    state = args.state.lower() if args.state else None
    dag_runs = DagRun.find(dag_id=args.dag_id,
                           state=state,
                           no_backfills=args.no_backfill)

    if not dag_runs:
        print('No dag runs for {dag_id}'.format(dag_id=args.dag_id))
        return

    dag_runs.sort(key=lambda x: x.execution_date, reverse=True)
    table = _tabulate_dag_runs(dag_runs, tablefmt=args.output)
    print(table)
def _create_dagruns(dag, execution_dates, state, run_id_template):
    """
    Infers from the dates which dag runs need to be created and does so.

    :param dag: the dag to create dag runs for
    :param execution_dates: list of execution dates to evaluate
    :param state: the state to set the dag run to
    :param run_id_template: the template for the run id, formatted with the
        execution date
    :return: newly created and existing dag runs for the execution dates supplied
    """
    # find out if we need to create any dag runs
    drs = DagRun.find(dag_id=dag.dag_id, execution_date=execution_dates)
    dates_to_create = list(set(execution_dates) - set([dr.execution_date for dr in drs]))

    for date in dates_to_create:
        dr = dag.create_dagrun(
            run_id=run_id_template.format(date.isoformat()),
            execution_date=date,
            start_date=timezone.utcnow(),
            external_trigger=False,
            state=state,
        )
        drs.append(dr)

    return drs
def execute(self, context: Context):
    if isinstance(self.execution_date, datetime.datetime):
        parsed_execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        parsed_execution_date = timezone.parse(self.execution_date)
    else:
        parsed_execution_date = timezone.utcnow()

    if self.trigger_run_id:
        run_id = self.trigger_run_id
    else:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_execution_date)
    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=parsed_execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id, parsed_execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=parsed_execution_date, end_date=parsed_execution_date)
            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
        else:
            raise e
    if dag_run is None:
        raise RuntimeError("The dag_run should be set here!")

    # Store the execution date from the dag run (either created or found above) to
    # be used when creating the extra link on the webserver.
    ti = context['task_instance']
    ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO, value=dag_run.execution_date.isoformat())
    ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(f"{self.trigger_dag_id} failed with failed states {state}")
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                return
def test_backfill_max_limit_check(self):
    dag_id = 'test_backfill_max_limit_check'
    run_id = 'test_dagrun'
    start_date = DEFAULT_DATE - datetime.timedelta(hours=1)
    end_date = DEFAULT_DATE

    dag_run_created_cond = threading.Condition()

    def run_backfill(cond):
        cond.acquire()
        # this session object is different than the one in the main thread
        with create_session() as thread_session:
            try:
                dag = self._get_dag_test_max_active_limits(dag_id)

                # Existing dagrun that is not within the backfill range
                dag.create_dagrun(
                    run_id=run_id,
                    state=State.RUNNING,
                    execution_date=DEFAULT_DATE + datetime.timedelta(hours=1),
                    start_date=DEFAULT_DATE,
                )

                thread_session.commit()
                cond.notify()
            finally:
                cond.release()
                thread_session.close()

            executor = MockExecutor()
            job = BackfillJob(dag=dag,
                              start_date=start_date,
                              end_date=end_date,
                              executor=executor,
                              donot_pickle=True)
            job.run()

    backfill_job_thread = threading.Thread(target=run_backfill,
                                           name="run_backfill",
                                           args=(dag_run_created_cond,))

    dag_run_created_cond.acquire()
    with create_session() as session:
        backfill_job_thread.start()
        try:
            # at this point backfill can't run since the max_active_runs has been
            # reached, so it is waiting
            dag_run_created_cond.wait(timeout=1.5)
            dagruns = DagRun.find(dag_id=dag_id)
            dr = dagruns[0]
            self.assertEqual(1, len(dagruns))
            self.assertEqual(dr.run_id, run_id)

            # allow the backfill to execute
            # by setting the existing dag run to SUCCESS,
            # backfill will execute dag runs 1 by 1
            dr.set_state(State.SUCCESS)
            session.merge(dr)
            session.commit()

            backfill_job_thread.join()

            dagruns = DagRun.find(dag_id=dag_id)
            self.assertEqual(3, len(dagruns))  # 2 from backfill + 1 existing
            self.assertEqual(dagruns[-1].run_id, dr.run_id)
        finally:
            dag_run_created_cond.release()
def test_backfill_fill_blanks(self):
    dag = DAG(
        'test_backfill_fill_blanks',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'},
    )

    with dag:
        op1 = DummyOperator(task_id='op1')
        op2 = DummyOperator(task_id='op2')
        op3 = DummyOperator(task_id='op3')
        op4 = DummyOperator(task_id='op4')
        op5 = DummyOperator(task_id='op5')
        op6 = DummyOperator(task_id='op6')

    dag.clear()
    dr = dag.create_dagrun(run_id='test',
                           state=State.RUNNING,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE)
    executor = MockExecutor()

    session = settings.Session()

    tis = dr.get_task_instances()
    for ti in tis:
        if ti.task_id == op1.task_id:
            ti.state = State.UP_FOR_RETRY
            ti.end_date = DEFAULT_DATE
        elif ti.task_id == op2.task_id:
            ti.state = State.FAILED
        elif ti.task_id == op3.task_id:
            ti.state = State.SKIPPED
        elif ti.task_id == op4.task_id:
            ti.state = State.SCHEDULED
        elif ti.task_id == op5.task_id:
            ti.state = State.UPSTREAM_FAILED
        # op6 = None
        session.merge(ti)
    session.commit()
    session.close()

    job = BackfillJob(dag=dag,
                      start_date=DEFAULT_DATE,
                      end_date=DEFAULT_DATE,
                      executor=executor)
    self.assertRaisesRegex(
        AirflowException,
        'Some task instances failed',
        job.run)

    self.assertRaises(sqlalchemy.orm.exc.NoResultFound, dr.refresh_from_db)
    # the run_id should have changed, so a refresh won't work
    drs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
    dr = drs[0]

    self.assertEqual(dr.state, State.FAILED)

    tis = dr.get_task_instances()
    for ti in tis:
        if ti.task_id in (op1.task_id, op4.task_id, op6.task_id):
            self.assertEqual(ti.state, State.SUCCESS)
        elif ti.task_id == op2.task_id:
            self.assertEqual(ti.state, State.FAILED)
        elif ti.task_id == op3.task_id:
            self.assertEqual(ti.state, State.SKIPPED)
        elif ti.task_id == op5.task_id:
            self.assertEqual(ti.state, State.UPSTREAM_FAILED)
def set_state(task, execution_date, upstream=False, downstream=False,
              future=False, past=False, state=State.SUCCESS, commit=False):
    """
    Set the state of a task instance and if needed its relatives. Can set state
    for future tasks (calculated from execution_date) and retroactively
    for past tasks. Will verify integrity of past dag runs in order to create
    tasks that did not exist. It will not create dag runs that are missing
    on the schedule (but it will for subdag dag runs if needed).

    :param task: the task from which to work. task.task.dag needs to be set
    :param execution_date: the execution date from which to start looking
    :param upstream: Mark all parents (upstream tasks)
    :param downstream: Mark all siblings (downstream tasks) of task_id, including SubDags
    :param future: Mark all future tasks on the interval of the dag up until
        last execution date.
    :param past: Retroactively mark all tasks starting from start_date of the DAG
    :param state: State to which the tasks need to be set
    :param commit: Commit tasks to be altered to the database
    :return: list of tasks that have been created and updated
    """
    assert timezone.is_localized(execution_date)

    # microseconds are supported by the database, but is not handled
    # correctly by airflow on e.g. the filesystem and in other places
    execution_date = execution_date.replace(microsecond=0)

    assert task.dag is not None
    dag = task.dag

    latest_execution_date = dag.latest_execution_date
    assert latest_execution_date is not None

    # determine date range of dag runs and tasks to consider
    end_date = latest_execution_date if future else execution_date

    if 'start_date' in dag.default_args:
        start_date = dag.default_args['start_date']
    elif dag.start_date:
        start_date = dag.start_date
    else:
        start_date = execution_date
    start_date = execution_date if not past else start_date

    if dag.schedule_interval == '@once':
        dates = [start_date]
    else:
        dates = dag.date_range(start_date=start_date, end_date=end_date)

    # find relatives (siblings = downstream, parents = upstream) if needed
    task_ids = [task.task_id]
    if downstream:
        relatives = task.get_flat_relatives(upstream=False)
        task_ids += [t.task_id for t in relatives]
    if upstream:
        relatives = task.get_flat_relatives(upstream=True)
        task_ids += [t.task_id for t in relatives]

    # verify the integrity of the dag runs in case a task was added or removed
    # set the confirmed execution dates as they might be different
    # from what was provided
    confirmed_dates = []
    drs = DagRun.find(dag_id=dag.dag_id, execution_date=dates)
    for dr in drs:
        dr.dag = dag
        dr.verify_integrity()
        confirmed_dates.append(dr.execution_date)

    # go through subdagoperators and create dag runs. We will only work
    # within the scope of the subdag. We won't propagate to the parent dag,
    # but we will propagate from parent to subdag.
    session = Session()
    dags = [dag]
    sub_dag_ids = []
    while len(dags) > 0:
        current_dag = dags.pop()
        for task_id in task_ids:
            if not current_dag.has_task(task_id):
                continue

            current_task = current_dag.get_task(task_id)
            if isinstance(current_task, SubDagOperator):
                # this works as a kind of integrity check
                # it creates missing dag runs for subdagoperators,
                # maybe this should be moved to dagrun.verify_integrity
                drs = _create_dagruns(current_task.subdag,
                                      execution_dates=confirmed_dates,
                                      state=State.RUNNING,
                                      run_id_template=BackfillJob.ID_FORMAT_PREFIX)

                for dr in drs:
                    dr.dag = current_task.subdag
                    dr.verify_integrity()
                    if commit:
                        dr.state = state
                        session.merge(dr)

                dags.append(current_task.subdag)
                sub_dag_ids.append(current_task.subdag.dag_id)

    # now look for the task instances that are affected
    TI = TaskInstance

    # get all tasks of the main dag that will be affected by a state change
    qry_dag = session.query(TI).filter(
        TI.dag_id == dag.dag_id,
        TI.execution_date.in_(confirmed_dates),
        TI.task_id.in_(task_ids)).filter(
        or_(TI.state.is_(None),
            TI.state != state)
    )

    # get *all* tasks of the sub dags
    if len(sub_dag_ids) > 0:
        qry_sub_dag = session.query(TI).filter(
            TI.dag_id.in_(sub_dag_ids),
            TI.execution_date.in_(confirmed_dates)).filter(
            or_(TI.state.is_(None),
                TI.state != state)
        )

    if commit:
        tis_altered = qry_dag.with_for_update().all()
        if len(sub_dag_ids) > 0:
            tis_altered += qry_sub_dag.with_for_update().all()
        for ti in tis_altered:
            ti.state = state
        session.commit()
    else:
        tis_altered = qry_dag.all()
        if len(sub_dag_ids) > 0:
            tis_altered += qry_sub_dag.all()

    session.expunge_all()
    session.close()

    return tis_altered
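# Illustrative usage sketch for set_state() above; it is not part of the original snippet.
# It assumes an Airflow environment with a DAG "example_dag" containing a task "my_task"
# and at least one scheduled dag run; the dag_id, task_id, and date are hypothetical.
from airflow.models import DagBag
from airflow.utils import timezone
from airflow.utils.state import State

dag = DagBag().get_dag("example_dag")
task = dag.get_task("my_task")
task.dag = dag  # set_state() expects task.dag to be set

altered = set_state(task=task,
                    execution_date=timezone.datetime(2020, 1, 1),
                    downstream=True,      # also mark downstream tasks
                    state=State.SUCCESS,
                    commit=False)         # dry run: nothing is written to the DB
print("{} task instances would be altered".format(len(altered)))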