def test_dagrun_id_is_not_backfill(self):
    """
    Task instances whose dagrun ID is not a backfill dagrun ID should pass
    this dep.
    """
    # Both a regular run_id and a missing (None) run_id count as "not backfill".
    for non_backfill_run_id in ('notbackfill_something', None):
        run = DagRun()
        run.run_id = non_backfill_run_id
        task_instance = Mock(get_dagrun=Mock(return_value=run))
        self.assertTrue(DagrunIdDep().is_met(ti=task_instance))
def test_dagrun_id_is_backfill(self):
    """
    Task instances whose dagrun ID is a backfill dagrun ID should fail this
    dep.
    """
    backfill_run = DagRun()
    backfill_run.run_id = '{}_something'.format(BackfillJob.ID_PREFIX)
    task_instance = Mock(get_dagrun=Mock(return_value=backfill_run))
    self.assertFalse(DagrunIdDep().is_met(ti=task_instance))
def create_dagrun_from_dbnd_run(
    databand_run,
    dag,
    execution_date,
    run_id,
    state=State.RUNNING,
    external_trigger=False,
    conf=None,
    session=None,
):
    """
    Create new DagRun and all relevant TaskInstances
    """
    # Reuse an existing DagRun for this (dag_id, execution_date) if one exists.
    existing_query = session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id, DagRun.execution_date == execution_date
    )
    dagrun = existing_query.first()
    if dagrun is None:
        dagrun = DagRun(
            run_id=run_id,
            execution_date=execution_date,
            start_date=dag.start_date,
            _state=state,
            external_trigger=external_trigger,
            dag_id=dag.dag_id,
            conf=conf,
        )
        session.add(dagrun)
    else:
        logger.warning("Running with existing airflow dag run %s", dagrun)
        dagrun.dag = dag
        dagrun.run_id = run_id
    session.commit()

    # create the associated task instances
    # state is None at the moment of creation
    # dagrun.verify_integrity(session=session)  # fetches [TaskInstance] again
    # tasks_skipped = databand_run.tasks_skipped
    # we can find a source of the completion, but also,
    # sometimes we don't know the source of the "complete"
    TI = TaskInstance
    stored_instances = session.query(TI).filter(
        TI.dag_id == dag.dag_id, TI.execution_date == execution_date
    ).all()
    instances_by_task_id = {
        stored_ti.task_id: stored_ti for stored_ti in stored_instances
    }
    for af_task in dag.tasks:
        ti = instances_by_task_id.get(af_task.task_id)
        if ti is None:
            # No stored instance for this task yet -- create a fresh one.
            ti = TaskInstance(af_task, execution_date=execution_date)
            ti.start_date = timezone.utcnow()
            ti.end_date = timezone.utcnow()
            session.add(ti)
        # NOTE(review): assumes every airflow task has a matching databand
        # task run -- a None here would raise; confirm against callers.
        task_run = databand_run.get_task_run_by_af_id(af_task.task_id)
        # all tasks part of the backfill are scheduled to dagrun
        # Set log file path to expected airflow log file path
        task_run.log.local_log_file.path = ti.log_filepath.replace(
            ".log", "/{0}.log".format(ti.try_number)
        )
        if task_run.is_reused:
            # this task is completed and we don't need to run it anymore
            ti.state = State.SUCCESS
    session.commit()
    return dagrun