def autogenerate(self, data, **kwargs):
    """Auto generate run_id and logical_date if they are not provided.

    For compatibility, if `execution_date` is submitted, it is converted
    to `logical_date`.
    """
    logical_date = data.get("logical_date", _MISSING)
    execution_date = data.pop("execution_date", _MISSING)
    if execution_date is not _MISSING:
        # Legacy field submitted: either adopt it, or reject a mismatch.
        if logical_date is _MISSING:
            data["logical_date"] = execution_date
        elif logical_date != execution_date:
            raise BadRequest(
                "logical_date conflicts with execution_date",
                detail=f"{logical_date!r} != {execution_date!r}",
            )
    elif logical_date is _MISSING:
        # Neither field submitted: default to the current time.
        data["logical_date"] = str(timezone.utcnow())
    if "dag_run_id" not in data:
        try:
            data["dag_run_id"] = DagRun.generate_run_id(
                DagRunType.MANUAL, timezone.parse(data["logical_date"])
            )
        except (ParserError, TypeError) as err:
            raise BadRequest("Incorrect datetime argument", detail=str(err))
    return data
def autogenerate(self, data, **kwargs):
    """Auto generate run_id and execution_date if they are not loaded.

    Defect fixed: a malformed user-supplied ``execution_date`` string made
    ``timezone.parse`` raise an unhandled ``ParserError``/``TypeError``
    (surfacing as a server error); it is now converted to a ``BadRequest``,
    consistent with the sibling ``autogenerate`` implementation in this file.
    """
    if "execution_date" not in data:
        data["execution_date"] = str(timezone.utcnow())
    if "dag_run_id" not in data:
        try:
            data["dag_run_id"] = DagRun.generate_run_id(
                DagRunType.MANUAL, timezone.parse(data["execution_date"])
            )
        except (ParserError, TypeError) as err:
            # Report the bad datetime back to the client instead of a 500.
            raise BadRequest("Incorrect datetime argument", detail=str(err))
    return data
def autogenerate(self, data, **kwargs):
    """Auto generate run_id and execution_date if they are not loaded"""
    if "execution_date" not in data:
        data["execution_date"] = str(timezone.utcnow())
    # Nothing more to do if the caller already supplied a run id.
    if "dag_run_id" in data:
        return data
    try:
        data["dag_run_id"] = DagRun.generate_run_id(
            DagRunType.MANUAL, timezone.parse(data["execution_date"])
        )
    except (ParserError, TypeError) as err:
        raise BadRequest("Incorrect datetime argument", detail=str(err))
    return data
def test_sub_set_subdag(self):
    """Backfilling a sub-DAG should replace the existing manual run's run_id
    and only execute the tasks selected by the regex; everything else stays
    unscheduled (state NONE)."""
    # Diamond-ish DAG: leave1/leave2 -> upstream_level_1 -> _2 -> _3.
    dag = DAG(
        'test_sub_set_subdag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})
    with dag:
        op1 = DummyOperator(task_id='leave1')
        op2 = DummyOperator(task_id='leave2')
        op3 = DummyOperator(task_id='upstream_level_1')
        op4 = DummyOperator(task_id='upstream_level_2')
        op5 = DummyOperator(task_id='upstream_level_3')
        # order randomly
        op2.set_downstream(op3)
        op1.set_downstream(op3)
        op4.set_downstream(op5)
        op3.set_downstream(op4)

    dag.clear()
    # Pre-create a RUNNING run with run_id "test"; the backfill below is
    # expected to take it over and rename its run_id.
    dr = dag.create_dagrun(run_id="test",
                           state=State.RUNNING,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE)

    executor = MockExecutor()
    # Sub-DAG restricted to the two leaf tasks only (no upstream/downstream).
    sub_dag = dag.sub_dag(task_regex="leave*",
                          include_downstream=False,
                          include_upstream=False)
    job = BackfillJob(dag=sub_dag,
                      start_date=DEFAULT_DATE,
                      end_date=DEFAULT_DATE,
                      executor=executor)
    job.run()

    self.assertRaises(sqlalchemy.orm.exc.NoResultFound, dr.refresh_from_db)
    # the run_id should have changed, so a refresh won't work
    drs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
    dr = drs[0]

    # The surviving run must carry the backfill-generated run_id.
    self.assertEqual(
        DagRun.generate_run_id(DagRunType.BACKFILL_JOB, DEFAULT_DATE),
        dr.run_id)
    for ti in dr.get_task_instances():
        # Only the selected leaves ran; the rest were never scheduled.
        if ti.task_id == 'leave1' or ti.task_id == 'leave2':
            self.assertEqual(State.SUCCESS, ti.state)
        else:
            self.assertEqual(State.NONE, ti.state)
def execute(self, context: Context):
    """Trigger a run of ``self.trigger_dag_id``, optionally resetting an
    existing run and/or blocking until the triggered run reaches an allowed
    terminal state."""
    # Normalize execution_date: accept a datetime, an ISO-ish string
    # (parsed via timezone.parse), or default to "now".
    if isinstance(self.execution_date, datetime.datetime):
        parsed_execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        parsed_execution_date = timezone.parse(self.execution_date)
    else:
        parsed_execution_date = timezone.utcnow()

    # Explicit run id wins; otherwise derive a MANUAL run id from the date.
    if self.trigger_run_id:
        run_id = self.trigger_run_id
    else:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_execution_date)
    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=parsed_execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id, parsed_execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            # Clearing the existing run re-arms it; re-find it by run_id.
            # NOTE(review): [0] assumes DagRun.find returns the matching run
            # first — confirm uniqueness of (dag_id, run_id).
            dag.clear(start_date=parsed_execution_date, end_date=parsed_execution_date)
            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
        else:
            raise e
    if dag_run is None:
        raise RuntimeError("The dag_run should be set here!")

    # Store the execution date from the dag run (either created or found above) to
    # be used when creating the extra link on the webserver.
    ti = context['task_instance']
    ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO, value=dag_run.execution_date.isoformat())
    ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if self.wait_for_completion:
        # wait for dag to complete
        # NOTE(review): poll loop assumes self.failed_states and
        # self.allowed_states are iterables (not None) — confirm defaults
        # set in __init__.
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(
                    f"{self.trigger_dag_id} failed with failed states {state}"
                )
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                return