def autogenerate(self, data, **kwargs):
        """Auto generate run_id and logical_date if they are not provided.

        For compatibility, if `execution_date` is submitted, it is converted
        to `logical_date`.
        """
        logical_date = data.get("logical_date", _MISSING)
        execution_date = data.pop("execution_date", _MISSING)
        if logical_date is execution_date is _MISSING:  # Both missing.
            data["logical_date"] = str(timezone.utcnow())
        elif logical_date is _MISSING:  # Only logical_date missing.
            data["logical_date"] = execution_date
        elif execution_date is _MISSING:  # Only execution_date missing.
            pass
        elif logical_date != execution_date:  # Both provided but don't match.
            raise BadRequest(
                "logical_date conflicts with execution_date",
                detail=f"{logical_date!r} != {execution_date!r}",
            )

        if "dag_run_id" not in data:
            try:
                data["dag_run_id"] = DagRun.generate_run_id(
                    DagRunType.MANUAL, timezone.parse(data["logical_date"])
                )
            except (ParserError, TypeError) as err:
                raise BadRequest("Incorrect datetime argument", detail=str(err))
        return data
Beispiel #2
0
 def autogenerate(self, data, **kwargs):
     """Auto generate run_id and execution_date if they are not loaded"""
     if "execution_date" not in data.keys():
         data["execution_date"] = str(timezone.utcnow())
     if "dag_run_id" not in data.keys():
         data["dag_run_id"] = DagRun.generate_run_id(
             DagRunType.MANUAL, timezone.parse(data["execution_date"]))
     return data
Beispiel #3
0
 def autogenerate(self, data, **kwargs):
     """Auto generate run_id and execution_date if they are not loaded"""
     if "execution_date" not in data.keys():
         data["execution_date"] = str(timezone.utcnow())
     if "dag_run_id" not in data.keys():
         try:
             data["dag_run_id"] = DagRun.generate_run_id(
                 DagRunType.MANUAL, timezone.parse(data["execution_date"])
             )
         except (ParserError, TypeError) as err:
             raise BadRequest("Incorrect datetime argument", detail=str(err))
     return data
Beispiel #4
0
    def test_sub_set_subdag(self):
        dag = DAG('test_sub_set_subdag',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        with dag:
            op1 = DummyOperator(task_id='leave1')
            op2 = DummyOperator(task_id='leave2')
            op3 = DummyOperator(task_id='upstream_level_1')
            op4 = DummyOperator(task_id='upstream_level_2')
            op5 = DummyOperator(task_id='upstream_level_3')
            # order randomly
            op2.set_downstream(op3)
            op1.set_downstream(op3)
            op4.set_downstream(op5)
            op3.set_downstream(op4)

        dag.clear()
        dr = dag.create_dagrun(run_id="test",
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE,
                               start_date=DEFAULT_DATE)

        executor = MockExecutor()
        sub_dag = dag.sub_dag(task_regex="leave*",
                              include_downstream=False,
                              include_upstream=False)
        job = BackfillJob(dag=sub_dag,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE,
                          executor=executor)
        job.run()

        self.assertRaises(sqlalchemy.orm.exc.NoResultFound, dr.refresh_from_db)
        # the run_id should have changed, so a refresh won't work
        drs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
        dr = drs[0]

        self.assertEqual(
            DagRun.generate_run_id(DagRunType.BACKFILL_JOB, DEFAULT_DATE),
            dr.run_id)
        for ti in dr.get_task_instances():
            if ti.task_id == 'leave1' or ti.task_id == 'leave2':
                self.assertEqual(State.SUCCESS, ti.state)
            else:
                self.assertEqual(State.NONE, ti.state)
Beispiel #5
0
    def execute(self, context: Context):
        if isinstance(self.execution_date, datetime.datetime):
            parsed_execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            parsed_execution_date = timezone.parse(self.execution_date)
        else:
            parsed_execution_date = timezone.utcnow()

        if self.trigger_run_id:
            run_id = self.trigger_run_id
        else:
            run_id = DagRun.generate_run_id(DagRunType.MANUAL,
                                            parsed_execution_date)
        try:
            dag_run = trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=parsed_execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists as e:
            if self.reset_dag_run:
                self.log.info("Clearing %s on %s", self.trigger_dag_id,
                              parsed_execution_date)

                # Get target dag object and call clear()

                dag_model = DagModel.get_current(self.trigger_dag_id)
                if dag_model is None:
                    raise DagNotFound(
                        f"Dag id {self.trigger_dag_id} not found in DagModel")

                dag_bag = DagBag(dag_folder=dag_model.fileloc,
                                 read_dags_from_db=True)
                dag = dag_bag.get_dag(self.trigger_dag_id)
                dag.clear(start_date=parsed_execution_date,
                          end_date=parsed_execution_date)
                dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
            else:
                raise e
        if dag_run is None:
            raise RuntimeError("The dag_run should be set here!")
        # Store the execution date from the dag run (either created or found above) to
        # be used when creating the extra link on the webserver.
        ti = context['task_instance']
        ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO,
                     value=dag_run.execution_date.isoformat())
        ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

        if self.wait_for_completion:
            # wait for dag to complete
            while True:
                self.log.info(
                    'Waiting for %s on %s to become allowed state %s ...',
                    self.trigger_dag_id,
                    dag_run.execution_date,
                    self.allowed_states,
                )
                time.sleep(self.poke_interval)

                dag_run.refresh_from_db()
                state = dag_run.state
                if state in self.failed_states:
                    raise AirflowException(
                        f"{self.trigger_dag_id} failed with failed states {state}"
                    )
                if state in self.allowed_states:
                    self.log.info("%s finished with allowed state %s",
                                  self.trigger_dag_id, state)
                    return