def test_fractional_seconds(self):
    """Verify that fractional seconds survive a round-trip through the database.

    Creates a DagRun whose execution_date and start_date carry microsecond
    precision, reloads it from the DB, and asserts the timestamps come back
    unchanged (i.e. the DB columns do not truncate sub-second precision).
    """
    dag = DAG(TEST_DAG_ID + 'test_fractional_seconds')
    dag.schedule_interval = '@once'
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime(2015, 1, 2, 0, 0)))

    # utcnow() virtually always carries non-zero microseconds, which is
    # exactly what makes this a meaningful precision test.
    start_date = timezone.utcnow()

    run = dag.create_dagrun(
        run_id='test_' + start_date.isoformat(),
        execution_date=start_date,
        start_date=start_date,
        state=State.RUNNING,
        external_trigger=False,
    )
    run.refresh_from_db()

    self.assertEqual(start_date, run.execution_date,
                     "dag run execution_date loses precision")
    # Fixed: the original message had a stray trailing space.
    self.assertEqual(start_date, run.start_date,
                     "dag run start_date loses precision")
def test_schedule_dag_start_end_dates(self):
    """
    Tests that an attempt to schedule a task after the Dag's end_date
    does not succeed.
    """
    interval = timedelta(hours=1)
    num_runs = 3
    window_start = DEFAULT_DATE
    window_end = window_start + (num_runs - 1) * interval
    dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
              start_date=window_start,
              end_date=window_end,
              schedule_interval=interval)
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # Schedule every run inside the [start_date, end_date] window.
    scheduled = [dag_file_processor.create_dag_run(dag) for _ in range(num_runs)]
    # One more attempt, now past end_date, must yield nothing.
    overflow_run = dag_file_processor.create_dag_run(dag)

    for scheduled_run in scheduled:
        self.assertIsNotNone(scheduled_run)
    self.assertIsNone(overflow_run)
def test_schedule_dag_start_end_dates(self):
    """
    Tests that an attempt to schedule a task after the Dag's end_date
    does not succeed.
    """
    interval = timedelta(hours=1)
    num_runs = 3
    window_start = DEFAULT_DATE
    window_end = window_start + (num_runs - 1) * interval
    dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
              start_date=window_start,
              end_date=window_end,
              schedule_interval=interval)
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    scheduler = jobs.SchedulerJob(**self.default_scheduler_args)

    # Schedule every run inside the [start_date, end_date] window.
    scheduled = [scheduler.create_dag_run(dag) for _ in range(num_runs)]
    # One more attempt, now past end_date, must yield nothing.
    overflow_run = scheduler.create_dag_run(dag)

    for scheduled_run in scheduled:
        self.assertIsNotNone(scheduled_run)
    self.assertIsNone(overflow_run)
def test_schedule_dag_fake_scheduled_previous(self):
    """
    Test scheduling a dag where there is a prior DagRun which has the
    same run_id as the next run should have
    """
    interval = timedelta(hours=1)
    dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
              schedule_interval=interval,
              start_date=DEFAULT_DATE)
    dag.add_task(BaseOperator(task_id="faketastic",
                              owner='Also fake',
                              start_date=DEFAULT_DATE))

    dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # Pre-create a successful, externally triggered run that occupies the
    # run_id the scheduler would otherwise use for DEFAULT_DATE.
    dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE),
                      execution_date=DEFAULT_DATE,
                      state=State.SUCCESS,
                      external_trigger=True)

    dag_run = dag_file_processor.create_dag_run(dag)
    self.assertIsNotNone(dag_run)
    self.assertEqual(dag.dag_id, dag_run.dag_id)
    self.assertIsNotNone(dag_run.run_id)
    self.assertNotEqual('', dag_run.run_id)
    # The scheduler must skip one interval ahead of the pre-existing run.
    self.assertEqual(
        DEFAULT_DATE + interval,
        dag_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'
        .format(dag_run.execution_date)
    )
    self.assertEqual(State.RUNNING, dag_run.state)
    self.assertFalse(dag_run.external_trigger)
def test_schedule_dag_no_previous_runs(self):
    """
    Tests scheduling a dag with no previous runs
    """
    dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID)
    dag.add_task(BaseOperator(task_id="faketastic",
                              owner='Also fake',
                              start_date=datetime(2015, 1, 2, 0, 0)))

    dag_run = DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)

    self.assertIsNotNone(dag_run)
    self.assertEqual(dag.dag_id, dag_run.dag_id)
    self.assertIsNotNone(dag_run.run_id)
    self.assertNotEqual('', dag_run.run_id)
    # With no previous runs the first run lands on the task's start_date.
    self.assertEqual(
        datetime(2015, 1, 2, 0, 0),
        dag_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'
        .format(dag_run.execution_date)
    )
    self.assertEqual(State.RUNNING, dag_run.state)
    self.assertFalse(dag_run.external_trigger)
    dag.clear()
def __new__(cls):
    """
    Build a DAG given a `Workflow` subclass.

    NOTE(review): this ``__new__`` returns None implicitly, so
    instantiating the class yields None rather than an instance; the
    built DAG is instead exposed through the ``cls.DAG`` attribute —
    confirm callers rely on that side effect, not the return value.
    """
    # Assemble DAG constructor kwargs from the Workflow subclass.
    dag_args = cls._create_dag_args()
    dag = DAG(**dag_args)
    # Register operator with their DAG in the order specified
    for operator in cls.declared_operators.values():
        if not operator.has_dag():
            if not inspect.isclass(operator):
                # Operator instance without a dag: attach it directly.
                dag.add_task(operator)
                continue
            # NOTE(review): for a class-valued entry this tests
            # issubclass(type(operator), Operator), i.e. the METAclass of
            # the class against Operator — verify this is not meant to be
            # issubclass(operator, Operator).
            if not issubclass(type(operator), Operator):
                continue
            # Hack the Airflow BaseOperator to set dag on the operator as
            # the dag setter doesn't seem to evaluate correctly
            operator._dag = dag
            operator.dag = dag
    operators = cls.declared_operators.values()
    # Set dependencies
    if len(operators) > 1:
        cls.dependencies(operators)
    # Make DAG itself accessible from the created class
    cls.DAG = dag
def build_airflow_dag(self, task_runs):
    """Build (or reuse) an Airflow DAG covering the given DBND task runs.

    If the run's root task already wraps an existing Airflow DAG, that DAG
    is annotated and returned as-is; otherwise a new dynamic DAG is
    created and populated with one operator per task run, then wired with
    the upstream dependencies recorded on the DBND tasks.
    """
    # create new dag from current tasks and tasks selected to run
    root_task = self.run.root_task_run.task
    if isinstance(root_task, AirflowDagAsDbndTask):
        # it's the dag without the task itself
        dag = root_task.dag
        set_af_doc_md(self.run, dag)
        # Annotate each existing Airflow task with its DBND task-run docs.
        for af_task in dag.tasks:
            task_run = self.run.get_task_run(operator_to_to_dbnd_task_id(af_task))
            set_af_operator_doc_md(task_run, af_task)
        return root_task.dag

    # paused is just for better clarity in the airflow ui
    dag = DAG(
        self.run.dag_id,
        default_args=get_dbnd_default_args(),
        is_paused_upon_creation=True,
        concurrency=self.airflow_config.dbnd_dag_concurrency,
    )
    # hasattr guard: older Airflow versions may lack this private field.
    if hasattr(dag, "_description"):
        dag._description = "Dynamic DAG generated by DBND"

    with dag:
        # First pass: create/attach one operator per selected task run.
        airflow_ops = {}
        for task_run in task_runs:
            task = task_run.task
            if isinstance(task, AirflowOperatorAsDbndTask):
                op = task.airflow_op
                # this is hack, we clean the state of the op.
                # better : implement proxy object like
                # databandOperator that can wrap real Operator
                op._dag = dag
                op.upstream_task_ids.clear()
                dag.add_task(op)
                set_af_operator_doc_md(task_run, op)
            else:
                # we will create DatabandOperator for databand tasks
                op = build_dbnd_operator_from_taskrun(task_run)
            airflow_ops[task.task_id] = op

        # Second pass: wire upstream dependencies between created operators.
        for task_run in task_runs:
            task = task_run.task
            op = airflow_ops[task.task_id]
            upstream_tasks = task.ctrl.task_dag.upstream
            for t in upstream_tasks:
                if t.task_id not in airflow_ops:
                    # we have some tasks that were not selected to run, we don't add them to graph
                    continue
                upstream_ops = airflow_ops[t.task_id]
                # Guard against registering the same dependency twice.
                if upstream_ops.task_id not in op.upstream_task_ids:
                    op.set_upstream(upstream_ops)

    # Point the DAG's file location at the root task's source file so the
    # Airflow UI shows meaningful code.
    dag.fileloc = root_task.task_definition.source_code.task_source_file
    set_af_doc_md(self.run, dag)
    return dag
def _create_task_instance(self):
    """Build a TaskInstance for a dummy task on a throwaway test DAG."""
    moment = timezone.parse(self.default_time)
    dag = DAG('TEST_DAG_ID', start_date=moment, end_date=moment)
    dummy = DummyOperator(task_id="TEST_TASK_ID", owner="airflow")
    dag.add_task(dummy)
    return TaskInstance(task=dummy, execution_date=moment)
def test_schedule_dag_once(self):
    """
    Tests scheduling a dag scheduled for @once - should be scheduled the first time
    it is called, and not scheduled the second.
    """
    dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
    dag.schedule_interval = '@once'
    dag.add_task(BaseOperator(task_id="faketastic",
                              owner='Also fake',
                              start_date=datetime(2015, 1, 2, 0, 0)))

    first_attempt = DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)
    second_attempt = DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)

    # Only the first scheduling attempt of an @once dag may produce a run.
    self.assertIsNotNone(first_attempt)
    self.assertIsNone(second_attempt)
    dag.clear()
def test_schedule_dag_once(self):
    """
    Tests scheduling a dag scheduled for @once - should be scheduled the first time
    it is called, and not scheduled the second.
    """
    dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
    dag.schedule_interval = '@once'
    dag.add_task(BaseOperator(task_id="faketastic",
                              owner='Also fake',
                              start_date=datetime(2015, 1, 2, 0, 0)))

    first_attempt = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)
    second_attempt = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)

    # Only the first scheduling attempt of an @once dag may produce a run.
    self.assertIsNotNone(first_attempt)
    self.assertIsNone(second_attempt)
    dag.clear()
def test_schedule_dag_relativedelta(self):
    """
    Tests scheduling a dag with a relativedelta schedule_interval
    """
    interval = relativedelta(hours=+1)
    dag = DAG(self.TEST_SCHEDULE_RELATIVEDELTA_DAG_ID, schedule_interval=interval)
    dag.add_task(BaseOperator(task_id="faketastic",
                              owner='Also fake',
                              start_date=datetime(2015, 1, 2, 0, 0)))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # First run lands on the task's start_date.
    dag_run = processor.create_dag_run(dag)
    self.assertIsNotNone(dag_run)
    self.assertEqual(dag.dag_id, dag_run.dag_id)
    self.assertIsNotNone(dag_run.run_id)
    self.assertNotEqual('', dag_run.run_id)
    self.assertEqual(
        datetime(2015, 1, 2, 0, 0),
        dag_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'
        .format(dag_run.execution_date)
    )
    self.assertEqual(State.RUNNING, dag_run.state)
    self.assertFalse(dag_run.external_trigger)

    # Second run must advance by exactly one relativedelta interval.
    dag_run2 = processor.create_dag_run(dag)
    self.assertIsNotNone(dag_run2)
    self.assertEqual(dag.dag_id, dag_run2.dag_id)
    self.assertIsNotNone(dag_run2.run_id)
    self.assertNotEqual('', dag_run2.run_id)
    self.assertEqual(
        datetime(2015, 1, 2, 0, 0) + interval,
        dag_run2.execution_date,
        msg='dag_run2.execution_date did not match expectation: {0}'
        .format(dag_run2.execution_date)
    )
    self.assertEqual(State.RUNNING, dag_run2.state)
    self.assertFalse(dag_run2.external_trigger)
    dag.clear()
def test_schedule_dag_no_end_date_up_to_today_only(self):
    """
    Tests that a Dag created without an end_date can only be scheduled up
    to and including the current datetime.

    For example, if today is 2016-01-01 and we are scheduling from a
    start_date of 2015-01-01, only jobs up to, but not including
    2016-01-01 should be scheduled.
    """
    session = settings.Session()
    interval = timedelta(days=1)
    now = utcnow()
    start_date = now.subtract(weeks=1)
    expected_runs = (now - start_date).days

    dag = DAG(self.TEST_SCHEDULE_DAG_NO_END_DATE_UP_TO_TODAY_ONLY_DAG_ID,
              start_date=start_date,
              schedule_interval=interval)
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
    created = []
    for _ in range(expected_runs):
        run = processor.create_dag_run(dag)
        created.append(run)
        # Mark each run complete so the scheduler moves to the next slot.
        run.state = State.SUCCESS
        session.merge(run)
        session.commit()

    # The next slot lies in the future; nothing may be scheduled for it.
    overflow_run = processor.create_dag_run(dag)

    for run in created:
        self.assertIsNotNone(run)
    self.assertIsNone(overflow_run)
""" # That DAG is use to test the behavior of the ExternalTaskSensor # when depending on several runs of an external task. # test_without_failure should not fail, leading to # test_external_task_sensor_multiple_dates_with_failure # to succeed, whereas test_with_failure should fail once # per minute (the DAG runs every second) leading to # test_external_task_sensor_multiple_dates_with_failure # to fail (because of timeout). dag_external_id = TEST_DAG_ID + '_secondly_external' dag_secondly_external = DAG(dag_external_id, default_args=args, schedule_interval=timedelta(seconds=1)) dag_secondly_external.add_task( BashOperator(task_id="test_with_failure", bash_command=bash_command_code, retries=0, depends_on_past=False, start_date=DEFAULT_DATE)) dag_secondly_external.add_task( DummyOperator(task_id="test_without_failure", retries=0, depends_on_past=False, start_date=DEFAULT_DATE)) dag_id = TEST_DAG_ID + '_minutely' dag_minutely = DAG(dag_id, default_args=args, schedule_interval=timedelta(minutes=1)) dag_minutely.add_task( ExternalTaskSensor( task_id='test_external_task_sensor_multiple_dates_without_failure',