def test_scheduler_process_check_heartrate(self):
        """
        Verify that processing a DAG honors the scheduler heartrate.

        A dag whose last_scheduler_run is "now", combined with a very large
        heartrate, must not have any task instances queued.
        """
        dag = DAG(
            dag_id='test_scheduler_process_check_heartrate',
            start_date=DEFAULT_DATE)
        # The operator only needs to be attached to the dag; the local
        # reference was unused, so the binding is dropped.
        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        # Pretend the scheduler just processed this dag.
        orm_dag.last_scheduler_run = datetime.datetime.now()
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        # Huge heartrate: the next scheduling pass must be "too soon".
        scheduler.heartrate = 1000

        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        # Nothing may have been queued because of the heartrate guard.
        queue.put.assert_not_called()
# Example #2
    def test_scheduler_verify_pool_full(self, mock_pool_full):
        """
        Task instances beyond the pool's slot count must not be queued.

        Creates a pool with a single slot and two dagruns (hence two task
        instances), then verifies the executor receives only one of them.
        """
        # Force the patched pool-full check to False so both task instances
        # first make it into the scheduler's internal queue.
        mock_pool_full.return_value = False

        dag = DAG(
            dag_id='test_scheduler_verify_pool_full',
            start_date=DEFAULT_DATE)

        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow',
            pool='test_scheduler_verify_pool_full')

        session = settings.Session()
        # A single slot: only one of the two task instances may execute.
        pool = Pool(pool='test_scheduler_verify_pool_full', slots=1)
        session.add(pool)
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        # Create 2 dagruns, which will create 2 task instances.
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        self.assertEquals(dr.execution_date, DEFAULT_DATE)
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        queue = []
        scheduler._process_task_instances(dag, queue=queue)
        self.assertEquals(len(queue), 2)
        dagbag = SimpleDagBag([dag])

        # Recreated part of the scheduler here, to kick off tasks -> executor
        for ti_key in queue:
            task = dag.get_task(ti_key[1])
            ti = models.TaskInstance(task, ti_key[2])
            # Task starts out in the scheduled state. All tasks in the
            # scheduled state will be sent to the executor
            ti.state = State.SCHEDULED

            # Also save this task instance to the DB.
            session.merge(ti)
            session.commit()

        scheduler._execute_task_instances(dagbag,
                                          (State.SCHEDULED,
                                           State.UP_FOR_RETRY))

        # Only one of the two SCHEDULED tis fits in the single pool slot.
        self.assertEquals(len(scheduler.executor.queued_tasks), 1)
# Example #3
    def test_scheduler_reschedule(self):
        """
        Checks that tasks which are not taken up by the executor
        get rescheduled on a subsequent scheduler run.
        """
        executor = TestExecutor()

        dagbag = DagBag(executor=executor)
        dagbag.dags.clear()
        dagbag.executor = executor

        dag = DAG(
            dag_id='test_scheduler_reschedule',
            start_date=DEFAULT_DATE)
        # Attach the task to the dag; the local reference was unused.
        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        dag.clear()
        dag.is_subdag = False

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

        @mock.patch('airflow.models.DagBag', return_value=dagbag)
        @mock.patch('airflow.models.DagBag.collect_dags')
        def do_schedule(function, function2):
            # Use an empty file since the above mock will return the
            # expected DAGs. Also specify only a single file so that it doesn't
            # try to schedule the above DAG repeatedly.
            scheduler = SchedulerJob(num_runs=1,
                                     executor=executor,
                                     subdir=os.path.join(models.DAGS_FOLDER,
                                                         "no_dags.py"))
            scheduler.heartrate = 0
            scheduler.run()

        do_schedule()
        self.assertEquals(1, len(executor.queued_tasks))
        # The executor never ran the task; clearing its queue simulates a
        # lost submission so the next scheduler run must re-queue it.
        executor.queued_tasks.clear()

        do_schedule()
        self.assertEquals(2, len(executor.queued_tasks))
# Example #4
    def test_scheduler_reschedule(self):
        """
        Checks if tasks that are not taken up by the executor
        get rescheduled
        """
        executor = TestExecutor()

        dagbag = DagBag(executor=executor)
        dagbag.dags.clear()
        dagbag.executor = executor

        dag = DAG(
            dag_id='test_scheduler_reschedule',
            start_date=DEFAULT_DATE)
        dag_task1 = DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        dag.clear()
        dag.is_subdag = False

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

        # The mocks make the scheduler see exactly the dagbag built above
        # instead of scanning the dags folder.
        @mock.patch('airflow.models.DagBag', return_value=dagbag)
        @mock.patch('airflow.models.DagBag.collect_dags')
        def do_schedule(function, function2):
            scheduler = SchedulerJob(num_runs=1, executor=executor,)
            scheduler.heartrate = 0
            scheduler.run()

        do_schedule()
        self.assertEquals(1, len(executor.queued_tasks))
        # TestExecutor never runs the task; clearing its queue simulates a
        # lost submission, so the next run should queue again.
        executor.queued_tasks.clear()

        do_schedule()
        self.assertEquals(2, len(executor.queued_tasks))
# Example #5
    def test_dag_is_deactivated_upon_dagfile_deletion(self):
        """A DAG whose backing file no longer exists must be marked inactive."""
        dag_id = 'old_existing_dag'
        missing_fileloc = "/usr/local/airflow/dags/non_existing_path.py"
        dag = DAG(
            dag_id,
            is_paused_upon_creation=True,
        )
        dag.fileloc = missing_fileloc
        session = settings.Session()
        dag.sync_to_db(session=session)

        stored = session.query(DagModel).filter(DagModel.dag_id == dag_id).one()
        self.assertTrue(stored.is_active)
        self.assertEqual(stored.fileloc, missing_fileloc)

        # No real dag file on disk contains this dag id, so deactivation
        # must kick in.
        DagModel.deactivate_deleted_dags(list_py_file_paths(settings.DAGS_FOLDER))

        stored = session.query(DagModel).filter(DagModel.dag_id == dag_id).one()
        self.assertFalse(stored.is_active)

        # Remove the test row so other tests are unaffected.
        session.execute(DagModel.__table__.delete().where(DagModel.dag_id == dag_id))
        session.close()
# Example #6
    def test_is_paused_subdag(self, mock_dag_bag):
        """Pausing a parent dag should also pause its subdag."""
        subdag_id = 'dag.subdag'
        subdag = DAG(
            subdag_id,
            start_date=DEFAULT_DATE,
        )
        with subdag:
            DummyOperator(task_id='dummy_task', )

        dag_id = 'dag'
        dag = DAG(
            dag_id,
            start_date=DEFAULT_DATE,
        )

        with dag:
            SubDagOperator(task_id='subdag', subdag=subdag)

        # set_is_paused resolves the dag through the (mocked) DagBag.
        mock_dag_bag.return_value.get_dag.return_value = dag

        session = settings.Session()
        dag.sync_to_db(session=session)

        # Both parent and subdag start out unpaused.
        unpaused_dags = session.query(DagModel).filter(
            DagModel.dag_id.in_([subdag_id, dag_id]), ).filter(
                DagModel.is_paused.is_(False)).count()

        self.assertEqual(2, unpaused_dags)

        DagModel.get_dagmodel(dag.dag_id).set_is_paused(is_paused=True)

        # Pausing the parent must cascade to the subdag as well.
        paused_dags = session.query(DagModel).filter(
            DagModel.dag_id.in_([subdag_id, dag_id]), ).filter(
                DagModel.is_paused.is_(True)).count()

        self.assertEqual(2, paused_dags)
# Example #7
    def test_delete_dag(self):
        """DELETE on the experimental dag endpoint: 200 for a known dag id,
        404 for an unknown one."""
        url_template = '/api/experimental/dags/{}'

        from airflow import settings
        session = settings.Session()
        key = "my_dag_id"
        session.add(DagModel(dag_id=key))
        session.commit()

        # Known dag id: deletion succeeds.
        response = self.app.delete(
            url_template.format(key), content_type="application/json")
        self.assertEqual(200, response.status_code)

        # Unknown dag id: the endpoint reports not-found.
        response = self.app.delete(
            url_template.format('does_not_exist_dag'),
            content_type="application/json")
        self.assertEqual(404, response.status_code)
# Example #8
    def setUp(self):
        # Airflow relies on reading the DAG from disk when triggering it.
        # Therefore write a temp file holding the DAG to trigger.
        # delete=False: the file must outlive this `with` block; the path is
        # kept on self._tmpfile (no cleanup here — TODO confirm tearDown
        # removes it).
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            self._tmpfile = f.name
            f.write(DAG_SCRIPT)
            f.flush()

        # Register the triggered dag in the metadata DB, pointing at the
        # temp file just written.
        with create_session() as session:
            session.add(DagModel(dag_id=TRIGGERED_DAG_ID, fileloc=self._tmpfile))
            session.commit()

        self.dag = DAG(TEST_DAG_ID, default_args={"owner": "airflow", "start_date": DEFAULT_DATE})
        # f.name is still valid here because the file was created with
        # delete=False.
        dagbag = DagBag(f.name, read_dags_from_db=False, include_examples=False)
        dagbag.bag_dag(self.dag, root_dag=self.dag)
        dagbag.sync_to_db()
# Example #9
 def _create_dag_runs(self, count):
     # Build `count` manual dag runs whose execution dates are spaced one
     # minute apart, starting one minute after self.default_time.
     dag_runs = [
         DagRun(
             dag_id="TEST_DAG_ID",
             run_id="TEST_DAG_RUN_ID" + str(i),
             run_type=DagRunType.MANUAL.value,
             execution_date=timezone.parse(self.default_time) +
             timedelta(minutes=i),
             start_date=timezone.parse(self.default_time),
             external_trigger=True,
         ) for i in range(1, count + 1)
     ]
     # The owning DagModel row must exist for the runs to resolve.
     dag = DagModel(dag_id="TEST_DAG_ID")
     with create_session() as session:
         session.add_all(dag_runs)
         session.add(dag)
# Example #10
    def test_emit_scheduling_delay(self, schedule_interval, expected):
        """
        Tests that dag scheduling delay stat is set properly once running scheduled dag.
        dag_run.update_state() invokes the _emit_true_scheduling_delay_stats_for_finished_state method.
        """
        dag = DAG(dag_id='test_emit_dag_stats', start_date=days_ago(1), schedule_interval=schedule_interval)
        dag_task = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        try:
            orm_dag = DagModel(
                dag_id=dag.dag_id,
                has_task_concurrency_limits=False,
                next_dagrun=dag.start_date,
                next_dagrun_create_after=dag.following_schedule(dag.start_date),
                is_active=True,
            )
            session.add(orm_dag)
            session.flush()
            dag_run = dag.create_dagrun(
                run_type=DagRunType.SCHEDULED,
                state=State.SUCCESS,
                execution_date=dag.start_date,
                start_date=dag.start_date,
                session=session,
            )
            ti = dag_run.get_task_instance(dag_task.task_id, session)
            ti.set_state(State.SUCCESS, session)
            session.flush()

            # update_state on a finished run is what emits (or skips) the
            # scheduling-delay timing metric.
            with mock.patch.object(Stats, 'timing') as stats_mock:
                dag_run.update_state(session)

            metric_name = f'dagrun.{dag.dag_id}.first_task_scheduling_delay'

            if expected:
                # Delay = first task's actual start minus the point where the
                # run was supposed to be scheduled.
                true_delay = ti.start_date - dag.following_schedule(dag_run.execution_date)
                sched_delay_stat_call = call(metric_name, true_delay)
                assert sched_delay_stat_call in stats_mock.mock_calls
            else:
                # Assert that we never passed the metric
                sched_delay_stat_call = call(metric_name, mock.ANY)
                assert sched_delay_stat_call not in stats_mock.mock_calls
        finally:
            # Don't write anything to the DB
            session.rollback()
            session.close()
# Example #11
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Return the DAG with *dag_id*, optionally verifying *task_id* exists in it.

    :param dag_id: DAG ID to look up
    :param task_id: optional task that must be present in the DAG
    :raises DagNotFound: if the dag is not registered or cannot be loaded
    :raises TaskNotFound: if ``task_id`` is given but absent from the dag
    :return: the loaded DAG object
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dagbag = DagBag(dag_folder=dag_model.fileloc,
                    store_serialized_dags=conf.getboolean(
                        'core', 'store_serialized_dags'))
    dag = dagbag.get_dag(dag_id)  # prefetch dag if it is stored serialized
    # Check the returned object rather than dagbag.dags membership:
    # get_dag() may return None without populating dagbag.dags, and a later
    # dag.has_task() on None would raise AttributeError instead of the
    # intended DagNotFound.
    if dag is None:
        error_message = "Dag id {} not found".format(dag_id)
        raise DagNotFound(error_message)
    if task_id and not dag.has_task(task_id):
        error_message = 'Task {} not found in dag {}'.format(task_id, dag_id)
        raise TaskNotFound(error_message)
    return dag
# Example #12
 def test_stop_dag(self):
     """Stopping a dag via the event-based scheduler must pause it and mark
     its run/tasks as killed (or already succeeded)."""
     t = threading.Thread(target=self.stop_dag_function)
     # Daemon thread so a failing test cannot hang interpreter shutdown.
     # Thread.setDaemon() is deprecated; assign the attribute instead.
     t.daemon = True
     t.start()
     self.start_scheduler('../../dags/test_event_based_scheduler.py')
     with create_session() as session:
         from airflow.models import DagModel
         dag_model: DagModel = DagModel.get_dagmodel(
             EVENT_BASED_SCHEDULER_DAG)
         self.assertTrue(dag_model.is_paused)
         self.assertEqual(dag_model.get_last_dagrun().state, "killed")
         for ti in session.query(TaskInstance).filter(
                 TaskInstance.dag_id == EVENT_BASED_SCHEDULER_DAG):
             self.assertTrue(ti.state in [State.SUCCESS, State.KILLED])
         for te in session.query(TaskExecution).filter(
                 TaskExecution.dag_id == EVENT_BASED_SCHEDULER_DAG):
             self.assertTrue(te.state in [State.SUCCESS, State.KILLED])
# Example #13
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Fetch the DAG identified by *dag_id*; optionally ensure *task_id* exists.

    Raises DagNotFound / TaskNotFound when the respective lookup fails.
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Load from the DB-backed dagbag rooted at the dag's own file location.
    dag = DagBag(
        dag_folder=dag_model.fileloc,
        read_dags_from_db=True
    ).get_dag(dag_id)
    if not dag:
        raise DagNotFound("Dag id {} not found".format(dag_id))
    if task_id and not dag.has_task(task_id):
        raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
    return dag
# Example #14
def dag_state(args, session=NEW_SESSION):
    """
    Returns the state (and conf if exists) of a DagRun at the command line.
    >>> airflow dags state tutorial 2015-01-01T00:00:00.000000
    running
    >>> airflow dags state a_dag_with_conf_passed 2015-01-01T00:00:00.000000
    failed, {"name": "bob", "age": "42"}
    """
    dag = DagModel.get_dagmodel(args.dag_id, session=session)
    if not dag:
        raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table")

    dag_run = (
        session.query(DagRun)
        .filter_by(dag_id=args.dag_id, execution_date=args.execution_date)
        .one_or_none()
    )
    state = dag_run.state if dag_run else None
    # Append the run conf only when a state exists and conf is non-empty.
    suffix = ', ' + json.dumps(dag_run.conf) if state and dag_run.conf else ''
    print(str(state) + suffix)
# Example #15
    def test_delete_dag(self):
        """delete_dag removes the DagModel row for the given dag id."""
        key = "my_dag_id"

        def count_rows(session):
            # Helper: number of DagModel rows with our test dag id.
            return session.query(DagModel).filter(DagModel.dag_id == key).count()

        with create_session() as session:
            self.assertEqual(count_rows(session), 0)
            session.add(DagModel(dag_id=key))

        with create_session() as session:
            self.assertEqual(count_rows(session), 1)

            self.client.delete_dag(dag_id=key)
            self.assertEqual(count_rows(session), 0)
# Example #16
    def execute(self, context: Dict):
        """Trigger the configured target dag, optionally clearing an existing
        run with the same execution date when ``reset_dag_run`` is set.
        """
        # Normalize execution_date: accept datetime, ISO string, or default
        # to "now" when unset.
        if isinstance(self.execution_date, datetime.datetime):
            execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            execution_date = timezone.parse(self.execution_date)
            self.execution_date = execution_date
        else:
            execution_date = timezone.utcnow()

        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
        try:
            # Ignore MyPy type for self.execution_date
            # because it doesn't pick up the timezone.parse() for strings
            trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=self.execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists as e:
            if self.reset_dag_run:
                self.log.info("Clearing %s on %s", self.trigger_dag_id,
                              self.execution_date)

                # Get target dag object and call clear()

                dag_model = DagModel.get_current(self.trigger_dag_id)
                if dag_model is None:
                    raise DagNotFound(
                        f"Dag id {self.trigger_dag_id} not found in DagModel")

                # Load the dag from its own fileloc (or the serialized store).
                dag_bag = DagBag(
                    dag_folder=dag_model.fileloc,
                    store_serialized_dags=settings.STORE_SERIALIZED_DAGS)

                dag = dag_bag.get_dag(self.trigger_dag_id)

                # Clear the single execution date so the existing run can be
                # re-triggered.
                dag.clear(start_date=self.execution_date,
                          end_date=self.execution_date)
            else:
                raise e
# Example #17
 def test_process_dag_to_response(self):
     """_process_dag_to_response must surface the dag id, file location and
     paused flag from a DagModel row."""
     dag_id = "test_dag"
     fileloc = "my_file"
     is_paused = True
     # NOTE(review): datetime.now() yields naive timestamps here — presumably
     # acceptable for a response-shaping test; confirm if tz-aware values
     # are required by DagModel.
     dag = DagModel(dag_id=dag_id,
                    is_paused=is_paused,
                    is_active=True,
                    is_subdag=False,
                    last_scheduler_run=datetime.now(),
                    last_pickled=datetime.now(),
                    last_expired=datetime.now(),
                    scheduler_lock=False,
                    pickle_id=1,
                    fileloc=fileloc,
                    owners="test")
     processed_response = _process_dag_to_response(dag)
     assert dag_id == processed_response[DAG_ID_KEY]
     assert fileloc == processed_response[FILE_LOCATION_KEY]
     assert is_paused == processed_response[IS_PAUSED_KEY]
 def test_should_respond_200(self, name, request_json, session):
     """POSTing a dag run for an existing dag returns HTTP 200 with the
     created run's metadata (parametrized; `name` is only the param label)."""
     del name
     dag_instance = DagModel(dag_id="TEST_DAG_ID")
     session.add(dag_instance)
     session.commit()
     response = self.client.post(
         "api/v1/dags/TEST_DAG_ID/dagRuns", json=request_json, environ_overrides={'REMOTE_USER': "******"}
     )
     assert response.status_code == 200
     # Server-generated fields (run id, dates) are echoed back from the
     # response itself so the comparison pins only the deterministic parts.
     assert {
         "conf": {},
         "dag_id": "TEST_DAG_ID",
         "dag_run_id": response.json["dag_run_id"],
         "end_date": None,
         "execution_date": response.json["execution_date"],
         "external_trigger": True,
         "start_date": response.json["start_date"],
         "state": "running",
     } == response.json
# Example #19
    def test_create_dag_specific_permissions(self):
        """Syncing dag permissions creates can_read for a new DagModel and
        short-circuits when the permission already exists."""
        dag_id = 'some_dag_id'
        perm_name = self.security_manager.prefixed_dag_id(dag_id)
        assert ('can_read', perm_name) not in self.security_manager.get_all_permissions()

        self.session.add(
            DagModel(
                dag_id=dag_id, fileloc='/tmp/dag_.py', schedule_interval='2 2 * * *', is_paused=True
            )
        )
        self.session.commit()

        self.security_manager.create_dag_specific_permissions()
        self.session.commit()

        assert ('can_read', perm_name) in self.security_manager.get_all_permissions()

        # Make sure we short circuit when the perms already exist
        with assert_queries_count(2):  # One query to get DagModels, one query to get all perms
            self.security_manager.create_dag_specific_permissions()
# Example #20
    def index(self):
        """Render the plugin index page with one entry per dag in the dagbag."""
        logging.info("REST_API.index() called")

        dagbag = self.get_dagbag()
        dags = []
        for dag_id in dagbag.dags:
            orm_dag = DagModel.get_current(dag_id)
            # A dag with no DagModel row is treated as inactive.
            active = False if orm_dag is None else not orm_dag.is_paused
            dags.append({"dag_id": dag_id, "is_active": active})

        return self.render("rest_api_plugin/index.html",
                           dags=dags,
                           airflow_webserver_base_url=airflow_webserver_base_url,
                           rest_api_endpoint=rest_api_endpoint,
                           apis_metadata=apis_metadata,
                           airflow_version=airflow_version,
                           )
# Example #21
 def test_response_409(self, session):
     """Posting a dag run whose run id already exists must yield HTTP 409."""
     dag_instance = DagModel(dag_id="TEST_DAG_ID")
     session.add(dag_instance)
     # Pre-create runs so TEST_DAG_RUN_ID_1 already exists before the POST.
     session.add_all(self._create_test_dag_run())
     session.commit()
     response = self.client.post(
         "api/v1/dags/TEST_DAG_ID/dagRuns",
         json={"dag_run_id": "TEST_DAG_RUN_ID_1", "execution_date": self.default_time,},
         environ_overrides={'REMOTE_USER': "******"},
     )
     self.assertEqual(response.status_code, 409, response.data)
     self.assertEqual(
         response.json,
         {
             "detail": "DAGRun with DAG ID: 'TEST_DAG_ID' and "
             "DAGRun ID: 'TEST_DAG_RUN_ID_1' already exists",
             "status": 409,
             "title": "Conflict",
             "type": EXCEPTIONS_LINK_MAP[409],
         },
     )
    def index(self):
        """Render the plugin index page listing every dag in the dagbag."""
        logging.info("REST_API.index() called")
        dagbag = self.get_dagbag()
        dags = []
        for dag_id in dagbag.dags:
            orm_dag = DagModel.get_current(dag_id)
            # A dag with no DagModel row is treated as inactive.
            active = False if orm_dag is None else not orm_dag.is_paused
            dags.append({"dag_id": dag_id, "is_active": active})

        return self.render(
            "rest_api_plugin/index.html",
            dags=dags,
            airflow_webserver_base_url=airflow_webserver_base_url,
            rest_api_endpoint=rest_api_endpoint,
            apis=apis,
            airflow_version=airflow_version,
            rest_api_plugin_version=rest_api_plugin_version)
 def test_should_response_200(self, name, request_json, session):
     """POSTing a dag run for an existing dag returns HTTP 200 and echoes
     the created run (parametrized; `name` is only the param label)."""
     del name
     dag_instance = DagModel(dag_id="TEST_DAG_ID")
     session.add(dag_instance)
     session.commit()
     response = self.client.post("api/v1/dags/TEST_DAG_ID/dagRuns",
                                 json=request_json)
     self.assertEqual(response.status_code, 200)
     # Server-generated fields (run id, dates) are taken from the response
     # itself so only the deterministic parts are pinned.
     self.assertEqual(
         {
             "conf": {},
             "dag_id": "TEST_DAG_ID",
             "dag_run_id": response.json["dag_run_id"],
             "end_date": None,
             "execution_date": response.json["execution_date"],
             "external_trigger": True,
             "start_date": response.json["start_date"],
             "state": "running",
         },
         response.json,
     )
# Example #24
    def test_scheduler_do_not_schedule_too_early(self):
        """
        No dagrun (and no queued task instances) may be created for a dag
        whose start_date lies far in the future.
        """
        dag = DAG(dag_id='test_scheduler_do_not_schedule_too_early',
                  start_date=datetime.datetime(2200, 1, 1))
        # Attach a task to the dag; the local reference was unused.
        DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        # start_date in the year 2200: no run may be created yet.
        dr = scheduler.create_dag_run(dag)
        self.assertIsNone(dr)

        queue = mock.Mock()
        scheduler._process_task_instances(dag, queue=queue)

        queue.put.assert_not_called()
# Example #25
    def _create_xcom_entries(self, dag_id, dag_run_id, execution_date, task_id, session=None):
        """Seed two XCom rows plus the owning DagModel and DagRun.

        NOTE(review): ``session`` defaults to None but is used directly —
        presumably injected by a @provide_session decorator; confirm at the
        definition site.
        """
        for i in [1, 2]:
            XCom.set(
                key=f'test-xcom-key-{i}',
                value="TEST",
                execution_date=execution_date,
                task_id=task_id,
                dag_id=dag_id,
            )

        # The xcoms above need an owning dag and dag run to be queryable.
        dag = DagModel(dag_id=dag_id)
        session.add(dag)

        dagrun = DR(
            dag_id=dag_id,
            run_id=dag_run_id,
            execution_date=execution_date,
            start_date=execution_date,
            run_type=DagRunType.MANUAL,
        )
        session.add(dagrun)
# Example #26
    def test_deactivate_unknown_dags(self):
        """
        Test that dag_ids not passed into deactivate_unknown_dags
        are deactivated when the function is invoked.
        """
        dagbag = DagBag(include_examples=True)
        dag_id = "test_deactivate_unknown_dags"
        expected_active_dags = dagbag.dags.keys()

        model_before = DagModel(dag_id=dag_id, is_active=True)
        with create_session() as session:
            session.merge(model_before)

        # Our dag_id is not among the expected active dags, so it must be
        # deactivated in the DB.
        models.DAG.deactivate_unknown_dags(expected_active_dags)

        after_model = DagModel.get_dagmodel(dag_id)
        # The detached in-memory object keeps its flag; only the DB row flips.
        self.assertTrue(model_before.is_active)
        self.assertFalse(after_model.is_active)

        # clean up — use dag_id rather than repeating the literal so the two
        # cannot drift apart.
        with create_session() as session:
            session.query(DagModel).filter(DagModel.dag_id == dag_id).delete()
# Example #27
def trigger_dag(
        dag_id: str,
        run_id: Optional[str] = None,
        conf: Optional[Union[dict, str]] = None,
        execution_date: Optional[datetime] = None,
        replace_microseconds: bool = True,
) -> Optional[DagRun]:
    """Triggers execution of DAG specified by dag_id

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :raises DagNotFound: if ``dag_id`` is not registered in DagModel
    :return: first dag run triggered - even if more than one Dag Runs were triggered or None
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    def read_store_serialized_dags():
        # Local import: this function's `conf` parameter shadows airflow's
        # configuration module, so pull it in under an inner scope.
        from airflow.configuration import conf
        return conf.getboolean('core', 'store_serialized_dags')
    dagbag = DagBag(
        dag_folder=dag_model.fileloc,
        store_serialized_dags=read_store_serialized_dags()
    )
    dag_run = DagRun()
    triggers = _trigger_dag(
        dag_id=dag_id,
        dag_run=dag_run,
        dag_bag=dagbag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    return triggers[0] if triggers else None
 def test_should_respond_200_with_schedule_interval_none(self, session=None):
     """GET /dags/{id} for a dag with schedule_interval=None returns the full
     dag resource with a null schedule_interval.

     NOTE(review): ``session`` defaults to None yet is used directly —
     presumably injected by a @provide_session decorator; confirm.
     """
     dag_model = DagModel(
         dag_id="TEST_DAG_1",
         fileloc="/tmp/dag_1.py",
         schedule_interval=None,
     )
     session.add(dag_model)
     session.commit()
     response = self.client.get("/api/v1/dags/TEST_DAG_1", environ_overrides={'REMOTE_USER': "******"})
     assert response.status_code == 200
     # file_token is the API's token derived from fileloc; the value is
     # pinned exactly as the endpoint emits it.
     assert {
         "dag_id": "TEST_DAG_1",
         "description": None,
         "fileloc": "/tmp/dag_1.py",
         "file_token": 'Ii90bXAvZGFnXzEucHki.EnmIdPaUPo26lHQClbWMbDFD1Pk',
         "is_paused": False,
         "is_subdag": False,
         "owners": [],
         "root_dag_id": None,
         "schedule_interval": None,
         "tags": [],
     } == response.json
# Example #29
    def trigger_dag(self, dag_id, run_id, conf):
        """Trigger ``dag_id`` with the given ``run_id`` and ``conf``; return
        the first created DagRun or None.
        """
        # Prefer the fileloc registered in DagModel; fall back to the
        # conventional <DAGS_FOLDER>/<dag_id>.py path on any lookup failure
        # (broad except is deliberate best-effort here).
        try:
            dag_path = DagModel.get_current(dag_id).fileloc
        except Exception:
            dag_path = path.join(DAGS_FOLDER, dag_id + ".py")

        dag_bag = DagBag(dag_folder=dag_path)
        if not dag_bag.dags:
            logging.info("Failed to import dag due to the following errors")
            logging.info(dag_bag.import_errors)
            # Best-effort retry: transient import failures (e.g. the file
            # still being written) sometimes resolve on a second attempt.
            logging.info("Sleep for 3 seconds and give it a second try")
            sleep(3)
            dag_bag = DagBag(dag_folder=dag_path)

        triggers = trigger_dag._trigger_dag(dag_id=dag_id,
                                            dag_run=DagRun(),
                                            dag_bag=dag_bag,
                                            run_id=run_id,
                                            conf=conf,
                                            execution_date=None,
                                            replace_microseconds=False)
        return triggers[0] if triggers else None
# Example #30
    def test_scheduler_max_active_runs_respected_after_clear(self):
        """
        Test if _process_task_instances only schedules ti's up to max_active_runs
        (related to issue AIRFLOW-137)
        """
        dag = DAG(
            dag_id='test_scheduler_max_active_runs_respected_after_clear',
            start_date=DEFAULT_DATE)
        dag.max_active_runs = 3

        dag_task1 = DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        # First create up to 3 dagruns in RUNNING state.
        # NOTE(review): only one create_dag_run call is made despite the
        # comment — confirm whether additional runs were intended.
        scheduler.create_dag_run(dag)

        # Reduce max_active_runs to 1
        dag.max_active_runs = 1

        queue = mock.Mock()
        # and schedule them in, so we can check how many
        # tasks are put on the queue (should be one, not 3)
        scheduler._process_task_instances(dag, queue=queue)

        queue.append.assert_called_with(
            (dag.dag_id, dag_task1.task_id, DEFAULT_DATE)
        )
# Example #31
    def test_scheduler_verify_max_active_runs_and_dagrun_timeout(self):
        """
        A dagrun must not be scheduled when max_dag_runs is reached and
        dagrun_timeout has not expired, but must be scheduled once the
        existing run has timed out.
        """
        dag = DAG(
            dag_id='test_scheduler_verify_max_active_runs_and_dagrun_timeout',
            start_date=DEFAULT_DATE)
        dag.max_active_runs = 1
        dag.dagrun_timeout = datetime.timedelta(seconds=60)

        # Attach a task to the dag; the local reference was unused.
        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)

        # Should not be scheduled: the DagRun has not timed out and
        # max_active_runs is already reached.
        new_dr = scheduler.create_dag_run(dag)
        self.assertIsNone(new_dr)

        # Backdate the running dagrun past dagrun_timeout; a new run should
        # now be scheduled.
        dr.start_date = datetime.datetime.now() - datetime.timedelta(days=1)
        session.merge(dr)
        session.commit()
        new_dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(new_dr)
# Example #32
    def test_scheduler_add_new_task(self):
        """
        A task instance should be materialized for a task that is appended
        to the dag after the dagrun was already created.
        """
        dag = DAG(
            dag_id='test_scheduler_add_new_task',
            start_date=DEFAULT_DATE)

        # Attach the initial task; the local reference was unused.
        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)

        # Only the initial task has an instance so far.
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 1)

        # Extend the dag with a second task after the run already exists.
        DummyOperator(
            task_id='dummy2',
            dag=dag,
            owner='airflow')

        queue = mock.Mock()
        scheduler._process_task_instances(dag, queue=queue)

        # The new task must now have its own task instance on the run.
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
# Example #33
    def trigger_dag(self):
        """
        Trigger execution of the DAG named by this report's ``dag_id``.

        Looks the dag up in DagModel, loads it from its fileloc (or the
        serialized-dag store) and hands an empty DagRun to ``_trigger_dag``.

        :raises DagNotFound: if ``self.dag_id`` is not registered in DagModel
        """
        dag_model = DagModel.get_current(self.dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.dag_id} not found in DagModel")

        dagbag = DagBag(
            dag_folder=dag_model.fileloc,
            store_serialized_dags=conf.getboolean("core",
                                                  "store_serialized_dags"),
        )
        dag_run = DagRun()
        self._trigger_dag(dag_id=self.dag_id, dag_bag=dagbag, dag_run=dag_run)
def trigger_dag(
    dag_id,
    run_id=None,
    conf=None,
    execution_date=None,
    replace_microseconds=True,
):
    """Trigger *dag_id* and return the first created DagRun, or None."""
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    triggers = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=DagBag(dag_folder=dag_model.fileloc),
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )
    if not triggers:
        return None
    return triggers[0]
 def test_response_409(self, session):
     """A duplicate dag_run_id must produce an HTTP 409 conflict response."""
     dag_instance = DagModel(dag_id="TEST_DAG_ID")
     session.add(dag_instance)
     # Pre-create dag runs so TEST_DAG_RUN_ID_1 already exists.
     session.add_all(self._create_test_dag_run())
     session.commit()
     response = self.client.post(
         "api/v1/dags/TEST_DAG_ID/dagRuns",
         json={
             "dag_run_id": "TEST_DAG_RUN_ID_1",
             "execution_date": self.default_time,
         },
     )
     self.assertEqual(response.status_code, 409, response.data)
     self.assertEqual(
         response.json,
         {
             "detail": "DAGRun with DAG ID: 'TEST_DAG_ID' and "
             "DAGRun ID: 'TEST_DAG_RUN_ID_1' already exists",
             "status": 409,
             "title": "Object already exists",
             "type": "about:blank",
         },
     )
# Example #36
def trigger_dag(
        dag_id,
        run_id=None,
        conf=None,
        execution_date=None,
        replace_microseconds=True,
):
    """Create a manual run of *dag_id*; return the first DagRun or None."""
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Load the dag from its registered file location.
    dag_bag = DagBag(dag_folder=dag_model.fileloc)
    new_run = DagRun()
    created = _trigger_dag(
        dag_id=dag_id,
        dag_run=new_run,
        dag_bag=dag_bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    return created[0] if created else None