Example #1
    def test_should_response_200_serialized(self):
        # Create empty app with empty dagbag to check if DAG is read from db
        app_serialized = app.create_app(testing=True)  # type:ignore
        dag_bag = DagBag(os.devnull, include_examples=False, store_serialized_dags=True)
        app_serialized.dag_bag = dag_bag  # type:ignore
        client = app_serialized.test_client()

        SerializedDagModel.write_dag(self.dag)

        expected = {
            'catchup': True,
            'concurrency': 16,
            'dag_id': 'test_dag',
            'dag_run_timeout': None,
            'default_view': 'tree',
            'description': None,
            'doc_md': 'details',
            'fileloc': __file__,
            'is_paused': None,
            'is_subdag': False,
            'orientation': 'LR',
            'owners': [],
            'schedule_interval': {
                '__type': 'TimeDelta',
                'days': 1,
                'microseconds': 0,
                'seconds': 0
            },
            'start_date': '2020-06-15T00:00:00+00:00',
            'tags': None,
            'timezone': "Timezone('UTC')"
        }
        response = client.get(f"/api/v1/dags/{self.dag_id}/details")
        assert response.status_code == 200
        assert response.json == expected
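The write path above has a matching read path: a DAG written with SerializedDagModel.write_dag can be fetched back from the database and deserialized. A minimal sketch of that round trip, assuming the Airflow 2.x module paths and the SerializedDagModel.get / SerializedDAG.from_dict APIs used in the later examples:

    from airflow.models import DAG
    from airflow.models.serialized_dag import SerializedDagModel
    from airflow.serialization.serialized_objects import SerializedDAG
    from airflow.utils import timezone

    # Write a trivial DAG, then read it back from the serialized_dag table.
    dag = DAG(dag_id="round_trip_demo", start_date=timezone.utcnow())
    SerializedDagModel.write_dag(dag)

    sdm = SerializedDagModel.get(dag.dag_id)      # DB row for this dag_id
    restored = SerializedDAG.from_dict(sdm.data)  # back to a DAG object
    assert restored.dag_id == "round_trip_demo"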
Example #2
    def test_serialized_dag_is_updated_only_if_dag_is_changed(self):
        """Test Serialized DAG is updated if DAG is changed"""

        example_dags = make_example_dags(example_dags_module)
        example_bash_op_dag = example_dags.get("example_bash_operator")
        dag_updated = SDM.write_dag(dag=example_bash_op_dag)
        assert dag_updated is True

        with create_session() as session:
            s_dag = session.query(SDM).get(example_bash_op_dag.dag_id)

            # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated
            # column is not updated
            dag_updated = SDM.write_dag(dag=example_bash_op_dag)
            s_dag_1 = session.query(SDM).get(example_bash_op_dag.dag_id)

            assert s_dag_1.dag_hash == s_dag.dag_hash
            assert s_dag.last_updated == s_dag_1.last_updated
            assert dag_updated is False

            # Update DAG
            example_bash_op_dag.tags += ["new_tag"]
            assert set(example_bash_op_dag.tags) == {"example", "example2", "new_tag"}

            dag_updated = SDM.write_dag(dag=example_bash_op_dag)
            s_dag_2 = session.query(SDM).get(example_bash_op_dag.dag_id)

            assert s_dag.last_updated != s_dag_2.last_updated
            assert s_dag.dag_hash != s_dag_2.dag_hash
            assert s_dag_2.data["dag"]["tags"] == ["example", "example2", "new_tag"]
            assert dag_updated is True
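The boolean return value this test checks makes write_dag cheap to call repeatedly. A short sketch of using it outside a test (the logger is ours):

    import logging

    log = logging.getLogger(__name__)

    # write_dag returns True only when the DAG's hash changed and the row
    # was rewritten, so callers can react to real updates only.
    if SDM.write_dag(dag=example_bash_op_dag):
        log.info("Serialized DAG %s was updated", example_bash_op_dag.dag_id)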
Example #3
 def _setup_session(self):
     session = Session()
     session.query(DagRun).delete()
     session.commit()
     session.close()
     dagbag = DagBag(include_examples=True)
     for dag in dagbag.dags.values():
         dag.sync_to_db()
         SerializedDagModel.write_dag(dag)
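This sync-then-serialize loop recurs in several examples below (#5, #7, #9, #21). The same pattern as a reusable helper, sketched with names of our own choosing:

    def sync_and_serialize_all(dag_folder=None, include_examples=True):
        """Sync every collected DAG to the DB and write its serialized form."""
        dagbag = DagBag(dag_folder=dag_folder, include_examples=include_examples)
        for dag in dagbag.dags.values():
            dag.sync_to_db()
            SerializedDagModel.write_dag(dag)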
Example #4
    def test_should_response_200_serialized(self):
        # Create empty app with empty dagbag to check if DAG is read from db
        app_serialized = app.create_app(testing=True)
        dag_bag = DagBag(os.devnull, include_examples=False, read_dags_from_db=True)
        app_serialized.dag_bag = dag_bag
        client = app_serialized.test_client()

        SerializedDagModel.write_dag(self.dag)

        expected = {
            "catchup": True,
            "concurrency": 16,
            "dag_id": "test_dag",
            "dag_run_timeout": None,
            "default_view": "tree",
            "description": None,
            "doc_md": "details",
            "fileloc": __file__,
            "is_paused": None,
            "is_subdag": False,
            "orientation": "LR",
            "owners": [],
            "schedule_interval": {"__type": "TimeDelta", "days": 1, "microseconds": 0, "seconds": 0,},
            "start_date": "2020-06-15T00:00:00+00:00",
            "tags": None,
            "timezone": "Timezone('UTC')",
        }
        response = client.get(
            f"/api/v1/dags/{self.dag_id}/details", environ_overrides={'REMOTE_USER': "******"}
        )
        assert response.status_code == 200
        assert response.json == expected

        response = self.client.get(
            f"/api/v1/dags/{self.dag_id}/details", environ_overrides={'REMOTE_USER': "******"}
        )
        assert response.status_code == 200
        expected = {
            'catchup': True,
            'concurrency': 16,
            'dag_id': 'test_dag',
            'dag_run_timeout': None,
            'default_view': 'tree',
            'description': None,
            'doc_md': 'details',
            'fileloc': __file__,
            'is_paused': None,
            'is_subdag': False,
            'orientation': 'LR',
            'owners': [],
            'schedule_interval': {'__type': 'TimeDelta', 'days': 1, 'microseconds': 0, 'seconds': 0},
            'start_date': '2020-06-15T00:00:00+00:00',
            'tags': None,
            'timezone': "Timezone('UTC')",
        }
        assert response.json == expected
Example #5
 @classmethod
 def setUpClass(cls):
     super(TestDagRunsEndpoint, cls).setUpClass()
     session = Session()
     session.query(DagRun).delete()
     session.commit()
     session.close()
     dagbag = DagBag(include_examples=True)
     for dag in dagbag.dags.values():
         dag.sync_to_db()
         SerializedDagModel.write_dag(dag)
Example #6
    def init_dag_and_dag_run(
        dag_file: str, dag_id: str, execution_date: datetime.datetime
    ) -> Tuple[SerializedDagModel, DagRun]:
        dags = DagBag(dag_folder=dag_file).dags

        dag = dags[dag_id]
        SerializedDagModel.write_dag(dag)
        serialized_dag = SerializedDagModel.get(dag.dag_id)
        dag_run = dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    execution_date=execution_date,
                                    state=State.RUNNING)
        return serialized_dag, dag_run
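A hypothetical call to the helper above (the file path, dag id, and timestamp are placeholders):

    serialized_dag, dag_run = init_dag_and_dag_run(
        dag_file="/path/to/my_dag.py",   # placeholder path
        dag_id="my_dag",                 # placeholder id
        execution_date=timezone.utcnow(),
    )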
Example #7
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        session = Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()
        session.close()

        dagbag = DagBag(include_examples=False, dag_folder=cls.PAPERMILL_EXAMPLE_DAGS)
        for dag in dagbag.dags.values():
            dag.sync_to_db()
            SerializedDagModel.write_dag(dag)
Example #8
    def test_should_response_200_serialized(self):
        # Create empty app with empty dagbag to check if DAG is read from db
        with conf_vars({
            ("api", "auth_backend"):
                "tests.test_utils.remote_user_api_auth_backend"
        }):
            app_serialized = app.create_app(testing=True)
        dag_bag = DagBag(os.devnull,
                         include_examples=False,
                         read_dags_from_db=True)
        app_serialized.dag_bag = dag_bag
        client = app_serialized.test_client()

        SerializedDagModel.write_dag(self.dag)

        expected = {
            "class_ref": {
                "class_name": "DummyOperator",
                "module_path": "airflow.operators.dummy_operator",
            },
            "depends_on_past": False,
            "downstream_task_ids": [],
            "end_date": None,
            "execution_timeout": None,
            "extra_links": [],
            "owner": "airflow",
            "pool": "default_pool",
            "pool_slots": 1.0,
            "priority_weight": 1.0,
            "queue": "default",
            "retries": 0.0,
            "retry_delay": {
                "__type": "TimeDelta",
                "days": 0,
                "seconds": 300,
                "microseconds": 0
            },
            "retry_exponential_backoff": False,
            "start_date": "2020-06-15T00:00:00+00:00",
            "task_id": "op1",
            "template_fields": [],
            "trigger_rule": "all_success",
            "ui_color": "#e8f7e4",
            "ui_fgcolor": "#000",
            "wait_for_downstream": False,
            "weight_rule": "downstream",
        }
        response = client.get(
            f"/api/v1/dags/{self.dag_id}/tasks/{self.task_id}",
            environ_overrides={'REMOTE_USER': "******"})
        assert response.status_code == 200
        assert response.json == expected
Example #9
    def _populate_db(self):
        session = Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()
        session.close()

        dagbag = DagBag(
            include_examples=True,
            dag_folder=self.PAPERMILL_EXAMPLE_DAGS,
        )
        for dag in dagbag.dags.values():
            dag.sync_to_db()
            SerializedDagModel.write_dag(dag)
Example #10
    def test_should_respond_200_serialized(self):

        # Get the dag out of the dagbag before we patch it to an empty one
        SerializedDagModel.write_dag(self.app.dag_bag.get_dag(self.dag_id))

        dag_bag = DagBag(os.devnull,
                         include_examples=False,
                         read_dags_from_db=True)
        patcher = unittest.mock.patch.object(self.app, 'dag_bag', dag_bag)
        patcher.start()

        expected = {
            "class_ref": {
                "class_name": "DummyOperator",
                "module_path": "airflow.operators.dummy",
            },
            "depends_on_past": False,
            "downstream_task_ids": [self.task_id2],
            "end_date": None,
            "execution_timeout": None,
            "extra_links": [],
            "owner": "airflow",
            "pool": "default_pool",
            "pool_slots": 1.0,
            "priority_weight": 1.0,
            "queue": "default",
            "retries": 0.0,
            "retry_delay": {
                "__type": "TimeDelta",
                "days": 0,
                "seconds": 300,
                "microseconds": 0
            },
            "retry_exponential_backoff": False,
            "start_date": "2020-06-15T00:00:00+00:00",
            "task_id": "op1",
            "template_fields": [],
            "trigger_rule": "all_success",
            "ui_color": "#e8f7e4",
            "ui_fgcolor": "#000",
            "wait_for_downstream": False,
            "weight_rule": "downstream",
        }
        response = self.client.get(
            f"/api/v1/dags/{self.dag_id}/tasks/{self.task_id}",
            environ_overrides={'REMOTE_USER': "******"})
        assert response.status_code == 200
        assert response.json == expected
        patcher.stop()
Example #11
        def _serialize_dag_capturing_errors(dag, session):
            """
            Try to serialize the dag to the DB, but make a note of any errors.

            We can't place them directly in import_errors, as this may be retried and succeed the next time
            """
            if dag.is_subdag:
                return []
            try:
                # We can't use bulk_write_to_db as we want to capture each error individually
                dag_was_updated = SerializedDagModel.write_dag(
                    dag,
                    min_update_interval=settings.MIN_SERIALIZED_DAG_UPDATE_INTERVAL,
                    session=session,
                )
                if dag_was_updated:
                    self._sync_perm_for_dag(dag, session=session)
                return []
            except OperationalError:
                raise
            except Exception:
                self.log.exception("Failed to write serialized DAG: %s",
                                   dag.full_filepath)
                return [(dag.fileloc,
                         traceback.format_exc(
                             limit=-self.dagbag_import_error_traceback_depth))]
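The (fileloc, traceback) tuples the closure returns are meant to be folded into an import-errors mapping by the caller. A sketch of that aggregation, not the scheduler's actual code:

    import_errors = {}
    for dag in dagbag.dags.values():
        for fileloc, stacktrace in _serialize_dag_capturing_errors(dag, session):
            import_errors[fileloc] = stacktrace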
Example #12
        def _serialize_dag_capturing_errors(dag, session):
            """
            Try to serialize the dag to the DB, but make a note of any errors.

            We can't place them directly in import_errors, as this may be retried and succeed the next time
            """
            if dag.is_subdag:
                return []
            try:
                # We can't use bulk_write_to_db as we want to capture each error individually
                dag_was_updated = SerializedDagModel.write_dag(
                    dag,
                    min_update_interval=settings.MIN_SERIALIZED_DAG_UPDATE_INTERVAL,
                    session=session,
                )
                if dag_was_updated:
                    self.log.debug("Syncing DAG permissions: %s to the DB",
                                   dag.dag_id)
                    from airflow.www.security import ApplessAirflowSecurityManager

                    security_manager = ApplessAirflowSecurityManager(
                        session=session)
                    security_manager.sync_perm_for_dag(dag.dag_id,
                                                       dag.access_control)
                return []
            except OperationalError:
                raise
            except Exception:  # pylint: disable=broad-except
                return [(dag.fileloc,
                         traceback.format_exc(
                             limit=-self.dagbag_import_error_traceback_depth))]
Example #13
    def test_event_op_dag_read_write(self):
        class TestHandler(EventMetHandler):
            def met(self, ti: TaskInstance, ts: TaskState) -> TaskAction:
                return TaskAction.START

        now = timezone.utcnow()
        dag_id = 'test_add_taskstate_0'
        dag = DAG(dag_id=dag_id, start_date=now)
        task0 = DummyOperator(task_id='backfill_task_0', owner='test', dag=dag)
        task0.add_event_dependency('key', "EVENT")
        task0.set_event_met_handler(TestHandler())
        SDM.write_dag(dag)
        with db.create_session() as session:
            sdag = session.query(SDM).first()
            dag = SerializedDAG.from_dict(sdag.data)
        self.assertEqual(dag_id, dag.dag_id)
        self.assertEqual(
            1, len(dag.task_dict["backfill_task_0"].event_dependencies()))
Example #14
    def test_get_dag_with_dag_serialization(self):
        """
        Test that Serialized DAG is updated in DagBag when it is updated in
        Serialized DAG table after 'min_serialized_dag_fetch_interval' seconds are passed.
        """

        with freeze_time(tz.datetime(2020, 1, 5, 0, 0, 0)):
            example_bash_op_dag = DagBag(
                include_examples=True).dags.get("example_bash_operator")
            SerializedDagModel.write_dag(dag=example_bash_op_dag)

            dag_bag = DagBag(read_dags_from_db=True)
            ser_dag_1 = dag_bag.get_dag("example_bash_operator")
            ser_dag_1_update_time = dag_bag.dags_last_fetched[
                "example_bash_operator"]
            self.assertEqual(example_bash_op_dag.tags, ser_dag_1.tags)
            self.assertEqual(ser_dag_1_update_time,
                             tz.datetime(2020, 1, 5, 0, 0, 0))

        # Check that if min_serialized_dag_fetch_interval has not passed we do not fetch the DAG
        # from DB
        with freeze_time(tz.datetime(2020, 1, 5, 0, 0, 4)):
            with assert_queries_count(0):
                self.assertEqual(
                    dag_bag.get_dag("example_bash_operator").tags,
                    ["example", "example2"])

        # Make a change in the DAG and write Serialized DAG to the DB
        with freeze_time(tz.datetime(2020, 1, 5, 0, 0, 6)):
            example_bash_op_dag.tags += ["new_tag"]
            SerializedDagModel.write_dag(dag=example_bash_op_dag)

        # Since min_serialized_dag_fetch_interval is passed verify that calling 'dag_bag.get_dag'
        # fetches the Serialized DAG from DB
        with freeze_time(tz.datetime(2020, 1, 5, 0, 0, 8)):
            with assert_queries_count(2):
                updated_ser_dag_1 = dag_bag.get_dag("example_bash_operator")
                updated_ser_dag_1_update_time = dag_bag.dags_last_fetched[
                    "example_bash_operator"]

        self.assertCountEqual(updated_ser_dag_1.tags,
                              ["example", "example2", "new_tag"])
        self.assertGreater(updated_ser_dag_1_update_time,
                           ser_dag_1_update_time)
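The fetch interval the docstring refers to is the [core] min_serialized_dag_fetch_interval option. A sketch of pinning it in a test, assuming the conf_vars helper used in Example #8:

    with conf_vars({("core", "min_serialized_dag_fetch_interval"): "5"}):
        dag_bag = DagBag(read_dags_from_db=True)
        # get_dag() now serves this DAG from memory for up to 5 seconds
        # before querying the serialized_dag table again.
        dag = dag_bag.get_dag("example_bash_operator")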
Example #15
    def test_collect_dags_from_db(self):
        """DAGs are collected from Database"""
        example_dags_folder = airflow.example_dags.__path__[0]
        dagbag = DagBag(example_dags_folder)

        example_dags = dagbag.dags
        for dag in example_dags.values():
            SerializedDagModel.write_dag(dag)

        new_dagbag = DagBag(read_dags_from_db=True)
        self.assertEqual(len(new_dagbag.dags), 0)
        new_dagbag.collect_dags_from_db()
        new_dags = new_dagbag.dags
        self.assertEqual(len(example_dags), len(new_dags))
        for dag_id, dag in example_dags.items():
            serialized_dag = new_dags[dag_id]

            self.assertEqual(serialized_dag.dag_id, dag.dag_id)
            self.assertEqual(set(serialized_dag.task_dict), set(dag.task_dict))
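Outside a test, the same DB-backed collection is a minimal two-step sketch:

    dag_bag = DagBag(read_dags_from_db=True)   # starts empty
    dag_bag.collect_dags_from_db()             # hydrates from the serialized_dag table
    print(sorted(dag_bag.dags))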
Example #16
    def test_serialized_dag_is_updated_only_if_dag_is_changed(self):
        """Test Serialized DAG is updated if DAG is changed"""

        example_dags = make_example_dags(example_dags_module)
        example_bash_op_dag = example_dags.get("example_bash_operator")
        SDM.write_dag(dag=example_bash_op_dag)

        with create_session() as session:
            s_dag = session.query(SDM).get(example_bash_op_dag.dag_id)

            # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated
            # column is not updated
            SDM.write_dag(dag=example_bash_op_dag)
            s_dag_1 = session.query(SDM).get(example_bash_op_dag.dag_id)

            self.assertEqual(s_dag_1.dag_hash, s_dag.dag_hash)
            self.assertEqual(s_dag.last_updated, s_dag_1.last_updated)

            # Update DAG
            example_bash_op_dag.tags += ["new_tag"]
            self.assertCountEqual(example_bash_op_dag.tags, ["example", "new_tag"])

            SDM.write_dag(dag=example_bash_op_dag)
            s_dag_2 = session.query(SDM).get(example_bash_op_dag.dag_id)

            self.assertNotEqual(s_dag.last_updated, s_dag_2.last_updated)
            self.assertNotEqual(s_dag.dag_hash, s_dag_2.dag_hash)
            self.assertEqual(s_dag_2.data["dag"]["tags"], ["example", "new_tag"])
Example #17
    def test_serialized_dag_is_updated_only_if_dag_is_changed(self):
        """Test Serialized DAG is updated if DAG is changed"""

        example_dags = make_example_dags(example_dags_module)
        example_bash_op_dag = example_dags.get("example_bash_operator")
        SDM.write_dag(dag=example_bash_op_dag)

        with create_session() as session:
            last_updated = session.query(SDM.last_updated).filter(
                SDM.dag_id == example_bash_op_dag.dag_id).one_or_none()

            # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated
            # column is not updated
            SDM.write_dag(dag=example_bash_op_dag)
            last_updated_1 = session.query(SDM.last_updated).filter(
                SDM.dag_id == example_bash_op_dag.dag_id).one_or_none()

            self.assertEqual(last_updated, last_updated_1)

            # Update DAG
            example_bash_op_dag.tags += ["new_tag"]
            self.assertCountEqual(example_bash_op_dag.tags,
                                  ["example", "new_tag"])

            SDM.write_dag(dag=example_bash_op_dag)
            new_s_dag = session.query(SDM.last_updated, SDM.data).filter(
                SDM.dag_id == example_bash_op_dag.dag_id).one_or_none()

            self.assertNotEqual(last_updated, new_s_dag.last_updated)
            self.assertEqual(new_s_dag.data["dag"]["tags"],
                             ["example", "new_tag"])
Example #18
 def setUp(self):
     app = application.create_app(testing=True)
     app.config['WTF_CSRF_METHODS'] = []
     self.app = app.test_client()
     self.default_date = datetime(2020, 3, 1)
     self.dag = DAG(
         "testdag",
         start_date=self.default_date,
         user_defined_filters={"hello": lambda name: 'Hello ' + name},
         user_defined_macros={
             "fullname": lambda fname, lname: fname + " " + lname
         })
     self.task1 = BashOperator(task_id='task1',
                               bash_command='{{ task_instance_key_str }}',
                               dag=self.dag)
     self.task2 = BashOperator(
         task_id='task2',
         bash_command='echo {{ fullname("Apache", "Airflow") | hello }}',
         dag=self.dag)
     SerializedDagModel.write_dag(self.dag)
     with create_session() as session:
         session.query(RTIF).delete()
Example #19
    def test_should_respond_200_serialized(self):
        # Get the dag out of the dagbag before we patch it to an empty one
        SerializedDagModel.write_dag(self.app.dag_bag.get_dag(self.dag_id))

        # Create empty app with empty dagbag to check if DAG is read from db
        dag_bag = DagBag(os.devnull,
                         include_examples=False,
                         read_dags_from_db=True)
        patcher = unittest.mock.patch.object(self.app, 'dag_bag', dag_bag)
        patcher.start()

        expected = {
            "catchup": True,
            "concurrency": 16,
            "dag_id": "test_dag",
            "dag_run_timeout": None,
            "default_view": "tree",
            "description": None,
            "doc_md": "details",
            "fileloc": __file__,
            "file_token": FILE_TOKEN,
            "is_paused": None,
            "is_subdag": False,
            "orientation": "LR",
            "owners": ['airflow'],
            "params": {
                "foo": 1
            },
            "schedule_interval": {
                "__type": "TimeDelta",
                "days": 1,
                "microseconds": 0,
                "seconds": 0,
            },
            "start_date": "2020-06-15T00:00:00+00:00",
            "tags": [{
                'name': 'example'
            }],
            "timezone": "Timezone('UTC')",
        }
        response = self.client.get(f"/api/v1/dags/{self.dag_id}/details",
                                   environ_overrides={'REMOTE_USER': "******"})

        assert response.status_code == 200
        assert response.json == expected

        patcher.stop()

        response = self.client.get(f"/api/v1/dags/{self.dag_id}/details",
                                   environ_overrides={'REMOTE_USER': "******"})
        assert response.status_code == 200
        expected = {
            'catchup': True,
            'concurrency': 16,
            'dag_id': 'test_dag',
            'dag_run_timeout': None,
            'default_view': 'tree',
            'description': None,
            'doc_md': 'details',
            'fileloc': __file__,
            "file_token": FILE_TOKEN,
            'is_paused': None,
            'is_subdag': False,
            'orientation': 'LR',
            'owners': ['airflow'],
            "params": {
                "foo": 1
            },
            'schedule_interval': {
                '__type': 'TimeDelta',
                'days': 1,
                'microseconds': 0,
                'seconds': 0
            },
            'start_date': '2020-06-15T00:00:00+00:00',
            'tags': [{
                'name': 'example'
            }],
            'timezone': "Timezone('UTC')",
        }
        assert response.json == expected
Example #20
 def _write_example_dags(self):
     example_dags = make_example_dags(example_dags_module)
     for dag in example_dags.values():
         SDM.write_dag(dag)
     return example_dags
Example #21
 def setUp(self):
     super(TestApiExperimental, self).setUp()
     dagbag = DagBag(include_examples=True)
     for dag in dagbag.dags.values():
         dag.sync_to_db()
         SerializedDagModel.write_dag(dag)