def register_deployed_dags_command(url: str = None, user: str = None, pwd: str = None, verbose: int = 0):
    """
    Store in the database the DAGs reported by Airflow that are not registered yet.

    Airflow is polled until it answers, up to 20 attempts with a 15 second
    pause after each failed one. The DAGs returned by ``get_model_dags`` whose
    ``dag_id`` is not already stored as a ``DeployedDAG`` are then saved.

    :param str url: url handed to the Airflow client
    :param str user: user handed to the Airflow client
    :param str pwd: password handed to the Airflow client
    :param int verbose: progress messages are printed only when this equals 1
    :return: False when Airflow is not reachable, True otherwise. A database
      error is logged and rolled back but still yields True, as before.
    :rtype: bool
    """
    # Full imports
    import logging as log
    import time

    # Partial imports
    from sqlalchemy.exc import DBAPIError, IntegrityError

    # Internal modules imports
    from cornflow_client.airflow.api import Airflow
    from ..models import DeployedDAG
    from cornflow_core.shared import db

    af_client = Airflow(url, user, pwd)
    max_attempts = 20
    attempts = 0
    while not af_client.is_alive() and attempts < max_attempts:
        attempts += 1
        if verbose == 1:
            print(f"Airflow is not reachable (attempt {attempts})")
        time.sleep(15)

    if not af_client.is_alive():
        if verbose == 1:
            print("Airflow is not reachable")
        return False

    # A set keeps the membership test below constant-time per DAG.
    dags_registered = {dag.id for dag in DeployedDAG.get_all_objects()}

    response = af_client.get_model_dags()
    dag_list = response.json()["dags"]

    processed_dags = [
        DeployedDAG({"id": dag["dag_id"], "description": dag["description"]})
        for dag in dag_list
        if dag["dag_id"] not in dags_registered
    ]

    saved = False
    try:
        # bulk_save_objects sends the INSERT statements itself, so it has to be
        # covered by the same handlers (and rollback) as the commit.
        if processed_dags:
            db.session.bulk_save_objects(processed_dags)
        db.session.commit()
    except IntegrityError as e:
        db.session.rollback()
        log.error(f"Integrity error on deployed dags register: {e}")
    except DBAPIError as e:
        db.session.rollback()
        log.error(f"Unknown error on deployed dags register: {e}")
    else:
        saved = True

    if verbose == 1:
        if not processed_dags:
            print("No new DAGs")
        elif saved:
            print(f"DAGs registered: {processed_dags}")
        else:
            # Do not claim success after the transaction was rolled back.
            print("DAGs could not be registered")
    return True
def update_schemas_command(url, user, pwd, verbose):
    """
    Ask Airflow to update the DAG schemas once the server is reachable.

    Airflow is polled up to 20 times, pausing 15 seconds after each failed
    check. Messages are printed only when ``verbose`` equals 1.

    :param url: url handed to the Airflow client
    :param user: user handed to the Airflow client
    :param pwd: password handed to the Airflow client
    :param verbose: print progress messages when equal to 1
    :return: False when Airflow is not reachable, True otherwise (whatever the
      status code of the update request is)
    """
    import time
    from cornflow_client.airflow.api import Airflow

    retry_limit = 20
    airflow = Airflow(url, user, pwd)

    retries = 0
    while True:
        # Same evaluation order as a combined condition: liveness first.
        if airflow.is_alive() or retries >= retry_limit:
            break
        retries += 1
        if verbose == 1:
            print(f"Airflow is not reachable (attempt {retries})")
        time.sleep(15)

    if not airflow.is_alive():
        if verbose == 1:
            print("Airflow is not reachable")
        return False

    result = airflow.update_schemas()
    updated = result.status_code == 200
    if updated and verbose == 1:
        print("DAGs schemas updated")
    elif not updated and verbose == 1:
        print("The DAGs schemas were not updated properly")
    return True
def run_update_all_schemas_until_finished(self):
    """
    Launch the ``update_all_schemas`` DAG and wait until its run is no longer "running".

    The run status is requested every 2 seconds and printed each time.

    :return: the Airflow client that was used to launch and poll the run
    """
    dag_name = "update_all_schemas"
    airflow = Airflow(url="http://localhost:8080", user="******", pwd="admin")

    launch = airflow.consume_dag_run(dag_name=dag_name, payload={})
    self.assertEqual(launch.status_code, 200)
    run_info = launch.json()

    while True:
        time.sleep(2)
        reply = airflow.get_dag_run_status(dag_name, run_info["dag_run_id"])
        current = reply.json()["state"]
        print("STATUS OF update_all_schemas: {}".format(current))
        # Any state other than "running" ends the wait.
        if current != "running":
            break
    return airflow
def get(self, dag_name):
    """
    API method to get the schemas that Airflow holds for a given dag

    :param str dag_name: name of the dag whose schemas are requested
    :return: the value returned by the Airflow client's ``get_schemas_for_dag_name``
    :raises NoPermission: when the user in the request headers has no permission on the dag
    :raises AirflowError: when Airflow is not alive
    """
    requester = Auth().get_user_from_header(request.headers)
    allowed = PermissionsDAG.check_if_has_permissions(
        user_id=requester.id, dag_id=dag_name
    )
    if not allowed:
        raise NoPermission(
            error="User does not have permission to access this dag",
            status_code=403,
        )

    airflow = Airflow.from_config(current_app.config)
    if not airflow.is_alive():
        log.error("Airflow not accessible when getting schema {}".format(dag_name))
        raise AirflowError(error="Airflow is not accessible")

    # ask airflow about the dag before requesting its schemas
    airflow.get_dag_info(dag_name)
    log.info("User gets schema {}".format(dag_name))
    return airflow.get_schemas_for_dag_name(dag_name)
def test_connect_from_config(self):
    """A client built with ``Airflow.from_config`` from a plain mapping reports it is alive."""
    settings = {
        "AIRFLOW_URL": "http://127.0.0.1:8080",
        "AIRFLOW_USER": "******",
        "AIRFLOW_PWD": "admin",
    }
    configured = Airflow.from_config(settings)
    alive = configured.is_alive()
    self.assertTrue(alive)
def post(self, idx):
    """
    API method to stop an execution of the current user.

    The dag run of the execution is set to failed through the Airflow client
    and the execution state is updated to ``EXEC_STATE_STOPPED``.

    :param idx: ID of the execution
    :return: A dictionary with a message and an integer with the HTTP status code (200)
    :rtype: Tuple(dict, integer)
    :raises ObjectDoesNotExist: when no execution with that ID is found for the user
    :raises AirflowError: when Airflow is not alive
    """
    # Resolve the user once; it is needed for the lookup and for the log line.
    user = self.get_user()
    execution = ExecutionModel.get_one_object(user=user, idx=idx)
    if execution is None:
        raise ObjectDoesNotExist()

    af_client = Airflow.from_config(current_app.config)
    if not af_client.is_alive():
        raise AirflowError(error="Airflow is not accessible")

    # The response is not inspected here, so it is not bound to a name.
    af_client.set_dag_run_to_fail(
        dag_name=execution.schema, dag_run_id=execution.dag_run_id
    )
    execution.update_state(EXEC_STATE_STOPPED)
    log.info(f"User {user} stopped execution {idx}")
    return {"message": "The execution has been stopped"}, 200
def get(self, idx):
    """
    API method to get the status of an execution created by the user

    Airflow is only queried when the stored state is running or unknown; in any
    other case the execution is returned as stored.

    :param str idx: ID of the execution
    :return: the execution and an integer with the HTTP status code (200)
    :rtype: Tuple(object, integer)
    :raises ObjectDoesNotExist: when the execution is not found for the user
    :raises AirflowError: when the execution has no dag run, Airflow is not
      alive, or Airflow answers the status request with an error
    """

    def _abort(target, reason, new_state=EXEC_STATE_UNKNOWN):
        # Persist the state on the execution, then report the failure.
        text = EXECUTION_STATE_MESSAGE_DICT[new_state]
        target.update_state(new_state)
        raise AirflowError(error=reason, payload={"message": text, "state": new_state})

    execution = self.data_model.get_one_object(user=self.get_user(), idx=idx)
    if execution is None:
        raise ObjectDoesNotExist()

    needs_refresh = execution.state in (EXEC_STATE_RUNNING, EXEC_STATE_UNKNOWN)
    if not needs_refresh:
        return execution, 200

    run_id = execution.dag_run_id
    if not run_id:
        # without a stored dag_run_id there is nothing to ask Airflow about
        _abort(
            execution,
            "The execution has no dag_run associated",
            new_state=EXEC_STATE_ERROR,
        )

    airflow = Airflow.from_config(current_app.config)
    if not airflow.is_alive():
        _abort(execution, "Airflow is not accessible")

    try:
        # TODO: get the dag_name from somewhere!
        status_reply = airflow.get_dag_run_status(
            dag_name=execution.schema, dag_run_id=run_id
        )
    except AirflowError as err:
        _abort(execution, f"Airflow responded with an error: {err}")

    airflow_state = status_reply.json()["state"]
    # Airflow states without a mapping fall back to the unknown state.
    execution.update_state(AIRFLOW_TO_STATE_MAP.get(airflow_state, EXEC_STATE_UNKNOWN))
    return execution, 200
def get(self):
    """
    Report the health of Airflow and of the database connection.

    Airflow is healthy when its client reports it is alive; cornflow is healthy
    when ``SELECT 1`` can be executed on the database engine.

    :return: a dictionary with the keys ``cornflow_status`` and ``airflow_status``
    :rtype: dict
    """
    airflow = Airflow.from_config(current_app.config)
    airflow_status = STATUS_HEALTHY if airflow.is_alive() else STATUS_UNHEALTHY

    try:
        db.engine.execute("SELECT 1")
    except Exception:
        # Any failure of the probe query marks cornflow as unhealthy.
        cornflow_status = STATUS_UNHEALTHY
    else:
        cornflow_status = STATUS_HEALTHY

    return {"cornflow_status": cornflow_status, "airflow_status": airflow_status}
def test_bad_connection(self):
    """A client built with these connection settings must report that it is not alive."""
    unreachable = Airflow(
        url="http://127.0.0.1:8088",
        user="******",
        pwd="admin!",
    )
    alive = unreachable.is_alive()
    self.assertFalse(alive)
def setUp(self):
    """Create the Airflow client stored on ``self.client``."""
    connection = {
        "url": "http://127.0.0.1:8080",
        "user": "******",
        "pwd": "admin",
    }
    self.client = Airflow(**connection)
class TestAirflowClient(TestCase):
    """Tests of the Airflow client that talk to servers on 127.0.0.1."""

    def setUp(self):
        """Create the Airflow client used by the tests."""
        self.client = Airflow(url="http://127.0.0.1:8080", user="******", pwd="admin")

    def test_alive(self):
        """The client created in setUp reports it is alive."""
        self.assertTrue(self.client.is_alive())

    def test_connect_from_config(self):
        """A client built from a configuration mapping reports it is alive."""
        client = Airflow.from_config(
            {
                "AIRFLOW_URL": "http://127.0.0.1:8080",
                "AIRFLOW_USER": "******",
                "AIRFLOW_PWD": "admin",
            }
        )
        self.assertTrue(client.is_alive())

    def test_bad_connection(self):
        """A client built with these connection settings reports it is not alive."""
        client = Airflow(url="http://127.0.0.1:8088", user="******", pwd="admin!")
        self.assertFalse(client.is_alive())

    def test_update_schemas(self):
        """Updating the schemas answers with status code 200."""
        response = self.client.update_schemas()
        self.assertEqual(200, response.status_code)

    def test_update_dag_registry(self):
        """Updating the dag registry answers with status code 200."""
        response = self.client.update_dag_registry()
        self.assertEqual(200, response.status_code)

    def test_run_dag(self):
        """An execution created with run=False can be launched through the Airflow client."""
        data = _load_file(PULP_EXAMPLE)
        cf_client = CornFlow(url="http://127.0.0.1:5050/")
        # The login result is not used; the call is kept as it was.
        cf_client.login("user", "UserPassword1!")
        instance = cf_client.create_instance(data, "test_example", "test_description")
        execution = cf_client.create_execution(
            instance_id=instance["id"],
            config={"solver": "PULP_CBC_CMD", "timeLimit": 100},
            name="test_execution",
            description="execution_description",
            schema="solve_model_dag",
            run=False,
        )

        # Check that execution is not run
        status = cf_client.get_status(execution_id=execution["id"])
        self.assertEqual(-4, status["state"])

        # Run the execution
        response = self.client.run_dag(execution_id=execution["id"])
        self.assertEqual(200, response.status_code)
        self.assertIn("dag_run_id", response.json().keys())

        # Check that is optimal
        time.sleep(10)
        status = cf_client.get_status(execution_id=execution["id"])
        self.assertEqual(1, status["state"])
def post(self, **kwargs):
    """
    API method to create a new execution linked to an already existing instance

    The execution configuration is validated against the dag's config schema,
    the execution is stored, and unless the request carries ``run=0`` the dag
    is launched in Airflow after validating the instance data.

    It requires authentication to be passed in the form of a token that has to be
    linked to an existing session (login) made by a user

    :return: the newly created execution and an integer with the HTTP status code (201)
    :rtype: Tuple(object, integer)
    :raises ObjectDoesNotExist: when the instance of the execution is not found
    :raises AirflowError: when Airflow is not alive, the dag is paused, or
      Airflow answers the launch request with an error
    """

    def _reject(target, reason, state):
        # Log the problem, store the state on the execution and report it.
        log.error(reason)
        target.update_state(state)
        raise AirflowError(
            error=reason,
            payload=dict(message=EXECUTION_STATE_MESSAGE_DICT[state], state=state),
        )

    # TODO: should validation be done even if the execution is not going to be run?
    # TODO: should the schema field be cross validated with the instance schema field?
    app_config = current_app.config
    kwargs.setdefault("schema", "solve_model_dag")

    # TODO: review the order of these two operations
    # Get dag config schema and validate it
    config_schema = get_schema(app_config, kwargs["schema"], "config")
    validate_and_continue(config_schema(), kwargs["config"])

    execution, _ = self.post_list(data=kwargs)
    instance = InstanceModel.get_one_object(
        user=self.get_user(), idx=execution.instance_id
    )
    if instance is None:
        raise ObjectDoesNotExist(error="The instance to solve does not exist")

    # this allows testing without airflow interaction:
    if request.args.get("run", "1") == "0":
        execution.update_state(EXEC_STATE_NOT_RUN)
        return execution, 201

    # We now try to launch the task in airflow
    airflow = Airflow.from_config(app_config)
    if not airflow.is_alive():
        _reject(execution, "Airflow is not accessible", EXEC_STATE_ERROR_START)

    # ask airflow if dag_name exists
    dag_name = execution.schema
    dag_info = airflow.get_dag_info(dag_name)

    # Validate that instance and dag_name are compatible
    instance_schema = get_schema(app_config, dag_name, INSTANCE_SCHEMA)
    validate_and_continue(instance_schema(), instance.data)

    if dag_info.json()["is_paused"]:
        _reject(
            execution,
            "The dag exists but it is paused in airflow",
            EXEC_STATE_ERROR_START,
        )

    try:
        launch = airflow.run_dag(execution.id, dag_name=dag_name)
    except AirflowError as err:
        _reject(
            execution,
            "Airflow responded with an error: {}".format(err),
            EXEC_STATE_ERROR,
        )

    # if we succeed, we register the dag_run_id in the execution table:
    execution.dag_run_id = launch.json()["dag_run_id"]
    execution.update_state(EXEC_STATE_RUNNING)
    log.info("User {} creates execution {}".format(self.get_user_id(), execution.id))
    return execution, 201