def remove_outdated_dags(cwl_id):
    """
    Clean up every DAG matching cwl_id except the most recently modified one.

    Python files found in DAGS_FOLDER (Airflow examples excluded) are scanned
    and those whose rootname contains cwl_id are collected together with the
    modification time of the file. All of them but the newest are then
    processed: a DAG that has RUNNING DagRuns is skipped, any other DAG is
    deleted through delete_dag.delete_dag and both its Python file and the
    sibling ".cwl" file are removed from disk. Failures are logged and do not
    interrupt the cleanup.
    """
    logging.info(f"""Searching for dags based on cwl_id: {cwl_id}""")

    dags = {}
    for py_path in list_py_file_paths(DAGS_FOLDER, include_examples=False):
        dag_id = get_rootname(py_path)
        if cwl_id in dag_id:
            dags[dag_id] = {
                "location": py_path,
                "modified": datetime.fromtimestamp(os.path.getmtime(py_path))
            }
            logging.info(f"""Found dag_id: {dag_id}, modified: {dags[dag_id]["modified"]}""")

    # Oldest first; the last (newest) entry is the one that is kept.
    oldest_first = sorted(dags.items(), key=lambda item: item[1]["modified"])
    outdated = oldest_first[:-1]

    for dag_id, details in outdated:
        logging.info(f"""Cleaning dag_id: {dag_id}""")

        running = DagRun.find(dag_id=dag_id, state=State.RUNNING)
        if len(running) != 0:
            logging.info("Skipping, DAG has running DagRuns")
            continue

        try:
            delete_dag.delete_dag(dag_id)
        except Exception as ex:
            logging.error(f"""Failed to delete DAG\n {ex}""")

        # File removal is attempted even when the delete call above failed.
        py_location = details["location"]
        cwl_location = os.path.splitext(py_location)[0]+".cwl"
        for f in (py_location, cwl_location):
            try:
                logging.info(f"""Deleting DAG file: {f}""")
                os.remove(f)
            except Exception as ex:
                logging.error(f"""Failed to delete file {f}\n {ex}""")
def test_delete_dag_successful_delete(self):
    """
    Check that delete_dag(dag_id=self.key) clears every model except LOG.

    Before the call DM, DR, TI, TF, TR and LOG each hold exactly one row for
    self.key; afterwards only the LOG row is expected to remain.
    """
    cleared_models = (DM, DR, TI, TF, TR)

    with create_session() as session:
        for model in cleared_models + (LOG,):
            self.assertEqual(
                session.query(model).filter(model.dag_id == self.key).count(), 1)

    delete_dag(dag_id=self.key)

    with create_session() as session:
        for model in cleared_models:
            self.assertEqual(
                session.query(model).filter(model.dag_id == self.key).count(), 0)
        # With default arguments the log row is expected to survive.
        self.assertEqual(
            session.query(LOG).filter(LOG.dag_id == self.key).count(), 1)
def test_delete_dag_successful_delete(self):
    """
    Check that delete_dag(dag_id=self.key) clears every model except LOG.

    Before the call DM, DS, DR, TI and LOG each hold exactly one row for
    self.key; afterwards only the LOG row is expected to remain.
    """
    cleared_models = (DM, DS, DR, TI)

    for model in cleared_models + (LOG,):
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 1)

    delete_dag(dag_id=self.key)

    for model in cleared_models:
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 0)
    # With default arguments the log row is expected to survive.
    self.assertEqual(
        self.session.query(LOG).filter(LOG.dag_id == self.key).count(), 1)
def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
    """
    Check that keep_records_in_log=False also clears the LOG rows.

    Before the call DM, DR, TI, TF, TR and LOG each hold exactly one row for
    self.key; afterwards none of them is expected to hold any.
    """
    checked_models = (DM, DR, TI, TF, TR, LOG)

    with create_session() as session:
        for model in checked_models:
            self.assertEqual(
                session.query(model).filter(model.dag_id == self.key).count(), 1)

    delete_dag(dag_id=self.key, keep_records_in_log=False)

    with create_session() as session:
        for model in checked_models:
            self.assertEqual(
                session.query(model).filter(model.dag_id == self.key).count(), 0)
def delete_dag(dag_id):
    """
    Pause a DAG, remove it through ApiUtil and delete it from Airflow.

    Returns an ApiResponse: bad_request when dag_id is None or is not among
    the DAGs loaded by DagBag('dags'), server_error when any step of the
    deletion raises, and success otherwise.
    """
    logging.info("Executing custom 'delete_dag' function")

    # Validate the dag_id argument before touching anything.
    if dag_id is None:
        logging.warning("The dag_id argument wasn't provided")
        return ApiResponse.bad_request(
            "The dag_id argument should be provided")

    known_dags = DagBag('dags').dags
    if dag_id not in known_dags:
        return ApiResponse.bad_request("Dag id {} not found".format(dag_id))

    try:
        # Pause first, then remove the DAG file.
        ApiUtil.pause_dag(dag_id)
        ApiUtil.remove_dag(dag_id)

        # Finally let Airflow's own delete routine clean up.
        from airflow.api.common.experimental import delete_dag as airflow_delete
        airflow_delete.delete_dag(dag_id)
        logging.info("Delete Result Success.")
    except Exception as err:
        error_message = "An error occurred while trying to Delete the DAG '" + str(
            dag_id) + "': " + str(err)
        logging.error(error_message)
        return ApiResponse.server_error(error_message)
    else:
        return ApiResponse.success("DAG [{}] has been deleted".format(dag_id))
def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
    """
    Check that keep_records_in_log=False also clears the LOG rows.

    Before the call DM, DR, TI and LOG each hold exactly one row for
    self.key; afterwards none of them is expected to hold any.
    """
    checked_models = (DM, DR, TI, LOG)

    for model in checked_models:
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 1)

    delete_dag(dag_id=self.key, keep_records_in_log=False)

    for model in checked_models:
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 0)
def test_delete_dag_successful_delete(self):
    """
    Check that delete_dag(dag_id=self.key) clears every model except LOG.

    Before the call DM, DR, TI and LOG each hold exactly one row for
    self.key; afterwards only the LOG row is expected to remain.
    """
    cleared_models = (DM, DR, TI)

    for model in cleared_models + (LOG,):
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 1)

    delete_dag(dag_id=self.key)

    for model in cleared_models:
        self.assertEqual(
            self.session.query(model).filter(model.dag_id == self.key).count(), 0)
    # With default arguments the log row is expected to survive.
    self.assertEqual(
        self.session.query(LOG).filter(LOG.dag_id == self.key).count(), 1)
def delete_dag(self):
    """Custom Function for the delete_dag API.

    Reads 'dag_id' from the request, removes "<dag_id>.py" from
    airflow_dags_folder when that file exists, and deletes the DAG through
    Airflow's experimental delete_dag with keep_records_in_log=False.

    The dag_id comes from the request and is used to build a filesystem path,
    so it is rejected when it is empty or contains a path component (for
    example "../other"). Without that check a crafted id could delete a
    ".py" file outside the DAGs folder.

    Returns:
        ApiResponse.success with a confirmation message, or
        ApiResponse.server_error when dag_id is invalid or any step raises.
    """
    logging.info("Executing custom 'delete_dag' function")
    dag_id = self.get_argument(request, 'dag_id')
    logging.info("dag_id to delete: '" + str(dag_id) + "'")

    try:
        # A bare file name equals its own basename; anything else carries a
        # directory part and must never reach os.remove.
        if not dag_id or os.path.basename(dag_id) != dag_id:
            raise ValueError("invalid dag_id")

        dag_full_path = os.path.join(airflow_dags_folder, dag_id + ".py")
        if os.path.exists(dag_full_path):
            os.remove(dag_full_path)

        # Local import: binds the Airflow module only inside this function.
        from airflow.api.common.experimental import delete_dag
        deleted_dags = delete_dag.delete_dag(dag_id, keep_records_in_log=False)
        if deleted_dags > 0:
            logging.info("Deleted dag " + dag_id)
        else:
            logging.info("No dags deleted")
    except Exception as e:
        error_message = "An error occurred while trying to delete the DAG '" + str(dag_id) + "': " + str(e)
        logging.error(error_message)
        return ApiResponse.server_error(error_message)

    return ApiResponse.success({
        "message": "DAG [{}] deleted".format(dag_id)
    })
def remove_outdated_dags(cwl_id, dags_folder):
    """
    Clean up every DAG matching cwl_id except the most recently modified one.

    Python files found in dags_folder (Airflow examples excluded) are
    scanned. The dag_id is taken to be the rootname of the file, so a file
    belongs to the target workflow when cwl_id occurs in its rootname. For
    each match the location and the modification time of the file are
    recorded; the matches are then sorted by that time and the newest one is
    left out, which yields the list of outdated DAGs.

    An outdated DAG that has RUNNING DagRuns is skipped. Any other outdated
    DAG is deleted through delete_dag.delete_dag and both its Python file
    and the sibling ".cwl" file are removed from disk. Failures are logged
    and do not interrupt the cleanup.
    """
    logging.info(
        f"Searching for dags based on cwl_id: {cwl_id} in order to remove the old ones"
    )

    dags = {}
    for py_path in list_py_file_paths(dags_folder, include_examples=False):
        dag_id = get_rootname(py_path)
        if cwl_id in dag_id:
            dags[dag_id] = {
                "location": py_path,
                "modified": datetime.fromtimestamp(os.path.getmtime(py_path))
            }
            logging.info(
                f"Found dag_id: {dag_id}, modified: {dags[dag_id]['modified']}")

    # Oldest first; the last (newest) entry is the one that is kept.
    oldest_first = sorted(dags.items(), key=lambda item: item[1]["modified"])
    outdated = oldest_first[:-1]

    for dag_id, details in outdated:
        logging.info(f"Cleaning dag_id: {dag_id}")

        running = DagRun.find(dag_id=dag_id, state=State.RUNNING)
        if len(running) != 0:
            logging.info("Skipping, DAG has running DagRuns")
            continue

        try:
            delete_dag.delete_dag(dag_id)
        except Exception as ex:
            logging.error(f"Failed to delete DAG\n {ex}")

        # File removal is attempted even when the delete call above failed.
        py_location = details["location"]
        cwl_location = os.path.splitext(py_location)[0] + ".cwl"
        for f in (py_location, cwl_location):
            try:
                logging.info(f"Deleting DAG file: {f}")
                os.remove(f)
            except Exception as ex:
                logging.error(f"Failed to delete file {f}\n {ex}")
def delete_dag(dag_id: str, session: Session):
    """Delete the DAG identified by dag_id and return (NoContent, 204).

    Raises NotFound when the underlying delete raises DagNotFound, and
    AlreadyExists when it raises AirflowException.
    """
    # TODO: The delete function is shared with the /delete endpoint used by
    # the web UI, so it is reused here to simplify maintenance. Move it to
    # another place when the experimental/legacy API is removed.
    from airflow.api.common.experimental import delete_dag as experimental_api

    try:
        experimental_api.delete_dag(dag_id, session=session)
    except DagNotFound:
        raise NotFound(f"Dag with id: '{dag_id}' not found")
    except AirflowException:
        raise AlreadyExists(
            detail=f"Task instances of dag with id: '{dag_id}' are still running")
    else:
        return NoContent, 204
def test_delete_dag_dag_still_in_dagbag(self):
    """
    Expect delete_dag(self.dag_id) to raise DagFileExists and change nothing.

    Row counts for self.dag_id in DagModel, DagStat, DagRun and TaskInstance
    are captured before the call and must be the same afterwards.
    """
    model_names = ['DagModel', 'DagStat', 'DagRun', 'TaskInstance']

    def count_rows(model_name):
        model = getattr(models, model_name)
        return self.session.query(model).filter(
            model.dag_id == self.dag_id).count()

    counts_before = {name: count_rows(name) for name in model_names}

    with self.assertRaises(DagFileExists):
        delete_dag(self.dag_id)

    # No change should happen in DB
    for name in model_names:
        self.assertEqual(count_rows(name), counts_before[name])
def delete_dag(dag_id):
    """Delete all DB records related to the specified Dag.

    Returns a JSON response with the number of removed records. When
    delete.delete_dag raises AirflowException, the error is logged and a JSON
    error response carrying the exception's status_code is returned instead.
    """
    try:
        count = delete.delete_dag(dag_id)
    except AirflowException as err:
        log.error(err)
        failure = jsonify(error=f"{err}")
        failure.status_code = err.status_code
        return failure
    else:
        return jsonify(message=f"Removed {count} record(s)", count=count)
def test_delete_dag_dag_still_in_dagbag(self):
    """
    Expect delete_dag(self.dag_id) to raise DagFileExists and change nothing.

    Row counts for self.dag_id in DagModel, DagRun and TaskInstance are
    captured before the call and must be the same afterwards.
    """
    model_names = ['DagModel', 'DagRun', 'TaskInstance']

    def count_rows(model_name):
        model = getattr(models, model_name)
        return self.session.query(model).filter(
            model.dag_id == self.dag_id).count()

    counts_before = {name: count_rows(name) for name in model_names}

    with self.assertRaises(DagFileExists):
        delete_dag(self.dag_id)

    # No change should happen in DB
    for name in model_names:
        self.assertEqual(count_rows(name), counts_before[name])
def delete(self, session=None):
    """
    Delete the DAG named by the 'dag_id' request argument, then go home.

    The DAG is first removed through DagenDagQueryset and the dagen templates
    are refreshed; after that delete_dag.delete_dag is called. The outcome is
    reported with a flash message (category 'error' for DagNotFound and
    DagFileExists) and the home redirect is returned in every case.
    """
    dag_id = request.args.get('dag_id')

    queryset = DagenDagQueryset()
    queryset.delete_dag(dag_id).done()
    refresh_dagen_templates()

    try:
        delete_dag.delete_dag(dag_id)
    except DagNotFound:
        flash("DAG with id {} not found. Cannot delete".format(dag_id), 'error')
    except DagFileExists:
        flash(
            "Dag id {} is still in DagBag. "
            "Remove the DAG file first.".format(dag_id), 'error')
    else:
        flash("Deleting DAG with id {}. May take a couple minutes to fully"
              " disappear.".format(dag_id))

    # Success or failure, the user lands on the home page.
    return self._redirect_home()
def delete_dag(dag_id):
    """
    Delete all DB records related to the specified Dag.

    Returns a JSON response with the number of removed records. When
    delete.delete_dag raises AirflowException, the error is logged and a JSON
    error response carrying the exception's status_code is returned instead.
    """
    try:
        removed = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = err.status_code
        return failure
    else:
        return jsonify(message="Removed {} record(s)".format(removed), count=removed)
def delete_dag(dag_id):
    """
    Delete all DB records related to the specified Dag.

    Returns a JSON response with the number of removed records. When
    delete.delete_dag raises AirflowException, the error is logged and a JSON
    error response carrying the exception's status_code is returned instead.
    """
    try:
        removed = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = err.status_code
        return failure
    else:
        return jsonify(message="Removed {} record(s)".format(removed), count=removed)
def delete_dag(dag_id):
    """
    Delete the specified Dag through delete.delete_dag.

    When AirflowException is raised the error is logged and a JSON error
    response with status 404 is returned. Otherwise a JSON "Deleted ..."
    message is returned with status 204; if `g` carries a truthy `user`, the
    deletion is logged with that user first.
    """
    try:
        deleted = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = 404
        return failure

    current_user = getattr(g, 'user', None)
    if current_user:
        _log.info("User {} deleted {}".format(g.user, deleted))

    success = jsonify(message="Deleted {}".format(deleted))
    success.status_code = 204
    return success
def delete_dag(self, dag_id):
    """Call delete_dag.delete_dag for dag_id and report its result as text."""
    return "Deleted {}".format(delete_dag.delete_dag(dag_id))
def test_delete_dag_non_existent_dag(self):
    """delete_dag is expected to raise DagNotFound for an unknown dag_id."""
    self.assertRaises(DagNotFound, delete_dag, "non-existent DAG")
def test_delete_dag_non_existent_dag(self):
    """delete_dag is expected to raise DagNotFound for an unknown dag_id."""
    self.assertRaises(DagNotFound, delete_dag, "non-existent DAG")
def delete_dag(self, dag_id):
    """Call delete_dag.delete_dag for dag_id and report the returned count as text."""
    return "Removed {} record(s)".format(delete_dag.delete_dag(dag_id))
def delete_dag(self, dag_id):
    """Call delete_dag.delete_dag for dag_id and report the returned count as text."""
    return "Removed {} record(s)".format(delete_dag.delete_dag(dag_id))
def test_delete_subdag_successful_delete(self):
    """
    Delete self.key after a sub-DAG setup, not keeping records in the log.

    The models are created with for_sub_dag=True and verified to exist; after
    delete_dag they must be gone, with zero log records expected.
    """
    # Arrange
    self.setup_dag_models(for_sub_dag=True)
    self.check_dag_models_exists()

    # Act
    delete_dag(dag_id=self.key, keep_records_in_log=False)

    # Assert
    self.check_dag_models_removed(expect_logs=0)
def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
    """
    Delete self.key with keep_records_in_log=False.

    The models are created and verified to exist; after delete_dag they must
    be gone, with zero log records expected.
    """
    # Arrange
    self.setup_dag_models()
    self.check_dag_models_exists()

    # Act
    delete_dag(dag_id=self.key, keep_records_in_log=False)

    # Assert
    self.check_dag_models_removed(expect_logs=0)
def test_delete_dag_successful_delete(self):
    """
    Delete self.key with default arguments.

    The models are created and verified to exist; after delete_dag they must
    be gone, with one log record expected to remain.
    """
    # Arrange
    self.setup_dag_models()
    self.check_dag_models_exists()

    # Act
    delete_dag(dag_id=self.key)

    # Assert
    self.check_dag_models_removed(expect_logs=1)
def delete_dag(self, dag_id):
    """Call delete_dag.delete_dag for dag_id and report the returned count as text."""
    count = delete_dag.delete_dag(dag_id)
    message = f"Removed {count} record(s)"
    return message
def test_delete_dag_non_existent_dag(self):
    """delete_dag is expected to raise DagNotFound for an unknown dag_id."""
    missing_dag_id = "non-existent DAG"
    with pytest.raises(DagNotFound):
        delete_dag(missing_dag_id)