Ejemplo n.º 1
0
def remove_outdated_dags(cwl_id):
    """
    Remove every DAG matching cwl_id except the most recently modified one.

    Scans DAGS_FOLDER (Airflow examples excluded) for Python files whose
    rootname contains cwl_id and orders them by file modification time.
    All but the newest are treated as outdated. An outdated DAG is deleted
    through delete_dag.delete_dag and its .py file plus the sibling .cwl
    file are removed from disk, unless DagRun.find reports RUNNING runs for
    it, in which case it is skipped. Failures are only logged, so one broken
    DAG does not interrupt the cleanup of the others.
    """
    logging.info(f"""Searching for dags based on cwl_id: {cwl_id}""")

    candidates = {}
    for py_path in list_py_file_paths(DAGS_FOLDER, include_examples=False):
        rootname = get_rootname(py_path)
        if cwl_id in rootname:
            modified = datetime.fromtimestamp(os.path.getmtime(py_path))
            candidates[rootname] = {"location": py_path, "modified": modified}
            logging.info(f"""Found dag_id: {rootname}, modified: {modified}""")

    oldest_first = sorted(candidates.items(), key=lambda item: item[1]["modified"])
    # The last entry is the newest DAG and must survive the cleanup.
    for rootname, metadata in oldest_first[:-1]:
        logging.info(f"""Cleaning dag_id: {rootname}""")
        if DagRun.find(dag_id=rootname, state=State.RUNNING):
            logging.info("Skipping, DAG has running DagRuns")
            continue

        try:
            delete_dag.delete_dag(rootname)
        except Exception as ex:
            logging.error(f"""Failed to delete DAG\n {ex}""")

        py_path = metadata["location"]
        cwl_path = os.path.splitext(py_path)[0]+".cwl"
        for stale_file in (py_path, cwl_path):
            try:
                logging.info(f"""Deleting DAG file: {stale_file}""")
                os.remove(stale_file)
            except Exception as ex:
                logging.error(f"""Failed to delete file {stale_file}\n {ex}""")
Ejemplo n.º 2
0
    def test_delete_dag_successful_delete(self):
        """Check that delete_dag clears the DAG's rows but keeps its LOG row.

        Before the call each of DM, DR, TI, TF, TR and LOG must hold exactly
        one row for self.key; afterwards only LOG still holds one.
        """
        def rows(session, model):
            # Number of rows of `model` that belong to the DAG under test.
            return session.query(model).filter(model.dag_id == self.key).count()

        with create_session() as session:
            for model in (DM, DR, TI, TF, TR, LOG):
                self.assertEqual(rows(session, model), 1)

        delete_dag(dag_id=self.key)

        expected_after = ((DM, 0), (DR, 0), (TI, 0), (TF, 0), (TR, 0), (LOG, 1))
        with create_session() as session:
            for model, expected in expected_after:
                self.assertEqual(rows(session, model), expected)
Ejemplo n.º 3
0
    def test_delete_dag_successful_delete(self):
        """Check that delete_dag clears the DAG's rows but keeps its LOG row.

        Uses self.session: DM, DS, DR, TI and LOG must each hold one row for
        self.key before the call; afterwards only LOG still holds one.
        """
        def rows(model):
            # Number of rows of `model` that belong to the DAG under test.
            return self.session.query(model).filter(
                model.dag_id == self.key).count()

        for model in (DM, DS, DR, TI, LOG):
            self.assertEqual(rows(model), 1)

        delete_dag(dag_id=self.key)

        for model, expected in ((DM, 0), (DS, 0), (DR, 0), (TI, 0), (LOG, 1)):
            self.assertEqual(rows(model), expected)
Ejemplo n.º 4
0
    def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
        """Check that keep_records_in_log=False also removes the LOG row.

        Each of DM, DR, TI, TF, TR and LOG must hold exactly one row for
        self.key before the call and none afterwards.
        """
        checked_models = (DM, DR, TI, TF, TR, LOG)

        def rows(session, model):
            # Number of rows of `model` that belong to the DAG under test.
            return session.query(model).filter(model.dag_id == self.key).count()

        with create_session() as session:
            for model in checked_models:
                self.assertEqual(rows(session, model), 1)

        delete_dag(dag_id=self.key, keep_records_in_log=False)

        with create_session() as session:
            for model in checked_models:
                self.assertEqual(rows(session, model), 0)
Ejemplo n.º 5
0
def delete_dag(dag_id):
    """Pause, remove and delete the DAG identified by dag_id.

    Returns ApiResponse.bad_request when dag_id is None or is not present
    in DagBag('dags').dags, ApiResponse.server_error when any of the
    deletion steps raises, and ApiResponse.success otherwise.
    """
    logging.info("Executing custom 'delete_dag' function")

    # Validate the argument before touching anything.
    if dag_id is None:
        logging.warning("The dag_id argument wasn't provided")
        return ApiResponse.bad_request("The dag_id argument should be provided")

    known_dags = DagBag('dags').dags
    if dag_id not in known_dags:
        return ApiResponse.bad_request("Dag id {} not found".format(dag_id))

    try:
        # Pause first, then drop the DAG file.
        ApiUtil.pause_dag(dag_id)
        ApiUtil.remove_dag(dag_id)
        # Finally let Airflow's own delete routine run. The import stays
        # inside the try block so that an import failure is reported as a
        # server error as well.
        from airflow.api.common.experimental import delete_dag as airflow_delete_dag
        airflow_delete_dag.delete_dag(dag_id)
        logging.info("Delete Result Success.")
    except Exception as e:
        error_message = "An error occurred while trying to Delete the DAG '{}': {}".format(
            dag_id, e)
        logging.error(error_message)
        return ApiResponse.server_error(error_message)
    else:
        return ApiResponse.success("DAG [{}] has been deleted".format(dag_id))
Ejemplo n.º 6
0
    def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
        """Check that keep_records_in_log=False also removes the LOG row.

        DM, DR, TI and LOG must each hold one row for self.key before the
        call and none afterwards.
        """
        checked_models = (DM, DR, TI, LOG)

        def rows(model):
            # Number of rows of `model` that belong to the DAG under test.
            return self.session.query(model).filter(model.dag_id == self.key).count()

        for model in checked_models:
            self.assertEqual(rows(model), 1)

        delete_dag(dag_id=self.key, keep_records_in_log=False)

        for model in checked_models:
            self.assertEqual(rows(model), 0)
Ejemplo n.º 7
0
    def test_delete_dag_successful_delete(self):
        """Check that delete_dag clears the DAG's rows but keeps its LOG row.

        DM, DR, TI and LOG must each hold one row for self.key before the
        call; afterwards only LOG still holds one.
        """
        def rows(model):
            # Number of rows of `model` that belong to the DAG under test.
            return self.session.query(model).filter(model.dag_id == self.key).count()

        for model in (DM, DR, TI, LOG):
            self.assertEqual(rows(model), 1)

        delete_dag(dag_id=self.key)

        for model, expected in ((DM, 0), (DR, 0), (TI, 0), (LOG, 1)):
            self.assertEqual(rows(model), expected)
    def delete_dag(self):
        """Custom Function for the delete_dag API.

        Reads ``dag_id`` from the request, removes ``<dag_id>.py`` from the
        DAGs folder when it exists and then deletes the DAG's records through
        Airflow's experimental ``delete_dag`` (log records are not kept).

        ``dag_id`` is request input that is turned into a filesystem path, so
        a missing value or one containing a path separator is rejected before
        anything is deleted; otherwise a crafted id could remove ``.py`` files
        outside the DAGs folder.

        Returns:
            ``ApiResponse.success`` with a confirmation message, or
            ``ApiResponse.server_error`` describing what went wrong.
        """
        logging.info("Executing custom 'delete_dag' function")

        dag_id = self.get_argument(request, 'dag_id')
        logging.info("dag_id to delete: '" + str(dag_id) + "'")

        try:
            # Validation failures are raised inside the try block so they are
            # reported through the same server_error path as other failures.
            if not dag_id:
                raise ValueError("The dag_id argument should be provided")
            path_separators = [sep for sep in (os.sep, os.altsep) if sep]
            if any(sep in dag_id for sep in path_separators):
                raise ValueError("dag_id must not contain path separators")

            dag_full_path = airflow_dags_folder + os.sep + dag_id + ".py"

            if os.path.exists(dag_full_path):
                os.remove(dag_full_path)

            from airflow.api.common.experimental import delete_dag
            deleted_dags = delete_dag.delete_dag(dag_id, keep_records_in_log=False)
            if deleted_dags > 0:
                logging.info("Deleted dag " + dag_id)
            else:
                logging.info("No dags deleted")
        except Exception as e:
            error_message = "An error occurred while trying to delete the DAG '" + str(dag_id) + "': " + str(e)
            logging.error(error_message)
            return ApiResponse.server_error(error_message)

        return ApiResponse.success({
            "message": "DAG [{}] deleted".format(dag_id)
        })
Ejemplo n.º 9
0
def remove_outdated_dags(cwl_id, dags_folder):
    """
    Delete all but the newest DAG that belongs to the workflow cwl_id.

    Walks the Python files found in dags_folder (Airflow examples are
    excluded). The rootname of each file is used as its dag_id, which
    assumes the dag_id written inside the file equals the file's rootname
    and that the rootname contains the cwl_id. Every file whose rootname
    contains cwl_id is recorded together with its modification timestamp.
    The records are sorted by timestamp and the newest one is left out, so
    the remainder is the list of outdated DAGs for that workflow.

    Each outdated DAG is removed from the DB (delete_dag.delete_dag) and
    from disk (its .py file and the .cwl file with the same rootname). The
    only case in which an outdated DAG is kept is when it has at least one
    DagRun in the RUNNING state. Errors are logged and do not stop the
    processing of the remaining DAGs.
    """

    logging.info(
        f"Searching for dags based on cwl_id: {cwl_id} in order to remove the old ones"
    )

    matching = {}
    for py_file in list_py_file_paths(dags_folder, include_examples=False):
        name = get_rootname(py_file)
        if cwl_id in name:
            matching[name] = {
                "location": py_file,
                "modified": datetime.fromtimestamp(os.path.getmtime(py_file))
            }
            logging.info(
                f"Found dag_id: {name}, modified: {matching[name]['modified']}")

    def modified_at(entry):
        # entry is a (dag_id, metadata) pair taken from matching.items()
        return entry[1]["modified"]

    outdated = sorted(matching.items(), key=modified_at)[:-1]
    for name, info in outdated:
        logging.info(f"Cleaning dag_id: {name}")

        if DagRun.find(dag_id=name, state=State.RUNNING):
            logging.info("Skipping, DAG has running DagRuns")
            continue

        try:
            delete_dag.delete_dag(name)
        except Exception as ex:
            logging.error(f"Failed to delete DAG\n {ex}")

        targets = [
            info["location"],
            os.path.splitext(info["location"])[0] + ".cwl"
        ]
        for target in targets:
            try:
                logging.info(f"Deleting DAG file: {target}")
                os.remove(target)
            except Exception as ex:
                logging.error(f"Failed to delete file {target}\n {ex}")
Ejemplo n.º 10
0
def delete_dag(dag_id: str, session: Session):
    """Delete the DAG identified by dag_id and answer with an empty 204.

    Raises NotFound when the deletion reports DagNotFound, and AlreadyExists
    when it raises an AirflowException.
    """
    # TODO: This function is shared with the /delete endpoint used by the web
    # UI, so we're reusing it to simplify maintenance. Refactor the function to
    # another place when the experimental/legacy API is removed.
    from airflow.api.common.experimental import delete_dag as experimental_delete_dag

    try:
        experimental_delete_dag.delete_dag(dag_id, session=session)
    except DagNotFound:
        raise NotFound(f"Dag with id: '{dag_id}' not found")
    except AirflowException:
        still_running = f"Task instances of dag with id: '{dag_id}' are still running"
        raise AlreadyExists(detail=still_running)

    return NoContent, 204
Ejemplo n.º 11
0
    def test_delete_dag_dag_still_in_dagbag(self):
        """Check that a DagFileExists failure leaves the DB untouched.

        Row counts for DagModel, DagStat, DagRun and TaskInstance are taken
        before the call and must be the same after delete_dag has raised.
        """
        tracked_models = ['DagModel', 'DagStat', 'DagRun', 'TaskInstance']

        def count_rows(name):
            # Rows of the named model that belong to the DAG under test.
            model = getattr(models, name)
            return self.session.query(model).filter(
                model.dag_id == self.dag_id).count()

        counts_before = {name: count_rows(name) for name in tracked_models}

        with self.assertRaises(DagFileExists):
            delete_dag(self.dag_id)

        # No change should happen in DB
        for name in tracked_models:
            self.assertEqual(count_rows(name), counts_before[name])
Ejemplo n.º 12
0
def delete_dag(dag_id):
    """Delete all DB records related to the specified Dag.

    Returns a JSON body with the number of removed records, or a JSON error
    carrying the AirflowException's own status code when the deletion fails.
    """
    try:
        removed = delete.delete_dag(dag_id)
    except AirflowException as err:
        log.error(err)
        failure = jsonify(error=f"{err}")
        failure.status_code = err.status_code
        return failure
    else:
        return jsonify(message=f"Removed {removed} record(s)", count=removed)
Ejemplo n.º 13
0
    def test_delete_dag_dag_still_in_dagbag(self):
        """Check that a DagFileExists failure leaves the DB untouched.

        Row counts for DagModel, DagRun and TaskInstance are taken before
        the call and must be the same after delete_dag has raised.
        """
        tracked_models = ['DagModel', 'DagRun', 'TaskInstance']

        def count_rows(name):
            # Rows of the named model that belong to the DAG under test.
            model = getattr(models, name)
            return self.session.query(model).filter(model.dag_id == self.dag_id).count()

        counts_before = {name: count_rows(name) for name in tracked_models}

        with self.assertRaises(DagFileExists):
            delete_dag(self.dag_id)

        # No change should happen in DB
        for name in tracked_models:
            self.assertEqual(count_rows(name), counts_before[name])
Ejemplo n.º 14
0
    def delete(self, session=None):
        """Delete the DAG named by the ``dag_id`` request argument.

        Calls DagenDagQueryset().delete_dag(dag_id).done() and
        refresh_dagen_templates() first, then delete_dag.delete_dag. The
        outcome is flashed to the user and the method always ends by
        redirecting home. ``session`` is accepted but not used here.
        """
        dag_id = request.args.get('dag_id')
        DagenDagQueryset().delete_dag(dag_id).done()
        refresh_dagen_templates()

        try:
            delete_dag.delete_dag(dag_id)
        except DagNotFound:
            failure = "DAG with id {} not found. Cannot delete".format(dag_id)
        except DagFileExists:
            failure = (
                "Dag id {} is still in DagBag. "
                "Remove the DAG file first.".format(dag_id))
        else:
            failure = None

        if failure is not None:
            flash(failure, 'error')
            return self._redirect_home()

        flash("Deleting DAG with id {}. May take a couple minutes to fully"
              " disappear.".format(dag_id))

        # Upon success return to home.
        return self._redirect_home()
Ejemplo n.º 15
0
def delete_dag(dag_id):
    """
    Delete all DB records related to the specified Dag.

    Returns a JSON body with the number of removed records, or a JSON error
    carrying the AirflowException's own status code when the deletion fails.
    """
    try:
        removed = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = err.status_code
        return failure
    else:
        summary = "Removed {} record(s)".format(removed)
        return jsonify(message=summary, count=removed)
Ejemplo n.º 16
0
def delete_dag(dag_id):
    """
    Delete all DB records related to the specified Dag.

    On success the JSON response reports how many records were removed; when
    the deletion raises an AirflowException the response carries the error
    text and the exception's status code instead.
    """
    try:
        record_count = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        error_response = jsonify(error="{}".format(err))
        error_response.status_code = err.status_code
        return error_response

    message = "Removed {} record(s)".format(record_count)
    return jsonify(message=message, count=record_count)
Ejemplo n.º 17
0
def delete_dag(dag_id):
    """
    Delete the specified Dag and report the outcome as JSON.

    Answers with status 404 and the error text when the deletion raises an
    AirflowException; otherwise answers with status 204 and a "Deleted ..."
    message. When a user is attached to ``g`` the deletion is logged
    together with that user.
    """
    try:
        deleted = delete.delete_dag(dag_id)
    except AirflowException as err:
        _log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = 404
        return failure

    current_user = getattr(g, 'user', None)
    if current_user:
        _log.info("User {} deleted {}".format(current_user, deleted))

    success = jsonify(message="Deleted {}".format(deleted))
    success.status_code = 204
    return success
Ejemplo n.º 18
0
 def delete_dag(self, dag_id):
     """Delete the DAG via delete_dag.delete_dag and describe the result."""
     return "Deleted {}".format(delete_dag.delete_dag(dag_id))
Ejemplo n.º 19
0
 def test_delete_dag_non_existent_dag(self):
     """delete_dag must raise DagNotFound for the id "non-existent DAG"."""
     self.assertRaises(DagNotFound, delete_dag, "non-existent DAG")
Ejemplo n.º 20
0
 def test_delete_dag_non_existent_dag(self):
     """Deleting the id "non-existent DAG" is expected to raise DagNotFound."""
     unknown_dag_id = "non-existent DAG"
     self.assertRaises(DagNotFound, delete_dag, unknown_dag_id)
Ejemplo n.º 21
0
 def delete_dag(self, dag_id):
     """Delete the DAG's records and report how many were removed."""
     return "Removed {} record(s)".format(delete_dag.delete_dag(dag_id))
Ejemplo n.º 22
0
 def delete_dag(self, dag_id):
     """Run delete_dag.delete_dag for dag_id and return a summary string."""
     summary = "Removed {} record(s)"
     removed = delete_dag.delete_dag(dag_id)
     return summary.format(removed)
Ejemplo n.º 23
0
 def test_delete_subdag_successful_delete(self):
     """Delete a DAG set up with for_sub_dag=True, without keeping log records.

     The models are created and checked through the test-class helpers, then
     delete_dag runs with keep_records_in_log=False and the helper verifies
     removal with expect_logs=0.
     """
     self.setup_dag_models(for_sub_dag=True)
     self.check_dag_models_exists()
     delete_dag(dag_id=self.key, keep_records_in_log=False)
     self.check_dag_models_removed(expect_logs=0)
Ejemplo n.º 24
0
 def test_delete_dag_successful_delete_not_keeping_records_in_log(self):
     """Delete a DAG with keep_records_in_log=False.

     The models are created and checked through the test-class helpers, then
     delete_dag runs and the helper verifies removal with expect_logs=0.
     """
     self.setup_dag_models()
     self.check_dag_models_exists()
     delete_dag(dag_id=self.key, keep_records_in_log=False)
     self.check_dag_models_removed(expect_logs=0)
Ejemplo n.º 25
0
 def test_delete_dag_successful_delete(self):
     """Delete a DAG with delete_dag's default arguments.

     The models are created and checked through the test-class helpers, then
     delete_dag runs and the helper verifies removal with expect_logs=1.
     """
     self.setup_dag_models()
     self.check_dag_models_exists()
     delete_dag(dag_id=self.key)
     self.check_dag_models_removed(expect_logs=1)
Ejemplo n.º 26
0
 def delete_dag(self, dag_id):
     """Delete the DAG's records and report how many were removed."""
     return f"Removed {delete_dag.delete_dag(dag_id)} record(s)"
Ejemplo n.º 27
0
 def test_delete_dag_non_existent_dag(self):
     """delete_dag must raise DagNotFound for the id "non-existent DAG"."""
     with pytest.raises(DagNotFound):
         delete_dag("non-existent DAG")