Example #1
    def get_dag_dependencies(cls,
                             session: Session = None
                             ) -> Dict[str, List['DagDependency']]:
        """
        Get the dependencies between DAGs

        :param session: ORM Session
        :type session: Session
        """
        dependencies = {}

        # json_extract (sqlite/mysql) and json_query (mssql) return the match
        # as JSON text, which json.loads decodes; json_extract_path (postgres)
        # already yields decoded objects, so the final branch skips decoding.
        if session.bind.dialect.name in ["sqlite", "mysql"]:
            for row in session.query(
                    cls.dag_id,
                    func.json_extract(cls.data,
                                      "$.dag.dag_dependencies")).all():
                dependencies[row[0]] = [
                    DagDependency(**d) for d in json.loads(row[1])
                ]
        elif session.bind.dialect.name == "mssql":
            for row in session.query(
                    cls.dag_id,
                    func.json_query(cls.data, "$.dag.dag_dependencies")).all():
                dependencies[row[0]] = [
                    DagDependency(**d) for d in json.loads(row[1])
                ]
        else:
            for row in session.query(
                    cls.dag_id,
                    func.json_extract_path(cls.data, "dag",
                                           "dag_dependencies")).all():
                dependencies[row[0]] = [DagDependency(**d) for d in row[1]]

        return dependencies
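A minimal standalone sketch of why the sqlite/mysql branch decodes with json.loads (assumes SQLAlchemy 1.4+ and a SQLite build with the JSON1 functions; the model and data here are invented, not Airflow's): json_extract hands back the match as JSON text, not as Python objects.

import json

from sqlalchemy import Column, String, Text, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class SerializedDag(Base):
    __tablename__ = "serialized_dag"
    dag_id = Column(String(250), primary_key=True)
    data = Column(Text)  # JSON document stored as text


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(SerializedDag(
        dag_id="example",
        data=json.dumps({"dag": {"dag_dependencies": [
            {"source": "other_dag", "target": "example"}]}}),
    ))
    session.commit()
    dag_id, deps_text = session.query(
        SerializedDag.dag_id,
        func.json_extract(SerializedDag.data, "$.dag.dag_dependencies"),
    ).one()
    # json_extract returns a string; decode before building objects.
    assert json.loads(deps_text) == [{"source": "other_dag", "target": "example"}]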
Example #2
    def get_dag_dependencies(cls,
                             session: Session = None
                             ) -> Dict[str, List['DagDependency']]:
        """
        Get the dependencies between DAGs

        :param session: ORM Session
        :type session: Session
        """
        if session.bind.dialect.name in ["sqlite", "mysql"]:
            query = session.query(
                cls.dag_id,
                func.json_extract(cls.data, "$.dag.dag_dependencies"))
            iterator = ((dag_id, json.loads(deps_data) if deps_data else [])
                        for dag_id, deps_data in query)
        elif session.bind.dialect.name == "mssql":
            query = session.query(
                cls.dag_id, func.json_query(cls.data,
                                            "$.dag.dag_dependencies"))
            iterator = ((dag_id, json.loads(deps_data) if deps_data else [])
                        for dag_id, deps_data in query)
        else:
            iterator = session.query(
                cls.dag_id,
                func.json_extract_path(cls.data, "dag", "dag_dependencies"))
        return {
            dag_id: [DagDependency(**d) for d in (deps_data or [])]
            for dag_id, deps_data in iterator
        }
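The refactor's main behavioral addition over Example #1 is the `json.loads(deps_data) if deps_data else []` / `deps_data or []` guard for rows whose document lacks dag_dependencies. A toy illustration, with invented rows of the shape the text-returning branches produce:

import json

# sqlite/mysql/mssql branches: the dependency list arrives as JSON text or NULL.
rows_as_text = [("dag_a", '[{"source": "upstream", "target": "dag_a"}]'),
                ("dag_b", None)]
iterator = ((dag_id, json.loads(deps) if deps else [])
            for dag_id, deps in rows_as_text)
print(dict(iterator))
# {'dag_a': [{'source': 'upstream', 'target': 'dag_a'}], 'dag_b': []}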
Example #3
    def execute(self, context: "Context") -> Optional[List[Any]]:
        with PsrpHook(
                self.conn_id,
                logging_level=self.logging_level,
                runspace_options=self.runspace_options,
                wsman_options=self.wsman_options,
                on_output_callback=self.log.info
                if not self.do_xcom_push else None,
        ) as hook, hook.invoke() as ps:
            if self.psrp_session_init is not None:
                ps.add_command(self.psrp_session_init)
            if self.command:
                ps.add_script(f"cmd.exe /c @'\n{self.command}\n'@")
            else:
                if self.cmdlet:
                    ps.add_cmdlet(self.cmdlet)
                else:
                    ps.add_script(self.powershell)
                if self.parameters:
                    ps.add_parameters(self.parameters)
                if self.do_xcom_push:
                    ps.add_cmdlet("ConvertTo-Json")

        # The with-block has closed, finalizing the invocation; the pipeline
        # object remains inspectable afterwards.
        if ps.had_errors:
            raise AirflowException("Process failed")

        rc = ps.runspace_pool.host.rc
        if rc:
            raise AirflowException(
                f"Process exited with non-zero status code: {rc}")

        if not self.do_xcom_push:
            return None

        return [json.loads(output) for output in ps.output]
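A PsrpHook call needs a Windows remoting target, so here is a local stand-in for the pattern (a plain subprocess, explicitly not the provider's API): the remote side serializes each output record to JSON, as ConvertTo-Json does above, and json.loads rebuilds Python objects for XCom.

import json
import subprocess
import sys

# A child Python process plays the role of the remote PowerShell pipeline;
# json.dumps on its side mirrors what ConvertTo-Json does in the operator.
proc = subprocess.run(
    [sys.executable, "-c",
     "import json; print(json.dumps({'status': 'ok', 'items': 3}))"],
    capture_output=True, text=True, check=True,
)
outputs = [json.loads(line) for line in proc.stdout.splitlines()]
print(outputs)  # [{'status': 'ok', 'items': 3}]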
Example #4
    def data(self):
        # Cache the decoded value on first access to avoid repeated
        # decompress/loads work. Read the cache via try/except: the
        # double-underscore name is mangled, so a literal
        # hasattr(self, "__data_cache") check would never find it.
        try:
            cached = self.__data_cache
        except AttributeError:
            cached = None
        if cached is None:
            if self._data_compressed:
                self.__data_cache = json.loads(zlib.decompress(self._data_compressed))
            else:
                self.__data_cache = self._data

        return self.__data_cache
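A standalone round trip of the storage scheme the property assumes, with invented data: the document is held either as-is or as zlib-compressed JSON bytes.

import json
import zlib

doc = {"dag": {"dag_id": "example", "tasks": []}}

# Compressed path: bytes -> zlib.decompress -> json.loads.
data_compressed = zlib.compress(json.dumps(doc).encode("utf-8"))
assert json.loads(zlib.decompress(data_compressed)) == doc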
Example #5
def load_dag_schema_dict() -> dict:
    """Load & return Json Schema for DAG as Python dict"""
    schema_file_name = 'schema.json'
    schema_file = pkgutil.get_data(__name__, schema_file_name)

    if schema_file is None:
        raise AirflowException("Schema file {} does not exist".format(schema_file_name))

    schema = json.loads(schema_file.decode())
    return schema
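pkgutil.get_data returns the resource as bytes, or None when the loader cannot supply it, hence the check before decoding. A runnable demonstration that builds a throwaway package (demo_pkg and its files are invented) rather than relying on Airflow's schema.json:

import json
import os
import pkgutil
import sys
import tempfile

# Create a temporary package containing a schema.json resource.
tmp = tempfile.mkdtemp()
pkg_dir = os.path.join(tmp, "demo_pkg")
os.makedirs(pkg_dir)
open(os.path.join(pkg_dir, "__init__.py"), "w").close()
with open(os.path.join(pkg_dir, "schema.json"), "w") as f:
    json.dump({"type": "object"}, f)

sys.path.insert(0, tmp)
schema_file = pkgutil.get_data("demo_pkg", "schema.json")  # bytes or None
assert schema_file is not None
print(json.loads(schema_file.decode()))  # {'type': 'object'}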
Example #6
    def validate_schema(cls, serialized_obj: Union[str, dict]) -> None:
        """Validate serialized_obj satisfies JSON schema."""
        if cls._json_schema is None:
            raise AirflowException('JSON schema of {:s} is not set.'.format(cls.__name__))

        if isinstance(serialized_obj, dict):
            cls._json_schema.validate(serialized_obj)
        elif isinstance(serialized_obj, str):
            cls._json_schema.validate(json.loads(serialized_obj))
        else:
            raise TypeError("Invalid type: Only dict and str are supported.")
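The same dict-or-str dispatch, exercised against a plain Draft7Validator standing in for cls._json_schema (schema and payloads invented):

import json

import jsonschema

validator = jsonschema.Draft7Validator({"type": "object"})
for serialized_obj in ({"dag_id": "example"}, '{"dag_id": "example"}'):
    if isinstance(serialized_obj, dict):
        validator.validate(serialized_obj)              # validate in place
    else:
        validator.validate(json.loads(serialized_obj))  # decode first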
Example #7
def load_dag_schema() -> Validator:
    """
    Load JSON Schema for DAG
    """
    schema_file_name = 'schema.json'
    schema_file = pkgutil.get_data(__name__, schema_file_name)

    if schema_file is None:
        raise AirflowException("Schema file {} does not exist".format(schema_file_name))

    schema = json.loads(schema_file.decode())
    jsonschema.Draft7Validator.check_schema(schema)
    return jsonschema.Draft7Validator(schema)
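What the returned validator provides, shown with a small stand-in schema rather than Airflow's schema.json: check_schema rejects malformed schemas up front, and validate raises jsonschema.ValidationError for non-conforming documents.

import jsonschema

schema = {
    "type": "object",
    "required": ["dag_id"],
    "properties": {"dag_id": {"type": "string"}},
}
jsonschema.Draft7Validator.check_schema(schema)  # raises SchemaError if malformed
validator = jsonschema.Draft7Validator(schema)

validator.validate({"dag_id": "example"})  # passes silently
try:
    validator.validate({})
except jsonschema.ValidationError as err:
    print(err.message)  # "'dag_id' is a required property"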
Example #8
    def from_json(
        cls, serialized_obj: str
    ) -> Union['BaseSerialization', dict, list, set, tuple]:
        """Deserialize serialized_obj and reconstruct all DAGs and operators it contains."""
        return cls.from_dict(json.loads(serialized_obj))
Example #9
import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.settings import json
from airflow_dbt.operators.dbt_operator import (
    DbtRunOperator,
    DbtTestOperator,
)

from common.operators.covid19_to_ingestions import Covid19ToIngestions

# The 'covid19' Variable stores the default_args as JSON; retry_delay arrives
# as a number of minutes and is converted to a timedelta after decoding.
default_args = json.loads(Variable.get('covid19'))
default_args["retry_delay"] = timedelta(minutes=default_args["retry_delay"])


dbt_dir = os.environ["DBT_DIR"]
dbt_profiles_dir = os.environ["DBT_PROFILES_DIR"]

with DAG(
    'covid19_dbt',
    default_args=default_args,
    description='Managing dbt data pipeline',
    schedule_interval='@daily',
) as dag:

    ingest_covid19_day_task = Covid19ToIngestions(task_id='ingest_covid19_day_to_dbt', dag=dag)

    dbt_run = DbtRunOperator(
        task_id='dbt_run',
        dir=dbt_dir,
        profiles_dir=dbt_profiles_dir,
    )
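The Variable pattern above, runnable without an Airflow metastore (the JSON value is invented): one Variable carries the whole default_args object, with retry_delay stored as minutes and converted after json.loads.

import json
from datetime import timedelta

variable_value = '{"owner": "data-eng", "retries": 2, "retry_delay": 5}'
default_args = json.loads(variable_value)
default_args["retry_delay"] = timedelta(minutes=default_args["retry_delay"])
print(default_args["retry_delay"])  # 0:05:00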