Beispiel #1
0
    def test_check_operators(self):
        """Run ``CheckOperator`` and ``ValueCheckOperator`` against a scratch table.

        A one-row table is created through the hook resolved for
        ``sqlite_default``, both operators are executed for ``DEFAULT_DATE``,
        and the table is dropped again. The drop happens in a ``finally``
        clause so that a failing operator does not leave the table behind and
        make the next run's ``CREATE TABLE`` fail.
        """
        conn_id = "sqlite_default"

        captainHook = BaseHook.get_hook(conn_id=conn_id)
        captainHook.run("CREATE TABLE operator_test_table (a, b)")
        try:
            captainHook.run("insert into operator_test_table values (1,2)")

            # The table holds exactly one row, so count(*) yields 1.
            t = operators.CheckOperator(
                task_id='check',
                sql="select count(*) from operator_test_table",
                conn_id=conn_id,
                dag=self.dag)
            t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)

            # NOTE(review): presumably 100 is accepted because it lies within
            # the 10% tolerance around pass_value=95 -- confirm against the
            # operator's tolerance semantics.
            t = operators.ValueCheckOperator(
                task_id='value_check',
                pass_value=95,
                tolerance=0.1,
                conn_id=conn_id,
                sql="SELECT 100",
                dag=self.dag)
            t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)
        finally:
            # Always remove the scratch table, even when an operator raised.
            captainHook.run("drop table operator_test_table")
Beispiel #2
0
    def test_check_operators(self):
        """Run ``CheckOperator`` and ``ValueCheckOperator`` against a scratch table.

        A one-row table is created through the hook resolved for
        ``sqlite_default``, both operators are executed for ``DEFAULT_DATE``,
        and the table is dropped again. The drop happens in a ``finally``
        clause so that a failing operator does not leave the table behind and
        make the next run's ``CREATE TABLE`` fail.
        """
        conn_id = "sqlite_default"

        captainHook = BaseHook.get_hook(conn_id=conn_id)
        captainHook.run("CREATE TABLE operator_test_table (a, b)")
        try:
            captainHook.run("insert into operator_test_table values (1,2)")

            # The table holds exactly one row, so count(*) yields 1.
            t = operators.CheckOperator(
                task_id='check',
                sql="select count(*) from operator_test_table",
                conn_id=conn_id,
                dag=self.dag)
            t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)

            # NOTE(review): presumably 100 is accepted because it lies within
            # the 10% tolerance around pass_value=95 -- confirm against the
            # operator's tolerance semantics.
            t = operators.ValueCheckOperator(
                task_id='value_check',
                pass_value=95,
                tolerance=0.1,
                conn_id=conn_id,
                sql="SELECT 100",
                dag=self.dag)
            t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)
        finally:
            # Always remove the scratch table, even when an operator raised.
            captainHook.run("drop table operator_test_table")
Beispiel #3
0
    def db_facts(self, db_creds_name: str) -> DBFacts:
        """Translate the Airflow connection *db_creds_name* into a ``DBFacts`` dict.

        The connection's host, port, schema, login and password are copied
        under the keys ``host``, ``port``, ``database``, ``user`` and
        ``password``; the remaining keys come from the connection's JSON
        "extra" field. Entries whose value is ``None`` are left out entirely.

        :param db_creds_name: Airflow connection ID to look up.
        :return: dict containing only the facts that have a non-``None`` value.
        """
        from airflow.hooks import BaseHook
        conn = BaseHook.get_connection(db_creds_name)
        out: DBFacts = {}

        def add(key: str, value: Optional[Union[str, int]]) -> None:
            # Skip unset values so that absent facts do not appear as None.
            if value is not None:
                out[key] = value  # type: ignore

        # Register the password with the secret registry before it is copied
        # into the result.
        register_secret(conn.password)

        add('host', conn.host)
        add('port', conn.port)
        add('database', conn.schema)
        add('user', conn.login)
        add('password', conn.password)
        # conn.extra_dejson returns {} if no 'extra' is set in the Connection:
        # https://airflow.apache.org/docs/stable/_modules/airflow/models/connection.html
        # Read it once instead of once per key.
        extra = conn.extra_dejson
        # An explicit 'type' in the extras wins over the Airflow connection type.
        add('type', extra.get('type', conn.conn_type.lower()))
        add('bq_default_project_id', extra.get('extra__google_cloud_platform__project'))
        add('bq_default_dataset_id', extra.get('bq_default_dataset_id'))
        add('bq_service_account_json',
            extra.get('extra__google_cloud_platform__keyfile_dict'))
        add('protocol', extra.get('protocol'))

        return out
Beispiel #4
0
    def poke(self, context):
        """Run ``self.sql`` and report whether its first cell is set.

        :param context: task context supplied by the caller; not used here.
        :return: ``False`` when the query returns no rows, or when the first
            column of the first row stringifies to ``'0'`` or ``''``;
            ``True`` otherwise.
        """
        hook = BaseHook.get_connection(self.conn_id).get_hook()

        logging.info('Poking: %s', self.sql)
        records = hook.get_records(self.sql)
        if not records:
            return False
        # Compare the string form so that 0, '0' and '' all count as "not yet".
        return str(records[0][0]) not in ('0', '')
Beispiel #5
0
    def poke(self, context):
        """Run ``self.sql`` and report whether its first cell is set.

        :param context: task context supplied by the caller; not used here.
        :return: ``False`` when the query returns no rows, or when the first
            column of the first row stringifies to ``'0'`` or ``''``;
            ``True`` otherwise.
        """
        hook = BaseHook.get_connection(self.conn_id).get_hook()

        logging.info('Poking: %s', self.sql)
        records = hook.get_records(self.sql)
        if not records:
            return False
        # Compare the string form so that 0, '0' and '' all count as "not yet".
        return str(records[0][0]) not in ('0', '')
 def get_conn(self) -> sa.engine.Engine:
     """
     Create a SQLAlchemy engine from the Airflow connection ``self.db_conn_id``.

     The connection's fields are collected into a dict, turned into a URL by
     ``create_sqlalchemy_url`` and passed to ``sa.create_engine``.

     :return: SQLAlchemy engine corresponding to this Airflow database connection ID.
     """
     airflow_conn = BaseHook.get_connection(self.db_conn_id)
     url_parts = {
         'host': airflow_conn.host,
         'port': airflow_conn.port,
         'database': airflow_conn.schema,
         'user': airflow_conn.login,
         'password': airflow_conn.password,
     }
     # An explicit 'type' in the connection extras wins; otherwise fall back
     # to the lower-cased Airflow connection type.
     url_parts['type'] = airflow_conn.extra_dejson.get(
         'type', airflow_conn.conn_type.lower())
     return sa.create_engine(create_sqlalchemy_url(url_parts))
# Defaults handed to the DAG constructor through its ``default_args`` parameter.
default_args = {
    # Ownership and scheduling baseline.
    "owner": "*****@*****.**",
    "depends_on_past": False,
    "start_date": datetime(2016, 8, 18),
    # Notification recipients and the events that trigger a mail.
    "email": ["*****@*****.**", "*****@*****.**"],
    "email_on_failure": True,
    "email_on_retry": True,
    # Retry policy: three retries, half an hour apart.
    "retries": 3,
    "retry_delay": timedelta(minutes=30),
}

try:
    dag = DAG('bugzilla_dataset', default_args=default_args, schedule_interval='@daily')

    connection_details = BaseHook.get_connection('bugzilla_db')

    env = {
        "DATABASE_USER": connection_details.login,
        "DATABASE_PASSWORD": connection_details.password,
        "DATABASE_HOST": connection_details.host,
        "DATABASE_PORT": connection_details.port,
        "DATABASE_NAME": connection_details.schema,
    }

    update_bugs = EMRSparkOperator(
        task_id="update_bugs",
        job_name="Bugzilla Dataset Update",
        execution_timeout=timedelta(hours=5),
        instance_count=1,
        env=env,
Beispiel #8
0
 def get_db_hook(self):
     """Return the hook that ``BaseHook.get_hook`` resolves for ``self.conn_id``."""
     db_hook = BaseHook.get_hook(conn_id=self.conn_id)
     return db_hook
Beispiel #9
0
 def get_db_hook(self):
     """Return the hook that ``BaseHook.get_hook`` resolves for ``self.conn_id``."""
     db_hook = BaseHook.get_hook(conn_id=self.conn_id)
     return db_hook