def test_check_operators(self):
    conn_id = "sqlite_default"
    captainHook = BaseHook.get_hook(conn_id=conn_id)
    captainHook.run("CREATE TABLE operator_test_table (a, b)")
    captainHook.run("insert into operator_test_table values (1,2)")

    t = operators.CheckOperator(
        task_id='check',
        sql="select count(*) from operator_test_table",
        conn_id=conn_id,
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)

    t = operators.ValueCheckOperator(
        task_id='value_check',
        pass_value=95,
        tolerance=0.1,
        conn_id=conn_id,
        sql="SELECT 100",
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, force=True)

    captainHook.run("drop table operator_test_table")
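A standalone sketch of the same hook lookup outside the test harness, assuming an Airflow 1.x environment where the stock sqlite_default connection is configured; the smoke_test table name is illustrative:

from airflow.hooks.base_hook import BaseHook

hook = BaseHook.get_hook(conn_id="sqlite_default")  # resolves the conn type to a SqliteHook
hook.run("CREATE TABLE IF NOT EXISTS smoke_test (a, b)")
rows = hook.get_records("SELECT count(*) FROM smoke_test")  # returns a list of tuples
print(rows[0][0])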
def db_facts(self, db_creds_name: str) -> DBFacts:
    from airflow.hooks import BaseHook

    conn = BaseHook.get_connection(db_creds_name)
    out: DBFacts = {}

    def add(key: str, value: Optional[Union[str, int]]) -> None:
        if value is not None:
            out[key] = value  # type: ignore

    register_secret(conn.password)
    add('host', conn.host)
    add('port', conn.port)
    add('database', conn.schema)
    add('user', conn.login)
    add('password', conn.password)
    # conn.extra_dejson returns {} if no 'extra' is set in the Connection:
    # https://airflow.apache.org/docs/stable/_modules/airflow/models/connection.html
    add('type', conn.extra_dejson.get('type', conn.conn_type.lower()))
    add('bq_default_project_id',
        conn.extra_dejson.get('extra__google_cloud_platform__project'))
    add('bq_default_dataset_id',
        conn.extra_dejson.get('bq_default_dataset_id'))
    add('bq_service_account_json',
        conn.extra_dejson.get('extra__google_cloud_platform__keyfile_dict'))
    add('protocol', conn.extra_dejson.get('protocol'))
    return out
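A hedged usage sketch: AirflowHooksSource is a hypothetical name for the class this method lives on, and 'my_warehouse' stands in for a configured Airflow connection; the returned dict carries only the fields the connection actually defines:

facts = AirflowHooksSource().db_facts('my_warehouse')
# e.g. {'host': 'db.example.com', 'port': 5439, 'database': 'analytics',
#       'user': 'loader', 'password': '...', 'type': 'redshift'}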
def poke(self, context):
    hook = BaseHook.get_connection(self.conn_id).get_hook()
    logging.info('Poking: ' + self.sql)
    records = hook.get_records(self.sql)
    if not records:
        return False
    # A first cell of 0 or empty string means the condition is not met yet;
    # anything else satisfies the sensor.
    return str(records[0][0]) not in ('0', '')
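The surrounding sensor class is not shown above; a minimal sketch of where this poke() would live, modeled on Airflow 1.10's SqlSensor (the class name and import paths are assumptions for that version):

import logging

from airflow.hooks.base_hook import BaseHook
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from airflow.utils.decorators import apply_defaults

class SimpleSqlSensor(BaseSensorOperator):
    @apply_defaults
    def __init__(self, conn_id, sql, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.conn_id = conn_id
        self.sql = sql

    # poke() from above goes here; Airflow calls it every poke_interval
    # seconds until it returns True or the sensor times out.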
def get_conn(self) -> sa.engine.Engine:
    """
    :return: SQLAlchemy engine corresponding to this Airflow database
        connection ID.
    """
    conn = BaseHook.get_connection(self.db_conn_id)
    db_url = create_sqlalchemy_url({
        'host': conn.host,
        'port': conn.port,
        'database': conn.schema,
        'user': conn.login,
        'password': conn.password,
        'type': conn.extra_dejson.get('type', conn.conn_type.lower()),
    })
    return sa.create_engine(db_url)
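A hedged usage sketch, assuming the owning class (not shown) exposes a db_conn_id attribute that points at a configured, reachable Airflow connection; my_hook is a hypothetical instance of that class:

engine = my_hook.get_conn()
with engine.connect() as connection:
    row = connection.execute(sa.text("SELECT 1")).fetchone()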
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2016, 8, 18),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 3,
    'retry_delay': timedelta(minutes=30),
}

try:
    dag = DAG('bugzilla_dataset', default_args=default_args,
              schedule_interval='@daily')

    connection_details = BaseHook.get_connection('bugzilla_db')
    env = {
        "DATABASE_USER": connection_details.login,
        "DATABASE_PASSWORD": connection_details.password,
        "DATABASE_HOST": connection_details.host,
        "DATABASE_PORT": connection_details.port,
        "DATABASE_NAME": connection_details.schema,
    }

    update_bugs = EMRSparkOperator(
        task_id="update_bugs",
        job_name="Bugzilla Dataset Update",
        execution_timeout=timedelta(hours=5),
        instance_count=1,
        env=env,
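The env dict is handed to the Spark job as environment variables; a sketch of how the job side might read them back. The variable names come from the dict above, but the mysql:// URL shape is an assumption about the target database:

import os

db_url = 'mysql://{user}:{password}@{host}:{port}/{name}'.format(
    user=os.environ['DATABASE_USER'],
    password=os.environ['DATABASE_PASSWORD'],
    host=os.environ['DATABASE_HOST'],  # assumed scheme; adjust to the real engine
    port=os.environ['DATABASE_PORT'],
    name=os.environ['DATABASE_NAME'],
)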
def get_db_hook(self):
    return BaseHook.get_hook(conn_id=self.conn_id)
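A sketch of how an operator might use this helper, in the style of Airflow's CheckOperator family; the class name, conn_id handling, and the row-count check are illustrative, not the library's actual implementation:

from airflow.exceptions import AirflowException
from airflow.hooks.base_hook import BaseHook
from airflow.models import BaseOperator

class SqlRowCountCheck(BaseOperator):
    def __init__(self, conn_id, sql, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.conn_id = conn_id
        self.sql = sql

    def get_db_hook(self):
        return BaseHook.get_hook(conn_id=self.conn_id)

    def execute(self, context):
        # The hook type is picked from the connection, so the same operator
        # works against any database with a DbApiHook implementation.
        records = self.get_db_hook().get_records(self.sql)
        if not records:
            raise AirflowException("The query returned no rows")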