def db_facts(self, db_creds_name: str) -> DBFacts:
    """Assemble a DBFacts dict from the Airflow connection named *db_creds_name*.

    Only attributes that are actually set on the connection end up in the
    returned dict; unset (None) values are omitted entirely.
    """
    from airflow.hooks import BaseHook

    connection = BaseHook.get_connection(db_creds_name)
    facts: DBFacts = {}

    def maybe_set(name: str, value: Optional[Union[str, int]]) -> None:
        # Omit unset attributes so callers can distinguish "absent" from "empty".
        if value is not None:
            facts[name] = value  # type: ignore

    # Mark the password as sensitive before it appears anywhere else.
    register_secret(connection.password)

    maybe_set('host', connection.host)
    maybe_set('port', connection.port)
    maybe_set('database', connection.schema)
    maybe_set('user', connection.login)
    maybe_set('password', connection.password)

    # conn.extra_dejson returns {} if no 'extra' is set in the Connection:
    # https://airflow.apache.org/docs/stable/_modules/airflow/models/connection.html
    extras = connection.extra_dejson
    maybe_set('type', extras.get('type', connection.conn_type.lower()))
    maybe_set('bq_default_project_id', extras.get('extra__google_cloud_platform__project'))
    maybe_set('bq_default_dataset_id', extras.get('bq_default_dataset_id'))
    maybe_set('bq_service_account_json', extras.get('extra__google_cloud_platform__keyfile_dict'))
    maybe_set('protocol', extras.get('protocol'))
    return facts
def poke(self, context):
    """Airflow sensor check: return True when the SQL query yields a truthy first cell.

    Runs ``self.sql`` via the hook resolved from ``self.conn_id``. Returns
    False when no rows come back, or when the first cell of the first row
    stringifies to '0' or '' (the conventional "nothing happened" values).
    """
    hook = BaseHook.get_connection(self.conn_id).get_hook()
    # Lazy %-args: the message is only built if INFO logging is enabled.
    logging.info('Poking: %s', self.sql)
    records = hook.get_records(self.sql)
    if not records:
        return False
    # BUG FIX: the original had an unreachable print() after this return;
    # the nested if/else collapses to a single boolean expression.
    return str(records[0][0]) not in ('0', '')
def poke(self, context):
    """Airflow sensor check: return True when the SQL query yields a truthy first cell.

    Runs ``self.sql`` via the hook resolved from ``self.conn_id``. Returns
    False when no rows come back, or when the first cell of the first row
    stringifies to '0' or '' (the conventional "nothing happened" values).
    """
    hook = BaseHook.get_connection(self.conn_id).get_hook()
    # Lazy %-args: the message is only built if INFO logging is enabled.
    logging.info('Poking: %s', self.sql)
    records = hook.get_records(self.sql)
    if not records:
        return False
    # BUG FIX: the original had an unreachable print() after this return;
    # the nested if/else collapses to a single boolean expression.
    return str(records[0][0]) not in ('0', '')
def get_conn(self) -> sa.engine.Engine:
    """
    :return: SQLAlchemy engine built from the Airflow connection ``self.db_conn_id``.
    """
    airflow_conn = BaseHook.get_connection(self.db_conn_id)
    # Map the Airflow Connection fields onto the shape create_sqlalchemy_url expects.
    facts = {
        'host': airflow_conn.host,
        'port': airflow_conn.port,
        'database': airflow_conn.schema,
        'user': airflow_conn.login,
        'password': airflow_conn.password,
        'type': airflow_conn.extra_dejson.get('type', airflow_conn.conn_type.lower()),
    }
    return sa.create_engine(create_sqlalchemy_url(facts))
default_args = { 'owner': '*****@*****.**', 'depends_on_past': False, 'start_date': datetime(2016, 8, 18), 'email': ['*****@*****.**', '*****@*****.**'], 'email_on_failure': True, 'email_on_retry': True, 'retries': 3, 'retry_delay': timedelta(minutes=30), } try: dag = DAG('bugzilla_dataset', default_args=default_args, schedule_interval='@daily') connection_details = BaseHook.get_connection('bugzilla_db') env = { "DATABASE_USER": connection_details.login, "DATABASE_PASSWORD": connection_details.password, "DATABASE_HOST": connection_details.host, "DATABASE_PORT": connection_details.port, "DATABASE_NAME": connection_details.schema, } update_bugs = EMRSparkOperator( task_id="update_bugs", job_name="Bugzilla Dataset Update", execution_timeout=timedelta(hours=5), instance_count=1, env=env,