def test_masking_from_db(self):
    """Test secrets are masked when loaded directly from the DB"""
    # Normally people will use `Variable.get`, but just in case, catch direct DB access too
    session = settings.Session()
    try:
        var = Variable(
            key=f"password-{os.getpid()}",
            val="s3cr3t",
        )
        session.add(var)
        session.flush()

        # Make sure we re-load it, not just get the cached object back
        session.expunge(var)

        self.mask_secret.reset_mock()

        session.query(Variable).get(var.id)
        assert self.mask_secret.mock_calls == [
            # We should have called it _again_ when loading from the DB
            mock.call("s3cr3t", var.key),
        ]
    finally:
        session.rollback()
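# For context: the test above asserts against `self.mask_secret`, which is assumed
# to be patched in during test setup. A plausible sketch of that setup, patching
# mask_secret at the point where the Variable model imports it (the patch target
# path is an assumption, not shown in the original snippet):
from unittest import mock


class TestVariableMasking:
    def setup_method(self):
        self.patcher = mock.patch("airflow.models.variable.mask_secret", autospec=True)
        self.mask_secret = self.patcher.start()

    def teardown_method(self):
        self.patcher.stop()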
@pytest.fixture
def variable(session):
    variable = Variable(
        key=VARIABLE["key"],
        val=VARIABLE["val"],
        description=VARIABLE["description"],
    )
    session.add(variable)
    session.commit()

    yield variable

    # Clean up after the test so runs stay independent.
    session.query(Variable).filter(Variable.key == VARIABLE["key"]).delete()
    session.commit()
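# A hypothetical test consuming the fixture above (a hedged sketch: it assumes the
# module-level VARIABLE dict and the `session` fixture that the fixture itself
# relies on, e.g. VARIABLE = {"key": "...", "val": "...", "description": "..."}).
def test_variable_roundtrip(variable, session):
    # The fixture committed the row, so a fresh query should find it.
    fetched = session.query(Variable).filter(Variable.key == VARIABLE["key"]).one()
    assert fetched.val == VARIABLE["val"]
    assert fetched.description == VARIABLE["description"]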
def set_variable(
    self, var_id: str, value: str, is_encrypted: Optional[bool] = None
):
    assert repr(settings.engine.url) == self.sql_alchemy_conn
    session = settings.Session()
    new_var = Variable(key=var_id, _val=value, is_encrypted=is_encrypted)
    session.add(new_var)
    session.commit()
    session.close()
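# Note: the helper above writes the private `_val` column directly, presumably so
# the caller can control the stored value and the `is_encrypted` flag for tests.
# Outside of tests, the supported path is Variable.set/Variable.get, which handle
# encryption and session management. A minimal sketch:
from airflow.models import Variable

Variable.set("my_var", "s3cr3t")           # encrypts when Fernet is configured, then commits
assert Variable.get("my_var") == "s3cr3t"  # decrypts transparently on read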
def importVariable():
    session = settings.Session()
    tempfile = read_s3('variable.csv')
    with open(tempfile, 'r') as csvfile:
        reader = csv.reader(csvfile)
        rows = []
        for row in reader:
            rows.append(Variable(row[0], row[1]))
        if len(rows) > 0:
            session.add_all(rows)
            session.commit()
    session.close()
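# `Variable(row[0], row[1])` maps positionally to the model's `key` and `val`
# columns, so variable.csv is expected to hold one key,value pair per line with no
# header row. The `read_s3` helper is not shown above; a plausible sketch, assuming
# boto3 and a hypothetical bucket name (both are assumptions, not the original code):
import tempfile

import boto3


def read_s3(key: str, bucket: str = 'my-airflow-config') -> str:
    """Download s3://<bucket>/<key> to a local temp file and return its path."""
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
    boto3.client('s3').download_fileobj(bucket, key, tmp)
    tmp.close()
    return tmp.name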
def execute(self, context):
    # check if table is created; if not, create them
    logger.info('Setting up operator')
    with redis_session() as r:
        # TODO: find other way to handle data race (round-robin?)
        while r.keys('*'):
            logger.info('Not finished previous run, wait for 300 seconds.')
            time.sleep(300)

    start = time.perf_counter()
    if self.mode == 'local':
        _setup_local()
        db_cfg = Config.DATABASE
        conn_id = os.getenv('AIRFLOW_POSTGRES_CONN_ID')
    elif self.mode == 'redshift':
        db_cfg = Config.AWS['REDSHIFT']
        conn_id = os.getenv('AIRFLOW_REDSHIFT_CONN_ID')
    else:
        # Guard against silently using an undefined db_cfg/conn_id below.
        raise ValueError(f'Unsupported mode: {self.mode}')

    cfg = ConnectionConfig(conn_id=conn_id,
                           host=db_cfg['HOST'],
                           login=db_cfg['USERNAME'],
                           password=db_cfg['PASSWORD'],
                           schema=db_cfg['DB_NAME'],
                           port=db_cfg['PORT'])
    update_connection(cfg)

    for key, val in self.set_variable_keys.items():
        logger.info(f'Setting key="{key}" to Airflow Variable')
        variable = get_variable(key=key)
        if variable is None:
            variable = Variable(key=key)
        variable.set_val(value=val)
        with create_session() as sess:
            sess.add(variable)

    end = time.perf_counter()
    logger.info(
        f'Process Time [{self.__class__.__name__}]: {end - start:.3f} sec.')
def setUp(self):
    # Variable.get is a classmethod, so there is no need to instantiate the model.
    self.dagbag = DagBag(dag_folder=Variable.get('dags_folder'))
from datetime import datetime

from airflow.operators.bash import BashOperator
from airflow.models import Variable
from airflow.operators.python import PythonOperator, get_current_context
from airflow.operators.subdag import SubDagOperator
from airflow.decorators import dag, task
from airflow.utils.task_group import TaskGroup
from airflow.operators.dummy import DummyOperator
from plugins.smart_file_sensor import SmartFileSensor
from airflow.utils.decorators import apply_defaults
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

# get_variable_from_secrets is a classmethod, so no Variable() instance is needed
SLACK_TOKEN = Variable.get_variable_from_secrets(key="slack_secret")  # fetch slack token from Vault
DBS = ["DB_1", "DB_2", "DB_3"]
TRIGGER_DIR = Variable.get("trigger_dir")  # fetch local path with run file
# SLACK_TOKEN = Variable.get("slack_token")

DAG_ID = "DAG_SENSOR"
SUB_DAG_ID = "XCOM_sub_dag"
START_DATE = datetime(2000, 1, 1)
DEFAULT_ARGS = {
    "owner": "airflow",
    "depends_on_past": True,
    "email": ["*****@*****.**"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
}
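# The slack_sdk imports above suggest a notification helper; a minimal sketch,
# assuming the SLACK_TOKEN fetched earlier and a hypothetical channel name:
def notify_slack(message: str, channel: str = "#airflow-alerts") -> None:
    client = WebClient(token=SLACK_TOKEN)
    try:
        client.chat_postMessage(channel=channel, text=message)
    except SlackApiError as err:
        # Don't fail the task just because the notification failed.
        print(f"Slack notification failed: {err.response['error']}")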
# reset_airflow_variables.py
#
# PROGRAMMER: Brian Pederson
# DATE CREATED: 03/20/2020
# PURPOSE: utility script to set variables in Airflow environment for Data Pipelines Project 4.
#
from airflow import settings
from airflow.models import Variable

s3_bucket = Variable(key='s3_bucket', val='udacity-dend')
s3_log_jsonpath = Variable(key='s3_log_jsonpath', val='log_json_path.json')
#s3_song_prefix = Variable(key='s3_song_prefix', val='song-data')
#s3_log_prefix = Variable(key='s3_log_prefix', val='log-data')
s3_log_prefix = Variable(
    key='s3_log_prefix',
    val='log-data/{execution_date.year}/{execution_date.month}/')

# minimize data volume for dev/testing
s3_song_prefix = Variable(key='s3_song_prefix', val='song-data/B/')  # 'song-data/A/A/A/'
#s3_log_prefix = Variable(key='s3_log_prefix', val='log-data/2018/11/2018-11-30-events.json')  # ditto

air_start_date = Variable(key='air_start_date', val="2018-11-30")
air_end_date = Variable(key='air_end_date', val="2018-11-30")
stg_mode_events = Variable(key='stg_mode_events', val="replace")
stg_mode_songs = Variable(key='stg_mode_songs', val="replace")
dwh_mode_songplays = Variable(key='dwh_mode_songplays', val="replace")
dwh_mode_songs = Variable(key='dwh_mode_songs', val="replace")
dwh_mode_artists = Variable(key='dwh_mode_artists', val="replace")

# Persist the rows: delete any existing variable with the same key first (this is
# a reset script), then add and commit. Without a session commit the Variable
# objects above are never written to the metadata DB.
all_vars = [
    s3_bucket, s3_log_jsonpath, s3_log_prefix, s3_song_prefix,
    air_start_date, air_end_date, stg_mode_events, stg_mode_songs,
    dwh_mode_songplays, dwh_mode_songs, dwh_mode_artists,
]
session = settings.Session()
for var in all_vars:
    session.query(Variable).filter(Variable.key == var.key).delete()
    session.add(var)
session.commit()
session.close()