class Config:
    """Shared deployment configuration for the UKWA Airflow DAGs.

    NOTE: the class body runs at import time — it reads the deployment
    context from the environment and resolves connection details from the
    Airflow secrets backend, so importing this module requires both to be
    available.
    """

    # Pick up definitions for the deployment context, and any secrets:
    deployment_context = os.environ['DEPLOYMENT_CONTEXT']
    storage_path = os.environ['STORAGE_PATH']

    # Define the connection parameters, e.g. you might want to change
    # within a given deployment:
    # FIXME Do we need the webhdfs here?
    wh_conn = Connection.get_connection_from_secrets("hadoop_020_webhdfs")
    webhdfs_access_url = f"http://{wh_conn.host}:{wh_conn.port}"
    webhdfs_access_user = wh_conn.login

    # Prometheus Pushgateway endpoint for task metrics:
    pg_conn = Connection.get_connection_from_secrets("metrics_push_gateway")
    push_gateway = f"{pg_conn.host}:{pg_conn.port}"

    # Define the common parameters for running Docker tasks:
    w3act_task_image = 'ukwa/python-w3act:2.0.0'
    ukwa_task_image = 'ukwa/ukwa-manage:2.0.1'
    hadoop_docker_image = 'ukwa/docker-hadoop:2.0.0'
    postgres_image = 'postgres:9.6.2'

    def get_default_args(self):
        """Return a fresh dict of default arguments shared by all tasks.

        A new dict (and new ``Mount``) is built on every call, so callers
        may mutate the result without affecting other DAGs.

        Returns:
            dict: keyword arguments suitable for ``default_args`` on a DAG,
            including Docker-task settings (extra hosts, storage mount) and
            failure-alert e-mail configuration.
        """
        return {
            # Shared configuration for all tasks:
            'owner': 'airflow',
            'retries': 3,
            # Shared configuration for all Docker tasks:
            'extra_hosts': {
                'h020nn': '192.168.1.103',
                'h020jt': '192.168.1.104',
                # Note that H3 config uses proper domain names like h3rm.wa.bl.uk
            },
            'mounts': [
                Mount(
                    source=self.storage_path,
                    target='/storage',
                    type='bind',
                )
            ],
            'email_on_failure': True,
            # Alert address is read lazily from an Airflow Variable on each call:
            'email': [Variable.get('alert_email_address')],
            'auto_remove': False,  # True is a bit aggressive and stops Airflow grabbing container logs.
            'do_xcom_push': False,  # This is not currently working with DockerOperators so defaulting to off for now.
            'mount_tmp_dir': False,  # Not supported by docker-in-docker tasks
        }
def test_backend_fallback_to_env_var(self, mock_get_uri):
    """The SSM backend is consulted first; when it yields no URI,
    resolution still succeeds via the environment-variable backend."""
    mock_get_uri.return_value = None

    # The SSM parameter-store backend must be among the loaded backends.
    loaded = ensure_secrets_loaded()
    backend_names = [type(b).__name__ for b in loaded]
    assert 'SystemsManagerParameterStoreBackend' in backend_names

    conn = Connection.get_connection_from_secrets(conn_id="test_mysql")

    # Assert that SystemsManagerParameterStoreBackend.get_conn_uri was called
    mock_get_uri.assert_called_once_with(conn_id='test_mysql')
    assert conn.get_uri() == 'mysql://*****:*****@host:5432/airflow'
import os from airflow.models import Variable, DAG, Connection from airflow.utils.dates import days_ago from airflow.operators.docker_operator import DockerOperator from _common_ import Config # Pick up shared configuration: c = Config() # These args will get passed on to each operator/task: default_args = c.get_default_args() # Connection to W3ACT PostgreSQL DB to use: trackdb = Connection.get_connection_from_secrets("trackdb") trackdb_url = trackdb.get_uri().replace('%2F', '/') # Use a function to generate parameterised DAGs: def generate_update_dag(path, hadoop_service, schedule_interval, args): dag_id = 'update_trackdb_%s%s' % (hadoop_service, path.replace('/', '_')) with DAG(dag_id=dag_id, default_args=args, schedule_interval=schedule_interval, start_date=days_ago(1), max_active_runs=1, catchup=False, params={ 'path': path, 'lsr_txt': '/storage/hadoop_lsr_%s.txt' % dag_id,
def test_get_connection_first_try(self, mock_env_get, mock_meta_get):
    """If the environment-variables backend returns a connection, the
    metastore backend must not be consulted at all."""
    mock_env_get.side_effect = ["something"]  # returns something
    Connection.get_connection_from_secrets("fake_conn_id")
    mock_env_get.assert_called_once_with(conn_id="fake_conn_id")
    # BUG FIX: `not_called()` is not a Mock assertion — Mock auto-creates
    # the attribute and the call silently passes regardless of usage.
    # `assert_not_called()` actually fails if the metastore was hit.
    mock_meta_get.assert_not_called()
def test_get_connection_second_try(self, mock_env_get, mock_meta_get):
    """When the env-var backend yields nothing, resolution falls through
    to the metastore backend."""
    mock_env_get.side_effect = [None]  # return None
    Connection.get_connection_from_secrets("fake_conn_id")
    # Both backends were consulted exactly once each.
    mock_env_get.assert_called_once_with(conn_id="fake_conn_id")
    mock_meta_get.assert_called_once_with(conn_id="fake_conn_id")
from airflow.operators.bash import BashOperator
from airflow.providers.ssh.operators.ssh import SSHOperator
from airflow.operators.docker_operator import DockerOperator
from airflow.operators.python import get_current_context
from airflow.models import Variable, Connection, DAG
from _common_ import Config

# Module-level setup: resolves shared config and the connections this DAG
# module needs. Runs at import time, so the secrets backend must be reachable.

# Pick up shared configuration:
c = Config()

# These args will get passed on to each operator/task:
default_args = c.get_default_args()

# Connection to W3ACT PostgreSQL DB to use:
access_w3act = Connection.get_connection_from_secrets("access_w3act")

# Which Collections Solr to update:
collections_solr = Connection.get_connection_from_secrets(
    "access_collections_solr")

# Connection to commit to GitLab Wayback ACLs (including access token in it)
# NOTE(review): the broad except looks like a deliberate best-effort — the
# module should still import when this secret is absent, leaving
# gitlab_wayback_acl_remote undefined. Confirm downstream tasks guard for that.
try:
    gitlab_wayback_acl_remote = Connection.get_connection_from_secrets(
        "gitlab_wayback_acl_remote")
    # GitLab does not like the slash in the path being escaped:
    gitlab_wayback_acl_remote = gitlab_wayback_acl_remote.get_uri().replace(
        '%2F', '/')
except Exception as e:
    print("WARNING! no gitlab_wayback_acl_remote found!")
    print(e)