Example 1
import os

from airflow.models import Connection, Variable
from docker.types import Mount


class Config():

    # Pick up definitions for the deployment context, and any secrets:
    deployment_context = os.environ['DEPLOYMENT_CONTEXT']
    storage_path = os.environ['STORAGE_PATH']

    # Define the connection parameters, e.g. settings you might want to change within a given deployment:
    # FIXME Do we need the webhdfs here?
    wh_conn = Connection.get_connection_from_secrets("hadoop_020_webhdfs")
    webhdfs_access_url = f"http://{wh_conn.host}:{wh_conn.port}"
    webhdfs_access_user = wh_conn.login
    pg_conn = Connection.get_connection_from_secrets("metrics_push_gateway")
    push_gateway = f"{pg_conn.host}:{pg_conn.port}"

    # Define the common parameters for running Docker tasks:
    w3act_task_image = 'ukwa/python-w3act:2.0.0'
    ukwa_task_image = 'ukwa/ukwa-manage:2.0.1'
    hadoop_docker_image = 'ukwa/docker-hadoop:2.0.0'
    postgres_image = 'postgres:9.6.2'

    # Get a copy of the default arguments:
    def get_default_args(self):
        return {
            # Shared configuration for all tasks:
            'owner': 'airflow',
            'retries': 3,
            # Shared configuration for all Docker tasks:
            'extra_hosts': {
                'h020nn': '192.168.1.103',
                'h020jt': '192.168.1.104',
                # Note that H3 config uses proper domain names like h3rm.wa.bl.uk
            },
            'mounts': [
                Mount( source=self.storage_path, target='/storage', type='bind' )
                 ],
            'email_on_failure': True,
            'email': [
                    Variable.get('alert_email_address')
                ],
            'auto_remove': False, # True is a bit aggressive and stops Airflow grabbing container logs.
            'do_xcom_push': False, # This is not currently working with DockerOperators so defaulting to off for now.
            'mount_tmp_dir': False, # Not supported by docker-in-docker tasks
        }   
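
The class above only collects shared settings; a minimal sketch of a DAG consuming them, in the style of the later examples (the DAG id, task id, schedule and command are hypothetical, not taken from the source):

from airflow.models import DAG
from airflow.operators.docker_operator import DockerOperator
from airflow.utils.dates import days_ago

from _common_ import Config

c = Config()

with DAG(dag_id='example_storage_listing',      # hypothetical DAG id
         default_args=c.get_default_args(),     # retries, email, mounts, extra_hosts
         start_date=days_ago(1),
         schedule_interval='@daily',
         catchup=False) as dag:

    # The image comes from the shared Config; the Mount and extra_hosts
    # entries are picked up from default_args by the DockerOperator.
    list_storage = DockerOperator(
        task_id='list_storage',
        image=c.ukwa_task_image,
        command='ls /storage',
    )
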
Example 2
    def test_backend_fallback_to_env_var(self, mock_get_uri):
        mock_get_uri.return_value = None

        backends = ensure_secrets_loaded()
        backend_classes = [backend.__class__.__name__ for backend in backends]
        assert 'SystemsManagerParameterStoreBackend' in backend_classes

        conn = Connection.get_connection_from_secrets(conn_id="test_mysql")

        # Assert that SystemsManagerParameterStoreBackend.get_conn_uri was called
        mock_get_uri.assert_called_once_with(conn_id='test_mysql')

        assert 'mysql://*****:*****@host:5432/airflow' == conn.get_uri()
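
The fallback path exercised here assumes the Parameter Store backend is configured ahead of Airflow's built-in backends. A minimal sketch of that setup using Airflow's standard configuration environment variables (the connections prefix and the test connection URI are illustrative, not taken from the source):

import os

# Put the SSM Parameter Store backend first in the search order
# (equivalent to the [secrets] section of airflow.cfg).
os.environ["AIRFLOW__SECRETS__BACKEND"] = (
    "airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend"
)
os.environ["AIRFLOW__SECRETS__BACKEND_KWARGS"] = '{"connections_prefix": "/airflow/connections"}'

# When the parameter is missing from SSM, the lookup falls back to the next
# backend, e.g. a connection defined as an AIRFLOW_CONN_* environment variable:
os.environ["AIRFLOW_CONN_TEST_MYSQL"] = "mysql://user:pass@host:5432/airflow"

from airflow.models import Connection

conn = Connection.get_connection_from_secrets("test_mysql")
print(conn.get_uri())
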
Example 3
import os

from airflow.models import Variable, DAG, Connection
from airflow.utils.dates import days_ago
from airflow.operators.docker_operator import DockerOperator

from _common_ import Config

# Pick up shared configuration:
c = Config()

# These args will get passed on to each operator/task:
default_args = c.get_default_args()

# Connection to W3ACT PostgreSQL DB to use:
trackdb = Connection.get_connection_from_secrets("trackdb")
trackdb_url = trackdb.get_uri().replace('%2F', '/')


# Use a function to generate parameterised DAGs:
def generate_update_dag(path, hadoop_service, schedule_interval, args):
    dag_id = 'update_trackdb_%s%s' % (hadoop_service, path.replace('/', '_'))
    with DAG(dag_id=dag_id,
             default_args=args,
             schedule_interval=schedule_interval,
             start_date=days_ago(1),
             max_active_runs=1,
             catchup=False,
             params={
                 'path': path,
                 'lsr_txt': '/storage/hadoop_lsr_%s.txt' % dag_id,
Example 4
    def test_get_connection_first_try(self, mock_env_get, mock_meta_get):
        mock_env_get.side_effect = ["something"]  # returns something
        Connection.get_connection_from_secrets("fake_conn_id")
        mock_env_get.assert_called_once_with(conn_id="fake_conn_id")
        mock_meta_get.assert_not_called()
Example 5
    def test_get_connection_second_try(self, mock_env_get, mock_meta_get):
        mock_env_get.side_effect = [None]  # return None
        Connection.get_connection_from_secrets("fake_conn_id")
        mock_meta_get.assert_called_once_with(conn_id="fake_conn_id")
        mock_env_get.assert_called_once_with(conn_id="fake_conn_id")
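
Both methods above expect the environment-variables and metastore backends to be patched in by decorators. A minimal sketch of the surrounding scaffolding, assuming patch targets that follow Airflow's secrets module layout (the exact paths and method names are an assumption and vary between Airflow versions):

from unittest import mock

from airflow.models import Connection


class TestConnectionFromSecrets:

    # Assumed patch targets: Connection.get_connection_from_secrets() consults
    # the environment-variables backend before the metastore backend.
    @mock.patch("airflow.secrets.metastore.MetastoreBackend.get_connection")
    @mock.patch("airflow.secrets.environment_variables.EnvironmentVariablesBackend.get_connection")
    def test_get_connection_first_try(self, mock_env_get, mock_meta_get):
        mock_env_get.side_effect = ["something"]  # env backend answers first
        Connection.get_connection_from_secrets("fake_conn_id")
        mock_env_get.assert_called_once_with(conn_id="fake_conn_id")
        mock_meta_get.assert_not_called()
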
Example 6
from airflow.operators.bash import BashOperator
from airflow.providers.ssh.operators.ssh import SSHOperator
from airflow.operators.docker_operator import DockerOperator
from airflow.operators.python import get_current_context
from airflow.models import Variable, Connection, DAG

from _common_ import Config

# Pick up shared configuration:
c = Config()

# These args will get passed on to each operator/task:
default_args = c.get_default_args()

# Connection to W3ACT PostgreSQL DB to use:
access_w3act = Connection.get_connection_from_secrets("access_w3act")

# Which Collections Solr to update:
collections_solr = Connection.get_connection_from_secrets(
    "access_collections_solr")

# Connection used to commit to the GitLab Wayback ACLs repository (with the access token embedded in it):
try:
    gitlab_wayback_acl_remote = Connection.get_connection_from_secrets(
        "gitlab_wayback_acl_remote")
    # GitLab does not like the slash in the path being escaped:
    gitlab_wayback_acl_remote = gitlab_wayback_acl_remote.get_uri().replace(
        '%2F', '/')
except Exception as e:
    print("WARNING! no gitlab_wayback_acl_remote found!")
    print(e)
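
The snippet ends before any tasks are declared. A minimal sketch of how the retrieved W3ACT credentials might be handed to a Docker task, following the pattern of the shared Config (the DAG id, task id, command and environment variable names here are hypothetical):

from airflow.utils.dates import days_ago

with DAG(dag_id='example_w3act_export',         # hypothetical DAG id
         default_args=default_args,
         start_date=days_ago(1),
         schedule_interval='@daily',
         catchup=False) as dag:

    # Hypothetical task: pass the W3ACT DB credentials in as environment
    # variables rather than embedding them in the command line.
    export_acls = DockerOperator(
        task_id='export_acls',
        image=c.w3act_task_image,
        command='w3act --help',                  # placeholder command
        environment={
            'W3ACT_HOST': access_w3act.host,
            'W3ACT_PORT': str(access_w3act.port),
            'W3ACT_USER': access_w3act.login,
            'W3ACT_PASSWORD': access_w3act.password,
        },
    )
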