def make_prediction(model_id, pool_id, default_args, localhost_dir, build_dir, start=None):
    with DAG("update-data-task", default_args=default_args,
             schedule_interval="@daily", catchup=True) as dag:
        data_op = DockerOperator(
            task_id="update-data-pool-id",
            image="eugenepy/akira-data:latest",
            api_version="auto",
            command="variable-task update-pool -i {{ params.pool_id }} -s " +
                    "{{ (execution_date - macros.timedelta(days=1)).strftime('%Y%m%d') }}" +
                    " -e {{ execution_date.strftime('%Y%m%d') }} -l investingdotcom save " +
                    "--filename /build/akira_data.test.csv",
            params={"pool_id": pool_id},
            volumes=[f"{localhost_dir}:{build_dir}"],
            network_mode="akira-project_default",
            docker_url="tcp://socat:2375")
        predict_op = DockerOperator(
            task_id="train-model",
            image="eugenepy/basket:latest",
            api_version="auto",
            command="python3 -m baksets predict -m /build/bmk.pkl " +
                    "-i /build/akira_data.test.csv -o /build/akira_data.predict.csv",
            volumes=[f"{localhost_dir}:{build_dir}"],
            network_mode="akira-project_default",
            docker_url="tcp://socat:2375")
        data_op >> predict_op
    return dag
def test_execute_tls(self, client_class_mock, tls_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = 0
    client_class_mock.return_value = client_mock

    tls_mock = mock.Mock()
    tls_class_mock.return_value = tls_mock

    operator = DockerOperator(docker_url='tcp://127.0.0.1:2376',
                              image='ubuntu',
                              owner='unittest',
                              task_id='unittest',
                              tls_client_cert='cert.pem',
                              tls_ca_cert='ca.pem',
                              tls_client_key='key.pem')
    operator.execute(None)

    tls_class_mock.assert_called_with(assert_hostname=None,
                                      ca_cert='ca.pem',
                                      client_cert=('cert.pem', 'key.pem'),
                                      ssl_version=None,
                                      verify=True)
    client_class_mock.assert_called_with(base_url='https://127.0.0.1:2376',
                                         tls=tls_mock,
                                         version=None)
def test_execute_no_docker_conn_id_no_hook(self, operator_client_mock):
    # Mock out a Docker client, so operations don't raise errors
    client_mock = mock.Mock(name='DockerOperator.APIClient mock', spec=APIClient)
    client_mock.images.return_value = []
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    operator_client_mock.return_value = client_mock

    # Create the DockerOperator
    operator = DockerOperator(
        image='publicregistry/someimage',
        owner='unittest',
        task_id='unittest'
    )

    # Mock out the DockerHook
    hook_mock = mock.Mock(name='DockerHook mock', spec=DockerHook)
    hook_mock.get_conn.return_value = client_mock
    operator.get_hook = mock.Mock(
        name='DockerOperator.get_hook mock',
        spec=DockerOperator.get_hook,
        return_value=hook_mock
    )

    operator.execute(None)
    self.assertEqual(
        operator.get_hook.call_count, 0,
        'Hook called though no docker_conn_id configured'
    )
def train_model(model_image, model_id, pool_id, start, default_args,
                localhost_dir, build_dir):
    with DAG("update-data-task", default_args=default_args,
             schedule_interval="@daily", catchup=True) as dag:
        data_op = DockerOperator(
            task_id="update-data-pool-id",
            image="eugenepy/akira-data:latest",
            api_version="auto",
            command="variable-task update-pool -i {{ params.pool_id }} -s " +
                    "{{ params.start }} -e {{ ds_nodash }} -l investingdotcom save " +
                    "--filename /build/akira_data.csv",
            params={"start": start, "pool_id": pool_id},
            volumes=[f"{localhost_dir}:{build_dir}"],
            network_mode="akira-project_default",
            docker_url="tcp://socat:2375")
        train_op = DockerOperator(
            task_id="train-model",
            image=model_image,
            api_version="auto",
            command="make -f Makefile.model train_bmk",
            volumes=[f"{localhost_dir}:{build_dir}"],
            network_mode="akira-project_default",
            docker_url="tcp://socat:2375")
        data_op >> train_op
    return dag
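# A minimal usage sketch for the factory above (all argument values here are
# hypothetical): Airflow discovers the DAG when the factory is called at module
# level and the result is bound to a global name.
dag = train_model(
    model_image="eugenepy/basket:latest",
    model_id="bmk",
    pool_id="fx-pool",
    start="20200101",
    default_args=default_args,
    localhost_dir="/home/airflow/build",
    build_dir="/build",
)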
def test_execute_with_docker_conn_id_use_hook(self, operator_client_mock,
                                              operator_docker_hook):
    # Mock out a Docker client, so operations don't raise errors
    client_mock = mock.Mock(name='DockerOperator.APIClient mock', spec=APIClient)
    client_mock.images.return_value = []
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = 0
    operator_client_mock.return_value = client_mock

    # Create the DockerOperator
    operator = DockerOperator(image='publicregistry/someimage',
                              owner='unittest',
                              task_id='unittest',
                              docker_conn_id='some_conn_id')

    # Mock out the DockerHook
    hook_mock = mock.Mock(name='DockerHook mock', spec=DockerHook)
    hook_mock.get_conn.return_value = client_mock
    operator_docker_hook.return_value = hook_mock

    operator.execute(None)

    self.assertEqual(
        operator_client_mock.call_count, 0,
        'APIClient was called on the operator instead of the hook')
    self.assertEqual(
        operator_docker_hook.call_count, 1,
        'Hook was not called although docker_conn_id configured')
    self.assertEqual(
        client_mock.pull.call_count, 1,
        'Image was not pulled using operator client')
def test_execute_no_docker_conn_id_no_hook(self, operator_client_mock):
    # Mock out a Docker client, so operations don't raise errors
    client_mock = mock.Mock(name='DockerOperator.APIClient mock', spec=APIClient)
    client_mock.images.return_value = []
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.attach.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    operator_client_mock.return_value = client_mock

    # Create the DockerOperator
    operator = DockerOperator(
        image='publicregistry/someimage',
        owner='unittest',
        task_id='unittest'
    )

    # Mock out the DockerHook
    hook_mock = mock.Mock(name='DockerHook mock', spec=DockerHook)
    hook_mock.get_conn.return_value = client_mock
    operator.get_hook = mock.Mock(
        name='DockerOperator.get_hook mock',
        spec=DockerOperator.get_hook,
        return_value=hook_mock
    )

    operator.execute(None)
    self.assertEqual(
        operator.get_hook.call_count, 0,
        'Hook called though no docker_conn_id configured'
    )
def test_execute(self, client_class_mock, mkdtemp_mock):
    host_config = mock.Mock()
    mkdtemp_mock.return_value = '/mkdtemp'

    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = host_config
    client_mock.images.return_value = []
    client_mock.logs.return_value = ['container log']
    client_mock.pull.return_value = [b'{"status":"pull log"}']
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    operator = DockerOperator(api_version='1.19',
                              command='env',
                              environment={'UNIT': 'TEST'},
                              image='ubuntu:latest',
                              network_mode='bridge',
                              owner='unittest',
                              task_id='unittest',
                              volumes=['/host/path:/container/path'],
                              working_dir='/container/path',
                              shm_size=1000,
                              host_tmp_dir='/host/airflow')
    operator.execute(None)

    client_class_mock.assert_called_with(
        base_url='unix://var/run/docker.sock', tls=None, version='1.19')
    client_mock.create_container.assert_called_with(
        command='env',
        environment={
            'AIRFLOW_TMP_DIR': '/tmp/airflow',
            'UNIT': 'TEST'
        },
        host_config=host_config,
        image='ubuntu:latest',
        user=None,
        working_dir='/container/path')
    client_mock.create_host_config.assert_called_with(
        binds=['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
        network_mode='bridge',
        shm_size=1000,
        cpu_shares=1024,
        mem_limit=None,
        auto_remove=False,
        dns=None,
        dns_search=None)
    mkdtemp_mock.assert_called_with(dir='/host/airflow',
                                    prefix='airflowtmp',
                                    suffix='')
    client_mock.images.assert_called_with(name='ubuntu:latest')
    client_mock.logs.assert_called_with(container='some_id', stream=True)
    client_mock.pull.assert_called_with('ubuntu:latest', stream=True)
    client_mock.wait.assert_called_with('some_id')
def test_on_kill():
    client_mock = mock.Mock(spec=APIClient)

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')
    operator.cli = client_mock
    operator.container = {'Id': 'some_id'}

    operator.on_kill()

    client_mock.stop.assert_called_with('some_id')
def test_execute_container_fails(self, client_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 1}
    client_class_mock.return_value = client_mock

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    with self.assertRaises(AirflowException):
        operator.execute(None)
def test_execute_container_fails(self, client_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.attach.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 1}
    client_class_mock.return_value = client_mock

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    with self.assertRaises(AirflowException):
        operator.execute(None)
def make_etl_operator(task_id: str, operation: str):
    cmd = f"'etl --redis-url redis:6379 {operation}'"
    return DockerOperator(command=cmd,
                          environment={"PYTHONUNBUFFERED": 1},
                          task_id=task_id,
                          image="etl-dummy:latest",
                          auto_remove=True,
                          network_mode="airflow-tutorial_default")
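# A minimal sketch of how the factory above might be wired together, assuming it
# is called inside a `with DAG(...)` block; the "extract" and "load" operation
# names are hypothetical:
extract_op = make_etl_operator(task_id="etl_extract", operation="extract")
load_op = make_etl_operator(task_id="etl_load", operation="load")
extract_op >> load_op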
def get_task(activity, city):
    return DockerOperator(task_id=f'mine_{activity}_{city}',
                          image='tahasadiki/telecontact-scraper:latest',
                          api_version='auto',
                          auto_remove=True,
                          command=f"{activity} {city}",
                          docker_url="unix://var/run/docker.sock",
                          network_mode="bridge")
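# A minimal sketch fanning the factory out over a grid of scrape targets,
# assuming it is called inside a `with DAG(...)` block; the activity and city
# values below are hypothetical:
for activity in ("restaurants", "pharmacies"):
    for city in ("casablanca", "rabat"):
        get_task(activity, city)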
def test_execute(self, client_class_mock, mkdtemp_mock):
    host_config = mock.Mock()
    mkdtemp_mock.return_value = '/mkdtemp'

    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = host_config
    client_mock.images.return_value = []
    client_mock.logs.return_value = ['container log']
    client_mock.pull.return_value = [b'{"status":"pull log"}']
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    operator = DockerOperator(api_version='1.19',
                              command='env',
                              environment={'UNIT': 'TEST'},
                              image='ubuntu:latest',
                              network_mode='bridge',
                              owner='unittest',
                              task_id='unittest',
                              volumes=['/host/path:/container/path'],
                              working_dir='/container/path',
                              shm_size=1000)
    operator.execute(None)

    client_class_mock.assert_called_with(base_url='unix://var/run/docker.sock',
                                         tls=None,
                                         version='1.19')
    client_mock.create_container.assert_called_with(
        command='env',
        environment={
            'AIRFLOW_TMP_DIR': '/tmp/airflow',
            'UNIT': 'TEST'
        },
        host_config=host_config,
        image='ubuntu:latest',
        user=None,
        working_dir='/container/path')
    client_mock.create_host_config.assert_called_with(
        binds=['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
        network_mode='bridge',
        shm_size=1000,
        cpu_shares=1024,
        mem_limit=None,
        auto_remove=False,
        dns=None,
        dns_search=None)
    client_mock.images.assert_called_with(name='ubuntu:latest')
    client_mock.logs.assert_called_with(container='some_id', stream=True)
    client_mock.pull.assert_called_with('ubuntu:latest', stream=True)
    client_mock.wait.assert_called_with('some_id')
def mapping(dict, dag1):
    if not dict['ttl']:
        t1 = DockerOperator(
            task_id=dict['task_id'],
            image=dict['image'],
            command=eval(dict['command']),
            xcom_push=bool(dict['xcom_push']),
            dag=dag1)
        return [t1]
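# A sketch of the task definition mapping() appears to expect (keys taken from
# the lookups above; the concrete values are hypothetical). Note that 'command'
# is passed through eval(), so it must contain a Python expression as a string:
task_def = {
    'ttl': '',                  # falsy, so a DockerOperator is created
    'task_id': 'echo_task',
    'image': 'alpine:latest',
    'command': '"echo hello"',  # eval('"echo hello"') -> 'echo hello'
    'xcom_push': '',            # bool('') -> False
}
tasks = mapping(task_def, dag1)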
def test_execute_unicode_logs(self, client_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.attach.return_value = ['unicode container log 😁']
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    originalRaiseExceptions = logging.raiseExceptions  # pylint: disable=invalid-name
    logging.raiseExceptions = True

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    with mock.patch('traceback.print_exception') as print_exception_mock:
        operator.execute(None)
        logging.raiseExceptions = originalRaiseExceptions
        print_exception_mock.assert_not_called()
def test_execute_unicode_logs(self, client_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.logs.return_value = ['unicode container log 😁']
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    originalRaiseExceptions = logging.raiseExceptions
    logging.raiseExceptions = True

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    with mock.patch('traceback.print_exception') as print_exception_mock:
        operator.execute(None)
        logging.raiseExceptions = originalRaiseExceptions
        print_exception_mock.assert_not_called()
def test_execute_with_docker_conn_id_use_hook(self, operator_client_mock):
    # Mock out a Docker client, so operations don't raise errors
    client_mock = mock.Mock(name='DockerOperator.Client mock', spec=Client)
    client_mock.images.return_value = []
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = 0
    operator_client_mock.return_value = client_mock

    # Create the DockerOperator
    operator = DockerOperator(
        image='publicregistry/someimage',
        owner='unittest',
        task_id='unittest',
        docker_conn_id='some_conn_id'
    )

    # Mock out the DockerHook
    hook_mock = mock.Mock(name='DockerHook mock', spec=DockerHook)
    hook_mock.get_conn.return_value = client_mock
    operator.get_hook = mock.Mock(
        name='DockerOperator.get_hook mock',
        spec=DockerOperator.get_hook,
        return_value=hook_mock
    )

    operator.execute(None)

    self.assertEqual(
        operator_client_mock.call_count, 0,
        'Client was called on the operator instead of the hook'
    )
    self.assertEqual(
        operator.get_hook.call_count, 1,
        'Hook was not called although docker_conn_id configured'
    )
    self.assertEqual(
        client_mock.pull.call_count, 1,
        'Image was not pulled using operator client'
    )
def create_docker_operator(params):
    """Create DockerOperator with default kwargs."""
    # Create defaults.
    defaults = {
        'remove': True,
        'xcom_push': True,
        'volumes': ['/var/log/filebeat:/usr/local/src/log']
    }

    # Merge params.
    docker_params = defaults.copy()
    docker_params.update(params)

    # Return a new DockerOperator.
    return DockerOperator(**docker_params)
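# Example use of the factory above: per-call params are merged over (and can
# override) the defaults. The task_id, image, and command here are hypothetical:
push_logs = create_docker_operator({
    'task_id': 'push_logs',
    'image': 'mycorp/log-shipper:latest',
    'command': 'ship --all',
    'dag': dag,
})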
def test_execute_tls(self, client_class_mock, tls_class_mock):
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.logs.return_value = []
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    tls_mock = mock.Mock()
    tls_class_mock.return_value = tls_mock

    operator = DockerOperator(docker_url='tcp://127.0.0.1:2376',
                              image='ubuntu',
                              owner='unittest',
                              task_id='unittest',
                              tls_client_cert='cert.pem',
                              tls_ca_cert='ca.pem',
                              tls_client_key='key.pem')
    operator.execute(None)

    tls_class_mock.assert_called_with(assert_hostname=None,
                                      ca_cert='ca.pem',
                                      client_cert=('cert.pem', 'key.pem'),
                                      ssl_version=None,
                                      verify=True)
    client_class_mock.assert_called_with(base_url='https://127.0.0.1:2376',
                                         tls=tls_mock,
                                         version=None)
def dump_pool_file_to_arctic_subdag(name, pool_id, index_col, symbol_header,
                                    field_header, start, localhost_dir):
    # Data is updated every morning at 4 AM (UTC+8).
    build_dir = "/build"
    data_op = DockerOperator(
        task_id=name,
        image="eugenepy/akira-data:latest",
        api_version="auto",
        command="python -m akira_data variable-task update-pool -i {{ params.pool_id }} -s " +
                "{{ params.start }} -e {{ ds_nodash }} -l investingdotcom save " +
                "--filename {{ params.build_dir }}/{{ params.pool_id }}.{{ ds_nodash }}.csv",
        params={"start": start, "pool_id": pool_id, "build_dir": build_dir},
        volumes=[f"{localhost_dir}:{build_dir}"],  # save at airflow's container?
        network_mode=network,  # connect2mongodb
        docker_url="tcp://socat:2375",
        auto_remove=True)
    return data_op
def caom_commands(artifact, **kwargs):
    uri_list = "{{ task_instance.xcom_pull(task_ids='get_observations') }}"
    # return PythonOperator(python_callable=do_that, provide_context=True,
    #                       task_id='meta_{}'.format(artifact),
    #                       dag=poc_dag, op_kwargs={'artifact': artifact})  # file not found error
    # x = DockerOperator(docker_url='unix:///var/run/docker.sock',  # connection refused
    # x = DockerOperator(docker_url='tcp://localhost:2375',  # connection refused
    x = DockerOperator(docker_url='tcp://localhost:2376',
                       command='omm_run {}'.format(artifact),
                       image='opencadc/omm2caom2',
                       network_mode='bridge',
                       task_id='meta_{}'.format(artifact),
                       docker_conn_id='my_docker',
                       dag=poc_dag)
    return x
def _create_task(task_id, dag, image, command, environment):
    env = {
        'WAREHOUSE_URL': get_postgres_uri('warehouse_db'),
        'DATABASE_URL': get_postgres_uri('api_db'),
        'EXPLORERDB_URL': get_postgres_uri('explorer_db'),
        'PYTHON_ENV': airflow.models.Variable.get('ENV'),
        'LOGGING_URL': airflow.models.Variable.get('LOGGING_URL'),
        'DOWNLOAD_DELAY': airflow.models.Variable.get('DOWNLOAD_DELAY'),
    }
    env.update(environment)
    docker_api_version = os.environ.get('DOCKER_API_VERSION', '1.23')
    return DockerOperator(
        task_id=task_id,
        dag=dag,
        image=image,
        command=command,
        environment=env,
        api_version=docker_api_version,
        force_pull=True,
    )
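# get_postgres_uri() is not shown in this excerpt. A plausible sketch, assuming
# it resolves an Airflow connection of the same id into a database URI:
def get_postgres_uri(conn_id):
    from airflow.hooks.base_hook import BaseHook
    conn = BaseHook.get_connection(conn_id)
    return 'postgres://{}:{}@{}:{}/{}'.format(
        conn.login, conn.password, conn.host, conn.port or 5432, conn.schema)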
def test_execute_xcom_behavior(self, client_class_mock, tempdir_mock):
    tempdir_mock.return_value.__enter__.return_value = '/mkdtemp'

    client_mock = mock.Mock(spec=APIClient)
    client_mock.images.return_value = []
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.attach.return_value = ['container log']
    client_mock.pull.return_value = [b'{"status":"pull log"}']
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    kwargs = {
        'api_version': '1.19',
        'command': 'env',
        'environment': {'UNIT': 'TEST'},
        'image': 'ubuntu:latest',
        'network_mode': 'bridge',
        'owner': 'unittest',
        'task_id': 'unittest',
        'volumes': ['/host/path:/container/path'],
        'working_dir': '/container/path',
        'shm_size': 1000,
        'host_tmp_dir': '/host/airflow',
        'container_name': 'test_container',
        'tty': True,
    }

    xcom_push_operator = DockerOperator(**kwargs, do_xcom_push=True)
    no_xcom_push_operator = DockerOperator(**kwargs, do_xcom_push=False)

    xcom_push_result = xcom_push_operator.execute(None)
    no_xcom_push_result = no_xcom_push_operator.execute(None)

    self.assertEqual(xcom_push_result, b'container log')
    self.assertIs(no_xcom_push_result, None)
dag_id="atd_knack_signal_work_orders", default_args=default_args, schedule_interval="50 8 * * *", dagrun_timeout=timedelta(minutes=60), tags=["production", "knack"], catchup=False, ) as dag: # completely replace data on 15th day of every month # this is a failsafe catch records that may have been missed via incremental loading date_filter = "{{ '1970-01-01' if ds.endswith('15') else prev_execution_date_success or '1970-01-01' }}" # noqa:E501 t1 = DockerOperator( task_id="atd_knack_traffic_signal_work_orders_to_postgrest", image=docker_image, api_version="auto", auto_remove=True, command=f'./atd-knack-services/services/{script_task_1}.py -a {app_name} -c {container} -d "{date_filter}"', # noqa:E501 docker_url="tcp://localhost:2376", network_mode="bridge", environment=env_vars, tty=True, ) t2 = DockerOperator( task_id="atd_knack_traffic_signal_work_orders_to_socrata", image=docker_image, api_version="auto", auto_remove=True, command=f'./atd-knack-services/services/{script_task_2}.py -a {app_name} -c {container} -d "{date_filter}"', # noqa docker_url="tcp://localhost:2376", network_mode="bridge", environment=env_vars,
f"atd_mds_{mds_provider}_staging", default_args=default_args, schedule_interval="15 * * * *", catchup=False, tags=["staging", "mds"], ) as dag: # # Task: provider_extract # Description: Given a schedule block, the script extracts data from the MDS provider within the schedule's time window # then it uploads the data into S3 for further processing. # t1 = DockerOperator( task_id='provider_extract', image=docker_image, api_version='auto', auto_remove=True, command= f"./provider_extract.py --provider '{mds_provider}' --time-max '{time_max}' --interval 1", docker_url="tcp://localhost:2376", network_mode="bridge", environment=environment_vars) # # Task: provider_sync_db # Description: Downloads the extracted MDS data from S3, and inserts each trip into a postgres database. # t2 = DockerOperator( task_id='provider_sync_db', image=docker_image, api_version='auto', auto_remove=True, command=
* Moves WARCs from warcprox into the right place in the /heritrix/output folders.
* **TBA** 'Closes' WARCs that are .open, if they are older than a few days.

Configuration:

* The tasks are configured to scan `/mnt/gluster/fc`.
* The push gateway is configured to be `{c.push_gateway}`.

How to check it's working:

* Task Instance logs show how many WARCs were moved.
* Prometheus updated via Push Gateway with `ukwa_files_moved_total_count{{kind='warcprox-warcs'}}` counts.
* Look for job results in [the push gateway configured for this task](http://{c.push_gateway}).
* For example results from Prometheus in production, see [here](http://monitor-prometheus.api.wa.bl.uk/graph?g0.expr=ukwa_files_moved_total_count{{kind='warcprox-warc'}}&g0.tab=0&g0.stacked=0&g0.range_input=4w).
"""

tidy = DockerOperator(
    task_id='move-warcprox-warcs',
    image=c.ukwa_task_image,
    command='store -v warctidy',
    user=0,  # Run as root due to file permissions
    volumes=['/mnt/gluster/fc:/mnt/gluster/fc'],
    environment={
        'PUSH_GATEWAY': c.push_gateway,
    },
    tty=True,  # <-- So we see logging
    do_xcom_push=False,
)
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG(
    'ByName',
    default_args=default_args,
    description='Filter by name dag',
    schedule_interval='@daily',
)

t1 = DockerOperator(task_id='DockerOperator',
                    image='faizan-k_devchallenge',
                    api_version='auto',
                    auto_remove=True,
                    command='byname -n "{}"'.format(beer_name),
                    docker_url="unix://var/run/docker.sock",
                    network_mode="bridge",
                    xcom_push=True,
                    dag=dag)

def perform_calculation(**context):
    output = json.loads(context['ti'].xcom_pull(task_ids='DockerOperator'))
    avg_ibu_ibv = json.dumps({
        "avg_ibu": sum([i['ibu'] for i in output]) / (len(output) or 1),
        "avg_abv": sum([i['abv'] for i in output]) / (len(output) or 1)
    })
    context['ti'].xcom_push(key="AVG_IBU_ABV", value=avg_ibu_ibv)
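# A minimal sketch wiring perform_calculation into the DAG after the Docker
# task, so it can pull the container output from XCom (task name hypothetical):
t2 = PythonOperator(task_id='perform_calculation',
                    python_callable=perform_calculation,
                    provide_context=True,
                    dag=dag)
t1 >> t2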
}

with DAG('fix_s3_recording_url_pipeline',
         default_args=default_args,
         schedule_interval='*/10 * * * *',
         catchup=False) as dag:
    t1 = BashOperator(
        task_id='login_aws',
        bash_command='$(aws ecr get-login --region eu-west-1 --no-include-email)')

    t2 = DockerOperator(
        task_id='fix_s3_recording_url_pipeline',
        auto_remove=True,
        image=IMAGE_NAME,
        api_version='auto',
        command=COMMAND,
        docker_url='unix://var/run/docker.sock',
        network_mode='host',
        environment={
            'DATABASE_HOST': DATABASE_HOST,
            'ELASTICSEARCH_URL': ELASTICSEARCH_URL,
            'DYNAMODB_HOST': DYNAMODB_HOST,
        },
        volumes=[LOG_DIRECTORY, BOTO_CREDENTIAL],
        force_pull=True,
    )

    t2.set_upstream(t1)
    'on_failure_callback': send_alert_task_failure_to_slack
}

with DAG('sync_country_from_zendesk_pipeline',
         default_args=default_args,
         schedule_interval="0 0 * * *",
         catchup=False) as dag:
    t1 = BashOperator(
        task_id='login_aws',
        bash_command='$(aws ecr get-login --region eu-west-1 --no-include-email)')

    t2 = DockerOperator(
        task_id='sync_country_from_zendesk_pipeline',
        auto_remove=True,
        image=IMAGE_NAME,
        api_version='auto',
        command=COMMAND,
        docker_url='unix://var/run/docker.sock',
        network_mode='host',
        environment={
            'DATABASE_HOST': DATABASE_HOST,
            'ELASTICSEARCH_URL': ELASTICSEARCH_URL,
            'DYNAMODB_HOST': DYNAMODB_HOST
        },
        volumes=[LOG_DIRECTORY],
        force_pull=True,
    )

    t2.set_upstream(t1)
         default_args=default_args,
         schedule_interval='*/15 * * * *',
         catchup=False) as dag:
    t1 = BashOperator(
        task_id='print_start_time',
        bash_command='echo `date "+%Y-%m-%d %H:%M:%S"` "- Airflow Task Started"')

    t2 = DockerOperator(task_id='docker_command',
                        image='entechlog/weather-alert-app:latest',
                        api_version='auto',
                        auto_remove=True,
                        docker_url="unix://var/run/docker.sock",
                        network_mode="weatheralertapp_default",
                        environment={
                            'bootstrap_servers': "broker:9092",
                            'schema_registry_url': "http://schema-registry:8081",
                            'topic_name': "weather.alert.app.source",
                            'lat': "8.270272",
                            'lon': "77.177274",
                            'OPEN_WEATHER_API_KEY': ""
                        })

    t3 = BashOperator(
        task_id='print_end_time',
        bash_command='echo `date "+%Y-%m-%d %H:%M:%S"` "- Airflow Task Finished"')

    t1 >> t2 >> t3
env_vars["SOCRATA_API_KEY_ID"] = Variable.get("atd_service_bot_socrata_api_key_id") env_vars["SOCRATA_API_KEY_SECRET"] = Variable.get( "atd_service_bot_socrata_api_key_secret" ) env_vars["SOCRATA_APP_TOKEN"] = Variable.get("atd_service_bot_socrata_app_token") with DAG( dag_id="atd_kits_sig_stat_pub", default_args=default_args, schedule_interval="*/5 * * * *", dagrun_timeout=timedelta(minutes=60), tags=["production", "socrata", "kits"], catchup=False, ) as dag: t1 = DockerOperator( task_id="atd_kits_sig_status_to_socrata", image=docker_image, api_version="auto", auto_remove=True, command="./atd-kits/atd-kits/signal_status_publisher.py", docker_url="tcp://localhost:2376", network_mode="bridge", environment=env_vars, tty=True, ) t1 if __name__ == "__main__": dag.cli()
    default_args=default_args,
    schedule_interval="0 8 * * *",
    catchup=False,
    tags=["production", "visionzero"],
) as dag:
    #
    # Task: docker_command_crashes
    # Description: Imports a raw CSV file with crash records into our database via GraphQL/Hasura.
    #
    crash = DockerOperator(
        task_id='docker_command_crashes',
        image='atddocker/atd-vz-etl:production',
        api_version='auto',
        auto_remove=True,
        command="/app/process_hasura_import.py crash",
        docker_url="tcp://localhost:2376",
        network_mode="bridge",
        environment=atd_visionzero_cris_envvars,
        volumes=[
            atd_visionzero_cris_volumes["ATD_VOLUME_DATA"],
            atd_visionzero_cris_volumes["ATD_VOLUME_TEMP"],
        ],
    )

    #
    # Task: docker_command_unit
    # Description: Imports a raw CSV file with unit records into our database via GraphQL/Hasura.
    #
    unit = DockerOperator(
        task_id='docker_command_unit',
        image='atddocker/atd-vz-etl:production',
        api_version='auto',
    'dataflow_default_options': {
        'project': os.environ['GCP_PROJECT']
    }
}

dag = DAG('firearm_seizures',
          default_args=default_args,
          schedule_interval='@monthly')

gcs_load = DockerOperator(
    task_id='firearms_gcs_docker',
    image='gcr.io/data-rivers/pgh-firearms',
    api_version='auto',
    auto_remove=True,
    environment={
        'APRS_UN': os.environ['APRS_UN'],
        'APRS_PW': os.environ['APRS_PW'],
        'GCS_AUTH_FILE': '/root/firearm-seizures-report/data-rivers-service-acct.json',
        'GCS_PREFIX': os.environ['GCS_PREFIX']
    },
    dag=dag)

# dataflow_task = DataFlowPythonOperator(
#     task_id='firearms_dataflow',
#     job_name='firearms-dataflow',
#     py_file=os.getcwd() + '/airflow_scripts/dags/dependencies/dataflow_scripts/firearms_dataflow.py'),
#     dag=dag
# )

dataflow_task = BashOperator(
from airflow.operators.docker_operator import DockerOperator

default_args = {
    'owner': 'airflowMAT',
    'description': 'Use of the DockerOperator',
    'depends_on_past': False,
    'start_date': datetime(2018, 1, 3),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

with DAG('docker_dag',
         default_args=default_args,
         schedule_interval="5 * * * *",
         catchup=False) as dag:
    t1 = BashOperator(task_id='print_current_date', bash_command='date')

    t2 = DockerOperator(task_id='docker_command',
                        image='openjdk:latest',
                        api_version='auto',
                        auto_remove=True,
                        command="/bin/sleep 30",
                        docker_url="unix://var/run/docker.sock",
                        network_mode="bridge")

    t3 = BashOperator(task_id='print_hello', bash_command='echo "hello world"')

    t1 >> t2 >> t3
    'retry_delay': timedelta(minutes=15)
}

with DAG('bag_coronavirus',
         default_args=default_args,
         schedule_interval="15 * * * *",
         catchup=False) as dag:
    dag.doc_md = __doc__

    upload_bag_datasets = DockerOperator(
        task_id='upload_bag_datasets',
        image='bag_coronavirus:latest',
        api_version='auto',
        auto_remove=True,
        command='/bin/bash /code/data-processing/bag_coronavirus/etl_bag_datasets.sh ',
        container_name='bag_coronavirus--upload_bag_datasets',
        docker_url="unix://var/run/docker.sock",
        network_mode="bridge",
        tty=True,
        volumes=[
            '/data/dev/workspace/data-processing:/code/data-processing',
            '/mnt/OGD-DataExch/StatA/BAG_Coronavirus_Tests:/code/data-processing/bag_coronavirus/data'
        ])

    upload_vmdl = DockerOperator(
        task_id='upload_vmdl',
        image='bag_coronavirus:latest',
        api_version='auto',
        auto_remove=True,
        command='/bin/bash /code/data-processing/bag_coronavirus/etl_vmdl.sh ',
        container_name='bag_coronavirus--upload_vmdl',
fda_linker_task = SubDagOperator(
    dag=dag,
    subdag=fda_dap(parent_dag_name='fda',
                   child_dag_name='linker',
                   start_date=dag.start_date,
                   schedule_interval=dag.schedule_interval),
    task_id='linker',
)

remove_unknown_documentcloud_docs_task = DockerOperator(
    task_id='remove_unknown_documentcloud_docs',
    dag=dag,
    image='opentrials/processors:latest',
    force_pull=True,
    api_version='1.23',
    environment={
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'DATABASE_URL': helpers.get_postgres_uri('api_db'),
        'EXPLORERDB_URL': helpers.get_postgres_uri('explorer_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'DOCUMENTCLOUD_USERNAME': Variable.get('DOCUMENTCLOUD_USERNAME'),
        'DOCUMENTCLOUD_PASSWORD': Variable.get('DOCUMENTCLOUD_PASSWORD'),
        'DOCUMENTCLOUD_PROJECT': Variable.get('DOCUMENTCLOUD_PROJECT'),
        'FERNET_KEY': os.environ['FERNET_KEY'],
    },
    command='make start remove_unknown_documentcloud_docs')

remove_unknown_documentcloud_docs_task.set_upstream(fda_linker_task)
fda_linker_task.set_upstream(fda_dap_task)
    'docker_sample',
    default_args=default_args,
    schedule_interval=timedelta(minutes=10))

t1 = BashOperator(
    task_id='print_date',
    bash_command='date',
    dag=dag)

t2 = BashOperator(
    task_id='sleep',
    bash_command='sleep 5',
    retries=3,
    dag=dag)

t3 = DockerOperator(
    api_version='1.19',
    docker_url='tcp://localhost:2375',  # Set your docker URL
    command='/bin/sleep 30',
    image='centos:latest',
    network_mode='bridge',
    task_id='docker_op_tester',
    dag=dag)

t4 = BashOperator(
    task_id='print_hello',
    bash_command='echo "hello world!!!"',
    dag=dag)

t1.set_downstream(t2)
t1.set_downstream(t3)
t3.set_downstream(t4)