def test_file_transfer_with_intermediate_dir_error_get(self):
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF")

    # create a test file remotely
    create_file_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command="echo '{0}' > {1}".format(test_remote_file_content,
                                          self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(create_file_task)
    ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
    ti1.run()

    # get remote file to local
    get_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath_int_dir,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.GET,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    self.assertIsNotNone(get_test_task)
    ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # test the received content
    content_received = None
    with open(self.test_local_filepath_int_dir, 'r') as file:
        content_received = file.read()
    self.assertEqual(content_received.strip(), test_remote_file_content)
def test_file_transfer_with_intermediate_dir_put(self):
    test_local_file_content = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )
    # create a test file locally
    with open(self.test_local_filepath, 'wb') as file:
        file.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath_int_dir,
        operation=SFTPOperation.PUT,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    self.assertIsNotNone(put_test_task)
    ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="cat {0}".format(self.test_remote_filepath_int_dir),
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(check_file_task)
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    self.assertEqual(
        ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
        test_local_file_content)
def test_json_file_transfer_put(self):
    test_local_file_content = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )
    # create a test file locally
    with open(self.test_local_filepath, 'wb') as file:
        file.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="put_test_task",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        dag=self.dag,
    )
    assert put_test_task is not None
    ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="check_file_task",
        ssh_hook=self.hook,
        command=f"cat {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert check_file_task is not None
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    assert (
        ti3.xcom_pull(task_ids=check_file_task.task_id, key='return_value').strip()
        == b64encode(test_local_file_content).decode('utf-8')
    )
def test_pickle_file_transfer_get(self):
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF")

    # create a test file remotely
    create_file_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert create_file_task is not None
    ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
    ti1.run()

    # get remote file to local
    get_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.GET,
        dag=self.dag,
    )
    assert get_test_task is not None
    ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # test the received content
    content_received = None
    with open(self.test_local_filepath) as file:
        content_received = file.read()
    assert content_received.strip() == test_remote_file_content
def test_file_transfer_no_intermediate_dir_error_get(self):
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF")

    # create a test file remotely
    create_file_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert create_file_task is not None
    ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
    ti1.run()

    # Try to GET test file from remote.
    # This should raise an error with "No such file" as the directory
    # does not exist.
    with pytest.raises(Exception) as ctx:
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        assert get_test_task is not None
        ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
        ti2.run()
    assert 'No such file' in str(ctx.value)
def test_file_transfer_no_intermediate_dir_error_get(self):
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF")

    # create a test file remotely
    create_file_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command="echo '{0}' > {1}".format(test_remote_file_content,
                                          self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(create_file_task)
    ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
    ti1.run()

    # Try to GET test file from remote.
    # This should raise an error with "No such file" as the directory
    # does not exist.
    with self.assertRaises(Exception) as error:
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
        ti2.run()
    self.assertIn('No such file', str(error.exception))
task_id="linkedin-ads-sync", connector_id="{{ var.value.linkedin_connector_id }}", ) linkedin_sensor = FivetranSensor( task_id="linkedin-sensor", connector_id="{{ var.value.linkedin_connector_id }}", poke_interval=600, ) twitter_sync = FivetranOperator( task_id="twitter-ads-sync", connector_id="{{ var.value.twitter_connector_id }}", ) twitter_sensor = FivetranSensor( task_id="twitter-sensor", connector_id="{{ var.value.twitter_connector_id }}", poke_interval=600, ) dbt_run = SSHOperator( task_id="dbt_ad_reporting", command="cd dbt_ad_reporting ; ~/.local/bin/dbt run -m +ad_reporting", ssh_conn_id="dbtvm", ) linkedin_sync >> linkedin_sensor twitter_sync >> twitter_sensor [linkedin_sensor, twitter_sensor] >> dbt_run
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('refresh_npsr_project',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 7 * * *')

git_pull_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model && git pull'
# refresh_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model\\code\\data_model && python refresh_data_model.py'
refresh_aa_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model\\code\\applied_ai_data && python refresh_applied_ai_data.py'

gp = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='git_pull_latest',
                 command=git_pull_bash,
                 dag=dag)

# r = SSHOperator(ssh_conn_id='tableau_server',
#                 task_id='refresh_data',
#                 command=refresh_bash,
#                 dag=dag)

ra = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_applied_ai_data',
                 command=refresh_aa_bash,
                 dag=dag)

gp  # >> r
gp >> ra
def getsshoperator(taskname, xcompush, command):
    return SSHOperator(ssh_hook=getedgenodehook(),
                       task_id="Run_{}".format(taskname),
                       do_xcom_push=xcompush,
                       command=command)
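A minimal usage sketch for the factory above, assuming getedgenodehook() returns a configured SSHHook as implied by the snippet; the task name and command here are purely illustrative:

# Hypothetical call to the factory above: task_id becomes "Run_ingest_logs",
# and do_xcom_push=True lets downstream tasks read the command's stdout via XCom.
ingest_task = getsshoperator(taskname="ingest_logs",
                             xcompush=True,
                             command="bash /opt/jobs/ingest_logs.sh")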
linkedin_sync = FivetranOperator(
    task_id='linkedin-ads-sync',
    connector_id=Variable.get("linkedin_connector_id"),
    dag=dag)

linkedin_sensor = FivetranSensor(
    connector_id=Variable.get("linkedin_connector_id"),
    poke_interval=600,
    task_id='linkedin-sensor',
    dag=dag)

twitter_sync = FivetranOperator(
    task_id='twitter-ads-sync',
    connector_id=Variable.get("twitter_connector_id"),
    dag=dag)

twitter_sensor = FivetranSensor(
    connector_id=Variable.get("twitter_connector_id"),
    poke_interval=600,
    task_id='twitter-sensor',
    dag=dag)

dbt_run = SSHOperator(
    task_id='dbt_ad_reporting',
    command='cd dbt_ad_reporting ; ~/.local/bin/dbt run -m +ad_reporting',
    ssh_conn_id='dbtvm',
    dag=dag)

linkedin_sync >> linkedin_sensor
twitter_sync >> twitter_sensor
[linkedin_sensor, twitter_sensor] >> dbt_run
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': datetime(2019, 3, 6,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

dag = DAG('hr_tableau_security_sync',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 21 * * *')

hr_bash = 'cd C:\\Anaconda\\ETL\\tableau && python hr_security.py'
# epic_bash = 'cd C:\\Anaconda\\ETL\\tableau && python TableauEpicSecuritySync.py'
unlicense_bash = 'cd C:\\Anaconda\\ETL\\tableau && python Unlicense_Users.py'
mf_sched = 'cd C:\\Anaconda\\ETL\\tableau && python MF_Schedulers_Security.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='Sync_HR_Users_And_Groups',
                 command=hr_bash,
                 dag=dag)

# t2 = SSHOperator(ssh_conn_id='tableau_server',
#                  task_id='Sync_Epic_Users_And_Groups',
#                  command=epic_bash,
#                  dag=dag)

t3 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='Unlicense_Tableau_Users',
                 command=unlicense_bash,
                 dag=dag)

t4 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='MF_Schedulers_Security',
                 command=mf_sched,
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

dag = DAG('update_foundation_data',
          default_args=default_args,
          catchup=False,
          schedule_interval='00 21 * * *')

t1_bash = 'cd C:\\Anaconda\\ETL\\foundation && python DSS_D_Data.py'
t2_bash = 'cd C:\\Anaconda\\ETL\\foundation && python LU_Physicians.py'
t4_bash = 'cd C:\\Anaconda\\ETL\\misc_etl && python CovidWaiverData.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_dss_d_data',
                 command=t1_bash,
                 dag=dag)

t3 = PythonOperator(
    task_id='refresh_rvu_extract',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': 'c08148a1-cf27-48df-8c8f-fc29f2c77c12'},
    dag=dag)

t4 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_covid_waiver',
                 command=t4_bash,
                 dag=dag)

t1 >> t3
t4
# -l 30 raises login timeout since it seems to be finicky
# -h -1 removes header row and line of dashes underneath
query_cmd_patient = (f'sqlcmd -S {claro_server} -d Clarity_PRD_Report '
                     f'-i {Variable.get("claro_query_filepath")} '
                     f'-o {output_path_patient} '
                     f'-s"|" -W -X -I -l 30 -h -1')

copy_cmd_patient = f'pscp -pw {pw} {output_path_patient} {airflow_server_prod}:{basepath}/files'

encrypt_cmd_patient = (
    f"gpg --encrypt -vv --batch --yes --trust-model always -r "
    f"[email protected] {basepath}/files/{output_file_patient}")

query_patient = SSHOperator(ssh_conn_id='tableau_server',
                            task_id='query_claro_patient',
                            command=query_cmd_patient,
                            dag=dag)

copy_patient = SSHOperator(ssh_conn_id='tableau_server',
                           task_id='copy_claro_patient',
                           command=copy_cmd_patient,
                           dag=dag)

encrypt_patient = BashOperator(task_id='encrypt_file_patient',
                               bash_command=encrypt_cmd_patient,
                               dag=dag)

sftp_patient = SFTPOperator(
    task_id='upload_claro_to_sftp_patient',
    ssh_conn_id='claro_sftp',
    local_filepath=f'{basepath}/files/{output_file_patient}.gpg',
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 12, 12,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('retry_failed_tableau_extracts',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 15 * * *')

refresh_bash = 'cd C:\\Anaconda\\ETL\\tableau && python run_failed_extracts.py'

r = SSHOperator(ssh_conn_id='tableau_server',
                task_id='find_and_rerun_failed_extracts',
                command=refresh_bash,
                dag=dag)
pool_id = 'ebi_etl_pool'

results_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_results.py'
ranks_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_percentile.py'
results_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python ResponseToDB1.py'
ranks_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python RanksToDB1.py'

red = MsSqlOperator(sql='EXEC EBI_PressGaney_Results_Clarity_Logic;',
                    task_id='refresh_edw_data',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)

crd = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_results_to_db1',
                  command=results_bash,
                  dag=dag)

crk = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_ranks_to_db1',
                  command=ranks_bash,
                  dag=dag)

crdn = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_results_to_db1_new',
                   command=results_bash_new,
                   dag=dag)

crkn = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_ranks_to_db1_new',
                   command=ranks_bash_new,
    'task_id': 'refresh_revenue_cycle_pb_workqueue_telehealth',
    'datasource_id': '3fd8f8f1-439c-499c-9649-e15a3251bd35'
}, {
    'task_id': 'refresh_corporate_accounting_expense_reimbursement',
    'datasource_id': 'bb771489-e5a1-45c0-943b-7c8a02ad7aac'
}, {
    'task_id': 'refresh_pb_tdl_transactions',
    'datasource_id': '8a83879d-6937-4a31-a784-21107733854f'
}, {
    'task_id': 'refresh_epsi_department_utilization',
    'datasource_id': 'ba019a61-06d5-41ca-b2a0-904ca811b922'
}, {
    'task_id': 'refresh_corporate_accounting_ap_aging',
    'datasource_id': '04b12b4d-fb90-483e-b0ad-653f4e85867a'
}]

for d in datasources:
    task = PythonOperator(task_id=d['task_id'],
                          python_callable=refresh_tableau_extract,
                          op_kwargs={'datasource_id': d['datasource_id']},
                          dag=dag)
    task

sync = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='Sync_Telehealth_Providers',
                   command=telehlth_bash,
                   dag=dag)

sync
def test_arg_checking(self):
    # Exception should be raised if neither ssh_hook nor ssh_conn_id is provided
    with self.assertRaisesRegex(
            AirflowException,
            "Cannot operate without ssh_hook or ssh_conn_id."):
        task_0 = SSHOperator(task_id="test",
                             command=COMMAND,
                             timeout=TIMEOUT,
                             dag=self.dag)
        task_0.execute(None)

    # if ssh_hook is invalid/not provided, use ssh_conn_id to create SSHHook
    task_1 = SSHOperator(
        task_id="test_1",
        ssh_hook="string_rather_than_SSHHook",  # invalid ssh_hook
        ssh_conn_id=TEST_CONN_ID,
        command=COMMAND,
        timeout=TIMEOUT,
        dag=self.dag)
    try:
        task_1.execute(None)
    except Exception:  # pylint: disable=broad-except
        pass
    self.assertEqual(task_1.ssh_hook.ssh_conn_id, TEST_CONN_ID)

    task_2 = SSHOperator(
        task_id="test_2",
        ssh_conn_id=TEST_CONN_ID,  # no ssh_hook provided
        command=COMMAND,
        timeout=TIMEOUT,
        dag=self.dag)
    try:
        task_2.execute(None)
    except Exception:  # pylint: disable=broad-except
        pass
    self.assertEqual(task_2.ssh_hook.ssh_conn_id, TEST_CONN_ID)

    # if both valid ssh_hook and ssh_conn_id are provided, ignore ssh_conn_id
    task_3 = SSHOperator(task_id="test_3",
                         ssh_hook=self.hook,
                         ssh_conn_id=TEST_CONN_ID,
                         command=COMMAND,
                         timeout=TIMEOUT,
                         dag=self.dag)
    try:
        task_3.execute(None)
    except Exception:  # pylint: disable=broad-except
        pass
    self.assertEqual(task_3.ssh_hook.ssh_conn_id, self.hook.ssh_conn_id)
tps = PythonOperator(
    task_id='refresh_tableau_permissions_stats',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': '78984f9a-f731-4e24-8379-7c992a88029e'},
    dag=dag
)

tus = PythonOperator(
    task_id='refresh_tableau_usage_stats',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': '733c626f-2729-479a-8cb6-d953fbeaed40'},
    dag=dag
)

u = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_users',
                command=users_bash,
                dag=dag)

su = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='tableau_system_users',
                 command=system_users_bash,
                 dag=dag)

v = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_views',
                command=views_bash,
                dag=dag)

w = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_workbooks',
                command=workbooks_bash,
    'depends_on_past': False,
    'start_date': datetime(2019, 5, 20,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('run_backup_stored_procedures',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 21 * * *')

t1_bash = 'python C:\\Anaconda\\ETL\\fi_dm_ebi\\backup_stored_procedures.py'
t2_bash = 'python C:\\Anaconda\\ETL\\misc_etl\\EBIDictionary.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_backup',
                 command=t1_bash,
                 dag=dag)

t2 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_dictionary',
                 command=t2_bash,
                 dag=dag)

# backup_stored_procedures.py
# import textwrap
#
# import pandas as pd
# import sqlalchemy as sa
#
# from config.get_salesforce_config import connection_string
def get_ssh_op(script):
    return SSHOperator(task_id='ssh_test',
                       ssh_hook=SSHHook(ssh_conn_id='ssh_conn'),
                       ssh_conn_id='operator_test',
                       retries=0,
                       command=script)
def test_s3_to_sftp_operation(self):
    # Setting
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF")

    # Test for creation of s3 bucket
    conn = boto3.client('s3')
    conn.create_bucket(Bucket=self.s3_bucket)
    self.assertTrue(self.s3_hook.check_for_bucket(self.s3_bucket))

    with open(LOCAL_FILE_PATH, 'w') as file:
        file.write(test_remote_file_content)
    self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET)

    # Check if object was created in s3
    objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                               Prefix=self.s3_key)
    # there should be object found, and there should only be one object found
    self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)

    # the object found should be consistent with dest_key specified earlier
    self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key)

    # get remote file to local
    run_task = S3ToSFTPOperator(
        s3_bucket=BUCKET,
        s3_key=S3_KEY,
        sftp_path=SFTP_PATH,
        sftp_conn_id=SFTP_CONN_ID,
        s3_conn_id=S3_CONN_ID,
        task_id=TASK_ID,
        dag=self.dag,
    )
    self.assertIsNotNone(run_task)
    run_task.execute(None)

    # Check that the file is created remotely
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="cat {0}".format(self.sftp_path),
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(check_file_task)
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    self.assertEqual(
        ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
        test_remote_file_content.encode('utf-8'),
    )

    # Clean up after finishing with test
    conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
    conn.delete_bucket(Bucket=self.s3_bucket)
    self.assertFalse(self.s3_hook.check_for_bucket(self.s3_bucket))
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 12, 12,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('run_clinical_finance_tasks',
          default_args=default_args,
          catchup=False,
          schedule_interval='35 5 * * *')

refresh_maps_bash = 'cd C:\\Anaconda\\ETL\\clinical_finance && python cfin_maps_to_ebi.py'

m = SSHOperator(ssh_conn_id='tableau_server',
                task_id='refresh_mapping_tables',
                command=refresh_maps_bash,
                dag=dag)
    catchup=False,
    tags=["customer_360", "aws"]
) as dag:

    aws_sensor = HttpSensor(
        task_id="watch_for_order_s3",
        endpoint="orders.csv",
        http_conn_id="orders_s3",
        retries=10,
        response_check=lambda response: response.status_code == 200,
        retry_delay=timedelta(seconds=10)
    )

    ssh_edge_download_task = SSHOperator(
        task_id="download_orders",
        ssh_conn_id="cloudera",
        command=download_order_command,
    )

    import_customers_info = SSHOperator(
        task_id="import_customers_from_sql",
        ssh_conn_id="cloudera",
        command=load_customer_info_cmd()
    )

    upload_orders_to_hdfs = SSHOperator(
        task_id="upload_orders_to_hdfs",
        ssh_conn_id="cloudera",
        command="hdfs dfs -rm -R -f airflow_input && hdfs dfs -mkdir -p airflow_input && hadoop fs -put "
                "./airflow_pipeline/orders.csv airflow_input/ "
    )

    run_spark_job = SSHOperator(
import airflow.utils.dates
from airflow.providers.ssh.operators.ssh import SSHOperator

with airflow.DAG(
        'hpc_example',
        description='HPC interface test workflow',
        tags=['example'],
        start_date=airflow.utils.dates.days_ago(1),
) as dag:
    task1 = SSHOperator(task_id='hello_world',
                        ssh_conn_id='bessemer',
                        command="sacct")
    'start_date': datetime(2019, 3, 6,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

dag = DAG('update_salesforce',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 8-20 * * *')

t1_bash = 'cd C:\\Anaconda\\ETL\\salesforce && python get_salesforce.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_get_salesforce',
                 command=t1_bash,
                 dag=dag)

datasources = [
    # {'task_id': 'refresh_remedy_force_incident_trend',
    #  'datasource_id': 'B5C928D5-D60B-4ECA-A3F5-AF14078A8629'},
    {
        'task_id': 'refresh_salesforce_new_patient_leakage',
        'datasource_id': '5B768E79-F89A-4B8F-8F83-2D87A641DC1D'
    }, {
        'task_id': 'refresh_remedy_force_incident',
        'datasource_id': '3F3A843B-CEE9-48B3-A045-658233E1437F'
    }, {
        'task_id': 'refresh_provider_loa_submission',
## Define xcp operation details (change values as necessary to match your environment and desired operation)

# Define xcp operation to perform
xcpOperation = 'sync'  # Must be 'copy' or 'sync'

# Define source and destination for copy operation
xcpCopySource = '192.168.200.41:/trident_pvc_957318e1_9b73_4e16_b857_dca7819dd263'
xcpCopyDestination = '192.168.200.41:/trident_pvc_9e7607c2_29c8_4dbf_9b08_551ba72d0273'

# Define catalog id for sync operation
xcpSyncId = 'autoname_copy_2020-10-06_16.37.44.963391'

## Define xcp host details (change values as necessary to match your environment)
xcpAirflowConnectionName = 'xcp_host'  # Name of the Airflow connection of type 'ssh' that contains connection details for a host on which xcp is installed, configured, and accessible within $PATH

################################################################################################

# Construct xcp command
xcpCommand = 'xcp help'
if xcpOperation == 'copy':
    xcpCommand = 'xcp copy ' + xcpCopySource + ' ' + xcpCopyDestination
elif xcpOperation == 'sync':
    xcpCommand = 'xcp sync -id ' + xcpSyncId

# Define DAG steps/workflow
with replicate_data_xcp_dag as dag:

    # Define step to invoke a NetApp XCP copy or sync operation
    invoke_xcp = SSHOperator(task_id="invoke-xcp",
                             command=xcpCommand,
                             ssh_conn_id=xcpAirflowConnectionName)
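A quick sanity-check sketch of the branching above, using only the values already defined in this example: with xcpOperation set to 'sync', the constructed command is the sync form built from xcpSyncId.

# Sanity check: xcpOperation = 'sync' above, so the elif branch runs and the
# final command string is the sync invocation with the configured catalog id.
assert xcpCommand == 'xcp sync -id autoname_copy_2020-10-06_16.37.44.963391'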
GCE_INSTANCE = os.environ.get('GCE_INSTANCE', 'target-instance')
# [END howto_operator_gce_args_common]

with models.DAG(
    'example_compute_ssh',
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=['example'],
) as dag:
    # # [START howto_execute_command_on_remote1]
    os_login_without_iap_tunnel = SSHOperator(
        task_id="os_login_without_iap_tunnel",
        ssh_hook=ComputeEngineSSHHook(
            instance_name=GCE_INSTANCE,
            zone=GCE_ZONE,
            project_id=GCP_PROJECT_ID,
            use_oslogin=True,
            use_iap_tunnel=False,
        ),
        command="echo os_login_without_iap_tunnel",
    )
    # # [END howto_execute_command_on_remote1]

    # # [START howto_execute_command_on_remote2]
    metadata_without_iap_tunnel = SSHOperator(
        task_id="metadata_without_iap_tunnel",
        ssh_hook=ComputeEngineSSHHook(
            instance_name=GCE_INSTANCE,
            zone=GCE_ZONE,
            use_oslogin=False,
            use_iap_tunnel=False,
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': datetime(2019, 4, 2,
                           tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('update_arxview',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 9 * * *')

t1_bash = 'activate arxview && python C:\\Anaconda\\ETL\\arxview\\update_arxview.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_get_arxview',
                 command=t1_bash,
                 dag=dag)

t2 = PythonOperator(
    task_id='refresh_arxview_arrays',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': '7d239d58-aea8-4dbb-bb98-cac214f1a021'},
    dag=dag)

t1 >> t2
pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password']

path = 'C:\\Airflow\\send_hims'
ebi_db_server_prod = Variable.get('ebi_db_server_prod')
airflow_server_prod = Variable.get('airflow_server_prod')

query_cmd = (f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E '
             f'-i {path}\\hims_query.sql '
             f'-o {path}\\hims_results.csv '
             '-s"|" -W -X -I')

copy_cmd = (f'pscp -pw {pw} {path}\\hims_results.csv '
            f'{airflow_server_prod}:/var/nfsshare/files')

query = SSHOperator(ssh_conn_id='tableau_server',
                    task_id='query_hims',
                    command=query_cmd,
                    dag=dag)

copy = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_hims',
                   command=copy_cmd,
                   dag=dag)

email = EmailOperator(
    task_id='email_hims',
    to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
    cc=['*****@*****.**'],
    subject='HIMS Data {{ ds }}',
    html_content='See attached.',
    files=['/var/nfsshare/files/hims_results.csv'],
    dag=dag)
cd /storage/wayback_acls
git config user.email '{{ var.value.alert_email_address }}'
git config user.email
git config user.name 'Airflow W3ACT Export Task'
git commit -m 'Automated update from Airflow at {{ ts }} by {{ task_instance_key_str }}.' -a
git pull origin master
git push {{ params.gitlab_wayback_acl_remote }} master
" """,
)

acls_deploy = SSHOperator(
    task_id='deploy_updated_acls',
    ssh_conn_id='access_ssh',
    command="""bash -c "
cd /root/gitlab/wayback_excludes_update/
git pull origin master
" """,
)

@task()
def push_w3act_data_stats():
    from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

    registry = CollectorRegistry()

    # Gather stats from files:
    g = Gauge('ukwa_record_count', 'Number of records', ['kind'], registry=registry)