Example 1
    def test_file_transfer_with_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath_int_dir, 'r') as file:
            content_received = file.read()
        self.assertEqual(content_received.strip(), test_remote_file_content)
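# The tests in these examples reference a shared unittest fixture (self.hook,
# self.dag, and several file paths). A minimal sketch of such a setUp is shown
# below; the connection id and paths are assumptions, not the original values.
import unittest

from airflow import DAG
from airflow.providers.ssh.hooks.ssh import SSHHook
from airflow.utils import timezone


class SFTPOperatorTestBase(unittest.TestCase):
    def setUp(self):
        self.hook = SSHHook(ssh_conn_id='ssh_default')
        self.dag = DAG(
            'unit_tests_sftp_op',
            default_args={'owner': 'airflow',
                          'start_date': timezone.datetime(2017, 1, 1)},
        )
        self.test_remote_filepath = '/tmp/tmp1/test_remote_file'
        self.test_local_filepath = '/tmp/tmp2/test_local_file'
        # Paths whose parent directories do not exist yet, exercised by the
        # create_intermediate_dirs variants.
        self.test_remote_filepath_int_dir = '/tmp/tmp5/tmp6/test_remote_file'
        self.test_local_filepath_int_dir = '/tmp/tmp3/tmp4/test_local_file'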
Example 2
    def test_file_transfer_with_intermediate_dir_put(self):
        test_local_file_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as file:
            file.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath_int_dir,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.test_remote_filepath_int_dir),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids='test_check_file',
                          key='return_value').strip(), test_local_file_content)
Example 3
    def test_json_file_transfer_put(self):
        test_local_file_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as file:
            file.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
            task_id="put_test_task",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            dag=self.dag,
        )
        assert put_test_task is not None
        ti2 = TaskInstance(task=put_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
            task_id="check_file_task",
            ssh_hook=self.hook,
            command=f"cat {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert check_file_task is not None
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        assert ti3.xcom_pull(task_ids=check_file_task.task_id,
                             key='return_value').strip() == b64encode(
                                 test_local_file_content).decode('utf-8')
Example 4
    def test_pickle_file_transfer_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        assert get_test_task is not None
        ti2 = TaskInstance(task=get_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath) as file:
            content_received = file.read()
        assert content_received.strip() == test_remote_file_content
Example 5
    def test_file_transfer_no_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with pytest.raises(Exception) as ctx:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            assert get_test_task is not None
            ti2 = TaskInstance(task=get_test_task,
                               execution_date=timezone.utcnow())
            ti2.run()
        assert 'No such file' in str(ctx.value)
Example 6
    def test_file_transfer_no_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            self.assertIsNotNone(get_test_task)
            ti2 = TaskInstance(task=get_test_task,
                               execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
        task_id="linkedin-ads-sync",
        connector_id="{{ var.value.linkedin_connector_id }}",
    )

    linkedin_sensor = FivetranSensor(
        task_id="linkedin-sensor",
        connector_id="{{ var.value.linkedin_connector_id }}",
        poke_interval=600,
    )

    twitter_sync = FivetranOperator(
        task_id="twitter-ads-sync",
        connector_id="{{ var.value.twitter_connector_id }}",
    )

    twitter_sensor = FivetranSensor(
        task_id="twitter-sensor",
        connector_id="{{ var.value.twitter_connector_id }}",
        poke_interval=600,
    )

    dbt_run = SSHOperator(
        task_id="dbt_ad_reporting",
        command="cd dbt_ad_reporting ; ~/.local/bin/dbt run -m +ad_reporting",
        ssh_conn_id="dbtvm",
    )

    linkedin_sync >> linkedin_sensor
    twitter_sync >> twitter_sensor
    [linkedin_sensor, twitter_sensor] >> dbt_run
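# The tasks above sit inside a DAG context manager that this excerpt omits.
# A minimal sketch of the surrounding boilerplate; the dag_id, schedule, and
# Fivetran import paths (community airflow-provider-fivetran package) are
# assumptions and should be adjusted to the installed provider.
from datetime import datetime

from airflow import DAG
from airflow.providers.ssh.operators.ssh import SSHOperator
from fivetran_provider.operators.fivetran import FivetranOperator
from fivetran_provider.sensors.fivetran import FivetranSensor

with DAG(
    dag_id="ad_reporting_dag",
    start_date=datetime(2021, 1, 1),
    schedule_interval="@daily",
    catchup=False,
) as dag:
    # ... tasks and dependencies as defined in the example above ...
    pass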
Example 8
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('refresh_npsr_project',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 7 * * *')

git_pull_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model && git pull'
# refresh_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model\\code\\data_model && python refresh_data_model.py'
refresh_aa_bash = 'cd C:\\Anaconda\\ETL\\npsr\\clinical_revenue_model\\code\\applied_ai_data && python refresh_applied_ai_data.py'

gp = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='git_pull_latest',
                 command=git_pull_bash,
                 dag=dag)

# r = SSHOperator(ssh_conn_id='tableau_server',
#                 task_id='refresh_data',
#                 command=refresh_bash,
#                 dag=dag)

ra = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_applied_ai_data',
                 command=refresh_aa_bash,
                 dag=dag)

gp  # >> r
gp >> ra
Example 9
def getsshoperator(taskname, xcompush, command):
    return SSHOperator(ssh_hook=getedgenodehook(),
                       task_id="Run_{}".format(taskname),
                       do_xcom_push=xcompush,
                       command=command)
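# Hypothetical call site for the factory above. getedgenodehook() is assumed to
# return an SSHHook pointing at the cluster edge node; the task name and command
# here are illustrative only.
count_landing_files = getsshoperator("count_landing_files",
                                     xcompush=True,
                                     command="hdfs dfs -count /data/landing")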
Example 10
linkedin_sync = FivetranOperator(
    task_id='linkedin-ads-sync',
    connector_id=Variable.get("linkedin_connector_id"),
    dag=dag)

linkedin_sensor = FivetranSensor(
    connector_id=Variable.get("linkedin_connector_id"),
    poke_interval=600,
    task_id='linkedin-sensor',
    dag=dag)

twitter_sync = FivetranOperator(
    task_id='twitter-ads-sync',
    connector_id=Variable.get("twitter_connector_id"),
    dag=dag)

twitter_sensor = FivetranSensor(
    connector_id=Variable.get("twitter_connector_id"),
    poke_interval=600,
    task_id='twitter-sensor',
    dag=dag)

dbt_run = SSHOperator(
    task_id='dbt_ad_reporting',
    command='cd dbt_ad_reporting ; ~/.local/bin/dbt run -m +ad_reporting',
    ssh_conn_id='dbtvm',
    dag=dag)

linkedin_sync >> linkedin_sensor
twitter_sync >> twitter_sensor
[linkedin_sensor, twitter_sensor] >> dbt_run
Example 11
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': datetime(2019, 3, 6, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
    }

dag = DAG('hr_tableau_security_sync', default_args=default_args, catchup=False, schedule_interval='0 21 * * *')

hr_bash = 'cd C:\\Anaconda\\ETL\\tableau && python hr_security.py'
# epic_bash = 'cd C:\\Anaconda\\ETL\\tableau && python TableauEpicSecuritySync.py'
unlicense_bash = 'cd C:\\Anaconda\\ETL\\tableau && python Unlicense_Users.py'
mf_sched = 'cd C:\\Anaconda\\ETL\\tableau && python MF_Schedulers_Security.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='Sync_HR_Users_And_Groups',
                 command=hr_bash,
                 dag=dag)

# t2 = SSHOperator(ssh_conn_id='tableau_server',
#                  task_id='Sync_Epic_Users_And_Groups',
#                  command=epic_bash,
#                  dag=dag)

t3 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='Unlicense_Tableau_Users',
                 command=unlicense_bash,
                 dag=dag)

t4 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='MF_Schedulers_Security',
                 command=mf_sched,
                 dag=dag)
Example 12
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

dag = DAG('update_foundation_data',
          default_args=default_args,
          catchup=False,
          schedule_interval='00 21 * * *')

t1_bash = 'cd C:\\Anaconda\\ETL\\foundation && python DSS_D_Data.py'
t2_bash = 'cd C:\\Anaconda\\ETL\\foundation && python LU_Physicians.py'
t4_bash = 'cd C:\\Anaconda\\ETL\\misc_etl && python CovidWaiverData.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_dss_d_data',
                 command=t1_bash,
                 dag=dag)

t3 = PythonOperator(
    task_id='refresh_rvu_extract',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': 'c08148a1-cf27-48df-8c8f-fc29f2c77c12'},
    dag=dag)

t4 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_covid_waiver',
                 command=t4_bash,
                 dag=dag)

t1 >> t3
t4
Example 13
# -l 30 raises login timeout since it seems to be finicky
# -h -1 removes header row and line of dashes underneath
query_cmd_patient = (f'sqlcmd -S {claro_server} -d Clarity_PRD_Report '
                     f'-i {Variable.get("claro_query_filepath")} '
                     f'-o {output_path_patient} '
                     f'-s"|" -W -X -I -l 30 -h -1')

copy_cmd_patient = f'pscp -pw {pw} {output_path_patient} {airflow_server_prod}:{basepath}/files'

encrypt_cmd_patient = (
    f"gpg --encrypt -vv --batch --yes --trust-model always -r "
    f"[email protected] {basepath}/files/{output_file_patient}")

query_patient = SSHOperator(ssh_conn_id='tableau_server',
                            task_id='query_claro_patient',
                            command=query_cmd_patient,
                            dag=dag)

copy_patient = SSHOperator(ssh_conn_id='tableau_server',
                           task_id='copy_claro_patient',
                           command=copy_cmd_patient,
                           dag=dag)

encrypt_patient = BashOperator(task_id='encrypt_file_patient',
                               bash_command=encrypt_cmd_patient,
                               dag=dag)

sftp_patient = SFTPOperator(
    task_id='upload_claro_to_sftp_patient',
    ssh_conn_id='claro_sftp',
    local_filepath=f'{basepath}/files/{output_file_patient}.gpg',
Example 14
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 12, 12, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('retry_failed_tableau_extracts',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 15 * * *')

refresh_bash = 'cd C:\\Anaconda\\ETL\\tableau && python run_failed_extracts.py'

r = SSHOperator(ssh_conn_id='tableau_server',
                task_id='find_and_rerun_failed_extracts',
                command=refresh_bash,
                dag=dag)
Example 15
pool_id = 'ebi_etl_pool'

results_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_results.py'
ranks_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_percentile.py'
results_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python ResponseToDB1.py'
ranks_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python RanksToDB1.py'

red = MsSqlOperator(sql='EXEC EBI_PressGaney_Results_Clarity_Logic;',
                    task_id='refresh_edw_data',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)

crd = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_results_to_db1',
                  command=results_bash,
                  dag=dag)

crk = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_ranks_to_db1',
                  command=ranks_bash,
                  dag=dag)

crdn = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_results_to_db1_new',
                   command=results_bash_new,
                   dag=dag)

crkn = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_ranks_to_db1_new',
                   command=ranks_bash_new,
                   dag=dag)
Example 16
    'task_id': 'refresh_revenue_cycle_pb_workqueue_telehealth',
    'datasource_id': '3fd8f8f1-439c-499c-9649-e15a3251bd35'
}, {
    'task_id': 'refresh_corporate_accounting_expense_reimbursement',
    'datasource_id': 'bb771489-e5a1-45c0-943b-7c8a02ad7aac'
}, {
    'task_id': 'refresh_pb_tdl_transactions',
    'datasource_id': '8a83879d-6937-4a31-a784-21107733854f'
}, {
    'task_id': 'refresh_epsi_department_utilization',
    'datasource_id': 'ba019a61-06d5-41ca-b2a0-904ca811b922'
}, {
    'task_id': 'refresh_corporate_accounting_ap_aging',
    'datasource_id': '04b12b4d-fb90-483e-b0ad-653f4e85867a'
}]

for d in datasources:
    task = PythonOperator(task_id=d['task_id'],
                          python_callable=refresh_tableau_extract,
                          op_kwargs={'datasource_id': d['datasource_id']},
                          dag=dag)

    task

sync = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='Sync_Telehealth_Providers',
                   command=telehlth_bash,
                   dag=dag)

sync
Example 17
    def test_arg_checking(self):
        # Exception should be raised if neither ssh_hook nor ssh_conn_id is provided
        with self.assertRaisesRegex(
                AirflowException,
                "Cannot operate without ssh_hook or ssh_conn_id."):
            task_0 = SSHOperator(task_id="test",
                                 command=COMMAND,
                                 timeout=TIMEOUT,
                                 dag=self.dag)
            task_0.execute(None)

        # if ssh_hook is invalid/not provided, use ssh_conn_id to create SSHHook
        task_1 = SSHOperator(
            task_id="test_1",
            ssh_hook="string_rather_than_SSHHook",  # invalid ssh_hook
            ssh_conn_id=TEST_CONN_ID,
            command=COMMAND,
            timeout=TIMEOUT,
            dag=self.dag)
        try:
            task_1.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_1.ssh_hook.ssh_conn_id, TEST_CONN_ID)

        task_2 = SSHOperator(
            task_id="test_2",
            ssh_conn_id=TEST_CONN_ID,  # no ssh_hook provided
            command=COMMAND,
            timeout=TIMEOUT,
            dag=self.dag)
        try:
            task_2.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_2.ssh_hook.ssh_conn_id, TEST_CONN_ID)

        # if both valid ssh_hook and ssh_conn_id are provided, ignore ssh_conn_id
        task_3 = SSHOperator(task_id="test_3",
                             ssh_hook=self.hook,
                             ssh_conn_id=TEST_CONN_ID,
                             command=COMMAND,
                             timeout=TIMEOUT,
                             dag=self.dag)
        try:
            task_3.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_3.ssh_hook.ssh_conn_id, self.hook.ssh_conn_id)
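# Assumed module-level constants referenced by test_arg_checking above; the
# values are illustrative, not the originals.
TEST_CONN_ID = "conn_id_for_testing"
COMMAND = "echo -n airflow"
TIMEOUT = 5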
Example 18
tps = PythonOperator(
        task_id='refresh_tableau_permissions_stats',
        python_callable=refresh_tableau_extract,
        op_kwargs={'datasource_id': '78984f9a-f731-4e24-8379-7c992a88029e'},
        dag=dag
        )

tus = PythonOperator(
        task_id='refresh_tableau_usage_stats',
        python_callable=refresh_tableau_extract,
        op_kwargs={'datasource_id': '733c626f-2729-479a-8cb6-d953fbeaed40'},
        dag=dag
        )

u = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_users',
                command=users_bash,
                dag=dag)

su = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='tableau_system_users',
                 command=system_users_bash,
                 dag=dag)

v = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_views',
                command=views_bash,
                dag=dag)

w = SSHOperator(ssh_conn_id='tableau_server',
                task_id='tableau_workbooks',
                command=workbooks_bash,
                dag=dag)
Example 19
    'depends_on_past': False,
    'start_date': datetime(2019, 5, 20, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
    }

dag = DAG('run_backup_stored_procedures', default_args=default_args, catchup=False, schedule_interval='0 21 * * *')

t1_bash = 'python C:\\Anaconda\\ETL\\fi_dm_ebi\\backup_stored_procedures.py'
t2_bash = 'python C:\\Anaconda\\ETL\\misc_etl\\EBIDictionary.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_backup',
                 command=t1_bash,
                 dag=dag)

t2 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='refresh_dictionary',
                 command=t2_bash,
                 dag=dag)

# backup_stored_procedures.py

# import textwrap
#
# import pandas as pd
# import sqlalchemy as sa
#
# from config.get_salesforce_config import connection_string
Example 20
def get_ssh_op(script):
    # Note: when both ssh_hook and ssh_conn_id are supplied, the hook takes
    # precedence and ssh_conn_id is ignored (cf. the arg-checking test above).
    return SSHOperator(task_id='ssh_test',
                       ssh_hook=SSHHook(ssh_conn_id='ssh_conn'),
                       ssh_conn_id='operator_test',
                       retries=0,
                       command=script)
Example 21
    def test_s3_to_sftp_operation(self):
        # Setting
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # Test for creation of s3 bucket
        conn = boto3.client('s3')
        conn.create_bucket(Bucket=self.s3_bucket)
        self.assertTrue(self.s3_hook.check_for_bucket(self.s3_bucket))

        with open(LOCAL_FILE_PATH, 'w') as file:
            file.write(test_remote_file_content)
        self.s3_hook.load_file(LOCAL_FILE_PATH,
                               self.s3_key,
                               bucket_name=BUCKET)

        # Check if object was created in s3
        objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                                   Prefix=self.s3_key)
        # there should be object found, and there should only be one object found
        self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)

        # the object found should be consistent with dest_key specified earlier
        self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'],
                         self.s3_key)

        # get remote file to local
        run_task = S3ToSFTPOperator(
            s3_bucket=BUCKET,
            s3_key=S3_KEY,
            sftp_path=SFTP_PATH,
            sftp_conn_id=SFTP_CONN_ID,
            s3_conn_id=S3_CONN_ID,
            task_id=TASK_ID,
            dag=self.dag,
        )
        self.assertIsNotNone(run_task)

        run_task.execute(None)

        # Check that the file is created remotely
        check_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.sftp_path),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids='test_check_file',
                          key='return_value').strip(),
            test_remote_file_content.encode('utf-8'),
        )

        # Clean up after finishing with test
        conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
        conn.delete_bucket(Bucket=self.s3_bucket)
        self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))
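# Assumed module-level constants referenced by the S3-to-SFTP test above; the
# values are illustrative. In setUp, self.s3_bucket, self.s3_key, self.sftp_path,
# self.s3_hook and self.hook would be initialised from the same values.
TASK_ID = 'test_s3_to_sftp'
BUCKET = 'test-s3-bucket'
S3_KEY = 'test/test_file.csv'
SFTP_PATH = '/tmp/remote_path.txt'
SFTP_CONN_ID = 'ssh_default'
S3_CONN_ID = 'aws_default'
LOCAL_FILE_PATH = '/tmp/test_s3_upload_file'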
Example 22
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 12, 12, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

dag = DAG('run_clinical_finance_tasks',
          default_args=default_args,
          catchup=False,
          schedule_interval='35 5 * * *')

refresh_maps_bash = 'cd C:\\Anaconda\\ETL\\clinical_finance && python cfin_maps_to_ebi.py'

m = SSHOperator(ssh_conn_id='tableau_server',
                task_id='refresh_mapping_tables',
                command=refresh_maps_bash,
                dag=dag)
Example 23
        catchup=False,
        tags=["customer_360", "aws"]

) as dag:
    aws_sensor = HttpSensor(
        task_id="watch_for_order_s3",
        endpoint="orders.csv",
        http_conn_id="orders_s3",
        retries=10,
        response_check=lambda response: response.status_code == 200,
        retry_delay=timedelta(seconds=10)
    )

    ssh_edge_download_task = SSHOperator(
        task_id="download_orders",
        ssh_conn_id="cloudera",
        command=download_order_command,
    )
    import_customers_info = SSHOperator(
        task_id="import_customers_from_sql",
        ssh_conn_id="cloudera",
        command=load_customer_info_cmd()
    )

    upload_orders_to_hdfs = SSHOperator(
        task_id="upload_orders_to_hdfs",
        ssh_conn_id="cloudera",
        command="hdfs dfs -rm -R -f airflow_input && hdfs dfs -mkdir -p airflow_input && hadoop fs -put "
                "./airflow_pipeline/orders.csv airflow_input/ "
    )
    run_spark_job = SSHOperator(
Example 24
import airflow.utils.dates
from airflow.providers.ssh.operators.ssh import SSHOperator

with airflow.DAG(
        'hpc_example',
        description='HPC interface test workflow',
        tags=['example'],
        start_date=airflow.utils.dates.days_ago(1),
) as dag:
    task1 = SSHOperator(task_id='hello_world',
                        ssh_conn_id='bessemer',
                        command="sacct")
Example 25
    'start_date': datetime(2019, 3, 6, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

dag = DAG('update_salesforce',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 8-20 * * *')

t1_bash = 'cd C:\\Anaconda\\ETL\\salesforce && python get_salesforce.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_get_salesforce',
                 command=t1_bash,
                 dag=dag)

datasources = [
    #{'task_id': 'refresh_remedy_force_incident_trend',
    # 'datasource_id': 'B5C928D5-D60B-4ECA-A3F5-AF14078A8629'},
    {
        'task_id': 'refresh_salesforce_new_patient_leakage',
        'datasource_id': '5B768E79-F89A-4B8F-8F83-2D87A641DC1D'
    },
    {
        'task_id': 'refresh_remedy_force_incident',
        'datasource_id': '3F3A843B-CEE9-48B3-A045-658233E1437F'
    },
    {
        'task_id': 'refresh_provider_loa_submission',
Example 26
## Define xcp operation details (change values as necessary to match your environment and desired operation)

# Define xcp operation to perform
xcpOperation = 'sync'  # Must be 'copy' or 'sync'

# Define source and destination for copy operation
xcpCopySource = '192.168.200.41:/trident_pvc_957318e1_9b73_4e16_b857_dca7819dd263'
xcpCopyDestination = '192.168.200.41:/trident_pvc_9e7607c2_29c8_4dbf_9b08_551ba72d0273'

# Define catalog id for sync operation
xcpSyncId = 'autoname_copy_2020-10-06_16.37.44.963391'

## Define xcp host details (change values as necessary to match your environment)
xcpAirflowConnectionName = 'xcp_host'  # Name of the Airflow connection of type 'ssh' that contains connection details for a host on which xcp is installed, configured, and accessible within $PATH

################################################################################################

# Construct xcp command
xcpCommand = 'xcp help'
if xcpOperation == 'copy':
    xcpCommand = 'xcp copy ' + xcpCopySource + ' ' + xcpCopyDestination
elif xcpOperation == 'sync':
    xcpCommand = 'xcp sync -id ' + xcpSyncId
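
# The with-block below uses a DAG object, replicate_data_xcp_dag, that is defined
# earlier in the original file. A minimal sketch of such a definition; the dag_id,
# schedule, and start date are assumptions.
from datetime import datetime

from airflow import DAG

replicate_data_xcp_dag = DAG(
    dag_id='replicate_data_xcp',
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
    catchup=False,
)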

# Define DAG steps/workflow
with replicate_data_xcp_dag as dag:

    # Define step to invoke a NetApp XCP copy or sync operation
    invoke_xcp = SSHOperator(task_id="invoke-xcp",
                             command=xcpCommand,
                             ssh_conn_id=xcpAirflowConnectionName)
Example 27
GCE_INSTANCE = os.environ.get('GCE_INSTANCE', 'target-instance')
# [END howto_operator_gce_args_common]

with models.DAG(
        'example_compute_ssh',
        start_date=datetime(2021, 1, 1),
        catchup=False,
        tags=['example'],
) as dag:
    # # [START howto_execute_command_on_remote1]
    os_login_without_iap_tunnel = SSHOperator(
        task_id="os_login_without_iap_tunnel",
        ssh_hook=ComputeEngineSSHHook(
            instance_name=GCE_INSTANCE,
            zone=GCE_ZONE,
            project_id=GCP_PROJECT_ID,
            use_oslogin=True,
            use_iap_tunnel=False,
        ),
        command="echo os_login_without_iap_tunnel",
    )
    # # [END howto_execute_command_on_remote1]

    # # [START howto_execute_command_on_remote2]
    metadata_without_iap_tunnel = SSHOperator(
        task_id="metadata_without_iap_tunnel",
        ssh_hook=ComputeEngineSSHHook(
            instance_name=GCE_INSTANCE,
            zone=GCE_ZONE,
            use_oslogin=False,
            use_iap_tunnel=False,
Example 28
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': datetime(2019, 4, 2, tzinfo=pendulum.timezone('America/Los_Angeles')),
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('update_arxview',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 9 * * *')

t1_bash = 'activate arxview && python C:\\Anaconda\\ETL\\arxview\\update_arxview.py'

t1 = SSHOperator(ssh_conn_id='tableau_server',
                 task_id='run_get_arxview',
                 command=t1_bash,
                 dag=dag)

t2 = PythonOperator(
    task_id='refresh_arxview_arrays',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': '7d239d58-aea8-4dbb-bb98-cac214f1a021'},
    dag=dag)

t1 >> t2
Example 29
pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password']

path = 'C:\\Airflow\\send_hims'
ebi_db_server_prod = Variable.get('ebi_db_server_prod')
airflow_server_prod = Variable.get('airflow_server_prod')

query_cmd = (f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E '
             f'-i {path}\\hims_query.sql '
             f'-o {path}\\hims_results.csv '
             '-s"|" -W -X -I')

copy_cmd = (f'pscp -pw {pw} {path}\\hims_results.csv '
            f'{airflow_server_prod}:/var/nfsshare/files')

query = SSHOperator(ssh_conn_id='tableau_server',
                    task_id='query_hims',
                    command=query_cmd,
                    dag=dag)

copy = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_hims',
                   command=copy_cmd,
                   dag=dag)

email = EmailOperator(
    task_id='email_hims',
    to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
    cc=['*****@*****.**'],
    subject='HIMS Data {{ ds }}',
    html_content='See attached.',
    files=['/var/nfsshare/files/hims_results.csv'],
    dag=dag)
Example 30
        cd /storage/wayback_acls
        git config user.email '{{ var.value.alert_email_address }}'
        git config user.email
        git config user.name 'Airflow W3ACT Export Task'
        git commit -m 'Automated update from Airflow at {{ ts }} by {{ task_instance_key_str }}.' -a
        git pull origin master
        git push {{ params.gitlab_wayback_acl_remote }} master
        "
        """,
    )

    acls_deploy = SSHOperator(
        task_id='deploy_updated_acls',
        ssh_conn_id='access_ssh',
        command="""bash -c "
        cd /root/gitlab/wayback_excludes_update/
        git pull origin master
        "
        """,
    )

    @task()
    def push_w3act_data_stats():
        from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

        registry = CollectorRegistry()
        # Gather stats from files:
        g = Gauge('ukwa_record_count',
                  'Number of records', ['kind'],
                  registry=registry)