Example #1
    def test_get_hook(self, conn, hook_class, get_connection):
        """
        Operator should use the ODBC hook if the conn type is ``odbc``, and the pymssql-based hook otherwise.
        """

        get_connection.return_value = conn
        op = MsSqlOperator(task_id='test', sql='', mssql_conn_id=conn.conn_id)
        hook = op.get_hook()
        assert hook.__class__ == hook_class
Example #2
    def test_get_hook(self, get_connection):
        """
        Operator should use the ODBC hook if the conn type is ``odbc``, and the pymssql-based hook otherwise.
        """
        for conn, hook_class in [(ODBC_CONN, OdbcHook), (PYMSSQL_CONN, MsSqlHook)]:
            get_connection.return_value = conn
            op = MsSqlOperator(task_id='test', sql='', mssql_conn_id=conn.conn_id)
            hook = op.get_hook()
            assert hook.__class__ == hook_class
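For context, the ``conn``, ``hook_class``, and ``get_connection`` arguments imply parametrization and patch decorators defined elsewhere in the test module. A minimal sketch of that wiring, with hypothetical conn IDs and Airflow 2-era import paths, might look like this:

from unittest import mock

import pytest

from airflow.models import Connection
from airflow.providers.microsoft.mssql.operators.mssql import MsSqlOperator

# Hypothetical connections; only conn_type drives the hook choice.
ODBC_CONN = Connection(conn_id='test_odbc', conn_type='odbc')
PYMSSQL_CONN = Connection(conn_id='test_pymssql', conn_type='mssql')


@pytest.mark.parametrize('conn, expected', [(ODBC_CONN, 'OdbcHook'), (PYMSSQL_CONN, 'MsSqlHook')])
@mock.patch('airflow.hooks.base.BaseHook.get_connection')
def test_hook_dispatch(get_connection, conn, expected):
    get_connection.return_value = conn
    op = MsSqlOperator(task_id='test', sql='', mssql_conn_id=conn.conn_id)
    assert type(op.get_hook()).__name__ == expected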
Example #3
    def test_get_hook_default(self, get_connection):
        """
        If :meth:`airflow.models.Connection.get_hook` does not return a hook (e.g. because of an invalid
        conn type), then :class:`~.MsSqlHook` should be used.
        """
        get_connection.return_value.get_hook.side_effect = AirflowException()

        op = MsSqlOperator(task_id='test', sql='')
        assert op.get_hook().__class__.__name__ == 'MsSqlHook'
Example #4
    def test_get_hook_from_conn(self, get_connection):
        """
        :class:`~.MsSqlOperator` should use the hook returned by :meth:`airflow.models.Connection.get_hook`
        if one is returned.

        This behavior is necessary in order to support usage of :class:`~.OdbcHook` with this operator.

        Specifically we verify here that :meth:`~.MsSqlOperator.get_hook` returns the hook returned from a
        call of ``get_hook`` on the object returned from :meth:`~.BaseHook.get_connection`.
        """
        mock_hook = MagicMock()
        get_connection.return_value.get_hook.return_value = mock_hook

        op = MsSqlOperator(task_id='test', sql='')
        assert op.get_hook() == mock_hook
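Taken together, the four tests pin down the dispatch inside ``MsSqlOperator.get_hook``: delegate to ``Connection.get_hook`` and fall back to ``MsSqlHook`` when that raises. A sketch of the method they imply (a reconstruction under Airflow 2-era imports, not necessarily the shipped code):

from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook
from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook


def get_hook(self):
    conn = BaseHook.get_connection(conn_id=self.mssql_conn_id)
    try:
        # An ``odbc`` conn type yields an OdbcHook here (Examples #1, #2, #4).
        return conn.get_hook()
    except AirflowException:
        # Invalid conn types raise, so fall back to MsSqlHook (Example #3).
        return MsSqlHook(mssql_conn_id=self.mssql_conn_id, schema=self.database)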
Example #5
dag = DAG('run_call_light_etl', default_args=default_args, catchup=False, schedule_interval='40 5 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

# deps = ExternalTaskSensor(
#        external_dag_id='run_daily_census',
#        external_task_id='refresh_daily_census',
#        task_id='wait_for_daily_census',
#        dag=dag
#        )
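# Re-enabling the sensor would gate this DAG on the census refresh. A sketch
# of the missing pieces, assuming an Airflow 2-era import path:
#
# from airflow.sensors.external_task import ExternalTaskSensor
# deps >> CLT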

CLT = MsSqlOperator(
        sql='EXEC EBI_CallLight_tmp_Logic;',
        task_id='call_light_tmp',
        autocommit=True,
        mssql_conn_id=conn_id,
        pool=pool_id,
        dag=dag
        )

CLC = MsSqlOperator(
        sql='EXEC EBI_CallLightUntC_Logic;',
        task_id='call_light_unt_c',
        autocommit=True,
        mssql_conn_id=conn_id,
        pool=pool_id,
        dag=dag
        )

CLCT = MsSqlOperator(
        sql='EXEC EBI_CallLightUntC_tmp_Logic;',
Example #6
    1,
    'retry_delay':
    timedelta(minutes=2)
}

dag = DAG('refresh_scm',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 16 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

si = MsSqlOperator(sql='EXEC EBI_SCM_Items_Logic;',
                   task_id='load_scm_items',
                   autocommit=True,
                   mssql_conn_id=conn_id,
                   pool=pool_id,
                   dag=dag)

sv = MsSqlOperator(sql='EXEC EBI_SCM_Vendors_Logic;',
                   task_id='load_scm_vendors',
                   autocommit=True,
                   mssql_conn_id=conn_id,
                   pool=pool_id,
                   dag=dag)

vch = MsSqlOperator(sql='EXEC EBI_SCM_Vouchers_Logic;',
                    task_id='load_scm_vouchers',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)
Example #7
def read_json_files(names):
    return [pd.read_json('/var/nfsshare/etl_deps/dev_{}.json'.format(name)) for name in names]


conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

to_read = ['unique_procs', 'no_dep_procs', 'proc_map', 'unique_dep_procs', 'unique_ds', 'ds_map', 'unique_ds_procs']
unique_procs, no_dep_procs, proc_map, unique_dep_procs, unique_ds, ds_map, unique_ds_procs = read_json_files(to_read)

# create a sql operator for each procedure
sql_ops = {}
for p in unique_procs.procs:
    o = MsSqlOperator(
            sql='exec {};'.format(p),
            task_id='exec_{}'.format(p),
            autocommit=True,
            mssql_conn_id=conn_id,
            pool=pool_id,
            dag=dag
            )
    sql_ops[p] = o

# create a python operator for each tableau datasource
python_ops = {}
for ds in unique_ds.ds_name:
    ds_id = unique_ds.loc[unique_ds.ds_name == ds, 'id'].values[0]
    if ds == 'pb_tdl_transactions':
        weight = 0
    elif 'appointment' in ds:
        weight = 2
    else:
        weight = 1
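The loop is cut off before ``weight`` is used, but the surrounding DAGs suggest it feeds ``priority_weight`` on a per-datasource refresh task. A hypothetical continuation of the loop body (``refresh_tableau_extract`` is the helper the other examples in this collection import):

    python_ops[ds] = PythonOperator(
            task_id='refresh_{}'.format(ds),
            python_callable=refresh_tableau_extract,
            op_kwargs={'datasource_id': ds_id},
            priority_weight=weight,  # computed above
            dag=dag
            )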
Example #8
    schedule_interval='@daily',
    start_date=datetime(2021, 10, 1),
    tags=['example'],
    catchup=False,
)

# [START howto_operator_mssql]

# Example of creating a task to create a table in MsSql

create_table_mssql_task = MsSqlOperator(
    task_id='create_country_table',
    mssql_conn_id='airflow_mssql',
    sql=r"""
    CREATE TABLE Country (
        country_id INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
        name TEXT,
        continent TEXT
    );
    """,
    dag=dag,
)

# [END howto_operator_mssql]

# [START mssql_hook_howto_guide_insert_mssql_hook]


@dag.task(task_id="insert_mssql_task")
def insert_mssql_hook():
    mssql_hook = MsSqlHook(mssql_conn_id='airflow_mssql', schema='airflow')
    rows = [('Germany', 'Europe'), ('Japan', 'Asia')]  # illustrative sample rows
    mssql_hook.insert_rows(table='Country', rows=rows, target_fields=['name', 'continent'])

# [END mssql_hook_howto_guide_insert_mssql_hook]
Example #9
conn_id = 'qrrm_datamart'

ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1], ebi['password'])
server = TSC.Server('https://ebi.coh.org', use_server_version=True)


def refresh_workbook_data(tableau_server, tableau_authentication,
                          workbook_luid):
    # Use the parameters, not the module-level globals, so the callable
    # actually honors its op_kwargs.
    with tableau_server.auth.sign_in(tableau_authentication):
        tableau_server.workbooks.refresh(workbook_luid)


cp = MsSqlOperator(sql='EXEC Clarity_COVID19_SP;',
                   task_id='covid_proc',
                   autocommit=True,
                   mssql_conn_id=conn_id,
                   dag=dag)

ce = PythonOperator(task_id='covid_extract',
                    python_callable=refresh_workbook_data,
                    op_kwargs={
                        'tableau_server': server,
                        'tableau_authentication': auth,
                        'workbook_luid': '2fd3f851-37b9-45d1-bedc-0d7cdacdf888'
                    },
                    dag=dag)

cp >> ce
Example #10
    'retry_delay':
    timedelta(minutes=2)
}

dag = DAG('refresh_kpis',
          default_args=default_args,
          catchup=False,
          concurrency=2,
          schedule_interval='0 19 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

awt = MsSqlOperator(sql='EXEC sp_EBI_KPI_AGG_Adjusted_Wait_Time;',
                    task_id='adjusted_wait_time',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)

ca = MsSqlOperator(sql='EXEC sp_EBI_KPI_AGG_Call_Abandonment;',
                   task_id='calls_abandoned',
                   autocommit=True,
                   mssql_conn_id=conn_id,
                   pool=pool_id,
                   dag=dag)

cfd = MsSqlOperator(sql='EXEC sp_EBI_KPI_AGG_Cancel_14;',
                    task_id='cancel_14_days',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)
Example #11
    # print(list(df))
    engineorigem = sqlalchemy.create_engine('mssql+pyodbc://sa:[email protected]/bi_integracao?driver=ODBC+Driver+17+for+SQL+Server')
    # df.to_sql(name='users', con=engineorigem, if_exists='replace')
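# A self-contained version of the truncated callable above might look like
# this (function name, JSON path, and credentials are placeholders):
#
# def processing_user():
#     df = pd.read_json('/tmp/processed_user.json')
#     engine = sqlalchemy.create_engine(
#         'mssql+pyodbc://user:password@host/bi_integracao'
#         '?driver=ODBC+Driver+17+for+SQL+Server')
#     df.to_sql(name='users', con=engine, if_exists='replace', index=False)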


with DAG('user_processing', schedule_interval='@daily', default_args=default_args, catchup=False) as dag:
    # define tasks / operators

    creating_table = MsSqlOperator(
        task_id='creating_table',
        mssql_conn_id='sql_server',
        database='bi_integracao',
        sql='''
                CREATE TABLE users
                (
                firstname    varchar(100) NOT NULL,
                lastname     varchar(100) NOT NULL,
                country      varchar(100) NOT NULL,
                username     varchar(100) NOT NULL,
                password     varchar(30)  NOT NULL,
                email        varchar(100) NOT NULL PRIMARY KEY
                )
              '''
    )

    is_api_available = HttpSensor(
        task_id='is_api_available',
        http_conn_id='user_api',
        endpoint='api/'
    )

    extract_user = SimpleHttpOperator(
Example #12
    1,
    'retry_delay':
    timedelta(minutes=2)
}

dag = DAG('refresh_total_charges',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 17 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

tc = MsSqlOperator(sql='EXEC EBI_Total_Charges_Clarity_Logic;',
                   task_id='load_total_charges_clarity',
                   autocommit=True,
                   mssql_conn_id=conn_id,
                   pool=pool_id,
                   dag=dag)

new = PythonOperator(
    task_id='refresh_new_em_provider',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': 'ea9a44b2-3f27-4560-9a00-4e056ede95bd'},
    dag=dag)

trj = PythonOperator(
    task_id='refresh_patient_trajectory',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': '951a76e5-e7a7-466a-a0aa-6b592bd9a370'},
    dag=dag)
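``refresh_tableau_extract`` is imported from a shared module these DAGs do not show. A plausible implementation with tableauserverclient, mirroring the sign-in pattern in the COVID example above (server URL and credentials are placeholders), is:

import tableauserverclient as TSC


def refresh_tableau_extract(datasource_id):
    # Placeholder credentials; the real DAGs read them from a secret store.
    auth = TSC.TableauAuth('user', 'password')
    server = TSC.Server('https://tableau.example.org', use_server_version=True)
    with server.auth.sign_in(auth):
        # Trigger an extract refresh job for the given datasource LUID.
        datasource = server.datasources.get_by_id(datasource_id)
        server.datasources.refresh(datasource)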
Example #13
dag = DAG('refresh_patient_satisfaction',
          default_args=default_args,
          catchup=False,
          schedule_interval='0 18 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

results_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_results.py'
ranks_bash = 'cd C:\\Anaconda\\ETL\\patient_sat && python patient_sat_percentile.py'
results_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python ResponseToDB1.py'
ranks_bash_new = 'cd C:\\Anaconda\\ETL\\patient_sat && python RanksToDB1.py'

red = MsSqlOperator(sql='EXEC EBI_PressGaney_Results_Clarity_Logic;',
                    task_id='refresh_edw_data',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)

crd = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_results_to_db1',
                  command=results_bash,
                  dag=dag)

crk = SSHOperator(ssh_conn_id='tableau_server',
                  task_id='copy_ranks_to_db1',
                  command=ranks_bash,
                  dag=dag)

crdn = SSHOperator(ssh_conn_id='tableau_server',
                   task_id='copy_results_to_db1_new',
                   command=results_bash_new,
                   dag=dag)
Example #14
    'email_on_retry':
    False,
    'retries':
    1,
    'retry_delay':
    timedelta(minutes=2)
}

dag = DAG('run_edw_qa',
          default_args=default_args,
          catchup=False,
          schedule_interval='30 6 * * *')

conn_id = 'ebi_datamart'
pool_id = 'ebi_etl_pool'

scp = MsSqlOperator(sql='EXEC EBI_SEM_Census_QA_Logic;',
                    task_id='sem_census_qa',
                    autocommit=True,
                    mssql_conn_id=conn_id,
                    pool=pool_id,
                    dag=dag)

sce = PythonOperator(
    task_id='refresh_scm_census_qa_extract',
    python_callable=refresh_tableau_extract,
    op_kwargs={'datasource_id': 'df1da0b7-7b06-4e4e-bd8e-0dfd5c3a42a1'},
    dag=dag)

scp >> sce