Ejemplo n.º 1
0
def query_narrativedx(service):
    """Export the NarrativeDX query results for one service to a CSV file.

    Reads ``narrativedx_query.sql`` from ``basepath``, substitutes the date
    range and the service into it, runs it over the ``fi_dm_ebi`` SQL Server
    connection and writes the resulting frame to a CSV under ``basepath``.

    The date range is taken from the Airflow variables
    ``narrativedx_start_date`` and ``narrativedx_end_date`` when they are
    set; each one independently falls back to the first / last day of the
    previous calendar month.

    Args:
        service: Value substituted for ``{surv}`` in the SQL template and
            used in the output file name.
    """
    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']

    ppw_params = quote_plus('DRIVER={driver};'
                            'SERVER={server};'
                            'DATABASE={database};'
                            'UID={user};'
                            'PWD={password};'
                            'PORT={port};'
                            'TDS_Version={tds_version};'.format(**ebi))

    ppw_engine = sa.create_engine(
        f'mssql+pyodbc:///?odbc_connect={ppw_params}')

    with open(basepath.joinpath('narrativedx_query.sql')) as file:
        sql = file.read()

    # Defaults cover the previous calendar month. They are computed from
    # today's date only: Variable.get returns the stored value (a string)
    # when the variable exists, so deriving the start default from the
    # fetched end date would fail with AttributeError on ``.day``.
    first_of_month = date.today().replace(day=1)
    last_of_prev_month = first_of_month - timedelta(days=1)
    first_of_prev_month = last_of_prev_month.replace(day=1)

    end_date = Variable.get('narrativedx_end_date',
                            default_var=last_of_prev_month)
    start_date = Variable.get('narrativedx_start_date',
                              default_var=first_of_prev_month)

    # NOTE(review): the values are interpolated into the SQL text with
    # str.format; the template is not visible here, so it is left as-is.
    # Confirm that `service` and the Airflow variables are trusted input.
    sql = sql.format(start_date=start_date, end_date=end_date, surv=service)
    df = pd.read_sql(sql, ppw_engine)

    # `exec_date` is not defined in this function; presumably a module-level
    # name -- TODO confirm.
    df.to_csv(basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv'))
Ejemplo n.º 2
0
def kill_job(**context):
    """Cancel the Tableau Server job whose id the ``get_job_id`` task pushed.

    Args:
        **context: Airflow task context; only ``context['task_instance']``
            is read, to pull the XCom value of the ``get_job_id`` task.
    """
    tb_job_id = context['task_instance'].xcom_pull(task_ids='get_job_id')
    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
    # Only the part after the backslash is used as the Tableau user name
    # (presumably the stored value is DOMAIN\user -- confirm).
    auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1], ebi['password'])
    server = TSC.Server(Variable.get('tableau_server_url'),
                        use_server_version=True)

    # The context manager signs out even when the cancel call raises, so a
    # failed cancel no longer leaves the session open.
    with server.auth.sign_in(auth):
        server.jobs.cancel(tb_job_id)
Ejemplo n.º 3
0
def get_job_id(tb_job_id):
    """Return the LUID of the Tableau background job with the given id.

    Queries the ``background_jobs`` table over the ``tableau_pg``
    PostgreSQL connection.

    Args:
        tb_job_id: Value matched against ``background_jobs.id``.

    Returns:
        The ``luid`` of the first matching row, converted to ``str``.
    """
    # TODO allow for a list to be input, but how is the variable returned from the UI if it's a list?
    pg = get_json_secret('ebi_db_conn')['db_connections']['tableau_pg']
    pg_params = '{user}:{password}@{server}:{port}/{database}'.format(**pg)
    tpg_engine = sa.create_engine('postgresql+psycopg2://{}'.format(pg_params))

    # Bind the id instead of formatting it into the statement, so a value
    # coming from the UI cannot alter the query text.
    sql = sa.text('select id, luid from background_jobs where id = :job_id')

    df = pd.read_sql(sql, tpg_engine, params={'job_id': tb_job_id})

    return str(df.luid[0])
Ejemplo n.º 4
0
def refresh_extracts():
    """Trigger an extract refresh for each published datasource tagged dbt.

    Signs in to the Tableau server, asks the metadata API for the LUIDs of
    the published datasources attached to the first tag returned by the
    ``dbt`` filter, and requests a refresh for each of them in turn.
    """
    server_url = 'https://ebi.coh.org'
    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
    tableau_auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1],
                                   ebi['password'])
    server = TSC.Server(server_url, use_server_version=True)

    query = '''{
    tags (filter: {name: "dbt"}){
        publishedDatasources {
        luid
        }
    }
    }'''

    with server.auth.sign_in(tableau_auth):
        ds_ls = server.metadata.query(
            query)['data']['tags'][0]['publishedDatasources']
        # Plain loop: the calls are made for their side effect only, so no
        # result list is built.
        for ds in ds_ls:
            datasource = server.datasources.get_by_id(ds['luid'])
            server.datasources.refresh(datasource)
Ejemplo n.º 5
0
    'email_on_failure':
    True,
    'email_on_retry':
    False,
    'retries':
    2,
    'retry_delay':
    timedelta(minutes=5)
}

# Weekly DAG; the cron expression fires at 09:00 on Mondays.
dag = DAG(
    dag_id='send_hims',
    schedule_interval='0 9 * * 1',
    catchup=False,
    default_args=default_args,
)

pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password']

# Working directory holding the query file and its output.
path = 'C:\\Airflow\\send_hims'
ebi_db_server_prod = Variable.get('ebi_db_server_prod')
airflow_server_prod = Variable.get('airflow_server_prod')

# sqlcmd invocation: run hims_query.sql and write pipe-separated output.
query_cmd = ' '.join((
    f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E',
    f'-i {path}\\hims_query.sql',
    f'-o {path}\\hims_results.csv',
    '-s"|" -W -X -I',
))

# pscp invocation: copy the result file to the Airflow server.
# NOTE(review): the password is embedded in the command line text.
copy_cmd = ' '.join((
    f'pscp -pw {pw} {path}\\hims_results.csv',
    f'{airflow_server_prod}:/var/nfsshare/files',
))

query = SSHOperator(ssh_conn_id='tableau_server',
                    task_id='query_hims',
Ejemplo n.º 6
0
from airflow import DAG
from airflow.operators.python import PythonOperator
from auxiliary.outils import get_json_secret
from sqlalchemy.exc import ProgrammingError


def attempt_connection(db_engine):
    """Block until a connection to ``db_engine`` can be opened.

    Calls ``db_engine.connect()`` repeatedly. Every ``ProgrammingError`` is
    followed by a 300 second sleep before the next attempt; any other
    exception propagates to the caller. The connection opened by the
    successful attempt is only a probe and is closed before returning.

    Args:
        db_engine: Object exposing ``connect()`` (a SQLAlchemy engine in
            this file).
    """
    while True:
        try:
            connection = db_engine.connect()
        except ProgrammingError:
            sleep(300)
        else:
            # Release the probe connection instead of leaving it open.
            connection.close()
            return


# Connection settings for the fi_dm_ebi SQL Server database.
ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']

# URL-encoded ODBC connection string assembled from the secret's fields.
params = quote_plus(''.join((
    'DRIVER={driver};',
    'SERVER={server};',
    'DATABASE={database};',
    'UID={user};',
    'PWD={password};',
    'PORT={port};',
    'TDS_Version={tds_version};',
)).format(**ebi))

engine = sa.create_engine(f'mssql+pyodbc:///?odbc_connect={params}')

default_args = {
    'owner':
    'airflow',
    'depends_on_past':
    'email_on_failure':
    True,
    'email_on_retry':
    False,
    'retries':
    1,
    'retry_delay':
    timedelta(minutes=2)
}

# DAG scheduled by the cron expression '0 1 * * *' (01:00 every day).
dag = DAG(
    dag_id='tableau_server_maintenance',
    schedule_interval='0 1 * * *',
    catchup=False,
    default_args=default_args,
)

# Connection settings: SQL Server (also the Tableau credentials) and the
# Tableau PostgreSQL repository.
ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
pg = get_json_secret('ebi_db_conn')['db_connections']['tableau_pg']

pg_params = (f"{pg['user']}:{pg['password']}"
             f"@{pg['server']}:{pg['port']}/{pg['database']}")
tpg_engine = sa.create_engine(f'postgresql+psycopg2://{pg_params}')
server_url = Variable.get('tableau_server_url')

# Only the part of the stored user name after the backslash is passed on.
tableau_auth = TSC.TableauAuth(ebi['user'].split('\\')[1], ebi['password'])
server = TSC.Server(server_url, use_server_version=True)

tagsql = """select s.name as site
      ,p.name as project
      ,w.name as workbook
      ,w.luid as luid
from workbooks w
    inner join sites s