Example 1
def execute_from_file(connection_name, query):
    try:
        logging.info('Executing: ' + str(query))
        hook = MsSqlHook(mssql_conn_id=connection_name)
        hook.run(str(query))
    except Exception as e:
        raise AirflowException(e)
Example 2
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)

        self.logger.info(
            "Dumping Microsoft SQL Server query results to local file")
        conn = mssql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        with NamedTemporaryFile("w") as f:
            # csv here is expected to be unicodecsv imported as csv (as in
            # Airflow's MsSqlToHiveTransfer), which is why writer() accepts
            # an encoding argument.
            csv_writer = csv.writer(f,
                                    delimiter=self.delimiter,
                                    encoding='utf-8')
            field_dict = OrderedDict()
            col_count = 0
            for field in cursor.description:
                col_count += 1
                col_position = "Column{position}".format(position=col_count)
                col_name = field[0] if field[0] != '' else col_position
                field_dict[col_name] = self.type_map(field[1])
            csv_writer.writerows(cursor)
            f.flush()
            cursor.close()
            conn.close()
            self.logger.info("Loading file into Hive")
            hive.load_file(f.name,
                           self.hive_table,
                           field_dict=field_dict,
                           create=self.create,
                           partition=self.partition,
                           delimiter=self.delimiter,
                           recreate=self.recreate,
                           tblproperties=self.tblproperties)
Example 3
def get_sidewalk_data(**kwargs):
    """Get sidewalk condition data from DB."""
    sw_query = general.file_to_string('./sql/sidewalk_insp.sql', __file__)
    sw_conn = MsSqlHook(mssql_conn_id='streets_cg_sql')

    df = sw_conn.get_pandas_df(sw_query)

    # Rename columns we're keeping
    df = df.rename(
        columns={
            'sap_id': 'seg_id',
            'legacy_id': 'geojoin_id',
            'inspectiondate': 'oci_date',
            'rating': 'oci_desc',
            'condition': 'oci'
        })

    df = df.drop(['cgLastModified', 'MaxInspect', 'MaxMod'], axis=1)

    # Write csv
    logging.info('Writing ' + str(df.shape[0]))

    general.pos_write_csv(df, cond_file, date_format=conf['date_format_ymd'])

    return "Successfully wrote prod file"
Example 4
    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id,
                         schema=self.database)
        hook.run(self.sql,
                 autocommit=self.autocommit,
                 parameters=self.parameters)
Example 5
    def __init__(self,
                 source_conn_id,
                 s3_conn_id,
                 s3_bucket,
                 s3_key,
                 query_string,
                 query_merge_field=None,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)

        # Define our connections:
        # create connections to our DB and our storage
        self.source_conn_id = source_conn_id
        self.s3_conn_id = s3_conn_id
        self.source_hook = MsSqlHook(self.source_conn_id)
        self.s3_hook = S3Hook(aws_conn_id=self.s3_conn_id)

        # Get our DB query + merge fields
        self.query_string = query_string
        self.query_merge_field = query_merge_field

        # Get our S3 bucket and key
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key

        # Get the session and endpoint URL
        self.session = self.s3_hook.get_session()
        self.endpoint_url = self.s3_hook.get_endpoint_url()
Example 6
    def execute(self, context):
        logging.info('MS SQL Query Operator Starting')
        hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        if isinstance(self.s3_conn_id, str):
            self.execute_results_to_s3(hook=hook)
        else:
            self.execute_no_results(hook=hook)
        logging.info('MS SQL Query Operator Complete')
Example 7
def workflow(src_conn_id, src_schema, dt, target_conn_id, target_table):
    # TODO Split code into functions
    etl_conn = PostgresHook(postgres_conn_id='etl').get_conn()

    with Session(etl_conn, f'{dag.dag_id}__{src_schema}') as session:
        # Load data
        source_conn = MsSqlHook(mssql_conn_id=src_conn_id,
                                schema=src_schema).get_conn()

        query = f"""
            SELECT 
                id, start_time, end_time, type, data
            FROM dbo.Orders
            WHERE
                CONVERT(DATE, start_time) = '{dt}'
            """

        df = pd.read_sql_query(query, source_conn)

        # Skip if no rows
        if df.empty:
            raise AirflowSkipException('No rows to load')

        # Add service fields
        df['etl_source'] = src_schema
        df['etl_id'] = session.id
        df['hash_id'] = hash_pandas_object(df[['etl_source', 'id']])

        # Export data to CSV buffer
        buffer = StringIO()
        df.to_csv(buffer,
                  index=False,
                  sep='|',
                  na_rep='NUL',
                  quoting=csv.QUOTE_MINIMAL,
                  header=False,
                  float_format='%.8f',
                  doublequote=False,
                  escapechar='\\')
        buffer.seek(0)

        # Push CSV
        target_conn = VerticaHook(vertica_conn_id=target_conn_id).get_conn()

        copy_stmt = f"""
            COPY {target_table}({', '.join(df.columns)})
            FROM STDIN
            DELIMITER '|'
            ENCLOSED BY '"'
            ABORT ON ERROR
            NULL 'NUL'
            """

        cursor = target_conn.cursor()
        cursor.copy(copy_stmt, buffer)

        session.loaded_rows = cursor.rowcount
        session.successful = True
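
A hedged alternative to interpolating dt directly into the SQL text above is to pass it as a bind parameter. This is a sketch, not part of the original DAG; it assumes the pymssql driver behind MsSqlHook (which uses the %s paramstyle) and pandas' params argument.

        # Bind dt as a parameter instead of formatting it into the query string.
        query = """
            SELECT id, start_time, end_time, type, data
            FROM dbo.Orders
            WHERE CONVERT(DATE, start_time) = %s
            """
        df = pd.read_sql_query(query, source_conn, params=[dt])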
Example 8
    def query(self):
        """
        Queries MSSQL and returns a cursor of results.

        :return: mssql cursor
        """
        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        conn = mssql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        return cursor
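
A minimal sketch of how a caller might consume the cursor returned by query(); the iteration and cleanup below are assumptions about the surrounding operator, not code from the example.

    def execute(self, context):
        # Hypothetical caller of query(); rows come back as plain tuples.
        cursor = self.query()
        for row in cursor:
            self.log.info("Fetched row: %s", row)
        cursor.close()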
Example 9
 def get_hook(self):
     try:
         if self.conn_type == 'mysql':
             from airflow.hooks.mysql_hook import MySqlHook
             return MySqlHook(mysql_conn_id=self.conn_id)
         elif self.conn_type == 'google_cloud_platform':
             from airflow.contrib.hooks.bigquery_hook import BigQueryHook
             return BigQueryHook(bigquery_conn_id=self.conn_id)
         elif self.conn_type == 'postgres':
             from airflow.hooks.postgres_hook import PostgresHook
             return PostgresHook(postgres_conn_id=self.conn_id)
         elif self.conn_type == 'hive_cli':
             from airflow.hooks.hive_hooks import HiveCliHook
             return HiveCliHook(hive_cli_conn_id=self.conn_id)
         elif self.conn_type == 'presto':
             from airflow.hooks.presto_hook import PrestoHook
             return PrestoHook(presto_conn_id=self.conn_id)
         elif self.conn_type == 'hiveserver2':
             from airflow.hooks.hive_hooks import HiveServer2Hook
             return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
         elif self.conn_type == 'sqlite':
             from airflow.hooks.sqlite_hook import SqliteHook
             return SqliteHook(sqlite_conn_id=self.conn_id)
         elif self.conn_type == 'jdbc':
             from airflow.hooks.jdbc_hook import JdbcHook
             return JdbcHook(jdbc_conn_id=self.conn_id)
         elif self.conn_type == 'mssql':
             from airflow.hooks.mssql_hook import MsSqlHook
             return MsSqlHook(mssql_conn_id=self.conn_id)
         elif self.conn_type == 'oracle':
             from airflow.hooks.oracle_hook import OracleHook
             return OracleHook(oracle_conn_id=self.conn_id)
         elif self.conn_type == 'vertica':
             from airflow.contrib.hooks.vertica_hook import VerticaHook
             return VerticaHook(vertica_conn_id=self.conn_id)
         elif self.conn_type == 'cloudant':
             from airflow.contrib.hooks.cloudant_hook import CloudantHook
             return CloudantHook(cloudant_conn_id=self.conn_id)
         elif self.conn_type == 'jira':
             from airflow.contrib.hooks.jira_hook import JiraHook
             return JiraHook(jira_conn_id=self.conn_id)
         elif self.conn_type == 'redis':
             from airflow.contrib.hooks.redis_hook import RedisHook
             return RedisHook(redis_conn_id=self.conn_id)
         elif self.conn_type == 'wasb':
             from airflow.contrib.hooks.wasb_hook import WasbHook
             return WasbHook(wasb_conn_id=self.conn_id)
         elif self.conn_type == 'docker':
             from airflow.hooks.docker_hook import DockerHook
             return DockerHook(docker_conn_id=self.conn_id)
     except Exception:
         # Unknown or unimportable hook types fall through and return None
         pass
Example 10
def get_special_events():
    """Get special events from DB."""
    se_query = general.file_to_string('./sql/special_events.sql', __file__)
    se_conn = MsSqlHook(mssql_conn_id='special_events_sql')
    df = se_conn.get_pandas_df(se_query)
    df['event_id'] = pd.to_numeric(df['event_id'],
                                   errors='coerce',
                                   downcast='integer')

    general.pos_write_csv(df,
                          temp_file,
                          date_format=conf['date_format_ymd_hms'])

    return "Retrieved special events to temp file."
Example 11
def get_streets_paving_data():
    """Get streets paving data from DB."""
    
    pv_query = general.file_to_string('./sql/pavement_ex.sql', __file__)
    pv_conn = MsSqlHook(mssql_conn_id='streets_cg_sql')

    df = pv_conn.get_pandas_df(pv_query)

    results = df.shape[0]

    general.pos_write_csv(
        df, temp_file)
    
    return f"Successfully wrote temp file with {results} records"
Example 12
    def execute(self, context):
        mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        df = mssql_hook.get_pandas_df(self.select_sql)

        # Convert columns data types to int
        if self.int_columns:
            for col in self.int_columns:
                df[col] = df[col].astype("Int64")

        # Create folder if not exists
        if not os.path.exists(self.target_file_dir):
            os.mkdir(self.target_file_dir)

        file_path = os.path.join(self.target_file_dir, self.file_name)
        df.to_csv(file_path, index=False)
Example 13
    def execute(self, context):
        self.log.info("Querying data from source: {0}".format(
            self.mssql_source_conn_id))

        src_mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_source_conn_id)
        df = src_mssql_hook.get_pandas_df(sql=self.source_sql,
                                          parameters=self.source_sql_params)
        rows_total = df.shape[0]

        self.log.info("Writing data to {0}.".format(self.destination_filepath))
        df.to_csv(self.destination_filepath, sep=self.sep, index=False)

        self.log.info("Total inserted to file: {0} rows".format(rows_total))

        return rows_total
Example 14
def update_finger_data():
    import pandas as pd
    import os
    import requests
    import datetime
    from pprint import pprint
    from os.path import join, dirname
    from airflow.hooks.mssql_hook import MsSqlHook

    try:
        yest = datetime.datetime.now() - datetime.timedelta(days=1)
        dt_string = yest.date().strftime("%Y-%m-%d")

        hook = MsSqlHook(mssql_conn_id="odoo_finger")

        conn = hook.get_conn()
        # hook.get_conn() returns a live DB-API connection (pymssql)
        df = pd.read_sql(
            "SELECT max(bsevtdt) as checkout,min(bsevtdt) as checkin ,user_id from TA.dbo.punchlog where CONVERT (date,createdAt)=CONVERT(date, GETDATE()-1) GROUP by user_id;",
            conn)
        # catch read_sql connection errors
        attendances = []
        for line in range(0, len(df)):
            attendances.append({
                'check_in': df['checkin'][line].isoformat(),
                'check_out': df['checkout'][line].isoformat(),
                'emp_code': df['user_id'][line],
                'index': line
            })
        DOMAIN = "http://10.0.1.49/b/v1"
        ADD_ATT = DOMAIN + "/attendance/add"
        json_data = {
            'attendances': attendances,
            'tz': 'Asia/Kuwait',
            'name': dt_string,
            'db': 'Boutiquaat_Test',
            'login': '******',
            'pswd': 'admin',
        }

        print('Attendance payload:', json_data)
        response = requests.post(ADD_ATT, json=json_data)
        print('Response:')
        pprint(response.json())

    except Exception as e:
        raise AirflowException(e)
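
The inline comment above notes that read_sql connection errors should be caught. A minimal sketch, assuming pymssql is the driver behind MsSqlHook (its DB-API exceptions derive from pymssql.Error) and that pandas is imported as pd as in the example; the helper name is illustrative only.

import pymssql

def read_punchlog(hook, sql):
    conn = hook.get_conn()
    try:
        return pd.read_sql(sql, conn)
    except pymssql.Error as exc:
        # Surface driver/connection failures as a task failure
        raise AirflowException(f"Failed to read punchlog data: {exc}")
    finally:
        conn.close()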
Example 15
def get_fd_data(**kwargs):
    """Get fire department data from Data Base."""

    logging.info("Get fire department data from CAD archive")

    fd_query = general.file_to_string('./sql/fd.sql', __file__)
    fd_conn = MsSqlHook(mssql_conn_id='fire_department')

    logging.info("Read data to panda DataFrame")

    df = fd_conn.get_pandas_df(fd_query)
    # Write csv
    logging.info('Writing ' + prod_file)
    general.pos_write_csv(df,
                          prod_file,
                          date_format=conf['date_format_ymd_hms'])

    return "Successfully wrote prod file at " + prod_file
Example 16
def pg_to_mssql(**kwargs):
    pg_hook = PostgresHook('etl_postgres')
    mssql_hook = MsSqlHook('mssql_wtproperty')

    # The fields we want to work with
    target_fields = [
        'master_account_num', 'master_account_name', 'sub_account_num',
        'sub_account_name', 'short_name', 'account_status', 'group_num',
        'item_num', 'original_balance', 'fed_withholding_tax_this_period',
        'ytd_fed_withholding_tax', 'int_paid_this_period', 'ytd_int_paid',
        'int_split_this_period', 'escrow_balance'
    ]

    # pull records from postgres
    recs = pg_hook.get_records(
        f"select {', '.join(target_fields)}  from escrow.escrow")

    # insert records to mssql
    mssql_hook.insert_rows('property_data_escrowbalance', recs, target_fields)
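
insert_rows here comes from Airflow's DbApiHook and also accepts a commit_every argument controlling how many rows go into each commit. A hedged variant of the call above that makes the batch size explicit (5000 is an arbitrary choice, not from the original DAG):

    mssql_hook.insert_rows('property_data_escrowbalance',
                           recs,
                           target_fields=target_fields,
                           commit_every=5000)  # commit in batches of 5000 rows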
Example 17
def get_table_cols_name(conn_id: str, schema: str, table: str):
    """
    Obtem a lista de colunas de uma tabela.
    """
    conn_values = BaseHook.get_connection(conn_id)

    if conn_values.conn_type == 'mssql':
        db_hook = MsSqlHook(mssql_conn_id=conn_id)
    elif conn_values.conn_type == 'postgres':
        db_hook = PostgresHook(postgres_conn_id=conn_id)
    else:
        raise Exception('Conn_type not implemented.')

    with db_hook.get_conn() as db_conn:
        with db_conn.cursor() as db_cur:
            db_cur.execute(f'SELECT * FROM {schema}.{table} WHERE 1=2')
            column_names = [tup[0] for tup in db_cur.description]

    return column_names
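
The WHERE 1=2 predicate returns no rows but still populates cursor.description with column metadata, which is what the helper reads. A hypothetical call; the connection id, schema, and table below are placeholders:

cols = get_table_cols_name(conn_id='my_mssql_conn', schema='dbo', table='Orders')
print(cols)  # e.g. ['id', 'start_time', 'end_time', ...]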
Example 18
def get_sidewalk_gis(**kwargs):
    """ Get sidewalk geodatabase from shared drive"""

    sde_query = general.file_to_string('./sql/sidewalk_sde.sql', __file__)
    sde_conn = MsSqlHook(mssql_conn_id='atlas_sde')

    #sde_server = conf['sde_server']
    #sde_user = conf['sde_user']
    #sde_pw = conf['sde_pw']

    #sde_conn = pymssql.connect(sde_server, sde_user, sde_pw, 'sdw')
    #query = "SELECT *, [Shape].STAsText() as geom FROM SDW.IAMSD.SIDEWALK"

    df = sde_conn.get_pandas_df(sde_query)

    df.columns = [x.lower() for x in df.columns]

    df = df.drop('shape', axis=1)

    df = df.rename(
        columns={
            'sapid': 'seg_id',
            'cdcode': 'council',
            'cpcode': 'comm_plan',
            'legacy_id': 'geojoin_id',
            'iamfloc': 'fun_loc_id',
            'loc_descr': 'loc_desc'
        })

    df = df.fillna('')

    logging.info('processed {} rows'.format(df.shape[0]))

    geospatial.df2shp(df=df,
                      folder=prod_dir,
                      layername=layername,
                      dtypes=dtypes,
                      gtype=gtype,
                      epsg=2230)

    return "Converted table to shapefile"
Example 19
def get_documentum(mode, **kwargs):
    """Get tables from Documentum."""
    logging.info('Getting files from documentum')
    table_name = dn.table_name(mode)
    for name in table_name:
        logging.info('Querying for {0} table'.format(name))
        query_string = 'SELECT * FROM SCSLEGIS.dbo.{0};'.format(name)
        logging.info('Connecting to MS Database')
        documentum_conn = MsSqlHook(mssql_conn_id='docm_sql')
        logging.info('Reading data to Pandas DataFrame')
        df = documentum_conn.get_pandas_df(query_string)

        logging.info('Correcting title column')
        
        df['TITLE'] = fix_title(df[['TITLE','OBJECT_NAME']])

        save_path =  conf['prod_data_dir'] + '/documentum_{0}.csv'.format(name.lower())
        logging.info('Writing Production file')
        general.pos_write_csv(df, save_path)

    return "Successfully retrieved Documentum tables"
Example 20
    def __init__(self,
                 source_conn_id,
                 target_conn_id,
                 target_schema,
                 target_table,
                 query_string,
                 query_merge_field=None,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)

        #Create connections to our source and target db
        self.source_db = MsSqlHook(source_conn_id)
        self.target_db = PostgresHook(target_conn_id)

        #If there are additional parameters in the query, then take care of them
        self.query = query_string

        #Get our target db schema and table
        self.target_schema = target_schema
        self.target_table = target_table
Example 21
def get_fd_data(**kwargs):
    """Get fire department data from Data Base."""

    logging.info("Get fire department data from Data Base")
    fd_query = general.file_to_string('./sql/fd_pn.sql', __file__)
    fd_conn = MsSqlHook(mssql_conn_id='fire_department')

    logging.info("Read data to panda DataFrame")
    df = fd_conn.get_pandas_df(fd_query)

    df = df.rename(
        columns={
            'city': 'address_city',
            'response_month': 'month_response',
            'response_year': 'year_response'
        })

    logging.info("Writing {} rows to prod".format(df.shape[0]))

    # Write csv
    general.pos_write_csv(df, prod_file)
    return "Successfully wrote prod file"
Example 22
def get_onbase_test():
    """Get tables from OnBase."""
    logging.info('Getting files from onbase')
    for root, dirs, files in os.walk('./poseidon/dags/city_docs/sql/onbase'):
        for name in files:
            logging.info('Querying for '+name)
            path = './sql/onbase/{}'.format(name)
            query_string = general.file_to_string(path, __file__)
            logging.info('Connecting to MS Database')
            onbase_conn = MsSqlHook(mssql_conn_id='onbase_test_sql')
            logging.info('Reading data to Pandas DataFrame')
            df = onbase_conn.get_pandas_df(query_string)
            table_type = name[0:-4]

            logging.info('Correcting title column')
            df['TITLE'] = fix_title(df[['TITLE','OBJECT_NAME']])

            save_path =  '{0}/onbase_test_{1}.csv'.format(conf['prod_data_dir'],table_type)
            logging.info('Writing Production file')
            general.pos_write_csv(df, save_path)

    return "Successfully retrieved OnBase tables"
Example 23
    def execute(self, context):
        source_hook = WasbHook(wasb_conn_id=self.azure_blob_conn_id)

        # Assumption 1: there is sufficient disk space to download the blob in question
        # Assumption 2: The file is a correctly formatted csv file
        with NamedTemporaryFile(mode='a+', delete=True) as f:
            source_hook.get_file(file_path=f.name,
                                 container_name=self.src_blob_container,
                                 blob_name=self.src_blob)
            f.flush()
            self.log.info("Saving file to %s", f.name)

            csv_reader = reader(f)

            list_of_tuples = list(map(tuple, csv_reader))
            self.log.info(list_of_tuples)

            self.log.info(f"Inserting into {self.dest_table}")
            hook = MsSqlHook(mssql_conn_id=self.azure_sql_conn_id,
                             schema=self.database)
            hook.insert_rows(self.dest_table, list_of_tuples)

        self.log.info(f"Data inserted into {self.database}.{self.dest_table}")
Example 24
def get_documentum_test():
    """Get tables from Documentum test database."""
    logging.info('Getting files for documentum test')
    table_name = dn.table_name('schedule_daily')+dn.table_name('schedule_hourly_15')+dn.table_name('schedule_hourly_30')
    logging.info(table_name)
    for name in table_name:
        logging.info('Querying for {0} table'.format(name))
        query_string = 'SELECT * FROM SCSLEGIS.dbo.{0};'.format(name)
        logging.info('Connecting to MS Database')
        documentum_conn = MsSqlHook(mssql_conn_id='docm_test_sql')
        logging.info('Reading data to Pandas DataFrame')
        try:
            df = documentum_conn.get_pandas_df(query_string)
            logging.info('Correcting title column')
        
            df['TITLE'] = fix_title(df[['TITLE','OBJECT_NAME']])

            save_path =  conf['prod_data_dir'] + '/documentum_{0}_test.csv'.format(name.lower())
            general.pos_write_csv(df, save_path)

        except Exception as e:
            logging.info(f'Could not read {name} because {e}')

    return "Successfully retrieved Documentum tables"
Example 25
    def execute(self, context):
        self.log.info("Querying data from source: {0}".format(
            self.mssql_source_conn_id))

        src_mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_source_conn_id)
        df = src_mssql_hook.get_pandas_df(sql=self.source_sql,
                                          parameters=self.source_sql_params)
        rows_total = df.shape[0]

        self.log.info("Writing data to {0}.".format(self.destination_filepath))
        writer = pd.ExcelWriter(self.destination_filepath,
                                engine=self.excel_engine)
        if self.write_mode == 'append_tab' and path.isfile(
                self.destination_filepath):
            writer.book = load_workbook(self.destination_filepath)

        pandas.io.formats.excel.header_style = None
        if self.sheet_name is not None:
            df.to_excel(writer, sheet_name=self.sheet_name, index=False)
        else:
            df.to_excel(writer, index=False)
        writer.save()

        self.log.info("Total inserted to file: {0} rows".format(rows_total))
Example 26
 def get_hook(self):
     if self.conn_type == 'mysql':
         from airflow.hooks.mysql_hook import MySqlHook
         return MySqlHook(mysql_conn_id=self.conn_id)
     elif self.conn_type == 'google_cloud_platform':
         from airflow.gcp.hooks.bigquery import BigQueryHook
         return BigQueryHook(bigquery_conn_id=self.conn_id)
     elif self.conn_type == 'postgres':
         from airflow.hooks.postgres_hook import PostgresHook
         return PostgresHook(postgres_conn_id=self.conn_id)
     elif self.conn_type == 'pig_cli':
         from airflow.hooks.pig_hook import PigCliHook
         return PigCliHook(pig_cli_conn_id=self.conn_id)
     elif self.conn_type == 'hive_cli':
         from airflow.hooks.hive_hooks import HiveCliHook
         return HiveCliHook(hive_cli_conn_id=self.conn_id)
     elif self.conn_type == 'presto':
         from airflow.hooks.presto_hook import PrestoHook
         return PrestoHook(presto_conn_id=self.conn_id)
     elif self.conn_type == 'hiveserver2':
         from airflow.hooks.hive_hooks import HiveServer2Hook
         return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
     elif self.conn_type == 'sqlite':
         from airflow.hooks.sqlite_hook import SqliteHook
         return SqliteHook(sqlite_conn_id=self.conn_id)
     elif self.conn_type == 'jdbc':
         from airflow.hooks.jdbc_hook import JdbcHook
         return JdbcHook(jdbc_conn_id=self.conn_id)
     elif self.conn_type == 'mssql':
         from airflow.hooks.mssql_hook import MsSqlHook
         return MsSqlHook(mssql_conn_id=self.conn_id)
     elif self.conn_type == 'oracle':
         from airflow.hooks.oracle_hook import OracleHook
         return OracleHook(oracle_conn_id=self.conn_id)
     elif self.conn_type == 'vertica':
         from airflow.contrib.hooks.vertica_hook import VerticaHook
         return VerticaHook(vertica_conn_id=self.conn_id)
     elif self.conn_type == 'cloudant':
         from airflow.contrib.hooks.cloudant_hook import CloudantHook
         return CloudantHook(cloudant_conn_id=self.conn_id)
     elif self.conn_type == 'jira':
         from airflow.contrib.hooks.jira_hook import JiraHook
         return JiraHook(jira_conn_id=self.conn_id)
     elif self.conn_type == 'redis':
         from airflow.contrib.hooks.redis_hook import RedisHook
         return RedisHook(redis_conn_id=self.conn_id)
     elif self.conn_type == 'wasb':
         from airflow.contrib.hooks.wasb_hook import WasbHook
         return WasbHook(wasb_conn_id=self.conn_id)
     elif self.conn_type == 'docker':
         from airflow.hooks.docker_hook import DockerHook
         return DockerHook(docker_conn_id=self.conn_id)
     elif self.conn_type == 'azure_data_lake':
         from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
         return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
     elif self.conn_type == 'azure_cosmos':
         from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook
         return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
     elif self.conn_type == 'cassandra':
         from airflow.contrib.hooks.cassandra_hook import CassandraHook
         return CassandraHook(cassandra_conn_id=self.conn_id)
     elif self.conn_type == 'mongo':
         from airflow.contrib.hooks.mongo_hook import MongoHook
         return MongoHook(conn_id=self.conn_id)
     elif self.conn_type == 'gcpcloudsql':
         from airflow.gcp.hooks.cloud_sql import CloudSqlDatabaseHook
         return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
     elif self.conn_type == 'grpc':
         from airflow.contrib.hooks.grpc_hook import GrpcHook
         return GrpcHook(grpc_conn_id=self.conn_id)
     raise AirflowException("Unknown hook type {}".format(self.conn_type))
Example 27
    def execute(self, context):
        _log.info('Executing: ' + str(self.sql))
        hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        hook.run(self.sql,
                 autocommit=self.autocommit,
                 parameters=self.parameters)
Example 28
    def execute(self, context):
        logging.info('Executing: ' + str(self.sql))
        hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        hook.run(self.sql, parameters=self.parameters)
Example 29
            common.read_json(PATHS['report_tracker_json_path'])[BANNER]
            ['report_next_week_starting_date']
        })

    transform_load = PythonOperator(
        task_id='transform_load',
        python_callable=transform_load.wal_transform_load,
        op_kwargs={
            'folder_path':
            PATHS['download_report_folder'],
            'sql_query_file_path':
            PATHS['sql'] + "/%s_customer_itemnmbr.sql" % BANNER,
            'table_name':
            "POS",
            'connection_string':
            MsSqlHook('asterix_test').get_uri() +
            DATABASE['drivers']['sql_server_17']
        })

    archive_report = PythonOperator(task_id='archive_report',
                                    python_callable=common.move_file,
                                    op_kwargs={
                                        'from_folder_path':
                                        PATHS['download_report_folder'],
                                        'to_folder_path':
                                        PATHS['report_archive'],
                                        'prefix':
                                        None
                                    })

    update_report_tracker = PythonOperator(
Example 30
    def _setup_mssql_connection(self):
        self.mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        self.mssql_conn = self.mssql_hook.get_conn()