Example #1
from airflow.hooks.jdbc_hook import JdbcHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class JdbcOperator(BaseOperator):
    """
    Executes SQL code in a database using a JDBC driver.

    Requires jaydebeapi.

    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: str
    :param sql: the sql code to be executed. (templated)
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or a reference to a template file.
        Template references are recognized by str ending in '.sql'
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            jdbc_conn_id='jdbc_default', autocommit=False, parameters=None,
            *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters

        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
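
For context, a minimal usage sketch of this operator inside a DAG. The task ID, SQL file name, and connection ID below are illustrative, and dag is assumed to be an existing DAG object:

run_report = JdbcOperator(
    task_id='run_report',
    sql='queries/daily_report.sql',  # picked up via template_ext ('.sql')
    jdbc_conn_id='jdbc_default',     # must exist as an Airflow connection
    autocommit=True,
    dag=dag,
)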
Example #2
def _query_jdbc(self):
    """
    Queries jdbc and returns a cursor to the results.
    """
    jdbc = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
    conn = jdbc.get_conn()
    cursor = conn.cursor()
    self.log.info('Querying SQL: %s', self.sql)
    cursor.execute(self.sql)
    return cursor
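
Since jaydebeapi returns DB-API 2.0 cursors, a caller of _query_jdbc might drain and close the cursor like this (a sketch; the method above deliberately leaves cleanup to the caller):

cursor = self._query_jdbc()
try:
    for row in cursor.fetchall():   # DB-API 2.0 cursor from jaydebeapi
        self.log.info('Row: %s', row)
finally:
    cursor.close()                  # closing is left to the caller above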
Example #3
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param jdbc_url: driver specific connection url with string variables, e.g. for exasol jdbc:exa:{0}:{1};schema={2}
    Template vars are defined like this: {0} = hostname, {1} = port, {2} = dbschema, {3} = extra
    :type jdbc_url: string
    :param jdbc_driver_name: classname of the specific jdbc driver, for exasol com.exasol.jdbc.EXADriver
    :type jdbc_driver_name: string
    :param jdbc_driver_loc: absolute path to jdbc driver location, for example /var/exasol/exajdbc.jar
    :type jdbc_driver_loc: string

    :param conn_id: reference to a predefined database
    :type conn_id: string
    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 jdbc_conn_id='jdbc_default',
                 autocommit=False,
                 parameters=None,
                 *args,
                 **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters

        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
Example #4
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param jdbc_url: driver specific connection url with string variables, e.g. for exasol jdbc:exa:{0}:{1};schema={2}
    Template vars are defined like this: {0} = hostname, {1} = port, {2} = dbschema, {3} = extra
    :type jdbc_url: string
    :param jdbc_driver_name: classname of the specific jdbc driver, for exasol com.exasol.jdbc.EXADriver
    :type jdbc_driver_name: string
    :param jdbc_driver_loc: absolute path to jdbc driver location, for example /var/exasol/exajdbc.jar
    :type jdbc_driver_loc: string

    :param conn_id: reference to a predefined database
    :type conn_id: string
    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            jdbc_conn_id='jdbc_default', autocommit=False, parameters=None,
            *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters

        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        logging.info('Executing: %s', self.sql)  # assumes "import logging" at module level
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
Example #5
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param sql: the sql code to be executed. (templated)
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: str
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 jdbc_conn_id='jdbc_default',
                 autocommit=False,
                 parameters=None,
                 *args,
                 **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters

        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
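
A sketch of passing parameters to this variant. The task and query are hypothetical; jaydebeapi-backed connections generally take JDBC-style '?' placeholders, though the exact syntax depends on the driver, and note that parameters values are bound by the driver rather than rendered by Jinja (only sql is a template field):

load_recent = JdbcOperator(
    task_id='load_recent',
    sql='SELECT * FROM events WHERE created_at >= ?',
    parameters=('2024-01-01',),   # bound at execution time by the driver
    jdbc_conn_id='jdbc_default',
    dag=dag,
)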
Example #6
def get_hook(self):
    try:
        if self.conn_type == 'mysql':
            from airflow.hooks.mysql_hook import MySqlHook
            return MySqlHook(mysql_conn_id=self.conn_id)
        elif self.conn_type == 'google_cloud_platform':
            from airflow.contrib.hooks.bigquery_hook import BigQueryHook
            return BigQueryHook(bigquery_conn_id=self.conn_id)
        elif self.conn_type == 'postgres':
            from airflow.hooks.postgres_hook import PostgresHook
            return PostgresHook(postgres_conn_id=self.conn_id)
        elif self.conn_type == 'hive_cli':
            from airflow.hooks.hive_hooks import HiveCliHook
            return HiveCliHook(hive_cli_conn_id=self.conn_id)
        elif self.conn_type == 'presto':
            from airflow.hooks.presto_hook import PrestoHook
            return PrestoHook(presto_conn_id=self.conn_id)
        elif self.conn_type == 'hiveserver2':
            from airflow.hooks.hive_hooks import HiveServer2Hook
            return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
        elif self.conn_type == 'sqlite':
            from airflow.hooks.sqlite_hook import SqliteHook
            return SqliteHook(sqlite_conn_id=self.conn_id)
        elif self.conn_type == 'jdbc':
            from airflow.hooks.jdbc_hook import JdbcHook
            return JdbcHook(jdbc_conn_id=self.conn_id)
        elif self.conn_type == 'mssql':
            from airflow.hooks.mssql_hook import MsSqlHook
            return MsSqlHook(mssql_conn_id=self.conn_id)
        elif self.conn_type == 'oracle':
            from airflow.hooks.oracle_hook import OracleHook
            return OracleHook(oracle_conn_id=self.conn_id)
        elif self.conn_type == 'vertica':
            from airflow.contrib.hooks.vertica_hook import VerticaHook
            return VerticaHook(vertica_conn_id=self.conn_id)
        elif self.conn_type == 'cloudant':
            from airflow.contrib.hooks.cloudant_hook import CloudantHook
            return CloudantHook(cloudant_conn_id=self.conn_id)
        elif self.conn_type == 'jira':
            from airflow.contrib.hooks.jira_hook import JiraHook
            return JiraHook(jira_conn_id=self.conn_id)
        elif self.conn_type == 'redis':
            from airflow.contrib.hooks.redis_hook import RedisHook
            return RedisHook(redis_conn_id=self.conn_id)
        elif self.conn_type == 'wasb':
            from airflow.contrib.hooks.wasb_hook import WasbHook
            return WasbHook(wasb_conn_id=self.conn_id)
        elif self.conn_type == 'docker':
            from airflow.hooks.docker_hook import DockerHook
            return DockerHook(docker_conn_id=self.conn_id)
    except Exception:
        # Replaces the original bare `except: pass`: unknown or misconfigured
        # connection types still yield None, but system-exiting exceptions
        # are no longer swallowed.
        return None
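
get_hook lives on Airflow's Connection model, so callers usually reach it through the connection registry; a sketch (the connection ID is hypothetical):

from airflow.hooks.base_hook import BaseHook

conn = BaseHook.get_connection('my_postgres')  # hypothetical connection ID
hook = conn.get_hook()                         # dispatches on conn.conn_type
rows = hook.get_records('SELECT 1')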
Example #7
    def execute(self, context):
        self.hook = JdbcHook(jdbc_conn_id=self.snowflake_conn_id)
        self.s3 = S3Hook(s3_conn_id=self.s3_conn_id)

        # Copy so repeated runs don't mutate self.pre_sql in place.
        sql = list(self.pre_sql)
        if self.drop_and_create:
            sql += self._build_pre_sql()

        s3_bucket, s3_key = self.s3.parse_s3_url(self.data_s3_key)
        if s3_bucket != S3_BUCKET:
            raise ValueError(
                'For Snowflake loads the S3 bucket must be {}. Got: {}'.format(
                    S3_BUCKET, s3_bucket))
        copy_sql = """
            COPY INTO {table}
            FROM @airflow.{stage}/{s3_key};
        """.format(
            table=self.table,
            stage=self.stage,
            s3_key=s3_key,
        )
        sql.append(copy_sql)
        self.hook.run(['BEGIN;'] + sql + ['COMMIT;'])
Example #8
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: string
    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 jdbc_conn_id='jdbc_default',
                 autocommit=False,
                 parameters=None,
                 *args,
                 **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters

        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
Example #9
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param jdbc_url: driver specific connection url with string variables, e.g. for exasol jdbc:exa:{0}:{1};schema={2}
    Template vars are defined like this: {0} = hostname, {1} = port, {2} = dbschema, {3} = extra
    :type jdbc_url: string
    :param jdbc_driver_name: classname of the specific jdbc driver, for exasol com.exasol.jdbc.EXADriver
    :type jdbc_driver_name: string
    :param jdbc_driver_loc: absolute path to jdbc driver location, for example /var/exasol/exajdbc.jar
    :type jdbc_driver_loc: string

    :param conn_id: reference to a predefined database
    :type conn_id: string
    :param sql: the sql code to be executed
    :type sql: string or string pointing to a template file. File must have
        a '.sql' extensions.
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            jdbc_url, jdbc_driver_name, jdbc_driver_loc,
            conn_id='jdbc_default', autocommit=False,
            *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)

        self.jdbc_url = jdbc_url
        self.jdbc_driver_name = jdbc_driver_name
        self.jdbc_driver_loc = jdbc_driver_loc
        self.sql = sql
        self.conn_id = conn_id
        self.autocommit = autocommit

    def execute(self, context):
        logging.info('Executing: %s', self.sql)  # assumes "import logging" at module level
        self.hook = JdbcHook(conn_id=self.conn_id,
                             jdbc_driver_loc=self.jdbc_driver_loc,
                             jdbc_driver_name=self.jdbc_driver_name,
                             jdbc_url=self.jdbc_url)
        for row in self.hook.get_records(self.sql, self.autocommit):
            logging.info('Result: %s', ','.join(map(str, row)))
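
Putting this older variant together: a hypothetical Exasol task that carries the driver details on the operator itself, with all values illustrative and mirroring the docstring's examples (dag assumed to exist):

run_exasol = JdbcOperator(
    task_id='run_exasol',
    sql='SELECT 1',
    jdbc_url='jdbc:exa:{0}:{1};schema={2}',
    jdbc_driver_name='com.exasol.jdbc.EXADriver',
    jdbc_driver_loc='/var/exasol/exajdbc.jar',
    conn_id='jdbc_default',
    dag=dag,
)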
Example #10
def execute(self, context):
    self.log.info('Executing: %s', self.sql)
    self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
    self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
Example #11
class FBS3ToSnowflakeOperator(BaseOperator):
    template_fields = (
        'table',
        'data_s3_key',
        'pre_sql',
        'schema_s3_key',
    )

    @apply_defaults
    @require_keyword_args(['task_id', 'table', 'data_s3_key', 'stage', 'dag'])
    def __init__(self,
                 snowflake_conn_id=SNOWFLAKE_CONN_ID,
                 pre_sql=None,
                 s3_conn_id='s3_default',
                 drop_and_create=False,
                 schema_s3_key=None,
                 forced_string_columns=None,
                 *args,
                 **kwargs):
        self.snowflake_conn_id = snowflake_conn_id
        self.table = kwargs['table']
        self.data_s3_key = kwargs['data_s3_key']
        # None (not []) as the default avoids the shared-mutable-default pitfall.
        if pre_sql is None:
            pre_sql = []
        elif isinstance(pre_sql, str):
            pre_sql = [pre_sql]
        elif not isinstance(pre_sql, list):
            raise TypeError('pre_sql must be str or list!')
        self.pre_sql = pre_sql
        self.s3_conn_id = s3_conn_id
        self.stage = kwargs['stage']
        self.drop_and_create = drop_and_create
        self.schema_s3_key = schema_s3_key
        self.forced_string_columns = forced_string_columns or []

        del kwargs['table']
        del kwargs['data_s3_key']
        del kwargs['stage']
        super(FBS3ToSnowflakeOperator, self).__init__(*args, **kwargs)

    def _build_pre_sql(self):
        # A helper function that only needs to be called in the `_build_pre_sql` function
        def determine_schema():
            schema_sql = ''
            logging.info('Reading from s3: %s', self.schema_s3_key)
            schema_key = self.s3.get_key(self.schema_s3_key)
            if schema_key is None:
                raise AirflowException(
                    's3 key {} was not found. Did you forget to run a dependency?'
                    .format(self.schema_s3_key))  # was formatting schema_key, i.e. None
            # Schema must be stored as a JSONified array
            schema_array = json.loads(schema_key.get_contents_as_string())
            schema_strings = []

            for column in schema_array:
                column_name = column[0]
                if column_name in COLUMNS_TO_QUOTE:
                    column[0] = '"{}"'.format(column_name)

                # We're assuming well-formed type information
                type_and_len = column[1].lower().split('(')
                use_precise_type = (
                    type_and_len[0] in POSTGRES_TO_SNOWFLAKE_DATA_TYPES
                    and column_name not in self.forced_string_columns)
                if use_precise_type:
                    new_type = POSTGRES_TO_SNOWFLAKE_DATA_TYPES[
                        type_and_len[0]]
                    if new_type != FLOATESQUE_TYPE:
                        column[1] = new_type
                    # For numeric and decimal, if no argument is provided then
                    # postgres says "numeric values of any precision and scale
                    # can be stored". The only way to emulate this behavior is
                    # to use a float (which is what matillion + redshift also do).
                    elif new_type == FLOATESQUE_TYPE and len(type_and_len) == 1:
                        column[1] = FLOAT_TYPE
                else:
                    # Replace any non-supported data types with the string type, aka VARCHAR
                    column[1] = STRING_TYPE

                schema_strings.append(' '.join(column))

            # Extra spaces added to make it look good in the logs
            return ',\n               '.join(schema_strings)

        pre_sql = [
            'DROP TABLE IF EXISTS {table};'.format(table=self.table), """
           CREATE TABLE IF NOT EXISTS {table} (
               {schema}
           );
           """.format(table=self.table, schema=determine_schema())
        ]
        return pre_sql

    def execute(self, context):
        self.hook = JdbcHook(jdbc_conn_id=self.snowflake_conn_id)
        self.s3 = S3Hook(s3_conn_id=self.s3_conn_id)

        # Copy so repeated runs don't mutate self.pre_sql in place.
        sql = list(self.pre_sql)
        if self.drop_and_create:
            sql += self._build_pre_sql()

        s3_bucket, s3_key = self.s3.parse_s3_url(self.data_s3_key)
        if s3_bucket != S3_BUCKET:
            raise ValueError(
                'For Snowflake loads the S3 bucket must be {}. Got: {}'.format(
                    S3_BUCKET, s3_bucket))
        copy_sql = """
            COPY INTO {table}
            FROM @airflow.{stage}/{s3_key};
        """.format(
            table=self.table,
            stage=self.stage,
            s3_key=s3_key,
        )
        sql.append(copy_sql)
        self.hook.run(['BEGIN;'] + sql + ['COMMIT;'])
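
A hypothetical instantiation of this project-specific operator. Every value below is illustrative; table, data_s3_key, stage (and task_id, dag) are required by require_keyword_args, and the bucket in data_s3_key must match the module's S3_BUCKET constant per the check in execute:

load_events = FBS3ToSnowflakeOperator(
    task_id='load_events',
    table='analytics.events',
    data_s3_key='s3://example-bucket/events/data.csv',
    stage='events_stage',
    drop_and_create=True,
    schema_s3_key='s3://example-bucket/events/schema.json',
    dag=dag,
)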
Example #12
def execute(self, context):
    logging.info('Executing: %s', self.sql)  # assumes "import logging" at module level
    self.hook = JdbcHook(conn_id=self.conn_id,
                         jdbc_driver_loc=self.jdbc_driver_loc,
                         jdbc_driver_name=self.jdbc_driver_name,
                         jdbc_url=self.jdbc_url)
    for row in self.hook.get_records(self.sql, self.autocommit):
        logging.info('Result: %s', ','.join(map(str, row)))
Example #13
def get_hook(self):
    if self.conn_type == 'mysql':
        from airflow.hooks.mysql_hook import MySqlHook
        return MySqlHook(mysql_conn_id=self.conn_id)
    elif self.conn_type == 'google_cloud_platform':
        from airflow.gcp.hooks.bigquery import BigQueryHook
        return BigQueryHook(bigquery_conn_id=self.conn_id)
    elif self.conn_type == 'postgres':
        from airflow.hooks.postgres_hook import PostgresHook
        return PostgresHook(postgres_conn_id=self.conn_id)
    elif self.conn_type == 'pig_cli':
        from airflow.hooks.pig_hook import PigCliHook
        return PigCliHook(pig_cli_conn_id=self.conn_id)
    elif self.conn_type == 'hive_cli':
        from airflow.hooks.hive_hooks import HiveCliHook
        return HiveCliHook(hive_cli_conn_id=self.conn_id)
    elif self.conn_type == 'presto':
        from airflow.hooks.presto_hook import PrestoHook
        return PrestoHook(presto_conn_id=self.conn_id)
    elif self.conn_type == 'hiveserver2':
        from airflow.hooks.hive_hooks import HiveServer2Hook
        return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
    elif self.conn_type == 'sqlite':
        from airflow.hooks.sqlite_hook import SqliteHook
        return SqliteHook(sqlite_conn_id=self.conn_id)
    elif self.conn_type == 'jdbc':
        from airflow.hooks.jdbc_hook import JdbcHook
        return JdbcHook(jdbc_conn_id=self.conn_id)
    elif self.conn_type == 'mssql':
        from airflow.hooks.mssql_hook import MsSqlHook
        return MsSqlHook(mssql_conn_id=self.conn_id)
    elif self.conn_type == 'oracle':
        from airflow.hooks.oracle_hook import OracleHook
        return OracleHook(oracle_conn_id=self.conn_id)
    elif self.conn_type == 'vertica':
        from airflow.contrib.hooks.vertica_hook import VerticaHook
        return VerticaHook(vertica_conn_id=self.conn_id)
    elif self.conn_type == 'cloudant':
        from airflow.contrib.hooks.cloudant_hook import CloudantHook
        return CloudantHook(cloudant_conn_id=self.conn_id)
    elif self.conn_type == 'jira':
        from airflow.contrib.hooks.jira_hook import JiraHook
        return JiraHook(jira_conn_id=self.conn_id)
    elif self.conn_type == 'redis':
        from airflow.contrib.hooks.redis_hook import RedisHook
        return RedisHook(redis_conn_id=self.conn_id)
    elif self.conn_type == 'wasb':
        from airflow.contrib.hooks.wasb_hook import WasbHook
        return WasbHook(wasb_conn_id=self.conn_id)
    elif self.conn_type == 'docker':
        from airflow.hooks.docker_hook import DockerHook
        return DockerHook(docker_conn_id=self.conn_id)
    elif self.conn_type == 'azure_data_lake':
        from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
        return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
    elif self.conn_type == 'azure_cosmos':
        from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook
        return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
    elif self.conn_type == 'cassandra':
        from airflow.contrib.hooks.cassandra_hook import CassandraHook
        return CassandraHook(cassandra_conn_id=self.conn_id)
    elif self.conn_type == 'mongo':
        from airflow.contrib.hooks.mongo_hook import MongoHook
        return MongoHook(conn_id=self.conn_id)
    elif self.conn_type == 'gcpcloudsql':
        from airflow.gcp.hooks.cloud_sql import CloudSqlDatabaseHook
        return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
    elif self.conn_type == 'grpc':
        from airflow.contrib.hooks.grpc_hook import GrpcHook
        return GrpcHook(grpc_conn_id=self.conn_id)
    raise AirflowException("Unknown hook type {}".format(self.conn_type))
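
Unlike the earlier variant, an unrecognized conn_type now raises instead of silently returning None. A sketch using an ad-hoc Connection object (values are illustrative; real connections normally come from the metadata database):

from airflow.models import Connection

conn = Connection(conn_id='adhoc_sqlite', conn_type='sqlite', host='/tmp/example.db')
hook = conn.get_hook()  # -> SqliteHook
# conn_type='foo' would raise AirflowException("Unknown hook type foo")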
Example #14
def test_jdbc_conn_connection(self, jdbc_mock):
    jdbc_hook = JdbcHook()
    jdbc_conn = jdbc_hook.get_conn()
    self.assertTrue(jdbc_mock.called)
    self.assertIsInstance(jdbc_conn, Mock)
    self.assertEqual(jdbc_conn.name, jdbc_mock.return_value.name)
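
The injected jdbc_mock implies a patch decorator on the test; a sketch of the surrounding setup, assuming the hook connects through jaydebeapi.connect and that a jdbc_default connection is available (e.g. via the AIRFLOW_CONN_JDBC_DEFAULT environment variable):

from unittest import TestCase
from unittest.mock import Mock, patch

class TestJdbcHook(TestCase):
    @patch('airflow.hooks.jdbc_hook.jaydebeapi.connect')  # assumed patch target
    def test_jdbc_conn_connection(self, jdbc_mock):
        ...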
Example #15
def execute(self, context):
    logging.info('Executing: %s', self.sql)  # assumes "import logging" at module level
    self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
    self.hook.run(self.sql, self.autocommit)
Example #16
def execute(self, context):
    self.log.info('Executing: %s', self.sql)
    self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
    self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
Example #17
def execute(self, context):
    logging.info('Executing: %s', self.sql)  # assumes "import logging" at module level
    self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
    self.hook.run(self.sql, self.autocommit)
Example #18
def _execute(self, sql):
    logging.info('Executing: %s', sql)  # assumes "import logging" at module level
    hook = JdbcHook(jdbc_conn_id=self.conn_id)
    hook.run(sql, self.autocommit)
Example #19
def test_jdbc_conn_get_autocommit(self, _):
    jdbc_hook = JdbcHook()
    jdbc_conn = jdbc_hook.get_conn()
    jdbc_hook.get_autocommit(jdbc_conn)
    jdbc_conn.jconn.getAutoCommit.assert_called_once_with()
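
The setter side goes through the same jconn attribute, so the mirror-image test might look like this (a sketch inferred from the getter above, not shown in the source):

def test_jdbc_conn_set_autocommit(self, _):
    jdbc_hook = JdbcHook()
    jdbc_conn = jdbc_hook.get_conn()
    jdbc_hook.set_autocommit(jdbc_conn, False)
    jdbc_conn.jconn.setAutoCommit.assert_called_once_with(False)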
Example #20
def _select(self, sql):
    logging.info('Querying: %s', sql)  # assumes "import logging" at module level
    hook = JdbcHook(jdbc_conn_id=self.conn_id)
    return hook.get_records(sql)