class JdbcOperator(BaseOperator):
    """
    Runs SQL against a database reached through a JDBC driver.

    Requires jaydebeapi.

    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: string
    :param sql: the sql code to be executed. (templated)
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    """

    # Fields rendered by the Airflow templating engine before execution.
    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self, sql, jdbc_conn_id='jdbc_default', autocommit=False,
                 parameters=None, *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit
        self.parameters = parameters

    def execute(self, context):
        # The hook is created lazily at execution time, not in the constructor.
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: str
    :param autocommit: if True, each command is automatically committed
        (default value: False)
    :type autocommit: bool
    :param parameters: (optional) the parameters to render the SQL query with
    :type parameters: mapping or iterable
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 jdbc_conn_id='jdbc_default',
                 autocommit=False,
                 parameters=None,
                 *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters
        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        # `self.log` is provided by BaseOperator; the previous `_log` name was
        # undefined and raised NameError at runtime. Lazy %-args defer string
        # formatting until the record is actually emitted.
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: str
    :param autocommit: if True, each command is automatically committed
        (default value: False)
    :type autocommit: bool
    :param parameters: (optional) the parameters to render the SQL query with
    :type parameters: mapping or iterable
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            jdbc_conn_id='jdbc_default', autocommit=False,
            parameters=None,
            *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.parameters = parameters
        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        # Use the operator's own logger (from BaseOperator) with lazy %-args
        # instead of the root logger and eager string concatenation.
        self.log.info('Executing: %s', self.sql)
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param sql: the sql code to be executed. (templated)
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: str
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 jdbc_conn_id='jdbc_default',
                 autocommit=False,
                 parameters=None,
                 *args,
                 **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        # Store the task configuration; templating fills in `sql` later.
        self.sql = sql
        self.jdbc_conn_id = jdbc_conn_id
        self.autocommit = autocommit
        self.parameters = parameters

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook = hook
        hook.run(self.sql, self.autocommit, parameters=self.parameters)
class JdbcOperator(BaseOperator):
    """
    Executes sql code in a database using jdbc driver.

    Requires jaydebeapi.

    :param jdbc_conn_id: reference to a predefined database
    :type jdbc_conn_id: string
    :param sql: the sql code to be executed
    :type sql: Can receive a str representing a sql statement,
        a list of str (sql statements), or reference to a template file.
        Template reference are recognized by str ending in '.sql'
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self,
            sql,
            jdbc_conn_id='jdbc_default',
            autocommit=False,
            parameters=None,
            *args, **kwargs):
        super(JdbcOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.parameters = parameters
        self.autocommit = autocommit
        self.jdbc_conn_id = jdbc_conn_id

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        # Build the hook at run time so the connection is resolved per task run.
        self.hook = JdbcHook(jdbc_conn_id=self.jdbc_conn_id)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
def _execute(self, sql):
    """Run *sql* via a JdbcHook built from ``self.conn_id``."""
    # Lazy %-args defer string building until the record is emitted
    # (the rendered message is identical to the old eager concatenation).
    logging.info('Executing: %s', sql)
    hook = JdbcHook(jdbc_conn_id=self.conn_id)
    hook.run(sql, self.autocommit)
class FBS3ToSnowflakeOperator(BaseOperator):
    """
    Loads data from S3 into a Snowflake table over a JDBC connection.

    Optionally drops and recreates the target table from a JSON schema stored
    in S3 (``drop_and_create``), runs any caller-supplied pre-SQL, then issues
    a ``COPY INTO`` from the configured stage. The whole batch is wrapped in
    an explicit BEGIN/COMMIT transaction.

    Required keyword args (enforced by ``require_keyword_args``):
    ``task_id``, ``table``, ``data_s3_key``, ``stage``, ``dag``.

    :param snowflake_conn_id: reference to the Snowflake JDBC connection
    :type snowflake_conn_id: str
    :param pre_sql: statement(s) to run before the COPY
    :type pre_sql: str or list of str
    :param s3_conn_id: reference to the S3 connection
    :type s3_conn_id: str
    :param drop_and_create: if True, drop and recreate the target table from
        the schema found at ``schema_s3_key`` before loading
    :type drop_and_create: bool
    :param schema_s3_key: S3 key holding a JSONified array of
        ``[column_name, column_type]`` pairs; read only when
        ``drop_and_create`` is True
    :type schema_s3_key: str
    :param forced_string_columns: column names always created with the string
        type regardless of their source type
    :type forced_string_columns: list of str
    """

    template_fields = (
        'table',
        'data_s3_key',
        'pre_sql',
        'schema_s3_key',
    )

    @apply_defaults
    @require_keyword_args(['task_id', 'table', 'data_s3_key', 'stage', 'dag'])
    def __init__(self,
                 snowflake_conn_id=SNOWFLAKE_CONN_ID,
                 pre_sql=None,
                 s3_conn_id='s3_default',
                 drop_and_create=False,
                 schema_s3_key=None,
                 forced_string_columns=None,
                 *args,
                 **kwargs):
        self.snowflake_conn_id = snowflake_conn_id
        # Pop the operator-specific keys so BaseOperator never sees them
        # (replaces the previous read-then-del pattern).
        self.table = kwargs.pop('table')
        self.data_s3_key = kwargs.pop('data_s3_key')
        self.stage = kwargs.pop('stage')
        # `None` defaults (instead of `[]`) avoid the shared-mutable-default
        # bug: the old list default was shared across every instance.
        if pre_sql is None:
            pre_sql = []
        elif isinstance(pre_sql, str):
            pre_sql = [pre_sql]
        elif not isinstance(pre_sql, list):
            raise TypeError('pre_sql must be str or list!')
        self.pre_sql = pre_sql
        self.s3_conn_id = s3_conn_id
        self.drop_and_create = drop_and_create
        self.schema_s3_key = schema_s3_key
        self.forced_string_columns = (
            [] if forced_string_columns is None else forced_string_columns)
        super(FBS3ToSnowflakeOperator, self).__init__(*args, **kwargs)

    def _build_pre_sql(self):
        """Return DROP/CREATE statements derived from the schema file in S3."""

        # A helper function that only needs to be called in `_build_pre_sql`.
        def determine_schema():
            logging.info('Reading from s3: %s', self.schema_s3_key)
            schema_key = self.s3.get_key(self.schema_s3_key)
            if schema_key is None:
                # Report the key we looked up; previously this formatted
                # `schema_key`, which is None on this path.
                raise AirflowException(
                    's3 key {} was not found. Did you forget to run a dependency?'
                    .format(self.schema_s3_key))
            # Schema must be stored as a JSONified array of [name, type] pairs.
            schema_array = json.loads(schema_key.get_contents_as_string())
            schema_strings = []
            for column in schema_array:
                column_name = column[0]
                if column_name in COLUMNS_TO_QUOTE:
                    column[0] = '"{}"'.format(column_name)
                # We're assuming well-formed type information.
                type_and_len = column[1].lower().split('(')
                use_precise_type = (
                    type_and_len[0] in POSTGRES_TO_SNOWFLAKE_DATA_TYPES and
                    column_name not in self.forced_string_columns)
                if use_precise_type:
                    new_type = POSTGRES_TO_SNOWFLAKE_DATA_TYPES[type_and_len[0]]
                    if new_type != FLOATESQUE_TYPE:
                        column[1] = new_type
                    # For numeric and decimal, if no argument is provided then
                    # postgres says "numeric values of any precision and scale
                    # can be stored". The only way to emulate this behavior is
                    # to use a float (which is what matillion + redshift also
                    # does).
                    elif len(type_and_len) == 1:
                        column[1] = FLOAT_TYPE
                else:
                    # Replace any non-supported data types with the string
                    # type, aka VARCHAR.
                    column[1] = STRING_TYPE
                schema_strings.append(' '.join(column))
            # Extra spaces added to make it look good in the logs.
            return ',\n            '.join(schema_strings)

        pre_sql = [
            'DROP TABLE IF EXISTS {table};'.format(table=self.table),
            """
            CREATE TABLE IF NOT EXISTS {table} (
            {schema}
            );
            """.format(table=self.table, schema=determine_schema())
        ]
        return pre_sql

    def execute(self, context):
        self.hook = JdbcHook(jdbc_conn_id=self.snowflake_conn_id)
        self.s3 = S3Hook(s3_conn_id=self.s3_conn_id)
        # Copy so the appends below cannot mutate self.pre_sql: the previous
        # `sql = self.pre_sql` aliased the list, so every execution grew the
        # operator's own pre_sql (and, with the old mutable default, leaked
        # statements across operator instances).
        sql = list(self.pre_sql)
        if self.drop_and_create:
            sql += self._build_pre_sql()
        s3_bucket, s3_key = self.s3.parse_s3_url(self.data_s3_key)
        if s3_bucket != S3_BUCKET:
            raise ValueError(
                'For Snowflake loads the S3 bucket must be {}. Got: {}'.format(
                    S3_BUCKET, s3_bucket))
        copy_sql = """
            COPY INTO {table}
            FROM @airflow.{stage}/{s3_key};
        """.format(
            table=self.table,
            stage=self.stage,
            s3_key=s3_key,
        )
        sql.append(copy_sql)
        # Run everything in one explicit transaction.
        self.hook.run(['BEGIN;'] + sql + ['COMMIT;'])