def json_schema_to_sql_type(self, schema):
    _type = json_schema.get_type(schema)
    not_null = True
    ln = len(_type)
    if ln == 1:
        _type = _type[0]
    elif ln == 2 and json_schema.NULL in _type:
        not_null = False
        if _type.index(json_schema.NULL) == 0:
            _type = _type[1]
        else:
            _type = _type[0]
    elif ln > 2:
        raise SnowflakeError('Multiple types per column not supported')

    sql_type = 'TEXT'

    if 'format' in schema and \
            schema['format'] == 'date-time' and \
            _type == 'string':
        sql_type = 'TIMESTAMP_TZ'
    elif _type == 'boolean':
        sql_type = 'BOOLEAN'
    elif _type == 'integer':
        sql_type = 'NUMBER'
    elif _type == 'number':
        sql_type = 'FLOAT'

    if not_null:
        sql_type += ' NOT NULL'

    return sql_type
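# A minimal sketch of the mapping above, assuming `json_schema.get_type`
# returns the JSON Schema `type` value as a list. The inputs and outputs
# here are illustrative, not taken from the original module:
#
#   json_schema_to_sql_type({'type': ['string', 'null'], 'format': 'date-time'})
#   # -> 'TIMESTAMP_TZ'
#   json_schema_to_sql_type({'type': ['integer']})
#   # -> 'NUMBER NOT NULL'
#   json_schema_to_sql_type({'type': ['string', 'integer', 'null']})
#   # -> raises SnowflakeError (multiple types per column not supported)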
def _get_table_metadata(self, cur, table_name):
    cur.execute('''
        SHOW TABLES LIKE '{}' IN SCHEMA {}.{}
    '''.format(
        table_name,
        sql.identifier(self.connection.configured_database),
        sql.identifier(self.connection.configured_schema)))

    tables = cur.fetchall()

    if not tables:
        return None

    if len(tables) != 1:
        raise SnowflakeError(
            '{} tables returned while searching for: {}.{}.{}'.format(
                len(tables),
                self.connection.configured_database,
                self.connection.configured_schema,
                table_name))

    # The table comment (column 5 of the SHOW TABLES output) carries this
    # target's metadata as a JSON blob.
    comment = tables[0][5]

    if comment:
        try:
            comment_meta = json.loads(comment)
        except Exception:
            self.LOGGER.exception('Could not load table comment metadata')
            raise
    else:
        comment_meta = None

    return comment_meta
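# Sketch of the round trip this helper supports, assuming per-table
# metadata is stored as a JSON blob in the Snowflake table comment. A
# comment such as
#
#   {"version": 2, "path": ["my_stream"]}
#
# comes back as the parsed dict (write_batch and activate_version read
# the 'version' and 'path' keys); a table with no comment yields None.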
def sql_type_to_json_schema(self, sql_type, is_nullable):
    """
    Given a string representing a Snowflake SQL column type, and a boolean
    indicating whether the associated column is nullable, return a
    compatible JSON Schema structure.

    :param sql_type: String
    :param is_nullable: boolean
    :return: JSONSchema
    """
    _format = None
    if sql_type == 'TIMESTAMP_TZ':
        json_type = 'string'
        _format = 'date-time'
    elif sql_type == 'NUMBER':
        json_type = 'integer'
    elif sql_type == 'FLOAT':
        json_type = 'number'
    elif sql_type == 'BOOLEAN':
        json_type = 'boolean'
    elif sql_type == 'TEXT':
        json_type = 'string'
    else:
        raise SnowflakeError(
            'Unsupported type `{}` in existing target table'.format(sql_type))

    json_type = [json_type]
    if is_nullable:
        json_type.append(json_schema.NULL)

    ret_json_schema = {'type': json_type}
    if _format:
        ret_json_schema['format'] = _format

    return ret_json_schema
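# This is the inverse of json_schema_to_sql_type, used when reading an
# existing table's columns back into a schema. Illustrative round trip,
# assuming json_schema.NULL is the string 'null':
#
#   sql_type_to_json_schema('TIMESTAMP_TZ', True)
#   # -> {'type': ['string', 'null'], 'format': 'date-time'}
#   sql_type_to_json_schema('NUMBER', False)
#   # -> {'type': ['integer']}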
def write_batch(self, stream_buffer):
    if not self.persist_empty_tables and stream_buffer.count == 0:
        return None

    with self.connection.cursor() as cur:
        try:
            self.setup_table_mapping_cache(cur)

            root_table_name = self.add_table_mapping_helper(
                (stream_buffer.stream,), self.table_mapping_cache)['to']
            current_table_schema = self.get_table_schema(cur, root_table_name)

            current_table_version = None

            if current_table_schema:
                current_table_version = current_table_schema.get('version', None)

                if set(stream_buffer.key_properties) \
                        != set(current_table_schema.get('key_properties')):
                    raise SnowflakeError(
                        '`key_properties` change detected. Existing values are: {}. Streamed values are: {}'.format(
                            current_table_schema.get('key_properties'),
                            stream_buffer.key_properties))

                for key_property in stream_buffer.key_properties:
                    canonicalized_key, remote_column_schema = self.fetch_column_from_path(
                        (key_property,), current_table_schema)
                    if self.json_schema_to_sql_type(remote_column_schema) \
                            != self.json_schema_to_sql_type(stream_buffer.schema['properties'][key_property]):
                        raise SnowflakeError(
                            ('`key_properties` type change detected for "{}". ' +
                             'Existing values are: {}. ' +
                             'Streamed values are: {}, {}, {}').format(
                                key_property,
                                json_schema.get_type(current_table_schema['schema']['properties'][key_property]),
                                json_schema.get_type(stream_buffer.schema['properties'][key_property]),
                                self.json_schema_to_sql_type(current_table_schema['schema']['properties'][key_property]),
                                self.json_schema_to_sql_type(stream_buffer.schema['properties'][key_property])))

            target_table_version = current_table_version or stream_buffer.max_version

            self.LOGGER.info('Stream {} ({}) with max_version {} targeting {}'.format(
                stream_buffer.stream,
                root_table_name,
                stream_buffer.max_version,
                target_table_version))

            # Reset to the raw stream name; a version suffix may be appended below.
            root_table_name = stream_buffer.stream

            if current_table_version is not None and \
                    stream_buffer.max_version is not None:
                if stream_buffer.max_version < current_table_version:
                    self.LOGGER.warning('{} - Records from an earlier table version detected.'.format(
                        stream_buffer.stream))
                    self.connection.rollback()
                    return None

                elif stream_buffer.max_version > current_table_version:
                    root_table_name += SEPARATOR + str(stream_buffer.max_version)
                    target_table_version = stream_buffer.max_version

            self.LOGGER.info('Root table name {}'.format(root_table_name))

            written_batches_details = self.write_batch_helper(
                cur,
                root_table_name,
                stream_buffer.schema,
                stream_buffer.key_properties,
                stream_buffer.get_batch(),
                {'version': target_table_version})

            self.connection.commit()

            return written_batches_details
        except Exception as ex:
            self.connection.rollback()
            message = 'Exception writing records'
            self.LOGGER.exception(message)
            raise SnowflakeError(message, ex)
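# Versioning sketch for the batch write above, assuming SEPARATOR is a
# plain suffix separator such as '__' (its actual value lives elsewhere
# in the module). For a stream 'users' whose active table is at version 3
# and whose incoming records carry max_version 4, the batch lands in
# 'users__4'; activate_version later renames 'users__4' over 'users'.
# Records with max_version below the active version are discarded.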
def activate_version(self, stream_buffer, version):
    with self.connection.cursor() as cur:
        try:
            self.setup_table_mapping_cache(cur)
            root_table_name = self.add_table_mapping(
                cur, (stream_buffer.stream,), {})
            current_table_schema = self.get_table_schema(cur, root_table_name)

            if not current_table_schema:
                self.LOGGER.error('{} - Table for stream does not exist'.format(
                    stream_buffer.stream))
            elif current_table_schema.get('version') is not None \
                    and current_table_schema.get('version') >= version:
                self.LOGGER.warning('{} - Table version {} already active'.format(
                    stream_buffer.stream,
                    version))
            else:
                versioned_root_table = root_table_name + SEPARATOR + str(version)

                names_to_paths = dict([(v, k) for k, v in self.table_mapping_cache.items()])

                cur.execute('''
                    SHOW TABLES LIKE '{}%' IN SCHEMA {}.{}
                '''.format(
                    versioned_root_table,
                    sql.identifier(self.connection.configured_database),
                    sql.identifier(self.connection.configured_schema)))

                for versioned_table_name in [x[1] for x in cur.fetchall()]:
                    table_name = root_table_name + versioned_table_name[len(versioned_root_table):]
                    table_path = names_to_paths[table_name]

                    args = {
                        'db_schema': '{}.{}'.format(
                            sql.identifier(self.connection.configured_database),
                            sql.identifier(self.connection.configured_schema)),
                        'stream_table_old': sql.identifier(table_name + SEPARATOR + 'OLD'),
                        'stream_table': sql.identifier(table_name),
                        'version_table': sql.identifier(versioned_table_name)
                    }

                    cur.execute('''
                        ALTER TABLE {db_schema}.{stream_table}
                        RENAME TO {db_schema}.{stream_table_old}
                    '''.format(**args))

                    cur.execute('''
                        ALTER TABLE {db_schema}.{version_table}
                        RENAME TO {db_schema}.{stream_table}
                    '''.format(**args))

                    cur.execute('''
                        DROP TABLE {db_schema}.{stream_table_old}
                    '''.format(**args))

                    self.connection.commit()

                    metadata = self._get_table_metadata(cur, table_name)
                    self.LOGGER.info('Activated {}, setting path to {}'.format(
                        metadata,
                        table_path))
                    metadata['path'] = table_path
                    self._set_table_metadata(cur, table_name, metadata)
        except Exception as ex:
            self.connection.rollback()
            message = '{} - Exception activating table version {}'.format(
                stream_buffer.stream,
                version)
            self.LOGGER.exception(message)
            raise SnowflakeError(message, ex)
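# Rename sequence per matched table during activation, with SEPARATOR
# assumed to be '__' purely for illustration:
#
#   ALTER TABLE db.schema.users     RENAME TO db.schema.users__OLD
#   ALTER TABLE db.schema.users__4  RENAME TO db.schema.users
#   DROP TABLE  db.schema.users__OLD
#
# Each matched table (the root table and any subtables sharing its
# prefix) is renamed, committed, and then has its comment metadata's
# 'path' rewritten to point at the newly activated table.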