def schema_for_column(c, pks_for_table): # Return Schema(None) to avoid calling lower() on a column with no datatype if c.data_type is None: LOGGER.info('Skipping column %s since it had no datatype', c.column_name) return Schema(None) data_type = c.data_type.lower() result = Schema() # Scale of None indicates default of 6 digits numeric_scale = c.numeric_scale if data_type == 'number' and numeric_scale is not None and numeric_scale <= 0: result.type = nullable_column(c.column_name, 'integer', pks_for_table) return result elif data_type == 'number': # NB: Due to scale and precision variations in Oracle version, and # among numeric types, we're using a custom `singer.decimal` string # formatter for this, with no opinion on scale/precision. result.type = nullable_column(c.column_name, 'string', pks_for_table) result.format = 'singer.decimal' return result elif data_type == 'date' or data_type.startswith("timestamp"): result.type = nullable_column(c.column_name, 'string', pks_for_table) result.format = 'date-time' return result elif data_type in FLOAT_TYPES: result.type = nullable_column(c.column_name, 'number', pks_for_table) return result elif data_type in STRING_TYPES: character_used = c.character_used result.type = nullable_column(c.column_name, 'string', pks_for_table) if character_used == 'C': result.maxLength = c.char_length return result #these column types are insane. they are NOT actually ieee754 floats #instead they are represented as decimals, but despite this #it appears we can say nothing about their max or min #"float", "double_precision", "real" elif data_type in ['float', 'double_precision']: result.type = nullable_column(c.column_name, 'string', pks_for_table) result.format = 'singer.decimal' return result return Schema(None)
def schema_for_column(c): '''Returns the Schema object for the given Column.''' data_type = c.data_type.lower() inclusion = 'available' result = Schema(inclusion=inclusion) if data_type == 'boolean': result.type = ['null', 'boolean'] elif data_type == 'number' or data_type == 'real' or data_type == 'float' or data_type == 'fixed': result.type = ['null', 'number'] elif data_type == 'text': result.type = ['null', 'string'] elif data_type == 'timestamp_ntz': result.type = ['null', 'string'] result.format = 'date-time' elif data_type == 'variant' or data_type == 'array': result.type = ['null', 'string'] else: result = Schema(None, inclusion='unsupported', description='Unsupported column type {}'.format(data_type)) return result
def schema_for_column(c): '''Returns the Schema object for the given Column.''' data_type = c.data_type.lower() inclusion = 'available' result = Schema(inclusion=inclusion) if data_type == 'boolean': result.type = ['null', 'boolean'] elif data_type in INTEGER_TYPES: result.type = ['null', 'number'] elif data_type in FLOAT_TYPES: result.type = ['null', 'number'] elif data_type in NUMBER_TYPES: result.type = ['null', 'number'] elif data_type in STRING_TYPES: result.type = ['null', 'string'] result.maxLength = c.character_maximum_length elif data_type in DATETIME_TYPES: result.type = ['null', 'string'] result.format = 'date-time' elif data_type == 'time': result.type = ['null', 'string'] result.format = 'time' elif data_type in BINARY_TYPES: result.type = ['null', 'string'] result.format = 'binary' elif data_type in OBJECT_TYPES: result.type = ['null', 'object', 'array'] else: result = Schema( None, inclusion='unsupported', description='Unsupported data type {}'.format(data_type)) return result
def schema_for_column(c): '''Returns the Schema object for the given Column.''' column_type = c['type'].lower() column_nullable = c['nullable'].lower() inclusion = 'available' result = Schema(inclusion=inclusion) if column_type == 'bool': result.type = 'boolean' elif column_type in BYTES_FOR_INTEGER_TYPE: result.type = 'integer' bits = BYTES_FOR_INTEGER_TYPE[column_type] * 8 result.minimum = 0 - 2**(bits - 1) result.maximum = 2**(bits - 1) - 1 elif column_type in FLOAT_TYPES: result.type = 'number' elif column_type == 'numeric': result.type = 'number' elif column_type in STRING_TYPES: result.type = 'string' elif column_type in DATETIME_TYPES: result.type = 'string' result.format = 'date-time' elif column_type in DATE_TYPES: result.type = 'string' result.format = 'date' else: result = Schema( None, inclusion='unsupported', description='Unsupported column type {}'.format(column_type)) if column_nullable == 'yes': result.type = ['null', result.type] return result
def schema_for_column(c): """Returns the Schema object for the given Column.""" data_type = c.data_type.lower() inclusion = "available" if c.is_primary_key == 1: inclusion = "automatic" result = Schema(inclusion=inclusion) if data_type == "bit": result.type = ["null", "boolean"] #In SQL Server tinyint is unsigned. elif data_type in BYTES_FOR_UNSIGNED_INTEGER_TYPE: result.type = ["null", "integer"] bits = BYTES_FOR_UNSIGNED_INTEGER_TYPE[data_type] * 8 result.minimum = 0 result.maximum = 2**bits - 1 elif data_type in BYTES_FOR_INTEGER_TYPE: result.type = ["null", "integer"] bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8 result.minimum = 0 - 2**(bits - 1) result.maximum = 2**(bits - 1) - 1 elif data_type in FLOAT_TYPES: result.type = ["null", "number"] elif data_type in PRECISE_NUMERIC_TYPES: result.type = ["null", "number"] result.multipleOf = 10**(0 - c.numeric_scale) return result elif data_type in STRING_TYPES: result.type = ["null", "string"] if c.character_maximum_length is not None and c.character_maximum_length > 0: result.maxLength = c.character_maximum_length elif data_type in DATETIME_TYPES: result.type = ["null", "string"] result.format = "date-time" elif data_type in VARIANT_TYPES: result.type = ["null", "object"] else: result = Schema( None, inclusion="unsupported", description="Unsupported column type", ) return result
def schema_for_column(c): '''Returns the Schema object for the given Column.''' data_type = c.data_type.lower() column_type = c.column_type.lower() inclusion = 'available' # We want to automatically include all primary key columns if c.column_key.lower() == 'pri': inclusion = 'automatic' result = Schema(inclusion=inclusion) if data_type == 'bit' or column_type.startswith('tinyint(1)'): result.type = ['null', 'boolean'] elif data_type in BYTES_FOR_INTEGER_TYPE: result.type = ['null', 'integer'] bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8 if 'unsigned' in c.column_type: result.minimum = 0 result.maximum = 2**bits - 1 else: result.minimum = 0 - 2**(bits - 1) result.maximum = 2**(bits - 1) - 1 elif data_type in FLOAT_TYPES: result.type = ['null', 'number'] elif data_type == 'decimal': result.type = ['null', 'number'] result.exclusiveMaximum = True result.maximum = 10**(c.numeric_precision - c.numeric_scale) result.multipleOf = 10**(0 - c.numeric_scale) if 'unsigned' in column_type: result.minimum = 0 else: result.exclusiveMinimum = True result.minimum = -10**(c.numeric_precision - c.numeric_scale) return result elif data_type in STRING_TYPES: result.type = ['null', 'string'] result.maxLength = c.character_maximum_length elif data_type in DATETIME_TYPES: result.type = ['null', 'string'] result.format = 'date-time' else: result = Schema( None, inclusion='unsupported', description='Unsupported column type {}'.format(column_type)) return result
def schema_for_column(c): """Returns the Schema object for the given Column.""" data_type = c.data_type.lower() column_type = c.column_type.lower() inclusion = "available" # We want to automatically include all primary key columns if c.column_key.lower() == "pri": inclusion = "automatic" result = Schema(inclusion=inclusion) if data_type == "bit" or column_type.startswith("tinyint(1)"): result.type = ["null", "boolean"] elif data_type in BYTES_FOR_INTEGER_TYPE: result.type = ["null", "integer"] bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8 if "unsigned" in c.column_type: result.minimum = 0 result.maximum = 2 ** bits - 1 else: result.minimum = 0 - 2 ** (bits - 1) result.maximum = 2 ** (bits - 1) - 1 elif data_type in FLOAT_TYPES: result.type = ["null", "number"] result.multipleOf = 10 ** (0 - 6) elif data_type == "decimal": result.type = ["null", "number"] result.multipleOf = 10 ** (0 - c.numeric_scale) return result elif data_type in STRING_TYPES: result.type = ["null", "string"] result.maxLength = c.character_maximum_length elif data_type in DATETIME_TYPES: result.type = ["null", "string"] result.format = "date-time" elif data_type in VARIANT_TYPES: result.type = ["null", "object"] else: result = Schema( None, inclusion="unsupported", description="Unsupported column type {}".format(column_type), ) return result
def schema_for_column(c): data_type = c.sql_data_type.lower() result = Schema() if data_type in INTEGER_TYPES: result.type = nullable_column('integer', c.is_primary_key) result.minimum = -1 * (2**(c.numeric_precision - 1)) result.maximum = 2**(c.numeric_precision - 1) - 1 return result elif data_type == 'bit' and c.character_maximum_length == 1: result.type = nullable_column('boolean', c.is_primary_key) return result elif data_type == 'boolean': result.type = nullable_column('boolean', c.is_primary_key) return result elif data_type == 'uuid': result.type = nullable_column('string', c.is_primary_key) return result elif data_type == 'hstore': result.type = nullable_column('string', c.is_primary_key) return result elif data_type == 'citext': result.type = nullable_column('string', c.is_primary_key) return result elif data_type in JSON_TYPES: result.type = nullable_column('string', c.is_primary_key) return result elif data_type == 'numeric': result.type = nullable_column('number', c.is_primary_key) if c.numeric_scale is None or c.numeric_scale > MAX_SCALE: LOGGER.warning( 'capping decimal scale to 38. THIS MAY CAUSE TRUNCATION') scale = MAX_SCALE else: scale = c.numeric_scale if c.numeric_precision is None or c.numeric_precision > MAX_PRECISION: LOGGER.warning( 'capping decimal precision to 100. THIS MAY CAUSE TRUNCATION') precision = MAX_PRECISION else: precision = c.numeric_precision result.exclusiveMaximum = True result.maximum = 10**(precision - scale) result.multipleOf = 10**(0 - scale) result.exclusiveMinimum = True result.minimum = -10**(precision - scale) return result elif data_type in {'time without time zone', 'time with time zone'}: #times are treated as ordinary strings as they can not possible match RFC3339 result.type = nullable_column('string', c.is_primary_key) return result elif data_type in ('date', 'timestamp without time zone', 'timestamp with time zone'): result.type = nullable_column('string', c.is_primary_key) result.format = 'date-time' return result elif data_type in FLOAT_TYPES: result.type = nullable_column('number', c.is_primary_key) return result elif data_type == 'text': result.type = nullable_column('string', c.is_primary_key) return result elif data_type == 'character varying': result.type = nullable_column('string', c.is_primary_key) result.maxLength = c.character_maximum_length return result elif data_type == 'character': result.type = nullable_column('string', c.is_primary_key) result.maxLength = c.character_maximum_length return result return Schema(None)
def schema_for_column(c, pks_for_table): data_type = c.data_type.lower() result = Schema() numeric_scale = c.numeric_scale or DEFAULT_NUMERIC_SCALE numeric_precision = c.numeric_precision or DEFAULT_NUMERIC_PRECISION if data_type == 'number' and numeric_scale <= 0: result.type = nullable_column(c.column_name, 'integer', pks_for_table) result.minimum = -1 * (10**numeric_precision - 1) result.maximum = (10**numeric_precision - 1) if numeric_scale < 0: result.multipleOf = -10 * numeric_scale return result elif data_type == 'number': result.type = nullable_column(c.column_name, 'number', pks_for_table) result.exclusiveMaximum = True result.maximum = 10**(numeric_precision - numeric_scale) result.multipleOf = 10**(0 - numeric_scale) result.exclusiveMinimum = True result.minimum = -10**(numeric_precision - numeric_scale) return result elif data_type == 'date' or data_type.startswith("timestamp"): result.type = nullable_column(c.column_name, 'string', pks_for_table) result.format = 'date-time' return result elif data_type in FLOAT_TYPES: result.type = nullable_column(c.column_name, 'number', pks_for_table) return result elif data_type in STRING_TYPES: character_used = c.character_used result.type = nullable_column(c.column_name, 'string', pks_for_table) if character_used == 'C': result.maxLength = c.char_length return result #these column types are insane. they are NOT actually ieee754 floats #instead they are represented as decimals, but despite this #it appears we can say nothing about their max or min #"real" elif data_type == 'float' and c.numeric_precision == 63: result.type = nullable_column(c.column_name, 'number', pks_for_table) result.multipleOf = 10**-18 return result #"float", "double_precision", elif data_type in ['float', 'double_precision']: result.type = nullable_column(c.column_name, 'number', pks_for_table) result.multipleOf = 10**-38 return result return Schema(None)
def discover_catalog(conn): """Returns a Catalog describing the table structure of the target database""" entries = [] for table in conn.get_tables(): # the stream is in format database_name__table_name with all non alphanumeric # and `_` characters replaced with an `_`. stream = re.sub( '[^0-9a-z_]+', '_', "{}__{}".format(table.get('database_name').lower(), table.get('name')).lower() ) # by default we will ALWAYS have 'rid' as an automatically included primary key field. schema = Schema( type='object', additionalProperties=False, properties={ 'rid': Schema( type=['string'], inclusion='automatic', ) } ) metadata = [] for field in conn.get_fields(table.get('id')): field_type = ['null'] field_format = None # https://help.quickbase.com/user-assistance/field_types.html if field.get('base_type') == 'bool': field_type.append('boolean') elif field.get('base_type') == 'float': field_type.append('number') elif field.get('base_type') == 'int64': if field.get('type') in ('timestamp', 'date'): field_type.append('string') field_format = 'date-time' else: # `timeofday` comes out of the API as an integer for how many milliseconds # through the day, 900000 would be 12:15am # `duration` comes out as an integer for how many milliseconds the duration is, # 1000 would be 1 second # let's just pass these as an integer field_type.append('integer') else: field_type.append('string') property_schema = Schema( type=field_type, inclusion='available' if field.get('id') != '2' else 'automatic', ) if field_format is not None: property_schema.format = field_format schema.properties[field.get('name')] = property_schema metadata.append({ 'metadata': { 'id': field.get('id') }, 'breadcrumb': [ 'properties', field.get('name') ] }) entry = CatalogEntry( database=conn.appid, table=table.get('id'), stream_alias=table.get('name'), stream=stream, tap_stream_id=stream, key_properties=['rid'], schema=schema, metadata=metadata ) entries.append(entry) return Catalog(entries)