Exemple #1
0
def schema_for_column(c, pks_for_table):
    # Return Schema(None) to avoid calling lower() on a column with no datatype
    if c.data_type is None:
        LOGGER.info('Skipping column %s since it had no datatype',
                    c.column_name)
        return Schema(None)

    data_type = c.data_type.lower()
    result = Schema()

    # Scale of None indicates default of 6 digits
    numeric_scale = c.numeric_scale

    if data_type == 'number' and numeric_scale is not None and numeric_scale <= 0:
        result.type = nullable_column(c.column_name, 'integer', pks_for_table)

        return result

    elif data_type == 'number':
        # NB: Due to scale and precision variations in Oracle version, and
        #     among numeric types, we're using a custom `singer.decimal` string
        #     formatter for this, with no opinion on scale/precision.
        result.type = nullable_column(c.column_name, 'string', pks_for_table)
        result.format = 'singer.decimal'

        return result

    elif data_type == 'date' or data_type.startswith("timestamp"):
        result.type = nullable_column(c.column_name, 'string', pks_for_table)

        result.format = 'date-time'
        return result

    elif data_type in FLOAT_TYPES:
        result.type = nullable_column(c.column_name, 'number', pks_for_table)
        return result

    elif data_type in STRING_TYPES:
        character_used = c.character_used
        result.type = nullable_column(c.column_name, 'string', pks_for_table)

        if character_used == 'C':
            result.maxLength = c.char_length
        return result

    #these column types are insane. they are NOT actually ieee754 floats
    #instead they are represented as decimals, but despite this
    #it appears we can say nothing about their max or min

    #"float", "double_precision", "real"
    elif data_type in ['float', 'double_precision']:
        result.type = nullable_column(c.column_name, 'string', pks_for_table)
        result.format = 'singer.decimal'
        return result

    return Schema(None)
Exemple #2
0
def schema_for_column(c):
    '''Returns the Schema object for the given Column.'''
    data_type = c.data_type.lower()

    inclusion = 'available'

    result = Schema(inclusion=inclusion)

    if data_type == 'boolean':
        result.type = ['null', 'boolean']

    elif data_type == 'number' or data_type == 'real' or data_type == 'float' or data_type == 'fixed':
        result.type = ['null', 'number']

    elif data_type == 'text':
        result.type = ['null', 'string']

    elif data_type == 'timestamp_ntz':
        result.type = ['null', 'string']
        result.format = 'date-time'

    elif data_type == 'variant' or data_type == 'array':
        result.type = ['null', 'string']

    else:
        result = Schema(None,
                        inclusion='unsupported',
                        description='Unsupported column type {}'.format(data_type))
    
    return result
Exemple #3
0
def schema_for_column(c):
    '''Returns the Schema object for the given Column.'''
    data_type = c.data_type.lower()

    inclusion = 'available'
    result = Schema(inclusion=inclusion)

    if data_type == 'boolean':
        result.type = ['null', 'boolean']

    elif data_type in INTEGER_TYPES:
        result.type = ['null', 'number']

    elif data_type in FLOAT_TYPES:
        result.type = ['null', 'number']

    elif data_type in NUMBER_TYPES:
        result.type = ['null', 'number']

    elif data_type in STRING_TYPES:
        result.type = ['null', 'string']
        result.maxLength = c.character_maximum_length

    elif data_type in DATETIME_TYPES:
        result.type = ['null', 'string']
        result.format = 'date-time'

    elif data_type == 'time':
        result.type = ['null', 'string']
        result.format = 'time'

    elif data_type in BINARY_TYPES:
        result.type = ['null', 'string']
        result.format = 'binary'

    elif data_type in OBJECT_TYPES:
        result.type = ['null', 'object', 'array']

    else:
        result = Schema(
            None,
            inclusion='unsupported',
            description='Unsupported data type {}'.format(data_type))
    return result
Exemple #4
0
def schema_for_column(c):
    '''Returns the Schema object for the given Column.'''
    column_type = c['type'].lower()
    column_nullable = c['nullable'].lower()
    inclusion = 'available'
    result = Schema(inclusion=inclusion)

    if column_type == 'bool':
        result.type = 'boolean'

    elif column_type in BYTES_FOR_INTEGER_TYPE:
        result.type = 'integer'
        bits = BYTES_FOR_INTEGER_TYPE[column_type] * 8
        result.minimum = 0 - 2**(bits - 1)
        result.maximum = 2**(bits - 1) - 1

    elif column_type in FLOAT_TYPES:
        result.type = 'number'

    elif column_type == 'numeric':
        result.type = 'number'

    elif column_type in STRING_TYPES:
        result.type = 'string'

    elif column_type in DATETIME_TYPES:
        result.type = 'string'
        result.format = 'date-time'

    elif column_type in DATE_TYPES:
        result.type = 'string'
        result.format = 'date'

    else:
        result = Schema(
            None,
            inclusion='unsupported',
            description='Unsupported column type {}'.format(column_type))

    if column_nullable == 'yes':
        result.type = ['null', result.type]

    return result
def schema_for_column(c):
    """Returns the Schema object for the given Column."""
    data_type = c.data_type.lower()

    inclusion = "available"

    if c.is_primary_key == 1:
        inclusion = "automatic"

    result = Schema(inclusion=inclusion)

    if data_type == "bit":
        result.type = ["null", "boolean"]

    #In SQL Server tinyint is unsigned.
    elif data_type in BYTES_FOR_UNSIGNED_INTEGER_TYPE:
        result.type = ["null", "integer"]
        bits = BYTES_FOR_UNSIGNED_INTEGER_TYPE[data_type] * 8
        result.minimum = 0
        result.maximum = 2**bits - 1

    elif data_type in BYTES_FOR_INTEGER_TYPE:
        result.type = ["null", "integer"]
        bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8
        result.minimum = 0 - 2**(bits - 1)
        result.maximum = 2**(bits - 1) - 1

    elif data_type in FLOAT_TYPES:
        result.type = ["null", "number"]

    elif data_type in PRECISE_NUMERIC_TYPES:
        result.type = ["null", "number"]
        result.multipleOf = 10**(0 - c.numeric_scale)
        return result

    elif data_type in STRING_TYPES:
        result.type = ["null", "string"]
        if c.character_maximum_length is not None and c.character_maximum_length > 0:
            result.maxLength = c.character_maximum_length

    elif data_type in DATETIME_TYPES:
        result.type = ["null", "string"]
        result.format = "date-time"

    elif data_type in VARIANT_TYPES:
        result.type = ["null", "object"]

    else:
        result = Schema(
            None,
            inclusion="unsupported",
            description="Unsupported column type",
        )
    return result
Exemple #6
0
def schema_for_column(c):
    '''Returns the Schema object for the given Column.'''
    data_type = c.data_type.lower()
    column_type = c.column_type.lower()

    inclusion = 'available'
    # We want to automatically include all primary key columns
    if c.column_key.lower() == 'pri':
        inclusion = 'automatic'

    result = Schema(inclusion=inclusion)

    if data_type == 'bit' or column_type.startswith('tinyint(1)'):
        result.type = ['null', 'boolean']

    elif data_type in BYTES_FOR_INTEGER_TYPE:
        result.type = ['null', 'integer']
        bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8
        if 'unsigned' in c.column_type:
            result.minimum = 0
            result.maximum = 2**bits - 1
        else:
            result.minimum = 0 - 2**(bits - 1)
            result.maximum = 2**(bits - 1) - 1

    elif data_type in FLOAT_TYPES:
        result.type = ['null', 'number']

    elif data_type == 'decimal':
        result.type = ['null', 'number']
        result.exclusiveMaximum = True
        result.maximum = 10**(c.numeric_precision - c.numeric_scale)
        result.multipleOf = 10**(0 - c.numeric_scale)
        if 'unsigned' in column_type:
            result.minimum = 0
        else:
            result.exclusiveMinimum = True
            result.minimum = -10**(c.numeric_precision - c.numeric_scale)
        return result

    elif data_type in STRING_TYPES:
        result.type = ['null', 'string']
        result.maxLength = c.character_maximum_length

    elif data_type in DATETIME_TYPES:
        result.type = ['null', 'string']
        result.format = 'date-time'

    else:
        result = Schema(
            None,
            inclusion='unsupported',
            description='Unsupported column type {}'.format(column_type))
    return result
Exemple #7
0
def schema_for_column(c):
    """Returns the Schema object for the given Column."""
    data_type = c.data_type.lower()
    column_type = c.column_type.lower()

    inclusion = "available"
    # We want to automatically include all primary key columns
    if c.column_key.lower() == "pri":
        inclusion = "automatic"

    result = Schema(inclusion=inclusion)

    if data_type == "bit" or column_type.startswith("tinyint(1)"):
        result.type = ["null", "boolean"]

    elif data_type in BYTES_FOR_INTEGER_TYPE:
        result.type = ["null", "integer"]
        bits = BYTES_FOR_INTEGER_TYPE[data_type] * 8
        if "unsigned" in c.column_type:
            result.minimum = 0
            result.maximum = 2 ** bits - 1
        else:
            result.minimum = 0 - 2 ** (bits - 1)
            result.maximum = 2 ** (bits - 1) - 1

    elif data_type in FLOAT_TYPES:
        result.type = ["null", "number"]
        result.multipleOf = 10 ** (0 - 6)

    elif data_type == "decimal":
        result.type = ["null", "number"]
        result.multipleOf = 10 ** (0 - c.numeric_scale)
        return result

    elif data_type in STRING_TYPES:
        result.type = ["null", "string"]
        result.maxLength = c.character_maximum_length

    elif data_type in DATETIME_TYPES:
        result.type = ["null", "string"]
        result.format = "date-time"

    elif data_type in VARIANT_TYPES:
        result.type = ["null", "object"]

    else:
        result = Schema(
            None,
            inclusion="unsupported",
            description="Unsupported column type {}".format(column_type),
        )
    return result
Exemple #8
0
def schema_for_column(c):
    data_type = c.sql_data_type.lower()
    result = Schema()

    if data_type in INTEGER_TYPES:
        result.type = nullable_column('integer', c.is_primary_key)
        result.minimum = -1 * (2**(c.numeric_precision - 1))
        result.maximum = 2**(c.numeric_precision - 1) - 1
        return result

    elif data_type == 'bit' and c.character_maximum_length == 1:
        result.type = nullable_column('boolean', c.is_primary_key)
        return result

    elif data_type == 'boolean':
        result.type = nullable_column('boolean', c.is_primary_key)
        return result

    elif data_type == 'uuid':
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type == 'hstore':
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type == 'citext':
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type in JSON_TYPES:
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type == 'numeric':
        result.type = nullable_column('number', c.is_primary_key)
        if c.numeric_scale is None or c.numeric_scale > MAX_SCALE:
            LOGGER.warning(
                'capping decimal scale to 38.  THIS MAY CAUSE TRUNCATION')
            scale = MAX_SCALE
        else:
            scale = c.numeric_scale

        if c.numeric_precision is None or c.numeric_precision > MAX_PRECISION:
            LOGGER.warning(
                'capping decimal precision to 100.  THIS MAY CAUSE TRUNCATION')
            precision = MAX_PRECISION
        else:
            precision = c.numeric_precision

        result.exclusiveMaximum = True
        result.maximum = 10**(precision - scale)
        result.multipleOf = 10**(0 - scale)
        result.exclusiveMinimum = True
        result.minimum = -10**(precision - scale)
        return result

    elif data_type in {'time without time zone', 'time with time zone'}:
        #times are treated as ordinary strings as they can not possible match RFC3339
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type in ('date', 'timestamp without time zone',
                       'timestamp with time zone'):
        result.type = nullable_column('string', c.is_primary_key)

        result.format = 'date-time'
        return result

    elif data_type in FLOAT_TYPES:
        result.type = nullable_column('number', c.is_primary_key)
        return result

    elif data_type == 'text':
        result.type = nullable_column('string', c.is_primary_key)
        return result

    elif data_type == 'character varying':
        result.type = nullable_column('string', c.is_primary_key)
        result.maxLength = c.character_maximum_length
        return result

    elif data_type == 'character':
        result.type = nullable_column('string', c.is_primary_key)
        result.maxLength = c.character_maximum_length
        return result

    return Schema(None)
Exemple #9
0
def schema_for_column(c, pks_for_table):
    data_type = c.data_type.lower()
    result = Schema()

    numeric_scale = c.numeric_scale or DEFAULT_NUMERIC_SCALE
    numeric_precision = c.numeric_precision or DEFAULT_NUMERIC_PRECISION

    if data_type == 'number' and numeric_scale <= 0:
        result.type = nullable_column(c.column_name, 'integer', pks_for_table)
        result.minimum = -1 * (10**numeric_precision - 1)
        result.maximum = (10**numeric_precision - 1)

        if numeric_scale < 0:
            result.multipleOf = -10 * numeric_scale
        return result

    elif data_type == 'number':
        result.type = nullable_column(c.column_name, 'number', pks_for_table)

        result.exclusiveMaximum = True
        result.maximum = 10**(numeric_precision - numeric_scale)
        result.multipleOf = 10**(0 - numeric_scale)
        result.exclusiveMinimum = True
        result.minimum = -10**(numeric_precision - numeric_scale)
        return result

    elif data_type == 'date' or data_type.startswith("timestamp"):
        result.type = nullable_column(c.column_name, 'string', pks_for_table)

        result.format = 'date-time'
        return result

    elif data_type in FLOAT_TYPES:
        result.type = nullable_column(c.column_name, 'number', pks_for_table)
        return result

    elif data_type in STRING_TYPES:
        character_used = c.character_used
        result.type = nullable_column(c.column_name, 'string', pks_for_table)

        if character_used == 'C':
            result.maxLength = c.char_length
        return result

    #these column types are insane. they are NOT actually ieee754 floats
    #instead they are represented as decimals, but despite this
    #it appears we can say nothing about their max or min

    #"real"
    elif data_type == 'float' and c.numeric_precision == 63:
        result.type = nullable_column(c.column_name, 'number', pks_for_table)
        result.multipleOf = 10**-18
        return result

    #"float", "double_precision",
    elif data_type in ['float', 'double_precision']:

        result.type = nullable_column(c.column_name, 'number', pks_for_table)
        result.multipleOf = 10**-38
        return result

    return Schema(None)
Exemple #10
0
def discover_catalog(conn):
    """Returns a Catalog describing the table structure of the target database"""
    entries = []

    for table in conn.get_tables():
        # the stream is in format database_name__table_name with all non alphanumeric
        # and `_` characters replaced with an `_`.
        stream = re.sub(
            '[^0-9a-z_]+',
            '_',
            "{}__{}".format(table.get('database_name').lower(), table.get('name')).lower()
        )

        # by default we will ALWAYS have 'rid' as an automatically included primary key field.
        schema = Schema(
            type='object',
            additionalProperties=False,
            properties={
                'rid': Schema(
                    type=['string'],
                    inclusion='automatic',
                )
            }
        )
        metadata = []

        for field in conn.get_fields(table.get('id')):
            field_type = ['null']
            field_format = None

            # https://help.quickbase.com/user-assistance/field_types.html
            if field.get('base_type') == 'bool':
                field_type.append('boolean')
            elif field.get('base_type') == 'float':
                field_type.append('number')
            elif field.get('base_type') == 'int64':
                if field.get('type') in ('timestamp', 'date'):
                    field_type.append('string')
                    field_format = 'date-time'
                else:
                    # `timeofday` comes out of the API as an integer for how many milliseconds
                    #       through the day, 900000 would be 12:15am
                    # `duration` comes out as an integer for how many milliseconds the duration is,
                    #       1000 would be 1 second
                    # let's just pass these as an integer
                    field_type.append('integer')

            else:
                field_type.append('string')

            property_schema = Schema(
                type=field_type,
                inclusion='available' if field.get('id') != '2' else 'automatic',
            )
            if field_format is not None:
                property_schema.format = field_format
            schema.properties[field.get('name')] = property_schema

            metadata.append({
                'metadata': {
                    'id': field.get('id')
                },
                'breadcrumb': [
                    'properties',
                    field.get('name')
                ]
            })

        entry = CatalogEntry(
            database=conn.appid,
            table=table.get('id'),
            stream_alias=table.get('name'),
            stream=stream,
            tap_stream_id=stream,
            key_properties=['rid'],
            schema=schema,
            metadata=metadata
        )

        entries.append(entry)

    return Catalog(entries)