def test_is_nullable():
    """Schemas listing 'null' among their types are nullable; the others are not."""
    nullable_schemas = (
        {'type': ['array', 'null'], 'items': {'type': ['boolean']}},
        {'type': ['integer', 'null']},
    )
    non_nullable_schemas = (
        {'type': ['string']},
        # A schema without any `type` at all must not be reported as nullable.
        {},
    )

    for candidate in nullable_schemas:
        assert json_schema.is_nullable(candidate)

    for candidate in non_nullable_schemas:
        assert not json_schema.is_nullable(candidate)
def _denest_schema(table_path, table_json_schema, key_prop_schemas, subtables, level=-1):
    """
    Replace `table_json_schema['properties']` with a flattened, path-keyed dict.

    Every (prop, schema) pair produced by `_denest_schema__singular_schemas` is
    dispatched on its kind:

    - object: delegated to `_denest_schema_helper`, which receives the result dict
    - iterable: delegated to `_create_subtable` at `level + 1`
    - literal: collected under the key `(prop, )` inside an `anyOf` list

    :param table_path: tuple path of the table being denested
    :param table_json_schema: schema whose `properties` entry is overwritten
    :param key_prop_schemas: passed through to the helpers untouched
    :param subtables: passed through to the helpers untouched
    :param level: nesting level of this table; subtables get `level + 1`
    :return: None, `table_json_schema` is modified in place
    """
    flattened = {}

    for prop, item_json_schema in _denest_schema__singular_schemas(table_json_schema):
        prop_path = (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(
                table_path + prop_path,
                prop_path,
                item_json_schema,
                json_schema.is_nullable(item_json_schema),
                flattened,
                key_prop_schemas,
                subtables,
                level)
            continue

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(
                table_path + prop_path,
                item_json_schema,
                key_prop_schemas,
                subtables,
                level + 1)
            continue

        if json_schema.is_literal(item_json_schema):
            # The same property may be yielded several times (once per singular
            # schema); all of its literal variants accumulate in one `anyOf`.
            flattened.setdefault(prop_path, {'anyOf': []})['anyOf'].append(item_json_schema)

    table_json_schema['properties'] = flattened
def test_complex_objects__logical_statements():
    """A schema declaring every type at once satisfies each of the type predicates."""
    every_type = {
        'type': ['null', 'integer', 'number', 'boolean', 'string', 'array', 'object'],
        'items': {'type': 'integer'},
        'format': 'date-time',
        'properties': {
            'a': {'type': 'integer'},
            'b': {'type': 'number'},
            'c': {'type': 'boolean'},
        },
    }

    # NOTE(review): `is_iterable` appears twice, exactly as in the original
    # sequence of assertions; possibly another predicate was intended - confirm.
    predicates = (
        json_schema.is_iterable,
        json_schema.is_nullable,
        json_schema.is_iterable,
        json_schema.is_object,
    )
    for predicate in predicates:
        assert predicate(every_type)
def test__anyOf__schema__implicit_any_of():
    """A multi-typed property is denested into one nullable literal schema per literal type."""
    every_type = {
        'type': ['integer', 'null', 'number', 'boolean', 'string', 'array', 'object'],
        'items': {'type': 'integer'},
        'format': 'date-time',
        'properties': {
            'i': {'type': 'integer'},
            'n': {'type': 'number'},
            'b': {'type': 'boolean'},
        },
    }

    denested = error_check_denest({'properties': {'every_type': every_type}}, [], [])
    assert 2 == len(denested)

    # The batch with the empty path is the root table.
    root_batch = _get_table_batch_with_path(denested, ())
    denested_props = root_batch['streamed_schema']['schema']['properties']
    assert 4 == len(denested_props)

    anyof_schemas = denested_props[('every_type',)]['anyOf']

    def count_matching(predicate):
        return sum(1 for candidate in anyof_schemas if predicate(candidate))

    assert 4 == len(anyof_schemas)
    assert 4 == count_matching(json_schema.is_literal)
    assert 4 == count_matching(json_schema.is_nullable)
    assert 1 == count_matching(json_schema.is_datetime)
def test_simplify__allOf__nullable():
    """Simplifying an `allOf` with one nullable member yields a nullable schema."""
    all_of_schema = {
        'allOf': [
            {'type': ['integer']},
            {'type': ['string', 'null']},
        ]
    }

    simplified = json_schema.simplify(all_of_schema)

    assert json_schema.is_nullable(simplified)
def denest_schema_helper(self,
                         table_name,
                         table_json_schema,
                         not_null,
                         top_level_schema,
                         current_path,
                         key_prop_schemas,
                         subtables,
                         level):
    """
    Flatten the properties of a nested object schema into `top_level_schema`.

    Each property is keyed by `current_path`, `self.NESTED_SEPARATOR` and the
    property name joined together. Object properties are flattened recursively,
    iterable properties are handed to `self.create_subtable` at `level + 1`, and
    everything else is stored in `top_level_schema` after its `type` list has been
    adjusted for nullability.

    :param table_name: name of the table the properties belong to
    :param table_json_schema: object schema whose `properties` are walked
    :param not_null: when truthy, 'null' is stripped from nullable properties
    :param top_level_schema: dict receiving the flattened properties (mutated)
    :param current_path: separator-joined path of the enclosing object
    :param key_prop_schemas: passed through to `create_subtable`
    :param subtables: passed through to `create_subtable`
    :param level: nesting level of the current table
    :return: None
    """
    separator = self.NESTED_SEPARATOR

    for prop, item_json_schema in table_json_schema['properties'].items():
        next_path = current_path + separator + prop

        if json_schema.is_object(item_json_schema):
            self.denest_schema_helper(table_name,
                                      item_json_schema,
                                      not_null,
                                      top_level_schema,
                                      next_path,
                                      key_prop_schemas,
                                      subtables,
                                      level)
            continue

        if json_schema.is_iterable(item_json_schema):
            self.create_subtable(table_name + separator + prop,
                                 item_json_schema,
                                 key_prop_schemas,
                                 subtables,
                                 level + 1)
            continue

        # NOTE(review): a non-nullable property gets 'null' appended regardless of
        # `not_null`; only nullable properties are affected by the flag - confirm
        # this is intended. The `type` list is edited in place.
        is_nullable = json_schema.is_nullable(item_json_schema)
        if is_nullable:
            if not_null:
                item_json_schema['type'].remove('null')
        else:
            item_json_schema['type'].append('null')

        top_level_schema[next_path] = item_json_schema
def _literal_only_schema(schema):
    """
    Split `schema` into one single-typed schema per literal type it declares.

    The object, array and null types are dropped from the type list; for each
    remaining type a deep copy of `schema` is produced whose `type` is exactly
    that one type. When `schema` is nullable, every copy is passed through
    `json_schema.make_nullable`.

    :param schema: JSON schema, possibly declaring several types
    :return: `{'anyOf': [...]}` holding the single-typed copies
    """
    # Evaluate the predicates up front, while `schema` is guaranteed untouched.
    is_object = json_schema.is_object(schema)
    is_iterable = json_schema.is_iterable(schema)
    is_nullable = json_schema.is_nullable(schema)

    # Work on a private list: removing entries directly from whatever `get_type`
    # returns could edit the caller's schema if that list is shared with it, and
    # would then also make the nullability check inside the loop come out False.
    literal_types = list(json_schema.get_type(schema))
    if is_object:
        literal_types.remove(json_schema.OBJECT)
    if is_iterable:
        literal_types.remove(json_schema.ARRAY)
    if is_nullable:
        literal_types.remove(json_schema.NULL)

    ret_schemas = []
    for literal_type in literal_types:
        single = deepcopy(schema)
        single['type'] = [literal_type]
        if is_nullable:
            single = json_schema.make_nullable(single)
        ret_schemas.append(single)

    return {'anyOf': ret_schemas}
def _denest_schema_helper(table_path,
                          table_json_schema,
                          nullable,
                          top_level_schema,
                          key_prop_schemas,
                          subtables,
                          level):
    """
    Flatten the properties of a nested object schema into `top_level_schema`.

    The three kind checks are independent on purpose: a property declaring several
    types can be an object, an iterable and a literal at the same time, and is
    then processed by every matching branch.

    :param table_path: tuple path of the enclosing object
    :param table_json_schema: object schema whose `properties` are walked
    :param nullable: when truthy, literal properties are forced to accept 'null'
    :param top_level_schema: dict receiving the literal properties, keyed by path
    :param key_prop_schemas: passed through to `_create_subtable`
    :param subtables: passed through to `_create_subtable`
    :param level: nesting level of the current table
    :return: None
    """
    for prop, item_json_schema in table_json_schema['properties'].items():
        prop_path = table_path + (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(prop_path,
                                  item_json_schema,
                                  nullable,
                                  top_level_schema,
                                  key_prop_schemas,
                                  subtables,
                                  level)

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(prop_path,
                             item_json_schema,
                             key_prop_schemas,
                             subtables,
                             level + 1)

        if not json_schema.is_literal(item_json_schema):
            continue

        # Nullability of the enclosing object is propagated to its literal
        # children; the `type` list of the property is edited in place.
        if nullable and not json_schema.is_nullable(item_json_schema):
            item_json_schema['type'].append('null')

        top_level_schema[prop_path] = _literal_only_schema(item_json_schema)
def merge_put_schemas(self, cur, table_schema, table_name, existing_schema, new_schema):
    """
    Reconcile every property of `new_schema` with the existing table schema.

    For each new property the first applicable rule wins:

    1. a mapping for it already exists: nothing to do
    2. the column is unknown: it is recorded and added to the table
    3. the existing column is non-nullable and the new one only adds 'null':
       the existing column is made nullable
    4. both map to the same SQL type once made nullable: nothing to do
    5. neither mapping name is taken yet: the column is split
    6. otherwise a `PostgresError` is raised

    :param cur: database cursor handed to the column operations
    :param table_schema: name of the database schema containing the table
    :param table_name: name of the table
    :param existing_schema: remote table schema; its properties are updated in place
    :param new_schema: incoming schema whose `properties` are merged
    :raises PostgresError: when a column type change cannot be handled
    """
    new_properties = new_schema['properties']
    existing_properties = existing_schema['schema']['properties']

    for name, schema in new_properties.items():
        ## Mapping exists
        if self.get_mapping(existing_schema, name, schema) is not None:
            continue

        ## New column
        if name not in existing_properties:
            existing_properties[name] = schema
            self.add_column(cur, table_schema, table_name, name, schema)
            continue

        current = existing_properties[name]

        ## Existing column non-nullable, new column is nullable
        if not json_schema.is_nullable(current) \
                and json_schema.get_type(schema) \
                == json_schema.get_type(json_schema.make_nullable(current)):
            existing_properties[name] = json_schema.make_nullable(current)
            self.make_column_nullable(cur, table_schema, table_name, name)
            continue

        ## Existing column, types compatible
        new_sql_type = json_schema.to_sql(json_schema.make_nullable(schema))
        current_sql_type = json_schema.to_sql(json_schema.make_nullable(current))
        if new_sql_type == current_sql_type:
            continue

        ## Column type change
        if self.mapping_name(name, schema) not in existing_properties \
                and self.mapping_name(name, current) not in existing_properties:
            self.split_column(cur, table_schema, table_name, name, schema,
                              existing_properties)
            continue

        ## Error
        raise PostgresError(
            'Cannot handle column type change for: {}.{} columns {} and {}. Name collision likely.'
            .format(table_schema, table_name, name, self.mapping_name(name, schema)))
def test__anyOf__schema__stitch_date_times():
    """A date-time string and a nullable string under `anyOf` denest to two nullable literals."""
    source_schema = {
        'properties': {
            'a': {
                "anyOf": [
                    {"type": "string", "format": "date-time"},
                    {"type": ["string", "null"]},
                ]
            }
        }
    }

    denested = error_check_denest(source_schema, [], [])

    # The batch with the empty path is the root table.
    root_batch = _get_table_batch_with_path(denested, ())
    root_properties = root_batch['streamed_schema']['schema']['properties']
    anyof_schemas = root_properties[('a',)]['anyOf']

    def count_matching(predicate):
        return sum(1 for candidate in anyof_schemas if predicate(candidate))

    assert 2 == len(anyof_schemas)
    assert 2 == count_matching(json_schema.is_literal)
    assert 2 == count_matching(json_schema.is_nullable)
    assert 1 == count_matching(json_schema.is_datetime)
def add_column(self, cur, table_schema, table_name, column_name, column_schema):
    """
    Issue an `ALTER TABLE ... ADD COLUMN` for `column_name`.

    The SQL data type is derived from `column_schema`. A non-nullable column
    added to a table that already holds rows is forced to be nullable, and a
    warning is logged.

    :param cur: database cursor used to execute the statement
    :param table_schema: name of the database schema containing the table
    :param table_name: name of the table to alter
    :param column_name: name of the column to add
    :param column_schema: JSON schema describing the column
    :return: None
    """
    data_type = json_schema.to_sql(column_schema)

    # `is_table_empty` is only consulted for non-nullable columns.
    keep_declared_type = json_schema.is_nullable(column_schema) \
        or self.is_table_empty(cur, table_schema, table_name)

    if not keep_declared_type:
        self.logger.warning(
            'Forcing new column `{}.{}.{}` to be nullable due to table not empty.'
            .format(table_schema, table_name, column_name))
        nullable_schema = json_schema.make_nullable(column_schema)
        data_type = json_schema.to_sql(nullable_schema)

    statement_template = sql.SQL('ALTER TABLE {table_schema}.{table_name} '
                                 'ADD COLUMN {column_name} {data_type};')
    # Identifiers are quoted via `sql.Identifier`; the data type is spliced in as raw SQL.
    to_execute = statement_template.format(
        table_schema=sql.Identifier(table_schema),
        table_name=sql.Identifier(table_name),
        column_name=sql.Identifier(column_name),
        data_type=sql.SQL(data_type))

    cur.execute(to_execute)
def _denest_schema(table_path, table_json_schema, key_prop_schemas, subtables, level=-1):
    """
    Replace `table_json_schema['properties']` with a flattened, path-keyed dict.

    The kind checks are independent: a property declaring several types is
    processed by every branch that applies to it.

    - object: delegated to `_denest_schema_helper`, which receives the result dict
    - iterable: delegated to `_create_subtable` at `level + 1`
    - literal: stored under `(prop, )` as returned by `_literal_only_schema`

    :param table_path: tuple path of the table being denested
    :param table_json_schema: schema whose `properties` entry is overwritten
    :param key_prop_schemas: passed through to the helpers untouched
    :param subtables: passed through to the helpers untouched
    :param level: nesting level of this table; subtables get `level + 1`
    :return: None, `table_json_schema` is modified in place
    """
    flattened = {}

    for prop, item_json_schema in table_json_schema['properties'].items():
        prop_path = table_path + (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(prop_path,
                                  item_json_schema,
                                  json_schema.is_nullable(item_json_schema),
                                  flattened,
                                  key_prop_schemas,
                                  subtables,
                                  level)

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(prop_path,
                             item_json_schema,
                             key_prop_schemas,
                             subtables,
                             level + 1)

        if json_schema.is_literal(item_json_schema):
            flattened[(prop, )] = _literal_only_schema(item_json_schema)

    table_json_schema['properties'] = flattened
def upsert_table_helper(self, connection, schema, metadata, log_schema_changes=True):
    """
    Bring the remote table in line with the local `schema`.

    The table is created when it does not exist yet. Afterwards every
    (column path, single type) pair of the local schema is reconciled with the
    remote column mappings by:
        - adding columns
        - adding column mappings
        - making existing columns nullable
        - splitting a column whose types are incompatible and migrating its data

    :param connection: remote connection, type left to be determined by implementing class
    :param schema: TABLE_SCHEMA(local)
    :param metadata: additional information necessary for downstream operations;
        a deep copy stamped with `schema_version` is what gets passed on
    :param log_schema_changes: defaults to True, set to false to disable logging of
        table level schema changes
    :return: TABLE_SCHEMA(remote)
    """

    def as_mapping(source_schema, source_path, target_name):
        # A mapping is the simplified type of a schema plus its source path
        # (`from`) and the remote column name it is stored under (`to`).
        entry = json_schema.simple_type(source_schema)
        entry['from'] = source_path
        entry['to'] = target_name
        return entry

    def log_change(path, column_name, started_at, msg):
        if not log_schema_changes:
            return
        self.LOGGER.info(
            'Table Schema Change [`{}`.`{}`:`{}`] {} (took {} millis)'
            .format(table_name, path, column_name, msg, _duration_millis(started_at)))

    table_path = schema['path']

    versioned_metadata = deepcopy(metadata)
    versioned_metadata['schema_version'] = CURRENT_SCHEMA_VERSION

    table_name = self.add_table_mapping(connection, table_path, versioned_metadata)

    existing_schema = self._get_table_schema(connection, table_path, table_name)
    if existing_schema is None:
        self.add_table(connection, table_name, versioned_metadata)
        existing_schema = self._get_table_schema(connection, table_path, table_name)

    self.add_key_properties(connection, table_name, schema.get('key_properties'))

    ## Only process columns which have single, nullable, types
    single_type_columns = []
    for name_or_path, declared_schema in schema['schema']['properties'].items():
        if isinstance(name_or_path, str):
            column_path = (name_or_path, )
        else:
            column_path = name_or_path

        working_schema = deepcopy(declared_schema)
        declared_types = json_schema.get_type(working_schema)
        force_nullable = json_schema.is_nullable(declared_schema)

        for declared_type in declared_types:
            if declared_type == json_schema.NULL:
                continue

            working_schema['type'] = [declared_type]
            if force_nullable:
                variant = json_schema.make_nullable(working_schema)
            else:
                variant = deepcopy(working_schema)
            single_type_columns.append((column_path, variant))

    ## Process new columns against existing
    raw_mappings = existing_schema.get('mappings', {})
    mappings = [
        as_mapping(raw, tuple(raw['from']), target_name)
        for target_name, raw in raw_mappings.items()
    ]

    table_empty = self.is_table_empty(connection, table_name)

    for column_path, column_schema in single_type_columns:
        started_at = time.monotonic()

        canonicalized_column_name = self._canonicalize_column_identifier(
            column_path, column_schema, mappings)
        nullable_column_schema = json_schema.make_nullable(column_schema)

        # Every existing mapping that originates from the same path.
        same_path = [m for m in mappings if m['from'] == column_path]

        ## NEW COLUMN
        if not same_path:
            change = "New column"

            ### NON EMPTY TABLE
            if not table_empty:
                change += ", non empty table"
                self.LOGGER.warning(
                    'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'
                    .format(column_path, table_name))
                column_schema = nullable_column_schema

            self.add_column(connection, table_name, canonicalized_column_name,
                            column_schema)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name, column_schema)
            mappings.append(
                as_mapping(column_schema, column_path, canonicalized_column_name))

            log_change(column_path, canonicalized_column_name, started_at, change)
            continue

        ## EXISTING COLUMNS
        ### SCHEMAS MATCH
        if any(self.json_schema_to_sql_type(m)
               == self.json_schema_to_sql_type(column_schema)
               for m in same_path):
            continue

        ### NULLABLE SCHEMAS MATCH
        ### New column _is not_ nullable, existing column _is_
        if any(self.json_schema_to_sql_type(m)
               == self.json_schema_to_sql_type(nullable_column_schema)
               for m in same_path):
            continue

        ### NULL COMPATIBILITY
        ### New column _is_ nullable, existing column is _not_
        if any(json_schema.shorthand(m) == json_schema.shorthand(column_schema)
               for m in same_path):
            ## MAKE NULLABLE
            self.make_column_nullable(connection, table_name,
                                      canonicalized_column_name)
            self.drop_column_mapping(connection, table_name,
                                     canonicalized_column_name)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)

            # Swap the non-null mapping for its nullable replacement.
            mappings = [
                m for m in mappings
                if not (m['from'] == column_path
                        and json_schema.shorthand(m)
                        == json_schema.shorthand(column_schema))
            ]
            mappings.append(
                as_mapping(nullable_column_schema, column_path,
                           canonicalized_column_name))

            log_change(
                column_path, canonicalized_column_name, started_at,
                "Made existing column nullable. New column is nullable, existing column is not")
            continue

        ### FIRST MULTI TYPE
        ### New column matches existing column path, but the types are incompatible
        if len(same_path) == 1:
            existing_mapping = same_path[0]
            existing_column_name = existing_mapping['to']

            if existing_column_name:
                self.drop_column_mapping(connection, table_name,
                                         existing_column_name)

            ## Update existing properties
            mappings = [m for m in mappings if m['from'] != column_path]
            mappings.append(
                as_mapping(nullable_column_schema, column_path,
                           canonicalized_column_name))

            # The new name for the existing column is chosen after the incoming
            # column has been registered in `mappings`.
            existing_column_new_normalized_name = self._canonicalize_column_identifier(
                column_path, existing_mapping, mappings)
            mappings.append(
                as_mapping(json_schema.make_nullable(existing_mapping),
                           column_path, existing_column_new_normalized_name))

            ## Add new columns
            ### NOTE: all migrated columns will be nullable and remain that way

            #### Table Metadata
            self.add_column_mapping(connection, table_name, column_path,
                                    existing_column_new_normalized_name,
                                    json_schema.make_nullable(existing_mapping))
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)

            #### Columns
            self.add_column(connection, table_name,
                            existing_column_new_normalized_name,
                            json_schema.make_nullable(existing_mapping))
            self.add_column(connection, table_name, canonicalized_column_name,
                            nullable_column_schema)

            ## Migrate existing data
            self.migrate_column(connection, table_name, existing_mapping['to'],
                                existing_column_new_normalized_name)

            ## Drop existing column
            self.drop_column(connection, table_name, existing_mapping['to'])

            change = "Splitting `{}` into `{}` and `{}`. New column matches existing column path, but the types are incompatible.".format(
                existing_column_name, existing_column_new_normalized_name,
                canonicalized_column_name)

        ## REST MULTI TYPE
        elif len(same_path) > 1:
            ## Add new column
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)
            self.add_column(connection, table_name, canonicalized_column_name,
                            nullable_column_schema)
            mappings.append(
                as_mapping(nullable_column_schema, column_path,
                           canonicalized_column_name))

            change = "Adding new column to split column `{}`. New column matches existing column's path, but no types were compatible.".format(
                column_path)

        ## UNKNOWN
        else:
            raise Exception(
                'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'
                .format(column_path, canonicalized_column_name, table_name))

        log_change(column_path, canonicalized_column_name, started_at, change)

    return self._get_table_schema(connection, table_path, table_name)
def upsert_table_helper(self, connection, schema, metadata):
    """
    Upserts the `schema` to remote by:
        - creating table if necessary
        - adding columns
        - adding column mappings
        - migrating data from old columns to new, etc.

    Every column of the local schema is first expanded into one entry per
    non-null type it declares; each entry is then reconciled with the column
    mappings already present on the remote table.

    :param connection: remote connection, type left to be determined by implementing class
    :param schema: TABLE_SCHEMA(local)
    :param metadata: additional information necessary for downstream operations
    :return: TABLE_SCHEMA(remote)
    :raises Exception: when a column can neither be matched, made nullable nor split
    """
    table_path = schema['path']

    table_name = self.add_table_mapping(connection, table_path, metadata)

    existing_schema = self.get_table_schema(connection, table_path, table_name)

    if existing_schema is None:
        self.add_table(connection, table_name, metadata)
        existing_schema = self.get_table_schema(connection, table_path, table_name)

    self.add_key_properties(connection, table_name, schema.get('key_properties', None))

    ## Only process columns which have single, nullable, types
    single_type_columns = []
    for column_name__or__path, column_schema in schema['schema']['properties'].items():
        column_path = column_name__or__path
        if isinstance(column_name__or__path, str):
            column_path = (column_name__or__path, )

        single_type_column_schema = deepcopy(column_schema)
        column_types = json_schema.get_type(single_type_column_schema)
        make_nullable = json_schema.is_nullable(column_schema)

        for column_type in column_types:
            if column_type == json_schema.NULL:
                continue

            single_type_column_schema['type'] = [column_type]

            if make_nullable:
                single_type_columns.append(
                    (column_path,
                     json_schema.make_nullable(single_type_column_schema)))
            else:
                # Append a copy: the working dict is overwritten on the next
                # iteration, so appending it directly would leave every entry of
                # a multi-typed, non-nullable column pointing at one dict that
                # carries only the last type.
                single_type_columns.append(
                    (column_path, deepcopy(single_type_column_schema)))

    ## Process new columns against existing
    raw_mappings = existing_schema.get('mappings', {})

    mappings = []

    for to, m in raw_mappings.items():
        mappings.append({
            'from': tuple(m['from']),
            'to': to,
            'type': m['type']
        })

    table_empty = self.is_table_empty(connection, table_name)

    for column_path, column_schema in single_type_columns:
        canonicalized_column_name = self._canonicalize_column_identifier(
            column_path, column_schema, mappings)
        nullable_column_schema = json_schema.make_nullable(column_schema)

        ## NEW COLUMN
        if column_path not in [m['from'] for m in mappings]:
            ### NON EMPTY TABLE
            if not table_empty:
                self.LOGGER.warning(
                    'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'
                    .format(column_path, table_name))
                column_schema = nullable_column_schema

            self.add_column(connection, table_name, canonicalized_column_name,
                            column_schema)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name, column_schema)
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(column_schema)
            })

            continue

        ## EXISTING COLUMNS
        ### SCHEMAS MATCH
        if [
                True for m in mappings
                if m['from'] == column_path
                and json_schema.to_sql(m) == json_schema.to_sql(column_schema)
        ]:
            continue

        ### NULLABLE SCHEMAS MATCH
        ### New column _is not_ nullable, existing column _is_
        if [
                True for m in mappings
                if m['from'] == column_path
                and json_schema.to_sql(m) == json_schema.to_sql(nullable_column_schema)
        ]:
            continue

        ### NULL COMPATIBILITY
        ### New column _is_ nullable, existing column is _not_
        non_null_original_column = [
            m for m in mappings
            if m['from'] == column_path
            and json_schema.sql_shorthand(m) == json_schema.sql_shorthand(column_schema)
        ]
        if non_null_original_column:
            ## MAKE NULLABLE
            self.make_column_nullable(connection, table_name,
                                      canonicalized_column_name)
            self.drop_column_mapping(connection, table_name,
                                     canonicalized_column_name)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)

            # Swap the non-null mapping for its nullable replacement.
            mappings = [
                m for m in mappings
                if not (m['from'] == column_path
                        and json_schema.sql_shorthand(m)
                        == json_schema.sql_shorthand(column_schema))
            ]
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

            continue

        ### FIRST MULTI TYPE
        ### New column matches existing column path, but the types are incompatible
        duplicate_paths = [m for m in mappings if m['from'] == column_path]

        if 1 == len(duplicate_paths):
            existing_mapping = duplicate_paths[0]
            existing_column_name = existing_mapping['to']

            if existing_column_name:
                self.drop_column_mapping(connection, table_name,
                                         existing_column_name)

            ## Update existing properties
            mappings = [m for m in mappings if m['from'] != column_path]
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

            # The new name for the existing column is chosen after the incoming
            # column has been registered in `mappings`.
            existing_column_new_normalized_name = self._canonicalize_column_identifier(
                column_path, existing_mapping, mappings)
            mappings.append({
                'from': column_path,
                'to': existing_column_new_normalized_name,
                'type': json_schema.get_type(
                    json_schema.make_nullable(existing_mapping))
            })

            ## Add new columns
            ### NOTE: all migrated columns will be nullable and remain that way

            #### Table Metadata
            self.add_column_mapping(connection, table_name, column_path,
                                    existing_column_new_normalized_name,
                                    json_schema.make_nullable(existing_mapping))
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)

            #### Columns
            self.add_column(connection, table_name,
                            existing_column_new_normalized_name,
                            json_schema.make_nullable(existing_mapping))
            self.add_column(connection, table_name, canonicalized_column_name,
                            nullable_column_schema)

            ## Migrate existing data
            self.migrate_column(connection, table_name, existing_mapping['to'],
                                existing_column_new_normalized_name)

            ## Drop existing column
            self.drop_column(connection, table_name, existing_mapping['to'])

        ## REST MULTI TYPE
        elif 1 < len(duplicate_paths):
            ## Add new column
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)
            self.add_column(connection, table_name, canonicalized_column_name,
                            nullable_column_schema)

            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

        ## UNKNOWN
        else:
            raise Exception(
                'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'
                .format(column_path, canonicalized_column_name, table_name))

    return self.get_table_schema(connection, table_path, table_name)