def _make_schema_nullable(schema):
    # Redshift does not allow for creation of columns which are non null without a default
    nullable_schema = deepcopy(schema)
    nullable_properties = nullable_schema['properties']

    for field in nullable_properties:
        nullable_properties[field] = json_schema.make_nullable(nullable_properties[field])

        if 'anyOf' in nullable_properties[field]:
            nullable_properties[field]['anyOf'] = [
                json_schema.make_nullable(sub_schema)
                for sub_schema in nullable_properties[field]['anyOf']
            ]

    return nullable_schema
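# A minimal usage sketch for the helper above (illustrative only; the property names
# are hypothetical, and `json_schema.make_nullable` is assumed to append 'null' to a
# schema's type list, as the tests later in this section demonstrate):
from copy import deepcopy  # required by _make_schema_nullable

schema = {
    'properties': {
        'age': {'type': 'integer'},
        'name': {'type': ['string', 'null']}
    }
}

nullable_schema = _make_schema_nullable(schema)
# Expected shape under the assumption above:
#   nullable_schema['properties']['age']  == {'type': ['integer', 'null']}
#   nullable_schema['properties']['name'] == {'type': ['string', 'null']}  (already nullable)
# The input `schema` is left untouched thanks to the deepcopy.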
def test_make_nullable():
    assert {'type': ['boolean', 'null']} \
           == json_schema.make_nullable({'type': 'boolean'})
    assert {'type': ['null', 'boolean']} \
           == json_schema.make_nullable({'type': ['null', 'boolean']})
    assert {'type': ['null', 'string']} \
           == json_schema.make_nullable({'type': ['null', 'string']})

    ## Make sure we're not modifying the original
    schema = {'type': ['string']}
    assert json_schema.get_type(schema) == ['string']

    assert {'type': ['string', 'null']} \
           == json_schema.make_nullable(schema)

    assert json_schema.get_type(schema) == ['string']

    assert {
               'definitions': {
                   'address': {
                       'type': 'object',
                       'properties': {
                           'street_address': {'type': 'string'},
                           'city': {'type': 'string'},
                           'state': {'type': 'string'}
                       },
                       'required': ['street_address', 'city', 'state']
                   }
               },
               'type': ['object', 'null'],
               'properties': {
                   'billing_address': {'$ref': '#/definitions/address'},
                   'shipping_address': {'$ref': '#/definitions/address'}}} \
           == json_schema.make_nullable(
               {
                   'definitions': {
                       'address': {
                           'type': 'object',
                           'properties': {
                               'street_address': {'type': 'string'},
                               'city': {'type': 'string'},
                               'state': {'type': 'string'}
                           },
                           'required': ['street_address', 'city', 'state']
                       }
                   },
                   'type': 'object',
                   'properties': {
                       'billing_address': {'$ref': '#/definitions/address'},
                       'shipping_address': {'$ref': '#/definitions/address'}}})
def _denest_schema_helper(table_path,
                          prop_path,
                          table_json_schema,
                          nullable,
                          top_level_schema,
                          key_prop_schemas,
                          subtables,
                          level):
    for prop, item_json_schema in _denest_schema__singular_schemas(table_json_schema):

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(table_path + (prop,),
                                  prop_path + (prop,),
                                  item_json_schema,
                                  nullable,
                                  top_level_schema,
                                  key_prop_schemas,
                                  subtables,
                                  level)

        elif json_schema.is_iterable(item_json_schema):
            _create_subtable(table_path + (prop,),
                             item_json_schema,
                             key_prop_schemas,
                             subtables,
                             level + 1)

        elif json_schema.is_literal(item_json_schema):
            if nullable:
                item_json_schema = json_schema.make_nullable(item_json_schema)

            p = prop_path + (prop,)
            if p in top_level_schema:
                top_level_schema[p]['anyOf'].append(item_json_schema)
            else:
                top_level_schema[p] = {'anyOf': [item_json_schema]}
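# A hedged walkthrough of the denesting above (property names are illustrative, and it
# assumes `_denest_schema__singular_schemas` yields (property name, single-type schema)
# pairs for the object being flattened):
#
#   table_json_schema = {'properties': {'adopted_on': {'type': 'string'},
#                                       'was_foster': {'type': 'boolean'}}}
#   prop_path = ('adoption',), nullable = True
#
# Each literal sub-schema is made nullable and keyed by its flattened path:
#
#   top_level_schema[('adoption', 'adopted_on')] == {'anyOf': [{'type': ['string', 'null']}]}
#   top_level_schema[('adoption', 'was_foster')] == {'anyOf': [{'type': ['boolean', 'null']}]}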
def add_table(self, cur, path, name, metadata):
    sql.valid_identifier(name)

    cur.execute('''
        CREATE TABLE {}.{}.{} ({} {})
    '''.format(
        sql.identifier(self.connection.configured_database),
        sql.identifier(self.connection.configured_schema),
        sql.identifier(name),
        # Snowflake does not allow for creation of tables with no columns
        sql.identifier(self.CREATE_TABLE_INITIAL_COLUMN),
        self.CREATE_TABLE_INITIAL_COLUMN_TYPE))

    self._set_table_metadata(cur, name, {
        'path': path,
        'version': metadata.get('version', None),
        'schema_version': metadata['schema_version'],
        'mappings': {}
    })

    self.add_column_mapping(cur,
                            name,
                            (self.CREATE_TABLE_INITIAL_COLUMN,),
                            self.CREATE_TABLE_INITIAL_COLUMN,
                            json_schema.make_nullable({'type': json_schema.BOOLEAN}))
def sql_type_to_json_schema(self, sql_type, is_nullable):
    if sql_type == 'character varying':
        schema = {'type': [json_schema.STRING]}
        if is_nullable:
            return json_schema.make_nullable(schema)
        return schema

    return PostgresTarget.sql_type_to_json_schema(self, sql_type, is_nullable)
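# A small hedged example of the override above (assuming json_schema.STRING == 'string'
# and the make_nullable behaviour shown in the tests in this section):
#
#   target.sql_type_to_json_schema('character varying', True)   -> {'type': ['string', 'null']}
#   target.sql_type_to_json_schema('character varying', False)  -> {'type': ['string']}
#   # Any other sql_type falls through to PostgresTarget.sql_type_to_json_schema.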
def merge_put_schemas(self, cur, table_schema, table_name, existing_schema, new_schema):
    new_properties = new_schema['properties']
    existing_properties = existing_schema['schema']['properties']

    for name, schema in new_properties.items():
        ## Mapping exists
        if self.get_mapping(existing_schema, name, schema) is not None:
            pass

        ## New column
        elif name not in existing_properties:
            existing_properties[name] = schema
            self.add_column(cur, table_schema, table_name, name, schema)

        ## Existing column non-nullable, new column is nullable
        elif not json_schema.is_nullable(existing_properties[name]) \
                and json_schema.get_type(schema) \
                == json_schema.get_type(json_schema.make_nullable(existing_properties[name])):
            existing_properties[name] = json_schema.make_nullable(existing_properties[name])
            self.make_column_nullable(cur, table_schema, table_name, name)

        ## Existing column, types compatible
        elif json_schema.to_sql(json_schema.make_nullable(schema)) \
                == json_schema.to_sql(json_schema.make_nullable(existing_properties[name])):
            pass

        ## Column type change
        elif self.mapping_name(name, schema) not in existing_properties \
                and self.mapping_name(name, existing_properties[name]) not in existing_properties:
            self.split_column(cur, table_schema, table_name, name, schema, existing_properties)

        ## Error
        else:
            raise PostgresError(
                'Cannot handle column type change for: {}.{} columns {} and {}. Name collision likely.'.format(
                    table_schema, table_name, name, self.mapping_name(name, schema)))
def test_loading__invalid__column_type_change__pks__nullable():
    main(CONFIG, input_stream=CatStream(20))

    stream = CatStream(20)
    stream.schema = deepcopy(stream.schema)
    stream.schema['schema']['properties']['id'] = json_schema.make_nullable(
        stream.schema['schema']['properties']['id'])

    with pytest.raises(postgres.PostgresError, match=r'.*key_properties. type change detected'):
        main(CONFIG, input_stream=stream)
def split_column(self, cur, table_schema, table_name, column_name, column_schema, existing_properties):
    ## column_name -> column_name__<current-type>, column_name__<new-type>
    existing_column_mapping = self.mapping_name(column_name, existing_properties[column_name])
    new_column_mapping = self.mapping_name(column_name, column_schema)

    ## Update existing properties
    existing_properties[existing_column_mapping] = json_schema.make_nullable(
        existing_properties[column_name])
    existing_properties[new_column_mapping] = json_schema.make_nullable(column_schema)

    ## Add new columns
    ### NOTE: all migrated columns will be nullable and remain that way

    #### Table Metadata
    self.add_column_mapping(cur, table_schema, table_name, column_name,
                            existing_column_mapping,
                            existing_properties[existing_column_mapping])
    self.add_column_mapping(cur, table_schema, table_name, column_name,
                            new_column_mapping,
                            existing_properties[new_column_mapping])

    #### Columns
    self.add_column(cur, table_schema, table_name,
                    existing_column_mapping,
                    existing_properties[existing_column_mapping])
    self.add_column(cur, table_schema, table_name,
                    new_column_mapping,
                    existing_properties[new_column_mapping])

    ## Migrate existing data
    self.migrate_column(cur, table_schema, table_name, column_name, existing_column_mapping)

    ## Drop existing column
    self.drop_column(cur, table_schema, table_name, column_name)

    ## Remove column (field) from existing_properties
    del existing_properties[column_name]
def test_make_nullable():
    assert {'type': ['boolean', 'null']} \
           == json_schema.make_nullable({'type': 'boolean'})
    assert {'type': ['null', 'boolean']} \
           == json_schema.make_nullable({'type': ['null', 'boolean']})
    assert {'type': ['null', 'string']} \
           == json_schema.make_nullable({'type': ['null', 'string']})

    assert {
               'definitions': {
                   'address': {
                       'type': 'object',
                       'properties': {
                           'street_address': {'type': 'string'},
                           'city': {'type': 'string'},
                           'state': {'type': 'string'}
                       },
                       'required': ['street_address', 'city', 'state']
                   }
               },
               'type': ['object', 'null'],
               'properties': {
                   'billing_address': {'$ref': '#/definitions/address'},
                   'shipping_address': {'$ref': '#/definitions/address'}}} \
           == json_schema.make_nullable(
               {
                   'definitions': {
                       'address': {
                           'type': 'object',
                           'properties': {
                               'street_address': {'type': 'string'},
                               'city': {'type': 'string'},
                               'state': {'type': 'string'}
                           },
                           'required': ['street_address', 'city', 'state']
                       }
                   },
                   'type': 'object',
                   'properties': {
                       'billing_address': {'$ref': '#/definitions/address'},
                       'shipping_address': {'$ref': '#/definitions/address'}}})
def add_table(self, cur, name, metadata):
    self._validate_identifier(name)

    create_table_sql = sql.SQL('CREATE TABLE {}.{} ({} {})').format(
        sql.Identifier(self.postgres_schema),
        sql.Identifier(name),
        # Redshift does not allow for creation of tables with no columns
        sql.Identifier(self.CREATE_TABLE_INITIAL_COLUMN),
        sql.SQL(self.CREATE_TABLE_INITIAL_COLUMN_TYPE))

    cur.execute(sql.SQL('{};').format(create_table_sql))

    self._set_table_metadata(cur, name,
                             {'version': metadata.get('version', None)})

    self.add_column_mapping(cur,
                            name,
                            (self.CREATE_TABLE_INITIAL_COLUMN,),
                            self.CREATE_TABLE_INITIAL_COLUMN,
                            json_schema.make_nullable({'type': json_schema.BOOLEAN}))
def add_column(self, cur, table_schema, table_name, column_name, column_schema):
    data_type = json_schema.to_sql(column_schema)

    if not json_schema.is_nullable(column_schema) \
            and not self.is_table_empty(cur, table_schema, table_name):
        self.logger.warning(
            'Forcing new column `{}.{}.{}` to be nullable due to table not empty.'.format(
                table_schema, table_name, column_name))
        data_type = json_schema.to_sql(json_schema.make_nullable(column_schema))

    to_execute = sql.SQL('ALTER TABLE {table_schema}.{table_name} ' +
                         'ADD COLUMN {column_name} {data_type};').format(
        table_schema=sql.Identifier(table_schema),
        table_name=sql.Identifier(table_name),
        column_name=sql.Identifier(column_name),
        data_type=sql.SQL(data_type))

    cur.execute(to_execute)
def _literal_only_schema(schema):
    ret_types = json_schema.get_type(schema)

    if json_schema.is_object(schema):
        ret_types.remove(json_schema.OBJECT)
    if json_schema.is_iterable(schema):
        ret_types.remove(json_schema.ARRAY)
    if json_schema.is_nullable(schema):
        ret_types.remove(json_schema.NULL)

    ret_schemas = []
    for t in ret_types:
        s = deepcopy(schema)
        s['type'] = [t]

        if json_schema.is_nullable(schema):
            s = json_schema.make_nullable(s)

        ret_schemas.append(s)

    return {'anyOf': ret_schemas}
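# A minimal usage sketch for _literal_only_schema (illustrative; it assumes
# json_schema.get_type returns the schema's type list in order and make_nullable
# appends 'null', as the tests in this section show):
mixed_schema = {'type': ['integer', 'string', 'null']}

# Object/array/null entries are stripped, and each remaining literal type becomes its
# own single-type schema; nullability of the original schema is preserved:
#   _literal_only_schema(mixed_schema)
#     == {'anyOf': [{'type': ['integer', 'null']},
#                   {'type': ['string', 'null']}]}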
def upsert_table_helper(self, connection, schema, metadata, log_schema_changes=True):
    """
    Upserts the `schema` to remote by:
    - creating table if necessary
    - adding columns
    - adding column mappings
    - migrating data from old columns to new, etc.

    :param connection: remote connection, type left to be determined by implementing class
    :param schema: TABLE_SCHEMA(local)
    :param metadata: additional information necessary for downstream operations
    :param log_schema_changes: defaults to True, set to false to disable logging of table level schema changes
    :return: TABLE_SCHEMA(remote)
    """
    table_path = schema['path']

    with self._set_timer_tags(metrics.job_timer(),
                              'upsert_table_schema',
                              table_path) as timer:

        _metadata = deepcopy(metadata)
        _metadata['schema_version'] = CURRENT_SCHEMA_VERSION

        table_name = self.add_table_mapping(connection, table_path, _metadata)

        self._set_metrics_tags__table(timer, table_name)

        existing_schema = self._get_table_schema(connection, table_name)

        existing_table = True
        if existing_schema is None:
            self.add_table(connection, table_path, table_name, _metadata)
            existing_schema = self._get_table_schema(connection, table_name)
            existing_table = False

        self.add_key_properties(connection, table_name, schema.get('key_properties', None))

        ## Build up mappings to compare new columns against existing
        mappings = []
        for to, m in existing_schema.get('mappings', {}).items():
            mapping = json_schema.simple_type(m)
            mapping['from'] = tuple(m['from'])
            mapping['to'] = to
            mappings.append(mapping)

        ## Only process columns which have single, nullable, types
        column_paths_seen = set()
        single_type_columns = []

        for column_path, column_schema in schema['schema']['properties'].items():
            column_paths_seen.add(column_path)
            for sub_schema in column_schema['anyOf']:
                single_type_columns.append((column_path, deepcopy(sub_schema)))

        ### Add any columns missing from new schema
        for m in mappings:
            if not m['from'] in column_paths_seen:
                single_type_columns.append((m['from'], json_schema.make_nullable(m)))

        ## Process new columns against existing
        table_empty = self.is_table_empty(connection, table_name)

        for column_path, column_schema in single_type_columns:
            upsert_table_helper__start__column = time.monotonic()

            canonicalized_column_name = self._canonicalize_column_identifier(column_path, column_schema, mappings)
            nullable_column_schema = json_schema.make_nullable(column_schema)

            def log_message(msg):
                if log_schema_changes:
                    self.LOGGER.info(
                        'Table Schema Change [`{}`.`{}`:`{}`] {} (took {} millis)'.format(
                            table_name,
                            column_path,
                            canonicalized_column_name,
                            msg,
                            _duration_millis(upsert_table_helper__start__column)))

            ## NEW COLUMN
            if not column_path in [m['from'] for m in mappings]:
                upsert_table_helper__column = "New column"

                ### NON EMPTY TABLE
                if not table_empty:
                    upsert_table_helper__column += ", non empty table"
                    self.LOGGER.warning(
                        'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'.format(
                            column_path, table_name))
                    column_schema = nullable_column_schema

                self.add_column(connection,
                                table_name,
                                canonicalized_column_name,
                                column_schema)
                self.add_column_mapping(connection,
                                        table_name,
                                        column_path,
                                        canonicalized_column_name,
                                        column_schema)

                mapping = json_schema.simple_type(column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                log_message(upsert_table_helper__column)

                continue

            ## EXISTING COLUMNS
            ### SCHEMAS MATCH
            if [True for m in mappings
                    if m['from'] == column_path
                    and self.json_schema_to_sql_type(m) == self.json_schema_to_sql_type(column_schema)]:
                continue
            ### NULLABLE SCHEMAS MATCH
            ###  New column _is not_ nullable, existing column _is_
            if [True for m in mappings
                    if m['from'] == column_path
                    and self.json_schema_to_sql_type(m) == self.json_schema_to_sql_type(nullable_column_schema)]:
                continue

            ### NULL COMPATIBILITY
            ###  New column _is_ nullable, existing column is _not_
            non_null_original_column = [m for m in mappings
                                        if m['from'] == column_path
                                        and json_schema.shorthand(m) == json_schema.shorthand(column_schema)]
            if non_null_original_column:
                ## MAKE NULLABLE
                self.make_column_nullable(connection,
                                          table_name,
                                          canonicalized_column_name)
                self.drop_column_mapping(connection, table_name, canonicalized_column_name)
                self.add_column_mapping(connection,
                                        table_name,
                                        column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                mappings = [m for m in mappings
                            if not (m['from'] == column_path
                                    and json_schema.shorthand(m) == json_schema.shorthand(column_schema))]

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                log_message("Made existing column nullable.")

                continue

            ### FIRST MULTI TYPE
            ###  New column matches existing column path, but the types are incompatible
            duplicate_paths = [m for m in mappings if m['from'] == column_path]

            if 1 == len(duplicate_paths):
                existing_mapping = duplicate_paths[0]
                existing_column_name = existing_mapping['to']

                if existing_column_name:
                    self.drop_column_mapping(connection, table_name, existing_column_name)

                ## Update existing properties
                mappings = [m for m in mappings if m['from'] != column_path]

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                existing_column_new_normalized_name = self._canonicalize_column_identifier(column_path,
                                                                                           existing_mapping,
                                                                                           mappings)

                mapping = json_schema.simple_type(json_schema.make_nullable(existing_mapping))
                mapping['from'] = column_path
                mapping['to'] = existing_column_new_normalized_name
                mappings.append(mapping)

                ## Add new columns
                ### NOTE: all migrated columns will be nullable and remain that way

                #### Table Metadata
                self.add_column_mapping(connection,
                                        table_name,
                                        column_path,
                                        existing_column_new_normalized_name,
                                        json_schema.make_nullable(existing_mapping))
                self.add_column_mapping(connection,
                                        table_name,
                                        column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                #### Columns
                self.add_column(connection,
                                table_name,
                                existing_column_new_normalized_name,
                                json_schema.make_nullable(existing_mapping))
                self.add_column(connection,
                                table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                ## Migrate existing data
                self.migrate_column(connection,
                                    table_name,
                                    existing_mapping['to'],
                                    existing_column_new_normalized_name)

                ## Drop existing column
                self.drop_column(connection,
                                 table_name,
                                 existing_mapping['to'])

                upsert_table_helper__column = "Splitting `{}` into `{}` and `{}`. New column matches existing column path, but the types are incompatible.".format(
                    existing_column_name,
                    existing_column_new_normalized_name,
                    canonicalized_column_name
                )

            ## REST MULTI TYPE
            elif 1 < len(duplicate_paths):
                ## Add new column
                self.add_column_mapping(connection,
                                        table_name,
                                        column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)
                self.add_column(connection,
                                table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                upsert_table_helper__column = "Adding new column to split column `{}`. New column matches existing column's path, but no types were compatible.".format(
                    column_path
                )

            ## UNKNOWN
            else:
                raise Exception(
                    'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'.format(
                        column_path,
                        canonicalized_column_name,
                        table_name
                    ))

            log_message(upsert_table_helper__column)

        if not existing_table:
            for column_names in self.new_table_indexes(schema):
                self.add_index(connection, table_name, column_names)

        return self._get_table_schema(connection, table_name)
def upsert_table_helper(self, connection, schema, metadata):
    """
    Upserts the `schema` to remote by:
    - creating table if necessary
    - adding columns
    - adding column mappings
    - migrating data from old columns to new, etc.

    :param connection: remote connection, type left to be determined by implementing class
    :param schema: TABLE_SCHEMA(local)
    :param metadata: additional information necessary for downstream operations
    :return: TABLE_SCHEMA(remote)
    """
    table_path = schema['path']
    table_name = self.add_table_mapping(connection, table_path, metadata)

    existing_schema = self.get_table_schema(connection, table_path, table_name)

    if existing_schema is None:
        self.add_table(connection, table_name, metadata)
        existing_schema = self.get_table_schema(connection, table_path, table_name)

    self.add_key_properties(connection, table_name, schema.get('key_properties', None))

    ## Only process columns which have single, nullable, types
    single_type_columns = []
    for column_name__or__path, column_schema in schema['schema']['properties'].items():
        column_path = column_name__or__path
        if isinstance(column_name__or__path, str):
            column_path = (column_name__or__path,)

        single_type_column_schema = deepcopy(column_schema)
        column_types = json_schema.get_type(single_type_column_schema)
        make_nullable = json_schema.is_nullable(column_schema)

        for type in column_types:
            if type == json_schema.NULL:
                continue

            single_type_column_schema['type'] = [type]

            if make_nullable:
                single_type_columns.append(
                    (column_path, json_schema.make_nullable(single_type_column_schema)))
            else:
                single_type_columns.append(
                    (column_path, single_type_column_schema))

    ## Process new columns against existing
    raw_mappings = existing_schema.get('mappings', {})

    mappings = []
    for to, m in raw_mappings.items():
        mappings.append({
            'from': tuple(m['from']),
            'to': to,
            'type': m['type']
        })

    table_empty = self.is_table_empty(connection, table_name)

    for column_path, column_schema in single_type_columns:
        canonicalized_column_name = self._canonicalize_column_identifier(
            column_path, column_schema, mappings)
        nullable_column_schema = json_schema.make_nullable(column_schema)

        ## NEW COLUMN
        if not column_path in [m['from'] for m in mappings]:
            ### NON EMPTY TABLE
            if not table_empty:
                self.LOGGER.warning(
                    'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'
                    .format(column_path, table_name))
                column_schema = nullable_column_schema

            self.add_column(connection, table_name, canonicalized_column_name, column_schema)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name, column_schema)
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(column_schema)
            })

            continue

        ## EXISTING COLUMNS
        ### SCHEMAS MATCH
        if [True for m in mappings
                if m['from'] == column_path
                and json_schema.to_sql(m) == json_schema.to_sql(column_schema)]:
            continue
        ### NULLABLE SCHEMAS MATCH
        ###  New column _is not_ nullable, existing column _is_
        if [True for m in mappings
                if m['from'] == column_path
                and json_schema.to_sql(m) == json_schema.to_sql(nullable_column_schema)]:
            continue

        ### NULL COMPATIBILITY
        ###  New column _is_ nullable, existing column is _not_
        non_null_original_column = [
            m for m in mappings
            if m['from'] == column_path
            and json_schema.sql_shorthand(m) == json_schema.sql_shorthand(column_schema)
        ]
        if non_null_original_column:
            ## MAKE NULLABLE
            self.make_column_nullable(connection, table_name, canonicalized_column_name)
            self.drop_column_mapping(connection, table_name, canonicalized_column_name)
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name, nullable_column_schema)

            mappings = [
                m for m in mappings
                if not (m['from'] == column_path
                        and json_schema.sql_shorthand(m) == json_schema.sql_shorthand(column_schema))
            ]
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

            continue

        ### FIRST MULTI TYPE
        ###  New column matches existing column path, but the types are incompatible
        duplicate_paths = [m for m in mappings if m['from'] == column_path]

        if 1 == len(duplicate_paths):
            existing_mapping = duplicate_paths[0]
            existing_column_name = existing_mapping['to']

            if existing_column_name:
                self.drop_column_mapping(connection, table_name, existing_column_name)

            ## Update existing properties
            mappings = [m for m in mappings if m['from'] != column_path]
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

            existing_column_new_normalized_name = self._canonicalize_column_identifier(
                column_path, existing_mapping, mappings)
            mappings.append({
                'from': column_path,
                'to': existing_column_new_normalized_name,
                'type': json_schema.get_type(json_schema.make_nullable(existing_mapping))
            })

            ## Add new columns
            ### NOTE: all migrated columns will be nullable and remain that way

            #### Table Metadata
            self.add_column_mapping(connection, table_name, column_path,
                                    existing_column_new_normalized_name,
                                    json_schema.make_nullable(existing_mapping))
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)

            #### Columns
            self.add_column(connection, table_name,
                            existing_column_new_normalized_name,
                            json_schema.make_nullable(existing_mapping))
            self.add_column(connection, table_name,
                            canonicalized_column_name,
                            nullable_column_schema)

            ## Migrate existing data
            self.migrate_column(connection, table_name,
                                existing_mapping['to'],
                                existing_column_new_normalized_name)

            ## Drop existing column
            self.drop_column(connection, table_name, existing_mapping['to'])

        ## REST MULTI TYPE
        elif 1 < len(duplicate_paths):
            ## Add new column
            self.add_column_mapping(connection, table_name, column_path,
                                    canonicalized_column_name,
                                    nullable_column_schema)
            self.add_column(connection, table_name,
                            canonicalized_column_name,
                            nullable_column_schema)
            mappings.append({
                'from': column_path,
                'to': canonicalized_column_name,
                'type': json_schema.get_type(nullable_column_schema)
            })

        ## UNKNOWN
        else:
            raise Exception(
                'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'
                .format(column_path, canonicalized_column_name, table_name))

    return self.get_table_schema(connection, table_path, table_name)
def test_loading__column_type_change__nullable(db_cleanup):
    cat_count = 20
    main(CONFIG, input_stream=CatStream(cat_count))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'NO'),
                ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(sql.SQL('SELECT {} FROM {}').format(
                sql.Identifier('name'),
                sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert that the original data is present
            assert cat_count == len(persisted_records)
            assert cat_count == len([x for x in persisted_records if x[0] is not None])

    class NameNullCatStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + cat_count
            record['name'] = None
            return record

    stream = NameNullCatStream(cat_count)
    stream.schema = deepcopy(stream.schema)
    stream.schema['schema']['properties']['name'] = json_schema.make_nullable(
        stream.schema['schema']['properties']['name'])

    main(CONFIG, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'YES'),
                ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(sql.SQL('SELECT {} FROM {}').format(
                sql.Identifier('name'),
                sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert that the column has migrated data
            assert 2 * cat_count == len(persisted_records)
            assert cat_count == len([x for x in persisted_records if x[0] is not None])
            assert cat_count == len([x for x in persisted_records if x[0] is None])

    class NameNonNullCatStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + 2 * cat_count
            return record

    main(CONFIG, input_stream=NameNonNullCatStream(cat_count))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'YES'),
                ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(sql.SQL('SELECT {} FROM {}').format(
                sql.Identifier('name'),
                sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert that the column has migrated data
            assert 3 * cat_count == len(persisted_records)
            assert 2 * cat_count == len([x for x in persisted_records if x[0] is not None])
            assert cat_count == len([x for x in persisted_records if x[0] is None])