def test_upsert(db_cleanup):
    """Upserting the same primary keys keeps the row count stable; new keys grow it."""
    def _load_and_verify(cat_stream, expected_count):
        # Run the target end-to-end, then check count and persisted record contents.
        main(CONFIG, input_stream=cat_stream)
        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                assert cur.fetchone()[0] == expected_count
            assert_records(conn, cat_stream.records, 'cats', 'id')

    _load_and_verify(CatStream(100), 100)
    # Replaying the same 100 cats must not create duplicates.
    _load_and_verify(CatStream(100), 100)
    # 200 cats include new primary keys, so the table grows.
    _load_and_verify(CatStream(200), 200)
def test_deduplication_existing_new_rows(db_prep):
    """Rows replayed with an older sequence must not displace already-persisted rows."""
    cat_stream = CatStream(100, nested_count=2)
    main(CONFIG, input_stream=cat_stream)
    first_sequence = cat_stream.sequence

    # Replay the same cats carrying a stale sequence value.
    main(CONFIG,
         input_stream=CatStream(100, nested_count=2, sequence=first_sequence - 20))

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('CATS'))
            table_count = cur.fetchone()[0]
            cur.execute(get_count_sql('CATS__ADOPTION__IMMUNIZATIONS'))
            nested_table_count = cur.fetchone()[0]
            cur.execute('''
                    SELECT DISTINCT "_SDC_SEQUENCE" FROM {}.{}.{}
                '''.format(
                sql.identifier(CONFIG['snowflake_database']),
                sql.identifier(CONFIG['snowflake_schema']),
                sql.identifier('CATS')))
            sequences = cur.fetchall()

    assert table_count == 100
    assert nested_table_count == 200
    # Only the original (newer) sequence survives.
    assert len(sequences) == 1
    assert sequences[0][0] == first_sequence
def test_loading__invalid__configuration__schema():
    """A stream whose JSON Schema is malformed must abort the load with a schema error."""
    cat_stream = CatStream(1)
    cat_stream.schema = deepcopy(cat_stream.schema)
    # Corrupt the schema's `type` so validation has to reject it.
    cat_stream.schema['schema']['type'] = 'invalid type for a JSON Schema'

    with pytest.raises(Exception, match=r'.*invalid JSON Schema instance.*'):
        main(CONFIG, input_stream=cat_stream)
def test_multiple_batches_by_memory_upsert(db_cleanup):
    """Tiny memory batches still upsert correctly, including nested child tables."""
    config = CONFIG.copy()
    config['max_batch_size'] = 1024
    config['batch_detection_threshold'] = 5

    def _load_and_verify(cat_stream, expected_nested_count):
        # Load under the small-batch config, then verify parent and child counts.
        main(config, input_stream=cat_stream)
        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                assert cur.fetchone()[0] == 100
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                assert cur.fetchone()[0] == expected_nested_count
            assert_records(conn, cat_stream.records, 'cats', 'id')

    _load_and_verify(CatStream(100, nested_count=2), 200)
    # Upserting with more nested rows per cat replaces the child rows.
    _load_and_verify(CatStream(100, nested_count=3), 300)
def test_deduplication_existing_new_rows(db_prep):
    """Replaying rows with an older sequence leaves the persisted sequence untouched."""
    cat_stream = CatStream(100, nested_count=2)
    main(CONFIG, input_stream=cat_stream)
    first_sequence = cat_stream.sequence

    # Same cats again, but carrying a stale sequence.
    main(CONFIG,
         input_stream=CatStream(100, nested_count=2, sequence=first_sequence - 20))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            table_count = cur.fetchone()[0]
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            nested_table_count = cur.fetchone()[0]
            cur.execute(
                sql.SQL('SELECT DISTINCT _sdc_sequence FROM {}.{}').format(
                    sql.Identifier(CONFIG['redshift_schema']),
                    sql.Identifier('cats')))
            sequences = cur.fetchall()

    assert table_count == 100
    assert nested_table_count == 200
    # Only the original (newer) sequence value survives.
    assert len(sequences) == 1
    assert sequences[0][0] == first_sequence
def test_add_record_message():
    """A valid record is accepted silently and produces no invalid-record entries."""
    cat_stream = CatStream(10)
    buffered = BufferedSingerStream(CATS_SCHEMA['stream'],
                                    CATS_SCHEMA['schema'],
                                    CATS_SCHEMA['key_properties'])

    # add_record_message has no useful return value for a good record.
    assert buffered.add_record_message(cat_stream.generate_record_message()) is None
    assert not buffered.peek_invalid_records()
    assert missing_sdc_properties(buffered) == []
def test_loading__empty__enabled_config__repeatability(db_prep):
    """Repeated loads of an empty stream with persist_empty_tables enabled are idempotent."""
    config = CONFIG.copy()
    config['persist_empty_tables'] = True

    # Three consecutive empty loads must all succeed without error.
    for _ in range(3):
        main(config, input_stream=CatStream(0))
def test_multiple_batches__old_records__by_rows():
    """Once a newer table version is buffered, older-version records are dropped on arrival."""
    stream_oldest = CatStream(100, version=0)
    stream_middle_aged = CatStream(100, version=5)
    stream_latest = CatStream(100, version=10)
    buffered = BufferedSingerStream(CATS_SCHEMA['stream'],
                                    CATS_SCHEMA['schema'],
                                    CATS_SCHEMA['key_properties'],
                                    max_rows=20)

    assert len(buffered.peek_buffer()) == 0

    # Fill the buffer to its row cap with the oldest version.
    while not buffered.buffer_full:
        buffered.add_record_message(stream_oldest.generate_record_message())
    assert len(buffered.peek_buffer()) == 20

    buffered.flush_buffer()
    assert len(buffered.peek_buffer()) == 0

    # A single record from the newest version enters the buffer...
    buffered.add_record_message(stream_latest.generate_record_message())
    assert len(buffered.peek_buffer()) == 1

    # ...after which mid-version records are ignored: the buffer can never fill.
    remaining_attempts = 1000
    while not buffered.buffer_full and remaining_attempts != 0:
        buffered.add_record_message(stream_middle_aged.generate_record_message())
        remaining_attempts -= 1

    assert remaining_attempts == 0
    assert len(buffered.peek_buffer()) == 1
    assert missing_sdc_properties(buffered) == []
def test_upsert__invalid__primary_key_change(db_cleanup):
    """Changing a stream's key_properties between loads must raise a PostgresError."""
    main(CONFIG, input_stream=CatStream(100))

    cat_stream = CatStream(100)
    # Extend the primary key with an extra column to trigger the mismatch.
    modified_schema = deepcopy(cat_stream.schema)
    modified_schema['key_properties'].append('name')
    cat_stream.schema = modified_schema

    with pytest.raises(postgres.PostgresError, match=r'.*key_properties.*'):
        main(CONFIG, input_stream=cat_stream)
def test_loading__invalid__column_type_change__pks__nullable():
    """Making a primary-key column nullable after the initial load must be rejected."""
    main(CONFIG, input_stream=CatStream(20))

    cat_stream = CatStream(20)
    cat_stream.schema = deepcopy(cat_stream.schema)
    # Relax `id` to a nullable type — an illegal change for a key column.
    cat_stream.schema['schema']['properties']['id'] = json_schema.make_nullable(
        cat_stream.schema['schema']['properties']['id'])

    with pytest.raises(postgres.PostgresError,
                       match=r'.*key_properties. type change detected'):
        main(CONFIG, input_stream=cat_stream)
def test_full_table_replication(db_prep):
    """Each newer table version fully replaces the table; a stale version cannot overwrite."""
    def _load_and_count(cat_stream):
        # Run the target, verify persisted records, return (parent, nested) row counts.
        main(CONFIG, input_stream=cat_stream)
        with connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('CATS'))
                parent_count = cur.fetchone()[0]
                cur.execute(get_count_sql('CATS__ADOPTION__IMMUNIZATIONS'))
                nested_count = cur.fetchone()[0]
            assert_records(conn, cat_stream.records, 'CATS', 'ID', match_pks=True)
        return parent_count, nested_count

    version_0_count, version_0_sub_count = _load_and_count(
        CatStream(110, version=0, nested_count=3))
    assert version_0_count == 110
    assert version_0_sub_count == 330

    version_1_count, version_1_sub_count = _load_and_count(
        CatStream(100, version=1, nested_count=3))
    assert version_1_count == 100
    assert version_1_sub_count == 300

    version_2_count, version_2_sub_count = _load_and_count(
        CatStream(120, version=2, nested_count=2))
    assert version_2_count == 120
    assert version_2_sub_count == 240

    # An outdated version must not overwrite the latest data.
    main(CONFIG, input_stream=CatStream(314, version=1, nested_count=2))
    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('CATS'))
            older_version_count = cur.fetchone()[0]

    assert older_version_count == version_2_count
def test_deduplication_older_rows(db_prep):
    """Duplicates carrying an older sequence are discarded in favor of the newest copy."""
    cat_stream = CatStream(100,
                           nested_count=2,
                           duplicates=2,
                           duplicate_sequence_delta=-100)
    main(CONFIG, input_stream=cat_stream)

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('CATS'))
            table_count = cur.fetchone()[0]
            cur.execute(get_count_sql('CATS__ADOPTION__IMMUNIZATIONS'))
            nested_table_count = cur.fetchone()[0]
            cur.execute('''
                    SELECT "_SDC_SEQUENCE"
                    FROM {}.{}.{}
                    WHERE "ID" in ({})
                '''.format(
                sql.identifier(CONFIG['snowflake_database']),
                sql.identifier(CONFIG['snowflake_schema']),
                sql.identifier('CATS'),
                ','.join("'{}'".format(pk) for pk in cat_stream.duplicate_pks_used)))
            dup_cat_records = cur.fetchall()

    # 100 cats + 2 duplicate messages were sent...
    assert cat_stream.record_message_count == 102
    # ...but only the deduplicated rows persist.
    assert table_count == 100
    assert nested_table_count == 200
    # Each surviving duplicate keeps the stream's (newest) sequence.
    for row in dup_cat_records:
        assert row[0] == cat_stream.sequence
def test_state__doesnt_emit_when_it_isnt_different_than_the_previous_emission(
        capsys):
    """A STATE message identical to the previously emitted one is not re-emitted."""
    config = CONFIG.copy()
    config['max_batch_rows'] = 5
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(100))
    target = Target()

    # Assertions live inside the generator so they run at precise points in the
    # stream while stream_to_target is consuming it.
    def _input_stream():
        yield rows[0]
        for row in rows[slice(1, 21)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        # First appearance of state-1: exactly one emission so far.
        output = filtered_output(capsys)
        assert len(output) == 1

        for row in rows[slice(22, 99)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        # Identical repeated state must not be emitted again.
        output = filtered_output(capsys)
        assert len(output) == 0

    target_tools.stream_to_target(_input_stream(), target, config=config)

    # No further emissions on the final flush either.
    output = filtered_output(capsys)
    assert len(output) == 0
def test_loading__simple(db_cleanup):
    """A basic load creates the expected columns on root and nested tables and persists all rows."""
    cat_stream = CatStream(100)
    main(CONFIG, input_stream=cat_stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            assert set(cur.fetchall()) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'NO'),
                ('pattern', 'text', 'YES')
            }

            # The nested array is denested into its own child table.
            cur.execute(get_columns_sql('cats__adoption__immunizations'))
            assert set(cur.fetchall()) == {
                ('_sdc_level_0_id', 'bigint', 'NO'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_source_key_id', 'bigint', 'NO'),
                ('date_administered', 'timestamp with time zone', 'YES'),
                ('type', 'text', 'YES')
            }

            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100

        assert_records(conn, cat_stream.records, 'cats', 'id')
def test_deduplication_newer_rows(db_prep):
    """Duplicate rows with newer sequences win: the table stays deduplicated and the
    persisted sequence for the duplicated PKs reflects the newest copy.

    Fix: the original query pre-interpolated the PK list into the template via
    str.format (leaving only two ``{}`` placeholders) while also passing an extra
    ``sql.Literal`` argument that was never consumed — format-style extra
    positional args are silently ignored. The IN-list is now composed safely from
    ``sql.Literal`` values joined into the statement.
    """
    stream = CatStream(100, nested_count=3, duplicates=2)
    main(CONFIG, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            table_count = cur.fetchone()[0]
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            nested_table_count = cur.fetchone()[0]

            cur.execute(
                sql.SQL('SELECT _sdc_sequence FROM {}.{} WHERE id in ({})').format(
                    sql.Identifier(CONFIG['redshift_schema']),
                    sql.Identifier('cats'),
                    sql.SQL(',').join(
                        sql.Literal(pk) for pk in stream.duplicate_pks_used)))
            dup_cat_records = cur.fetchall()

    # 100 cats + 2 duplicate messages were sent, but the table stays deduplicated.
    assert stream.record_message_count == 102
    assert table_count == 100
    assert nested_table_count == 300
    # Surviving duplicates carry the newest (offset) sequence value.
    for record in dup_cat_records:
        assert record[0] == stream.sequence + 200
def test_deduplication_older_rows(db_cleanup):
    """Duplicates with older sequences are dropped: the table stays deduplicated and
    surviving rows keep the stream's (newest) sequence.

    Fix: the PK filter was string-interpolated directly into the SQL text; it now
    uses psycopg2 parameter binding — a tuple argument adapts to a parenthesized
    value list, which is exactly what ``IN %s`` needs.
    """
    stream = CatStream(100,
                       nested_count=2,
                       duplicates=2,
                       duplicate_sequence_delta=-100)
    main(CONFIG, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            table_count = cur.fetchone()[0]
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            nested_table_count = cur.fetchone()[0]

            # Parameterized IN-list: psycopg2 adapts the tuple to (v1, v2, ...).
            cur.execute('SELECT _sdc_sequence FROM cats WHERE id in %s',
                        (tuple(stream.duplicate_pks_used),))
            dup_cat_records = cur.fetchall()

    # 100 cats + 2 duplicate messages were sent, but the table stays deduplicated.
    assert stream.record_message_count == 102
    assert table_count == 100
    assert nested_table_count == 200
    for record in dup_cat_records:
        assert record[0] == stream.sequence
def test_loading__empty__enabled_config(db_prep):
    """With persist_empty_tables enabled, an empty stream still creates the tables (0 rows)."""
    config = CONFIG.copy()
    config['persist_empty_tables'] = True

    main(config, input_stream=CatStream(0))

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_columns_equal(cur, 'CATS', {
                ('_SDC_BATCHED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_RECEIVED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                ('_SDC_TABLE_VERSION', 'NUMBER', 'YES'),
                ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                ('ADOPTION__ADOPTED_ON', 'TIMESTAMP_TZ', 'YES'),
                ('ADOPTION__WAS_FOSTER', 'BOOLEAN', 'YES'),
                ('AGE', 'NUMBER', 'YES'),
                ('ID', 'NUMBER', 'NO'),
                ('NAME', 'TEXT', 'NO'),
                ('PAW_SIZE', 'NUMBER', 'NO'),
                ('PAW_COLOUR', 'TEXT', 'NO'),
                ('FLEA_CHECK_COMPLETE', 'BOOLEAN', 'NO'),
                ('PATTERN', 'TEXT', 'YES')
            })

            assert_columns_equal(cur, 'CATS__ADOPTION__IMMUNIZATIONS', {
                ('_SDC_LEVEL_0_ID', 'NUMBER', 'NO'),
                ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                ('_SDC_SOURCE_KEY_ID', 'NUMBER', 'NO'),
                ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                ('DATE_ADMINISTERED', 'TIMESTAMP_TZ', 'YES'),
                ('TYPE', 'TEXT', 'YES')
            })

            # Tables exist but hold no rows.
            assert_count_equal(cur, 'CATS', 0)
def test_multiple_batches__by_memory():
    """With a tiny memory cap the buffer reports full after a single record."""
    cat_stream = CatStream(100)
    buffered = BufferedSingerStream(CATS_SCHEMA['stream'],
                                    CATS_SCHEMA['schema'],
                                    CATS_SCHEMA['key_properties'],
                                    max_buffer_size=1024)

    assert len(buffered.peek_buffer()) == 0

    # 1024 bytes is smaller than one record's footprint, so one record fills it.
    while not buffered.buffer_full:
        buffered.add_record_message(cat_stream.generate_record_message())

    assert len(buffered.peek_buffer()) == 1
    assert missing_sdc_properties(buffered) == []

    buffered.flush_buffer()
    assert len(buffered.peek_buffer()) == 0
def test_loading__new_non_null_column(db_prep):
    """Adding a defaulted column after the initial load: old rows hold NULL, new rows a value."""
    cat_count = 50
    main(CONFIG, input_stream=CatStream(cat_count))

    class NonNullStream(CatStream):
        def generate_record(self):
            # Offset the ids so the second load inserts brand-new rows.
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + cat_count
            return record

    non_null_stream = NonNullStream(cat_count)
    non_null_stream.schema = deepcopy(non_null_stream.schema)
    non_null_stream.schema['schema']['properties']['paw_toe_count'] = {
        'type': 'integer',
        'default': 5
    }
    main(CONFIG, input_stream=non_null_stream)

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_columns_equal(cur, 'CATS', {
                ('_SDC_BATCHED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_RECEIVED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                ('_SDC_TABLE_VERSION', 'NUMBER', 'YES'),
                ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                ('ADOPTION__ADOPTED_ON', 'TIMESTAMP_TZ', 'YES'),
                ('ADOPTION__WAS_FOSTER', 'BOOLEAN', 'YES'),
                ('AGE', 'NUMBER', 'YES'),
                ('ID', 'NUMBER', 'NO'),
                ('NAME', 'TEXT', 'NO'),
                ('PAW_SIZE', 'NUMBER', 'NO'),
                ('PAW_COLOUR', 'TEXT', 'NO'),
                ('PAW_TOE_COUNT', 'NUMBER', 'YES'),
                ('FLEA_CHECK_COMPLETE', 'BOOLEAN', 'NO'),
                ('PATTERN', 'TEXT', 'YES')
            })

            cur.execute('''
                SELECT {}, {} FROM {}.{}.{}
            '''.format(
                sql.identifier('ID'),
                sql.identifier('PAW_TOE_COUNT'),
                sql.identifier(CONFIG['snowflake_database']),
                sql.identifier(CONFIG['snowflake_schema']),
                sql.identifier('CATS')
            ))
            persisted_records = cur.fetchall()

    # Rows loaded before the column existed are NULL; rows after carry a value.
    assert len(persisted_records) == 2 * cat_count
    assert len([r for r in persisted_records if r[1] is None]) == cat_count
    assert len([r for r in persisted_records if r[1] is not None]) == cat_count
def test_full_table_replication(db_cleanup):
    """Each newer table version fully replaces the previous table contents."""
    def _load_and_count(cat_stream):
        # Run the target, verify persisted records, return (parent, nested) row counts.
        main(CONFIG, input_stream=cat_stream)
        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                parent_count = cur.fetchone()[0]
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                nested_count = cur.fetchone()[0]
            assert_records(conn, cat_stream.records, 'cats', 'id', match_pks=True)
        return parent_count, nested_count

    version_0_count, version_0_sub_count = _load_and_count(
        CatStream(110, version=0, nested_count=3))
    assert version_0_count == 110
    assert version_0_sub_count == 330

    version_1_count, version_1_sub_count = _load_and_count(
        CatStream(100, version=1, nested_count=3))
    assert version_1_count == 100
    assert version_1_sub_count == 300

    version_2_count, version_2_sub_count = _load_and_count(
        CatStream(120, version=2, nested_count=2))
    assert version_2_count == 120
    assert version_2_sub_count == 240
def test_nested_delete_on_parent(db_cleanup):
    """Re-sending a parent with fewer nested rows removes the surplus child rows."""
    def _load_and_count_nested(cat_stream):
        # Load, verify persisted records, and return the child-table row count.
        main(CONFIG, input_stream=cat_stream)
        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                nested_count = cur.fetchone()[0]
            assert_records(conn, cat_stream.records, 'cats', 'id')
        return nested_count

    high_nested = _load_and_count_nested(CatStream(100, nested_count=3))
    low_nested = _load_and_count_nested(CatStream(100, nested_count=2))

    assert low_nested < high_nested
def test_nested_delete_on_parent(db_prep):
    """Re-sending a parent with fewer nested rows removes the surplus child rows."""
    def _load_and_count_nested(cat_stream):
        # Load, verify persisted records, and return the child-table row count.
        main(CONFIG, input_stream=cat_stream)
        with connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('CATS__ADOPTION__IMMUNIZATIONS'))
                nested_count = cur.fetchone()[0]
            assert_records(conn, cat_stream.records, 'CATS', 'ID')
        return nested_count

    high_nested = _load_and_count_nested(CatStream(100, nested_count=3))
    low_nested = _load_and_count_nested(CatStream(100, nested_count=2))

    assert low_nested < high_nested
def test_multiple_batches_by_memory(db_cleanup):
    """A 1 KiB batch cap forces many small writes: 100 cats yield 21 write_batch calls."""
    with patch.object(postgres.PostgresTarget,
                      'write_batch',
                      side_effect=mocked_mock_write_batch) as mock_write_batch:
        config = CONFIG.copy()
        config['max_batch_size'] = 1024
        config['batch_detection_threshold'] = 5

        main(config, input_stream=CatStream(100))

        assert mock_write_batch.call_count == 21
def test_loading__new_non_null_column(db_cleanup):
    """Adding a defaulted column after the initial load: old rows hold NULL, new rows a value."""
    cat_count = 50
    main(CONFIG, input_stream=CatStream(cat_count))

    class NonNullStream(CatStream):
        def generate_record(self):
            # Offset the ids so the second load inserts brand-new rows.
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + cat_count
            return record

    non_null_stream = NonNullStream(cat_count)
    non_null_stream.schema = deepcopy(non_null_stream.schema)
    non_null_stream.schema['schema']['properties']['paw_toe_count'] = {
        'type': 'integer',
        'default': 5
    }
    main(CONFIG, input_stream=non_null_stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            assert set(cur.fetchall()) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'NO'),
                ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('paw_toe_count', 'bigint', 'YES'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(
                sql.SQL('SELECT {}, {} FROM {}').format(
                    sql.Identifier('id'),
                    sql.Identifier('paw_toe_count'),
                    sql.Identifier('cats')))
            persisted_records = cur.fetchall()

    # Rows loaded before the column existed are NULL; rows after carry a value.
    assert len(persisted_records) == 2 * cat_count
    assert len([r for r in persisted_records if r[1] is None]) == cat_count
    assert len([r for r in persisted_records if r[1] is not None]) == cat_count
def test_init__empty_key_properties():
    """Without key_properties the stream falls back to SINGER_PK and fills it on every row."""
    buffered = BufferedSingerStream(CATS_SCHEMA['stream'], CATS_SCHEMA['schema'], [])
    cat_stream = CatStream(100)
    for _ in range(20):
        buffered.add_record_message(cat_stream.generate_record_message())

    assert buffered
    assert missing_sdc_properties(buffered) == []
    assert buffered.key_properties == [SINGER_PK]

    # Every batched row must carry a truthy synthesized primary key.
    rows_checked = 0
    rows_missing_pk = []
    for record in buffered.get_batch():
        if not record[SINGER_PK]:
            rows_missing_pk.append(record)
        rows_checked += 1

    assert rows_checked > 1
    assert rows_missing_pk == []
def test_state__emits_most_recent_state_when_final_flush_occurs(capsys):
    """A trailing STATE is emitted on the final flush even when the batch never fills."""
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1

    input_rows = list(CatStream(5))
    input_rows.append(json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}}))

    target_tools.stream_to_target(input_rows, Target(), config=config)

    # Despite never reaching one full flushable batch, the final state message
    # must be outputted after the last records are loaded.
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-1'
def test_state__doesnt_emit_when_only_one_of_several_streams_is_flushing(
        capsys):
    """State is held back until every stream with buffered records has flushed past it."""
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1

    cat_rows = list(CatStream(100))
    dog_rows = list(DogStream(50))
    target = Target()

    # One stream yields many records while the other yields few; both must flush
    # before any state message may be emitted. Assertions sit inside the
    # generator so they run at precise points while the stream is consumed.
    def _interleaved_stream():
        yield cat_rows[0]
        yield dog_rows[0]
        for row in cat_rows[slice(1, 5)]:
            yield row
        for row in dog_rows[slice(1, 5)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})
        for row in cat_rows[slice(6, 45)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-2'}})
        for row in cat_rows[slice(46, 65)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-3'}})

        # Cat batches have flushed, but the dog stream hasn't hit its batch
        # size yet — so no state messages may have been emitted.
        assert len(target.calls['write_batch']) == 3
        output = filtered_output(capsys)
        assert output == []

        for row in dog_rows[slice(6, 25)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-4'}})

        # Once the dog stream flushes too, the most recent safe-to-emit state
        # (state-2) is emitted.
        assert len(target.calls['write_batch']) == 4
        output = filtered_output(capsys)
        assert len(output) == 1
        assert json.loads(output[0])['value']['test'] == 'state-2'

    target_tools.stream_to_target(_interleaved_stream(), target, config=config)

    # The trailing state is emitted on the final flush even without a full batch.
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-4'
def test_upsert(db_prep):
    """Re-upserting identical primary keys keeps the count flat; new keys grow it."""
    def _load_and_verify(cat_stream, expected_count):
        # Run the target, then confirm the count and persisted record contents.
        main(CONFIG, input_stream=cat_stream)
        with connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                assert_count_equal(cur, 'CATS', expected_count)
            assert_records(conn, cat_stream.records, 'CATS', 'ID')

    _load_and_verify(CatStream(100), 100)
    # Replaying the same 100 cats must not create duplicates.
    _load_and_verify(CatStream(100), 100)
    # 200 cats include new primary keys, so the table grows.
    _load_and_verify(CatStream(200), 200)
def test_loading__invalid__column_type_change__pks():
    """Changing a primary-key column's type between loads must be rejected."""
    main(CONFIG, input_stream=CatStream(20))

    class StringIdCatStream(CatStream):
        def generate_record(self):
            # Same cats, but with string ids to force a pk type change.
            record = CatStream.generate_record(self)
            record['id'] = str(record['id'])
            return record

    cat_stream = StringIdCatStream(20)
    cat_stream.schema = deepcopy(cat_stream.schema)
    cat_stream.schema['schema']['properties']['id'] = {'type': 'string'}

    with pytest.raises(postgres.PostgresError,
                       match=r'.*key_properties. type change detected'):
        main(CONFIG, input_stream=cat_stream)
def test_loading__simple__s3_staging(db_prep):
    """Loading through S3 staging yields the same schema and rows as a direct load."""
    cat_stream = CatStream(100)
    main(S3_CONFIG, input_stream=cat_stream)

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_columns_equal(cur, 'CATS', {
                ('_SDC_BATCHED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_RECEIVED_AT', 'TIMESTAMP_TZ', 'YES'),
                ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                ('_SDC_TABLE_VERSION', 'NUMBER', 'YES'),
                ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                ('ADOPTION__ADOPTED_ON', 'TIMESTAMP_TZ', 'YES'),
                ('ADOPTION__WAS_FOSTER', 'BOOLEAN', 'YES'),
                ('AGE', 'NUMBER', 'YES'),
                ('ID', 'NUMBER', 'NO'),
                ('NAME', 'TEXT', 'NO'),
                ('PAW_SIZE', 'NUMBER', 'NO'),
                ('PAW_COLOUR', 'TEXT', 'NO'),
                ('FLEA_CHECK_COMPLETE', 'BOOLEAN', 'NO'),
                ('PATTERN', 'TEXT', 'YES')
            })

            assert_columns_equal(cur, 'CATS__ADOPTION__IMMUNIZATIONS', {
                ('_SDC_LEVEL_0_ID', 'NUMBER', 'NO'),
                ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                ('_SDC_SOURCE_KEY_ID', 'NUMBER', 'NO'),
                ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                ('DATE_ADMINISTERED', 'TIMESTAMP_TZ', 'YES'),
                ('TYPE', 'TEXT', 'YES')
            })

            assert_count_equal(cur, 'CATS', 100)

        # Mirror the values the target persists for these columns when absent
        # from the input records — presumably schema defaults; verify against
        # the CatStream schema if this ever drifts.
        for record in cat_stream.records:
            record['paw_size'] = 314159
            record['paw_colour'] = ''
            record['flea_check_complete'] = False

        assert_records(conn, cat_stream.records, 'CATS', 'ID')