def test_loading__empty__enabled_config(db_prep):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True
    stream = CatStream(0)

    main(config, input_stream=stream)

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_columns_equal(cur,
                                 'CATS',
                                 {
                                     ('_SDC_BATCHED_AT', 'TIMESTAMP_TZ', 'YES'),
                                     ('_SDC_RECEIVED_AT', 'TIMESTAMP_TZ', 'YES'),
                                     ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                                     ('_SDC_TABLE_VERSION', 'NUMBER', 'YES'),
                                     ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                                     ('ADOPTION__ADOPTED_ON', 'TIMESTAMP_TZ', 'YES'),
                                     ('ADOPTION__WAS_FOSTER', 'BOOLEAN', 'YES'),
                                     ('AGE', 'NUMBER', 'YES'),
                                     ('ID', 'NUMBER', 'NO'),
                                     ('NAME', 'TEXT', 'NO'),
                                     ('PAW_SIZE', 'NUMBER', 'NO'),
                                     ('PAW_COLOUR', 'TEXT', 'NO'),
                                     ('FLEA_CHECK_COMPLETE', 'BOOLEAN', 'NO'),
                                     ('PATTERN', 'TEXT', 'YES')
                                 })

            assert_columns_equal(cur,
                                 'CATS__ADOPTION__IMMUNIZATIONS',
                                 {
                                     ('_SDC_LEVEL_0_ID', 'NUMBER', 'NO'),
                                     ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                                     ('_SDC_SOURCE_KEY_ID', 'NUMBER', 'NO'),
                                     ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                                     ('DATE_ADMINISTERED', 'TIMESTAMP_TZ', 'YES'),
                                     ('TYPE', 'TEXT', 'YES')
                                 })

            assert_count_equal(cur, 'CATS', 0)

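# assert_columns_equal and assert_count_equal above are assumed to be shared
# helpers that compare a table's schema and row count against expectations.
# A minimal sketch of what they could look like, querying
# information_schema.columns (an assumption for illustration; the suite's
# real helpers may differ):
def _assert_columns_equal_sketch(cur, table_name, expected_column_tuples):
    cur.execute('''
        SELECT column_name, data_type, is_nullable
        FROM information_schema.columns
        WHERE table_name = '{}'
    '''.format(table_name))
    # compare as sets of (name, type, is_nullable) tuples, ignoring order
    assert set(cur.fetchall()) == expected_column_tuples


def _assert_count_equal_sketch(cur, table_name, expected_count):
    cur.execute('SELECT COUNT(*) FROM {}'.format(table_name))
    assert cur.fetchone()[0] == expected_count
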
def test_state__doesnt_emit_when_it_isnt_different_than_the_previous_emission(capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 5
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(100))
    target = Target()

    def test_stream():
        yield rows[0]
        for row in rows[slice(1, 21)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        output = filtered_output(capsys)
        assert len(output) == 1

        for row in rows[slice(22, 99)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        output = filtered_output(capsys)
        assert len(output) == 0

    target_tools.stream_to_target(test_stream(), target, config=config)

    output = filtered_output(capsys)
    assert len(output) == 0

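# filtered_output above is assumed to drain pytest's captured stdout and keep
# only the lines the target actually emitted (serialized state messages),
# dropping logging noise. A sketch under that assumption; the exact filtering
# rule is hypothetical, not this suite's actual implementation:
def _filtered_output_sketch(capsys):
    # readouterr() drains the captured buffers, so each call only sees
    # lines printed since the previous call
    captured = capsys.readouterr()
    output = []
    for line in captured.out.splitlines():
        if not line.strip():
            continue
        try:
            # keep only lines that parse as JSON, i.e. emitted state messages
            json.loads(line)
            output.append(line)
        except ValueError:
            pass
    return output
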
def test_multiple_batches_by_memory_upsert(db_cleanup):
    config = CONFIG.copy()
    config['max_batch_size'] = 1024
    config['batch_detection_threshold'] = 5

    stream = CatStream(100, nested_count=2)
    main(config, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            assert cur.fetchone()[0] == 200

        assert_records(conn, stream.records, 'cats', 'id')

    stream = CatStream(100, nested_count=3)
    main(config, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            assert cur.fetchone()[0] == 300

        assert_records(conn, stream.records, 'cats', 'id')

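# get_count_sql above is assumed to build a simple row-count query for a
# table. A one-line sketch under that assumption (the real helper may quote
# or qualify the identifier differently):
def _get_count_sql_sketch(table_name):
    return 'SELECT COUNT(*) FROM {}'.format(table_name)
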
def test_loading__empty__enabled_config__repeatability(db_prep):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True

    main(config, input_stream=CatStream(0))

    main(config, input_stream=CatStream(0))

    main(config, input_stream=CatStream(0))

def test_multiple_batches_by_memory(db_cleanup):
    with patch.object(postgres.PostgresTarget,
                      'write_batch',
                      side_effect=mocked_mock_write_batch) as mock_write_batch:
        config = CONFIG.copy()
        config['max_batch_size'] = 1024
        config['batch_detection_threshold'] = 5

        stream = CatStream(100)
        main(config, input_stream=stream)

        assert mock_write_batch.call_count == 21

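# mocked_mock_write_batch above is assumed to be a side_effect that stands in
# for PostgresTarget.write_batch, letting the test count flushes without
# touching the database. A plausible sketch, assuming the stream buffer
# exposes a flush_buffer() method (both the signature and that method are
# assumptions here):
def _mocked_write_batch_sketch(stream_buffer):
    # consume the buffered records so the buffer's memory accounting resets,
    # mirroring what a successful remote write would do, but skip the load
    stream_buffer.flush_buffer()
    return None
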
def test_state__emits_most_recent_state_when_final_flush_occurs(capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(5))
    rows.append(json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}}))

    target_tools.stream_to_target(rows, Target(), config=config)

    # The final state message should have been emitted after the last records
    # were loaded, despite not reaching one full flushable batch
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-1'

def test_state__doesnt_emit_when_only_one_of_several_streams_is_flushing(capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    cat_rows = list(CatStream(100))
    dog_rows = list(DogStream(50))
    target = Target()

    # Simulate one stream that yields many records alongside another that
    # yields few, and ensure both must be flushed before any state messages
    # are emitted
    def test_stream():
        yield cat_rows[0]
        yield dog_rows[0]
        for row in cat_rows[slice(1, 5)]:
            yield row
        for row in dog_rows[slice(1, 5)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        for row in cat_rows[slice(6, 45)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-2'}})

        for row in cat_rows[slice(46, 65)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-3'}})

        # After some state messages, but before the batch size has been hit
        # for both streams, no state messages should have been emitted
        assert len(target.calls['write_batch']) == 3
        output = filtered_output(capsys)
        assert output == []

        for row in dog_rows[slice(6, 25)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-4'}})

        # After the batch size has been hit and a write_batch call was made,
        # the most recent safe-to-emit state should have been emitted
        assert len(target.calls['write_batch']) == 4
        output = filtered_output(capsys)
        assert len(output) == 1
        assert json.loads(output[0])['value']['test'] == 'state-2'

    target_tools.stream_to_target(test_stream(), target, config=config)

    # The final state message should have been emitted after the last dog
    # records were loaded, despite not reaching one full flushable batch
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-4'

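# The Target() used in these state tests is assumed to be an in-memory test
# double that satisfies the target interface and records every write_batch
# call in a calls dict for later assertions. A minimal sketch under that
# assumption (flush_buffer() on the stream buffer is likewise assumed):
class _TargetSketch(object):
    def __init__(self):
        self.calls = {'write_batch': []}

    def write_batch(self, stream_buffer):
        # record the flushed records so tests can assert on call counts,
        # without writing anywhere
        self.calls['write_batch'].append(
            {'records': stream_buffer.flush_buffer()})
        return None
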
def test_state__emits_only_messages_when_all_records_before_have_been_flushed(capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(100))
    target = Target()

    def test_stream():
        yield rows[0]
        for row in rows[slice(1, 5)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})
        for row in rows[slice(6, 10)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-2'}})
        for row in rows[slice(11, 15)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-3'}})

        # After some state messages, but before the batch size has been hit,
        # no state messages should have been emitted
        assert len(target.calls['write_batch']) == 0
        output = filtered_output(capsys)
        assert output == []

        for row in rows[slice(16, 25)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-4'}})

        # After the batch size has been hit and a write_batch call was made,
        # the most recent safe-to-emit state should have been emitted
        assert len(target.calls['write_batch']) == 1
        output = filtered_output(capsys)
        assert len(output) == 1
        assert json.loads(output[0])['value']['test'] == 'state-3'

        for row in rows[slice(26, 31)]:
            yield row

    target_tools.stream_to_target(test_stream(), target, config=config)

    # The final state message should have been emitted after the last records
    # were loaded
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-4'

def test_hubspot__sandbox(db_cleanup):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True
    main(config, input_stream=HubspotStream())

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_tables_equal(cur, {'deals'})

            assert_columns_equal(cur,
                                 'deals',
                                 {
                                     ('_sdc_table_version', 'bigint', 'YES'),
                                     ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                                     ('_sdc_sequence', 'bigint', 'YES'),
                                     ('_sdc_primary_key', 'text', 'NO'),
                                     ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                                     ('properties__num_contacted_notes__value__f', 'double precision', 'YES'),
                                     ('properties__num_contacted_notes__value__s', 'text', 'YES')
                                 })

            assert_count_equal(cur, 'deals', 7)