def test_loading__empty__enabled_config(db_prep):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True

    stream = CatStream(0)
    main(config, input_stream=stream)

    with connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_columns_equal(
                cur, 'CATS',
                {('_SDC_BATCHED_AT', 'TIMESTAMP_TZ', 'YES'),
                 ('_SDC_RECEIVED_AT', 'TIMESTAMP_TZ', 'YES'),
                 ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                 ('_SDC_TABLE_VERSION', 'NUMBER', 'YES'),
                 ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                 ('ADOPTION__ADOPTED_ON', 'TIMESTAMP_TZ', 'YES'),
                 ('ADOPTION__WAS_FOSTER', 'BOOLEAN', 'YES'),
                 ('AGE', 'NUMBER', 'YES'),
                 ('ID', 'NUMBER', 'NO'),
                 ('NAME', 'TEXT', 'NO'),
                 ('PAW_SIZE', 'NUMBER', 'NO'),
                 ('PAW_COLOUR', 'TEXT', 'NO'),
                 ('FLEA_CHECK_COMPLETE', 'BOOLEAN', 'NO'),
                 ('PATTERN', 'TEXT', 'YES')})

            assert_columns_equal(
                cur, 'CATS__ADOPTION__IMMUNIZATIONS',
                {('_SDC_LEVEL_0_ID', 'NUMBER', 'NO'),
                 ('_SDC_SEQUENCE', 'NUMBER', 'YES'),
                 ('_SDC_SOURCE_KEY_ID', 'NUMBER', 'NO'),
                 ('_SDC_TARGET_SNOWFLAKE_CREATE_TABLE_PLACEHOLDER', 'BOOLEAN', 'YES'),
                 ('DATE_ADMINISTERED', 'TIMESTAMP_TZ', 'YES'),
                 ('TYPE', 'TEXT', 'YES')})

            assert_count_equal(cur, 'CATS', 0)
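
# The column/count assertion helpers are defined elsewhere in this suite.
# A minimal sketch of what they presumably do, matching the
# (name, type, is_nullable) tuples used above; names and queries here are
# illustrative assumptions, not the project's actual implementation.
def assert_columns_equal(cur, table_name, expected_columns):
    cur.execute(
        "SELECT column_name, data_type, is_nullable "
        "FROM information_schema.columns "
        "WHERE table_name = %s", (table_name,))
    assert set(cur.fetchall()) == expected_columns


def assert_count_equal(cur, table_name, expected_count):
    cur.execute('SELECT COUNT(*) FROM {}'.format(table_name))
    assert cur.fetchone()[0] == expected_count
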
def test_state__doesnt_emit_when_it_isnt_different_than_the_previous_emission(
        capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 5
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(100))
    target = Target()

    # Assertions inside the generator run when the consumer requests the
    # next message, i.e. only after the target has processed everything
    # yielded so far.
    def test_stream():
        yield rows[0]
        for row in rows[slice(1, 21)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})
        # The first STATE message is new, so it is emitted once the batch
        # flushes.
        output = filtered_output(capsys)
        assert len(output) == 1

        for row in rows[slice(22, 99)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        # An identical STATE message carries no new information, so nothing
        # further is emitted.
        output = filtered_output(capsys)
        assert len(output) == 0

    target_tools.stream_to_target(test_stream(), target, config=config)

    output = filtered_output(capsys)
    assert len(output) == 0
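
# filtered_output is a suite helper; a plausible sketch (assumed, not the
# actual implementation): read everything the target wrote to stdout via
# pytest's capsys and keep only non-empty lines, since the target emits one
# serialized STATE message per line.
def filtered_output(capsys):
    out, _err = capsys.readouterr()
    return [line for line in out.split('\n') if line.strip()]
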
def test_multiple_batches_by_memory_upsert(db_cleanup):
    config = CONFIG.copy()
    config['max_batch_size'] = 1024
    config['batch_detection_threshold'] = 5

    stream = CatStream(100, nested_count=2)
    main(config, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            assert cur.fetchone()[0] == 200
        assert_records(conn, stream.records, 'cats', 'id')

    # Re-load the same 100 cats, now with 3 immunizations each: the parent
    # rows are upserted in place (still 100) while the child rows are
    # replaced (now 300).
    stream = CatStream(100, nested_count=3)
    main(config, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            assert cur.fetchone()[0] == 300
        assert_records(conn, stream.records, 'cats', 'id')
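
# get_count_sql is another suite helper; presumably something along these
# lines (hypothetical):
def get_count_sql(table_name):
    return 'SELECT COUNT(*) FROM {}'.format(table_name)
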
def test_loading__empty__enabled_config__repeatability(db_prep):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True

    # Loading the same empty stream repeatedly should be idempotent and must
    # not raise.
    main(config, input_stream=CatStream(0))

    main(config, input_stream=CatStream(0))

    main(config, input_stream=CatStream(0))
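
# CatStream comes from the suite's fixtures. Its assumed shape, for context:
# an iterable of serialized Singer messages, one SCHEMA line followed by n
# RECORD lines, so CatStream(0) yields a schema and no records, which is
# why persist_empty_tables is needed for the empty table to be created.
import json

example_schema_line = json.dumps({
    'type': 'SCHEMA',
    'stream': 'cats',
    'schema': {'type': 'object',
               'properties': {'id': {'type': 'integer'},
                              'name': {'type': 'string'}}},
    'key_properties': ['id']})
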
def test_multiple_batches_by_memory(db_cleanup):
    with patch.object(postgres.PostgresTarget,
                      'write_batch',
                      side_effect=mocked_mock_write_batch) as mock_write_batch:
        config = CONFIG.copy()
        config['max_batch_size'] = 1024
        config['batch_detection_threshold'] = 5

        stream = CatStream(100)
        main(config, input_stream=stream)

        # With a 1 KiB max_batch_size, loading 100 records should trigger 21
        # separate write_batch flushes.
        assert mock_write_batch.call_count == 21
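
# mocked_mock_write_batch replaces PostgresTarget.write_batch above. A
# plausible stand-in (the signature is an assumption): drain the stream
# buffer so batch detection keeps firing, but never touch the database,
# which lets the test count flushes in isolation.
def mocked_mock_write_batch(stream_buffer):
    stream_buffer.flush_buffer()
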
def test_state__emits_most_recent_state_when_final_flush_occurs(capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(5))
    rows.append(json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}}))

    target_tools.stream_to_target(rows, Target(), config=config)

    # The final STATE message should have been emitted after the last records
    # were loaded, despite not reaching one full flushable batch.
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-1'
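
# Per the Singer spec, "emitting" a STATE message means printing it to
# stdout as a single JSON line, which is what filtered_output captures.
# A sketch of the presumed behavior:
import json
import sys


def emit_state(state_message):
    sys.stdout.write(json.dumps(state_message) + '\n')
    sys.stdout.flush()
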
def test_state__doesnt_emit_when_only_one_of_several_streams_is_flushing(
        capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    cat_rows = list(CatStream(100))
    dog_rows = list(DogStream(50))
    target = Target()

    # Simulate one stream that yields many records alongside another that
    # yields few, and ensure both must be flushed before any STATE messages
    # are emitted.
    def test_stream():
        yield cat_rows[0]
        yield dog_rows[0]
        for row in cat_rows[slice(1, 5)]:
            yield row
        for row in dog_rows[slice(1, 5)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})

        for row in cat_rows[slice(6, 45)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-2'}})

        for row in cat_rows[slice(46, 65)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-3'}})

        # After some STATE messages, but before both streams have hit the
        # batch size, no STATE messages should have been emitted.
        assert len(target.calls['write_batch']) == 3
        output = filtered_output(capsys)
        assert output == []

        for row in dog_rows[slice(6, 25)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-4'}})

        # Once the batch size has been hit and a write_batch call was made,
        # the most recent safe-to-emit state should have been emitted.
        assert len(target.calls['write_batch']) == 4
        output = filtered_output(capsys)
        assert len(output) == 1
        assert json.loads(output[0])['value']['test'] == 'state-2'

    target_tools.stream_to_target(test_stream(), target, config=config)

    # The final STATE message should have been emitted after the last dog
    # records were loaded, despite not reaching one full flushable batch.
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-4'
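
# The rule this multi-stream test exercises, sketched as a standalone
# function (a simplification for illustration, not target_tools' code):
# a STATE message is only safe to emit once every record that arrived
# before it has been flushed, across all streams.
def latest_safe_state(states, min_unflushed_position):
    """states: (position_in_input, state_message) pairs, oldest first.
    Return the newest state that precedes every unflushed record."""
    safe = [state for pos, state in states if pos < min_unflushed_position]
    return safe[-1] if safe else None
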
def test_state__emits_only_messages_when_all_records_before_have_been_flushed(
        capsys):
    config = CONFIG.copy()
    config['max_batch_rows'] = 20
    config['batch_detection_threshold'] = 1
    rows = list(CatStream(100))
    target = Target()

    def test_stream():
        yield rows[0]
        for row in rows[slice(1, 5)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-1'}})
        for row in rows[slice(6, 10)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-2'}})
        for row in rows[slice(11, 15)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-3'}})

        # After some STATE messages, but before the batch size has been hit,
        # no STATE messages should have been emitted.
        assert len(target.calls['write_batch']) == 0
        output = filtered_output(capsys)
        assert output == []

        for row in rows[slice(16, 25)]:
            yield row
        yield json.dumps({'type': 'STATE', 'value': {'test': 'state-4'}})

        # Once the batch size has been hit and a write_batch call was made,
        # the most recent safe-to-emit state should have been emitted.
        assert len(target.calls['write_batch']) == 1
        output = filtered_output(capsys)
        assert len(output) == 1
        assert json.loads(output[0])['value']['test'] == 'state-3'

        for row in rows[slice(26, 31)]:
            yield row

    target_tools.stream_to_target(test_stream(), target, config=config)

    # The final STATE message should have been emitted after the last records
    # were loaded.
    output = filtered_output(capsys)
    assert len(output) == 1
    assert json.loads(output[0])['value']['test'] == 'state-4'
def test_hubspot__sandbox(db_cleanup):
    config = CONFIG.copy()
    config['persist_empty_tables'] = True
    main(config, input_stream=HubspotStream())

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_tables_equal(cur, {'deals'})

            assert_columns_equal(
                cur, 'deals',
                {('_sdc_table_version', 'bigint', 'YES'),
                 ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                 ('_sdc_sequence', 'bigint', 'YES'),
                 ('_sdc_primary_key', 'text', 'NO'),
                 ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                 ('properties__num_contacted_notes__value__f', 'double precision', 'YES'),
                 ('properties__num_contacted_notes__value__s', 'text', 'YES')})

            assert_count_equal(cur, 'deals', 7)
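
# assert_tables_equal, like the other helpers, lives in the suite's utils;
# a plausible Postgres sketch (assumed, not the actual implementation):
def assert_tables_equal(cur, expected_tables):
    cur.execute("SELECT table_name FROM information_schema.tables "
                "WHERE table_schema = 'public'")
    assert {row[0] for row in cur.fetchall()} == expected_tables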