Esempio n. 1
0
def test_loading__invalid__configuration__schema():
    """A schema whose top-level `type` is not a valid JSON Schema type aborts the load."""
    cats = CatStream(1)
    broken_schema = deepcopy(cats.schema)
    broken_schema['schema']['type'] = 'invalid type for a JSON Schema'
    cats.schema = broken_schema

    with pytest.raises(Exception, match=r'.*invalid JSON Schema instance.*'):
        main(CONFIG, input_stream=cats)
Esempio n. 2
0
def test_loading__simple(db_cleanup):
    """A single batch of cats creates the root and child tables with the
    expected columns, nullability, and row count."""
    expected_cats_columns = {
        ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
        ('_sdc_received_at', 'timestamp with time zone', 'YES'),
        ('_sdc_sequence', 'bigint', 'YES'),
        ('_sdc_table_version', 'bigint', 'YES'),
        ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
        ('adoption__was_foster', 'boolean', 'YES'),
        ('age', 'bigint', 'YES'),
        ('id', 'bigint', 'NO'),
        ('name', 'text', 'NO'),
        ('pattern', 'text', 'YES'),
    }
    expected_immunizations_columns = {
        ('_sdc_level_0_id', 'bigint', 'NO'),
        ('_sdc_sequence', 'bigint', 'YES'),
        ('_sdc_source_key_id', 'bigint', 'NO'),
        ('date_administered', 'timestamp with time zone', 'YES'),
        ('type', 'text', 'YES'),
    }

    cats = CatStream(100)
    main(CONFIG, input_stream=cats)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            # Root table columns
            cur.execute(get_columns_sql('cats'))
            assert set(cur.fetchall()) == expected_cats_columns

            # Denested child table columns
            cur.execute(get_columns_sql('cats__adoption__immunizations'))
            assert set(cur.fetchall()) == expected_immunizations_columns

            # One row per emitted record
            cur.execute(get_count_sql('cats'))
            assert cur.fetchone()[0] == 100

        assert_records(conn, cats.records, 'cats', 'id')
Esempio n. 3
0
def test_deduplication_older_rows(db_cleanup):
    """Duplicate records carrying an OLDER `_sdc_sequence` lose to the rows
    already in the table, so counts and persisted sequences are unchanged."""
    cats = CatStream(100,
                     nested_count=2,
                     duplicates=2,
                     duplicate_sequence_delta=-100)
    main(CONFIG, input_stream=cats)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            cats_count = cur.fetchone()[0]
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            immunizations_count = cur.fetchone()[0]

            # Sequences persisted for every pk that had a duplicate emitted
            dup_pks = ','.join(map(str, cats.duplicate_pks_used))
            cur.execute(
                'SELECT _sdc_sequence FROM cats WHERE id in ({})'.format(
                    dup_pks))
            persisted_dup_rows = cur.fetchall()

    # 102 messages were emitted (100 originals + 2 stale duplicates),
    # but only the 100 originals remain
    assert cats.record_message_count == 102
    assert cats_count == 100
    assert immunizations_count == 200

    # Each duplicated pk kept the newer sequence, not the stale one
    for row in persisted_dup_rows:
        assert row[0] == cats.sequence
def test_bigcommerce__sandbox(db_cleanup):
    """Sandbox smoke test on canned BigCommerce data: expected tables exist
    and the `customers` table has the expected columns."""
    expected_customers_columns = {
        ('_sdc_table_version', 'bigint', 'YES'),
        ('_sdc_received_at', 'timestamp with time zone', 'YES'),
        ('_sdc_sequence', 'bigint', 'YES'),
        ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
        ('id', 'bigint', 'NO'),
        ('date_modified', 'timestamp with time zone', 'NO'),
        ('store_credit', 'text', 'YES'),
        ('notes', 'text', 'YES'),
        ('tax_exempt_category', 'text', 'YES'),
        ('email', 'text', 'YES'),
        ('company', 'text', 'YES'),
        ('customer_group_id', 'bigint', 'YES'),
        ('registration_ip_address', 'text', 'YES'),
        ('date_created', 'timestamp with time zone', 'NO'),
        ('accepts_marketing', 'boolean', 'YES'),
        ('addresses__resource', 'text', 'YES'),
        ('reset_pass_on_login', 'boolean', 'YES'),
        ('addresses__url', 'text', 'YES'),
        ('first_name', 'text', 'YES'),
        ('phone', 'text', 'YES'),
        ('last_name', 'text', 'YES'),
    }

    main(CONFIG, input_stream=BigCommerceStream())

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_tables_equal(
                cur, {
                    'products', 'customers', 'products__categories',
                    'products__related_products'
                })

            ## form_fields should not show up as it can only be `null`
            assert_columns_equal(cur, 'customers', expected_customers_columns)
Esempio n. 5
0
def test_multiple_batches_by_memory_upsert(db_cleanup):
    """Memory-bounded batching must not break upsert semantics: two runs over
    the same 100 cats leave 100 rows, with child rows tracking the latest run."""
    config = CONFIG.copy()
    config['max_batch_size'] = 1024
    config['batch_detection_threshold'] = 5

    # Second pass bumps the nested child count from 2 to 3; the upsert must
    # reflect the new child rows while the parent count stays at 100.
    for nested_count, expected_immunizations in ((2, 200), (3, 300)):
        cats = CatStream(100, nested_count=nested_count)
        main(config, input_stream=cats)

        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                assert cur.fetchone()[0] == 100
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                assert cur.fetchone()[0] == expected_immunizations
            assert_records(conn, cats.records, 'cats', 'id')
Esempio n. 6
0
def test_loading__invalid__default_null_value__non_nullable_column():
    """Feeding the reserved NULL sentinel into a non-nullable column surfaces
    the underlying IntegrityError as a PostgresError."""

    class NullDefaultCatStream(CatStream):
        def generate_record(self):
            record = super().generate_record()
            record['name'] = postgres.RESERVED_NULL_DEFAULT
            return record

    with pytest.raises(postgres.PostgresError, match=r'.*IntegrityError.*'):
        main(CONFIG, input_stream=NullDefaultCatStream(20))
Esempio n. 7
0
def test_upsert__invalid__primary_key_change(db_cleanup):
    """Widening `key_properties` after the initial load must be rejected."""
    main(CONFIG, input_stream=CatStream(100))

    cats = CatStream(100)
    widened_schema = deepcopy(cats.schema)
    widened_schema['key_properties'].append('name')
    cats.schema = widened_schema

    with pytest.raises(postgres.PostgresError, match=r'.*key_properties.*'):
        main(CONFIG, input_stream=cats)
Esempio n. 8
0
def test_loading__invalid__column_type_change__pks__nullable():
    """Making a primary-key column nullable after the initial load must be
    rejected as a key type change."""
    main(CONFIG, input_stream=CatStream(20))

    cats = CatStream(20)
    nullable_schema = deepcopy(cats.schema)
    nullable_schema['schema']['properties']['id'] = json_schema.make_nullable(
        nullable_schema['schema']['properties']['id'])
    cats.schema = nullable_schema

    with pytest.raises(postgres.PostgresError,
                       match=r'.*key_properties. type change detected'):
        main(CONFIG, input_stream=cats)
Esempio n. 9
0
def test_multiple_batches_by_memory(db_cleanup):
    """A tiny `max_batch_size` forces a 100-record stream to be flushed as
    many small batches instead of one large one."""
    with patch.object(postgres.PostgresTarget,
                      'write_batch',
                      side_effect=mocked_mock_write_batch) as write_batch_spy:
        batch_config = CONFIG.copy()
        batch_config['max_batch_size'] = 1024
        batch_config['batch_detection_threshold'] = 5

        main(batch_config, input_stream=CatStream(100))

        # 21 flushes observed for this config/stream combination
        assert write_batch_spy.call_count == 21
Esempio n. 10
0
def test_loading__invalid__records__disable():
    """With invalid record detection disabled, a fully-invalid stream loads
    without error but persists nothing."""
    config = deepcopy(CONFIG)
    config['invalid_records_detect'] = False

    main(config, input_stream=InvalidCatStream(100))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            ## Every record was invalid, so nothing could be persisted and the
            ## `cats` table was never created: the column query returns no rows
            assert not cur.fetchall()
Esempio n. 11
0
def test_loading__new_non_null_column(db_cleanup):
    """Adding an integer column (with a default) to an existing table leaves
    pre-existing rows NULL for that column while new rows carry real values."""
    cat_count = 50
    main(CONFIG, input_stream=CatStream(cat_count))

    # Second batch uses shifted ids so it inserts rather than upserts
    class NonNullStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + cat_count
            return record

    toe_count_stream = NonNullStream(cat_count)
    toe_count_stream.schema = deepcopy(toe_count_stream.schema)
    toe_count_stream.schema['schema']['properties']['paw_toe_count'] = {
        'type': 'integer',
        'default': 5
    }

    main(CONFIG, input_stream=toe_count_stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            assert set(cur.fetchall()) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'),
                ('id', 'bigint', 'NO'),
                ('name', 'text', 'NO'),
                ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('paw_toe_count', 'bigint', 'YES'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(
                sql.SQL('SELECT {}, {} FROM {}').format(
                    sql.Identifier('id'), sql.Identifier('paw_toe_count'),
                    sql.Identifier('cats')))
            rows = cur.fetchall()

            ## Rows split evenly: the first batch pre-dates the column (NULL),
            ## the second batch populates it
            assert len(rows) == 2 * cat_count
            assert len([row for row in rows if row[1] is None]) == cat_count
            assert len([row for row in rows if row[1] is not None]) == cat_count
Esempio n. 12
0
def test_loading__invalid__column_type_change__pks():
    """Changing a primary-key column's type (integer -> string) after the
    initial load must be rejected as a key type change."""
    main(CONFIG, input_stream=CatStream(20))

    class StringIdCatStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = str(record['id'])
            return record

    cats = StringIdCatStream(20)
    string_id_schema = deepcopy(cats.schema)
    string_id_schema['schema']['properties']['id'] = {'type': 'string'}
    cats.schema = string_id_schema

    with pytest.raises(postgres.PostgresError,
                       match=r'.*key_properties. type change detected'):
        main(CONFIG, input_stream=cats)
Esempio n. 13
0
def test_nested_delete_on_parent(db_cleanup):
    """Re-upserting parents with fewer children prunes rows from the child
    table rather than leaving the old ones behind."""

    # Load 100 cats with the given child count and return the child-row total
    def load_and_count_children(nested_count):
        cats = CatStream(100, nested_count=nested_count)
        main(CONFIG, input_stream=cats)
        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                child_rows = cur.fetchone()[0]
            assert_records(conn, cats.records, 'cats', 'id')
        return child_rows

    high_nested = load_and_count_children(3)
    low_nested = load_and_count_children(2)

    assert low_nested < high_nested
Esempio n. 14
0
def test_hubspot__sandbox(db_cleanup):
    """Sandbox smoke test on canned HubSpot data: the `deals` table exists
    with the expected columns and row count."""
    expected_deals_columns = {
        ('_sdc_table_version', 'bigint', 'YES'),
        ('_sdc_received_at', 'timestamp with time zone', 'YES'),
        ('_sdc_sequence', 'bigint', 'YES'),
        ('_sdc_primary_key', 'text', 'NO'),
        ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
        ('properties__num_contacted_notes__value__f', 'double precision',
         'YES'),
        ('properties__num_contacted_notes__value__s', 'text', 'YES'),
    }

    config = CONFIG.copy()
    config['persist_empty_tables'] = True
    main(config, input_stream=HubspotStream())

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            assert_tables_equal(cur, {'deals'})
            assert_columns_equal(cur, 'deals', expected_deals_columns)
            assert_count_equal(cur, 'deals', 7)
Esempio n. 15
0
def test_upsert(db_cleanup):
    """Re-sending the same primary keys updates rows in place; sending more
    keys grows the table — runs of 100, 100, 200 leave 100, 100, 200 rows."""
    for cat_count in (100, 100, 200):
        cats = CatStream(cat_count)
        main(CONFIG, input_stream=cats)

        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                assert cur.fetchone()[0] == cat_count
            assert_records(conn, cats.records, 'cats', 'id')
Esempio n. 16
0
def test_deduplication_existing_new_rows(db_cleanup):
    """Replaying the same rows with an OLDER sequence leaves both the row
    counts and the persisted `_sdc_sequence` values untouched."""
    first_load = CatStream(100, nested_count=2)
    main(CONFIG, input_stream=first_load)

    original_sequence = first_load.sequence

    stale_replay = CatStream(100,
                             nested_count=2,
                             sequence=original_sequence - 20)
    main(CONFIG, input_stream=stale_replay)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_count_sql('cats'))
            cats_count = cur.fetchone()[0]
            cur.execute(get_count_sql('cats__adoption__immunizations'))
            immunizations_count = cur.fetchone()[0]

            cur.execute('SELECT DISTINCT _sdc_sequence FROM cats')
            sequences = cur.fetchall()

    assert cats_count == 100
    assert immunizations_count == 200

    # Only the original (newer) sequence survives the stale replay
    assert len(sequences) == 1
    assert sequences[0][0] == original_sequence
Esempio n. 17
0
def test_full_table_replication(db_cleanup):
    """Each new table version fully replaces the previous contents, for the
    parent table and the denested child table alike."""
    # (cat_count, version, nested_count, expected_child_rows)
    phases = (
        (110, 0, 3, 330),
        (100, 1, 3, 300),
        (120, 2, 2, 240),
    )

    for cat_count, version, nested_count, expected_child_rows in phases:
        cats = CatStream(cat_count, version=version, nested_count=nested_count)
        main(CONFIG, input_stream=cats)

        with psycopg2.connect(**TEST_DB) as conn:
            with conn.cursor() as cur:
                cur.execute(get_count_sql('cats'))
                persisted_count = cur.fetchone()[0]
                cur.execute(get_count_sql('cats__adoption__immunizations'))
                persisted_child_count = cur.fetchone()[0]
            assert_records(conn, cats.records, 'cats', 'id', match_pks=True)

        assert persisted_count == cat_count
        assert persisted_child_count == expected_child_rows
Esempio n. 18
0
def test_loading__invalid__records():
    """A stream containing an invalid record aborts with SingerStreamError by default."""
    with pytest.raises(singer_stream.SingerStreamError, match=r'.*'):
        main(CONFIG, input_stream=InvalidCatStream(1))
Esempio n. 19
0
def test_loading__invalid__records__threshold():
    """With `invalid_records_threshold` set to 10, a stream of 20 invalid
    records aborts once the threshold is crossed, and the error message
    reports the threshold value."""
    config = deepcopy(CONFIG)
    config['invalid_records_threshold'] = 10

    # Fixed regex: the original r'.*.10*' matched any char followed by "1"
    # and zero-or-more "0"s — it never actually required the threshold "10"
    # to appear in the message.
    with pytest.raises(singer_stream.SingerStreamError, match=r'.*10.*'):
        main(config, input_stream=InvalidCatStream(20))
Esempio n. 20
0
def test_loading__column_type_change__nullable(db_cleanup):
    """A non-nullable column can be made nullable in place.

    Three phases: (1) load cats with a non-null `name` and confirm the column
    is NOT NULL; (2) widen the schema so `name` is nullable and load rows with
    NULL names under fresh ids; (3) load non-null names again. Existing data
    must survive each migration, and the column ends up nullable ('YES').
    """
    cat_count = 20
    main(CONFIG, input_stream=CatStream(cat_count))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            # Phase 1: `name` starts out non-nullable ('NO')
            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'), ('id', 'bigint', 'NO'),
                ('name', 'text', 'NO'), ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(
                sql.SQL('SELECT {} FROM {}').format(sql.Identifier('name'),
                                                    sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert that the original data is present
            assert cat_count == len(persisted_records)
            assert cat_count == len(
                [x for x in persisted_records if x[0] is not None])

    # Phase 2: fresh ids (shifted by cat_count) with NULL names
    class NameNullCatStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + cat_count
            record['name'] = None
            return record

    stream = NameNullCatStream(cat_count)
    stream.schema = deepcopy(stream.schema)
    stream.schema['schema']['properties']['name'] = json_schema.make_nullable(
        stream.schema['schema']['properties']['name'])

    main(CONFIG, input_stream=stream)

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            # `name` has been migrated to nullable ('YES')
            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'), ('id', 'bigint', 'NO'),
                ('name', 'text', 'YES'), ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(
                sql.SQL('SELECT {} FROM {}').format(sql.Identifier('name'),
                                                    sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert the column kept its migrated data: old non-null rows
            ## plus the new all-NULL batch
            assert 2 * cat_count == len(persisted_records)
            assert cat_count == len(
                [x for x in persisted_records if x[0] is not None])
            assert cat_count == len(
                [x for x in persisted_records if x[0] is None])

    # Phase 3: fresh ids (shifted by 2 * cat_count) with non-null names again
    class NameNonNullCatStream(CatStream):
        def generate_record(self):
            record = CatStream.generate_record(self)
            record['id'] = record['id'] + 2 * cat_count
            return record

    main(CONFIG, input_stream=NameNonNullCatStream(cat_count))

    with psycopg2.connect(**TEST_DB) as conn:
        with conn.cursor() as cur:
            cur.execute(get_columns_sql('cats'))
            columns = cur.fetchall()

            # Column stays nullable even though the new batch is non-null
            assert set(columns) == {
                ('_sdc_batched_at', 'timestamp with time zone', 'YES'),
                ('_sdc_received_at', 'timestamp with time zone', 'YES'),
                ('_sdc_sequence', 'bigint', 'YES'),
                ('_sdc_table_version', 'bigint', 'YES'),
                ('adoption__adopted_on', 'timestamp with time zone', 'YES'),
                ('adoption__was_foster', 'boolean', 'YES'),
                ('age', 'bigint', 'YES'), ('id', 'bigint', 'NO'),
                ('name', 'text', 'YES'), ('paw_size', 'bigint', 'NO'),
                ('paw_colour', 'text', 'NO'),
                ('flea_check_complete', 'boolean', 'NO'),
                ('pattern', 'text', 'YES')
            }

            cur.execute(
                sql.SQL('SELECT {} FROM {}').format(sql.Identifier('name'),
                                                    sql.Identifier('cats')))
            persisted_records = cur.fetchall()

            ## Assert the column kept its migrated data across all three
            ## batches: two non-null batches and one NULL batch
            assert 3 * cat_count == len(persisted_records)
            assert 2 * cat_count == len(
                [x for x in persisted_records if x[0] is not None])
            assert cat_count == len(
                [x for x in persisted_records if x[0] is None])