Exemple #1
0
    def tearDown(self):
        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(
                    """ SELECT pg_drop_replication_slot('stitch_dev') """)

        with db_utils.get_test_connection('postgres') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(
                    """ SELECT pg_drop_replication_slot('stitch_postgres') """)
    def test_run(self):
        """Parametrized automatic fields test running against each replication method."""

        # Test running a sync with no fields selected using full-table replication
        self.default_replication_method = self.FULL_TABLE
        full_table_conn_id = connections.ensure_connection(self)
        self.automatic_fields_test(full_table_conn_id)

        # NB | We expect primary keys and replication keys to have inclusion automatic for
        #      key-based incremental replication. But that is only true for primary keys.
        #      As a result we cannot run a sync with no fields selected. This BUG should not
        #      be carried over into hp-postgres, but will not be fixed for this tap.

        # Test running a sync with no fields selected using key-based incremental replication
        # self.default_replication_method = self.INCREMENTAL
        # incremental_conn_id = connections.ensure_connection(self, original_properties=False)
        # self.automatic_fields_test(incremental_conn_id)

        # Test running a sync with no fields selected using logical replication
        self.default_replication_method = self.LOG_BASED
        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                db_utils.ensure_replication_slot(cur, test_db)
        log_based_conn_id = connections.ensure_connection(
            self, original_properties=False)
        self.automatic_fields_test(log_based_conn_id)
Exemple #3
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db('discovery0')

        with db_utils.get_test_connection('discovery0') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]
                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

                cur = conn.cursor()
                cur.execute(
                    """ SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """
                )
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)

                #pylint: disable=line-too-long
                create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur))

                cur.execute(create_table_sql)
Exemple #4
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db()

        self.maxDiff = None

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                old_table = cur.execute("""SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                                        [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]

                if old_table:
                    cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur)))


                cur = conn.cursor()
                cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """)
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)

                cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""")
                cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
                cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """)


                create_table_sql = """
CREATE TABLE {} (id                      SERIAL PRIMARY KEY,
                our_bit_array            BIT(1)[],
                our_boolean_array        BOOLEAN[],
                our_cidr_array           CIDR[],
                our_citext_array         CITEXT[],
                our_date_array           DATE[],
                our_decimal_array        NUMERIC(12,2)[],
                our_double_array         DOUBLE PRECISION[],
                our_enum_array           ALIGNMENT[],
                our_float_array          FLOAT[],
                our_hstore_array         HSTORE[],
                our_inet_array           INET[],
                our_int_array            INTEGER[][],
                our_json_array           JSON[],
                our_jsonb_array          JSONB[],
                our_mac_array            MACADDR[],
                our_money_array          MONEY[],
                our_real_array           REAL[],
                our_smallint_array       SMALLINT[],
                our_string_array         VARCHAR[],
                our_text_array           TEXT[],
                our_time_array           TIME[],
                our_ts_tz_array          TIMESTAMP WITH TIME ZONE[],
                our_uuid_array           UUID[])
                """.format(canonicalized_table_name(test_schema_name, test_table_name, cur))

                cur.execute(create_table_sql)
Exemple #5
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db()

        self.maxDiff = None

        with db_utils.get_test_connection() as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                for table in [test_table_name_1, test_table_name_2]:
                    old_table = cur.execute("""SELECT EXISTS (
                                                  SELECT 1
                                                    FROM  information_schema.tables
                                                   WHERE table_schema = %s
                                                     AND  table_name =  %s)""",
                                            [test_schema_name, table])
                    old_table = cur.fetchone()[0]
                    if old_table:
                        cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur)))


                cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur)))
                cur.execute("""CREATE TABLE {}
                                (id SERIAL PRIMARY KEY,
                                 name VARCHAR,
                                 size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur)))

                cur.execute("""CREATE TABLE {}
                                (fk_id bigint,
                                 age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur)))

                cur.execute("""CREATE VIEW {} AS
                            (SELECT *
                              FROM {}
                              join {}
                                on {}.id = {}.fk_id
                    )""".format(quote_ident(test_view, cur),
                                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                                canonicalized_table_name(test_schema_name, test_table_name_2, cur),
                                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                                canonicalized_table_name(test_schema_name, test_table_name_2, cur)))

                self.rec_1 = { 'name' : 'fred', 'size' : 'big' }
                insert_record(cur, test_table_name_1, self.rec_1)

                cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
                fk_id = cur.fetchone()[0]

                self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 }
                insert_record(cur, test_table_name_2, self.rec_2)
    def setUp(self):
        db_utils.ensure_db('discovery0')
        creds = {}
        missing_envs = [
            x for x in [
                os.getenv('TAP_POSTGRES_HOST'),
                os.getenv('TAP_POSTGRES_USER'),
                os.getenv('TAP_POSTGRES_PASSWORD'),
                os.getenv('TAP_POSTGRES_PORT'),
                os.getenv('TAP_POSTGRES_DBNAME')
            ] if x == None
        ]
        if len(missing_envs) != 0:
            #pylint: disable=line-too-long
            raise Exception(
                "set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT"
            )

        with db_utils.get_test_connection('discovery0') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]
                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

                cur = conn.cursor()
                cur.execute(
                    """ SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """
                )
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)

                #pylint: disable=line-too-long
                create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur))

                cur.execute(create_table_sql)
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # Run discovery
        check_job_name = runner.run_check_mode(self, conn_id)

        # Verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # There should not be any tables in this database
        with db_utils.get_test_connection('discovery0') as conn:
            cur = conn.cursor()
            cur.execute("DROP TABLE {}".format(
                canonicalized_table_name(test_schema_name, test_table_name,
                                         cur)))

        # Run discovery again
        check_job_name = runner.run_check_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)

        # When discovery mode finds 0 tables, the tap returns an error
        self.assertEqual(exit_status['discovery_exit_status'], 1)
Exemple #8
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check  exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify the tap discovered the right streams
        found_catalogs = [
            fc for fc in menagerie.get_catalogs(conn_id)
            if fc['tap_stream_id'] in self.expected_check_streams()
        ]

        self.assertGreaterEqual(
            len(found_catalogs),
            1,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))

        # verify that persisted streams have the correct properties
        test_catalog = found_catalogs[0]

        self.assertEqual('postgres_logical_replication_test',
                         test_catalog['stream_name'])

        print("discovered streams are correct")

        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'LOG_BASED'
            }
        }]
        #don't selcted our_text_2
        _ = connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog,
            menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
            additional_md, ['our_text_2'])

        # clear state
        menagerie.set_state(conn_id, {})

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 4})
        records_by_stream = runner.get_records_from_target_output()

        table_version = records_by_stream['postgres_logical_replication_test'][
            'table_version']

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [0]['action'], 'activate_version')

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [1]['action'], 'upsert')

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [2]['action'], 'upsert')

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [3]['action'], 'upsert')

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [4]['action'], 'upsert')

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [5]['action'], 'activate_version')

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)

        bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(bookmark['lsn'],
                             msg="expected bookmark for stream to have an lsn")
        lsn_1 = bookmark['lsn']

        self.assertEqual(bookmark['version'],
                         table_version,
                         msg="expected bookmark for stream to match version")

        #----------------------------------------------------------------------
        # invoke the sync job again after adding a record
        #----------------------------------------------------------------------
        print("inserting a record 5")

        with db_utils.get_test_connection(test_db) as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                #insert fixture data 3
                our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(3, 4, 5)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1933, 3, 3)
                my_uuid = str(uuid.uuid1())

                #STRINGS:
                #OUR TS: '1993-03-03 03:03:03.333333'
                #OUR TS TZ: '1993-03-03 08:03:03.333333+00'
                #'OUR TIME': '03:04:05'
                #'OUR TIME TZ': '03:04:05+00'
                self.rec_5 = {
                    'our_varchar': "our_varchar 5",  # str
                    'our_varchar_10': "varchar13",  # str
                    'our_text': "some text 3",  #str
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 96000,  #int
                    'our_smallint': 3,  # int
                    'our_bigint': 3000000,  #int
                    'our_decimal': decimal.Decimal(
                        '1234567890.03'
                    ),  #1234567890.03 / our_decimal is a <class 'float'>
                    quote_ident('OUR TS', cur):
                    our_ts,  # str '1993-03-03 03:03:03.333333'
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,  #str '1993-03-03 08:03:03.333333+00'
                    quote_ident('OUR TIME', cur): our_time,  # str '03:04:05'
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,  # str '03:04:05+00'
                    quote_ident('OUR DATE', cur):
                    our_date,  #1933-03-03 / OUR DATE is a <class 'str'>
                    'our_double': 3.3,  #3.3 / our_double is a <class 'float'>
                    'our_real': 6.6,  #6.6 / our_real is a <class 'float'>
                    'our_boolean': True,  #boolean
                    'our_bit': '1',  #string
                    'our_json': json.dumps({'secret': 33}),  #string
                    'our_jsonb': json.dumps(['burgers make me hungry']),
                    'our_uuid': my_uuid,  #string
                    'our_store': 'jumps=>"high",name=>"betty"',  #string
                    'our_citext': 'maGICKal 3',
                    'our_cidr': '192.168.102.128/32',
                    'our_inet': '192.168.102.128/32',
                    'our_mac': '08:00:2b:01:02:05',
                    'our_money': '$412.1234'
                }

                insert_record(cur, test_table_name, self.rec_5)

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 1})
        records_by_stream = runner.get_records_from_target_output()

        self.assertTrue(len(records_by_stream) > 0)

        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertDictEqual(recs['schema'], expected_schemas[stream])

        self.assertEqual(
            1,
            len(records_by_stream['postgres_logical_replication_test']
                ['messages']))
        actual_record_2 = records_by_stream[
            'postgres_logical_replication_test']['messages'][0]['data']
        actual_sdc_lsn_2 = int(actual_record_2['_sdc_lsn'])
        del actual_record_2['_sdc_lsn']

        expected_inserted_record = {
            'our_text': 'some text 3',
            'our_real': decimal.Decimal('6.6'),
            '_sdc_deleted_at': None,
            'our_store': {
                'name': 'betty',
                'jumps': 'high'
            },
            'our_bigint': 3000000,
            'our_varchar': 'our_varchar 5',
            'our_double': decimal.Decimal('3.3'),
            'our_bit': True,
            'our_uuid': self.rec_5['our_uuid'],
            'OUR TS': '1993-03-03T03:03:03.333333+00:00',
            'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00',
            'OUR TIME': '03:04:05',
            'OUR TIME TZ': '03:04:05-04:00',
            'OUR DATE': '1933-03-03T00:00:00+00:00',
            'our_decimal': decimal.Decimal('1234567890.03'),
            'id': 5,
            'our_varchar_10': 'varchar13',
            'our_json': '{"secret": 33}',
            'our_jsonb': self.rec_5['our_jsonb'],
            'our_smallint': 3,
            'our_integer': 96000,
            'our_boolean': True,
            'our_citext': 'maGICKal 3',
            'our_cidr': self.rec_5['our_cidr'],
            'our_inet': '192.168.102.128',
            'our_mac': self.rec_5['our_mac'],
            'our_alignment_enum': None,
            'our_money': '$412.12'
        }
        self.assertDictEqual(expected_inserted_record, actual_record_2)

        self.assertEqual(
            records_by_stream['postgres_logical_replication_test']['messages']
            [0]['action'], 'upsert')
        print("inserted record is correct")

        state = menagerie.get_state(conn_id)
        chicken_bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_2 = chicken_bookmark['lsn']

        self.assertTrue(lsn_2 >= lsn_1)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )

        #----------------------------------------------------------------------
        # invoke the sync job again after deleting a record
        #----------------------------------------------------------------------
        print("delete row from source db")
        with db_utils.get_test_connection(test_db) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute("DELETE FROM {} WHERE id = 3".format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur)))

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        # verify the inserted record's lsn is less than or equal to the bookmarked lsn
        self.assertGreaterEqual(lsn_2, actual_sdc_lsn_2)
        expected_record_count = 1 if actual_sdc_lsn_2 < lsn_2 else 2
        self.assertEqual(
            record_count_by_stream,
            {'postgres_logical_replication_test': expected_record_count})

        records_by_stream = runner.get_records_from_target_output()

        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        # if there are 2 records...
        if expected_record_count == 2:
            # the 1st message will be the previous insert
            insert_message = records_by_stream[
                'postgres_logical_replication_test']['messages'][0]['data']
            del insert_message['_sdc_lsn']

            self.assertDictEqual(insert_message, expected_inserted_record)

        #the 2nd message will be the delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][
                expected_record_count - 1]
        self.assertEqual(delete_message['action'], 'upsert')

        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 3)
        print("deleted record is correct")

        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            bookmark['lsn'],
            msg="expected bookmark for stream ROOT-CHICKEN to have an scn")

        lsn_3 = bookmark['lsn']
        self.assertTrue(lsn_3 >= lsn_2)
        #----------------------------------------------------------------------
        # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format
        #----------------------------------------------------------------------
        print("delete row from source db")
        with db_utils.get_test_connection(test_db) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(
                    "DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)"
                    .format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur),
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 2})
        records_by_stream = runner.get_records_from_target_output()

        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        #first record will be the previous delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][0]
        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 3)

        #the 2nd message will be the more recent delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][1]
        self.assertEqual(delete_message['action'], 'upsert')

        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 2)
        print("deleted record is correct")

        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            bookmark['lsn'],
            msg="expected bookmark for stream ROOT-CHICKEN to have an scn")

        lsn_4 = bookmark['lsn']
        self.assertTrue(lsn_4 >= lsn_3)

        #table_version does NOT change
        self.assertEqual(
            bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream postgres_logical_replication_test to match version"
        )
        #----------------------------------------------------------------------
        # invoke the sync job again after deleting a record using the 'id IN (<id>, <id>)' format
        #----------------------------------------------------------------------
        print("delete row from source db")
        with db_utils.get_test_connection(test_db) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur)))

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 3})
        records_by_stream = runner.get_records_from_target_output()

        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        #first record will be the previous delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][0]
        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 2)

        #the 2nd message will be the more recent delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][1]
        self.assertEqual(delete_message['action'], 'upsert')

        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 4)
        print("deleted record is correct")

        #the 3rd message will be the more recent delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][2]
        self.assertEqual(delete_message['action'], 'upsert')

        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 5)
        print("deleted record is correct")

        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            bookmark['lsn'],
            msg="expected bookmark for stream ROOT-CHICKEN to have an scn")

        lsn_5 = bookmark['lsn']
        self.assertTrue(lsn_5 >= lsn_4)

        #table_version does NOT change
        self.assertEqual(
            bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream postgres_logical_replication_test to match version"
        )

        #----------------------------------------------------------------------
        # invoke the sync job again after updating a record
        #----------------------------------------------------------------------
        print("updating row from source db")
        with db_utils.get_test_connection(test_db) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(
                    "UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1"
                    .format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

        sync_job_name = runner.run_sync_mode(self, conn_id)
        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 3})
        records_by_stream = runner.get_records_from_target_output()
        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        self.assertEqual(
            len(records_by_stream['postgres_logical_replication_test']
                ['messages']), 3)
        #first record will be the previous first delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][0]
        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 4)

        #second record will be the previous second delete
        delete_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][1]
        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
        self.assertIsNotNone(sdc_deleted_at)
        self.assertEqual(delete_message['data']['id'], 5)

        #third record will be the new update
        updated_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][2]
        del updated_message['data']['_sdc_lsn']

        self.assertEqual(updated_message['action'], 'upsert')

        expected_updated_rec = {
            'our_varchar': 'THIS HAS BEEN UPDATED',
            'id': 1,
            'our_varchar_10': "varchar_10",
            'our_text': "some text",
            'our_integer': 44100,
            'our_smallint': 1,
            'our_bigint': 1000000,
            'our_decimal': None,
            'OUR TS': '1997-02-02T02:02:02.722184+00:00',
            'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00',
            'OUR TIME': '12:11:10',
            'OUR TIME TZ': '12:11:10-04:00',
            'OUR DATE': '1998-03-04T00:00:00+00:00',
            'our_double': None,
            'our_real': None,
            'our_boolean': True,
            'our_bit': False,
            'our_json': '{"secret": 55}',
            'our_jsonb': self.rec_1['our_jsonb'],
            'our_uuid': self.rec_1['our_uuid'],
            '_sdc_deleted_at': None,
            'our_store': {
                'name': 'betty',
                'size': 'small'
            },
            'our_citext': 'maGICKal',
            'our_cidr': self.rec_1['our_cidr'],
            'our_inet': self.rec_1['our_inet'],
            'our_mac': self.rec_1['our_mac'],
            'our_alignment_enum': 'bad',
            'our_money': '$56.81'
        }

        self.assertDictEqual(expected_updated_rec, updated_message['data'])
        print("updated record is correct")

        #check state again
        state = menagerie.get_state(conn_id)
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        chicken_bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_6 = chicken_bookmark['lsn']
        self.assertTrue(lsn_6 >= lsn_5)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )

        #----------------------------------------------------------------------
        # invoke the sync job one last time. should only get the PREVIOUS update
        #----------------------------------------------------------------------
        sync_job_name = runner.run_sync_mode(self, conn_id)
        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        # we will get the previous update record again
        self.assertEqual(record_count_by_stream,
                         {'postgres_logical_replication_test': 1})
        # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives
        update_message = records_by_stream[
            'postgres_logical_replication_test']['messages'][2]
        self.assertEqual(update_message['action'], 'upsert')

        self.assertEqual(
            set(update_message['data'].keys()),
            set(expected_updated_rec.keys()),
            msg="keys for expected_record_1 are wrong: {}".format(
                set(update_message['data'].keys()).symmetric_difference(
                    set(expected_updated_rec.keys()))))

        for k, v in update_message['data'].items():
            self.assertEqual(v,
                             expected_updated_rec[k],
                             msg="{} != {} for key {}".format(
                                 v, expected_updated_rec[k], k))

        #check state again
        state = menagerie.get_state(conn_id)
        chicken_bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_7 = chicken_bookmark['lsn']
        self.assertTrue(lsn_7 >= lsn_6)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )
Exemple #9
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db(test_db)

        self.maxDiff = None

        with db_utils.get_test_connection(test_db) as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(""" SELECT EXISTS (SELECT 1
                                                FROM  pg_replication_slots
                                               WHERE  slot_name = 'stitch') """
                            )

                old_slot = cur.fetchone()[0]
                with db_utils.get_test_connection(test_db, True) as conn2:
                    with conn2.cursor() as cur2:
                        if old_slot:
                            cur2.drop_replication_slot("stitch")
                        cur2.create_replication_slot('stitch',
                                                     output_plugin='wal2json')

                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]

                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

                cur = conn.cursor()
                cur.execute(
                    """ SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """
                )
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)

                cur.execute(
                    """ CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;"""
                )
                cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
                cur.execute(
                    """ CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """
                )

                create_table_sql = """
CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                our_varchar    VARCHAR,
                our_varchar_10 VARCHAR(10),
                our_text       TEXT,
                our_text_2     TEXT,
                our_integer    INTEGER,
                our_smallint   SMALLINT,
                our_bigint     BIGINT,
                our_decimal    NUMERIC(12,2),
                "OUR TS"       TIMESTAMP WITHOUT TIME ZONE,
                "OUR TS TZ"    TIMESTAMP WITH TIME ZONE,
                "OUR TIME"     TIME WITHOUT TIME ZONE,
                "OUR TIME TZ"  TIME WITH TIME ZONE,
                "OUR DATE"     DATE,
                our_double     DOUBLE PRECISION,
                our_real       REAL,
                our_boolean    BOOLEAN,
                our_bit        BIT(1),
                our_json       JSON,
                our_jsonb      JSONB,
                our_uuid       UUID,
                our_store      HSTORE,
                our_citext     CITEXT,
                our_cidr       cidr,
                our_inet       inet,
                our_mac            macaddr,
                our_alignment_enum ALIGNMENT,
                our_money          money)
                """.format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur))

                cur.execute(create_table_sql)

                #insert fixture data 1
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())

                self.rec_1 = {
                    'our_varchar': "our_varchar",
                    'our_varchar_10': "varchar_10",
                    'our_text': "some text",
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 44100,
                    'our_smallint': 1,
                    'our_bigint': 1000000,
                    'our_decimal': decimal.Decimal('1234567890.01'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': 1.1,
                    'our_real': 1.2,
                    'our_boolean': True,
                    'our_bit': '0',
                    'our_json': json.dumps({'secret': 55}),
                    'our_jsonb': json.dumps(['burgers are good']),
                    'our_uuid': my_uuid,
                    'our_store': 'size=>"small",name=>"betty"',
                    'our_citext': 'maGICKal',
                    'our_cidr': '192.168.100.128/25',
                    'our_inet': '192.168.100.128/24',
                    'our_mac': '08:00:2b:01:02:03',
                    'our_alignment_enum': 'bad'
                }

                insert_record(cur, test_table_name, self.rec_1)

                #insert fixture data 2
                our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(10, 9, 8)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1964, 7, 1)
                my_uuid = str(uuid.uuid1())

                self.rec_2 = {
                    'our_varchar': "our_varchar 2",
                    'our_varchar_10': "varchar_10",
                    'our_text': "some text 2",
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 44101,
                    'our_smallint': 2,
                    'our_bigint': 1000001,
                    'our_decimal': decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': 1.1,
                    'our_real': 1.2,
                    'our_boolean': True,
                    'our_bit': '1',
                    'our_json': json.dumps({'nymn': 77}),
                    'our_jsonb': json.dumps({'burgers': 'good++'}),
                    'our_uuid': my_uuid,
                    'our_store': 'dances=>"floor",name=>"betty"',
                    'our_citext': 'maGICKal 2',
                    'our_cidr': '192.168.101.128/25',
                    'our_inet': '192.168.101.128/24',
                    'our_mac': '08:00:2b:01:02:04',
                }

                insert_record(cur, test_table_name, self.rec_2)

                #insert fixture data 3
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())

                self.rec_3 = {
                    'our_varchar': "our_varchar 3",
                    'our_varchar_10': "varchar_10",
                    'our_text': "some text",
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 44100,
                    'our_smallint': 1,
                    'our_bigint': 1000000,
                    'our_decimal': decimal.Decimal('1234567890.01'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': 1.1,
                    'our_real': 1.2,
                    'our_boolean': True,
                    'our_bit': '0',
                    'our_json': json.dumps({'secret': 55}),
                    'our_jsonb': json.dumps(['burgers are good']),
                    'our_uuid': my_uuid,
                    'our_store': 'size=>"small",name=>"betty"',
                    'our_citext': 'maGICKal',
                    'our_cidr': '192.168.100.128/25',
                    'our_inet': '192.168.100.128/24',
                    'our_mac': '08:00:2b:01:02:03',
                    'our_alignment_enum': 'bad'
                }

                insert_record(cur, test_table_name, self.rec_3)

                #insert fixture data 4
                our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(10, 9, 8)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1964, 7, 1)
                my_uuid = str(uuid.uuid1())

                self.rec_4 = {
                    'our_varchar': "our_varchar 4",
                    'our_varchar_10': "varchar_10",
                    'our_text': "some text 2",
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 44101,
                    'our_smallint': 2,
                    'our_bigint': 1000001,
                    'our_decimal': decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': 1.1,
                    'our_real': 1.2,
                    'our_boolean': True,
                    'our_bit': '1',
                    'our_json': json.dumps({'nymn': 77}),
                    'our_jsonb': json.dumps({'burgers': 'good++'}),
                    'our_uuid': my_uuid,
                    'our_store': 'dances=>"floor",name=>"betty"',
                    'our_citext': 'maGICKal 2',
                    'our_cidr': '192.168.101.128/25',
                    'our_inet': '192.168.101.128/24',
                    'our_mac': '08:00:2b:01:02:04',
                }

                insert_record(cur, test_table_name, self.rec_4)
Exemple #10
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check  exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify the tap discovered the right streams
        found_catalogs = [
            fc for fc in menagerie.get_catalogs(conn_id)
            if fc['tap_stream_id'] in self.expected_check_streams()
        ]

        self.assertGreaterEqual(
            len(found_catalogs),
            1,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))

        # verify that persisted streams have the correct properties
        test_catalog = found_catalogs[0]

        self.assertEqual(test_table_name, test_catalog['stream_name'])

        print("discovered streams are correct")

        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'LOG_BASED'
            }
        }]
        selected_metadata = connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog,
            menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
            additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream, {test_table_name: 0})
        records_by_stream = runner.get_records_from_target_output()

        table_version = records_by_stream[test_table_name]['table_version']

        self.assertEqual(
            records_by_stream[test_table_name]['messages'][0]['action'],
            'activate_version')

        self.assertEqual(
            records_by_stream[test_table_name]['messages'][1]['action'],
            'activate_version')

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)

        bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_array_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(bookmark['lsn'],
                             msg="expected bookmark for stream to have an lsn")
        lsn_1 = bookmark['lsn']

        self.assertEqual(bookmark['version'],
                         table_version,
                         msg="expected bookmark for stream to match version")

        #----------------------------------------------------------------------
        # invoke the sync job again after adding a record
        #----------------------------------------------------------------------
        print("inserting a record")

        our_ts_tz = None
        our_date = None
        our_uuid = str(uuid.uuid1())
        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                #insert fixture data 2

                #insert fixture data 1
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_date = datetime.date(1998, 3, 4)

                self.rec_1 = {
                    'our_bit_array':
                    '{{0,1,1}}',
                    'our_boolean_array':
                    '{true}',
                    'our_cidr_array':
                    '{{192.168.100.128/25}}',
                    'our_citext_array':
                    '{{maGICKal 2}}',
                    'our_date_array':
                    '{{{}}}'.format(our_date),
                    'our_decimal_array':
                    '{{{}}}'.format(decimal.Decimal('1234567890.01')),
                    'our_double_array':
                    '{{1.232323}}',
                    'our_enum_array':
                    '{{bad}}',
                    'our_float_array':
                    '{{5.23}}',
                    'our_hstore_array':
                    """{{"size=>small","name=>betty"}}""",
                    'our_inet_array':
                    '{{192.168.100.128/24}}',
                    'our_int_array':
                    '{{1,2,3},{4,5,6}}',
                    'our_int8_array':
                    '{16,32,64}',
                    'our_json_array': [psycopg2.extras.Json({'secret': 55})],
                    'our_jsonb_array': [psycopg2.extras.Json({'secret': 69})],
                    'our_mac_array':
                    '{{08:00:2b:01:02:03}}',
                    'our_money_array':
                    '{{$412.1234}}',
                    'our_real_array':
                    '{{76.33}}',
                    'our_smallint_array':
                    '{{10,20,30},{40,50,60}}',
                    'our_string_array':
                    '{{one string, two strings}}',
                    'our_text_array':
                    '{{three string, four}}',
                    'our_time_array':
                    '{{03:04:05}}',
                    'our_ts_tz_array':
                    '{{{}}}'.format(our_ts_tz),
                    'our_uuid_array':
                    '{{{}}}'.format(our_uuid)
                }

                insert_record(cur, test_table_name, self.rec_1)

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        self.assertEqual(record_count_by_stream, {test_table_name: 1})
        records_by_stream = runner.get_records_from_target_output()
        self.assertTrue(len(records_by_stream) > 0)

        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        self.assertEqual(1,
                         len(records_by_stream[test_table_name]['messages']))
        actual_record_1 = records_by_stream[test_table_name]['messages'][0][
            'data']

        expected_inserted_record = {
            'id': 1,
            '_sdc_deleted_at': None,
            'our_bit_array': [[False, True, True]],
            'our_boolean_array': [True],
            'our_cidr_array': [['192.168.100.128/25']],
            'our_citext_array': [['maGICKal 2']],
            'our_date_array': ['1998-03-04T00:00:00+00:00'],
            'our_decimal_array': [decimal.Decimal('1234567890.01')],
            'our_double_array': [[decimal.Decimal('1.232323')]],
            'our_enum_array': [['bad']],
            'our_float_array': [[decimal.Decimal('5.23')]],
            'our_hstore_array': [[{
                'size': 'small'
            }, {
                'name': 'betty'
            }]],
            'our_inet_array': [['192.168.100.128/24']],
            'our_int_array': [[1, 2, 3], [4, 5, 6]],
            'our_int8_array': [16, 32, 64],
            'our_json_array': [json.dumps({'secret': 55})],
            'our_jsonb_array': [json.dumps({'secret': 69})],
            'our_mac_array': [['08:00:2b:01:02:03']],
            'our_money_array': [['$412.12']],
            'our_real_array': [[decimal.Decimal('76.33')]],
            'our_smallint_array': [[10, 20, 30], [40, 50, 60]],
            'our_string_array': [['one string', 'two strings']],
            'our_text_array': [['three string', 'four']],
            'our_time_array': [['03:04:05']],
            'our_ts_tz_array': ['1997-02-02T07:02:02.722184+00:00'],
            'our_uuid_array': ['{}'.format(our_uuid)]
        }

        self.assertEqual(set(actual_record_1.keys()),
                         set(expected_inserted_record.keys()),
                         msg="keys for expected_record_1 are wrong: {}".format(
                             set(actual_record_1.keys()).symmetric_difference(
                                 set(expected_inserted_record.keys()))))

        for k, v in actual_record_1.items():
            self.assertEqual(actual_record_1[k],
                             expected_inserted_record[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_1[k],
                                 expected_inserted_record[k], k))

        self.assertEqual(
            records_by_stream[test_table_name]['messages'][0]['action'],
            'upsert')
        print("inserted record is correct")

        state = menagerie.get_state(conn_id)
        chicken_bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_array_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_2 = chicken_bookmark['lsn']

        self.assertTrue(lsn_2 >= lsn_1)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )
Exemple #11
0
    def setUp(self):
        db_utils.ensure_db('dev')
        self.maxDiff = None
        creds = {}
        missing_envs = [
            x for x in [
                os.getenv('TAP_POSTGRES_HOST'),
                os.getenv('TAP_POSTGRES_USER'),
                os.getenv('TAP_POSTGRES_PASSWORD'),
                os.getenv('TAP_POSTGRES_PORT'),
                os.getenv('TAP_POSTGRES_DBNAME')
            ] if x == None
        ]
        if len(missing_envs) != 0:
            #pylint: disable=line-too-long
            raise Exception(
                "set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT"
            )

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(""" SELECT EXISTS (SELECT 1
                                                FROM  pg_replication_slots
                                               WHERE  slot_name = 'stitch') """
                            )

                old_slot = cur.fetchone()[0]
                with db_utils.get_test_connection('dev', True) as conn2:
                    with conn2.cursor() as cur2:
                        if old_slot:
                            cur2.drop_replication_slot("stitch")
                        cur2.create_replication_slot('stitch',
                                                     output_plugin='wal2json')

                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]

                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name, cur)))

                cur = conn.cursor()
                cur.execute(
                    """ SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """
                )
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)

                cur.execute(
                    """ CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;"""
                )
                cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
                cur.execute(
                    """ CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """
                )

                create_table_sql = """
CREATE TABLE {} (id                      SERIAL PRIMARY KEY,
                our_bit_array            BIT(1)[],
                our_boolean_array        BOOLEAN[],
                our_cidr_array           CIDR[],
                our_citext_array         CITEXT[],
                our_date_array           DATE[],
                our_decimal_array        NUMERIC(12,2)[],
                our_double_array         DOUBLE PRECISION[],
                our_enum_array           ALIGNMENT[],
                our_float_array          FLOAT[],
                our_hstore_array         HSTORE[],
                our_inet_array           INET[],
                our_int_array            INTEGER[][],
                our_int8_array           INT8[],
                our_json_array           JSON[],
                our_jsonb_array          JSONB[],
                our_mac_array            MACADDR[],
                our_money_array          MONEY[],
                our_real_array           REAL[],
                our_smallint_array       SMALLINT[],
                our_string_array         VARCHAR[],
                our_text_array           TEXT[],
                our_time_array           TIME[],
                our_ts_tz_array          TIMESTAMP WITH TIME ZONE[],
                our_uuid_array           UUID[])
                """.format(
                    canonicalized_table_name(test_schema_name, test_table_name,
                                             cur))

                cur.execute(create_table_sql)
Exemple #12
0
 def tearDown(self):
     with db_utils.get_test_connection('dev') as conn:
         conn.autocommit = True
Exemple #13
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)
        # verify check  exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify the tap discovered the right streams
        found_catalogs = [
            fc for fc in menagerie.get_catalogs(conn_id)
            if fc['tap_stream_id'] in self.expected_check_streams()
        ]

        self.assertGreaterEqual(
            len(found_catalogs),
            1,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))

        # verify that persisted streams have the correct properties
        test_catalog = found_catalogs[0]
        self.assertEqual(test_table_name, test_catalog['stream_name'])

        print("discovered streams are correct")

        print(
            'checking discoverd metadata for public-postgres_full_table_test...'
        )
        md = menagerie.get_annotated_schema(
            conn_id, test_catalog['stream_id'])['metadata']

        self.assertEqual(
            {
                ('properties', 'our_varchar'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'character varying'
                },
                ('properties', 'our_boolean'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'boolean'
                },
                ('properties', 'our_real'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'real'
                },
                ('properties', 'our_uuid'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'uuid'
                },
                ('properties', 'our_bit'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'bit'
                },
                ('properties', 'OUR TS TZ'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'timestamp with time zone'
                },
                ('properties', 'our_varchar_10'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'character varying'
                },
                ('properties', 'our_store'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'hstore'
                },
                ('properties', 'OUR TIME'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'time without time zone'
                },
                ('properties', 'our_decimal'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'numeric'
                },
                ('properties', 'OUR TS'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'timestamp without time zone'
                },
                ('properties', 'our_jsonb'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'jsonb'
                },
                ('properties', 'OUR TIME TZ'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'time with time zone'
                },
                ('properties', 'our_text'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'text'
                },
                ('properties', 'OUR DATE'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'date'
                },
                ('properties', 'our_double'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'double precision'
                },
                (): {
                    'is-view': False,
                    'schema-name': 'public',
                    'table-key-properties': ['id'],
                    'database-name': 'dev',
                    'row-count': 0
                },
                ('properties', 'our_bigint'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'bigint'
                },
                ('properties', 'id'): {
                    'inclusion': 'automatic',
                    'selected-by-default': True,
                    'sql-datatype': 'integer'
                },
                ('properties', 'our_json'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'json'
                },
                ('properties', 'our_smallint'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'smallint'
                },
                ('properties', 'our_integer'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'integer'
                },
                ('properties', 'our_cidr'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'cidr'
                },
                ('properties', 'our_citext'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'citext'
                },
                ('properties', 'our_inet'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'inet'
                },
                ('properties', 'our_mac'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'macaddr'
                },
                ('properties', 'our_money'): {
                    'inclusion': 'available',
                    'selected-by-default': True,
                    'sql-datatype': 'money'
                }
            }, metadata.to_map(md))

        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'INCREMENTAL',
                'replication-key': 'OUR TS TZ'
            }
        }]
        selected_metadata = connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog,
            menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
            additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        # Sync Job 1
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream, {test_table_name: 2})
        records_by_stream = runner.get_records_from_target_output()

        table_version = records_by_stream[test_table_name]['table_version']
        self.assertEqual(3,
                         len(records_by_stream[test_table_name]['messages']))
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][0]['action'],
            'activate_version')
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][1]['action'],
            'upsert')
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][2]['action'],
            'upsert')

        # verifications about individual records
        for table_name, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[table_name],
                msg=
                "Persisted schema did not match expected schema for table `{}`."
                .format(table_name))

        expected_record_2 = {
            'our_decimal': decimal.Decimal('1234567890.01'),
            'our_text': 'some text',
            'our_bit': False,
            'our_integer': 44100,
            'our_double': decimal.Decimal('1.1'),
            'id': 1,
            'our_json': '{"secret": 55}',
            'our_boolean': True,
            'our_jsonb': self.rec_1['our_jsonb'],
            'our_bigint': 1000000,
            'OUR TS': '1997-02-02T02:02:02.722184+00:00',
            'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00',
            'OUR TIME': '12:11:10',
            'OUR TIME TZ': '12:11:10-04:00',
            'our_store': {
                "name": "betty",
                "size": "small"
            },
            'our_smallint': 1,
            'OUR DATE': '1998-03-04T00:00:00+00:00',
            'our_varchar': 'our_varchar',
            'our_uuid': self.rec_1['our_uuid'],
            'our_real': decimal.Decimal('1.2'),
            'our_varchar_10': 'varchar_10',
            'our_citext': self.rec_1['our_citext'],
            'our_inet': self.rec_1['our_inet'],
            'our_cidr': self.rec_1['our_cidr'],
            'our_mac': self.rec_1['our_mac'],
            'our_money': '$1,445.57'
        }

        expected_record_1 = {
            'our_decimal': decimal.Decimal('9876543210.02'),
            'OUR TIME': '10:09:08',
            'our_text': 'some text 2',
            'our_bit': True,
            'our_integer': 44101,
            'our_double': decimal.Decimal('1.1'),
            'id': 2,
            'our_json': '{"nymn": 77}',
            'our_boolean': True,
            'our_jsonb': '{"burgers": "good++"}',
            'our_bigint': 1000001,
            'OUR TIME TZ': '10:09:08-04:00',
            'our_store': {
                "name": "betty",
                "dances": "floor"
            },
            'OUR TS TZ': '1987-03-03T08:03:03.733184+00:00',
            'our_smallint': 2,
            'OUR DATE': '1964-07-01T00:00:00+00:00',
            'our_varchar': 'our_varchar 2',
            'OUR TS': '1987-03-03T03:03:03.733184+00:00',
            'our_uuid': self.rec_2['our_uuid'],
            'our_real': decimal.Decimal('1.2'),
            'our_varchar_10': 'varchar_10',
            'our_citext': self.rec_2['our_citext'],
            'our_inet': self.rec_2['our_inet'],
            'our_cidr': self.rec_2['our_cidr'],
            'our_mac': self.rec_2['our_mac'],
            'our_money': None
        }

        actual_record_1 = records_by_stream[test_table_name]['messages'][1]
        self.assertEqual(set(actual_record_1['data'].keys()),
                         set(expected_record_1.keys()),
                         msg="keys for expected_record_1 are wrong: {}".format(
                             set(actual_record_1.keys()).symmetric_difference(
                                 set(expected_record_1.keys()))))

        for k, v in actual_record_1['data'].items():
            self.assertEqual(actual_record_1['data'][k],
                             expected_record_1[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_1['data'][k],
                                 expected_record_1[k], k))

        actual_record_2 = records_by_stream[test_table_name]['messages'][2]
        self.assertEqual(set(actual_record_1['data'].keys()),
                         set(expected_record_2.keys()),
                         msg="keys for expected_record_2 are wrong: {}".format(
                             set(actual_record_2.keys()).symmetric_difference(
                                 set(expected_record_2.keys()))))

        for k, v in actual_record_2['data'].items():
            self.assertEqual(actual_record_2['data'][k],
                             expected_record_2[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_2['data'][k],
                                 expected_record_2[k], k))

        print("records are correct")

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNone(
            bookmark.get('lsn'),
            msg=
            "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
        )
        self.assertEqual(
            bookmark['version'],
            table_version,
            msg="expected bookmark for stream ROOT-CHICKEN to match version")

        #----------------------------------------------------------------------
        # invoke the sync job AGAIN and get 1 record(the one matching the bookmark)
        #----------------------------------------------------------------------

        #Sync Job 2
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream, {test_table_name: 1})
        records_by_stream = runner.get_records_from_target_output()

        self.assertEqual(2,
                         len(records_by_stream[test_table_name]['messages']))
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][0]['action'],
            'activate_version')
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][1]['action'],
            'upsert')
        #table version did NOT change
        self.assertEqual(records_by_stream[test_table_name]['table_version'],
                         table_version)

        # verifications about individual records
        for stream, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        actual_record_3 = records_by_stream[test_table_name]['messages'][1]
        self.assertEqual(set(actual_record_3['data'].keys()),
                         set(expected_record_2.keys()),
                         msg="keys for expected_record_1 are wrong: {}".format(
                             set(actual_record_3.keys()).symmetric_difference(
                                 set(expected_record_2.keys()))))

        expected_record_3 = expected_record_2
        for k, v in actual_record_3['data'].items():
            self.assertEqual(actual_record_3['data'][k],
                             expected_record_3[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_3['data'][k],
                                 expected_record_3[k], k))
        print("records are correct")

        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']

        self.assertIsNone(
            bookmark.get('lsn'),
            msg=
            "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
        )
        self.assertEqual(
            bookmark['version'],
            table_version,
            msg="expected bookmark for stream ROOT-CHICKEN to match version")

        self.assertEqual(bookmark['replication_key'], 'OUR TS TZ')
        self.assertEqual(bookmark['replication_key_value'],
                         '1997-02-02T07:02:02.722184+00:00')

        #----------------------------------------------------------------------
        # insert new record with higher replication_key value and invoke the sync job AGAIN and get new record
        #----------------------------------------------------------------------
        our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
        nyc_tz = pytz.timezone('America/New_York')
        our_ts_tz = nyc_tz.localize(our_ts)
        our_time = datetime.time(12, 11, 10)
        our_time_tz = our_time.isoformat() + "-04:00"
        our_date = datetime.date(1999, 9, 9)
        my_uuid = str(uuid.uuid1())
        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                self.rec_3 = {
                    'our_varchar': "our_varchar 4",
                    'our_varchar_10': "varchar_3",
                    'our_text': "some text 4",
                    'our_integer': 55200,
                    'our_smallint': 1,
                    'our_bigint': 100000,
                    'our_decimal': decimal.Decimal('1234567899.99'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': decimal.Decimal('1.1'),
                    'our_real': decimal.Decimal('1.2'),
                    'our_boolean': True,
                    'our_bit': '0',
                    'our_json': json.dumps('some string'),
                    'our_jsonb': json.dumps(['burgers are good']),
                    'our_uuid': my_uuid,
                    'our_store': 'size=>"small",name=>"betty"',
                    'our_citext': 'cyclops 3',
                    'our_cidr': '192.168.101.128/25',
                    'our_inet': '192.168.101.128/24',
                    'our_mac': '08:00:2b:01:02:04',
                    'our_money': None,
                }
                insert_record(cur, test_table_name, self.rec_3)

        #Sync Job 3
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream, {test_table_name: 2})
        records_by_stream = runner.get_records_from_target_output()

        self.assertEqual(3,
                         len(records_by_stream[test_table_name]['messages']))
        #table version did NOT change
        self.assertEqual(records_by_stream[test_table_name]['table_version'],
                         table_version)
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][1]['action'],
            'upsert')
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][2]['action'],
            'upsert')

        # verificationsg about individual records
        for table_name, recs in records_by_stream.items():
            # verify the persisted schema was correct
            self.assertEqual(
                recs['schema'],
                expected_schemas[table_name],
                msg=
                "Persisted schema did not match expected schema for table `{}`."
                .format(table_name))

        #check 1st record
        actual_record_4 = records_by_stream[test_table_name]['messages'][1]
        expected_record_4 = expected_record_2
        self.assertEqual(set(actual_record_4['data'].keys()),
                         set(expected_record_1.keys()),
                         msg="keys for expected_record_4 are wrong: {}".format(
                             set(actual_record_4.keys()).symmetric_difference(
                                 set(expected_record_1.keys()))))
        for k, v in actual_record_4['data'].items():
            self.assertEqual(actual_record_4['data'][k],
                             expected_record_4[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_4['data'][k],
                                 expected_record_4[k], k))

        #check 2nd record
        actual_record_5 = records_by_stream[test_table_name]['messages'][2]
        expected_record_5 = {
            'our_decimal': decimal.Decimal('1234567899.99'),
            'our_text': 'some text 4',
            'our_bit': False,
            'our_integer': 55200,
            'our_double': decimal.Decimal('1.1'),
            'id': 3,
            'our_json': self.rec_3['our_json'],
            'our_boolean': True,
            'our_jsonb': self.rec_3['our_jsonb'],
            'our_bigint': 100000,
            'OUR TS': '2111-01-01T12:12:12.222111+00:00',
            'OUR TS TZ': '2111-01-01T17:12:12.222111+00:00',
            'OUR TIME': '12:11:10',
            'OUR TIME TZ': '12:11:10-04:00',
            'our_store': {
                "name": "betty",
                "size": "small"
            },
            'our_smallint': 1,
            'OUR DATE': '1999-09-09T00:00:00+00:00',
            'our_varchar': 'our_varchar 4',
            'our_uuid': self.rec_3['our_uuid'],
            'our_real': decimal.Decimal('1.2'),
            'our_varchar_10': 'varchar_3',
            'our_citext': 'cyclops 3',
            'our_cidr': '192.168.101.128/25',
            'our_inet': '192.168.101.128/24',
            'our_mac': '08:00:2b:01:02:04',
            'our_money': None
        }

        self.assertEqual(set(actual_record_5['data'].keys()),
                         set(expected_record_5.keys()),
                         msg="keys for expected_record_5 are wrong: {}".format(
                             set(actual_record_5.keys()).symmetric_difference(
                                 set(expected_record_5.keys()))))
        for k, v in actual_record_5['data'].items():
            self.assertEqual(actual_record_5['data'][k],
                             expected_record_5[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_5['data'][k],
                                 expected_record_5[k], k))
        print("records are correct")

        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']
        self.assertIsNone(
            bookmark.get('lsn'),
            msg=
            "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
        )
        self.assertEqual(
            bookmark['version'],
            table_version,
            msg="expected bookmark for stream ROOT-CHICKEN to match version")

        self.assertEqual(bookmark['replication_key'], 'OUR TS TZ')
        self.assertEqual(bookmark['replication_key_value'],
                         '2111-01-01T17:12:12.222111+00:00')
Exemple #14
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db(test_db)
        self.maxDiff = None

        with db_utils.get_test_connection(test_db) as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                # db_utils.ensure_replication_slot(cur, test_db)

                canonicalized_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)

                create_table_sql = """
CREATE TABLE {} (id                   SERIAL PRIMARY KEY,
                our_varchar           VARCHAR,
                our_varchar_10        VARCHAR(10),
                our_text              TEXT,
                our_text_2            TEXT,
                our_integer           INTEGER,
                our_smallint          SMALLINT,
                our_bigint            BIGINT,
                our_decimal           NUMERIC(12,2),
                "OUR TS"              TIMESTAMP WITHOUT TIME ZONE,
                "OUR TS TZ"           TIMESTAMP WITH TIME ZONE,
                "OUR TIME"            TIME WITHOUT TIME ZONE,
                "OUR TIME TZ"         TIME WITH TIME ZONE,
                "OUR DATE"            DATE,
                our_double            DOUBLE PRECISION,
                our_real              REAL,
                our_boolean           BOOLEAN,
                our_bit               BIT(1),
                our_json              JSON,
                our_jsonb             JSONB,
                our_uuid              UUID,
                our_store             HSTORE,
                our_citext            CITEXT,
                our_cidr              cidr,
                our_inet              inet,
                our_mac               macaddr,
                our_alignment_enum    ALIGNMENT,
                our_money             money,
                invalid_bigserial     BIGSERIAL,
                invalid_bit_varying   BIT VARYING,
                invalid_box           BOX,
                invalid_bytea         BYTEA,
                invalid_circle        CIRCLE,
                invalid_interval      INTERVAL,
                invalid_line          LINE,
                invalid_lseg          LSEG,
                invalid_path          PATH,
                invalid_pg_lsn        PG_LSN,
                invalid_point         POINT,
                invalid_polygon       POLYGON,
                invalid_serial        SERIAL,
                invalid_smallserial   SMALLSERIAL,
                invalid_tsquery       TSQUERY,
                invalid_tsvector      TSVECTOR,
                invalid_txid_snapshot TXID_SNAPSHOT,
                invalid_xml           XML)
                """.format(canonicalized_table_name)

                cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                                  test_table_name)
                cur.execute(create_table_sql)

                #insert fixture data 1
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())

                self.recs = []
                for _ in range(500):
                    our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                    nyc_tz = pytz.timezone('America/New_York')
                    our_ts_tz = nyc_tz.localize(our_ts)
                    our_time = datetime.time(10, 9, 8)
                    our_time_tz = our_time.isoformat() + "-04:00"
                    our_date = datetime.date(1964, 7, 1)
                    my_uuid = str(uuid.uuid1())

                    record = {
                        'our_varchar': "our_varchar 4",
                        'our_varchar_10': "varchar_10",
                        'our_text': "some text 2",
                        'our_text_2': "NOT SELECTED",
                        'our_integer': 44101,
                        'our_smallint': 2,
                        'our_bigint': 1000001,
                        'our_decimal': decimal.Decimal('9876543210.02'),
                        quote_ident('OUR TS', cur): our_ts,
                        quote_ident('OUR TS TZ', cur): our_ts_tz,
                        quote_ident('OUR TIME', cur): our_time,
                        quote_ident('OUR TIME TZ', cur): our_time_tz,
                        quote_ident('OUR DATE', cur): our_date,
                        'our_double': 1.1,
                        'our_real': 1.2,
                        'our_boolean': True,
                        'our_bit': '1',
                        'our_json': json.dumps({'nymn': 77}),
                        'our_jsonb': json.dumps({'burgers': 'good++'}),
                        'our_uuid': my_uuid,
                        'our_store': 'dances=>"floor",name=>"betty"',
                        'our_citext': 'maGICKal 2',
                        'our_cidr': '192.168.101.128/25',
                        'our_inet': '192.168.101.128/24',
                        'our_mac': '08:00:2b:01:02:04',
                    }

                    db_utils.insert_record(cur, test_table_name, record)
                    self.recs.append(record)

                cur.execute("""ANALYZE {}""".format(canonicalized_table_name))
Exemple #15
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode and verify exit codes
        check_job_name = runner.run_check_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify basics of discovery are consistent with expectations...

        # verify discovery produced (at least) 1 expected catalog
        found_catalogs = [
            found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
            if found_catalog['tap_stream_id'] in self.expected_check_streams()
        ]
        self.assertGreaterEqual(len(found_catalogs), 1)

        # verify the tap discovered the expected streams
        found_catalog_names = {
            catalog['tap_stream_id']
            for catalog in found_catalogs
        }
        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)

        # verify that persisted streams have the correct properties
        test_catalog = found_catalogs[0]
        self.assertEqual(test_table_name, test_catalog['stream_name'])
        print("discovered streams are correct")

        # perform table selection
        print('selecting {} and all fields within the table'.format(
            test_table_name))
        schema_and_metadata = menagerie.get_annotated_schema(
            conn_id, test_catalog['stream_id'])
        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'INCREMENTAL',
                'replication-key': 'OUR TS TZ'
            }
        }]
        _ = connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog, schema_and_metadata, additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        # run sync job 1 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # get records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(),
            self.expected_primary_keys())
        records_by_stream = runner.get_records_from_target_output()
        table_version = records_by_stream[test_table_name]['table_version']
        messages = records_by_stream[test_table_name]['messages']

        # verify the execpted number of records were replicated
        self.assertEqual(3, record_count_by_stream[test_table_name])

        # verify the message actions match expectations
        self.assertEqual(4, len(messages))
        self.assertEqual('activate_version', messages[0]['action'])
        self.assertEqual('upsert', messages[1]['action'])
        self.assertEqual('upsert', messages[2]['action'])
        self.assertEqual('upsert', messages[3]['action'])

        # verify the persisted schema matches expectations
        self.assertEqual(expected_schemas[test_table_name],
                         records_by_stream[test_table_name]['schema'])

        # verify replicated records match expectations
        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
        self.assertDictEqual(self.expected_records[2], messages[3]['data'])

        # verify records are in ascending order by replication-key value
        expected_replication_key = list(
            self.expected_replication_keys()[test_table_name])[0]
        self.assertLess(messages[1]['data'][expected_replication_key],
                        messages[2]['data'][expected_replication_key])
        self.assertLess(messages[2]['data'][expected_replication_key],
                        messages[3]['data'][expected_replication_key])

        print("records are correct")

        # grab bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']

        # verify state and bookmarks meet expectations
        self.assertIsNone(state['currently_syncing'])
        self.assertIsNone(bookmark.get('lsn'))
        self.assertEqual(table_version, bookmark['version'])
        self.assertEqual(expected_replication_key, bookmark['replication_key'])
        self.assertEqual(self.expected_records[2][expected_replication_key],
                         bookmark['replication_key_value'])

        #----------------------------------------------------------------------
        # invoke the sync job AGAIN following various manipulations to the data
        #----------------------------------------------------------------------

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                # NB | We will perform the following actions prior to the next sync:
                #      [Action (EXPECTED RESULT)]

                #      Insert a record with a lower replication-key value (NOT REPLICATED)
                #      Insert a record with a higher replication-key value (REPLICATED)

                #      Insert a record with a higher replication-key value and...
                #      Delete it (NOT REPLICATED)

                #      Update a record with a higher replication-key value (REPLICATED)
                #      Update a record with a lower replication-key value (NOT REPLICATED)

                # inserting...
                # a record with a replication-key value that is lower than the previous bookmark
                nyc_tz = pytz.timezone('America/New_York')
                our_time_offset = "-04:00"
                our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(6, 6, 6)
                our_time_tz = our_time.isoformat() + our_time_offset
                our_date = datetime.date(1970, 7, 1)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '1',
                    'our_json':
                    json.dumps({'nymn': 77}),
                    'our_jsonb':
                    json.dumps({'burgers': 'good++'}),
                    'our_uuid':
                    my_uuid,
                    'our_citext':
                    'cyclops 2',
                    'our_store':
                    'dances=>"floor",name=>"betty"',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    '$0.98789'
                })
                self.expected_records.append({
                    'id':
                    4,
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'OUR DATE':
                    '1970-07-01T00:00:00+00:00',
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    True,
                    'our_json':
                    '{"nymn": 77}',
                    'our_jsonb':
                    '{"burgers": "good++"}',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_citext':
                    self.inserted_records[-1]['our_citext'],
                    'our_store': {
                        "name": "betty",
                        "dances": "floor"
                    },
                    'our_cidr':
                    self.inserted_records[-1]['our_cidr'],
                    'our_inet':
                    self.inserted_records[-1]['our_inet'],
                    'our_mac':
                    self.inserted_records[-1]['our_mac'],
                    'our_money':
                    '$0.99'
                })
                # a record with a replication-key value that is higher than the previous bookmark
                our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1999, 9, 9)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 4",
                    'our_varchar_10':
                    "varchar_3",
                    'our_text':
                    "some text 4",
                    'our_integer':
                    55200,
                    'our_smallint':
                    1,
                    'our_bigint':
                    100000,
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps('some string'),
                    'our_jsonb':
                    json.dumps(['burgers are good']),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    'our_text':
                    'some text 4',
                    'our_bit':
                    False,
                    'our_integer':
                    55200,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    5,
                    'our_json':
                    self.inserted_records[-1]['our_json'],
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[-1]['our_jsonb'],
                    'our_bigint':
                    100000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1999-09-09T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 4',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_3',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None
                })
                # a record with a replication-key value that is higher than the previous bookmark (to be deleted)
                our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1999, 9, 9)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 4",
                    'our_varchar_10':
                    "varchar_3",
                    'our_text':
                    "some text 4",
                    'our_integer':
                    55200,
                    'our_smallint':
                    1,
                    'our_bigint':
                    100000,
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps('some string'),
                    'our_jsonb':
                    json.dumps(['burgers are good']),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    'our_text':
                    'some text 4',
                    'our_bit':
                    False,
                    'our_integer':
                    55200,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    6,
                    'our_json':
                    self.inserted_records[-1]['our_json'],
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[-1]['our_jsonb'],
                    'our_bigint':
                    100000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1999-09-09T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 4',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_3',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None
                })

                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[3])
                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[4])
                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[5])

                # update a record with a replication-key value that is higher than the previous bookmark
                canon_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)
                record_pk = 1
                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                updated_data = {
                    "OUR TS TZ": our_ts_tz,
                    "our_double": decimal.Decimal("6.6"),
                    "our_money": "$0.00"
                }
                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(
                    our_ts_tz)
                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
                self.expected_records[0]["our_money"] = "$0.00"
                db_utils.update_record(cur, canon_table_name, record_pk,
                                       updated_data)

                # update a record with a replication-key value that is lower than the previous bookmark
                canon_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)
                record_pk = 2
                our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                updated_data = {
                    "OUR TS TZ": our_ts_tz,
                    "our_double": decimal.Decimal("6.6"),
                    "our_money": "$0.00"
                }
                self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(
                    our_ts_tz)
                self.expected_records[1]["our_double"] = decimal.Decimal("6.6")
                self.expected_records[1]["our_money"] = "$0.00"
                db_utils.update_record(cur, canon_table_name, record_pk,
                                       updated_data)

                # delete a newly inserted record with a higher replication key than the previous bookmark
                record_pk = 5
                db_utils.delete_record(cur, canon_table_name, record_pk)

        # run sync job 2 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # grab records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(),
            self.expected_primary_keys())
        records_by_stream = runner.get_records_from_target_output()
        messages = records_by_stream[test_table_name]['messages']

        # verify the expected number of records were synced
        self.assertEqual(3, record_count_by_stream[test_table_name])

        # verify the message actions match expectations
        self.assertEqual('activate_version', messages[0]['action'])
        self.assertEqual('upsert', messages[1]['action'])
        self.assertEqual('upsert', messages[2]['action'])
        self.assertEqual('upsert', messages[3]['action'])

        # verify the persisted schema matches expectations
        self.assertEqual(expected_schemas[test_table_name],
                         records_by_stream[test_table_name]['schema'])

        # verify replicated records meet our expectations...

        # verify the first record was the bookmarked record from the previous sync
        self.assertDictEqual(self.expected_records[2], messages[1]['data'])

        # verify the expected updated record with a higher replication-key value was replicated
        self.assertDictEqual(self.expected_records[0], messages[2]['data'])

        # verify the expected inserted record with a lower replication-key value was NOT replicated
        actual_record_ids = [message['data']['id'] for message in messages[1:]]
        expected_record_id = self.expected_records[3]['id']
        self.assertNotIn(expected_record_id, actual_record_ids)

        # verify the deleted record with a lower replication-key value was NOT replicated
        expected_record_id = self.expected_records[4]['id']
        self.assertNotIn(expected_record_id, actual_record_ids)

        # verify the expected updated record with a lower replication-key value was NOT replicated
        expected_record_id = self.expected_records[1]['id']
        self.assertNotIn(expected_record_id, actual_record_ids)

        # verify the expected inserted record with a higher replication-key value was replicated
        self.assertDictEqual(self.expected_records[5], messages[3]['data'])

        # verify records are in ascending order by replication-key value
        self.assertLess(messages[1]['data'][expected_replication_key],
                        messages[2]['data'][expected_replication_key])
        self.assertLess(messages[2]['data'][expected_replication_key],
                        messages[3]['data'][expected_replication_key])

        print("records are correct")

        # get bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']

        # verify the bookmarked state matches our expectations
        self.assertIsNone(bookmark.get('lsn'))
        self.assertEqual(bookmark['version'], table_version)
        self.assertEqual(bookmark['replication_key'], expected_replication_key)
        self.assertEqual(bookmark['replication_key_value'],
                         self.expected_records[5][expected_replication_key])

        #---------------------------------------------------------------------
        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
        #----------------------------------------------------------------------

        # Delete a pre-existing record from the database
        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                # delete a record with a lower replication key than the previous sync
                record_pk = 1
                db_utils.delete_record(cur, canon_table_name, record_pk)

        # run sync job 3 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # get records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(),
            self.expected_primary_keys())
        records_by_stream = runner.get_records_from_target_output()
        messages = records_by_stream[test_table_name]['messages']

        # verify the expected number of records were replicated
        self.assertEqual(1, record_count_by_stream[test_table_name])

        # verify messages match our expectations
        self.assertEqual(2, len(messages))
        self.assertEqual(messages[0]['action'], 'activate_version')
        self.assertEqual(messages[1]['action'], 'upsert')
        self.assertEqual(records_by_stream[test_table_name]['table_version'],
                         table_version)

        # verify replicated records meet our expectations...

        # verify we did not re-replicate the deleted record
        actual_record_ids = [message['data']['id'] for message in messages[1:]]
        expected_record_id = self.expected_records[0]['id']
        self.assertNotIn(expected_record_id, actual_record_ids)

        # verify only the previously bookmarked record was synced
        self.assertDictEqual(self.expected_records[5], messages[1]['data'])

        print("records are correct")

        # get bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_incremental_replication_test']

        # verify the bookmarked state matches our expectations
        self.assertIsNone(bookmark.get('lsn'))
        self.assertEqual(bookmark['version'], table_version)
        self.assertEqual(bookmark['replication_key'], expected_replication_key)
        self.assertEqual(bookmark['replication_key_value'],
                         self.expected_records[5][expected_replication_key])
Exemple #16
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db("dev")

        self.maxDiff = None

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True

            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                canon_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)

                cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                                  test_table_name)

                create_table_sql = """
CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                our_varchar    VARCHAR,
                our_varchar_10 VARCHAR(10),
                our_text       TEXT,
                our_integer    INTEGER,
                our_smallint   SMALLINT,
                our_bigint     BIGINT,
                our_decimal    NUMERIC({},{}),
                "OUR TS"       TIMESTAMP WITHOUT TIME ZONE,
                "OUR TS TZ"    TIMESTAMP WITH TIME ZONE,
                "OUR TIME"     TIME WITHOUT TIME ZONE,
                "OUR TIME TZ"  TIME WITH TIME ZONE,
                "OUR DATE"     DATE,
                our_double     DOUBLE PRECISION,
                our_real       REAL,
                our_boolean    BOOLEAN,
                our_bit        BIT(1),
                our_json       JSON,
                our_jsonb      JSONB,
                our_uuid       UUID,
                our_store      HSTORE,
                our_citext     CITEXT,
                our_inet       inet,
                our_cidr       cidr,
                our_mac        macaddr,
                our_alignment_enum ALIGNMENT,
                our_money          money)
                """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE)

                cur.execute(create_table_sql)

                # insert fixture data and track expected records
                self.inserted_records = []
                self.expected_records = []

                # record 1
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text",
                    'our_integer':
                    44100,
                    'our_smallint':
                    1,
                    'our_bigint':
                    1000000,
                    'our_decimal':
                    decimal.Decimal('.01'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    1.2,
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps({'secret': 55}),
                    'our_jsonb':
                    json.dumps({'burgers': 'good'}),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'maGICKal 4',
                    'our_cidr':
                    '192.168.100.128/25',
                    'our_inet':
                    '192.168.100.128/24',
                    'our_mac':
                    '08:00:2b:01:02:03',
                    'our_alignment_enum':
                    'good',
                    'our_money':
                    '100.1122',
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('.01'),
                    'our_text':
                    'some text',
                    'our_bit':
                    False,
                    'our_integer':
                    44100,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    1,
                    'our_json':
                    '{"secret": 55}',
                    'our_boolean':
                    True,
                    'our_jsonb':
                    '{"burgers": "good"}',
                    'our_bigint':
                    1000000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1998-03-04T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar',
                    'our_uuid':
                    self.inserted_records[0]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_10',
                    'our_citext':
                    self.inserted_records[0]['our_citext'],
                    'our_inet':
                    self.inserted_records[0]['our_inet'],
                    'our_cidr':
                    self.inserted_records[0]['our_cidr'],
                    'our_mac':
                    self.inserted_records[0]['our_mac'],
                    'our_alignment_enum':
                    self.inserted_records[0]['our_alignment_enum'],
                    'our_money':
                    '$100.11'
                })
                # record 2
                our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(10, 9, 8)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1964, 7, 1)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('.02'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '1',
                    'our_json':
                    json.dumps(["nymn 77"]),
                    'our_jsonb':
                    json.dumps({'burgers': 'good++'}),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'dances=>"floor",name=>"betty"',
                    'our_citext':
                    'maGICKal 2',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('.02'),
                    'OUR TIME':
                    str(our_time),
                    'our_text':
                    'some text 2',
                    'our_bit':
                    True,
                    'our_integer':
                    44101,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    2,
                    'our_json':
                    '["nymn 77"]',
                    'our_boolean':
                    True,
                    'our_jsonb':
                    '{"burgers": "good++"}',
                    'our_bigint':
                    1000001,
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "dances": "floor"
                    },
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'our_smallint':
                    2,
                    'OUR DATE':
                    '1964-07-01T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 2',
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'our_uuid':
                    self.inserted_records[1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_10',
                    'our_citext':
                    self.inserted_records[1]['our_citext'],
                    'our_inet':
                    self.inserted_records[1]['our_inet'],
                    'our_cidr':
                    self.inserted_records[1]['our_cidr'],
                    'our_mac':
                    self.inserted_records[1]['our_mac'],
                    'our_alignment_enum':
                    None,
                    'our_money':
                    None
                })
                # record 3
                self.inserted_records.append({
                    'our_decimal':
                    decimal.Decimal('NaN'),
                    'our_double':
                    float('nan'),
                    'our_real':
                    float('-inf')
                })
                self.expected_records.append({
                    'id': 3,
                    # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support
                    # them and now we are at least consistent(ly wrong).
                    'our_decimal': None,
                    'our_double': None,
                    'our_real': None,
                    # any field without a set value will be set to NULL
                    'OUR TIME': None,
                    'our_text': None,
                    'our_bit': None,
                    'our_integer': None,
                    'our_json': None,
                    'our_boolean': None,
                    'our_jsonb': None,
                    'our_bigint': None,
                    'OUR TIME TZ': None,
                    'our_store': None,
                    'OUR TS TZ': None,
                    'our_smallint': None,
                    'OUR DATE': None,
                    'our_varchar': None,
                    'OUR TS': None,
                    'our_uuid': None,
                    'our_varchar_10': None,
                    'our_citext': None,
                    'our_inet': None,
                    'our_cidr': None,
                    'our_mac': None,
                    'our_alignment_enum': None,
                    'our_money': None
                })

                for record in self.inserted_records:
                    db_utils.insert_record(cur, test_table_name, record)
Exemple #17
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check  exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify the tap discovered the right streams
        found_catalogs = [
            fc for fc in menagerie.get_catalogs(conn_id)
            if fc['tap_stream_id'] in self.expected_check_streams()
        ]

        self.assertGreaterEqual(
            len(found_catalogs),
            2,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))

        # verify that persisted streams have the correct properties

        test_catalog_cows = list(
            filter(
                lambda c: c['stream_name'] ==
                'postgres_logical_replication_test_cows', found_catalogs))[0]
        self.assertEqual('postgres_logical_replication_test_cows',
                         test_catalog_cows['stream_name'])

        test_catalog_chickens = list(
            filter(
                lambda c: c['stream_name'
                            ] == 'postgres_logical_replication_test_chickens',
                found_catalogs))[0]
        self.assertEqual('postgres_logical_replication_test_chickens',
                         test_catalog_chickens['stream_name'])
        print("discovered streams are correct")

        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'LOG_BASED'
            }
        }]
        connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog_cows,
            menagerie.get_annotated_schema(conn_id,
                                           test_catalog_cows['stream_id']),
            additional_md)
        connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog_chickens,
            menagerie.get_annotated_schema(conn_id,
                                           test_catalog_chickens['stream_id']),
            additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(
            record_count_by_stream, {
                'postgres_logical_replication_test_cows': 1,
                'postgres_logical_replication_test_chickens': 1
            })
        records_by_stream = runner.get_records_from_target_output()

        table_version_cows = records_by_stream[
            'postgres_logical_replication_test_cows']['table_version']
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_cows']
            ['messages'][0]['action'], 'activate_version')
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_cows']
            ['messages'][1]['action'], 'upsert')
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_cows']
            ['messages'][2]['action'], 'activate_version')

        table_version_chickens = records_by_stream[
            'postgres_logical_replication_test_chickens']['table_version']
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_chickens']
            ['messages'][0]['action'], 'activate_version')
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_chickens']
            ['messages'][1]['action'], 'upsert')
        self.assertEqual(
            records_by_stream['postgres_logical_replication_test_chickens']
            ['messages'][2]['action'], 'activate_version')

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        bookmark_cows = state['bookmarks'][
            'dev-public-postgres_logical_replication_test_cows']
        self.assertIsNotNone(bookmark_cows['lsn'],
                             msg="expected bookmark for stream to have an lsn")
        lsn_cows_1 = bookmark_cows['lsn']
        self.assertEqual(bookmark_cows['version'],
                         table_version_cows,
                         msg="expected bookmark for stream to match version")

        bookmark_chickens = state['bookmarks'][
            'dev-public-postgres_logical_replication_test_chickens']
        self.assertIsNotNone(bookmark_chickens['lsn'],
                             msg="expected bookmark for stream to have an lsn")
        lsn_chickens_1 = bookmark_chickens['lsn']
        self.assertEqual(bookmark_chickens['version'],
                         table_version_chickens,
                         msg="expected bookmark for stream to match version")

        #----------------------------------------------------------------------
        # invoke the sync job again after adding records
        #----------------------------------------------------------------------
        print("inserting 2 more cows and 2 more chickens")

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                # insert another cow
                self.cows_rec_2 = {'cow_name': "betty cow", 'cow_age': 21}
                insert_record(cur, test_table_name_cows, self.cows_rec_2)
                # update that cow's expected values
                self.cows_rec_2['id'] = 2
                self.cows_rec_2['_sdc_deleted_at'] = None

                # insert another chicken
                self.chicken_rec_2 = {
                    'chicken_name': "burt chicken",
                    'chicken_age': 14
                }
                insert_record(cur, test_table_name_chickens,
                              self.chicken_rec_2)
                # update that cow's expected values
                self.chicken_rec_2['id'] = 2
                self.chicken_rec_2['_sdc_deleted_at'] = None

                # and repeat...

                self.cows_rec_3 = {'cow_name': "cindy cow", 'cow_age': 10}
                insert_record(cur, test_table_name_cows, self.cows_rec_3)
                self.cows_rec_3['id'] = 3
                self.cows_rec_3['_sdc_deleted_at'] = None

                self.chicken_rec_3 = {
                    'chicken_name': "carl chicken",
                    'chicken_age': 4
                }
                insert_record(cur, test_table_name_chickens,
                              self.chicken_rec_3)
                self.chicken_rec_3['id'] = 3
                self.chicken_rec_3['_sdc_deleted_at'] = None

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        self.assertEqual(
            record_count_by_stream, {
                'postgres_logical_replication_test_cows': 2,
                'postgres_logical_replication_test_chickens': 2
            })
        records_by_stream = runner.get_records_from_target_output()
        chicken_messages = records_by_stream[
            "postgres_logical_replication_test_chickens"]['messages']
        cow_messages = records_by_stream[
            "postgres_logical_replication_test_cows"]['messages']

        self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data'])
        self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data'])
        self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data'])
        self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data'])

        print("inserted record is correct")

        state = menagerie.get_state(conn_id)
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        cows_bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_test_cows']
        self.assertIsNotNone(
            cows_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_cows_2 = cows_bookmark['lsn']
        self.assertTrue(lsn_cows_2 >= lsn_cows_1)

        chickens_bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_test_chickens']
        self.assertIsNotNone(
            chickens_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an scn"
        )
        lsn_chickens_2 = chickens_bookmark['lsn']
        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)

        #table_version does NOT change
        self.assertEqual(
            chickens_bookmark['version'],
            table_version_chickens,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )

        #table_version does NOT change
        self.assertEqual(
            cows_bookmark['version'],
            table_version_cows,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )
Exemple #18
0
    def setUp(self):
        db_utils.ensure_db('dev')
        db_utils.ensure_db('postgres')
        self.maxDiff = None
        creds = {}
        missing_envs = [
            x for x in [
                os.getenv('TAP_POSTGRES_HOST'),
                os.getenv('TAP_POSTGRES_USER'),
                os.getenv('TAP_POSTGRES_PASSWORD'),
                os.getenv('TAP_POSTGRES_PORT'),
                os.getenv('TAP_POSTGRES_DBNAME')
            ] if x == None
        ]
        if len(missing_envs) != 0:
            #pylint: disable=line-too-long
            raise Exception(
                "set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT"
            )

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(""" SELECT EXISTS (SELECT 1
                                                FROM  pg_replication_slots
                                               WHERE  slot_name = 'stitch_dev') """
                            )
                old_slot = cur.fetchone()[0]
                with db_utils.get_test_connection('dev', True) as conn2:
                    with conn2.cursor() as cur2:
                        if old_slot:
                            cur2.drop_replication_slot("stitch_dev")
                        cur2.create_replication_slot('stitch_dev',
                                                     output_plugin='wal2json')

                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name_cows])
                old_table = cur.fetchone()[0]

                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name_cows, cur)))

                #create dev_cows
                cur = conn.cursor()
                create_table_sql = """
                CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                                cow_age        integer,
                                cow_name       varchar)
                """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_cows, cur))
                cur.execute(create_table_sql)

                #insert a cow
                self.cows_rec_1 = {'cow_name': "anne_cow", 'cow_age': 30}
                insert_record(cur, test_table_name_cows, self.cows_rec_1)

        with db_utils.get_test_connection('postgres') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(""" SELECT EXISTS (SELECT 1
                                                FROM  pg_replication_slots
                                               WHERE  slot_name = 'stitch_postgres') """
                            )
                old_slot = cur.fetchone()[0]
                with db_utils.get_test_connection('postgres', True) as conn2:
                    with conn2.cursor() as cur2:
                        if old_slot:
                            cur2.drop_replication_slot("stitch_postgres")
                        cur2.create_replication_slot('stitch_postgres',
                                                     output_plugin='wal2json')

                old_table = cur.execute(
                    """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                    [test_schema_name, test_table_name_chickens])
                old_table = cur.fetchone()[0]

                if old_table:
                    cur.execute("DROP TABLE {}".format(
                        canonicalized_table_name(test_schema_name,
                                                 test_table_name_chickens,
                                                 cur)))

                #create postgres_chickens
                create_table_sql = """
                CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                                chicken_age        integer,
                                chicken_name       varchar)
                """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_chickens, cur))
                cur.execute(create_table_sql)

                #insert a chicken
                self.chickens_rec_1 = {
                    'chicken_name': "alfred_chicken",
                    'chicken_age': 4
                }
                insert_record(cur, test_table_name_chickens,
                              self.chickens_rec_1)
    def setUp(self):
        db_utils.ensure_db("dev")
        self.maxDiff = None
        creds = {}
        missing_envs = [x for x in [os.getenv('TAP_POSTGRES_HOST'),
                                    os.getenv('TAP_POSTGRES_USER'),
                                    os.getenv('TAP_POSTGRES_PASSWORD'),
                                    os.getenv('TAP_POSTGRES_PORT'),
                                    os.getenv('TAP_POSTGRES_DBNAME')] if x == None]
        if len(missing_envs) != 0:
            #pylint: disable=line-too-long
            raise Exception("set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT")

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                old_table = cur.execute("""SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                                        [test_schema_name, test_table_name])
                old_table = cur.fetchone()[0]
                if old_table:
                    cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur)))

                cur = conn.cursor()
                cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """)
                if cur.fetchone()[0] is None:
                    cur.execute(""" CREATE EXTENSION hstore; """)
                cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""")

                cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
                cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """)

                create_table_sql = """
CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                our_varchar    VARCHAR,
                our_varchar_10 VARCHAR(10),
                our_text       TEXT,
                our_integer    INTEGER,
                our_smallint   SMALLINT,
                our_bigint     BIGINT,
                our_decimal    NUMERIC({},{}),
                "OUR TS"       TIMESTAMP WITHOUT TIME ZONE,
                "OUR TS TZ"    TIMESTAMP WITH TIME ZONE,
                "OUR TIME"     TIME WITHOUT TIME ZONE,
                "OUR TIME TZ"  TIME WITH TIME ZONE,
                "OUR DATE"     DATE,
                our_double     DOUBLE PRECISION,
                our_real       REAL,
                our_boolean    BOOLEAN,
                our_bit        BIT(1),
                our_json       JSON,
                our_jsonb      JSONB,
                our_uuid       UUID,
                our_store      HSTORE,
                our_citext     CITEXT,
                our_inet       inet,
                our_cidr       cidr,
                our_mac        macaddr,
                our_alignment_enum ALIGNMENT,
                our_money          money)
                """.format(canonicalized_table_name(test_schema_name, test_table_name, cur), NUMERIC_PRECISION, NUMERIC_SCALE)

                cur.execute(create_table_sql)

                #insert fixture data 1
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time  = datetime.time(12,11,10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1998, 3, 4)
                my_uuid =  str(uuid.uuid1())
                self.rec_1 = {'our_varchar' : "our_varchar",
                              'our_varchar_10' : "varchar_10",
                              'our_text' : "some text",
                              'our_integer' : 44100,
                              'our_smallint' : 1,
                              'our_bigint' : 1000000,
                              'our_decimal' : decimal.Decimal('.01'),
                              quote_ident('OUR TS', cur) : our_ts,
                              quote_ident('OUR TS TZ', cur) :  our_ts_tz,
                              quote_ident('OUR TIME', cur) : our_time,
                              quote_ident('OUR TIME TZ', cur) : our_time_tz,
                              quote_ident('OUR DATE', cur) : our_date,
                              'our_double' : decimal.Decimal('1.1'),
                              'our_real' : 1.2,
                              'our_boolean' : True,
                              'our_bit' : '0',
                              'our_json' : json.dumps({'secret' : 55}),
                              'our_jsonb' : json.dumps({'burgers' : 'good'}),
                              'our_uuid' : my_uuid,
                              'our_store' : 'size=>"small",name=>"betty"',
                              'our_citext': 'maGICKal 4',
                              'our_cidr' : '192.168.100.128/25',
                              'our_inet': '192.168.100.128/24',
                              'our_mac' : '08:00:2b:01:02:03',
                              'our_alignment_enum': 'good',
                              'our_money':    '100.1122',
                }

                insert_record(cur, test_table_name, self.rec_1)


                #insert fixture data 2
                our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time  = datetime.time(10,9,8)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1964, 7, 1)
                my_uuid =  str(uuid.uuid1())

                self.rec_2 = {'our_varchar' : "our_varchar 2",
                              'our_varchar_10' : "varchar_10",
                              'our_text' : "some text 2",
                              'our_integer' : 44101,
                              'our_smallint' : 2,
                              'our_bigint' : 1000001,
                              'our_decimal' : decimal.Decimal('.02'),
                              quote_ident('OUR TS', cur) : our_ts,
                              quote_ident('OUR TS TZ', cur) : our_ts_tz,
                              quote_ident('OUR TIME', cur) : our_time,
                              quote_ident('OUR TIME TZ', cur) : our_time_tz,
                              quote_ident('OUR DATE', cur) : our_date,
                              'our_double' : decimal.Decimal('1.1'),
                              'our_real' : decimal.Decimal('1.2'),
                              'our_boolean' : True,
                              'our_bit' : '1',
                              'our_json' : json.dumps(["nymn 77"]),
                              'our_jsonb' : json.dumps({'burgers' : 'good++'}),
                              'our_uuid' : my_uuid,
                              'our_store' : 'dances=>"floor",name=>"betty"',
                              'our_citext': 'maGICKal 2',
                              'our_cidr' : '192.168.101.128/25',
                              'our_inet': '192.168.101.128/24',
                              'our_mac' : '08:00:2b:01:02:04',
                              'our_money': None
                }

                insert_record(cur, test_table_name, self.rec_2)

                self.rec_3 = {'our_decimal' : decimal.Decimal('NaN'),
                              'our_double' : float('nan'),
                              'our_real' : float('-inf')
                }

                insert_record(cur, test_table_name, self.rec_3)
Exemple #20
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db("dev")

        self.maxDiff = None

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                cur.execute(""" SELECT EXISTS (SELECT 1
                                                FROM  pg_replication_slots
                                               WHERE  slot_name = 'stitch') """
                            )
                old_slot = cur.fetchone()[0]
                with db_utils.get_test_connection('dev', True) as conn2:
                    with conn2.cursor() as cur2:
                        if old_slot:
                            cur2.drop_replication_slot("stitch")
                        cur2.create_replication_slot('stitch',
                                                     output_plugin='wal2json')

                for t in [test_table_name_cows, test_table_name_chickens]:
                    old_table = cur.execute(
                        """SELECT EXISTS (
                                          SELECT 1
                                          FROM  information_schema.tables
                                          WHERE  table_schema = %s
                                          AND  table_name =   %s);""",
                        [test_schema_name, t])
                    old_table = cur.fetchone()[0]

                    if old_table:
                        cur.execute("DROP TABLE {}".format(
                            canonicalized_table_name(test_schema_name, t,
                                                     cur)))

                cur = conn.cursor()
                create_table_sql = """
                CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                                cow_age        integer,
                                cow_name       varchar)
                """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_cows, cur))
                cur.execute(create_table_sql)

                create_table_sql = """
                CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                                chicken_age        integer,
                                chicken_name       varchar)
                """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_chickens, cur))
                cur.execute(create_table_sql)

                #insert a cow
                self.cows_rec_1 = {'cow_name': "anne_cow", 'cow_age': 30}
                insert_record(cur, test_table_name_cows, self.cows_rec_1)

                #insert a chicken
                self.chickens_rec_1 = {
                    'chicken_name': "alfred_chicken",
                    'chicken_age': 4
                }
                insert_record(cur, test_table_name_chickens,
                              self.chickens_rec_1)
Exemple #21
0
    def setUp(self):
        db_utils.ensure_db()
        self.maxDiff = None
        creds = {}
        missing_envs = [
            x for x in [
                os.getenv('TAP_POSTGRES_HOST'),
                os.getenv('TAP_POSTGRES_USER'),
                os.getenv('TAP_POSTGRES_PASSWORD'),
                os.getenv('TAP_POSTGRES_DBNAME'),
                os.getenv('TAP_POSTGRES_PORT')
            ] if x == None
        ]
        if len(missing_envs) != 0:
            #pylint: disable=line-too-long
            raise Exception(
                "set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT"
            )

        with db_utils.get_test_connection() as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                for table in [test_table_name_1, test_table_name_2]:
                    old_table = cur.execute(
                        """SELECT EXISTS (
                                                  SELECT 1
                                                    FROM  information_schema.tables
                                                   WHERE table_schema = %s
                                                     AND  table_name =  %s)""",
                        [test_schema_name, table])
                    old_table = cur.fetchone()[0]
                    if old_table:
                        cur.execute("DROP TABLE {} CASCADE".format(
                            canonicalized_table_name(test_schema_name, table,
                                                     cur)))

                cur.execute("""DROP VIEW IF EXISTS {} """.format(
                    quote_ident(test_view, cur)))
                cur.execute("""CREATE TABLE {}
                                (id SERIAL PRIMARY KEY,
                                 name VARCHAR,
                                 size VARCHAR) """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_1, cur)))

                cur.execute("""CREATE TABLE {}
                                (fk_id bigint,
                                 age integer) """.format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_2, cur)))

                cur.execute("""CREATE VIEW {} AS
                            (SELECT *
                              FROM {}
                              join {}
                                on {}.id = {}.fk_id
                    )""".format(
                    quote_ident(test_view, cur),
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_1, cur),
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_2, cur),
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_1, cur),
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_2, cur)))

                self.rec_1 = {'name': 'fred', 'size': 'big'}
                insert_record(cur, test_table_name_1, self.rec_1)

                cur.execute("SELECT id FROM {}".format(
                    canonicalized_table_name(test_schema_name,
                                             test_table_name_1, cur)))
                fk_id = cur.fetchone()[0]

                self.rec_2 = {'fk_id': fk_id, 'age': 99}
                insert_record(cur, test_table_name_2, self.rec_2)
Exemple #22
0
    def test_run(self):
        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check  exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify discovery produced (at least) 1 expected catalog
        found_catalogs = [
            found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
            if found_catalog['tap_stream_id'] in self.expected_check_streams()
        ]
        self.assertGreaterEqual(len(found_catalogs), 1)

        # verify the tap discovered the expected streams
        found_catalog_names = {
            catalog['tap_stream_id']
            for catalog in found_catalogs
        }
        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)

        # verify that persisted streams have the correct properties
        test_catalog = found_catalogs[0]
        self.assertEqual(test_table_name, test_catalog['stream_name'])
        print("discovered streams are correct")

        # perform table selection
        print('selecting {} and all fields within the table'.format(
            test_table_name))
        schema_and_metadata = menagerie.get_annotated_schema(
            conn_id, test_catalog['stream_id'])
        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'FULL_TABLE'
            }
        }]
        _ = connections.select_catalog_and_fields_via_metadata(
            conn_id, test_catalog, schema_and_metadata, additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        # run sync job 1 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # get records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        records_by_stream = runner.get_records_from_target_output()
        table_version_1 = records_by_stream[test_table_name]['table_version']
        messages = records_by_stream[test_table_name]['messages']

        # verify the execpted number of records were replicated
        self.assertEqual(3, record_count_by_stream[test_table_name])

        # verify the message actions match expectations
        self.assertEqual(5, len(messages))
        self.assertEqual('activate_version', messages[0]['action'])
        self.assertEqual('upsert', messages[1]['action'])
        self.assertEqual('upsert', messages[2]['action'])
        self.assertEqual('upsert', messages[3]['action'])
        self.assertEqual('activate_version', messages[4]['action'])

        # verify the persisted schema matches expectations
        self.assertEqual(expected_schemas[test_table_name],
                         records_by_stream[test_table_name]['schema'])

        # verify replicated records match expectations
        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
        self.assertDictEqual(self.expected_records[2], messages[3]['data'])

        print("records are correct")

        # grab bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_full_table_replication_test']

        # verify state and bookmarks meet expectations
        self.assertIsNone(state['currently_syncing'])
        self.assertIsNone(bookmark.get('lsn'))
        self.assertIsNone(bookmark.get('replication_key'))
        self.assertIsNone(bookmark.get('replication_key_value'))
        self.assertEqual(table_version_1, bookmark['version'])

        #----------------------------------------------------------------------
        # invoke the sync job AGAIN and get the same 3 records
        #----------------------------------------------------------------------

        # run sync job 2 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # get records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        records_by_stream = runner.get_records_from_target_output()
        table_version_2 = records_by_stream[test_table_name]['table_version']
        messages = records_by_stream[test_table_name]['messages']

        # verify the execpted number of records were replicated
        self.assertEqual(3, record_count_by_stream[test_table_name])

        # verify the message actions match expectations
        self.assertEqual(4, len(messages))
        self.assertEqual('upsert', messages[0]['action'])
        self.assertEqual('upsert', messages[1]['action'])
        self.assertEqual('upsert', messages[2]['action'])
        self.assertEqual('activate_version', messages[3]['action'])

        # verify the new table version increased on the second sync
        self.assertGreater(table_version_2, table_version_1)

        # verify the persisted schema still matches expectations
        self.assertEqual(expected_schemas[test_table_name],
                         records_by_stream[test_table_name]['schema'])

        # verify replicated records still match expectations
        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
        self.assertDictEqual(self.expected_records[2], messages[2]['data'])

        # grab bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_full_table_replication_test']

        # verify state and bookmarks meet expectations
        self.assertIsNone(state['currently_syncing'])
        self.assertIsNone(bookmark.get('lsn'))
        self.assertIsNone(bookmark.get('replication_key'))
        self.assertIsNone(bookmark.get('replication_key_value'))
        self.assertEqual(table_version_2, bookmark['version'])

        #----------------------------------------------------------------------
        # invoke the sync job AGAIN following various manipulations to the data
        #----------------------------------------------------------------------

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                # NB | We will perform the following actions prior to the next sync:
                #      [Action (EXPECTED RESULT)]

                #      Insert a record
                #      Insert a record to be updated prior to sync
                #      Insert a record to be deleted prior to sync (NOT REPLICATED)

                #      Update an existing record
                #      Update a newly inserted record

                #      Delete an existing record
                #      Delete  a newly inserted record

                # inserting...
                # a new record
                nyc_tz = pytz.timezone('America/New_York')
                our_time_offset = "-04:00"
                our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(6, 6, 6)
                our_time_tz = our_time.isoformat() + our_time_offset
                our_date = datetime.date(1970, 7, 1)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '1',
                    'our_json':
                    json.dumps({'nymn': 77}),
                    'our_jsonb':
                    json.dumps({'burgers': 'good++'}),
                    'our_uuid':
                    my_uuid,
                    'our_citext':
                    'cyclops 2',
                    'our_store':
                    'dances=>"floor",name=>"betty"',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    '$0.98789'
                })
                self.expected_records.append({
                    'id':
                    4,
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'OUR DATE':
                    '1970-07-01T00:00:00+00:00',
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    True,
                    'our_json':
                    '{"nymn": 77}',
                    'our_jsonb':
                    '{"burgers": "good++"}',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_citext':
                    self.inserted_records[-1]['our_citext'],
                    'our_store': {
                        "name": "betty",
                        "dances": "floor"
                    },
                    'our_cidr':
                    self.inserted_records[-1]['our_cidr'],
                    'our_inet':
                    self.inserted_records[-1]['our_inet'],
                    'our_mac':
                    self.inserted_records[-1]['our_mac'],
                    'our_money':
                    '$0.99',
                    'our_alignment_enum':
                    None,
                })
                # a new record which we will then update prior to sync
                our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1999, 9, 9)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 4",
                    'our_varchar_10':
                    "varchar_3",
                    'our_text':
                    "some text 4",
                    'our_integer':
                    55200,
                    'our_smallint':
                    1,
                    'our_bigint':
                    100000,
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps('some string'),
                    'our_jsonb':
                    json.dumps(['burgers are good']),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    'our_text':
                    'some text 4',
                    'our_bit':
                    False,
                    'our_integer':
                    55200,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    5,
                    'our_json':
                    self.inserted_records[-1]['our_json'],
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[-1]['our_jsonb'],
                    'our_bigint':
                    100000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1999-09-09T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 4',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_3',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                    'our_alignment_enum':
                    None,
                })
                # a new record to be deleted prior to sync
                our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1999, 9, 9)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 4",
                    'our_varchar_10':
                    "varchar_3",
                    'our_text':
                    "some text 4",
                    'our_integer':
                    55200,
                    'our_smallint':
                    1,
                    'our_bigint':
                    100000,
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps('some string'),
                    'our_jsonb':
                    json.dumps(['burgers are good']),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567899.99'),
                    'our_text':
                    'some text 4',
                    'our_bit':
                    False,
                    'our_integer':
                    55200,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    6,
                    'our_json':
                    self.inserted_records[-1]['our_json'],
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[-1]['our_jsonb'],
                    'our_bigint':
                    100000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1999-09-09T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 4',
                    'our_uuid':
                    self.inserted_records[-1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_3',
                    'our_citext':
                    'cyclops 3',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                    'our_money':
                    None,
                    'our_alignment_enum':
                    None,
                })

                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[3])
                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[4])
                db_utils.insert_record(cur, test_table_name,
                                       self.inserted_records[5])

                # updating ...
                # an existing record
                canon_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)
                record_pk = 1
                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                updated_data = {
                    "OUR TS TZ": our_ts_tz,
                    "our_double": decimal.Decimal("6.6"),
                    "our_money": "$0.00"
                }
                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(
                    our_ts_tz)
                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
                self.expected_records[0]["our_money"] = "$0.00"

                db_utils.update_record(cur, canon_table_name, record_pk,
                                       updated_data)

                # a newly inserted record
                canon_table_name = db_utils.canonicalized_table_name(
                    cur, test_schema_name, test_table_name)
                record_pk = 5
                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                updated_data = {
                    "OUR TS TZ": our_ts_tz,
                    "our_double": decimal.Decimal("6.6"),
                    "our_money": "$0.00"
                }
                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(
                    our_ts_tz)
                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
                self.expected_records[4]["our_money"] = "$0.00"

                db_utils.update_record(cur, canon_table_name, record_pk,
                                       updated_data)

                # deleting
                # an existing record
                record_pk = 2
                db_utils.delete_record(cur, canon_table_name, record_pk)

                # a newly inserted record
                record_pk = 6
                db_utils.delete_record(cur, canon_table_name, record_pk)

        #----------------------------------------------------------------------
        # invoke the sync job AGAIN after vairous manipulations
        #----------------------------------------------------------------------

        # run sync job 3 and verify exit codes
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # get records
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        records_by_stream = runner.get_records_from_target_output()
        table_version_3 = records_by_stream[test_table_name]['table_version']
        messages = records_by_stream[test_table_name]['messages']

        # verify the execpted number of records were replicated
        self.assertEqual(4, record_count_by_stream[test_table_name])

        # verify the message actions match expectations
        self.assertEqual(5, len(messages))
        self.assertEqual('upsert', messages[0]['action'])
        self.assertEqual('upsert', messages[1]['action'])
        self.assertEqual('upsert', messages[2]['action'])
        self.assertEqual('upsert', messages[3]['action'])
        self.assertEqual('activate_version', messages[4]['action'])

        # verify the new table version increased on the second sync
        self.assertGreater(table_version_3, table_version_2)

        # verify the persisted schema still matches expectations
        self.assertEqual(expected_schemas[test_table_name],
                         records_by_stream[test_table_name]['schema'])

        # NB | This is a little tough to track mentally so here's a breakdown of
        #      the order of operations by expected records indexes:

        #      Prior to Sync 1
        #        insert 0, 1, 2

        #      Prior to Sync 2
        #        No db changes

        #      Prior to Sync 3
        #        insert 3, 4, 5
        #        update 0, 4
        #        delete 1, 5

        #      Resulting Synced Records: 2, 3, 0, 4

        # verify replicated records still match expectations
        self.assertDictEqual(self.expected_records[2],
                             messages[0]['data'])  # existing insert
        self.assertDictEqual(self.expected_records[3],
                             messages[1]['data'])  # new insert
        self.assertDictEqual(self.expected_records[0],
                             messages[2]['data'])  # existing update
        self.assertDictEqual(self.expected_records[4],
                             messages[3]['data'])  # new insert / update

        # grab bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_full_table_replication_test']

        # verify state and bookmarks meet expectations
        self.assertIsNone(state['currently_syncing'])
        self.assertIsNone(bookmark.get('lsn'))
        self.assertIsNone(bookmark.get('replication_key'))
        self.assertIsNone(bookmark.get('replication_key_value'))
        self.assertEqual(table_version_3, bookmark['version'])
Exemple #23
0
    def setUp(self):
        db_utils.ensure_environment_variables_set()

        db_utils.ensure_db('dev')

        self.maxDiff = None

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

                cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                                  test_table_name)

                create_table_sql = """
CREATE TABLE {} (id            SERIAL PRIMARY KEY,
                our_varchar    VARCHAR,
                our_varchar_10 VARCHAR(10),
                our_text       TEXT,
                our_integer    INTEGER,
                our_smallint   SMALLINT,
                our_bigint     BIGINT,
                our_decimal    NUMERIC(12,2),
                "OUR TS"       TIMESTAMP WITHOUT TIME ZONE,
                "OUR TS TZ"    TIMESTAMP WITH TIME ZONE,
                "OUR TIME"     TIME WITHOUT TIME ZONE,
                "OUR TIME TZ"  TIME WITH TIME ZONE,
                "OUR DATE"     DATE,
                our_double     DOUBLE PRECISION,
                our_real       REAL,
                our_boolean    BOOLEAN,
                our_bit        BIT(1),
                our_json       JSON,
                our_jsonb      JSONB,
                our_uuid       UUID,
                our_store      HSTORE,
                our_citext     CITEXT,
                our_inet       inet,
                our_cidr       cidr,
                our_mac        macaddr,
                our_money      money)
                """.format(
                    db_utils.canonicalized_table_name(cur, test_schema_name,
                                                      test_table_name))

                cur.execute(create_table_sql)

                # insert fixture data and track expected records
                self.inserted_records = []
                self.expected_records = []

                nyc_tz = pytz.timezone('America/New_York')
                our_time_offset = "-04:00"

                # record 1
                our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184)
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(10, 9, 8)
                our_time_tz = our_time.isoformat() + our_time_offset
                our_date = datetime.date(1964, 7, 1)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar 2",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text 2",
                    'our_integer':
                    44101,
                    'our_smallint':
                    2,
                    'our_bigint':
                    1000001,
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '1',
                    'our_json':
                    json.dumps({'nymn': 77}),
                    'our_jsonb':
                    json.dumps({'burgers': 'good++'}),
                    'our_uuid':
                    my_uuid,
                    'our_citext':
                    'cyclops 2',
                    'our_store':
                    'dances=>"floor",name=>"betty"',
                    'our_cidr':
                    '192.168.101.128/25',
                    'our_inet':
                    '192.168.101.128/24',
                    'our_mac':
                    '08:00:2b:01:02:04',
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('9876543210.02'),
                    'OUR TIME':
                    str(our_time),
                    'our_text':
                    'some text 2',
                    'our_bit':
                    True,
                    'our_integer':
                    44101,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    1,
                    'our_json':
                    '{"nymn": 77}',
                    'our_boolean':
                    True,
                    'our_jsonb':
                    '{"burgers": "good++"}',
                    'our_bigint':
                    1000001,
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "dances": "floor"
                    },
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'our_smallint':
                    2,
                    'OUR DATE':
                    '1964-07-01T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar 2',
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'our_uuid':
                    self.inserted_records[0]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_10',
                    'our_citext':
                    self.inserted_records[0]['our_citext'],
                    'our_inet':
                    self.inserted_records[0]['our_inet'],
                    'our_cidr':
                    self.inserted_records[0]['our_cidr'],
                    'our_mac':
                    self.inserted_records[0]['our_mac'],
                    'our_money':
                    None
                })
                # record 2
                our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + our_time_offset
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text",
                    'our_integer':
                    44100,
                    'our_smallint':
                    1,
                    'our_bigint':
                    1000000,
                    'our_decimal':
                    decimal.Decimal('1234567890.01'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps({'secret': 55}),
                    'our_jsonb':
                    json.dumps(6777777),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 1',
                    'our_cidr':
                    '192.168.100.128/25',
                    'our_inet':
                    '192.168.100.128/24',
                    'our_mac':
                    '08:00:2b:01:02:03',
                    'our_money':
                    '$1,445.5678'
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567890.01'),
                    'our_text':
                    'some text',
                    'our_bit':
                    False,
                    'our_integer':
                    44100,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    2,
                    'our_json':
                    '{"secret": 55}',
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[1]['our_jsonb'],
                    'our_bigint':
                    1000000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1998-03-04T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar',
                    'our_uuid':
                    self.inserted_records[1]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_10',
                    'our_citext':
                    self.inserted_records[1]['our_citext'],
                    'our_inet':
                    self.inserted_records[1]['our_inet'],
                    'our_cidr':
                    self.inserted_records[1]['our_cidr'],
                    'our_mac':
                    self.inserted_records[1]['our_mac'],
                    'our_money':
                    '$1,445.57'
                })
                # record 3
                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(12, 11, 10)
                our_time_tz = our_time.isoformat() + our_time_offset
                our_date = datetime.date(1998, 3, 4)
                my_uuid = str(uuid.uuid1())
                self.inserted_records.append({
                    'our_varchar':
                    "our_varchar",
                    'our_varchar_10':
                    "varchar_10",
                    'our_text':
                    "some text",
                    'our_integer':
                    44100,
                    'our_smallint':
                    1,
                    'our_bigint':
                    1000000,
                    'our_decimal':
                    decimal.Decimal('1234567890.01'),
                    quote_ident('OUR TS', cur):
                    our_ts,
                    quote_ident('OUR TS TZ', cur):
                    our_ts_tz,
                    quote_ident('OUR TIME', cur):
                    our_time,
                    quote_ident('OUR TIME TZ', cur):
                    our_time_tz,
                    quote_ident('OUR DATE', cur):
                    our_date,
                    'our_double':
                    '1.1',
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_boolean':
                    True,
                    'our_bit':
                    '0',
                    'our_json':
                    json.dumps({'secret': 55}),
                    'our_jsonb':
                    json.dumps(6777777),
                    'our_uuid':
                    my_uuid,
                    'our_store':
                    'size=>"small",name=>"betty"',
                    'our_citext':
                    'cyclops 1',
                    'our_cidr':
                    '192.168.100.128/25',
                    'our_inet':
                    '192.168.100.128/24',
                    'our_mac':
                    '08:00:2b:01:02:03',
                    'our_money':
                    '$1,445.5678'
                })
                self.expected_records.append({
                    'our_decimal':
                    decimal.Decimal('1234567890.01'),
                    'our_text':
                    'some text',
                    'our_bit':
                    False,
                    'our_integer':
                    44100,
                    'our_double':
                    decimal.Decimal('1.1'),
                    'id':
                    3,
                    'our_json':
                    '{"secret": 55}',
                    'our_boolean':
                    True,
                    'our_jsonb':
                    self.inserted_records[1]['our_jsonb'],
                    'our_bigint':
                    1000000,
                    'OUR TS':
                    self.expected_ts(our_ts),
                    'OUR TS TZ':
                    self.expected_ts_tz(our_ts_tz),
                    'OUR TIME':
                    str(our_time),
                    'OUR TIME TZ':
                    str(our_time_tz),
                    'our_store': {
                        "name": "betty",
                        "size": "small"
                    },
                    'our_smallint':
                    1,
                    'OUR DATE':
                    '1998-03-04T00:00:00+00:00',
                    'our_varchar':
                    'our_varchar',
                    'our_uuid':
                    self.inserted_records[2]['our_uuid'],
                    'our_real':
                    decimal.Decimal('1.2'),
                    'our_varchar_10':
                    'varchar_10',
                    'our_citext':
                    self.inserted_records[2]['our_citext'],
                    'our_inet':
                    self.inserted_records[2]['our_inet'],
                    'our_cidr':
                    self.inserted_records[2]['our_cidr'],
                    'our_mac':
                    self.inserted_records[2]['our_mac'],
                    'our_money':
                    '$1,445.57'
                })

                for rec in self.inserted_records:
                    db_utils.insert_record(cur, test_table_name, rec)