Ejemplo n.º 1
0
    def test_catalog(self):
        """Verify discovery output for the table of numeric columns.

        Discovery must yield exactly one stream whose tap_stream_id is
        'postgres-public-CHICKEN TIMES'. Its metadata map and JSON schema are
        then compared with the values expected for ``our_decimal`` (the key
        property), ``our_decimal_38_4`` and ``our_decimal_10_2``.
        """
        catalog = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in catalog.streams
                   if stream.tap_stream_id == 'postgres-public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0].to_dict()
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': ['our_decimal'],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'our_decimal'): {
                'inclusion': 'automatic',
                'sql-datatype': 'numeric',
                'selected-by-default': True,
            },
            ('properties', 'our_decimal_38_4'): {
                'inclusion': 'available',
                'sql-datatype': 'numeric',
                'selected-by-default': True,
            },
            ('properties', 'our_decimal_10_2'): {
                'inclusion': 'available',
                'sql-datatype': 'numeric',
                'selected-by-default': True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        # Bound for our_decimal, derived from the tap's MAX_PRECISION and
        # MAX_SCALE constants rather than hard-coded.
        default_bound = 10 ** (tap_postgres.MAX_PRECISION - tap_postgres.MAX_SCALE)
        expected_schema = {
            'type': 'object',
            'properties': {
                'our_decimal': {
                    'type': ['number'],
                    'exclusiveMinimum': True,
                    'exclusiveMaximum': True,
                    'minimum': -default_bound,
                    'maximum': default_bound,
                    'multipleOf': 10 ** (0 - tap_postgres.MAX_SCALE),
                },
                'our_decimal_10_2': {
                    'type': ['null', 'number'],
                    'exclusiveMinimum': True,
                    'exclusiveMaximum': True,
                    'minimum': -100000000,
                    'maximum': 100000000,
                    'multipleOf': 0.01,
                },
                'our_decimal_38_4': {
                    'type': ['null', 'number'],
                    'exclusiveMinimum': True,
                    'exclusiveMaximum': True,
                    'minimum': -10000000000000000000000000000000000,
                    'maximum': 10000000000000000000000000000000000,
                    'multipleOf': 0.0001,
                },
            },
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 2
0
    def test_catalog(self):
        """Verify discovery output for the table of date and time columns.

        Expects a single 'postgres-public-CHICKEN TIMES' stream and compares
        its metadata map and JSON schema with the expected values: date and
        timestamp columns carry the 'date-time' format, time columns are
        plain strings.
        """
        catalog = tap_postgres.do_discovery(get_test_connection_config())

        matches = [stream for stream in catalog.streams
                   if stream.tap_stream_id == 'postgres-public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0].to_dict()
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': ['our_date'],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'our_date'): {
                'inclusion': 'automatic',
                'sql-datatype': 'date',
                'selected-by-default': True,
            },
            ('properties', 'our_ts'): {
                'inclusion': 'available',
                'sql-datatype': 'timestamp without time zone',
                'selected-by-default': True,
            },
            ('properties', 'our_ts_tz'): {
                'inclusion': 'available',
                'sql-datatype': 'timestamp with time zone',
                'selected-by-default': True,
            },
            ('properties', 'our_time'): {
                'inclusion': 'available',
                'sql-datatype': 'time without time zone',
                'selected-by-default': True,
            },
            ('properties', 'our_time_tz'): {
                'inclusion': 'available',
                'sql-datatype': 'time with time zone',
                'selected-by-default': True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        expected_schema = {
            'type': 'object',
            'properties': {
                'our_date': {'type': ['string'], 'format': 'date-time'},
                'our_ts': {'type': ['null', 'string'], 'format': 'date-time'},
                'our_ts_tz': {'type': ['null', 'string'], 'format': 'date-time'},
                'our_time': {'type': ['null', 'string']},
                'our_time_tz': {'type': ['null', 'string']},
            },
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 3
0
    def test_catalog(self):
        """Verify that the same table is discovered in two databases.

        Expects exactly two streams, with tap_stream_ids
        'postgres-public-different_chicken' and
        'dev-public-different_chicken', and checks that each one exposes the
        same schema and column metadata.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        wanted_ids = ('postgres-public-different_chicken', 'dev-public-different_chicken')
        matches = [stream for stream in discovered if stream['tap_stream_id'] in wanted_ids]
        self.assertEqual(len(matches), 2)

        for stream_dict in matches:
            expected_schema = {
                'type': 'object',
                'definitions': tap_postgres.BASE_RECURSIVE_SCHEMAS,
                'properties': {
                    'our_date': {'type': ['string'], 'format': 'date-time'},
                    'our_ts': {'type': ['null', 'string'], 'format': 'date-time'},
                    'our_ts_tz': {'type': ['null', 'string'], 'format': 'date-time'},
                    'our_time': {'type': ['null', 'string']},
                    'our_time_tz': {'type': ['null', 'string']},
                },
            }
            self.assertEqual(expected_schema, stream_dict.get('schema'))

            # The database name is read back from the stream itself, so the
            # 'database-name' entry below is not independently verified.
            table_md = metadata.to_map(stream_dict.get('metadata')).get(())
            db_name = table_md.get('database-name')

            expected_metadata = {
                (): {
                    'table-key-properties': ['our_date'],
                    'database-name': db_name,
                    'schema-name': 'public',
                    'is-view': False,
                    'row-count': 0,
                },
                ('properties', 'our_date'): {
                    'inclusion': 'automatic',
                    'sql-datatype': 'date',
                    'selected-by-default': True,
                },
                ('properties', 'our_ts'): {
                    'inclusion': 'available',
                    'sql-datatype': 'timestamp without time zone',
                    'selected-by-default': True,
                },
                ('properties', 'our_ts_tz'): {
                    'inclusion': 'available',
                    'sql-datatype': 'timestamp with time zone',
                    'selected-by-default': True,
                },
                ('properties', 'our_time'): {
                    'inclusion': 'available',
                    'sql-datatype': 'time without time zone',
                    'selected-by-default': True,
                },
                ('properties', 'our_time_tz'): {
                    'inclusion': 'available',
                    'sql-datatype': 'time with time zone',
                    'selected-by-default': True,
                },
            }
            self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                             expected_metadata)
    def test_catalog(self):
        """Verify discovery when connecting with this test's own credentials.

        The connection config is overridden with ``self.user`` and
        ``self.password`` before discovery. Exactly one stream with
        tap_stream_id 'public-CHICKEN TIMES' is expected; its table name,
        stream name, metadata map and schema are then checked.
        """
        conn_config = get_test_connection_config()
        conn_config['user'] = self.user
        conn_config['password'] = self.password

        discovered = tap_postgres.do_discovery(conn_config)
        matches = [stream for stream in discovered
                   if stream['tap_stream_id'] == 'public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        for name_field in ('table_name', 'stream'):
            self.assertEqual(TestStringTableWithPK.table_name, stream_dict.get(name_field))

        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': [],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'id'): {
                'inclusion': 'available',
                'selected-by-default': True,
                'sql-datatype': 'integer',
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        expected_schema = {
            'definitions': BASE_RECURSIVE_SCHEMAS,
            'type': 'object',
            'properties': {
                'id': {
                    'type': ['null', 'integer'],
                    'minimum': -2147483648,
                    'maximum': 2147483647,
                },
            },
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 5
0
    def test_catalog(self):
        """Verify discovery output for the table of array columns.

        Expects a single 'postgres-public-CHICKEN TIMES' stream. A row is
        inserted and selected through a test connection, and the metadata map
        and schema captured by the earlier discovery call are compared with
        the expected values inside that connection's ``with`` block.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream['tap_stream_id'] == 'postgres-public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        with get_test_connection() as conn, \
                conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # The statements below run after discovery and their results are
            # never fetched; the assertions only look at stream_dict.
            insert_sql = """INSERT INTO "CHICKEN TIMES" (our_int_array_pk, our_string_array) VALUES ('{{1,2,3},{4,5,6}}', '{{"a","b","c"}}' )"""
            cur.execute(insert_sql)
            cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

            expected_metadata = {
                (): {
                    'table-key-properties': ['our_int_array_pk'],
                    'database-name': 'postgres',
                    'schema-name': 'public',
                    'is-view': False,
                    'row-count': 0,
                },
                ('properties', 'our_int_array_pk'): {
                    'inclusion': 'automatic',
                    'sql-datatype': 'integer[]',
                    'selected-by-default': True,
                },
                ('properties', 'our_string_array'): {
                    'inclusion': 'available',
                    'sql-datatype': 'character varying[]',
                    'selected-by-default': True,
                },
            }
            self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                             expected_metadata)

            expected_schema = {
                'properties': {
                    'our_int_array_pk': {
                        'type': ['null', 'array'],
                        'items': {'$ref': '#/definitions/sdc_recursive_integer_array'},
                    },
                    'our_string_array': {
                        'type': ['null', 'array'],
                        'items': {'$ref': '#/definitions/sdc_recursive_string_array'},
                    },
                },
                'type': 'object',
                'definitions': tap_postgres.BASE_RECURSIVE_SCHEMAS,
            }
            self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 6
0
    def test_catalog(self):
        """Verify discovery output for the table of integer-sized columns.

        Expects a single 'postgres-public-CHICKEN TIMES' stream, checks its
        table and stream names, and compares the metadata map and schema
        (including the min/max bounds for smallint, integer and bigint).
        """
        catalog = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in catalog.streams
                   if stream.tap_stream_id == 'postgres-public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0].to_dict()
        for name_field in ('table_name', 'stream'):
            self.assertEqual(TestStringTableWithPK.table_name, stream_dict.get(name_field))

        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': [],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'id'): {
                'inclusion': 'available',
                'sql-datatype': 'integer',
                'selected-by-default': True,
            },
            ('properties', 'size integer'): {
                'inclusion': 'available',
                'sql-datatype': 'integer',
                'selected-by-default': True,
            },
            ('properties', 'size smallint'): {
                'inclusion': 'available',
                'sql-datatype': 'smallint',
                'selected-by-default': True,
            },
            ('properties', 'size bigint'): {
                'inclusion': 'available',
                'sql-datatype': 'bigint',
                'selected-by-default': True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        expected_schema = {
            'type': 'object',
            'properties': {
                'id': {
                    'type': ['null', 'integer'],
                    'minimum': -2147483648,
                    'maximum': 2147483647,
                },
                'size smallint': {
                    'type': ['null', 'integer'],
                    'minimum': -32768,
                    'maximum': 32767,
                },
                'size integer': {
                    'type': ['null', 'integer'],
                    'minimum': -2147483648,
                    'maximum': 2147483647,
                },
                'size bigint': {
                    'type': ['null', 'integer'],
                    'minimum': -9223372036854775808,
                    'maximum': 9223372036854775807,
                },
            },
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
    def test_catalog(self):
        """Verify that unsupported column types are flagged in the metadata.

        Expects a single 'postgres-public-CHICKEN TIMES' stream whose every
        column is reported with inclusion 'unsupported' and
        selected-by-default False, alongside its SQL datatype.
        """
        catalog = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in catalog.streams
                   if stream.tap_stream_id == "postgres-public-CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0].to_dict()
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        # Column name -> SQL datatype expected in the metadata.
        unsupported_sql_types = {
            'bytea_col': 'bytea',
            'bit_string_col': 'bit(5)',
            'array_int_col': 'integer[]',
            'line_col': 'line',
            'xml_col': 'xml',
            'enum_col': 'mood_enum',
            'macaddr_col': 'macaddr',
            'int_range_col': 'int4range',
            'circle_col': 'circle',
            'polygon_col': 'polygon',
            'box_col': 'box',
            'lseg_col': 'lseg',
            'composite_col': 'person_composite',
            'inet_col': 'inet',
            'cidr_col': 'cidr',
            'money_col': 'money',
            'interval_col': 'interval',
            'point_col': 'point',
        }
        expected_metadata = {
            ('properties', column): {
                'sql-datatype': sql_type,
                'selected-by-default': False,
                'inclusion': 'unsupported',
            }
            for column, sql_type in unsupported_sql_types.items()
        }
        expected_metadata[()] = {
            'is-view': False,
            'table-key-properties': [],
            'row-count': 0,
            'schema-name': 'public',
            'database-name': 'postgres',
        }

        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)
Ejemplo n.º 8
0
    def test_catalog(self):
        """Verify discovery output for the table of string columns.

        Expects a single 'postgres-public-CHICKEN TIMES' stream keyed on
        ``id``, checks its table and stream names, and compares the metadata
        map and schema (including maxLength on the varchar and char columns).
        """
        catalog = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in catalog.streams
                   if stream.tap_stream_id == "postgres-public-CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0].to_dict()
        for name_field in ('table_name', 'stream'):
            self.assertEqual(TestStringTableWithPK.table_name, stream_dict.get(name_field))

        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': ['id'],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'character-varying_name'): {
                'inclusion': 'available',
                'sql-datatype': 'character varying',
                'selected-by-default': True,
            },
            ('properties', 'id'): {
                'inclusion': 'automatic',
                'sql-datatype': 'integer',
                'selected-by-default': True,
            },
            ('properties', 'varchar-name'): {
                'inclusion': 'available',
                'sql-datatype': 'character varying',
                'selected-by-default': True,
            },
            ('properties', 'text-name'): {
                'inclusion': 'available',
                'sql-datatype': 'text',
                'selected-by-default': True,
            },
            ('properties', 'char_name'): {
                'selected-by-default': True,
                'inclusion': 'available',
                'sql-datatype': 'character',
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        expected_schema = {
            'type': 'object',
            'properties': {
                'id': {
                    'type': ['integer'],
                    'maximum': 2147483647,
                    'minimum': -2147483648,
                },
                'character-varying_name': {'type': ['null', 'string']},
                'varchar-name': {'type': ['null', 'string'], 'maxLength': 28},
                'char_name': {'type': ['null', 'string'], 'maxLength': 10},
                'text-name': {'type': ['null', 'string']},
            },
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 9
0
    def test_catalog(self):
        """Verify discovery output for the table of hstore columns.

        Expects a single "postgres-public-CHICKEN TIMES" stream. A row is
        inserted and selected through a test connection, and the metadata map
        and schema captured by the earlier discovery call are compared with
        the expected values inside that connection's ``with`` block.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream["tap_stream_id"] == "postgres-public-CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        with get_test_connection() as conn, \
                conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # The statements below run after discovery and their results are
            # never fetched; the assertions only look at stream_dict.
            cur.execute(
                """INSERT INTO "CHICKEN TIMES" (our_pk, our_hstore)
                    VALUES ('size=>"small",name=>"betty"', 'size=>"big",name=>"fred"')"""
            )
            cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

            expected_metadata = {
                (): {
                    "table-key-properties": ["our_pk"],
                    "database-name": "postgres",
                    "schema-name": "public",
                    "is-view": False,
                    "row-count": 0,
                },
                ("properties", "our_pk"): {
                    "inclusion": "automatic",
                    "sql-datatype": "hstore",
                    "selected-by-default": True,
                },
                ("properties", "our_hstore"): {
                    "inclusion": "available",
                    "sql-datatype": "hstore",
                    "selected-by-default": True,
                },
            }
            self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                             expected_metadata)

            expected_schema = {
                "properties": {
                    "our_hstore": {"type": ["null", "object"], "properties": {}},
                    "our_pk": {"type": ["object"], "properties": {}},
                },
                "type": "object",
                "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
            }
            self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 10
0
    def test_catalog(self):
        """Verify discovery output for the table of array columns.

        Expects a single "postgres-public-CHICKEN TIMES" stream. A row is
        inserted and selected through a test connection, and the metadata map
        and schema captured by the earlier discovery call are compared with
        the expected values inside that connection's ``with`` block.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream["tap_stream_id"] == "postgres-public-CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        with get_test_connection() as conn, \
                conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # The statements below run after discovery and their results are
            # never fetched; the assertions only look at stream_dict.
            cur.execute(
                """INSERT INTO "CHICKEN TIMES" (our_int_array_pk, our_string_array)
                    VALUES ('{{1,2,3},{4,5,6}}', '{{"a","b","c"}}' )"""
            )
            cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

            expected_metadata = {
                (): {
                    "table-key-properties": ["our_int_array_pk"],
                    "database-name": "postgres",
                    "schema-name": "public",
                    "is-view": False,
                    "row-count": 0,
                },
                ("properties", "our_int_array_pk"): {
                    "inclusion": "automatic",
                    "sql-datatype": "integer[]",
                    "selected-by-default": True,
                },
                ("properties", "our_string_array"): {
                    "inclusion": "available",
                    "sql-datatype": "character varying[]",
                    "selected-by-default": True,
                },
            }
            self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                             expected_metadata)

            int_items = {"$ref": "#/definitions/sdc_recursive_integer_array"}
            str_items = {"$ref": "#/definitions/sdc_recursive_string_array"}
            expected_schema = {
                "properties": {
                    "our_int_array_pk": {"type": ["null", "array"], "items": int_items},
                    "our_string_array": {"type": ["null", "array"], "items": str_items},
                },
                "type": "object",
                "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
            }
            self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 11
0
    def test_catalog(self):
        """Verify discovery output for the table of floating-point columns.

        Expects a single "postgres-public-CHICKEN TIMES" stream keyed on
        ``our_float`` and compares its metadata map and schema with the
        expected values for the double precision and real columns.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream["tap_stream_id"] == "postgres-public-CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        expected_metadata = {
            (): {
                "table-key-properties": ["our_float"],
                "database-name": "postgres",
                "schema-name": "public",
                "is-view": False,
                "row-count": 0,
            },
            ("properties", "our_float"): {
                "inclusion": "automatic",
                "sql-datatype": "double precision",
                "selected-by-default": True,
            },
            ("properties", "our_real"): {
                "inclusion": "available",
                "sql-datatype": "real",
                "selected-by-default": True,
            },
            ("properties", "our_double"): {
                "inclusion": "available",
                "sql-datatype": "double precision",
                "selected-by-default": True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                         expected_metadata)

        expected_schema = {
            "properties": {
                "our_float": {"type": ["number"]},
                "our_real": {"type": ["null", "number"]},
                "our_double": {"type": ["null", "number"]},
            },
            "type": "object",
            "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
        }
        self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 12
0
    def test_catalog(self):
        """Verify discovery output for the view over the array table.

        Expects a single "postgres-public-LIKE CHICKEN TIMES" stream reported
        as a view with no key properties, and compares its metadata map and
        schema with the expected values for the two array columns.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream["tap_stream_id"] == "postgres-public-LIKE CHICKEN TIMES"]
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        expected_metadata = {
            (): {
                "table-key-properties": [],
                "database-name": "postgres",
                "schema-name": "public",
                "is-view": True,
                "row-count": 0,
            },
            ("properties", "our_int_array_pk"): {
                "inclusion": "available",
                "sql-datatype": "integer[]",
                "selected-by-default": True,
            },
            ("properties", "our_text_array"): {
                "inclusion": "available",
                "sql-datatype": "text[]",
                "selected-by-default": True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                         expected_metadata)

        int_items = {"$ref": "#/definitions/sdc_recursive_integer_array"}
        str_items = {"$ref": "#/definitions/sdc_recursive_string_array"}
        expected_schema = {
            "properties": {
                "our_int_array_pk": {"type": ["null", "array"], "items": int_items},
                "our_text_array": {"type": ["null", "array"], "items": str_items},
            },
            "type": "object",
            "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
        }
        self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 13
0
    def test_catalog(self):
        """Verify discovery output for the table of hstore columns.

        Expects a single 'postgres-public-CHICKEN TIMES' stream. A row is
        inserted and selected through a test connection, and the metadata map
        and schema captured by the earlier discovery call are compared with
        the expected values inside that connection's ``with`` block.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [stream for stream in discovered
                   if stream['tap_stream_id'] == 'postgres-public-CHICKEN TIMES']
        self.assertEqual(len(matches), 1)

        stream_dict = matches[0]
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        with get_test_connection() as conn, \
                conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # The statements below run after discovery and their results are
            # never fetched; the assertions only look at stream_dict.
            insert_sql = """INSERT INTO "CHICKEN TIMES" (our_pk, our_hstore) VALUES ('size=>"small",name=>"betty"', 'size=>"big",name=>"fred"')"""
            cur.execute(insert_sql)
            cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

            expected_metadata = {
                (): {
                    'table-key-properties': ['our_pk'],
                    'database-name': 'postgres',
                    'schema-name': 'public',
                    'is-view': False,
                    'row-count': 0,
                },
                ('properties', 'our_pk'): {
                    'inclusion': 'automatic',
                    'sql-datatype': 'hstore',
                    'selected-by-default': True,
                },
                ('properties', 'our_hstore'): {
                    'inclusion': 'available',
                    'sql-datatype': 'hstore',
                    'selected-by-default': True,
                },
            }
            self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                             expected_metadata)

            expected_schema = {
                'properties': {
                    'our_hstore': {'type': ['null', 'object'], 'properties': {}},
                    'our_pk': {'type': ['object'], 'properties': {}},
                },
                'type': 'object',
                'definitions': tap_postgres.BASE_RECURSIVE_SCHEMAS,
            }
            self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 14
0
    def test_catalog(self):
        """Verify discovery output for the view over the array table.

        Expects a single 'postgres-public-LIKE CHICKEN TIMES' stream reported
        as a view with no key properties, and compares its metadata map and
        schema with the expected values for the two array columns.

        The assertions only inspect the result of the discovery call, so no
        separate database connection or cursor is opened here.
        """
        conn_config = get_test_connection_config()
        streams = tap_postgres.do_discovery(conn_config)
        chicken_streams = [
            s for s in streams
            if s['tap_stream_id'] == 'postgres-public-LIKE CHICKEN TIMES'
        ]
        self.assertEqual(len(chicken_streams), 1)
        stream_dict = chicken_streams[0]
        stream_dict.get('metadata').sort(key=lambda md: md['breadcrumb'])

        self.assertEqual(
            metadata.to_map(stream_dict.get('metadata')), {
                (): {
                    'table-key-properties': [],
                    'database-name': 'postgres',
                    'schema-name': 'public',
                    'is-view': True,
                    'row-count': 0
                },
                ('properties', 'our_int_array_pk'): {
                    'inclusion': 'available',
                    'sql-datatype': 'integer[]',
                    'selected-by-default': True
                },
                ('properties', 'our_text_array'): {
                    'inclusion': 'available',
                    'sql-datatype': 'text[]',
                    'selected-by-default': True
                }
            })
        self.assertEqual(
            {
                'properties': {
                    'our_int_array_pk': {
                        'type': ['null', 'array'],
                        'items': {
                            '$ref':
                            '#/definitions/sdc_recursive_integer_array'
                        }
                    },
                    'our_text_array': {
                        'type': ['null', 'array'],
                        'items': {
                            '$ref':
                            '#/definitions/sdc_recursive_string_array'
                        }
                    }
                },
                'type': 'object',
                'definitions': tap_postgres.BASE_RECURSIVE_SCHEMAS
            }, stream_dict.get('schema'))
Ejemplo n.º 15
0
    def test_catalog(self):
        """Check discovered metadata and schema for the floating point columns
        of the 'CHICKEN TIMES' stream."""
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [
            stream for stream in discovered
            if stream['tap_stream_id'] == 'postgres-public-CHICKEN TIMES'
        ]
        self.assertEqual(len(matches), 1)
        stream_dict = matches[0]

        # Sort in place so the metadata entries are in breadcrumb order.
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': ['our_float'],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'our_float'): {
                'inclusion': 'automatic',
                'sql-datatype': 'double precision',
                'selected-by-default': True,
            },
            ('properties', 'our_real'): {
                'inclusion': 'available',
                'sql-datatype': 'real',
                'selected-by-default': True,
            },
            ('properties', 'our_double'): {
                'inclusion': 'available',
                'sql-datatype': 'double precision',
                'selected-by-default': True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                         expected_metadata)

        # Only the key column is non-nullable in the expected schema.
        expected_schema = {
            'properties': {
                'our_float': {'type': ['number']},
                'our_real': {'type': ['null', 'number']},
                'our_double': {'type': ['null', 'number']},
            },
            'type': 'object',
            'definitions': tap_postgres.BASE_RECURSIVE_SCHEMAS,
        }
        self.assertEqual(expected_schema, stream_dict.get('schema'))
Ejemplo n.º 16
0
    def test_catalog(self):
        """Run discovery with this test case's user/password and check the
        stream names, metadata and schema reported for 'CHICKEN TIMES'."""
        conn_config = get_test_connection_config()
        conn_config["user"] = self.user
        conn_config["password"] = self.password

        wanted_id = "postgres-public-CHICKEN TIMES"
        chicken_streams = [
            stream for stream in tap_postgres.do_discovery(conn_config)
            if stream["tap_stream_id"] == wanted_id
        ]
        self.assertEqual(len(chicken_streams), 1)
        stream_dict = chicken_streams[0]

        # Both the table name and the stream name must match the table name
        # declared on TestStringTableWithPK.
        expected_name = TestStringTableWithPK.table_name
        self.assertEqual(expected_name, stream_dict.get("table_name"))
        self.assertEqual(expected_name, stream_dict.get("stream"))

        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        expected_metadata = {
            (): {
                "table-key-properties": [],
                "database-name": "postgres",
                "schema-name": "public",
                "is-view": False,
                "row-count": 0,
            },
            ("properties", "id"): {
                "inclusion": "available",
                "sql-datatype": "integer",
                "selected-by-default": True,
            },
        }
        self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                         expected_metadata)

        expected_schema = {
            "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
            "type": "object",
            "properties": {
                "id": {
                    "type": ["null", "integer"],
                    "minimum": -2147483648,
                    "maximum": 2147483647,
                },
            },
        }
        self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 17
0
    def test_catalog(self):
        """Check discovered metadata and schema for the mood_enum columns of
        the 'CHICKEN TIMES' stream."""
        catalog = tap_postgres.do_discovery(get_test_connection_config())
        matching = [
            stream for stream in catalog.streams
            if stream.tap_stream_id == 'postgres-public-CHICKEN TIMES'
        ]
        self.assertEqual(len(matching), 1)
        stream_dict = matching[0].to_dict()
        stream_dict.get('metadata').sort(key=lambda entry: entry['breadcrumb'])

        expected_metadata = {
            (): {
                'table-key-properties': ['our_mood_enum_pk'],
                'database-name': 'postgres',
                'schema-name': 'public',
                'is-view': False,
                'row-count': 0,
            },
            ('properties', 'our_mood_enum_pk'): {
                'inclusion': 'automatic',
                'sql-datatype': 'mood_enum',
                'selected-by-default': True,
            },
            ('properties', 'our_mood_enum'): {
                'inclusion': 'available',
                'sql-datatype': 'mood_enum',
                'selected-by-default': True,
            },
        }
        expected_schema = {
            'properties': {
                'our_mood_enum': {'type': ['null', 'string']},
                'our_mood_enum_pk': {'type': ['string']},
            },
            'type': 'object',
        }

        # The assertions stay inside the connection block, exactly where the
        # statements below are executed.
        with get_test_connection() as conn, \
                conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute("""INSERT INTO "CHICKEN TIMES" (our_mood_enum_pk, our_mood_enum) VALUES ('sad', 'happy')""")
            cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

            self.assertEqual(metadata.to_map(stream_dict.get('metadata')),
                             expected_metadata)
            self.assertEqual(expected_schema, stream_dict.get('schema'))
    def test_catalog(self, mock_connect, use_secondary):
        """Interrupt a FULL_TABLE sync while on COW and verify the resumed run.

        The first ``do_sync`` is expected to emit every CHICKEN message and
        then raise while syncing COW. The second ``do_sync`` starts from the
        last state captured in the first run and must finish COW using the
        same table version.

        Args:
            mock_connect: connection mock whose calls are compared with the
                expected host/port (presumably injected by a patch decorator
                that is not part of this block -- confirm).
            use_secondary: when truthy, the secondary host/port of the
                connection config are the ones expected in the connect call.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config(use_secondary=use_secondary)
        streams = tap_postgres.do_discovery(conn_config)

        # Assert that we connected to the correct database
        expected_connection = {
            'application_name':
            unittest.mock.ANY,
            'dbname':
            unittest.mock.ANY,
            'user':
            unittest.mock.ANY,
            'password':
            unittest.mock.ANY,
            'connect_timeout':
            unittest.mock.ANY,
            'host':
            conn_config['secondary_host']
            if use_secondary else conn_config['host'],
            'port':
            conn_config['secondary_port']
            if use_secondary else conn_config['port'],
        }
        mock_connect.assert_called_once_with(**expected_connection)
        mock_connect.reset_mock()

        cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
        assert cow_stream is not None
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream,
                                                       'FULL_TABLE')

        chicken_stream = [s for s in streams
                          if s['table_name'] == 'CHICKEN'][0]
        assert chicken_stream is not None
        chicken_stream = select_all_of_stream(chicken_stream)
        chicken_stream = set_replication_method_for_stream(
            chicken_stream, 'FULL_TABLE')

        conn = get_test_connection()
        # Close the connection even when one of the inserts fails.
        try:
            conn.autocommit = True

            with conn.cursor() as cur:
                cow_rec = {'name': 'betty', 'colour': 'blue'}
                insert_record(cur, 'COW', cow_rec)

                cow_rec = {'name': 'smelly', 'colour': 'brow'}
                insert_record(cur, 'COW', cow_rec)

                cow_rec = {'name': 'pooper', 'colour': 'green'}
                insert_record(cur, 'COW', cow_rec)

                chicken_rec = {'name': 'fred', 'colour': 'red'}
                insert_record(cur, 'CHICKEN', chicken_rec)
        finally:
            conn.close()

        state = {}
        blew_up_on_cow = False

        #this will sync the CHICKEN but then blow up on the COW
        try:
            tap_postgres.do_sync(
                get_test_connection_config(use_secondary=use_secondary),
                {'streams': streams}, None, state)
        except Exception:
            # Any failure counts; the messages captured so far are checked
            # in detail below.
            blew_up_on_cow = True

        assert blew_up_on_cow
        mock_connect.assert_called_with(**expected_connection)
        mock_connect.reset_mock()

        assert 14 == len(CAUGHT_MESSAGES)

        assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
        assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        assert CAUGHT_MESSAGES[1].value['bookmarks']['public-CHICKEN'].get(
            'xmin') is None

        assert isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
        new_version = CAUGHT_MESSAGES[2].version

        assert isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        assert 'public-CHICKEN' == CAUGHT_MESSAGES[3].stream

        assert isinstance(CAUGHT_MESSAGES[4], singer.StateMessage)
        #xmin is set while we are processing the full table replication
        assert CAUGHT_MESSAGES[4].value['bookmarks']['public-CHICKEN'][
            'xmin'] is not None

        assert isinstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage)
        assert CAUGHT_MESSAGES[5].version == new_version

        assert isinstance(CAUGHT_MESSAGES[6], singer.StateMessage)
        assert singer.get_currently_syncing(CAUGHT_MESSAGES[6].value) is None
        #xmin is cleared at the end of the full table replication
        assert CAUGHT_MESSAGES[6].value['bookmarks']['public-CHICKEN'][
            'xmin'] is None

        #cow messages
        assert CAUGHT_MESSAGES[7]['type'] == 'SCHEMA'

        assert "public-COW" == CAUGHT_MESSAGES[7]['stream']
        assert isinstance(CAUGHT_MESSAGES[8], singer.StateMessage)
        assert CAUGHT_MESSAGES[8].value['bookmarks']['public-COW'].get(
            'xmin') is None
        assert "public-COW" == CAUGHT_MESSAGES[8].value['currently_syncing']

        assert isinstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage)
        cow_version = CAUGHT_MESSAGES[9].version
        assert isinstance(CAUGHT_MESSAGES[10], singer.RecordMessage)

        assert CAUGHT_MESSAGES[10].record['name'] == 'betty'
        assert 'public-COW' == CAUGHT_MESSAGES[10].stream

        assert isinstance(CAUGHT_MESSAGES[11], singer.StateMessage)
        #xmin is set while we are processing the full table replication
        assert CAUGHT_MESSAGES[11].value['bookmarks']['public-COW'][
            'xmin'] is not None

        assert CAUGHT_MESSAGES[12].record['name'] == 'smelly'
        assert 'public-COW' == CAUGHT_MESSAGES[12].stream
        # The last captured message supplies the state used to resume.
        old_state = CAUGHT_MESSAGES[13].value

        #run another do_sync
        singer.write_message = singer_write_message_ok
        CAUGHT_MESSAGES.clear()
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0

        tap_postgres.do_sync(
            get_test_connection_config(use_secondary=use_secondary),
            {'streams': streams}, None, old_state)

        mock_connect.assert_called_with(**expected_connection)
        mock_connect.reset_mock()

        assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
        assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)

        # because we were interrupted, we do not switch versions
        assert CAUGHT_MESSAGES[1].value['bookmarks']['public-COW'][
            'version'] == cow_version
        assert CAUGHT_MESSAGES[1].value['bookmarks']['public-COW'][
            'xmin'] is not None
        assert "public-COW" == singer.get_currently_syncing(
            CAUGHT_MESSAGES[1].value)

        assert isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
        assert CAUGHT_MESSAGES[2].record['name'] == 'smelly'
        assert 'public-COW' == CAUGHT_MESSAGES[2].stream

        #after the record: a state that still carries xmin and currently_syncing
        assert isinstance(CAUGHT_MESSAGES[3], singer.StateMessage)
        #we still have an xmin for COW because are not yet done with the COW table
        assert CAUGHT_MESSAGES[3].value['bookmarks']['public-COW'][
            'xmin'] is not None
        assert singer.get_currently_syncing(
            CAUGHT_MESSAGES[3].value) == 'public-COW'

        assert isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
        assert CAUGHT_MESSAGES[4].record['name'] == 'pooper'
        assert 'public-COW' == CAUGHT_MESSAGES[4].stream

        assert isinstance(CAUGHT_MESSAGES[5], singer.StateMessage)
        assert CAUGHT_MESSAGES[5].value['bookmarks']['public-COW'][
            'xmin'] is not None
        assert singer.get_currently_syncing(
            CAUGHT_MESSAGES[5].value) == 'public-COW'

        #xmin is cleared because we are finished the full table replication
        assert isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
        assert CAUGHT_MESSAGES[6].version == cow_version

        assert isinstance(CAUGHT_MESSAGES[7], singer.StateMessage)
        assert singer.get_currently_syncing(CAUGHT_MESSAGES[7].value) is None
        assert CAUGHT_MESSAGES[7].value['bookmarks']['public-CHICKEN'][
            'xmin'] is None
Ejemplo n.º 19
0
    def test_catalog(self):
        """Check that every column of 'CHICKEN TIMES' is discovered with
        'unsupported' inclusion and is not selected by default."""
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [
            stream for stream in discovered
            if stream["tap_stream_id"] == "postgres-public-CHICKEN TIMES"
        ]

        self.assertEqual(len(matches), 1)
        stream_dict = matches[0]
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        # Column name -> SQL datatype expected in the column's metadata.
        sql_types = {
            "bytea_col": "bytea",
            "bit_string_col": "bit(5)",
            "line_col": "line",
            "xml_col": "xml",
            "int_range_col": "int4range",
            "circle_col": "circle",
            "polygon_col": "polygon",
            "box_col": "box",
            "lseg_col": "lseg",
            "composite_col": "person_composite",
            "interval_col": "interval",
            "point_col": "point",
        }

        expected = {
            (): {
                "is-view": False,
                "table-key-properties": [],
                "row-count": 0,
                "schema-name": "public",
                "database-name": "postgres",
            },
        }
        # Every column shares the same inclusion/selection metadata; only the
        # SQL datatype differs.
        for column, sql_type in sql_types.items():
            expected[("properties", column)] = {
                "sql-datatype": sql_type,
                "selected-by-default": False,
                "inclusion": "unsupported",
            }

        self.assertEqual(metadata.to_map(stream_dict.get("metadata")),
                         expected)
    def test_catalog(self, mock_connect, use_secondary):
        """Interrupt the initial full-table phase of a LOG_BASED sync and resume.

        The first ``do_sync`` is expected to raise after seven messages have
        been captured. The second ``do_sync`` starts from the last captured
        state and must emit the remaining records while keeping the same
        version and lsn bookmark.

        Args:
            mock_connect: connection mock whose calls are compared with the
                expected host/port (presumably injected by a patch decorator
                that is not part of this block -- confirm).
            use_secondary: when truthy, the secondary host/port of the
                connection config are the ones expected in the connect call.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config(use_secondary=use_secondary)
        streams = tap_postgres.do_discovery(conn_config)

        # Assert that we connected to the correct database
        expected_connection = {
            'application_name':
            unittest.mock.ANY,
            'dbname':
            unittest.mock.ANY,
            'user':
            unittest.mock.ANY,
            'password':
            unittest.mock.ANY,
            'connect_timeout':
            unittest.mock.ANY,
            'host':
            conn_config['secondary_host']
            if use_secondary else conn_config['host'],
            'port':
            conn_config['secondary_port']
            if use_secondary else conn_config['port'],
        }
        mock_connect.assert_called_once_with(**expected_connection)
        mock_connect.reset_mock()

        cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
        assert cow_stream is not None
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream, 'LOG_BASED')

        conn = get_test_connection()
        # Close the connection even when one of the inserts fails.
        try:
            conn.autocommit = True

            with conn.cursor() as cur:
                cow_rec = {
                    'name': 'betty',
                    'colour': 'blue',
                    'timestamp_ntz': '2020-09-01 10:40:59',
                    'timestamp_tz': '2020-09-01 00:50:59+02'
                }
                insert_record(cur, 'COW', cow_rec)

                cow_rec = {
                    'name': 'smelly',
                    'colour': 'brow',
                    'timestamp_ntz': '2020-09-01 10:40:59 BC',
                    'timestamp_tz': '2020-09-01 00:50:59+02 BC'
                }
                insert_record(cur, 'COW', cow_rec)

                cow_rec = {
                    'name': 'pooper',
                    'colour': 'green',
                    'timestamp_ntz': '30000-09-01 10:40:59',
                    'timestamp_tz': '10000-09-01 00:50:59+02'
                }
                insert_record(cur, 'COW', cow_rec)
        finally:
            conn.close()

        def cow_bookmark(index):
            """Return the COW bookmark of the state message at *index*."""
            return CAUGHT_MESSAGES[index].value['bookmarks']['public-COW']

        blew_up_on_cow = False
        state = {}
        #the initial phase of cows logical replication will be a full table.
        #it will sync the first record and then blow up on the 2nd record
        try:
            tap_postgres.do_sync(
                get_test_connection_config(use_secondary=use_secondary),
                {'streams': streams}, None, state)
        except Exception:
            blew_up_on_cow = True

        assert blew_up_on_cow is True

        mock_connect.assert_called_with(**expected_connection)
        mock_connect.reset_mock()

        assert 7 == len(CAUGHT_MESSAGES)

        assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
        assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        assert cow_bookmark(1).get('xmin') is None
        assert cow_bookmark(1).get('lsn') is not None
        end_lsn = cow_bookmark(1).get('lsn')

        assert isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
        new_version = CAUGHT_MESSAGES[2].version

        assert isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        assert CAUGHT_MESSAGES[3].record == {
            'colour': 'blue',
            'id': 1,
            'name': 'betty',
            'timestamp_ntz': '2020-09-01T10:40:59+00:00',
            'timestamp_tz': '2020-08-31T22:50:59+00:00'
        }

        assert 'public-COW' == CAUGHT_MESSAGES[3].stream

        assert isinstance(CAUGHT_MESSAGES[4], singer.StateMessage)
        #xmin is set while we are processing the full table replication
        assert cow_bookmark(4)['xmin'] is not None
        assert cow_bookmark(4)['lsn'] == end_lsn

        assert CAUGHT_MESSAGES[5].record == {
            'colour': 'brow',
            'id': 2,
            'name': 'smelly',
            'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
            'timestamp_tz': '9999-12-31T23:59:59.999000+00:00'
        }

        assert 'public-COW' == CAUGHT_MESSAGES[5].stream

        assert isinstance(CAUGHT_MESSAGES[6], singer.StateMessage)
        last_xmin = cow_bookmark(6)['xmin']
        old_state = CAUGHT_MESSAGES[6].value

        #run another do_sync, should get the remaining record which effectively finishes the initial full_table
        #replication portion of the logical replication
        singer.write_message = singer_write_message_ok
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0
        CAUGHT_MESSAGES.clear()
        tap_postgres.do_sync(
            get_test_connection_config(use_secondary=use_secondary),
            {'streams': streams}, None, old_state)

        mock_connect.assert_called_with(**expected_connection)
        mock_connect.reset_mock()

        assert 8 == len(CAUGHT_MESSAGES)

        assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'

        assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        assert cow_bookmark(1).get('xmin') == last_xmin
        assert cow_bookmark(1).get('lsn') == end_lsn
        assert cow_bookmark(1).get('version') == new_version

        assert isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
        assert CAUGHT_MESSAGES[2].record == {
            'colour': 'brow',
            'id': 2,
            'name': 'smelly',
            'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
            'timestamp_tz': '9999-12-31T23:59:59.999000+00:00'
        }

        assert 'public-COW' == CAUGHT_MESSAGES[2].stream

        assert isinstance(CAUGHT_MESSAGES[3], singer.StateMessage)
        # Compare explicitly: `assert value, last_xmin` would only check
        # truthiness and use last_xmin as the failure message.
        assert cow_bookmark(3).get('xmin') == last_xmin
        assert cow_bookmark(3).get('lsn') == end_lsn
        assert cow_bookmark(3).get('version') == new_version

        assert isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
        assert CAUGHT_MESSAGES[4].record == {
            'colour': 'green',
            'id': 3,
            'name': 'pooper',
            'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
            'timestamp_tz': '9999-12-31T23:59:59.999000+00:00'
        }
        assert 'public-COW' == CAUGHT_MESSAGES[4].stream

        assert isinstance(CAUGHT_MESSAGES[5], singer.StateMessage)
        assert cow_bookmark(5).get('xmin') > last_xmin
        assert cow_bookmark(5).get('lsn') == end_lsn
        assert cow_bookmark(5).get('version') == new_version

        assert isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
        assert CAUGHT_MESSAGES[6].version == new_version

        assert isinstance(CAUGHT_MESSAGES[7], singer.StateMessage)
        assert cow_bookmark(7).get('xmin') is None
        assert cow_bookmark(7).get('lsn') == end_lsn
        assert cow_bookmark(7).get('version') == new_version
    def test_catalog(self):
        """Interrupt the initial full-table phase of a LOG_BASED sync and resume.

        The first ``do_sync`` is expected to raise after seven messages have
        been captured. The second ``do_sync`` starts from the last captured
        state and must emit the remaining records while keeping the same
        version and lsn bookmark.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config()
        conn_config["emit_state_every_n_rows"] = 1
        streams = tap_postgres.do_discovery(conn_config)
        cow_stream = [s for s in streams if s["table_name"] == "COW"][0]
        self.assertIsNotNone(cow_stream)
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream, "LOG_BASED")

        with get_test_connection() as conn:
            conn.autocommit = True
            cur = conn.cursor()

            cow_rec = {"name": "betty", "colour": "blue"}
            insert_record(cur, "COW", cow_rec)

            cow_rec = {"name": "smelly", "colour": "brow"}
            insert_record(cur, "COW", cow_rec)

            cow_rec = {"name": "pooper", "colour": "green"}
            insert_record(cur, "COW", cow_rec)

        def cow_bookmark(index):
            """Return the COW bookmark of the state message at *index*."""
            bookmarks = CAUGHT_MESSAGES[index].value["bookmarks"]
            return bookmarks["postgres-public-COW"]

        state = {}
        # Initialise the flag so that a sync which unexpectedly succeeds is
        # reported as an assertion failure instead of a NameError.
        blew_up_on_cow = False
        # the initial phase of cows logical replication will be a full table.
        # it will sync the first record and then blow up on the 2nd record
        try:
            tap_postgres.do_sync(conn_config, {"streams": streams}, None,
                                 state)
        except Exception:
            blew_up_on_cow = True

        self.assertTrue(blew_up_on_cow)

        self.assertEqual(7, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertIsNone(cow_bookmark(1).get("xmin"))
        self.assertIsNotNone(cow_bookmark(1).get("lsn"))
        end_lsn = cow_bookmark(1).get("lsn")

        self.assertIsInstance(CAUGHT_MESSAGES[2],
                              singer.ActivateVersionMessage)
        new_version = CAUGHT_MESSAGES[2].version

        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        self.assertEqual(CAUGHT_MESSAGES[3].record, {
            "colour": "blue",
            "id": 1,
            "name": "betty"
        })
        self.assertEqual("COW", CAUGHT_MESSAGES[3].stream)

        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
        # xmin is set while we are processing the full table replication
        self.assertIsNotNone(cow_bookmark(4)["xmin"])
        self.assertEqual(cow_bookmark(4)["lsn"], end_lsn)

        self.assertEqual(CAUGHT_MESSAGES[5].record["name"], "smelly")
        self.assertEqual("COW", CAUGHT_MESSAGES[5].stream)

        self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
        last_xmin = cow_bookmark(6)["xmin"]
        old_state = CAUGHT_MESSAGES[6].value

        # run another do_sync, should get the remaining record which effectively finishes the
        # initial full_table replication portion of the logical replication
        singer.write_message = singer_write_message_ok
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0
        CAUGHT_MESSAGES.clear()
        tap_postgres.do_sync(conn_config, {"streams": streams}, None,
                             old_state)

        self.assertEqual(8, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")

        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertEqual(cow_bookmark(1).get("xmin"), last_xmin)
        self.assertEqual(cow_bookmark(1).get("lsn"), end_lsn)
        self.assertEqual(cow_bookmark(1).get("version"), new_version)

        self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
        self.assertEqual(CAUGHT_MESSAGES[2].record, {
            "colour": "brow",
            "id": 2,
            "name": "smelly"
        })
        self.assertEqual("COW", CAUGHT_MESSAGES[2].stream)

        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
        # assertEqual, not assertTrue: assertTrue(value, last_xmin) would
        # treat last_xmin as the failure message and only check truthiness.
        self.assertEqual(cow_bookmark(3).get("xmin"), last_xmin)
        self.assertEqual(cow_bookmark(3).get("lsn"), end_lsn)
        self.assertEqual(cow_bookmark(3).get("version"), new_version)

        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
        self.assertEqual(CAUGHT_MESSAGES[4].record["name"], "pooper")
        self.assertEqual("COW", CAUGHT_MESSAGES[4].stream)

        self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
        self.assertGreater(cow_bookmark(5).get("xmin"), last_xmin)
        self.assertEqual(cow_bookmark(5).get("lsn"), end_lsn)
        self.assertEqual(cow_bookmark(5).get("version"), new_version)

        self.assertIsInstance(CAUGHT_MESSAGES[6],
                              singer.ActivateVersionMessage)
        self.assertEqual(CAUGHT_MESSAGES[6].version, new_version)

        self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
        self.assertIsNone(cow_bookmark(7).get("xmin"))
        self.assertEqual(cow_bookmark(7).get("lsn"), end_lsn)
        self.assertEqual(cow_bookmark(7).get("version"), new_version)
Ejemplo n.º 22
0
    def test_catalog(self):
        """Check discovery output for the "CHICKEN TIMES" numeric columns.

        Exactly one stream with id ``postgres-public-CHICKEN TIMES`` must be
        discovered, and both its metadata map and its JSON schema must match
        the expected values for the three decimal columns.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [
            entry
            for entry in discovered
            if entry["tap_stream_id"] == "postgres-public-CHICKEN TIMES"
        ]
        self.assertEqual(len(matches), 1)
        stream_dict = matches[0]

        # Order the metadata entries by breadcrumb, in place.
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        def numeric_column(inclusion):
            # Metadata expected for a numeric column with the given inclusion.
            return {
                "inclusion": inclusion,
                "sql-datatype": "numeric",
                "selected-by-default": True,
            }

        def bounded_number(limit, step):
            # Schema expected for a nullable numeric column with explicit
            # precision/scale: open interval (-limit, limit) in steps of `step`.
            return {
                "exclusiveMaximum": True,
                "exclusiveMinimum": True,
                "maximum": limit,
                "minimum": -limit,
                "multipleOf": step,
                "type": ["null", "number"],
            }

        actual_metadata = metadata.to_map(stream_dict.get("metadata"))
        expected_metadata = {
            (): {
                "table-key-properties": ["our_decimal"],
                "database-name": "postgres",
                "schema-name": "public",
                "is-view": False,
                "row-count": 0,
            },
            ("properties", "our_decimal"): numeric_column("automatic"),
            ("properties", "our_decimal_38_4"): numeric_column("available"),
            ("properties", "our_decimal_10_2"): numeric_column("available"),
        }
        self.assertEqual(actual_metadata, expected_metadata)

        # The unconstrained column falls back to the module-level maximum
        # precision and scale.
        default_limit = 10 ** (post_db.MAX_PRECISION - post_db.MAX_SCALE)
        expected_schema = {
            "properties": {
                "our_decimal": {
                    "exclusiveMaximum": True,
                    "exclusiveMinimum": True,
                    "multipleOf": 10 ** (0 - post_db.MAX_SCALE),
                    "maximum": default_limit,
                    "minimum": -default_limit,
                    "type": ["number"],
                },
                "our_decimal_10_2": bounded_number(100000000, 0.01),
                "our_decimal_38_4": bounded_number(
                    10000000000000000000000000000000000, 0.0001
                ),
            },
            "type": "object",
            "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
        }
        self.assertEqual(expected_schema, stream_dict.get("schema"))
Ejemplo n.º 23
0
    def test_catalog(self):
        """Interrupt a FULL_TABLE sync on COW, then resume it from saved state.

        The first ``do_sync`` runs with ``singer_write_message_no_cow``
        installed as ``singer.write_message`` and is expected to raise while
        the COW table is being synced (CHICKEN completes first). The last
        message captured before the failure supplies the state for a second
        ``do_sync``, which must keep the same COW table version and carry an
        ``xmin`` bookmark until the table is finished.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config()
        streams = tap_postgres.do_discovery(conn_config)
        cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
        self.assertIsNotNone(cow_stream)
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream, 'FULL_TABLE')

        chicken_stream = [s for s in streams if s['table_name'] == 'CHICKEN'][0]
        self.assertIsNotNone(chicken_stream)
        chicken_stream = select_all_of_stream(chicken_stream)
        chicken_stream = set_replication_method_for_stream(chicken_stream, 'FULL_TABLE')
        with get_test_connection() as conn:
            conn.autocommit = True
            cur = conn.cursor()

            cow_rec = {'name' : 'betty', 'colour' : 'blue'}
            insert_record(cur, 'COW', cow_rec)
            cow_rec = {'name' : 'smelly', 'colour' : 'brow'}
            insert_record(cur, 'COW', cow_rec)

            cow_rec = {'name' : 'pooper', 'colour' : 'green'}
            insert_record(cur, 'COW', cow_rec)

            chicken_rec = {'name' : 'fred', 'colour' : 'red'}
            insert_record(cur, 'CHICKEN', chicken_rec)

        state = {}
        # Bind the flag up front so that a sync which does NOT raise produces
        # a normal assertion failure instead of an UnboundLocalError.
        blew_up_on_cow = False
        # this will sync the CHICKEN but then blow up on the COW
        try:
            tap_postgres.do_sync(get_test_connection_config(), {'streams' : streams}, None, state)
        except Exception:
            blew_up_on_cow = True

        self.assertTrue(blew_up_on_cow)


        self.assertEqual(14, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-CHICKEN'].get('xmin'))

        self.assertTrue(isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage))
        new_version = CAUGHT_MESSAGES[2].version

        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage))
        self.assertEqual('CHICKEN', CAUGHT_MESSAGES[3].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.StateMessage))
        #xmin is set while we are processing the full table replication
        self.assertIsNotNone(CAUGHT_MESSAGES[4].value['bookmarks']['postgres-public-CHICKEN']['xmin'])

        self.assertTrue(isinstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage))
        self.assertEqual(CAUGHT_MESSAGES[5].version, new_version)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[6], singer.StateMessage))
        self.assertEqual(None, singer.get_currently_syncing( CAUGHT_MESSAGES[6].value))
        #xmin is cleared at the end of the full table replication
        self.assertIsNone(CAUGHT_MESSAGES[6].value['bookmarks']['postgres-public-CHICKEN']['xmin'])


        #cow messages
        self.assertEqual(CAUGHT_MESSAGES[7]['type'], 'SCHEMA')

        self.assertEqual("COW", CAUGHT_MESSAGES[7]['stream'])
        self.assertTrue(isinstance(CAUGHT_MESSAGES[8], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[8].value['bookmarks']['postgres-public-COW'].get('xmin'))
        self.assertEqual("postgres-public-COW", CAUGHT_MESSAGES[8].value['currently_syncing'])

        self.assertTrue(isinstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage))
        cow_version = CAUGHT_MESSAGES[9].version
        self.assertTrue(isinstance(CAUGHT_MESSAGES[10], singer.RecordMessage))

        self.assertEqual(CAUGHT_MESSAGES[10].record['name'], 'betty')
        self.assertEqual('COW', CAUGHT_MESSAGES[10].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[11], singer.StateMessage))
        #xmin is set while we are processing the full table replication
        self.assertIsNotNone(CAUGHT_MESSAGES[11].value['bookmarks']['postgres-public-COW']['xmin'])


        self.assertEqual(CAUGHT_MESSAGES[12].record['name'], 'smelly')
        self.assertEqual('COW', CAUGHT_MESSAGES[12].stream)
        # the last captured message supplies the state to resume from
        old_state = CAUGHT_MESSAGES[13].value

        #run another do_sync, this time with a writer that does not fail
        singer.write_message = singer_write_message_ok
        CAUGHT_MESSAGES.clear()
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0

        tap_postgres.do_sync(get_test_connection_config(), {'streams' : streams}, None, old_state)

        self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))

        # because we were interrupted, we do not switch versions
        self.assertEqual(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']['version'], cow_version)
        self.assertIsNotNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']['xmin'])
        self.assertEqual("postgres-public-COW", singer.get_currently_syncing(CAUGHT_MESSAGES[1].value))

        self.assertTrue(isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[2].record['name'], 'smelly')
        self.assertEqual('COW', CAUGHT_MESSAGES[2].stream)


        #after the record: a state message that still carries xmin and currently_syncing
        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.StateMessage))
        #we still have an xmin for COW because we are not yet done with the COW table
        self.assertIsNotNone(CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW']['xmin'])
        self.assertEqual(singer.get_currently_syncing( CAUGHT_MESSAGES[3].value), 'postgres-public-COW')

        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[4].record['name'], 'pooper')
        self.assertEqual('COW', CAUGHT_MESSAGES[4].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[5], singer.StateMessage))
        self.assertIsNotNone(CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW']['xmin'])
        self.assertEqual(singer.get_currently_syncing( CAUGHT_MESSAGES[5].value), 'postgres-public-COW')


        #the full table replication is finished: same version is activated again
        self.assertTrue(isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage))
        self.assertEqual(CAUGHT_MESSAGES[6].version, cow_version)

        #final state: nothing is currently syncing and CHICKEN has no xmin
        self.assertTrue(isinstance(CAUGHT_MESSAGES[7], singer.StateMessage))
        self.assertIsNone(singer.get_currently_syncing( CAUGHT_MESSAGES[7].value))
        self.assertIsNone(CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-CHICKEN']['xmin'])
Ejemplo n.º 24
0
    def test_catalog(self):
        """Check discovery output for both ``different_chicken`` streams.

        The ``postgres-`` and ``dev-`` prefixed streams must both be
        discovered, and each must expose the same date/time schema and column
        metadata. Only the ``database-name`` entry is taken from the stream
        itself rather than hard-coded.
        """
        wanted_ids = (
            "postgres-public-different_chicken",
            "dev-public-different_chicken",
        )
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        chicken_streams = [
            entry for entry in discovered if entry["tap_stream_id"] in wanted_ids
        ]
        self.assertEqual(len(chicken_streams), 2)

        def column(inclusion, sql_type):
            # Metadata expected for one column.
            return {
                "inclusion": inclusion,
                "sql-datatype": sql_type,
                "selected-by-default": True,
            }

        for stream_dict in chicken_streams:
            expected_schema = {
                "properties": {
                    "our_date": {"type": ["string"], "format": "date-time"},
                    "our_ts": {"type": ["null", "string"], "format": "date-time"},
                    "our_ts_tz": {"type": ["null", "string"], "format": "date-time"},
                    "our_time": {"type": ["null", "string"]},
                    "our_time_tz": {"type": ["null", "string"]},
                },
                "type": "object",
                "definitions": tap_postgres.BASE_RECURSIVE_SCHEMAS,
            }
            self.assertEqual(expected_schema, stream_dict.get("schema"))

            actual_metadata = metadata.to_map(stream_dict.get("metadata"))
            # The database name differs per stream, so read it back from the
            # table-level metadata instead of hard-coding it.
            db_name = actual_metadata.get(()).get("database-name")

            expected_metadata = {
                (): {
                    "table-key-properties": ["our_date"],
                    "database-name": db_name,
                    "schema-name": "public",
                    "is-view": False,
                    "row-count": 0,
                },
                ("properties", "our_date"): column("automatic", "date"),
                ("properties", "our_ts"): column(
                    "available", "timestamp without time zone"
                ),
                ("properties", "our_ts_tz"): column(
                    "available", "timestamp with time zone"
                ),
                ("properties", "our_time"): column(
                    "available", "time without time zone"
                ),
                ("properties", "our_time_tz"): column(
                    "available", "time with time zone"
                ),
            }
            self.assertEqual(actual_metadata, expected_metadata)
    def test_catalog(self):
        """Interrupt a FULL_TABLE sync on COW, then resume it from saved state.

        Same scenario as a plain interrupted full-table sync, but the
        connection config sets ``emit_state_every_n_rows`` to 1 (presumably so
        a state message follows every record; the assertions below expect
        exactly that interleaving). The first ``do_sync`` is expected to raise
        while COW is being synced. The second one resumes from the last
        captured state and must keep the same COW table version.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config()
        conn_config["emit_state_every_n_rows"] = 1
        streams = tap_postgres.do_discovery(conn_config)
        cow_stream = [s for s in streams if s["table_name"] == "COW"][0]
        self.assertIsNotNone(cow_stream)
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream,
                                                       "FULL_TABLE")

        chicken_stream = [s for s in streams
                          if s["table_name"] == "CHICKEN"][0]
        self.assertIsNotNone(chicken_stream)
        chicken_stream = select_all_of_stream(chicken_stream)
        chicken_stream = set_replication_method_for_stream(
            chicken_stream, "FULL_TABLE")
        with get_test_connection() as conn:
            conn.autocommit = True
            cur = conn.cursor()

            cow_rec = {"name": "betty", "colour": "blue"}
            insert_record(cur, "COW", cow_rec)
            cow_rec = {"name": "smelly", "colour": "brow"}
            insert_record(cur, "COW", cow_rec)

            cow_rec = {"name": "pooper", "colour": "green"}
            insert_record(cur, "COW", cow_rec)

            chicken_rec = {"name": "fred", "colour": "red"}
            insert_record(cur, "CHICKEN", chicken_rec)

        state = {}
        # Bind the flag up front so that a sync which does NOT raise produces
        # a normal assertion failure instead of an UnboundLocalError.
        blew_up_on_cow = False
        # this will sync the CHICKEN but then blow up on the COW
        try:
            tap_postgres.do_sync(conn_config, {"streams": streams}, None,
                                 state)
        except Exception:
            blew_up_on_cow = True

        self.assertTrue(blew_up_on_cow)

        self.assertEqual(14, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[1].value["bookmarks"]
                          ["postgres-public-CHICKEN"].get("xmin"))

        self.assertTrue(
            isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage))
        new_version = CAUGHT_MESSAGES[2].version

        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage))
        self.assertEqual("CHICKEN", CAUGHT_MESSAGES[3].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.StateMessage))
        # xmin is set while we are processing the full table replication
        self.assertIsNotNone(CAUGHT_MESSAGES[4].value["bookmarks"]
                             ["postgres-public-CHICKEN"]["xmin"])

        self.assertTrue(
            isinstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage))
        self.assertEqual(CAUGHT_MESSAGES[5].version, new_version)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[6], singer.StateMessage))
        self.assertEqual(
            None, singer.get_currently_syncing(CAUGHT_MESSAGES[6].value))
        # xmin is cleared at the end of the full table replication
        self.assertIsNone(CAUGHT_MESSAGES[6].value["bookmarks"]
                          ["postgres-public-CHICKEN"]["xmin"])

        # cow messages
        self.assertEqual(CAUGHT_MESSAGES[7]["type"], "SCHEMA")

        self.assertEqual("COW", CAUGHT_MESSAGES[7]["stream"])
        self.assertTrue(isinstance(CAUGHT_MESSAGES[8], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[8].value["bookmarks"]
                          ["postgres-public-COW"].get("xmin"))
        self.assertEqual("postgres-public-COW",
                         CAUGHT_MESSAGES[8].value["currently_syncing"])

        self.assertTrue(
            isinstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage))
        cow_version = CAUGHT_MESSAGES[9].version
        self.assertTrue(isinstance(CAUGHT_MESSAGES[10], singer.RecordMessage))

        self.assertEqual(CAUGHT_MESSAGES[10].record["name"], "betty")
        self.assertEqual("COW", CAUGHT_MESSAGES[10].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[11], singer.StateMessage))
        # xmin is set while we are processing the full table replication
        self.assertIsNotNone(CAUGHT_MESSAGES[11].value["bookmarks"]
                             ["postgres-public-COW"]["xmin"])

        self.assertEqual(CAUGHT_MESSAGES[12].record["name"], "smelly")
        self.assertEqual("COW", CAUGHT_MESSAGES[12].stream)
        # the last captured message supplies the state to resume from
        old_state = CAUGHT_MESSAGES[13].value

        # run another do_sync, this time with a writer that does not fail
        singer.write_message = singer_write_message_ok
        CAUGHT_MESSAGES.clear()
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0

        tap_postgres.do_sync(conn_config, {"streams": streams}, None,
                             old_state)

        self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))

        # because we were interrupted, we do not switch versions
        self.assertEqual(
            CAUGHT_MESSAGES[1].value["bookmarks"]["postgres-public-COW"]
            ["version"], cow_version)
        self.assertIsNotNone(CAUGHT_MESSAGES[1].value["bookmarks"]
                             ["postgres-public-COW"]["xmin"])
        self.assertEqual(
            "postgres-public-COW",
            singer.get_currently_syncing(CAUGHT_MESSAGES[1].value))

        self.assertTrue(isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[2].record["name"], "smelly")
        self.assertEqual("COW", CAUGHT_MESSAGES[2].stream)

        # after the record: a state message that still carries xmin and
        # currently_syncing
        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.StateMessage))
        # we still have an xmin for COW because we are not yet done with the
        # COW table
        self.assertIsNotNone(CAUGHT_MESSAGES[3].value["bookmarks"]
                             ["postgres-public-COW"]["xmin"])
        self.assertEqual(
            singer.get_currently_syncing(CAUGHT_MESSAGES[3].value),
            "postgres-public-COW")

        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[4].record["name"], "pooper")
        self.assertEqual("COW", CAUGHT_MESSAGES[4].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[5], singer.StateMessage))
        self.assertIsNotNone(CAUGHT_MESSAGES[5].value["bookmarks"]
                             ["postgres-public-COW"]["xmin"])
        self.assertEqual(
            singer.get_currently_syncing(CAUGHT_MESSAGES[5].value),
            "postgres-public-COW")

        # the full table replication is finished: same version is activated
        # again
        self.assertTrue(
            isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage))
        self.assertEqual(CAUGHT_MESSAGES[6].version, cow_version)

        # final state: nothing is currently syncing and CHICKEN has no xmin
        self.assertTrue(isinstance(CAUGHT_MESSAGES[7], singer.StateMessage))
        self.assertIsNone(
            singer.get_currently_syncing(CAUGHT_MESSAGES[7].value))
        self.assertIsNone(CAUGHT_MESSAGES[7].value["bookmarks"]
                          ["postgres-public-CHICKEN"]["xmin"])
Ejemplo n.º 26
0
    def test_catalog(self):
        """Interrupt the initial full-table phase of a LOG_BASED sync and resume it.

        COW is configured for LOG_BASED replication. The first ``do_sync``
        runs with ``singer_write_message_no_cow`` installed as
        ``singer.write_message`` and is expected to raise after seven messages
        have been captured. The state from the last captured message is fed
        into a second ``do_sync``, which must keep the same ``lsn`` and
        ``version``, emit the remaining records, and end with a state whose
        ``xmin`` is cleared.
        """
        singer.write_message = singer_write_message_no_cow
        pg_common.write_schema_message = singer_write_message_ok

        conn_config = get_test_connection_config()
        streams = tap_postgres.do_discovery(conn_config)
        cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
        self.assertIsNotNone(cow_stream)
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream, 'LOG_BASED')

        with get_test_connection() as conn:
            conn.autocommit = True
            cur = conn.cursor()

            cow_rec = {'name' : 'betty', 'colour' : 'blue'}
            insert_record(cur, 'COW', cow_rec)

            cow_rec = {'name' : 'smelly', 'colour' : 'brow'}
            insert_record(cur, 'COW', cow_rec)

            cow_rec = {'name' : 'pooper', 'colour' : 'green'}
            insert_record(cur, 'COW', cow_rec)

        state = {}
        # Bind the flag up front so that a sync which does NOT raise produces
        # a normal assertion failure instead of an UnboundLocalError.
        blew_up_on_cow = False
        #the initial phase of cows logical replication will be a full table.
        #it is expected to blow up partway through the COW records
        try:
            tap_postgres.do_sync(get_test_connection_config(), {'streams' : streams}, None, state)
        except Exception:
            blew_up_on_cow = True

        self.assertTrue(blew_up_on_cow)

        self.assertEqual(7, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('xmin'))
        self.assertIsNotNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('lsn'))
        #the lsn bookmark must stay the same for the whole full-table phase
        end_lsn = CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('lsn')

        self.assertTrue(isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage))
        new_version = CAUGHT_MESSAGES[2].version

        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[3].record, {'colour': 'blue', 'id': 1, 'name': 'betty'})
        self.assertEqual('COW', CAUGHT_MESSAGES[3].stream)



        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.StateMessage))
        #xmin is set while we are processing the full table replication
        self.assertIsNotNone(CAUGHT_MESSAGES[4].value['bookmarks']['postgres-public-COW']['xmin'])
        self.assertEqual(CAUGHT_MESSAGES[4].value['bookmarks']['postgres-public-COW']['lsn'], end_lsn)

        self.assertEqual(CAUGHT_MESSAGES[5].record['name'], 'smelly')
        self.assertEqual('COW', CAUGHT_MESSAGES[5].stream)

        #the last captured state is what the second sync resumes from
        self.assertTrue(isinstance(CAUGHT_MESSAGES[6], singer.StateMessage))
        last_xmin = CAUGHT_MESSAGES[6].value['bookmarks']['postgres-public-COW']['xmin']
        old_state = CAUGHT_MESSAGES[6].value


        #run another do_sync, should get the remaining record which effectively finishes the initial full_table
        #replication portion of the logical replication
        singer.write_message = singer_write_message_ok
        global COW_RECORD_COUNT
        COW_RECORD_COUNT = 0
        CAUGHT_MESSAGES.clear()
        tap_postgres.do_sync(get_test_connection_config(), {'streams' : streams}, None, old_state)

        self.assertEqual(8, len(CAUGHT_MESSAGES))

        self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')

        self.assertTrue(isinstance(CAUGHT_MESSAGES[1], singer.StateMessage))
        self.assertEqual(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('xmin'), last_xmin)
        self.assertEqual(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('lsn'), end_lsn)
        self.assertEqual(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW'].get('version'), new_version)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[2].record, {'colour': 'brow', 'id': 2, 'name': 'smelly'})
        self.assertEqual('COW', CAUGHT_MESSAGES[2].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[3], singer.StateMessage))
        #assertEqual, not assertTrue: assertTrue(value, last_xmin) would use
        #last_xmin as the failure message and never compare the two values
        self.assertEqual(CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW'].get('xmin'), last_xmin)
        self.assertEqual(CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW'].get('lsn'), end_lsn)
        self.assertEqual(CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW'].get('version'), new_version)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage))
        self.assertEqual(CAUGHT_MESSAGES[4].record['name'], 'pooper')
        self.assertEqual('COW', CAUGHT_MESSAGES[4].stream)

        self.assertTrue(isinstance(CAUGHT_MESSAGES[5], singer.StateMessage))
        self.assertTrue(CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW'].get('xmin') > last_xmin)
        self.assertEqual(CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW'].get('lsn'), end_lsn)
        self.assertEqual(CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW'].get('version'), new_version)


        self.assertTrue(isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage))
        self.assertEqual(CAUGHT_MESSAGES[6].version, new_version)

        #final state: xmin is cleared, lsn and version are unchanged
        self.assertTrue(isinstance(CAUGHT_MESSAGES[7], singer.StateMessage))
        self.assertIsNone(CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-COW'].get('xmin'))
        self.assertEqual(CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-COW'].get('lsn'), end_lsn)
        self.assertEqual(CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-COW'].get('version'), new_version)
Ejemplo n.º 27
0
    def test_catalog(self):
        """Check discovery output for the domain-typed "CHICKEN TIMES" columns.

        Exactly one stream with id ``public-CHICKEN TIMES`` must be
        discovered. With a connection open and one row inserted, its metadata
        map and JSON schema are compared against the expected values for the
        text, integer and numeric domain columns.
        """
        discovered = tap_postgres.do_discovery(get_test_connection_config())
        matches = [
            entry
            for entry in discovered
            if entry["tap_stream_id"] == "public-CHICKEN TIMES"
        ]
        self.assertEqual(len(matches), 1)
        stream_dict = matches[0]
        # Order the metadata entries by breadcrumb, in place.
        stream_dict.get("metadata").sort(key=lambda entry: entry["breadcrumb"])

        def column(inclusion, sql_type):
            # Metadata expected for one column.
            return {
                "inclusion": inclusion,
                "sql-datatype": sql_type,
                "selected-by-default": True,
            }

        def nullable_int32():
            # Schema expected for a nullable integer-backed domain column.
            return {
                "minimum": -2147483648,
                "maximum": 2147483647,
                "type": ["null", "integer"],
            }

        def nullable_numeric():
            # Schema expected for a nullable numeric-backed domain column.
            return {
                "minimum": -100000000000,
                "maximum": 100000000000,
                "multipleOf": 0.01,
                "exclusiveMaximum": True,
                "exclusiveMinimum": True,
                "type": ["null", "number"],
            }

        with get_test_connection() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                insert_sql = """INSERT INTO "CHICKEN TIMES" (our_test_domain_pk, our_test_domain, our_test_integer_domain) VALUES ('sad', 'happy', 3)"""
                cur.execute(insert_sql)
                cur.execute("""SELECT * FROM  "CHICKEN TIMES" """)

                actual_metadata = metadata.to_map(stream_dict.get("metadata"))
                expected_metadata = {
                    (): {
                        "table-key-properties": ["our_test_domain_pk"],
                        "database-name": "postgres",
                        "schema-name": "public",
                        "is-view": False,
                        "row-count": 0,
                    },
                    ("properties", "our_test_domain_pk"): column(
                        "automatic", "text"
                    ),
                    ("properties", "our_test_domain"): column(
                        "available", "text"
                    ),
                    ("properties", "our_test_integer_domain"): column(
                        "available", "integer"
                    ),
                    ("properties", "our_test_nested_integer_domain"): column(
                        "available", "integer"
                    ),
                    ("properties", "our_test_numeric_domain"): column(
                        "available", "numeric"
                    ),
                    ("properties", "our_test_positive_numeric_domain"): column(
                        "available", "numeric"
                    ),
                }
                self.assertEqual(actual_metadata, expected_metadata)

                expected_schema = {
                    "properties": {
                        "our_test_domain_pk": {"type": ["string"]},
                        "our_test_domain": {"type": ["null", "string"]},
                        "our_test_integer_domain": nullable_int32(),
                        "our_test_nested_integer_domain": nullable_int32(),
                        "our_test_numeric_domain": nullable_numeric(),
                        "our_test_positive_numeric_domain": nullable_numeric(),
                    },
                    "type": "object",
                    "definitions": BASE_RECURSIVE_SCHEMAS,
                }
                self.assertEqual(expected_schema, stream_dict.get("schema"))