def test_simplify__allOf__datetime():
    """An `allOf` containing a `date-time` string sub-schema simplifies to a datetime schema."""
    string_and_datetime = {
        'allOf': [{'type': 'string'}, {'type': 'string', 'format': 'date-time'}]
    }
    simplified = json_schema.simplify(string_and_datetime)
    assert json_schema.is_datetime(simplified)

    # The same datetime result is expected when the sibling sub-schema is a number.
    number_and_datetime = {
        'allOf': [{'type': 'number'}, {'type': 'string', 'format': 'date-time'}]
    }
    expected = {'type': ['string'], 'format': 'date-time'}
    assert json_schema.simplify(number_and_datetime) == expected
def test_simplify__allOf__iterables__merges():
    '''
    Array sub-schemas listed under `allOf` are expected to merge into a single array schema.

    NOTE: We assume that the schemas passed into json_schema make sense, i.e. that there is
    some data which could _actually validate_ against them. For instance, a value cannot be
    both a scalar and an object at the same time.
    '''

    def array_of_objects(property_name, property_type):
        # Array schema whose items are objects declaring a single property.
        return {'type': 'array', 'items': {
            'type': 'object',
            'properties': {
                property_name: {'type': property_type}
            }}}

    object_items = {
        'allOf': [
            array_of_objects('a', 'integer'),
            array_of_objects('c', 'string'),
            array_of_objects('b', 'integer')]
    }
    merged_object_items = {
        'type': ['array'],
        'items': {
            'type': ['object'],
            'properties': {
                'a': {'type': ['integer']},
                'b': {'type': ['integer']},
                'c': {'type': ['string']}
            }}}
    assert json_schema.simplify(object_items) == merged_object_items

    # Arrays nested inside arrays.
    nested_arrays = {
        'allOf': [
            {'type': 'array', 'items': {
                'type': 'array',
                'items': {'type': 'integer'}}},
            {'type': 'array', 'items': {
                'type': 'array',
                'items': {'type': ['number', 'null']}}}]
    }
    merged_nested_arrays = {
        'type': ['array'],
        'items': {
            'type': ['array'],
            'items': {'type': ['number', 'null']}}}
    assert json_schema.simplify(nested_arrays) == merged_nested_arrays
def test_simplify__types_into_arrays():
    """Scalar `type` values are expected to come back wrapped in a list."""
    null_schema = {'type': 'null'}
    assert json_schema.simplify(null_schema) == {'type': ['null']}

    # The wrapping is also expected for nested property schemas.
    object_schema = {
        'type': ['object'],
        'properties': {
            'a': {'type': 'string'}}}
    expected_object = {
        'type': ['object'],
        'properties': {
            'a': {'type': ['string']}}}
    assert json_schema.simplify(object_schema) == expected_object
    def update_schema(self, schema, key_properties):
        """
        Record a new schema and key properties, filling in default Singer columns.

        Sets `self.schema` (simplified copy of `schema`), `self.key_properties`,
        `self.validator` and `self.use_uuid_pk`.

        :param schema: JSON schema for the stream; must simplify to something with `properties`
        :param key_properties: list of key property names; an empty list enables the `singer.PK` key
        """
        # To decide whether a value _is in_ properties _or not_, `$ref`s etc. must be flattened first.
        simplified = json_schema.simplify(schema)
        self.schema = simplified
        self.key_properties = deepcopy(key_properties)

        # The validator is built from the original schema: it understands _many_ more things
        # than the simplified schema and is, in general, handled by third party code.
        self.validator = Draft4Validator(schema,
                                         format_checker=FormatChecker())

        properties = simplified['properties']

        # Singer metadata columns, added only when the stream's schema does not define them.
        singer_defaults = (
            (singer.RECEIVED_AT, {'type': ['null', 'string'],
                                  'format': 'date-time'}),
            (singer.SEQUENCE, {'type': ['null', 'integer']}),
            (singer.TABLE_VERSION, {'type': ['null', 'integer']}),
            (singer.BATCHED_AT, {'type': ['null', 'string'],
                                 'format': 'date-time'}),
        )
        for column, column_schema in singer_defaults:
            if column not in properties:
                properties[column] = column_schema

        # With no key properties supplied, `singer.PK` is installed as the sole string key.
        self.use_uuid_pk = len(self.key_properties) == 0
        if self.use_uuid_pk:
            self.key_properties = [singer.PK]
            properties[singer.PK] = {'type': ['string']}
Beispiel #5
0
    def update_schema(self, schema, key_properties):
        """
        Record a new schema and key properties, filling in default Singer columns.

        Sets `self.schema` (simplified copy of `schema`), `self.key_properties`,
        `self.validator` (built from the simplified schema) and `self.use_uuid_pk`.

        :param schema: JSON schema for the stream; must simplify to something with `properties`
        :param key_properties: list of key property names; an empty list enables the `SINGER_PK` key
        """
        # To decide whether a value _is in_ properties _or not_, `$ref`s etc. must be flattened first.
        simplified = json_schema.simplify(schema)
        self.schema = simplified
        self.key_properties = deepcopy(key_properties)
        self.validator = Draft4Validator(simplified,
                                         format_checker=FormatChecker())

        properties = simplified['properties']

        # Singer metadata columns, added only when the stream's schema does not define them.
        singer_defaults = (
            (SINGER_RECEIVED_AT, {'type': ['null', 'string'],
                                  'format': 'date-time'}),
            (SINGER_SEQUENCE, {'type': ['null', 'integer']}),
            (SINGER_TABLE_VERSION, {'type': ['null', 'integer']}),
            (SINGER_BATCHED_AT, {'type': ['null', 'string'],
                                 'format': 'date-time'}),
        )
        for column, column_schema in singer_defaults:
            if column not in properties:
                properties[column] = column_schema

        # With no key properties supplied, `SINGER_PK` is installed as the sole string key.
        self.use_uuid_pk = len(self.key_properties) == 0
        if self.use_uuid_pk:
            self.key_properties = [SINGER_PK]
            properties[SINGER_PK] = {'type': ['string']}
Beispiel #6
0
def _get_streamed_table_schemas(schema, key_properties):
    """
    Build the denested/flattened TABLE_SCHEMA for the root table and for every sub table
    derived from `schema`.

    :param schema: SingerStreamSchema
    :param key_properties: [string, ...]
    :return: [TABLE_SCHEMA(denested_streamed_schema_0), ...] with the root table first
    """
    root_table_schema = json_schema.simplify(schema)

    # Key property schemas are read from the incoming `schema`, not from the simplified copy.
    key_prop_schemas = {
        key: schema['properties'][key]
        for key in key_properties
    }

    # `subtables` is handed to `_denest_schema`, which is expected to fill it in place.
    subtables = {}
    _denest_schema(tuple(), root_table_schema, key_prop_schemas, subtables)

    table_schemas = [
        _to_table_schema(tuple(), None, key_properties,
                         root_table_schema['properties'])
    ]
    table_schemas.extend(
        _to_table_schema(path, subtable['level'], subtable['key_properties'],
                         subtable['properties'])
        for path, subtable in subtables.items())

    return table_schemas
def test_simplify__allOf__picks_scalars():
    """When `allOf` mixes a scalar with other sub-schemas, the scalar type is the expected result."""
    scalar_among_others = {
        'allOf': [
            {},
            {'type': 'integer'},
            {'type': 'array', 'items': {'type': 'number'}}]
    }
    assert json_schema.simplify(scalar_among_others) == {'type': ['integer']}

    # A sub-schema carrying only `maxLength` is not expected to show up in the result.
    string_with_constraint = {
        "allOf": [
            { "type": "string" },
            { "maxLength": 5 }
        ]}
    assert json_schema.simplify(string_with_constraint) == {'type': ['string']}
def test_simplify__refs__missing():
    """A `$ref` pointing at a path that does not exist is expected to raise a "not found" error."""
    # No `definitions` at all.
    undefined_root = {'properties': {'singleton': {'$ref': '#/foo'}}}
    with pytest.raises(Exception, match=r'.*not found.*'):
        json_schema.simplify(undefined_root)

    # The definition exists, but the referenced path goes one level too deep.
    undefined_leaf = {
        'definitions': {
            'foo': {
                'type': 'null'
            }
        },
        'properties': {
            'singleton': {
                '$ref': '#/definitions/foo/bar'
            }
        }
    }
    with pytest.raises(Exception, match=r'.*not found.*'):
        json_schema.simplify(undefined_leaf)
Beispiel #9
0
def test_simplify__allOf__nullable():
    """An `allOf` with one nullable sub-schema is expected to simplify to a nullable schema."""
    schema = {
        'allOf': [
            {'type': ['integer']},
            {'type': ['string', 'null']},
        ]
    }
    simplified = json_schema.simplify(schema)
    assert json_schema.is_nullable(simplified)
Beispiel #10
0
def test_simplify__allOf__iterables():
    """An `allOf` holding a single array sub-schema is expected to simplify to an iterable schema."""
    integer_array = {
        'type': 'array',
        'items': {
            'type': 'integer'
        }
    }
    simplified = json_schema.simplify({'allOf': [integer_array]})
    assert json_schema.is_iterable(simplified)
def test_simplify__refs__circular():
    """Self-referential `$ref` chains are expected to raise an "is recursive" error."""
    # Two definitions that refer to each other.
    mutually_recursive = {
        'definitions': {
            'alice': {
                '$ref': '#/definitions/bob'
            },
            'bob': {
                '$ref': '#/definitions/alice'
            }
        },
        'properties': {
            'alice': {
                '$ref': '#/definitions/alice'
            }
        }
    }
    with pytest.raises(Exception, match=r'.*is recursive.*'):
        json_schema.simplify(mutually_recursive)

    # A definition that refers to itself through the items of one of its own properties.
    self_recursive = {
        'definitions': {
            'person': {
                'type': 'object',
                'properties': {
                    'name': {
                        'type': 'string'
                    },
                    'children': {
                        'type': 'array',
                        'items': {
                            '$ref': '#/definitions/person'
                        },
                        'default': []
                    }
                }
            }
        },
        'type': 'object',
        'properties': {
            'person': {
                '$ref': '#/definitions/person'
            }
        }
    }
    with pytest.raises(Exception, match=r'.*is recursive.*'):
        json_schema.simplify(self_recursive)
def test_simplify__allOf__objects__merges():
    """Object sub-schemas under `allOf` are expected to merge their properties into one object."""
    schema = {
        'allOf': [
            {},
            {'properties': {'a': {'type': 'number'}}},
            {'properties': {'c': {'type': 'integer'}}},
            {'properties': {'b': {'type': 'string', 'format': 'date-time'}}}]
    }
    expected = {
        'type': ['object'],
        'properties': {
            'a': {'type': ['number']},
            'b': {'type': ['string'], 'format': 'date-time'},
            'c': {'type': ['integer']}
        }}
    assert json_schema.simplify(schema) == expected
def test_simplify__anyOf__datetimes_dont_merge_with_strings():
    """A `date-time` string and a plain string under `anyOf` are expected to stay separate."""
    schema = {
        "anyOf": [
            {
                "type": "string",
                "format": "date-time"
            },
            {"type": ["string", "null"]}]}

    # Both entries remain; the datetime entry is expected to gain 'null'.
    expected = {
        "anyOf": [
            {
                "type": ["string", 'null'],
                "format": "date-time"
            },
            {"type": ["string", "null"]}]}

    assert json_schema.simplify(schema) == expected
def test_simplify__anyOf__objects__overlapping_keys():
    """Objects under `anyOf` that share a key are expected to merge, with an `anyOf` on that key."""
    schema = {
        "anyOf": [
            {
                "properties": {'a': {'type': 'string'}}
            },
            {
                "properties": {'a': {'type': 'integer'}}
            },
            {
                "properties": {'a': {'type': 'number'}}
            }]}
    expected = {
        'type': ['object'],
        'properties': {
            'a': {'anyOf': [
                {'type': ['integer']},
                {'type': ['number']},
                {'type': ['string']}]}}}
    assert json_schema.simplify(schema) == expected
def test_simplify__anyOf__single_nullable_makes_all_nullable():
    """One nullable entry under `anyOf` is expected to make every simplified entry nullable."""
    schema = {'anyOf': [
        {'type': 'string'},
        {'type': 'integer'},
        {'type': ['number', 'null']},
        {
            'type': 'string',
            'format': 'date-time'},
        {'type': ['boolean']}
    ]}
    expected = {'anyOf': [
        {
            'type': ['string', 'null'],
            'format': 'date-time'
        },
        {'type': ['boolean', 'null']},
        {'type': ['integer', 'null']},
        {'type': ['number', 'null']},
        {'type': ['string', 'null']}]}
    assert json_schema.simplify(schema) == expected
def test_simplify__refs__invalid_format():
    """Malformed `$ref` values are expected to raise an error starting with "Invalid format"."""
    # Empty string.
    empty_ref = {
        'properties': {
            'singleton': {'$ref': ''}}}
    with pytest.raises(Exception, match=r'Invalid format.*'):
        json_schema.simplify(empty_ref)

    # Arbitrary junk.
    junk_ref = {
        'properties': {
            'singleton': {'$ref': '123BWDSG!@R1513bw4tnb24'}}}
    with pytest.raises(Exception, match=r'Invalid format.*'):
        json_schema.simplify(junk_ref)

    # Missing the `/` right after `#`.
    missing_slash_ref = {
        'properties': {
            'singleton': {'$ref': '#definitions/singleton'}}}
    with pytest.raises(Exception, match=r'Invalid format.*'):
        json_schema.simplify(missing_slash_ref)
def test_simplify__empty():
    """Simplifying the empty schema is expected to return an empty schema."""
    simplified = json_schema.simplify({})
    assert simplified == {}
Beispiel #18
0
    def write_batch(self, stream_buffer):
        """
        Write the records buffered in `stream_buffer` to Postgres inside one transaction.

        Returns immediately when `stream_buffer.count` is 0. Otherwise the buffered records
        are processed, the table schema (root table plus denested sub tables) is upserted,
        the rows are persisted and the transaction is committed. The buffer is flushed only
        after the transaction block has completed without raising.

        :param stream_buffer: buffer exposing `count`, `stream`, `schema`, `key_properties`,
            `use_uuid_pk`, `peek_buffer()` and `flush_buffer()`
        :raises PostgresError: when any `Exception` is raised while writing. The transaction
            is rolled back first, and the original exception is passed as the second
            argument and chained as the cause. This includes the more specific
            `PostgresError`s raised below for `key_properties` changes, which therefore
            reach the caller wrapped.
        """
        if stream_buffer.count == 0:
            return

        with self.conn.cursor() as cur:
            try:
                cur.execute('BEGIN;')

                processed_records = map(
                    partial(self.process_record_message,
                            stream_buffer.use_uuid_pk,
                            self.get_postgres_datetime()),
                    stream_buffer.peek_buffer())

                # Materialize the records while tracking every table version seen
                # (including None) and the highest non-None one.
                versions = set()
                max_version = None
                records_all_versions = []
                for record in processed_records:
                    record_version = record.get(SINGER_TABLE_VERSION)
                    if record_version is not None and \
                       (max_version is None or record_version > max_version):
                        max_version = record_version
                    versions.add(record_version)
                    records_all_versions.append(record)

                current_table_schema = self.get_schema(cur,
                                                       self.postgres_schema,
                                                       stream_buffer.stream)

                current_table_version = None

                if current_table_schema:
                    current_table_version = current_table_schema.get('version')

                    if set(stream_buffer.key_properties) \
                            != set(current_table_schema.get('key_properties')):
                        raise PostgresError(
                            '`key_properties` change detected. Existing values are: {}. Streamed values are: {}'
                            .format(current_table_schema.get('key_properties'),
                                    stream_buffer.key_properties))

                # Only the newest version in the batch is written; None when no record
                # carried a version.
                target_table_version = max_version

                # NOTE(review): `versions` can contain None alongside ints, in which case
                # `min(versions)` raises TypeError on Python 3 — confirm whether unversioned
                # records can be mixed with versioned ones.
                if current_table_version is not None and \
                        min(versions) < current_table_version:
                    self.logger.warning(
                        '{} - Records from an earlier table version detected.'.
                        format(stream_buffer.stream))
                if len(versions) > 1:
                    self.logger.warning(
                        '{} - Multiple table versions in stream, only using the latest.'
                        .format(stream_buffer.stream))

                # A newer version than the existing table gets its own versioned root table.
                # NOTE(review): a None `target_table_version` with a versioned existing
                # table raises TypeError here on Python 3 — confirm the intended handling.
                if current_table_version is not None and \
                   target_table_version > current_table_version:
                    root_table_name = stream_buffer.stream + self.SEPARATOR + str(
                        target_table_version)
                else:
                    root_table_name = stream_buffer.stream

                if target_table_version is not None:
                    # Lazily drop records which belong to any other version.
                    records = filter(
                        lambda x: x.get(SINGER_TABLE_VERSION) ==
                        target_table_version, records_all_versions)
                else:
                    records = records_all_versions

                root_table_schema = json_schema.simplify(stream_buffer.schema)

                ## Add singer columns to root table
                self.add_singer_columns(root_table_schema,
                                        stream_buffer.key_properties)

                subtables = {}
                key_prop_schemas = {}
                for key in stream_buffer.key_properties:
                    streamed_key_schema = root_table_schema['properties'][key]

                    if current_table_schema:
                        existing_type = json_schema.get_type(
                            current_table_schema['schema']['properties'][key])
                        streamed_type = json_schema.get_type(streamed_key_schema)
                        if existing_type != streamed_type:
                            raise PostgresError(
                                ('`key_properties` type change detected for "{}". '
                                 + 'Existing values are: {}. ' +
                                 'Streamed values are: {}').format(
                                     key, existing_type, streamed_type))

                    key_prop_schemas[key] = streamed_key_schema

                self.denest_schema(root_table_name, root_table_schema,
                                   key_prop_schemas, subtables)

                root_temp_table_name = self.upsert_table_schema(
                    cur, root_table_name, root_table_schema,
                    stream_buffer.key_properties, target_table_version)

                nested_upsert_tables = []
                for table_name, subtable_json_schema in subtables.items():
                    temp_table_name = self.upsert_table_schema(
                        cur, table_name, subtable_json_schema, None, None)
                    nested_upsert_tables.append({
                        'table_name': table_name,
                        'json_schema': subtable_json_schema,
                        'temp_table_name': temp_table_name
                    })

                records_map = {}
                self.denest_records(root_table_name, records, records_map,
                                    stream_buffer.key_properties)
                self.persist_rows(cur, root_table_name, root_temp_table_name,
                                  root_table_schema,
                                  stream_buffer.key_properties,
                                  records_map[root_table_name])
                for nested_upsert_table in nested_upsert_tables:
                    # Sub tables are keyed by the root table's key properties, prefixed.
                    key_properties = [
                        SINGER_SOURCE_PK_PREFIX + key
                        for key in stream_buffer.key_properties
                    ]
                    self.persist_rows(
                        cur, nested_upsert_table['table_name'],
                        nested_upsert_table['temp_table_name'],
                        nested_upsert_table['json_schema'], key_properties,
                        records_map[nested_upsert_table['table_name']])

                cur.execute('COMMIT;')
            except Exception as ex:
                cur.execute('ROLLBACK;')
                message = 'Exception writing records'
                self.logger.exception(message)
                raise PostgresError(message, ex) from ex

        stream_buffer.flush_buffer()
def test_simplify__allOf__objects():
    """An `allOf` holding a single object sub-schema is expected to simplify to an object schema."""
    schema = {'allOf': [{'type': ['object']}]}
    simplified = json_schema.simplify(schema)
    assert json_schema.is_object(simplified)
def test_simplify__refs():
    """`$ref`s are expected to be replaced by the simplified schema they point at."""
    # A reference to a top-level definition.
    direct_ref = {
        'definitions': {
            'singleton': {
                'type': 'string'
            }},

        'type': 'object',

        'properties': {
            'singleton': {'$ref': '#/definitions/singleton'}}}
    expected_direct = {
        'type': ['object'],
        'properties': {
            'singleton': {
                'type': ['string']}}}
    assert json_schema.simplify(direct_ref) == expected_direct

    # A reference to a path deep inside a definition.
    deep_ref = {
        'definitions': {
            'foo': {
                'type': 'object',
                'properties': {
                    'bar': {
                        'type': 'object',
                        'properties': {
                            'baz': {
                                'type': 'integer'
                            }
                        }
                    }
                }
            }},

        'type': 'object',

        'properties': {
            'nested': {'$ref': '#/definitions/foo/properties/bar/properties/baz'}}}
    expected_deep = {
        'type': ['object'],
        'properties': {
            'nested': {
                'type': ['integer']}}}
    assert json_schema.simplify(deep_ref) == expected_deep

    # The same definition referenced twice; `required` is absent from the expected output.
    shared_ref = {
        'definitions': {
            'address': {
                'type': 'object',
                'properties': {
                    'street_address': {'type': 'string'},
                    'city': {'type': 'string'},
                    'state': {'type': 'string'}
                },
                'required': ['street_address', 'city', 'state']
            }
        },

        'type': 'object',

        'properties': {
            'billing_address': {'$ref': '#/definitions/address'},
            'shipping_address': {'$ref': '#/definitions/address'}}}
    expected_shared = {
        'type': ['object'],
        'properties': {
            'billing_address': {
                'type': ['object'],
                'properties': {
                    'street_address': {'type': ['string']},
                    'city': {'type': ['string']},
                    'state': {'type': ['string']}
                }
            },
            'shipping_address': {
                'type': ['object'],
                'properties': {
                    'street_address': {'type': ['string']},
                    'city': {'type': ['string']},
                    'state': {'type': ['string']}}}}}
    assert json_schema.simplify(shared_ref) == expected_shared
def test_simplify__complex():
    """Larger schemas: a property of every type, an array of objects, and `CATS_SCHEMA`."""
    # A single property that declares every type at once.
    every_type_schema = {
        'properties': {
            'every_type': {
                'type': ['null', 'integer', 'number', 'boolean', 'string', 'array', 'object'],
                'items': {'type': 'integer'},
                'format': 'date-time',
                'properties': {
                    'i': {'type': 'integer'},
                    'n': {'type': 'number'},
                    'b': {'type': 'boolean'}
                }
            }
        }
    }
    expected_every_type = {
        'type': ['object'],
        'properties': {
            'every_type': {
                'type': ['null', 'integer', 'number', 'boolean', 'string', 'array', 'object'],
                'items': {'type': ['integer']},
                'format': 'date-time',
                'properties': {
                    'i': {'type': ['integer']},
                    'n': {'type': ['number']},
                    'b': {'type': ['boolean']}
                }
            }
        }
    }
    assert json_schema.simplify(every_type_schema) == expected_every_type

    # A nullable array of objects, one of whose properties is itself named `type`.
    immunizations_schema = {
        'type': ['null', 'array'],
        'items': {
            'type': 'object',
            'properties': {
                'type': {
                    'type': ['null', 'string']
                },
                'date_administered': {
                    'type': 'string',
                    'format': 'date-time'}}}}
    expected_immunizations = {
        'type': ['null', 'array'],
        'items': {
            'type': ['object'],
            'properties': {
                'type': {
                    'type': ['null', 'string']
                },
                'date_administered': {
                    'type': ['string'],
                    'format': 'date-time'}}}}
    assert json_schema.simplify(immunizations_schema) == expected_immunizations

    # The shared cats fixture.
    expected_cats = {
        'type': ['object'],
        'properties': {
            'id': {
                'type': ['integer']
            },
            'name': {
                'type': ['string']
            },
            'paw_size': {
                'type': ['integer'],
                'default': 314159
            },
            'paw_colour': {
                'type': ['string'],
                'default': ''
            },
            'flea_check_complete': {
                'type': ['boolean'],
                'default': False
            },
            'pattern': {
                'type': ['null', 'string']
            },
            'age': {
                'type': ['null', 'integer']
            },
            'adoption': {
                'type': ['object', 'null'],
                'properties': {
                    'adopted_on': {
                        'type': ['null', 'string'],
                        'format': 'date-time'
                    },
                    'was_foster': {
                        'type': ['boolean']
                    },
                    'immunizations': {
                        'type': ['null', 'array'],
                        'items': {
                            'type': ['object'],
                            'properties': {
                                'type': {
                                    'type': ['null', 'string']
                                },
                                'date_administered': {
                                    'type': ['null', 'string'],
                                    'format': 'date-time'}}}}}}}}
    assert json_schema.simplify(CATS_SCHEMA['schema']) == expected_cats
def test_simplify__anyOf__duplicates():
    """Repeated sub-schemas under `anyOf` are expected to collapse to one entry each."""
    # Scalars: each distinct type is expected once.
    repeated_scalars = {'anyOf': [
        {'type': 'string'},
        {'type': 'string'},
        {'type': 'integer'},
        {'type': 'string'},
        {'type': 'string'},
        {'type': 'integer'},
        {'type': ['number']},
        {'type': 'integer'},
        {'type': 'integer'},
    ]}
    expected_scalars = {'anyOf': [
        {'type': ['integer']},
        {'type': ['number']},
        {'type': ['string']}
    ]}
    assert json_schema.simplify(repeated_scalars) == expected_scalars

    # Identical datetimes: the `anyOf` wrapper is expected to disappear entirely.
    repeated_datetimes = {'anyOf': [
        {"type": "string", "format": "date-time"}
        for _ in range(2)
    ]}
    expected_datetime = {
        "type": ["string"],
        "format": "date-time"
    }
    assert json_schema.simplify(repeated_datetimes) == expected_datetime

    # Identical objects.
    repeated_objects = {'anyOf': [
        {"properties": {'a': {'type': 'number'}}}
        for _ in range(3)
    ]}
    expected_object = {
        'type': ['object'],
        "properties": {'a': {'type': ['number']}}
    }
    assert json_schema.simplify(repeated_objects) == expected_object

    # Identical arrays.
    repeated_arrays = {'anyOf': [
        {'type': 'array', "items": {'type': 'number'}}
        for _ in range(3)
    ]}
    expected_array = {
        'type': ['array'],
        "items": {'type': ['number']}
    }
    assert json_schema.simplify(repeated_arrays) == expected_array
def test_simplify__empty_becomes_object():
    """Simplifying the empty schema is expected to return an object schema with no properties."""
    expected = {'properties': {}, 'type': ['object']}
    simplified = json_schema.simplify({})
    assert simplified == expected
def test_simplify__anyOf__duplicate_literals__merges_same_types_nullable():
    """Same-typed `anyOf` entries differing only in nullability are expected to merge as nullable."""
    # Strings.
    strings = {'anyOf': [
        {'type': 'string'},
        {'type': ['null', 'string']}
    ]}
    assert json_schema.simplify(strings) == {'type': ['string', 'null']}

    # Datetimes.
    datetimes = {'anyOf': [
        {
            "type": ["string", 'null'],
            "format": "date-time"
        },
        {
            "type": "string",
            "format": "date-time"
        }
    ]}
    expected_datetime = {
        "type": ["string", 'null'],
        "format": "date-time"
    }
    assert json_schema.simplify(datetimes) == expected_datetime

    # Objects (the first entry has no explicit `type`).
    objects = {'anyOf': [
        {
            "properties": {'a': {'type': 'number'}}
        },
        {
            'type': ['object', 'null'],
            "properties": {'a': {'type': 'number'}}
        }
    ]}
    expected_object = {
        'type': ['object', 'null'],
        "properties": {'a': {'type': ['number']}}
    }
    assert json_schema.simplify(objects) == expected_object

    # Arrays.
    arrays = {'anyOf': [
        {
            'type': ['array', 'null'],
            "items": {'type': 'number'}
        },
        {
            'type': 'array',
            "items": {'type': 'number'}
        }
    ]}
    expected_array = {
        'type': ['array', 'null'],
        "items": {'type': ['number']}
    }
    assert json_schema.simplify(arrays) == expected_array
Beispiel #25
0
    def write_batch(self, stream_buffer):
        """
        Write the records buffered in `stream_buffer` to Postgres inside one transaction.

        Returns immediately when `stream_buffer.count` is 0. Otherwise the buffered records
        are processed, the table schema (root table plus denested sub tables) is upserted,
        the rows are persisted and the transaction is committed. The buffer is flushed only
        after the transaction block has completed without raising.

        :param stream_buffer: buffer exposing `count`, `stream`, `schema`, `key_properties`,
            `use_uuid_pk`, `peek_buffer()` and `flush_buffer()`
        :raises: whatever was raised while writing, re-raised unchanged after the
            transaction has been rolled back and the failure logged
        """
        if stream_buffer.count == 0:
            return

        with self.conn.cursor() as cur:
            try:
                cur.execute('BEGIN;')

                processed_records = map(
                    partial(self.process_record_message,
                            stream_buffer.use_uuid_pk,
                            self.get_postgres_datetime()),
                    stream_buffer.peek_buffer())

                # Materialize the records while tracking every table version seen
                # (including None) and the highest non-None one.
                versions = set()
                max_version = None
                records_all_versions = []
                for record in processed_records:
                    record_version = record.get(SINGER_TABLE_VERSION)
                    if record_version is not None and \
                       (max_version is None or record_version > max_version):
                        max_version = record_version
                    versions.add(record_version)
                    records_all_versions.append(record)

                table_metadata = self.get_table_metadata(
                    cur, self.postgres_schema, stream_buffer.stream)

                ## TODO: check if PK has changed. Fail on PK change? Just update and log on PK change?

                if table_metadata:
                    current_table_version = table_metadata.get('version')
                else:
                    current_table_version = None

                # Only the newest version in the batch is written; None when no record
                # carried a version.
                target_table_version = max_version

                # NOTE(review): `versions` can contain None alongside ints, in which case
                # `min(versions)` raises TypeError on Python 3 — confirm whether unversioned
                # records can be mixed with versioned ones.
                if current_table_version is not None and \
                   min(versions) < current_table_version:
                    self.logger.warning(
                        '{} - Records from an earlier table version detected.'.
                        format(stream_buffer.stream))
                if len(versions) > 1:
                    self.logger.warning(
                        '{} - Multiple table versions in stream, only using the latest.'
                        .format(stream_buffer.stream))

                # A newer version than the existing table gets its own versioned root table.
                # NOTE(review): a None `target_table_version` with a versioned existing
                # table raises TypeError here on Python 3 — confirm the intended handling.
                if current_table_version is not None and \
                   target_table_version > current_table_version:
                    root_table_name = stream_buffer.stream + self.NESTED_SEPARATOR + str(
                        target_table_version)
                else:
                    root_table_name = stream_buffer.stream

                if target_table_version is not None:
                    # Lazily drop records which belong to any other version.
                    records = filter(
                        lambda x: x.get(SINGER_TABLE_VERSION) ==
                        target_table_version, records_all_versions)
                else:
                    records = records_all_versions

                root_table_schema = json_schema.simplify(stream_buffer.schema)

                ## Add singer columns to root table
                self.add_singer_columns(root_table_schema,
                                        stream_buffer.key_properties)

                subtables = {}
                key_prop_schemas = {
                    key: root_table_schema['properties'][key]
                    for key in stream_buffer.key_properties
                }
                self.denest_schema(root_table_name, root_table_schema,
                                   key_prop_schemas, subtables)

                root_temp_table_name = self.upsert_table_schema(
                    cur, root_table_name, root_table_schema,
                    stream_buffer.key_properties, target_table_version)

                nested_upsert_tables = []
                for table_name, subtable_json_schema in subtables.items():
                    temp_table_name = self.upsert_table_schema(
                        cur, table_name, subtable_json_schema, None, None)
                    nested_upsert_tables.append({
                        'table_name': table_name,
                        'json_schema': subtable_json_schema,
                        'temp_table_name': temp_table_name
                    })

                records_map = {}
                self.denest_records(root_table_name, records, records_map,
                                    stream_buffer.key_properties)
                self.persist_rows(cur, root_table_name, root_temp_table_name,
                                  root_table_schema,
                                  stream_buffer.key_properties,
                                  records_map[root_table_name])
                for nested_upsert_table in nested_upsert_tables:
                    # Sub tables are keyed by the root table's key properties, prefixed.
                    key_properties = [
                        SINGER_SOURCE_PK_PREFIX + key
                        for key in stream_buffer.key_properties
                    ]
                    self.persist_rows(
                        cur, nested_upsert_table['table_name'],
                        nested_upsert_table['temp_table_name'],
                        nested_upsert_table['json_schema'], key_properties,
                        records_map[nested_upsert_table['table_name']])

                cur.execute('COMMIT;')
            except BaseException:
                # Deliberately catches everything (as the previous bare `except:` did) so
                # the transaction is rolled back even on interrupts; always re-raised.
                cur.execute('ROLLBACK;')
                self.logger.exception('Exception writing records')
                raise

        stream_buffer.flush_buffer()
def test_simplify__complex():
    '''
    Exercise `json_schema.simplify` on nested schemas.

    Two cases are asserted: a nullable array of objects written inline, and the
    schema carried by the `CATS_SCHEMA` fixture. In both, the expected output
    spells every `type` as a list.
    '''
    # Case 1: nullable array whose items are objects declared with scalar types.
    immunizations_schema = {
        'type': ['null', 'array'],
        'items': {
            'type': 'object',
            'properties': {
                'type': {'type': ['null', 'string']},
                'date_administered': {'type': 'string', 'format': 'date-time'},
            },
        },
    }
    simplified_immunizations = json_schema.simplify(immunizations_schema)

    assert simplified_immunizations == {
        'type': ['null', 'array'],
        'items': {
            'type': ['object'],
            'properties': {
                'type': {'type': ['null', 'string']},
                'date_administered': {'type': ['string'], 'format': 'date-time'},
            },
        },
    }

    # Case 2: the shared cats fixture. The expected result is assembled from
    # the innermost schema outwards to keep each level readable.
    simplified_cats = json_schema.simplify(CATS_SCHEMA['schema'])

    expected_immunizations = {
        'type': ['null', 'array'],
        'items': {
            'type': ['object'],
            'properties': {
                'type': {'type': ['null', 'string']},
                'date_administered': {
                    'type': ['null', 'string'],
                    'format': 'date-time',
                },
            },
        },
    }
    expected_adoption = {
        'type': ['object', 'null'],
        'properties': {
            'adopted_on': {'type': ['null', 'string'], 'format': 'date-time'},
            'was_foster': {'type': ['boolean']},
            'immunizations': expected_immunizations,
        },
    }
    expected_cats = {
        'type': ['object'],
        'properties': {
            'id': {'type': ['integer']},
            'name': {'type': ['string']},
            'pattern': {'type': ['null', 'string']},
            'age': {'type': ['null', 'integer']},
            'adoption': expected_adoption,
        },
    }

    assert simplified_cats == expected_cats
def test_simplify__refs():
    '''
    Exercise `json_schema.simplify` on schemas that use `$ref`.

    The expected results have every `$ref` replaced by the schema it points at
    under `definitions`, and contain neither the `definitions` nor the
    `required` keys of the input.
    '''
    def address_definition():
        # Build a fresh dict on every call so that no input object is shared
        # between separate `simplify` invocations.
        return {
            'type': 'object',
            'properties': {
                'street_address': {'type': 'string'},
                'city': {'type': 'string'},
                'state': {'type': 'string'},
            },
            'required': ['street_address', 'city', 'state'],
        }

    # A reference to a top-level definition.
    singleton_schema = {
        'definitions': {'singleton': {'type': 'string'}},
        'type': 'object',
        'properties': {'singleton': {'$ref': '#/definitions/singleton'}},
    }
    simplified_singleton = json_schema.simplify(singleton_schema)
    assert simplified_singleton == {
        'type': ['object'],
        'properties': {'singleton': {'type': ['string']}},
    }

    # A reference whose pointer descends several levels into a definition.
    deep_ref_schema = {
        'definitions': {
            'foo': {
                'type': 'object',
                'properties': {
                    'bar': {
                        'type': 'object',
                        'properties': {'baz': {'type': 'integer'}},
                    },
                },
            },
        },
        'type': 'object',
        'properties': {
            'nested': {'$ref': '#/definitions/foo/properties/bar/properties/baz'},
        },
    }
    simplified_deep_ref = json_schema.simplify(deep_ref_schema)
    assert simplified_deep_ref == {
        'type': ['object'],
        'properties': {'nested': {'type': ['integer']}},
    }

    # One definition referenced from two different properties.
    two_addresses_schema = {
        'definitions': {'address': address_definition()},
        'type': 'object',
        'properties': {
            'billing_address': {'$ref': '#/definitions/address'},
            'shipping_address': {'$ref': '#/definitions/address'},
        },
    }
    simplified_two_addresses = json_schema.simplify(two_addresses_schema)
    expected_address = {
        'type': ['object'],
        'properties': {
            'street_address': {'type': ['string']},
            'city': {'type': ['string']},
            'state': {'type': ['string']},
        },
    }
    assert simplified_two_addresses == {
        'type': ['object'],
        'properties': {
            'billing_address': expected_address,
            'shipping_address': expected_address,
        },
    }

    # A reference combined through `allOf` with an inline schema that
    # redefines `state` and adds `extra`.
    extended_address_schema = {
        'definitions': {'address': address_definition()},
        'allOf': [
            {'$ref': '#/definitions/address'},
            {
                'properties': {
                    'state': {'type': ['integer']},
                    'extra': {'type': ['string']},
                },
            },
        ],
    }
    simplified_extended_address = json_schema.simplify(extended_address_schema)
    assert simplified_extended_address == {
        'type': ['object'],
        'properties': {
            'street_address': {'type': ['string']},
            'city': {'type': ['string']},
            'state': {'type': ['integer']},
            'extra': {'type': ['string']},
        },
    }