Example #1
def _denest_record(table_path, record, records_map, key_properties, pk_fks,
                   level):
    """"""
    """
    {...}
    """
    denested_record = {}
    for prop, value in record.items():
        """
        str : {...} | [...] | None | <literal>
        """

        if isinstance(value, dict):
            """
            {...}
            """
            _denest_subrecord(table_path + (prop, ), (prop, ), denested_record,
                              value, records_map, key_properties, pk_fks,
                              level)

        elif isinstance(value, list):
            """
            [...]
            """
            _denest_records(table_path + (prop, ),
                            value,
                            records_map,
                            key_properties,
                            pk_fks=pk_fks,
                            level=level + 1)

        elif value is None:
            """
            None
            """
            continue

        else:
            """
            <literal>
            """
            denested_record[(prop, )] = (json_schema.python_type(value), value)

    records_map.setdefault(table_path, []).append(denested_record)
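For orientation, here is a stripped-down, self-contained model of the flattening idea above; the sample record, the local python_type helper, and the omission of the key_properties/pk_fks threading are all illustrative assumptions, not the project's real API:

def python_type(value):
    # Crude stand-in for json_schema.python_type
    return {bool: 'boolean', int: 'integer', float: 'number', str: 'string'}[type(value)]

def denest(table_path, record, records_map):
    row = {}

    def walk(rec, path):
        for prop, value in rec.items():
            if isinstance(value, dict):        # {...}: same row, longer key
                walk(value, path + (prop,))
            elif isinstance(value, list):      # [...]: rows of a child table
                for item in value:
                    denest(table_path + path + (prop,), item, records_map)
            elif value is None:                # None: omitted
                continue
            else:                              # <literal>: typed cell
                row[path + (prop,)] = (python_type(value), value)

    walk(record, ())
    records_map.setdefault(table_path, []).append(row)

records_map = {}
denest(('orders',),
       {'id': 1, 'customer': {'name': 'Ada'}, 'items': [{'sku': 'A1'}]},
       records_map)
# records_map is now, roughly:
# {('orders', 'items'): [{('sku',): ('string', 'A1')}],
#  ('orders',): [{('id',): ('integer', 1), ('customer', 'name'): ('string', 'Ada')}]}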
Example #2
def _denest_subrecord(table_path, prop_path, parent_record, record,
                      records_map, key_properties, pk_fks, level):
    """"""
    """
    {...}
    """
    for prop, value in record.items():
        """
        str : {...} | [...] | ???None??? | <literal>
        """

        if isinstance(value, dict):
            """
            {...}
            """
            _denest_subrecord(table_path + (prop, ), prop_path + (prop, ),
                              parent_record, value, records_map,
                              key_properties, pk_fks, level)

        elif isinstance(value, list):
            """
            [...]
            """
            _denest_records(table_path + (prop, ),
                            value,
                            records_map,
                            key_properties,
                            pk_fks=pk_fks,
                            level=level + 1)

        elif value is None:
            """
            None
            """
            continue

        else:
            """
            <literal>
            """
            parent_record[prop_path +
                          (prop, )] = (json_schema.python_type(value), value)
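The effect of the prop_path threading above is that literals from arbitrarily deep dicts land in the parent row under compound tuple keys. A minimal, runnable illustration of just that key-building (list and None handling omitted, sample data made up):

def collect_literals(record, prefix=()):
    # Dicts extend the key tuple; literals are emitted under the full path.
    out = {}
    for prop, value in record.items():
        if isinstance(value, dict):
            out.update(collect_literals(value, prefix + (prop,)))
        else:
            out[prefix + (prop,)] = value
    return out

print(collect_literals({'customer': {'address': {'city': 'Oslo'}}}))
# {('customer', 'address', 'city'): 'Oslo'}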
Example #3
    ## Requires: import pickle; from functools import lru_cache, partial
    def _serialize_table_records(
            self, remote_schema, streamed_schema, records):
        """
        Parse the given table's `records` in preparation for persistence to the remote target.

        Base implementation returns a list of dictionaries, where _every_ dictionary has the
        same keys as `remote_schema`'s properties.

        :param remote_schema: TABLE_SCHEMA(remote)
        :param streamed_schema: TABLE_SCHEMA(local)
        :param records: [{(path_0, path_1, ...): (_json_schema_string_type, value), ...}, ...]
        :return: [{...}, ...]
        """

        datetime_paths = set()
        default_paths = {}

        for column_path, column_schema in streamed_schema['schema']['properties'].items():
            for sub_schema in column_schema['anyOf']:
                if json_schema.is_datetime(sub_schema):
                    datetime_paths.add(column_path)
                if sub_schema.get('default') is not None:
                    default_paths[column_path] = sub_schema.get('default')

        ## Get the default NULL value so that unset row fields can later be
        ## told apart from fields that were explicitly assigned
        NULL_DEFAULT = self.serialize_table_record_null_value(remote_schema, streamed_schema, None, None)

        serialized_rows = []

        remote_fields = set(remote_schema['schema']['properties'].keys())
        default_row = {field: NULL_DEFAULT for field in remote_fields}

        paths = streamed_schema['schema']['properties'].keys()

        ## Bind the unhashable `remote_schema` with partial so the remaining
        ## (hashable) arguments can be memoised with lru_cache
        _cached_field_name = partial(self._serialize_table_record_field_name, remote_schema)
        cached_field_name = lru_cache(maxsize=None)(_cached_field_name)

        for record in records:

            ## pickling/unpickling is much faster than deepcopy
            row = pickle.loads(pickle.dumps(default_row))

            for path in paths:
                json_schema_string_type, value = record.get(path, (None, None))

                ## Serialize fields which are not present but have default values set
                if path in default_paths \
                        and value is None:
                    value = default_paths[path]
                    json_schema_string_type = json_schema.python_type(value)

                if not json_schema_string_type:
                    continue

                ## Serialize datetime to compatible format
                if path in datetime_paths \
                        and json_schema_string_type == json_schema.STRING \
                        and value is not None:
                    value = self.serialize_table_record_datetime_value(remote_schema, streamed_schema, path,
                                                                       value)
                    value_json_schema_tuple = (json_schema.STRING, json_schema.DATE_TIME_FORMAT)
                    field_name = cached_field_name(path, value_json_schema_tuple)
                else:
                    field_name = cached_field_name(path, (json_schema_string_type,))

                ## Serialize NULL default value
                value = self.serialize_table_record_null_value(remote_schema, streamed_schema, path, value)

                ## Only assign if `field_name` has not been set on this row yet
                if row[field_name] == NULL_DEFAULT:
                    row[field_name] = value

            serialized_rows.append(row)

        return serialized_rows
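Two implementation details in this version are worth isolating: copying the template row with a pickle round-trip, and memoising the field-name lookup by first binding the unhashable remote_schema with partial so lru_cache only ever sees hashable arguments. A minimal sketch of both patterns; resolve_field_name is a made-up stand-in, not the real helper:

import pickle
from functools import lru_cache, partial

default_row = {'id': None, 'name': None}

# Pickle round-trip: behaviour-equivalent to copy.deepcopy for plain data,
# and typically faster, which matters when done once per record.
row = pickle.loads(pickle.dumps(default_row))
row['id'] = 1
assert default_row['id'] is None   # the template row is untouched

def resolve_field_name(schema, path, type_tuple):
    # Hypothetical stand-in for _serialize_table_record_field_name.
    return '__'.join(path) + '__' + '_'.join(type_tuple)

# Bind the dict (unhashable) first; cache over the remaining tuple args.
cached_field_name = lru_cache(maxsize=None)(partial(resolve_field_name, {}))
assert cached_field_name(('a', 'b'), ('string',)) == 'a__b__string'
cached_field_name(('a', 'b'), ('string',))   # second call: served from cache
assert cached_field_name.cache_info().hits == 1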
Example #4
def test_python_type():
    assert json_schema.python_type(None) \
           == json_schema.NULL
    assert json_schema.python_type(True) \
           == json_schema.BOOLEAN
    assert json_schema.python_type(123) \
           == json_schema.INTEGER
    assert json_schema.python_type(0) \
           == json_schema.INTEGER
    assert json_schema.python_type(-1234567890) \
           == json_schema.INTEGER
    assert json_schema.python_type(3.14159) \
           == json_schema.NUMBER
    assert json_schema.python_type(0.0) \
           == json_schema.NUMBER
    assert json_schema.python_type(-3.14159) \
           == json_schema.NUMBER
    assert json_schema.python_type('') \
           == json_schema.STRING
    assert json_schema.python_type('hello') \
           == json_schema.STRING
    assert json_schema.python_type('world') \
           == json_schema.STRING
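Under pytest, the same checks read more compactly in table-driven form; a sketch, assuming pytest is available and json_schema is importable as in the test above:

import pytest

@pytest.mark.parametrize('value, expected', [
    (None, json_schema.NULL),
    (True, json_schema.BOOLEAN),
    (123, json_schema.INTEGER),
    (0, json_schema.INTEGER),
    (-1234567890, json_schema.INTEGER),
    (3.14159, json_schema.NUMBER),
    (0.0, json_schema.NUMBER),
    (-3.14159, json_schema.NUMBER),
    ('', json_schema.STRING),
    ('hello', json_schema.STRING),
    ('world', json_schema.STRING),
])
def test_python_type_parametrized(value, expected):
    assert json_schema.python_type(value) == expected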
Example #5
    ## Requires: from copy import deepcopy
    def _serialize_table_records(self, remote_schema, streamed_schema,
                                 records):
        """
        Parse the given table's `records` in preparation for persistence to the remote target.

        Base implementation returns a list of dictionaries, where _every_ dictionary has the
        same keys as `remote_schema`'s properties.

        :param remote_schema: TABLE_SCHEMA(remote)
        :param streamed_schema: TABLE_SCHEMA(local)
        :param records: [{(path_0, path_1, ...): (_json_schema_string_type, value), ...}, ...]
        :return: [{...}, ...]
        """

        datetime_paths = [
            k for k, v in streamed_schema['schema']['properties'].items()
            if json_schema.is_datetime(v)
        ]

        default_paths = {
            k: v.get('default')
            for k, v in streamed_schema['schema']['properties'].items()
            if v.get('default') is not None
        }

        ## Get the default NULL value so that unset row fields can later be
        ## told apart from fields that were explicitly assigned
        NULL_DEFAULT = self.serialize_table_record_null_value(
            remote_schema, streamed_schema, None, None)

        serialized_rows = []

        remote_fields = set(remote_schema['schema']['properties'].keys())
        default_row = {field: NULL_DEFAULT for field in remote_fields}

        paths = streamed_schema['schema']['properties'].keys()
        for record in records:

            row = deepcopy(default_row)

            for path in paths:
                json_schema_string_type, value = record.get(path, (None, None))

                ## Serialize fields which are not present but have default values set
                if path in default_paths \
                        and value is None:
                    value = default_paths[path]
                    json_schema_string_type = json_schema.python_type(value)

                ## Serialize datetime to compatible format
                if path in datetime_paths \
                        and json_schema_string_type == json_schema.STRING \
                        and value is not None:
                    value = self.serialize_table_record_datetime_value(
                        remote_schema, streamed_schema, path, value)
                    value_json_schema = {
                        'type': json_schema.STRING,
                        'format': json_schema.DATE_TIME_FORMAT
                    }
                elif json_schema_string_type:
                    value_json_schema = {'type': json_schema_string_type}
                else:
                    value_json_schema = json_schema.simple_type(
                        streamed_schema['schema']['properties'][path])

                ## Serialize NULL default value
                value = self.serialize_table_record_null_value(
                    remote_schema, streamed_schema, path, value)

                field_name = self._serialize_table_record_field_name(
                    remote_schema, streamed_schema, path, value_json_schema)

                if field_name in remote_fields \
                        and (field_name not in row
                             or row[field_name] == NULL_DEFAULT):
                    row[field_name] = value

            serialized_rows.append(row)

        return serialized_rows
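Both versions delegate datetime normalisation to the serialize_table_record_datetime_value hook; the contract implied above is "take the string value of a datetime column and return it in the target's canonical format". A hypothetical implementation of that hook (the ISO-8601/UTC choice is an assumption, not what the base class actually does):

from datetime import datetime, timezone

def serialize_table_record_datetime_value(remote_schema, streamed_schema,
                                          path, value):
    # Parse the incoming string and re-emit it as a UTC ISO-8601 timestamp.
    dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc).isoformat()

print(serialize_table_record_datetime_value(None, None, ('updated_at',),
                                            '2020-01-02T03:04:05Z'))
# 2020-01-02T03:04:05+00:00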