def _denest_record(table_path, record, records_map, key_properties, pk_fks, level): """""" """ {...} """ denested_record = {} for prop, value in record.items(): """ str : {...} | [...] | None | <literal> """ if isinstance(value, dict): """ {...} """ _denest_subrecord(table_path + (prop, ), (prop, ), denested_record, value, records_map, key_properties, pk_fks, level) elif isinstance(value, list): """ [...] """ _denest_records(table_path + (prop, ), value, records_map, key_properties, pk_fks=pk_fks, level=level + 1) elif value is None: """ None """ continue else: """ <literal> """ denested_record[(prop, )] = (json_schema.python_type(value), value) if table_path not in records_map: records_map[table_path] = [] records_map[table_path].append(denested_record)
def _denest_subrecord(table_path, prop_path, parent_record, record, records_map, key_properties, pk_fks, level): """""" """ {...} """ for prop, value in record.items(): """ str : {...} | [...] | ???None??? | <literal> """ if isinstance(value, dict): """ {...} """ _denest_subrecord(table_path + (prop, ), prop_path + (prop, ), parent_record, value, records_map, key_properties, pk_fks, level) elif isinstance(value, list): """ [...] """ _denest_records(table_path + (prop, ), value, records_map, key_properties, pk_fks=pk_fks, level=level + 1) elif value is None: """ None """ continue else: """ <literal> """ parent_record[prop_path + (prop, )] = (json_schema.python_type(value), value)
def _serialize_table_records(
        self, remote_schema, streamed_schema, records):
    """
    Parse the given table's `records` in preparation for persistence to the
    remote target.

    Base implementation returns a list of dictionaries, where _every_
    dictionary has the same keys as `remote_schema`'s properties.

    :param remote_schema: TABLE_SCHEMA(remote)
    :param streamed_schema: TABLE_SCHEMA(local)
    :param records: [{(path_0, path_1, ...): (_json_schema_string_type, value), ...}, ...]
    :return: [{...}, ...]
    """
    # Collect, per streamed column path: whether any subschema is a datetime,
    # and any declared non-None default value.
    # NOTE(review): assumes every column schema carries an 'anyOf' union —
    # confirm upstream schema normalization guarantees this.
    datetime_paths = set()
    default_paths = {}
    for column_path, column_schema in streamed_schema['schema']['properties'].items():
        for sub_schema in column_schema['anyOf']:
            if json_schema.is_datetime(sub_schema):
                datetime_paths.add(column_path)
            if sub_schema.get('default') is not None:
                default_paths[column_path] = sub_schema.get('default')

    ## Get the default NULL value so we can assign row values when value is _not_ NULL
    NULL_DEFAULT = self.serialize_table_record_null_value(remote_schema, streamed_schema, None, None)

    serialized_rows = []

    remote_fields = set(remote_schema['schema']['properties'].keys())
    # Template row: every remote column pre-filled with the NULL sentinel.
    default_row = dict([(field, NULL_DEFAULT) for field in remote_fields])

    paths = streamed_schema['schema']['properties'].keys()

    ## create a partial function with only hashable args so we can use lru_cache on it
    _cached_field_name = partial(self._serialize_table_record_field_name, remote_schema)
    cached_field_name = lru_cache(maxsize=None)(_cached_field_name)

    for record in records:
        ## pickling/unpickling is much faster than deepcopy
        row = pickle.loads(pickle.dumps(default_row))

        for path in paths:
            json_schema_string_type, value = record.get(path, (None, None))

            ## Serialize fields which are not present but have default values set
            if path in default_paths \
                    and value is None:
                value = default_paths[path]
                json_schema_string_type = json_schema.python_type(value)

            # Nothing streamed and no default: leave the NULL sentinel in place.
            if not json_schema_string_type:
                continue

            ## Serialize datetime to compatible format
            if path in datetime_paths \
                    and json_schema_string_type == json_schema.STRING \
                    and value is not None:
                value = self.serialize_table_record_datetime_value(remote_schema, streamed_schema, path, value)
                # Tuple form keeps the cache key hashable (dicts are not).
                value_json_schema_tuple = (json_schema.STRING, json_schema.DATE_TIME_FORMAT)
                field_name = cached_field_name(path, value_json_schema_tuple)
            else:
                field_name = cached_field_name(path, (json_schema_string_type,))

            ## Serialize NULL default value
            value = self.serialize_table_record_null_value(remote_schema, streamed_schema, path, value)

            # Only write while the slot still holds the NULL sentinel, so the
            # first path mapped to a given remote column wins — presumably to
            # avoid later paths clobbering an already-serialized value (TODO
            # confirm intent against callers).
            if row[field_name] == NULL_DEFAULT:
                row[field_name] = value

        serialized_rows.append(row)

    return serialized_rows
def test_python_type():
    """`json_schema.python_type` maps each Python literal to its JSON-Schema type name.

    Fix: the original asserted `python_type(True)` twice; every other type is
    exercised with distinct representative values, so the duplicate was almost
    certainly meant to cover `False`.
    """
    # null
    assert json_schema.python_type(None) \
        == json_schema.NULL

    # boolean — both truth values (bool is checked before int, since
    # bool is a subclass of int in Python)
    assert json_schema.python_type(True) \
        == json_schema.BOOLEAN
    assert json_schema.python_type(False) \
        == json_schema.BOOLEAN

    # integer — positive, zero, and negative
    assert json_schema.python_type(123) \
        == json_schema.INTEGER
    assert json_schema.python_type(0) \
        == json_schema.INTEGER
    assert json_schema.python_type(-1234567890) \
        == json_schema.INTEGER

    # number — positive, zero, and negative floats
    assert json_schema.python_type(3.14159) \
        == json_schema.NUMBER
    assert json_schema.python_type(0.0) \
        == json_schema.NUMBER
    assert json_schema.python_type(-3.14159) \
        == json_schema.NUMBER

    # string — empty and non-empty
    assert json_schema.python_type('') \
        == json_schema.STRING
    assert json_schema.python_type('hello') \
        == json_schema.STRING
    assert json_schema.python_type('world') \
        == json_schema.STRING
def _serialize_table_records(self, remote_schema, streamed_schema, records):
    """
    Parse the given table's `records` in preparation for persistence to the
    remote target.

    Base implementation returns a list of dictionaries, where _every_
    dictionary has the same keys as `remote_schema`'s properties.

    :param remote_schema: TABLE_SCHEMA(remote)
    :param streamed_schema: TABLE_SCHEMA(local)
    :param records: [{(path_0, path_1, ...): (_json_schema_string_type, value), ...}, ...]
    :return: [{...}, ...]
    """
    # Column paths whose streamed schema is a datetime.
    datetime_paths = [
        k for k, v in streamed_schema['schema']['properties'].items()
        if json_schema.is_datetime(v)
    ]

    # Column paths with a declared non-None default value.
    default_paths = {
        k: v.get('default')
        for k, v in streamed_schema['schema']['properties'].items()
        if v.get('default') is not None
    }

    ## Get the default NULL value so we can assign row values when value is _not_ NULL
    NULL_DEFAULT = self.serialize_table_record_null_value(
        remote_schema, streamed_schema, None, None)

    serialized_rows = []

    remote_fields = set(remote_schema['schema']['properties'].keys())
    # Template row: every remote column pre-filled with the NULL sentinel.
    default_row = dict([(field, NULL_DEFAULT) for field in remote_fields])

    paths = streamed_schema['schema']['properties'].keys()

    for record in records:
        # Fresh copy per record so rows never share mutable state.
        row = deepcopy(default_row)

        for path in paths:
            json_schema_string_type, value = record.get(path, (None, None))

            ## Serialize fields which are not present but have default values set
            if path in default_paths \
                    and value is None:
                value = default_paths[path]
                json_schema_string_type = json_schema.python_type(value)

            ## Serialize datetime to compatible format
            if path in datetime_paths \
                    and json_schema_string_type == json_schema.STRING \
                    and value is not None:
                value = self.serialize_table_record_datetime_value(
                    remote_schema, streamed_schema, path, value)
                value_json_schema = {
                    'type': json_schema.STRING,
                    'format': json_schema.DATE_TIME_FORMAT
                }
            elif json_schema_string_type:
                value_json_schema = {'type': json_schema_string_type}
            else:
                # No streamed type and no default: fall back to the column's
                # declared simple type so the field name can still be resolved.
                value_json_schema = json_schema.simple_type(
                    streamed_schema['schema']['properties'][path])

            ## Serialize NULL default value
            value = self.serialize_table_record_null_value(
                remote_schema, streamed_schema, path, value)

            field_name = self._serialize_table_record_field_name(
                remote_schema, streamed_schema, path, value_json_schema)

            # Write only to known remote columns, and only while the slot is
            # absent or still holds the NULL sentinel — so the first path that
            # maps to a given column wins.
            if field_name in remote_fields \
                    and (not field_name in row
                         or row[field_name] == NULL_DEFAULT):
                row[field_name] = value

        serialized_rows.append(row)

    return serialized_rows