def encode_feature(self, feature, without_pk=False):
    """
    Serialise a feature to bytes according to this schema.

    The feature is first converted to a raw dict, which the legend splits
    into pk values and non-pk values. The packed payload always starts with
    the legend's hexhash and ends with the non-pk values; the pk values sit
    in between unless without_pk is true, in which case they are left out so
    that the resulting bytes don't depend on the feature's pk values.
    """
    raw = self.feature_to_raw_dict(feature)
    pk_part, non_pk_part = self.legend.raw_dict_to_value_tuples(raw)

    payload = [self.legend.hexhash()]
    if not without_pk:
        payload.append(pk_part)
    payload.append(non_pk_part)
    return msg_pack(payload)
def encode_pks_to_path(self, pk_values):
    """
    Build the path that the feature with the given pk values should be
    written to.

    pk_values should be a list or tuple of pk values. The path consists of
    self.levels directory segments, each a consecutive self.group_length-sized
    slice of the hash of the packed pk values, followed by the file name
    derived from the packed pk values. Segments are joined with "/".
    """
    packed = msg_pack(pk_values)
    digest = self._hash(packed)

    segments = []
    for level in range(self.levels):
        width = self.group_length
        start = level * width
        segments.append(digest[start:start + width])

    # The final path component is the file name itself.
    segments.append(self._encode_file_name_from_packed_pk(packed))
    return "/".join(segments)
def encode_raw_feature_dict(
    self, raw_feature_dict, legend, relative=False, *, schema=None
):
    """
    Work out where and what *should be written* to store a feature, without
    writing anything.

    Takes a "raw" feature dict (keyed by column IDs) plus a legend, and
    returns a (path, data) tuple: the path comes from encode_pks_to_path
    (relative and schema are forwarded to it unchanged), and the data is the
    packed pair of the legend's hexhash and the non-pk values.

    This is almost the inverse of get_raw_feature_dict, except TableV3
    doesn't write the data.
    """
    pk_part, non_pk_part = legend.raw_dict_to_value_tuples(raw_feature_dict)
    feature_path = self.encode_pks_to_path(
        pk_part, relative=relative, schema=schema
    )
    payload = [legend.hexhash(), non_pk_part]
    return feature_path, msg_pack(payload)
def dumps(self):
    """
    Serialise this legend to a bytestring.

    The packed value is the pair (pk_columns, non_pk_columns).
    """
    columns = (self.pk_columns, self.non_pk_columns)
    return msg_pack(columns)
def encode_filename(self, pk_values):
    """
    Pack the given pk values and return the result of passing the packed
    bytes to _encode_file_name_from_packed_pk.
    """
    to_file_name = self._encode_file_name_from_packed_pk
    packed = msg_pack(pk_values)
    return to_file_name(packed)