def testDenseAndSparseFeatures(self):
  """Quantizes dense and sparse features in a single quantiles() call."""
  with self.cached_session():
    dense_quantiles, sparse_quantiles = quantile_ops.quantiles(
        [self._dense_float_tensor_0, self._dense_float_tensor_1], [
            self._sparse_values_0, self._sparse_values_1,
            self._sparse_values_2, self._sparse_values_m
        ], [self._dense_thresholds_0, self._dense_thresholds_1], [
            self._sparse_thresholds_0, self._sparse_thresholds_1,
            self._sparse_thresholds_2, self._sparse_thresholds_m
        ], [
            self._sparse_indices_0, self._sparse_indices_1,
            self._sparse_indices_2, self._sparse_indices_m
        ])
    # Each expected entry is a list of (bucket_id, dimension_id) pairs.
    expected_dense = [
        [[0, 0], [0, 0], [1, 0], [2, 0]],  # Dense feature 0.
        [[1, 0], [0, 0], [2, 0], [2, 0]],  # Dense feature 1.
    ]
    for want, got in zip(expected_dense, dense_quantiles):
      self.assertAllEqual(want, got.eval())
    expected_sparse = [
        [[0, 0], [1, 0], [1, 0], [2, 0]],  # Sparse feature 0.
        [[0, 0], [1, 0], [0, 0]],  # Sparse feature 1.
        [[0, 0], [0, 0]],  # Sparse feature 2.
        [[0, 1], [1, 0], [0, 0], [2, 1], [0, 2]],  # Multidimensional feature.
    ]
    for want, got in zip(expected_sparse, sparse_quantiles):
      self.assertAllEqual(want, got.eval())
def testSparseFeaturesOnly(self):
  """Quantizes sparse features when no dense features are passed."""
  with self.cached_session():
    _, sparse_quantiles = quantile_ops.quantiles([], [
        self._sparse_values_0, self._sparse_values_1, self._sparse_values_2,
        self._sparse_values_m
    ], [], [
        self._sparse_thresholds_0, self._sparse_thresholds_1,
        self._sparse_thresholds_2, self._sparse_thresholds_m
    ], [
        self._sparse_indices_0, self._sparse_indices_1,
        self._sparse_indices_2, self._sparse_indices_m
    ])
    # One output per sparse feature passed in.
    self.assertAllEqual(4, len(sparse_quantiles))
    # Each expected entry is a list of (bucket_id, dimension_id) pairs.
    expected = [
        [[0, 0], [1, 0], [1, 0], [2, 0]],  # Sparse feature 0.
        [[0, 0], [1, 0], [0, 0]],  # Sparse feature 1.
        [[0, 0], [0, 0]],  # Sparse feature 2.
        [[0, 1], [1, 0], [0, 0], [2, 1], [0, 2]],  # Multidimensional feature.
    ]
    for want, got in zip(expected, sparse_quantiles):
      self.assertAllEqual(want, got.eval())
def ready_inputs_fn():
  """Branch to execute when quantiles are ready."""
  # Bucketize the single dense float column against the precomputed
  # quantile boundaries; quantiles() returns (dense_results, sparse_results).
  dense_quantized, _ = quantile_ops.quantiles(
      [float_column], [], [quantile_buckets], [], [])
  bucket_ids = math_ops.cast(dense_quantized, dtypes.int64)
  # Drop the leading per-feature axis (exactly one dense feature was passed).
  bucket_ids = array_ops.squeeze(bucket_ids, axis=0)
  return (example_partition_ids, bucket_ids, gradients, hessians)
def testDenseFeaturesOnly(self):
  """Quantizes dense features when no sparse features are passed."""
  with self.cached_session():
    dense_quantiles, _ = quantile_ops.quantiles(
        [self._dense_float_tensor_0, self._dense_float_tensor_1], [],
        [self._dense_thresholds_0, self._dense_thresholds_1], [], [])
    # Each expected entry is a list of (bucket_id, dimension_id) pairs.
    expected = [
        [[0, 0], [0, 0], [1, 0], [2, 0]],  # Dense feature 0.
        [[1, 0], [0, 0], [2, 0], [2, 0]],  # Dense feature 1.
    ]
    for want, got in zip(expected, dense_quantiles):
      self.assertAllEqual(want, got.eval())
def quantiles_ready():
  """The subgraph for when the quantiles are ready."""
  # quantiles() returns (dense_results, sparse_results); index [1] selects
  # the sparse results, of which there is exactly one feature here.
  quantized_feature = quantile_ops.quantiles([], [sparse_column_values], [],
                                             [quantile_buckets],
                                             [sparse_column_indices])
  quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64)
  # Drop the leading per-feature axis (single sparse feature).
  quantized_feature = array_ops.squeeze(quantized_feature, axis=0)
  # Column 0 of the sparse indices is the example (row) id; split it off and
  # flatten it to a vector for gather().
  example_indices, _ = array_ops.split(sparse_column_indices,
                                       num_or_size_splits=2, axis=1)
  example_indices = array_ops.squeeze(example_indices, [1])
  # Keep only the stats of examples that have a value for this sparse column.
  filtered_gradients = array_ops.gather(gradients, example_indices)
  filtered_hessians = array_ops.gather(hessians, example_indices)
  filtered_partition_ids = array_ops.gather(example_partition_ids,
                                            example_indices)
  # NOTE: uniquing is over ALL example partition ids (not the filtered ones)
  # so that the bias stats below aggregate every example in each partition.
  unique_partitions, mapped_partitions = array_ops.unique(
      example_partition_ids)
  # Compute aggregate stats for each partition.
  # Since unsorted_segment_sum can be numerically unstable, use 64bit
  # operation.
  gradients64 = math_ops.cast(gradients, dtypes.float64)
  hessians64 = math_ops.cast(hessians, dtypes.float64)
  per_partition_gradients = math_ops.unsorted_segment_sum(
      gradients64, mapped_partitions, array_ops.size(unique_partitions))
  per_partition_hessians = math_ops.unsorted_segment_sum(
      hessians64, mapped_partitions, array_ops.size(unique_partitions))
  # Cast back to the float32 used by the rest of the subgraph.
  per_partition_gradients = math_ops.cast(per_partition_gradients,
                                          dtypes.float32)
  per_partition_hessians = math_ops.cast(per_partition_hessians,
                                         dtypes.float32)
  # Prepend a bias feature per partition that accumulates the stats for all
  # examples in that partition.
  bias_feature_ids = array_ops.fill(
      array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
  bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
  # Pair each bias id with a zero so bias rows are rank-2 like
  # quantized_feature — presumably (bucket_id, dimension_id); confirm against
  # the quantiles op output layout.
  zeros = array_ops.zeros_like(bias_feature_ids)
  bias_feature_ids = array_ops.stack([bias_feature_ids, zeros], axis=1)
  # Bias rows come first, followed by the per-example filtered rows; all four
  # outputs are concatenated in the same order so rows stay aligned.
  partition_ids = array_ops.concat(
      [unique_partitions, filtered_partition_ids], 0)
  filtered_gradients = array_ops.concat(
      [per_partition_gradients, filtered_gradients], 0)
  filtered_hessians = array_ops.concat(
      [per_partition_hessians, filtered_hessians], 0)
  bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)
  return partition_ids, bucket_ids, filtered_gradients, filtered_hessians