# Imports needed by the functions below (TensorFlow 1.x contrib paths).
from tensorflow.contrib.boosted_trees.python.ops import gen_quantile_ops
from tensorflow.contrib.boosted_trees.python.ops import gen_stats_accumulator_ops
from tensorflow.contrib.boosted_trees.python.ops import split_handler_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops


# Note: this is a method extracted from the QuantileAccumulator wrapper class;
# `self.resource_handle` is the handle of the underlying accumulator resource.
def get_buckets(self, stamp_token):
  """Returns quantile buckets created during the previous flush."""
  are_buckets_ready, buckets = (
      gen_quantile_ops.quantile_accumulator_get_buckets(
          quantile_accumulator_handles=[self.resource_handle],
          stamp_token=stamp_token))
  # The op returns one result per handle; we queried a single resource.
  return are_buckets_ready[0], buckets[0]
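# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of the stamped read/flush cycle around get_buckets(),
# assuming the QuantileAccumulator wrapper from
# tensorflow.contrib.boosted_trees.python.ops.quantile_ops and its
# flush(stamp_token, next_stamp_token) method; the constructor arguments
# shown here are best-effort assumptions, not taken from this file.
def _example_get_buckets_usage():
  """Flushes summaries under stamp 0, then reads buckets under stamp 1."""
  from tensorflow.contrib.boosted_trees.python.ops import quantile_ops

  accumulator = quantile_ops.QuantileAccumulator(
      init_stamp_token=0, epsilon=0.01, num_quantiles=99, name="example")
  # Buckets computed from summaries added under stamp 0 become readable
  # only after flushing to the next stamp token.
  flush_op = accumulator.flush(stamp_token=0, next_stamp_token=1)
  with ops.control_dependencies([flush_op]):
    are_ready, boundaries = accumulator.get_buckets(stamp_token=1)
  return are_ready, boundaries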
def _make_dense_split(quantile_accumulator_handle, stats_accumulator_handle,
                      stamp_token, next_stamp_token, multiclass_strategy,
                      class_id, feature_column_id, l1_regularization,
                      l2_regularization, tree_complexity_regularization,
                      min_node_weight, is_multi_dimentional,
                      loss_uses_sum_reduction, weak_learner_type):
  """Function that builds splits for a dense feature column."""
  # Get the bucket boundaries.
  are_splits_ready, buckets = (
      gen_quantile_ops.quantile_accumulator_get_buckets(
          quantile_accumulator_handles=[quantile_accumulator_handle],
          stamp_token=stamp_token))
  # quantile_accumulator_get_buckets returns a list of results, one per handle
  # passed to it. Here we query a single resource, so take the first element.
  are_splits_ready = are_splits_ready[0]
  buckets = buckets[0]

  # After we receive the boundaries from the previous iteration we can flush
  # the quantile accumulator.
  with ops.control_dependencies([buckets]):
    flush_quantiles = gen_quantile_ops.quantile_accumulator_flush(
        quantile_accumulator_handle=quantile_accumulator_handle,
        stamp_token=stamp_token,
        next_stamp_token=next_stamp_token)

  if is_multi_dimentional:
    num_minibatches, partition_ids, bucket_ids, gradients, hessians = (
        gen_stats_accumulator_ops.stats_accumulator_tensor_flush(
            stats_accumulator_handle, stamp_token, next_stamp_token))
  else:
    num_minibatches, partition_ids, bucket_ids, gradients, hessians = (
        gen_stats_accumulator_ops.stats_accumulator_scalar_flush(
            stats_accumulator_handle, stamp_token, next_stamp_token))
  # For sum_reduction, we don't need to divide by the number of minibatches.
  num_minibatches = control_flow_ops.cond(
      loss_uses_sum_reduction,
      lambda: math_ops.cast(1, dtypes.int64),
      lambda: num_minibatches)
  # Put quantile and stats accumulator flushing in the dependency path.
  with ops.control_dependencies([flush_quantiles, partition_ids]):
    are_splits_ready = array_ops.identity(are_splits_ready)
  partition_ids, gains, split_infos = (
      split_handler_ops.build_dense_inequality_splits(
          num_minibatches=num_minibatches,
          bucket_boundaries=buckets,
          partition_ids=partition_ids,
          bucket_ids=bucket_ids,
          gradients=gradients,
          hessians=hessians,
          class_id=class_id,
          feature_column_group_id=feature_column_id,
          l1_regularization=l1_regularization,
          l2_regularization=l2_regularization,
          tree_complexity_regularization=tree_complexity_regularization,
          min_node_weight=min_node_weight,
          multiclass_strategy=multiclass_strategy,
          weak_learner_type=weak_learner_type))
  return are_splits_ready, partition_ids, gains, split_infos
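# --- Usage sketch (illustrative; not part of the original module) ---
# Shows how _make_dense_split might be wired up at the end of a tree-growing
# iteration for a single-class model with scalar (non-multidimensional)
# gradient stats. The two resource handles are assumed to come from
# previously created quantile and stats accumulators, and the enum values are
# assumed to live in tensorflow.contrib.boosted_trees.proto.learner_pb2.
def _example_make_dense_split(quantile_accumulator_handle,
                              stats_accumulator_handle):
  """Builds inequality splits for dense feature column 0."""
  from tensorflow.contrib.boosted_trees.proto import learner_pb2
  from tensorflow.python.framework import constant_op

  return _make_dense_split(
      quantile_accumulator_handle=quantile_accumulator_handle,
      stats_accumulator_handle=stats_accumulator_handle,
      stamp_token=0,
      next_stamp_token=1,
      multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
      class_id=-1,  # No per-class slicing for a single-class model.
      feature_column_id=0,
      l1_regularization=0.0,
      l2_regularization=1.0,
      tree_complexity_regularization=0.0,
      min_node_weight=0.0,
      is_multi_dimentional=False,  # Use the scalar stats accumulator flush.
      # tf.cond in TF 1.x rejects plain Python bools, so pass a bool tensor.
      loss_uses_sum_reduction=constant_op.constant(False),
      weak_learner_type=learner_pb2.LearnerConfig.NORMAL_DECISION_TREE)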