def _maybe_do_batch(
    self,
    accumulator: _CombinerStatsGeneratorsCombineFnAcc,
    force: bool = False) -> None:
  """Runs the stats computation over the buffered batch if it is ready.

  Mutates `accumulator` in place: when enough examples have been buffered
  (or `force` is set and the buffer is non-empty), merges the buffered Arrow
  tables into one, feeds the merged table to every stats generator, and then
  clears the buffer.

  Args:
    accumulator: Accumulator. Will be updated in place.
    force: Force computation of stats even if accumulator has less examples
      than the batch size.
  """
  num_buffered = accumulator.curr_batch_size
  ready = (num_buffered >= self._desired_batch_size
           or (force and num_buffered > 0))
  if not ready:
    return
  self._combine_add_input_batch_size.update(num_buffered)
  # Skip the merge entirely when only a single table is buffered.
  if len(accumulator.input_tables) == 1:
    merged_table = accumulator.input_tables[0]
  else:
    merged_table = merge.MergeTables(accumulator.input_tables)
  accumulator.partial_accumulators = self._for_each_generator(
      lambda gen, gen_acc: gen.add_input(gen_acc, merged_table),
      accumulator.partial_accumulators)
  del accumulator.input_tables[:]
  accumulator.curr_batch_size = 0
def _process_partition(
    partition: Tuple[Tuple[types.SliceKey, int], List[pa.Table]],
    stats_fn: PartitionedStatsFn
) -> Tuple[types.SliceKey, statistics_pb2.DatasetFeatureStatistics]:
  """Computes statistics for all the batches in a single partition.

  Args:
    partition: A ((slice key, partition id), list of Arrow tables) pair. The
      partition id is discarded.
    stats_fn: The PartitionedStatsFn to run over the merged tables.

  Returns:
    A (slice key, DatasetFeatureStatistics) pair.
  """
  key_pair, table_list = partition
  merged_table = merge.MergeTables(table_list)
  return key_pair[0], stats_fn.compute(merged_table)
def _process_partition(
    partition: "Tuple[Tuple[types.SliceKey, int], List[pa.Table]]",
    stats_fn: "PartitionedStatsFn"
) -> "Tuple[types.SliceKey, statistics_pb2.DatasetFeatureStatistics]":
  """Process batches in a single partition.

  Args:
    partition: A ((slice key, partition id), list of Arrow tables) pair. The
      partition id is discarded.
    stats_fn: Stats function applied to the merged tables of the partition.

  Returns:
    A (slice key, statistics proto) pair, as produced by `stats_fn.compute`.
  """
  # The partition id was only needed upstream for grouping; drop it here.
  (slice_key, _), tables = partition
  # Merge every buffered table in the partition into one before computing.
  return slice_key, stats_fn.compute(merge.MergeTables(tables))