Code example #1
0
File: stats_impl.py  Project: wsuchy/data-validation
    def _maybe_do_batch(self,
                        accumulator: _CombinerStatsGeneratorsCombineFnAcc,
                        force: bool = False) -> None:
        """Maybe updates accumulator in place.

    Checks whether the accumulator holds enough examples to form a batch; if
    so (or if `force` is set and anything is buffered), runs the per-generator
    stats computation over the batch and mutates the accumulator accordingly.

    Args:
      accumulator: Accumulator. Will be updated in place.
      force: Force computation of stats even if accumulator has less examples
        than the batch size.
    """
        num_buffered = accumulator.curr_batch_size
        # Guard clause: bail out unless a full batch is buffered, or we are
        # forced to flush a non-empty partial batch.
        if num_buffered < self._desired_batch_size and not (force
                                                            and num_buffered > 0):
            return
        self._combine_add_input_batch_size.update(num_buffered)
        tables = accumulator.input_tables
        # Skip the merge when there is only a single buffered table.
        arrow_table = (tables[0]
                       if len(tables) == 1 else merge.MergeTables(tables))
        accumulator.partial_accumulators = self._for_each_generator(
            lambda gen, gen_acc: gen.add_input(gen_acc, arrow_table),
            accumulator.partial_accumulators)
        # Reset the buffer in place (same list object as input_tables).
        del tables[:]
        accumulator.curr_batch_size = 0
Code example #2
0
def _process_partition(
    partition: Tuple[Tuple[types.SliceKey, int],
                     List[pa.Table]], stats_fn: PartitionedStatsFn
) -> Tuple[types.SliceKey, statistics_pb2.DatasetFeatureStatistics]:
    """Process batches in a single partition."""
    # partition is ((slice_key, partition_index), tables); the index is unused.
    key_with_index, tables = partition
    slice_key = key_with_index[0]
    merged_table = merge.MergeTables(tables)
    return slice_key, stats_fn.compute(merged_table)
Code example #3
0
def _process_partition(partition, stats_fn):
    """Process batches in a single partition."""
    # Unpack ((slice_key, partition_index), tables); the index is discarded.
    (slice_key, _), partition_tables = partition
    combined = merge.MergeTables(partition_tables)
    stats = stats_fn.compute(combined)
    return slice_key, stats