def ComputePerSliceMetrics(  # pylint: disable=invalid-name
    slice_result: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    compute_with_sampling: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  slice_result.element_type = beam.typehints.Any

  return (
      slice_result
      | 'CombinePerSlice' >> beam.CombinePerKey(
          _AggregateCombineFn(
              eval_shared_model=eval_shared_model,
              desired_batch_size=desired_batch_size,
              compute_with_sampling=compute_with_sampling,
              seed_for_testing=random_seed_for_testing))
      | 'InterpretOutput' >> beam.ParDo(
          _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
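# The transform above is a CombinePerKey-then-ParDo over (slice key, extracts)
# pairs. Below is a minimal sketch of that same shape, using toy stand-ins for
# _AggregateCombineFn and _ExtractOutputDoFn (both hypothetical here) so the
# data flow can be exercised on the DirectRunner without an EvalSavedModel.
import apache_beam as beam


class _ToyAggregateCombineFn(beam.CombineFn):
  """Stand-in combiner: sums per-slice values instead of running a model."""

  def create_accumulator(self):
    return 0.0

  def add_input(self, accumulator, element):
    return accumulator + element

  def merge_accumulators(self, accumulators):
    return sum(accumulators)

  def extract_output(self, accumulator):
    return accumulator


class _ToyExtractOutputDoFn(beam.DoFn):
  """Stand-in for _ExtractOutputDoFn: wraps the combined value in a dict."""

  def process(self, element):
    slice_key, combined = element
    yield slice_key, {'toy_sum': combined}


with beam.Pipeline() as p:
  _ = (p
       | beam.Create([(('gender', 'f'), 1.0), (('gender', 'f'), 2.0),
                      (('gender', 'm'), 3.0)])
       | 'CombinePerSlice' >> beam.CombinePerKey(_ToyAggregateCombineFn())
       | 'InterpretOutput' >> beam.ParDo(_ToyExtractOutputDoFn())
       | beam.Map(print))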
def _ComputePerSlice(  # pylint: disable=invalid-name
    sliced_extracts: beam.pvalue.PCollection,
    computations: List[metric_types.MetricComputation],
    derived_computations: List[metric_types.DerivedMetricComputation],
    compute_with_sampling: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  sliced_extracts.element_type = beam.typehints.Any

  def convert_and_add_derived_values(
      sliced_results: Tuple[Text, Tuple[Any, ...]],
      derived_computations: List[metric_types.DerivedMetricComputation],
  ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
    """Converts per slice tuple of dicts into single dict and adds derived."""
    result = {}
    for v in sliced_results[1]:
      result.update(v)
    for c in derived_computations:
      result.update(c.result(result))
    # Remove private metrics
    keys = list(result.keys())
    for k in keys:
      if k.name.startswith('_'):
        result.pop(k)
    return (sliced_results[0], result)

  # A fanout of 8 is used here to reduce stragglers that occur during the
  # merging of large datasets such as histogram buckets. This has little
  # effect on the msec profiles, but can impact the wall time and memory
  # usage. If experiencing significantly extended run times due to stragglers,
  # try bumping this to a larger number.
  return (sliced_extracts
          | 'CombinePerSliceKey' >> beam.CombinePerKey(
              _ComputationsCombineFn(
                  computations=computations,
                  compute_with_sampling=compute_with_sampling,
                  random_seed_for_testing=random_seed_for_testing))
          .with_hot_key_fanout(8)
          | 'ConvertAndAddDerivedValues' >> beam.Map(
              convert_and_add_derived_values, derived_computations))
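# with_hot_key_fanout is a standard method on the transform returned by
# beam.CombinePerKey; the sketch below shows the hint in isolation, with a
# plain callable combiner instead of _ComputationsCombineFn. The fanout (an
# int, or a callable mapping key -> fanout) only changes how intermediate
# combining is parallelized across workers, not the combined result.
import apache_beam as beam

with beam.Pipeline() as p:
  _ = (p
       | beam.Create([('slice_a', i) for i in range(1000)] +
                     [('slice_b', i) for i in range(10)])
       | 'CombinePerSliceKey' >> beam.CombinePerKey(sum).with_hot_key_fanout(8)
       | beam.Map(print))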
def _ComputePerSlice(  # pylint: disable=invalid-name
    sliced_extracts: beam.pvalue.PCollection,
    computations: List[metric_types.MetricComputation],
    derived_computations: List[metric_types.DerivedMetricComputation],
    compute_with_sampling: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  sliced_extracts.element_type = beam.typehints.Any

  def convert_and_add_derived_values(
      sliced_results: Tuple[Text, Tuple[Any, ...]],
      derived_computations: List[metric_types.DerivedMetricComputation],
  ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
    """Converts per slice tuple of dicts into single dict and adds derived."""
    result = {}
    for v in sliced_results[1]:
      result.update(v)
    for c in derived_computations:
      result.update(c.result(result))
    # Remove private metrics
    keys = list(result.keys())
    for k in keys:
      if k.name.startswith('_'):
        result.pop(k)
    return (sliced_results[0], result)

  return (sliced_extracts
          | 'CombinePerSliceKey' >> beam.CombinePerKey(
              _ComputationsCombineFn(
                  computations=computations,
                  compute_with_sampling=compute_with_sampling,
                  random_seed_for_testing=random_seed_for_testing))
          | 'ConvertAndAddDerivedValues' >> beam.Map(
              convert_and_add_derived_values, derived_computations))
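# Plain-Python sketch of what convert_and_add_derived_values does: the
# combiner emits one dict per MetricComputation for a slice; those dicts are
# merged, derived computations run over the merged dict, and private
# (underscore-prefixed) keys are dropped. _Key and derived_ratio are
# hypothetical stand-ins for metric_types.MetricKey and a
# DerivedMetricComputation's result callable.
import collections

_Key = collections.namedtuple('_Key', ['name'])

per_computation_dicts = (
    {_Key('_weighted_example_count'): 10.0},
    {_Key('accuracy'): 0.8},
)


def derived_ratio(metrics):
  # A derived computation only sees metrics that were already computed.
  return {
      _Key('accuracy_per_example'):
          metrics[_Key('accuracy')] / metrics[_Key('_weighted_example_count')]
  }


merged = {}
for d in per_computation_dicts:
  merged.update(d)
merged.update(derived_ratio(merged))
# Drop private keys, mirroring the startswith('_') check above.
merged = {k: v for k, v in merged.items() if not k.name.startswith('_')}
print(merged)  # Only 'accuracy' and the derived 'accuracy_per_example' remain.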
def ComputePerSliceMetrics(  # pylint: disable=invalid-name
    slice_result: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    compute_with_sampling: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  slice_result.element_type = beam.typehints.Any

  return (
      slice_result
      # _ModelLoadingIdentityFn loads the EvalSavedModel into memory
      # under a shared handle that can be used by subsequent steps.
      # Combiner lifting and producer-consumer fusion should ensure
      # that these steps run in the same process and memory space.
      # TODO(b/69566045): Remove _ModelLoadingIdentityFn and move model
      # loading to CombineFn.setup after it is available in Beam.
      | 'LoadModel' >> beam.ParDo(
          _ModelLoadingIdentityFn(eval_shared_model=eval_shared_model))
      | 'CombinePerSlice' >> beam.CombinePerKey(
          _AggregateCombineFn(
              eval_shared_model=eval_shared_model,
              desired_batch_size=desired_batch_size,
              compute_with_sampling=compute_with_sampling,
              seed_for_testing=random_seed_for_testing))
      | 'InterpretOutput' >> beam.ParDo(
          _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
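# The TODO above anticipates Beam's per-instance DoFn.setup() hook, which is
# now available. A hedged sketch of that pattern (not TFMA's
# _ModelLoadingIdentityFn): an identity DoFn that loads an expensive resource
# once in setup() and keeps it on the instance, so elements pass through
# unchanged while fused downstream steps can reuse the loaded object.
# load_expensive_model is a hypothetical loader included only to keep the
# sketch self-contained.
import apache_beam as beam


def load_expensive_model(path):
  # Stand-in loader; a real implementation would deserialize a saved model.
  return {'path': path}


class LoadOnceIdentityFn(beam.DoFn):
  """Identity DoFn whose only effect is caching a loaded model."""

  def __init__(self, model_path):
    self._model_path = model_path
    self._model = None

  def setup(self):
    # Called once per DoFn instance, before any bundles are processed.
    self._model = load_expensive_model(self._model_path)

  def process(self, element):
    yield element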
def ComputePerSliceMetrics(  # pylint: disable=invalid-name
    slice_result: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    num_bootstrap_samples: Optional[int] = 1,
    random_seed_for_testing: Optional[int] = None,
) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Number of replicas to use in calculating
      uncertainty using bootstrapping. If 1 is provided (default), aggregate
      metrics will be calculated with no uncertainty. If num_bootstrap_samples
      is > 1, multiple samples of each slice will be calculated using the
      Poisson bootstrap method. To calculate standard errors,
      num_bootstrap_samples should be 20 or more in order to provide useful
      data. More is better, but you pay a performance cost.
    random_seed_for_testing: Seed to use for unit testing, because
      nondeterministic tests stink. Each partition will use this value + i.

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  slice_result.element_type = beam.typehints.Any

  if not num_bootstrap_samples:
    num_bootstrap_samples = 1
  # TODO(ckuhn): Cap the number of bootstrap samples at 20.
  if num_bootstrap_samples < 1:
    raise ValueError('num_bootstrap_samples should be > 0, got %d' %
                     num_bootstrap_samples)

  output_results = (
      slice_result
      | 'CombinePerSlice' >> beam.CombinePerKey(
          _AggregateCombineFn(
              eval_shared_model=eval_shared_model,
              desired_batch_size=desired_batch_size,
              compute_with_sampling=False))
      | 'InterpretOutput' >> beam.ParDo(
          _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
  if num_bootstrap_samples > 1:
    multicombine = []
    for i in range(num_bootstrap_samples):
      multicombine.append(
          slice_result
          | 'CombinePerSliceWithSamples%d' % i >> beam.CombinePerKey(
              _AggregateCombineFn(
                  eval_shared_model=eval_shared_model,
                  desired_batch_size=desired_batch_size,
                  compute_with_sampling=True,
                  seed_for_testing=None if random_seed_for_testing is None
                  else random_seed_for_testing + i))
          | 'InterpretSampledOutput%d' % i >> beam.ParDo(
              _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
    output_results = (
        multicombine
        | 'FlattenBootstrapPartitions' >> beam.Flatten()
        | 'GroupBySlice' >> beam.GroupByKey()
        | 'MergeBootstrap' >> beam.ParDo(
            _MergeBootstrap(), beam.pvalue.AsIter(output_results)))
  # Separate metrics and plots.
  return (output_results
          | 'SeparateMetricsAndPlots' >> beam.ParDo(
              _SeparateMetricsAndPlotsFn()).with_outputs(
                  _SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS,
                  main=_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS))
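# Numpy sketch of the Poisson bootstrap that compute_with_sampling=True
# implies: each element is counted Poisson(1) times in a replicate, so a
# replicate can be computed in a single streaming pass without materializing a
# resampled dataset. The spread of a metric across ~20 or more replicates
# approximates its standard error. Independent of Beam and TFMA; the toy
# per-example scores are synthetic.
import numpy as np

rng = np.random.default_rng(seed=42)
values = rng.normal(loc=0.7, scale=0.1, size=5000)  # e.g. per-example scores

num_bootstrap_samples = 20
replicate_means = []
for _ in range(num_bootstrap_samples):
  weights = rng.poisson(lam=1.0, size=values.shape[0])
  replicate_means.append(np.average(values, weights=weights))

print('unsampled mean:', values.mean())
print('bootstrap standard error:', np.std(replicate_means, ddof=1))
# Comparable to the analytic standard error of the mean:
print('analytic standard error:', values.std(ddof=1) / np.sqrt(len(values)))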
def _ComputePerSlice(  # pylint: disable=invalid-name
    sliced_extracts: beam.pvalue.PCollection,
    computations: List[metric_types.MetricComputation],
    derived_computations: List[metric_types.DerivedMetricComputation],
    cross_slice_specs: Optional[Iterable[config.CrossSlicingSpec]] = None,
    compute_with_sampling: Optional[bool] = False,
    num_jackknife_samples: int = 0,
    skip_ci_metric_keys: Set[metric_types.MetricKey] = frozenset(),
    random_seed_for_testing: Optional[int] = None,
    baseline_model_name: Optional[Text] = None) -> beam.pvalue.PCollection:
  """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    cross_slice_specs: List of CrossSlicingSpec.
    compute_with_sampling: True to compute with bootstrap sampling. This allows
      _ComputePerSlice to be used to generate unsampled values from the whole
      data set, as well as bootstrap resamples, in which each element is
      treated as if it showed up p ~ poisson(1) times.
    num_jackknife_samples: number of delete-d jackknife estimates to use in
      computing standard errors on metrics.
    skip_ci_metric_keys: Set of metric keys for which to skip confidence
      interval computation.
    random_seed_for_testing: Seed to use for unit testing.
    baseline_model_name: Name for baseline model.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
  # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
  sliced_extracts.element_type = beam.typehints.Any

  def convert_and_add_derived_values(
      sliced_results: Tuple[slicer.SliceKeyType,
                            Tuple[metric_types.MetricsDict, ...]],
      derived_computations: List[metric_types.DerivedMetricComputation],
  ) -> Tuple[slicer.SliceKeyType, metric_types.MetricsDict]:
    """Converts per slice tuple of dicts into single dict and adds derived."""
    result = {}
    for v in sliced_results[1]:
      result.update(v)
    for c in derived_computations:
      result.update(c.result(result))
    # Remove private metrics
    keys = list(result.keys())
    for k in keys:
      if k.name.startswith('_') and not k.name.startswith('__'):
        result.pop(k)
    return sliced_results[0], result

  def add_diff_metrics(
      sliced_metrics: Tuple[Union[slicer.SliceKeyType,
                                  slicer.CrossSliceKeyType],
                            Dict[metric_types.MetricKey, Any]],
      baseline_model_name: Optional[Text],
  ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
    """Add diff metrics if there is a baseline model."""
    result = copy.copy(sliced_metrics[1])

    if baseline_model_name:
      diff_result = {}
      for k, v in result.items():
        if k.model_name != baseline_model_name and k.make_baseline_key(
            baseline_model_name) in result:
          # Plots will not be diffed.
          if not isinstance(v, message.Message):
            diff_result[k.make_diff_key()] = v - result[k.make_baseline_key(
                baseline_model_name)]
      result.update(diff_result)

    return (sliced_metrics[0], result)

  combiner = _ComputationsCombineFn(
      computations=computations,
      compute_with_sampling=compute_with_sampling,
      random_seed_for_testing=random_seed_for_testing)
  if num_jackknife_samples:
    # We do not use the hot key fanout hint used by the non-jackknife path
    # because the random jackknife partitioning naturally mitigates hot keys.
    sliced_combiner_outputs = (
        sliced_extracts
        | 'JackknifeCombinePerSliceKey' >> jackknife.JackknifeCombinePerKey(
            combiner, num_jackknife_samples))
  else:
    sliced_combiner_outputs = (
        sliced_extracts
        | 'CombinePerSliceKey' >> beam.CombinePerKey(combiner)
        .with_hot_key_fanout(_COMBINE_PER_SLICE_KEY_HOT_KEY_FANOUT))

  sliced_derived_values_and_diffs = (
      sliced_combiner_outputs
      | 'ConvertAndAddDerivedValues' >> beam.Map(
          convert_and_add_derived_values, derived_computations)
      | 'AddCrossSliceMetrics' >> _AddCrossSliceMetrics(cross_slice_specs)  # pylint: disable=no-value-for-parameter
      | 'AddDiffMetrics' >> beam.Map(add_diff_metrics, baseline_model_name))

  if num_jackknife_samples:
    return (sliced_derived_values_and_diffs
            | 'MergeJackknifeSamples' >> jackknife.MergeJackknifeSamples(
                num_jackknife_samples, skip_ci_metric_keys))
  else:
    return sliced_derived_values_and_diffs
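# Plain-Python sketch of the add_diff_metrics contract, using a hypothetical
# stand-in for metric_types.MetricKey: for every non-baseline key that has a
# counterpart under the baseline model, a new diff key holds candidate minus
# baseline. The real code also skips plots (proto messages); that branch is
# omitted here.
import dataclasses


@dataclasses.dataclass(frozen=True)
class StandInKey:
  name: str
  model_name: str
  is_diff: bool = False

  def make_baseline_key(self, baseline_model_name):
    return dataclasses.replace(self, model_name=baseline_model_name)

  def make_diff_key(self):
    return dataclasses.replace(self, is_diff=True)


metrics = {
    StandInKey('auc', 'candidate'): 0.83,
    StandInKey('auc', 'baseline'): 0.80,
}
baseline_model_name = 'baseline'

diffs = {}
for k, v in metrics.items():
  if (k.model_name != baseline_model_name and
      k.make_baseline_key(baseline_model_name) in metrics):
    diffs[k.make_diff_key()] = v - metrics[
        k.make_baseline_key(baseline_model_name)]
metrics.update(diffs)
print(metrics[StandInKey('auc', 'candidate', is_diff=True)])  # ~0.03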