def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Combine the per-Batch "table.columns" metric values into a single set of column names.

    Args:
        domain: Domain forwarded to the parent computation and used to look up the stored parameter value.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is the union of column names over all Batch objects and whose metadata
        holds "success_ratio": the mean, over Batch objects, of a 1/0 flag telling whether that Batch object's
        set of column names equals the union.
    """
    # Let the parent class compute (and store) the "table.columns" metric value for each Batch object.
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Read back what the parent computation stored under the raw fully-qualified parameter name.
    stored_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.raw_fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )
    per_batch_column_names: MetricValues = stored_node[
        FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY
    ]

    # One set of column names per Batch object.
    per_batch_column_name_sets: List[Set[str]] = list(
        map(set, per_batch_column_names)
    )

    # Union of the column names seen in any Batch object.
    all_column_names: Set[str] = set()
    for column_name_set in per_batch_column_name_sets:
        all_column_names = all_column_names.union(column_name_set)

    # Flag (1 or 0) per Batch object: does its set of column names equal the union?
    match_flags: List[int] = [
        int(column_name_set == all_column_names)
        for column_name_set in per_batch_column_name_sets
    ]
    match_ratio: np.float64 = np.mean(np.asarray(match_flags))

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: all_column_names,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: {
                "success_ratio": match_ratio,
            },
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Compute the metric for exactly one Batch object and return its value without the enclosing list.

    Args:
        domain: Domain forwarded to the Batch identifier lookup, the parent computation, and the value lookup.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is the first element of the stored parameter value (or None if the stored
        value is None) and whose metadata is the stored metadata, passed through unchanged.

    Raises:
        ProfilerExecutionError: if the number of available Batch identifiers is not exactly one (a missing,
            i.e., None, list of Batch identifiers counts as zero).
    """
    batch_ids: Optional[List[str]] = self.get_batch_ids(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )
    # "get_batch_ids()" is annotated as Optional: count None as zero identifiers, so that the descriptive
    # ProfilerExecutionError below is raised instead of a bare TypeError from "len(None)".
    num_batch_ids: int = 0 if batch_ids is None else len(batch_ids)
    if num_batch_ids != 1:
        raise ge_exceptions.ProfilerExecutionError(
            message=f"""Utilizing a {self.__class__.__name__} requires exactly one Batch of data to be available ({num_batch_ids} Batch identifiers found). """
        )

    # Compute metric value for one Batch object (expressed as list of Batch objects).
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        json_serialize=False,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Retrieve metric values for one Batch object (expressed as list of Batch objects).
    parameter_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    # The stored value has one entry per Batch object; with a single Batch object, only entry 0 is of interest.
    stored_value = parameter_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]
    single_batch_value = None if stored_value is None else stored_value[0]

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: single_batch_value,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: parameter_node[
                FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY
            ],
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Merge the per-Batch lists of unique values into the unique values over all Batch objects in the domain.

    Args:
        domain: Domain forwarded to the parent computation and used to look up the stored parameter value.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is the result of "_get_unique_values_from_nested_collection_of_sets()"
        applied to the per-Batch metric values, and whose metadata is the stored metadata, passed through.
    """
    # The parent class builds the list of unique values for each Batch object.
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Read back what the parent computation stored for this parameter.
    stored_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    # Extract plain metric values from their attributed form.
    attributed_values = stored_node[
        FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY
    ]
    per_batch_values: MetricValues = (
        AttributedResolvedMetrics.get_metric_values_from_attributed_metric_values(
            attributed_metric_values=attributed_values
        )
    )

    # Replace the per-Batch collections with the unique values over all of them.
    unique_values = _get_unique_values_from_nested_collection_of_sets(
        collection=per_batch_values
    )
    stored_metadata = stored_node[FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY]

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: unique_values,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: stored_metadata,
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Compute the metric through the parent class and return the first element of the stored value.

    Args:
        domain: Domain forwarded to the parent computation and used to look up the stored parameter value.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is element 0 of the stored parameter value (or None if the stored value is
        None) and whose metadata is the stored metadata, passed through unchanged.
    """
    # The parent class computes the metric value for one Batch object (expressed as list of Batch objects).
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        json_serialize=False,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Read back the metric values for one Batch object (expressed as list of Batch objects).
    stored_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    # Unwrap the single entry, unless nothing was stored at all.
    stored_value = stored_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]
    if stored_value is None:
        single_batch_value = None
    else:
        single_batch_value = stored_value[0]

    stored_metadata = stored_node[FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY]

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: single_batch_value,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: stored_metadata,
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Compute the configured metric via "get_metrics()" and package its values, attributed values, and details.

    Args:
        domain: Domain forwarded to the metric computation and to the "reduce_scalar_metric" lookup.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Not used by this implementation.

    Returns:
        Attributes object holding the metric values, the attributed metric values, and the computation details.
        If exactly one attributed resolved metrics object results, its conditioned values are returned (with the
        single column extracted when "reduce_scalar_metric" is set and the values are a numpy array with more
        than one dimension whose second dimension has size 1); otherwise, the entire list of attributed resolved
        metrics objects is returned under both the value key and the attributed value key.
    """
    metric_computation_result: MetricComputationResult = self.get_metrics(
        metric_name=self.metric_name,
        metric_domain_kwargs=self.metric_domain_kwargs,
        metric_value_kwargs=self.metric_value_kwargs,
        enforce_numeric_metric=self.enforce_numeric_metric,
        replace_nan_with_zero=self.replace_nan_with_zero,
        domain=domain,
        variables=variables,
        parameters=parameters,
    )
    details: MetricComputationDetails = metric_computation_result.details

    # Obtain reduce_scalar_metric from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    reduce_scalar_metric: bool = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.reduce_scalar_metric,
        expected_return_type=bool,
        variables=variables,
        parameters=parameters,
    )

    resolved_metrics_list = metric_computation_result.attributed_resolved_metrics

    # Anything other than exactly one result is handed back as is.
    if len(resolved_metrics_list) != 1:
        return Attributes(
            {
                FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: resolved_metrics_list,
                FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: resolved_metrics_list,
                FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details,
            }
        )

    sole_resolved_metrics = resolved_metrics_list[0]
    values = sole_resolved_metrics.conditioned_metric_values

    # As a simplification, drop the trailing axis of size 1 when the reduction directive is set (for convenience).
    if (
        reduce_scalar_metric
        and isinstance(values, np.ndarray)
        and values.ndim > 1
        and values.shape[1] == 1
    ):
        values = values[:, 0]

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: values,
            FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: sole_resolved_metrics.conditioned_attributed_metric_values,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details,
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Compute the mean, over Batch objects, of the ratio of "unexpected_count" to the count of non-null values.

    The non-null count is obtained as total count minus null count, both read from "rule state" under the
    names of other parameter builders; if no null count parameter builder name is configured, the null count
    is taken to be zero for every Batch identifier.

    Args:
        domain: Domain forwarded to all lookups and to the parent computation.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is the mean unexpected count ratio and whose metadata is the metadata
        stored by the parent computation, passed through unchanged.
    """

    def _lookup(reference, expected_type=None):
        # Resolve a reference against "rule state" (i.e., variables and parameters) for the given domain.
        return get_parameter_value_and_validate_return_type(
            domain=domain,
            parameter_reference=reference,
            expected_return_type=expected_type,
            variables=variables,
            parameters=parameters,
        )

    # Total count: resolve the name of the parameter builder first, then read the values stored under that name.
    total_count_builder_name: str = _lookup(
        self.total_count_parameter_builder_name, str
    )
    total_count_node: ParameterNode = _lookup(
        f"{PARAMETER_KEY}{total_count_builder_name}"
    )
    total_counts: MetricValues = total_count_node[
        FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY
    ]

    # Null count: the name of the parameter builder is optional.
    null_count_builder_name: Optional[str] = _lookup(
        self.null_count_parameter_builder_name
    )

    batch_ids: Optional[List[str]] = self.get_batch_ids(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )
    batch_count: int = len(batch_ids)

    null_counts: MetricValues
    if null_count_builder_name is not None:
        null_count_node: ParameterNode = _lookup(
            f"{PARAMETER_KEY}{null_count_builder_name}"
        )
        null_counts = null_count_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]
    else:
        # Without a null count parameter builder, use zero nulls for each Batch identifier.
        null_counts = np.zeros(shape=(batch_count,))

    nonnull_counts: np.ndarray = total_counts - null_counts

    # Compute "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder").
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        json_serialize=None,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Retrieve "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder").
    unexpected_count_node: ParameterNode = _lookup(
        self.fully_qualified_parameter_name
    )
    unexpected_counts: MetricValues = unexpected_count_node[
        FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY
    ]

    ratios: np.ndarray = unexpected_counts / nonnull_counts
    mean_ratio: np.float64 = np.mean(ratios)

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: mean_ratio,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: unexpected_count_node[
                FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY
            ],
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Score every candidate "strftime_format" string by the fraction of non-null values that match it.

    The best candidate is selected by "ParameterBuilder._get_best_candidate_above_threshold()", using the
    threshold resolved from "rule state".

    Args:
        domain: Domain forwarded to the metric computations and to the "rule state" lookups.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Not used by this implementation.

    Returns:
        Attributes object whose value is the selected format string and whose metadata holds "success_ratio"
        (ratio of the selected format string) and "candidate_strings" (all evaluated candidates with their
        ratios, as produced by "ParameterBuilder._get_sorted_candidates_and_ratios()").

    Raises:
        ProfilerExecutionError: if the non-null count computation does not yield exactly one
            "AttributedResolvedMetrics" element, or if its metric values are None.
    """
    metric_computation_result: MetricComputationResult = self.get_metrics(
        metric_name="column_values.nonnull.count",
        metric_domain_kwargs=self.metric_domain_kwargs,
        metric_value_kwargs=self.metric_value_kwargs,
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    # This should never happen.
    if len(metric_computation_result.attributed_resolved_metrics) != 1:
        raise ge_exceptions.ProfilerExecutionError(
            message=f'Result of metric computations for {self.__class__.__name__} must be a list with exactly 1 element of type "AttributedResolvedMetrics" ({metric_computation_result.attributed_resolved_metrics} found).'
        )

    nonnull_resolved_metrics: AttributedResolvedMetrics = (
        metric_computation_result.attributed_resolved_metrics[0]
    )
    nonnull_values: MetricValues = nonnull_resolved_metrics.metric_values
    if nonnull_values is None:
        raise ge_exceptions.ProfilerExecutionError(
            message=f"Result of metric computations for {self.__class__.__name__} is empty."
        )

    # Take column 0 of the metric values (each element corresponds to a Batch ID) and total it.
    nonnull_count: int = sum(nonnull_values[:, 0])

    # Obtain candidate_strings from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    candidate_strings: Union[
        List[str],
        Set[str],
    ] = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.candidate_strings,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    # One "metric_value_kwargs" dictionary per candidate: configured kwargs (if any) plus the candidate itself.
    configured_value_kwargs = self.metric_value_kwargs
    per_candidate_value_kwargs: List[dict] = [
        {**configured_value_kwargs, "strftime_format": candidate}
        if configured_value_kwargs
        else {"strftime_format": candidate}
        for candidate in candidate_strings
    ]

    # Obtain resolved metrics and metadata for all metric configurations and available Batch objects simultaneously.
    metric_computation_result = self.get_metrics(
        metric_name="column_values.match_strftime_format.unexpected_count",
        metric_domain_kwargs=self.metric_domain_kwargs,
        metric_value_kwargs=per_candidate_value_kwargs,
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    ratio_by_format_string: dict = {}
    candidate_resolved_metrics: AttributedResolvedMetrics
    for candidate_resolved_metrics in metric_computation_result.attributed_resolved_metrics:
        # Column 0 of the metric values again (each element corresponds to a Batch ID).
        unexpected_count: int = sum(candidate_resolved_metrics.metric_values[:, 0])
        matched_count = nonnull_count - unexpected_count
        ratio: float = matched_count / nonnull_count
        ratio_by_format_string[
            candidate_resolved_metrics.metric_attributes["strftime_format"]
        ] = ratio

    # Obtain threshold from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    threshold: float = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=self.threshold,
        expected_return_type=float,
        variables=variables,
        parameters=parameters,
    )

    # Best-matching candidate with respect to the threshold, along with its ratio.
    best_candidate = ParameterBuilder._get_best_candidate_above_threshold(
        ratio_by_format_string, threshold
    )
    best_format_string: str = best_candidate[0]
    best_ratio: float = best_candidate[1]

    # All evaluated candidates with their ratios, in sorted form.
    sorted_candidates: dict = ParameterBuilder._get_sorted_candidates_and_ratios(
        ratio_by_format_string
    )

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: best_format_string,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: {
                "success_ratio": best_ratio,
                "candidate_strings": sorted_candidates,
            },
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Build a partition object from metric values already present in "rule state".

    The data is treated as categorical if the "bucketize_data" directive is off, if no bins are available, or
    if the bins are not strictly increasing; in that case, the partition object consists of "values" and
    "weights", derived from the value counts.  Otherwise, the "column.histogram" metric is computed over the
    bins, and the partition object consists of "bins", "weights", and "tail_weights".

    Args:
        domain: Domain forwarded to all lookups and to the parent computation.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call
            (histogram case only).

    Returns:
        Attributes object whose value is the partition object and whose metadata is the metadata of the value
        counts (categorical case) or of the histogram (bucketized case).
    """

    def _lookup(reference, expected_type=None):
        # Resolve a reference against "rule state" (i.e., variables and parameters) for the given domain.
        return get_parameter_value_and_validate_return_type(
            domain=domain,
            parameter_reference=reference,
            expected_return_type=expected_type,
            variables=variables,
            parameters=parameters,
        )

    # Obtain bucketize_data directive from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    bucketize_data = _lookup(self.bucketize_data, bool)

    # Obtain "column.partition" from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    partition_reference: str = f"{RAW_PARAMETER_KEY}{self._column_partition_metric_single_batch_parameter_builder_config.name}"
    partition_node: ParameterNode = _lookup(partition_reference)
    bins: MetricValue = partition_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]

    # Short-circuit evaluation ensures that the bins are examined only if bucketizing is requested and bins exist.
    is_categorical: bool = (
        (not bucketize_data) or bins is None or not np.all(np.diff(bins) > 0.0)
    )

    # Obtain "column_values.nonnull.count" from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    nonnull_count_reference: str = f"{RAW_PARAMETER_KEY}{self._column_values_nonnull_count_metric_single_batch_parameter_builder_config.name}"
    nonnull_count_node: ParameterNode = _lookup(nonnull_count_reference)

    weights: list

    if is_categorical:
        # Obtain "column.value_counts" from "rule state" (i.e., variables and parameters); from instance variable otherwise.
        value_counts_reference: str = f"{RAW_PARAMETER_KEY}{self._column_value_counts_metric_single_batch_parameter_builder_config.name}"
        value_counts_node: ParameterNode = _lookup(value_counts_reference)

        values: list = list(
            value_counts_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY].index
        )
        # Counts divided by the non-null count give the weights.
        weights = list(
            np.asarray(value_counts_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY])
            / nonnull_count_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]
        )
        return Attributes(
            {
                FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: {
                    "values": values,
                    "weights": weights,
                },
                FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: value_counts_node[
                    FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY
                ],
            }
        )

    # Reconfigure this builder (instance attributes are overwritten) to compute the histogram over the bins.
    self.metric_name = "column.histogram"
    self.metric_value_kwargs = {
        "bins": tuple(bins),
    }

    # Compute metric value for one Batch object.
    super().build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        parameter_computation_impl=super()._build_parameters,
        recompute_existing_parameter_values=recompute_existing_parameter_values,
    )

    # Retrieve metric values for one Batch object.
    histogram_node: ParameterNode = _lookup(self.raw_fully_qualified_parameter_name)

    # In this case, we have requested a partition, histogram using said partition, and nonnull count.
    bins = list(bins)
    weights = list(
        np.asarray(histogram_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY])
        / nonnull_count_node[FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY]
    )
    # Whatever weight the bins do not account for is split evenly between the two tails.
    tail_weights: float = (1.0 - sum(weights)) / 2.0

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: {
                "bins": bins,
                "weights": weights,
                "tail_weights": [tail_weights, tail_weights],
            },
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: histogram_node[
                FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY
            ],
        }
    )
def _build_parameters(
    self,
    domain: Domain,
    variables: Optional[ParameterContainer] = None,
    parameters: Optional[Dict[str, ParameterContainer]] = None,
    recompute_existing_parameter_values: bool = False,
) -> Attributes:
    """
    Estimate the value range of a metric from its values over all Batch objects.

    The steps, in order:
    1. Resolve and validate false_positive_rate (values too close to 0 or 1 are moved to NP_EPSILON or
       1.0 - NP_EPSILON, respectively, with a warning).
    2. Obtain the metric values: either from "rule state", under the name of another parameter builder (if
       metric_multi_batch_parameter_builder_name is set), or by having the parent class compute them.
    3. Resolve and validate the "estimator" and "quantile_statistic_interpolation_method" directives.
    4. Determine the number of decimals for rounding (0 for integer semantic domain types; heuristics otherwise).
    5. Resolve the "auto" interpolation method to "nearest" (rounding to 0 decimals) or "linear".
    6. Select the estimator function with its keyword arguments, and estimate the value range.
    7. Return the value range; the details optionally include the estimation histogram.

    Args:
        domain: Domain forwarded to all lookups, to the parent computation, and to the estimation.
        variables: Optional "rule state" variables forwarded to the same calls.
        parameters: Optional "rule state" parameters (keyed by string) forwarded to the same calls.
        recompute_existing_parameter_values: Forwarded unchanged to the parent "build_parameters()" call.

    Returns:
        Attributes object whose value is the estimated value range and whose metadata is a deep copy of the
        metadata stored with the metric values (plus "estimation_histogram", if so directed).

    Raises:
        ProfilerExecutionError: if false_positive_rate is not within [0, 1], or if the "estimator" or
            "quantile_statistic_interpolation_method" directive is not among the recognized names.
    """

    def _lookup(reference, expected_type=None):
        # Resolve a reference against "rule state" (i.e., variables and parameters) for the given domain.
        return get_parameter_value_and_validate_return_type(
            domain=domain,
            parameter_reference=reference,
            expected_return_type=expected_type,
            variables=variables,
            parameters=parameters,
        )

    # Obtain false_positive_rate from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    false_positive_rate: np.float64 = _lookup(
        self.false_positive_rate, (float, np.float64)
    )

    # The comparison is deliberately written as the negation of a chained comparison.
    if not (0.0 <= false_positive_rate <= 1.0):
        raise ge_exceptions.ProfilerExecutionError(
            f"""false_positive_rate must be a positive decimal number between 0 and 1 inclusive [0, 1], but {false_positive_rate} was provided."""
        )

    if false_positive_rate <= NP_EPSILON:
        warnings.warn(
            f"""You have chosen a false_positive_rate of {false_positive_rate}, which is too close to 0. A false_positive_rate of {NP_EPSILON} has been selected instead."""
        )
        false_positive_rate = NP_EPSILON
    elif false_positive_rate >= (1.0 - NP_EPSILON):
        warnings.warn(
            f"""You have chosen a false_positive_rate of {false_positive_rate}, which is too close to 1. A false_positive_rate of {1.0-NP_EPSILON} has been selected instead."""
        )
        false_positive_rate = np.float64(1.0 - NP_EPSILON)

    parameter_reference: str
    if not self.metric_multi_batch_parameter_builder_name:
        # Compute metric value for each Batch object.
        super().build_parameters(
            domain=domain,
            variables=variables,
            parameters=parameters,
            parameter_computation_impl=super()._build_parameters,
            recompute_existing_parameter_values=recompute_existing_parameter_values,
        )
        parameter_reference = self.raw_fully_qualified_parameter_name
    else:
        # Another parameter builder supplies the metric values; resolve its name from "rule state".
        metric_multi_batch_parameter_builder_name: str = _lookup(
            self.metric_multi_batch_parameter_builder_name, str
        )
        parameter_reference = (
            f"{RAW_PARAMETER_KEY}{metric_multi_batch_parameter_builder_name}"
        )

    # Retrieve metric values for all Batch objects.
    parameter_node: ParameterNode = _lookup(parameter_reference)
    metric_values: MetricValues = parameter_node[
        FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY
    ]

    # Obtain estimator directive from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    estimator: str = _lookup(self.estimator, str)
    if (
        estimator
        not in NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_SAMPLING_METHOD_NAMES
    ):
        raise ge_exceptions.ProfilerExecutionError(
            message=f"""The directive "estimator" for {self.__class__.__name__} can be only one of {NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_SAMPLING_METHOD_NAMES} ("{estimator}" was detected). """
        )

    # Obtain quantile_statistic_interpolation_method directive from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    quantile_statistic_interpolation_method: str = _lookup(
        self.quantile_statistic_interpolation_method, str
    )
    if (
        quantile_statistic_interpolation_method
        not in NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_QUANTILE_STATISTIC_INTERPOLATION_METHODS
    ):
        raise ge_exceptions.ProfilerExecutionError(
            message=f"""The directive "quantile_statistic_interpolation_method" for {self.__class__.__name__} can \
be only one of {NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_QUANTILE_STATISTIC_INTERPOLATION_METHODS} \
("{quantile_statistic_interpolation_method}" was detected). """
        )

    # Integer semantic domain types round to whole numbers; otherwise, heuristics decide.
    round_decimals: int = (
        0
        if integer_semantic_domain_type(domain=domain)
        else self._get_round_decimals_using_heuristics(
            metric_values=metric_values,
            domain=domain,
            variables=variables,
            parameters=parameters,
        )
    )

    if quantile_statistic_interpolation_method == "auto":
        quantile_statistic_interpolation_method = (
            "nearest" if round_decimals == 0 else "linear"
        )

    # Every estimator receives these two keyword arguments; sampling-based estimators receive additional ones.
    estimator_kwargs: dict = {
        "false_positive_rate": false_positive_rate,
        "quantile_statistic_interpolation_method": quantile_statistic_interpolation_method,
    }
    estimator_func: Callable
    if estimator == "bootstrap":
        estimator_func = self._get_bootstrap_estimate
        estimator_kwargs["n_resamples"] = self.n_resamples
        estimator_kwargs["random_seed"] = self.random_seed
    elif estimator == "kde":
        estimator_func = self._get_kde_estimate
        estimator_kwargs["n_resamples"] = self.n_resamples
        estimator_kwargs["bw_method"] = self.bw_method
        estimator_kwargs["random_seed"] = self.random_seed
    else:
        estimator_func = self._get_deterministic_estimate

    numeric_range_estimation_result: NumericRangeEstimationResult = (
        self._estimate_metric_value_range(
            metric_values=metric_values,
            estimator_func=estimator_func,
            round_decimals=round_decimals,
            domain=domain,
            variables=variables,
            parameters=parameters,
            **estimator_kwargs,
        )
    )
    value_range: np.ndarray = numeric_range_estimation_result.value_range

    # Deep copy, so that adding the histogram below does not alter the stored metadata.
    details: Dict[str, Any] = copy.deepcopy(
        parameter_node[FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY]
    )

    # Obtain include_estimator_samples_histogram_in_details from "rule state" (i.e., variables and parameters); from instance variable otherwise.
    include_estimator_samples_histogram_in_details: bool = _lookup(
        self.include_estimator_samples_histogram_in_details, bool
    )
    if include_estimator_samples_histogram_in_details:
        details["estimation_histogram"] = (
            numeric_range_estimation_result.estimation_histogram
        )

    return Attributes(
        {
            FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: value_range,
            FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details,
        }
    )