def testMatchCIWidth(self):
    """Check how ``match_ci_width_truncated`` reacts to ``clip_mean``.

    The ``m2`` power transform is forced to ``lambda = -3`` and the mean is
    placed 2.0 above the upper inverse bound. The call without clipping is
    expected to yield NaNs, the call with clipping finite values.
    """
    observations = [self.obsd1, self.obsd2, self.obsd3]
    pt = _compute_power_transforms(get_data(observations, ["m2"]))
    pt["m2"].lambdas_.fill(-3.0)
    lower, upper = _compute_inverse_bounds(pt)["m2"]

    def invert(y):
        # Inverse power transform of a value wrapped as a 2-d array.
        return pt["m2"].inverse_transform(np.array(y, ndmin=2))

    # Arguments shared by both calls; only `clip_mean` differs.
    shared_kwargs = {
        "mean": upper + 2.0,
        "variance": 0.1,
        "transform": invert,
        "lower_bound": lower,
        "upper_bound": upper,
        "margin": 0.001,
    }

    # Both will be NaN since we are far outside the bounds
    unclipped = match_ci_width_truncated(clip_mean=False, **shared_kwargs)
    unclipped_mean, unclipped_var = unclipped
    # This will be finite since we clip
    clipped = match_ci_width_truncated(clip_mean=True, **shared_kwargs)
    clipped_mean, clipped_var = clipped

    self.assertTrue(all(isnan(v) for v in (unclipped_mean, unclipped_var)))
    self.assertTrue(all(isfinite(v) for v in (clipped_mean, clipped_var)))
def testGetData(self):
    """Check that ``get_data`` returns only the requested metric's values."""
    # Expected values per metric, in the order obsd1, obsd2, obsd3.
    expected_values = {
        "m1": [0.5, 0.1, 0.9],
        "m2": [0.9, 0.4, 0.8],
    }
    observations = [self.obsd1, self.obsd2, self.obsd3]
    for m, expected in expected_values.items():
        # Request the metric as a one-element list, the same way the other
        # call sites pass metric names, rather than as a bare string.
        Ys = get_data(observations, [m])
        self.assertIsInstance(Ys, dict)
        self.assertEqual([*Ys], [m])
        self.assertEqual(Ys[m], expected)
def __init__(
    self,
    search_space: SearchSpace,
    observation_features: List[ObservationFeatures],
    observation_data: List[ObservationData],
    modelbridge: Optional["modelbridge_module.base.ModelBridge"] = None,
    config: Optional[TConfig] = None,
) -> None:
    """Compute per-metric winsorization cutoffs from the observed data.

    Args:
        search_space: Not read by this constructor.
        observation_features: Not read by this constructor.
        observation_data: Observations whose metric values are used to
            derive the cutoffs. Must be non-empty.
        modelbridge: Not read by this constructor.
        config: Transform config. The keys ``"winsorization_config"`` and
            ``"optimization_config"`` are read from it. If it contains any
            key listed in ``OLD_KEYS``, a ``DeprecationWarning`` is issued
            and the legacy cutoff computation is used instead.

    Raises:
        ValueError: If ``observation_data`` is empty or ``config`` is None.
        UserInputError: If ``"optimization_config"`` is present in
            ``config`` but is not an ``OptimizationConfig``.
    """
    if len(observation_data) == 0:
        raise ValueError(
            "Winsorize transform requires non-empty observation data."
        )
    if config is None:
        raise ValueError(
            "Transform config for `Winsorize` transform must be specified and "
            "non-empty when using winsorization."
        )
    all_metric_values = get_data(observation_data=observation_data)

    # Check for legacy config
    use_legacy = False
    old_present = set(OLD_KEYS).intersection(config.keys())
    if old_present:
        warnings.warn(
            "Winsorization received an out-of-date `transform_config`, containing "
            f"the following deprecated keys: {old_present}. Please update the "
            "config according to the docs of "
            "`ax.modelbridge.transforms.winsorize.Winsorize`.",
            DeprecationWarning,
        )
        use_legacy = True

    # Get winsorization and optimization configs
    winsorization_config = config.get("winsorization_config", {})
    opt_config = config.get("optimization_config", {})
    if "optimization_config" in config:
        # Only validate the type when the caller supplied the key; the `{}`
        # default above is passed through unchanged otherwise.
        if not isinstance(opt_config, OptimizationConfig):
            raise UserInputError(
                "Expected `optimization_config` of type `OptimizationConfig` but "
                f"got type `{type(opt_config)}`."
            )
        opt_config = checked_cast(OptimizationConfig, opt_config)

    self.cutoffs = {}
    for metric_name, metric_values in all_metric_values.items():
        if use_legacy:
            self.cutoffs[metric_name] = _get_cutoffs_from_legacy_transform_config(
                metric_name=metric_name,
                metric_values=metric_values,
                transform_config=config,
            )
        else:
            self.cutoffs[metric_name] = _get_cutoffs_from_transform_config(
                metric_name=metric_name,
                metric_values=metric_values,
                winsorization_config=winsorization_config,  # pyre-ignore[6]
                optimization_config=opt_config,  # pyre-ignore[6]
            )
def __init__(
    self,
    search_space: SearchSpace,
    observation_features: List[ObservationFeatures],
    observation_data: List[ObservationData],
    config: Optional[TConfig] = None,
) -> None:
    """Derive per-metric clipping percentiles from the observed data.

    Args:
        search_space: Not read by this constructor.
        observation_features: Not read by this constructor.
        observation_data: Observations supplying the metric values. Must be
            non-empty.
        config: Optional settings. ``"winsorization_lower"`` and
            ``"winsorization_upper"`` are the fractions to clip at each
            end (missing or falsy values count as 0.0).
            ``"percentile_bounds"`` maps a metric name to an ``(l, u)``
            pair; a non-None ``l`` caps the lower cutoff from above and a
            non-None ``u`` floors the upper cutoff from below.

    Raises:
        ValueError: If ``observation_data`` is empty, if the limits overlap
            (``lower >= 1 - upper``), or if a ``percentile_bounds`` entry
            does not have exactly two elements.
    """
    if len(observation_data) == 0:
        raise ValueError(
            "Winsorize transform requires non-empty observation data."
        )
    settings = config if config is not None else {}

    # A missing or None limit is treated as 0.0: the percentile call then
    # resolves to the 0-th or 100-th percentile, leaving the data unclipped.
    lower = (
        checked_cast(float, (settings.get("winsorization_lower") or 0.0))
        if "winsorization_lower" in settings
        else 0.0
    )
    upper = (
        checked_cast(float, (settings.get("winsorization_upper") or 0.0))
        if "winsorization_upper" in settings
        else 0.0
    )

    metric_values = get_data(observation_data=observation_data)

    if 1 - upper <= lower:
        raise ValueError(  # pragma: no cover
            f"Lower bound: {lower} was greater than the inverse of the upper "
            f"bound: {1 - upper}. Decrease one or both of your "
            f"winsorization_limits: {(lower, upper)}."
        )

    pct_bounds = (
        checked_cast(dict, settings.get("percentile_bounds") or {})
        if "percentile_bounds" in settings
        else {}
    )

    no_lower_bound = float("inf")
    no_upper_bound = -float("inf")
    self.percentiles = {}
    for metric_name, vals in metric_values.items():
        low_cut = np.percentile(vals, lower * 100, interpolation="lower")
        high_cut = np.percentile(vals, (1 - upper) * 100, interpolation="higher")
        if metric_name in pct_bounds:
            # Update the percentiles if percentile_bounds are specified
            metric_bnds = pct_bounds[metric_name]
            if len(metric_bnds) != 2:
                raise ValueError(  # pragma: no cover
                    f"Expected percentile_bounds for metric {metric_name} to be "
                    f"of the form (l, u), got {metric_bnds}."
                )
            bnd_l, bnd_u = metric_bnds
            if bnd_l is None:
                bnd_l = no_lower_bound
            if bnd_u is None:
                bnd_u = no_upper_bound
            low_cut = min(low_cut, bnd_l)
            high_cut = max(high_cut, bnd_u)
        self.percentiles[metric_name] = (low_cut, high_cut)
def __init__(
    self,
    search_space: SearchSpace,
    observation_features: List[ObservationFeatures],
    observation_data: List[ObservationData],
    config: Optional[TConfig] = None,
) -> None:
    """Store the standardization parameters computed from the observations.

    Only ``observation_data`` is read; ``search_space``,
    ``observation_features`` and ``config`` are not used here.

    Raises:
        ValueError: If ``observation_data`` is empty.
    """
    if len(observation_data) == 0:
        raise ValueError(
            "StandardizeY transform requires non-empty observation data."
        )
    metric_values = get_data(observation_data=observation_data)
    # Compute means and SDs
    # pyre-fixme[6]: Expected `DefaultDict[Union[str, Tuple[str, Optional[Union[b...
    means, stds = compute_standardization_parameters(metric_values)
    self.Ymean = means
    self.Ystd = stds
def testComputePowerTransform(self):
    """Check the fitted ``m2`` power transform and its round trip."""
    Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"])
    m2_transform = _compute_power_transforms(Ys)["m2"]

    self.assertEqual(m2_transform.method, "yeo-johnson")
    lambdas = m2_transform.lambdas_
    self.assertIsInstance(lambdas, np.ndarray)
    self.assertEqual(lambdas.shape, (1,))

    # Column vector of the raw m2 values.
    original = np.array(Ys["m2"])[:, None]
    transformed = m2_transform.transform(original)
    # Output should be standardized
    self.assertAlmostEqual(transformed.mean(), 0.0)
    self.assertAlmostEqual(transformed.std(), 1.0)

    # Transform back
    recovered = m2_transform.inverse_transform(transformed)
    max_abs_error = np.max(np.abs(original - recovered))
    self.assertAlmostEqual(max_abs_error, 0.0)
def __init__(
    self,
    search_space: SearchSpace,
    observation_features: List[ObservationFeatures],
    observation_data: List[ObservationData],
    config: Optional[TConfig] = None,
) -> None:
    """Fit power transforms for the metrics named in ``config``.

    Args:
        search_space: Not read by this constructor.
        observation_features: Not read by this constructor.
        observation_data: Observations supplying the metric values.
        config: Required. ``"metrics"`` lists the metric names to
            transform (at least one); ``"clip_mean"`` defaults to True.

    Raises:
        ValueError: If ``config`` is None or names no metrics.
    """
    if config is None:
        raise ValueError("PowerTransform requires a config.")
    # pyre-fixme[6]: Same issue as for LogY
    metric_names = list(config.get("metrics", []))
    if not metric_names:
        raise ValueError("Must specify at least one metric in the config.")

    self.clip_mean = config.get("clip_mean", True)
    self.metric_names = metric_names
    self.power_transforms = _compute_power_transforms(
        Ys=get_data(observation_data=observation_data, metric_names=metric_names)
    )
    self.inv_bounds = _compute_inverse_bounds(self.power_transforms, tol=1e-10)
def __init__(
    self,
    search_space: SearchSpace,
    observation_features: List[ObservationFeatures],
    observation_data: List[ObservationData],
    config: Optional[TConfig] = None,
) -> None:
    """Record the observed values of every metric and the winsorize flag.

    Args:
        search_space: Not read by this constructor.
        observation_features: Not read by this constructor.
        observation_data: Observations supplying the metric values. Must be
            non-empty.
        config: Optional settings; ``"winsorize"`` (bool) is read, with a
            missing or falsy value treated as False.

    Raises:
        ValueError: If ``observation_data`` is empty.
    """
    if len(observation_data) == 0:
        raise ValueError(
            "Percentile transform requires non-empty observation data."
        )
    # Copy the per-metric values into a plain dict.
    self.percentiles = dict(get_data(observation_data=observation_data).items())

    winsorize = False
    if config is not None and "winsorize" in config:
        winsorize = checked_cast(bool, (config.get("winsorize") or False))
    self.winsorize = winsorize
def testComputeInverseBounds(self):
    """Check ``_compute_inverse_bounds`` for three ranges of lambda."""
    Ys = get_data([self.obsd1, self.obsd2, self.obsd3], ["m2"])
    pt = _compute_power_transforms(Ys)["m2"]

    def bounds_for(lam):
        # Overwrite the fitted lambda in place and recompute the bounds.
        pt.lambdas_.fill(lam)
        return _compute_inverse_bounds({"m2": pt})["m2"]

    def invert(value):
        # Inverse transform of a value wrapped as a 2-d array.
        return pt.inverse_transform(np.array(value, ndmin=2))

    # lambda < 0: im(f) = (-inf, -1/lambda) without standardization
    bounds = bounds_for(-2.5)
    self.assertEqual(bounds[0], -np.inf)
    # Make sure we got the boundary right
    left = invert(bounds[1] - 0.01)
    right = invert(bounds[1] + 0.01)
    self.assertTrue(isnan(right) and not isnan(left))

    # 0 <= lambda <= 2: im(f) = R
    bounds = bounds_for(1.0)
    self.assertTrue(bounds == (-np.inf, np.inf))

    # lambda > 2: im(f) = (1 / (2 - lambda), inf) without standardization
    bounds = bounds_for(3.5)
    self.assertEqual(bounds[1], np.inf)
    # Make sure we got the boundary right
    left = invert(bounds[0] - 0.01)
    right = invert(bounds[0] + 0.01)
    self.assertTrue(not isnan(right) and isnan(left))