def revert(self, ds):
    """Map transformed variables back onto their original value range.

    Every variable named in ``self.var_names`` is paired (via ``zip``) with
    its entry in ``self.mins`` and ``self.maxs``. The stacked values are
    passed through ``0.5 * (tanh(x) + 1)`` and then rescaled with
    ``y * (max - min) + min``.

    Args:
        ds: Dataset-like object supporting ``ds[[name]]`` and ``ds.assign``
            (presumably an ``xarray.Dataset`` -- confirm against callers).

    Returns:
        ``ds`` with each listed variable replaced by its reverted values.
    """
    bounds = zip(self.var_names, self.mins, self.maxs)
    for var_name, lower, upper in bounds:
        flat, layout = util.to_stacked_array(ds[[var_name]])

        # tanh yields values in (-1, 1); shift and halve to land in (0, 1).
        unit_interval = 0.5 * (np.tanh(flat) + 1.0)
        span = upper - lower
        rescaled = unit_interval * span + lower

        restored = util.to_unstacked_dataset(rescaled.values, layout)
        ds = ds.assign({var_name: restored[var_name]})
    return ds
def test_stacking_roundtrip():
    """Stacking a dataset and unstacking it again must give an identical dataset."""
    # Three variables of rank 2, 3 and 1 that all share the 'sample' dimension.
    variables = {
        'a': (('sample', 'foo'), np.arange(12).reshape(2, 6)),
        'b': (('sample', 'ff', 'bb'), np.arange(12, 24).reshape(2, 2, 3)),
        'c': ('sample', np.arange(2)),
    }
    original = xr.Dataset(variables)

    stacked, stack_info = to_stacked_array(original)
    roundtripped = to_unstacked_dataset(stacked.values, stack_info)

    assert original.identical(roundtripped)
def revert(self, ds):
    """Undo the shifted Box-Cox transform for each configured variable.

    Each name in ``self.var_names`` is paired (via ``zip``) with its entry
    in ``self.lmbdas``, ``self.shifting_factors`` and
    ``self.boundary_locations``. The stacked values go through
    ``inv_boxcox`` with that lambda, the shifting factor is subtracted, and
    variables whose boundary location is ``'right'`` are negated afterwards.

    Args:
        ds: Dataset-like object supporting ``ds[[name]]`` and ``ds.assign``
            (presumably an ``xarray.Dataset`` -- confirm against callers).

    Returns:
        ``ds`` with each listed variable replaced by its reverted values.
    """
    per_variable = zip(
        self.var_names,
        self.lmbdas,
        self.shifting_factors,
        self.boundary_locations,
    )
    for var_name, lam, shift, side in per_variable:
        flat, layout = util.to_stacked_array(ds[[var_name]])

        # This does not guarantee that the generated samples are above zero.
        shifted_back = inv_boxcox(flat, lam) - shift

        restored = util.to_unstacked_dataset(shifted_back.values, layout)
        ds = ds.assign({var_name: restored[var_name]})

        if side != 'right':
            continue
        # Variables with a 'right' boundary get their sign flipped.
        ds = ds.assign({var_name: -ds[var_name]})
    return ds
def apply(self, ds):
    """Normalise each configured variable and map it through ``arctanh``.

    ``ds`` is cast to ``float64``. For every name in ``self.var_names`` the
    variable is stacked, its minimum and maximum along the variable's first
    dimension are widened by ``NUMERICAL_OFFSET`` and appended to
    ``self.mins`` / ``self.maxs``, and the values are rescaled with
    ``(x - min) / (max - min)`` before ``arctanh(2 * y - 1)`` is applied.

    Args:
        ds: Dataset-like object supporting ``astype``, ``ds[[name]]`` and
            ``ds.assign`` (presumably an ``xarray.Dataset`` -- confirm
            against callers).

    Returns:
        The float64 dataset with each listed variable replaced by its
        transformed values.

    Raises:
        AssertionError: If ``self.mins`` is already populated, i.e. the
            transform has been applied before.
    """
    # Raised explicitly instead of via ``assert`` so the guard is not
    # stripped under ``python -O``; a second call would otherwise append
    # duplicate entries to self.mins / self.maxs.
    if self.mins:
        raise AssertionError('This function cannot be called twice.')

    ds = ds.astype('float64')
    for name in self.var_names:
        # The first dimension of the variable is the one reduced over.
        sample_dim = ds[name].dims[0]
        stacked, stack_info = util.to_stacked_array(ds[[name]])

        # Widening by the offset keeps max - min non-zero and the rescaled
        # values strictly inside (0, 1), so the arctanh argument below
        # never reaches -1 or 1.
        mins = stacked.min(sample_dim) - NUMERICAL_OFFSET  # feature
        maxs = stacked.max(sample_dim) + NUMERICAL_OFFSET  # feature
        self.mins.append(mins)
        self.maxs.append(maxs)

        ds_normalized = (stacked - mins) / (maxs - mins)
        transformed = np.arctanh(2 * ds_normalized - 1)

        unstacked = util.to_unstacked_dataset(transformed.values, stack_info)
        ds = ds.assign({name: unstacked[name]})
    return ds
def apply(self, ds):
    """Shift each configured variable and apply a Box-Cox transform.

    ``ds`` is cast to ``float64``. Each name in ``self.var_names`` is paired
    (via ``zip``) with its entry in ``self.lmbdas`` and
    ``self.boundary_locations``. Variables whose boundary location is
    ``'right'`` are negated first. A shifting factor of
    ``abs(min) + NUMERICAL_OFFSET`` is then computed from the minimum along
    the variable's first dimension, set to zero wherever that minimum is
    already at least ``NUMERICAL_OFFSET``, appended to
    ``self.shifting_factors``, and added to the values before ``boxcox``.

    Args:
        ds: Dataset-like object supporting ``astype``, ``ds[[name]]`` and
            ``ds.assign`` (presumably an ``xarray.Dataset`` -- confirm
            against callers).

    Returns:
        The float64 dataset with each listed variable replaced by its
        transformed values.

    Raises:
        AssertionError: If ``self.shifting_factors`` is already populated,
            i.e. the transform has been applied before.
    """
    # Raised explicitly instead of via ``assert`` so the guard is not
    # stripped under ``python -O``; a second call would otherwise append
    # duplicate entries to self.shifting_factors.
    if self.shifting_factors:
        raise AssertionError('This function cannot be called twice.')

    ds = ds.astype('float64')
    settings = zip(self.var_names, self.lmbdas, self.boundary_locations)
    for name, lmbda, boundary_location in settings:
        if boundary_location == 'right':
            # Flip the sign of variables with a 'right' boundary before
            # shifting (presumably so the bound ends up on the low side).
            ds = ds.assign({name: -ds[name]})

        # The first dimension of the variable is the one reduced over.
        sample_dim = ds[name].dims[0]
        stacked, stack_info = util.to_stacked_array(ds[[name]])

        mins = stacked.min(sample_dim)  # feature
        shifting_factor_per_feature = abs(mins) + NUMERICAL_OFFSET  # feature
        # Where the minimum is already >= NUMERICAL_OFFSET no shift is
        # applied; the mask is written in place on the loaded array.
        shifting_factor_per_feature.load()[mins >= NUMERICAL_OFFSET] = 0.
        self.shifting_factors.append(shifting_factor_per_feature)

        transformed = boxcox(stacked + shifting_factor_per_feature, lmbda)
        unstacked = util.to_unstacked_dataset(transformed.values, stack_info)
        ds = ds.assign({name: unstacked[name]})
    return ds