Exemple #1
0
    def revert(self, ds):
        """Map tanh-space variables back onto their stored min/max range.

        Every variable named in ``self.var_names`` is stacked, squashed with
        ``tanh``, moved onto the unit interval and then rescaled with the
        matching entries of ``self.mins`` and ``self.maxs``.

        :param ds: dataset containing the variables to revert.
        :return: dataset in which each listed variable has been replaced by
            its reverted values.
        """
        per_variable = zip(self.var_names, self.mins, self.maxs)
        for var_name, lower, upper in per_variable:
            flat, layout = util.to_stacked_array(ds[[var_name]])

            # tanh lands in [-1, 1]; shift and halve to reach [0, 1].
            unit_interval = (np.tanh(flat) + 1.) * .5
            span = upper - lower
            rescaled = unit_interval * span + lower

            restored = util.to_unstacked_dataset(rescaled.values, layout)
            ds = ds.assign({var_name: restored[var_name]})
        return ds
Exemple #2
0
def test_stacking_roundtrip():
    """Stacking a dataset and unstacking it again must give it back."""
    variables = {
        'a': (('sample', 'foo'), np.arange(12).reshape(2, 6)),
        'b': (('sample', 'ff', 'bb'), np.arange(12, 24).reshape(2, 2, 3)),
        # A single dimension is given as a plain string.
        'c': ('sample', np.arange(2)),
    }
    original = xr.Dataset(variables)

    stacked, stack_info = to_stacked_array(original)
    roundtripped = to_unstacked_dataset(stacked.values, stack_info)

    assert original.identical(roundtripped)
Exemple #3
0
    def revert(self, ds):
        """Invert the Box-Cox transform for every listed variable.

        For each variable the stacked values go through ``inv_boxcox`` with
        the variable's ``lmbda``, the stored shifting factor is subtracted,
        and the sign is flipped when the boundary location is ``'right'``.

        :param ds: dataset containing the transformed variables.
        :return: dataset with each listed variable replaced by its reverted
            values.
        """
        settings = zip(self.var_names, self.lmbdas, self.shifting_factors,
                       self.boundary_locations)
        for var_name, lmbda, shift, side in settings:
            flat, layout = util.to_stacked_array(ds[[var_name]])

            # NOTE: nothing here guarantees the generated samples end up
            # above zero.
            restored = inv_boxcox(flat, lmbda) - shift

            rebuilt = util.to_unstacked_dataset(restored.values, layout)
            ds = ds.assign({var_name: rebuilt[var_name]})

            flip_sign = side == 'right'
            if flip_sign:
                ds = ds.assign({var_name: -ds[var_name]})
        return ds
Exemple #4
0
    def apply(self, ds):
        """Min-max normalize each listed variable and apply ``arctanh``.

        For every name in ``self.var_names`` the variable is stacked, its
        per-feature minimum and maximum over the first dimension are widened
        by ``NUMERICAL_OFFSET`` and appended to ``self.mins`` /
        ``self.maxs``, and the values are scaled with them before being
        passed through ``arctanh(2 * x - 1)``.

        :param ds: dataset containing the variables to transform; it is cast
            to ``float64`` first.
        :return: dataset with each listed variable replaced by its
            transformed values.
        :raises AssertionError: if ``self.mins`` is already populated, i.e.
            the method has been called before on this instance.
        """
        # Raised explicitly instead of via ``assert`` so the guard is not
        # stripped under ``python -O``. A second call would otherwise append
        # a second set of statistics that no longer lines up with
        # ``self.var_names``. AssertionError is kept so existing handlers
        # keep matching.
        if self.mins:
            raise AssertionError('This function cannot be called twice.')

        ds = ds.astype('float64')

        for name in self.var_names:
            # The first dimension is the one the statistics are reduced over.
            sample_dim = ds[name].dims[0]
            stacked, stack_info = util.to_stacked_array(ds[[name]])

            # Widen the range slightly so the scaled values stay strictly
            # inside (0, 1) and the arctanh argument inside (-1, 1).
            mins = stacked.min(sample_dim) - NUMERICAL_OFFSET  # feature
            maxs = stacked.max(sample_dim) + NUMERICAL_OFFSET  # feature
            self.mins.append(mins)
            self.maxs.append(maxs)
            ds_normalized = (stacked - mins) / (maxs - mins)

            transformed = np.arctanh(2 * ds_normalized - 1)
            unstacked = util.to_unstacked_dataset(transformed.values,
                                                  stack_info)
            ds = ds.assign({name: unstacked[name]})
        return ds
Exemple #5
0
    def apply(self, ds):
        """Shift each listed variable and apply the Box-Cox transform.

        For every variable the sign is flipped first when its boundary
        location is ``'right'``. The per-feature minimum over the first
        dimension then determines a shifting factor of
        ``abs(min) + NUMERICAL_OFFSET``, which is set to zero wherever the
        minimum is already at least ``NUMERICAL_OFFSET``. The factor is
        appended to ``self.shifting_factors`` and added to the data before
        ``boxcox`` is applied with the variable's ``lmbda``.

        :param ds: dataset containing the variables to transform; it is cast
            to ``float64`` first.
        :return: dataset with each listed variable replaced by its
            transformed values.
        :raises AssertionError: if ``self.shifting_factors`` is already
            populated, i.e. the method has been called before on this
            instance.
        """
        # Raised explicitly instead of via ``assert`` so the guard is not
        # stripped under ``python -O``. A second call would otherwise append
        # extra shifting factors that no longer line up with
        # ``self.var_names``. AssertionError is kept so existing handlers
        # keep matching.
        if self.shifting_factors:
            raise AssertionError('This function cannot be called twice.')

        ds = ds.astype('float64')

        for name, lmbda, boundary_location in \
                zip(self.var_names, self.lmbdas, self.boundary_locations):
            if boundary_location == 'right':
                ds = ds.assign({name: -ds[name]})

            # The first dimension is the one the minimum is reduced over.
            sample_dim = ds[name].dims[0]
            stacked, stack_info = util.to_stacked_array(ds[[name]])
            mins = stacked.min(sample_dim)  # feature

            shifting_factor_per_feature = abs(
                mins) + NUMERICAL_OFFSET  # feature
            # Features whose minimum is already at or above the offset need
            # no shift. This relies on ``load()`` handing back the same
            # object so the in-place assignment sticks.
            shifting_factor_per_feature.load()[mins >= NUMERICAL_OFFSET] = 0.
            self.shifting_factors.append(shifting_factor_per_feature)

            transformed = boxcox(stacked + shifting_factor_per_feature, lmbda)
            unstacked = util.to_unstacked_dataset(transformed.values,
                                                  stack_info)
            ds = ds.assign({name: unstacked[name]})
        return ds