import numpy as np

# Imports assume the keras-rl package layout.
from rl.core import Processor
from rl.util import WhiteningNormalizer


class WhiteningNormalizerProcessor(Processor):
    """Normalizes the observations to have zero mean and a standard deviation
    of one, i.e. it applies whitening to the inputs.

    This typically helps significantly with learning, especially if different
    dimensions are on different scales. However, it complicates training in the
    sense that you will have to store these weights alongside the policy if you
    intend to load it later. It is the responsibility of the user to do so.
    """
    def __init__(self):
        self.normalizer = None

    def process_state_batch(self, batch):
        # Lazily create the normalizer once the batch shape and dtype are known.
        if self.normalizer is None:
            self.normalizer = WhiteningNormalizer(shape=batch.shape[1:], dtype=batch.dtype)
        self.normalizer.update(batch)
        return self.normalizer.normalize(batch)

    def process_action(self, action):
        # Map the normalized action components back to their physical ranges.
        upper_action, delta_x_norm, acc_norm = action
        # delta_x_norm in [-1, 1] is rescaled and clipped to [10, 60].
        delta_x = np.clip((delta_x_norm + 1) / 2 * 50 + 10, 10, 60)
        # acc_norm is scaled and clipped to an acceleration in [-3, 3].
        acc = np.clip(acc_norm * 3, -3, 3)
        return upper_action, delta_x, acc

    @staticmethod
    def process_reward_batch(batch):
        # Scale rewards down to keep them in a small numeric range.
        return batch / 100
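The docstring above notes that the whitening statistics must be stored alongside the policy by the user. A minimal sketch of one way to do that with pickle follows; the `agent`, `processor`, and file names are assumptions for illustration, not part of the original code.

import pickle

def save_agent(agent, processor, weights_path='agent_weights.h5f',
               normalizer_path='normalizer.pkl'):
    # Hypothetical helper: keras-rl agents expose save_weights(); the
    # normalizer itself is persisted separately next to the weights.
    agent.save_weights(weights_path, overwrite=True)
    with open(normalizer_path, 'wb') as f:
        pickle.dump(processor.normalizer, f)

def load_normalizer(processor, normalizer_path='normalizer.pkl'):
    # Restore the previously saved statistics before reusing the processor.
    with open(normalizer_path, 'rb') as f:
        processor.normalizer = pickle.load(f)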
import numpy as np
from numpy.testing import assert_allclose

# Import path assumes the keras-rl package layout.
from rl.util import WhiteningNormalizer


def test_whitening_normalizer():
    x = np.random.normal(loc=.2, scale=2., size=(1000, 5))

    # Statistics accumulated over two updates must match those of the full data.
    normalizer = WhiteningNormalizer(shape=(5,))
    normalizer.update(x[:500])
    normalizer.update(x[500:])
    assert_allclose(normalizer.mean, np.mean(x, axis=0))
    assert_allclose(normalizer.std, np.std(x, axis=0))

    # Normalized data should have zero mean and unit standard deviation.
    x_norm = normalizer.normalize(x)
    assert_allclose(np.mean(x_norm, axis=0), np.zeros(5, dtype=normalizer.dtype), atol=1e-5)
    assert_allclose(np.std(x_norm, axis=0), np.ones(5, dtype=normalizer.dtype), atol=1e-5)

    # Denormalizing should recover the original data.
    x_denorm = normalizer.denormalize(x_norm)
    assert_allclose(x_denorm, x)