def test_apply_gradients(self):
    component = DummyWithOptimizer(variable_value=2.0)
    test = ComponentTest(
        component=component,
        input_spaces=dict(input_=FloatBox(add_batch_rank=True))
    )

    expected_grad = 0.69314718  # = ln(2): d/dx[ln(x)^2] at x=2 is 2*ln(2)/2.
    expected_outputs = [expected_grad, 2.0]
    test.test(("calc_grads"), expected_outputs=expected_outputs)

    # Now apply the grad and check the variable value.
    expected_loss = np.square(np.log(2.0))
    expected_outputs = [None, expected_loss, expected_loss]
    var_values_before = test.read_variable_values(component.variable_registry)
    test.test(("step"), expected_outputs=expected_outputs)

    # Check against variable now. Should change by -learning_rate * grad.
    var_values_after = test.read_variable_values(component.variable_registry)
    expected_new_value = var_values_before["dummy-with-optimizer/variable"] - \
        (component.learning_rate * expected_grad)
    recursive_assert_almost_equal(
        var_values_after["dummy-with-optimizer/variable"], expected_new_value, decimals=5
    )

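
# Minimal numerical sanity check for the hard-coded gradient above (an illustrative
# sketch, not part of the test suite; `_demo_dummy_optimizer_grad` is a hypothetical
# helper). It assumes the dummy loss is L(x) = ln(x)^2, so dL/dx at x=2 equals ln(2).
def _demo_dummy_optimizer_grad():
    import numpy as np
    x, eps = 2.0, 1e-6
    loss = lambda v: np.log(v) ** 2
    # Central finite difference approximates the analytic gradient.
    numerical_grad = (loss(x + eps) - loss(x - eps)) / (2 * eps)
    np.testing.assert_almost_equal(numerical_grad, np.log(2.0), decimal=5)
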
def test_multiple_sequences(self):
    gae = GeneralizedAdvantageEstimation(gae_lambda=self.gae_lambda, discount=self.gamma)
    test = ComponentTest(component=gae, input_spaces=self.input_spaces)

    rewards_ = self.rewards.sample(10, fill_value=0.5)
    baseline_values_ = self.baseline_values.sample(10, fill_value=1.0)
    terminals_ = [False] * 10
    terminals_[5] = True
    sequence_indices = [False] * 10
    sequence_indices[5] = True
    terminals_ = np.asarray(terminals_)

    input_ = [baseline_values_, rewards_, terminals_, sequence_indices]
    advantage_expected = self.gae_helper(
        baseline=baseline_values_,
        reward=rewards_,
        gamma=self.gamma,
        gae_lambda=self.gae_lambda,
        terminals=terminals_,
        sequence_indices=sequence_indices
    )

    print("Advantage expected:", advantage_expected)
    advantage = test.test(("calc_gae_values", input_))
    print("Got advantage = ", advantage)
    recursive_assert_almost_equal(advantage_expected, advantage, decimals=5)

    test.terminate()

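
# Illustrative reference implementation of the GAE recursion tested above (a sketch
# under the usual conventions; `_demo_gae` is a hypothetical stand-in, not the
# component's API). It uses A_t = delta_t + gamma * lambda * A_{t+1}, resetting the
# accumulator at terminals, with delta_t = r_t + gamma * V_{t+1} - V_t and the last
# step bootstrapped with its own baseline value.
def _demo_gae(baseline, rewards, terminals, gamma, lam):
    import numpy as np
    n = len(rewards)
    # Bootstrap the value after the final step with the final baseline value.
    v_next = np.append(baseline[1:], baseline[-1])
    deltas = rewards + gamma * v_next * (1.0 - terminals) - baseline
    advantages = np.zeros(n)
    gae = 0.0
    for t in reversed(range(n)):
        # Cut the accumulation at episode boundaries.
        gae = deltas[t] + gamma * lam * gae * (1.0 - terminals[t])
        advantages[t] = gae
    return advantages
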
def test_normal(self):
    # Create 5 normal distributions (2 parameters (mean and stddev) each).
    param_space = FloatBox(shape=(10,), add_batch_rank=True)
    input_spaces = dict(
        parameters=param_space,
        deterministic=bool,
    )

    # The Component to test.
    normal = Normal(switched_off_apis={"log_prob", "kl_divergence"})
    test = ComponentTest(component=normal, input_spaces=input_spaces)

    # Batch of size=1 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(1), True]
    expected = input_[0][:, :5]  # The first 5 values are the means.
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(1), False]
    expected = input_[0][:, :5]
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", input_[0]))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

def test_simple_python_preprocessor_stack(self):
    """
    Tests a pure python preprocessor stack.
    """
    space = FloatBox(shape=(2,), add_batch_rank=True)

    # python PreprocessorStack
    multiply = dict(type="multiply", factor=0.5, scope="m")
    divide = dict(type="divide", divisor=0.5, scope="d")
    stack = PreprocessorStack(multiply, divide, backend="python")
    for sub_comp_scope in ["m", "d"]:
        stack.sub_components[sub_comp_scope].create_variables(input_spaces=dict(inputs=space))

    #test = ComponentTest(component=stack, input_spaces=dict(inputs=float))

    for _ in range_(3):
        # Call fake API-method directly (ok for PreprocessorStack).
        stack.reset()
        input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
        expected = input_
        #test.test(("preprocess", input_), expected_outputs=expected)
        out = stack.preprocess(input_)
        recursive_assert_almost_equal(out, input_)

        input_ = space.sample()
        #test.test(("preprocess", input_), expected_outputs=expected)
        out = stack.preprocess(input_)
        recursive_assert_almost_equal(out, input_)

def test_bernoulli(self):
    # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
    param_space = FloatBox(shape=(5,), add_batch_rank=True)

    # The Component to test.
    bernoulli = Bernoulli(switched_off_apis={"log_prob", "kl_divergence"})
    input_spaces = dict(
        parameters=param_space,
        deterministic=bool,
    )
    test = ComponentTest(component=bernoulli, input_spaces=input_spaces)

    # Batch of size=6 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(6), True]
    expected = input_[0] > 0.5
    # Sample n times, expect always max value (max likelihood for deterministic draw).
    for _ in range(10):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

    # Batch of size=6 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(6), False]
    outs = []
    for _ in range(20):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", input_[0]))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), 0.5, decimals=1)

def test_categorical(self):
    # Create 5 categorical distributions of 3 categories each.
    param_space = FloatBox(shape=(5, 3), add_batch_rank=True)

    # The Component to test.
    categorical = Categorical(switched_off_apis={"log_prob", "kl_divergence"})
    input_spaces = dict(
        parameters=param_space,
        deterministic=bool,
    )
    test = ComponentTest(component=categorical, input_spaces=input_spaces)

    # Batch of size=3 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(3), True]
    expected = np.argmax(input_[0], axis=-1)
    # Sample n times, expect always max value (max likelihood for deterministic draw).
    for _ in range(10):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

    # Batch of size=3 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(3), False]
    outs = []
    for _ in range(20):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", input_[0]))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), 1.0, decimals=1)

def test_single_non_terminal_sequence(self):
    gae = GeneralizedAdvantageEstimation(gae_lambda=self.gae_lambda, discount=self.gamma)
    test = ComponentTest(component=gae, input_spaces=self.input_spaces)

    rewards_ = self.rewards.sample(10, fill_value=0.5)
    baseline_values_ = self.baseline_values.sample(10, fill_value=1.0)
    terminals_ = self.terminals.sample(size=10, fill_value=False)

    # Final sequence index must always be true.
    sequence_indices = [False] * 10
    # Assume sequence indices = terminals here.
    input_ = [baseline_values_, rewards_, terminals_, sequence_indices]
    advantage_expected = self.gae_helper(
        baseline=baseline_values_,
        reward=rewards_,
        gamma=self.gamma,
        gae_lambda=self.gae_lambda,
        terminals=terminals_,
        sequence_indices=sequence_indices
    )

    advantage = test.test(("calc_gae_values", input_))
    recursive_assert_almost_equal(advantage_expected, advantage, decimals=5)
    print("Expected advantage:", advantage_expected)
    print("Got advantage:", advantage)

    test.terminate()

def test_one_hot(self):
    """
    Tests a torch one hot function.
    """
    if get_backend() == "pytorch":
        # Flat action array.
        inputs = torch.tensor([0, 1], dtype=torch.int32)
        one_hot = pytorch_one_hot(inputs, depth=2)
        expected = torch.tensor([[1., 0.], [0., 1.]])
        recursive_assert_almost_equal(one_hot, expected)

        # Container space.
        inputs = torch.tensor([[0, 3, 2], [1, 2, 0]], dtype=torch.int32)
        one_hot = pytorch_one_hot(inputs, depth=4)
        expected = torch.tensor(
            [[[1, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]],
             [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]],
            dtype=torch.int32
        )
        recursive_assert_almost_equal(one_hot, expected)

def test_gumbel_softmax_distribution(self):
    # 5-categorical Gumbel-Softmax.
    param_space = Tuple(FloatBox(shape=(5,)), add_batch_rank=True)
    values_space = FloatBox(shape=(5,), add_batch_rank=True)
    input_spaces = dict(parameters=param_space, deterministic=bool, values=values_space)

    gumbel_softmax_distribution = GumbelSoftmax(
        switched_off_apis={"kl_divergence", "entropy"}, temperature=1.0
    )
    test = ComponentTest(component=gumbel_softmax_distribution, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [param_space.sample(2), True]
    expected = np.argmax(input_[0], axis=-1)
    # Sample n times, expect always argmax value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected, decimals=5)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected, decimals=5)

    # TODO: finish this test case, using an actual Gumbel-Softmax distribution from the
    # paper: https://arxiv.org/pdf/1611.01144.pdf.
    return

    # Unreachable scaffolding for the TODO above.
    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [param_space.sample(1), False]
    expected = "???"
    outs = []
    for _ in range(100):
        out = test.test(("draw", input_))
        outs.append(np.argmax(out, axis=-1))
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(np.argmax(out, axis=-1))

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

def test_beta(self):
    # Create 5 beta distributions (2 parameters (alpha and beta) each).
    param_space = Tuple(
        FloatBox(shape=(5,)),  # alpha
        FloatBox(shape=(5,)),  # beta
        add_batch_rank=True
    )
    values_space = FloatBox(shape=(5,), add_batch_rank=True)
    input_spaces = dict(
        parameters=param_space,
        values=values_space,
        deterministic=bool,
    )

    # The Component to test.
    low, high = -1.0, 2.0
    beta_distribution = Beta(low=low, high=high, switched_off_apis={"kl_divergence"})
    test = ComponentTest(component=beta_distribution, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(2), True]
    # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
    expected = (1.0 / (1.0 + input_[0][1] / input_[0][0])) * (high - low) + low
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected, decimals=5)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected, decimals=5)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(1), False]
    expected = (1.0 / (1.0 + input_[0][1] / input_[0][0])) * (high - low) + low
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

    # Test log-likelihood outputs (against scipy).
    alpha_ = values_space.sample(1)
    beta_ = values_space.sample(1)
    values = values_space.sample(1)
    values_scaled = values * (high - low) + low
    test.test(
        ("log_prob", [tuple([alpha_, beta_]), values_scaled]),
        expected_outputs=np.log(beta.pdf(values, alpha_, beta_)),
        decimals=4
    )

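
# Quick cross-check of the Beta-mean formula used above (an illustrative sketch;
# `_demo_beta_mean` is a hypothetical helper): the mean of Beta(alpha, beta) is
# alpha / (alpha + beta), which equals the 1 / (1 + beta/alpha) form in the test.
def _demo_beta_mean():
    import numpy as np
    from scipy.stats import beta as scipy_beta
    alpha_, beta_ = 2.0, 3.0
    np.testing.assert_almost_equal(scipy_beta(alpha_, beta_).mean(), 1.0 / (1.0 + beta_ / alpha_))
    np.testing.assert_almost_equal(scipy_beta(alpha_, beta_).mean(), alpha_ / (alpha_ + beta_))
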
def test_multivariate_normal(self):
    # Create batch0=n (batch-rank), batch1=2 (can be used for m mixed Gaussians),
    # num-events=3 (trivariate) distributions (2 parameters (mean and stddev) each).
    num_events = 3  # 3=trivariate Gaussian
    num_mixed_gaussians = 2  # 2x trivariate Gaussians (mixed)
    param_space = Tuple(
        FloatBox(shape=(num_mixed_gaussians, num_events)),  # mean
        FloatBox(shape=(num_mixed_gaussians, num_events)),  # diag (variance)
        add_batch_rank=True
    )
    values_space = FloatBox(shape=(num_mixed_gaussians, num_events), add_batch_rank=True)
    input_spaces = dict(
        parameters=param_space,
        values=values_space,
        deterministic=bool,
    )

    # The Component to test.
    multivariate_normal = MultivariateNormal(switched_off_apis={"kl_divergence"})
    test = ComponentTest(component=multivariate_normal, input_spaces=input_spaces)

    input_ = [input_spaces["parameters"].sample(4), True]
    expected = input_[0][0]  # 0=mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(1), False]
    expected = input_[0][0]  # 0=mean
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

    # Test log-likelihood outputs (against scipy).
    means = values_space.sample(2)
    stds = values_space.sample(2)
    values = values_space.sample(2)
    test.test(
        ("log_prob", [tuple([means, stds]), values]),
        # Sum up the individual log-probs as we have a diag (independent) covariance matrix.
        expected_outputs=np.sum(np.log(norm.pdf(values, means, stds)), axis=-1),
        decimals=4
    )

def test_batched_backend_equivalence(self):
    # Note: This test is disabled via the early return below.
    return
    """
    Tests if Python and TensorFlow backend return the same output
    for a standard DQN-style preprocessing stack.
    """
    env_spec = dict(
        type="openai",
        gym_env="Pong-v0",
        frameskip=4,
        max_num_noops=30,
        episodic_life=True
    )
    # Test with batching because we assume vector environments to be the normal case going forward.
    env = SequentialVectorEnv(num_envs=4, env_spec=env_spec, num_background_envs=2)
    in_space = env.state_space

    agent_config = config_from_path("configs/ray_apex_for_pong.json")
    preprocessing_spec = deepcopy(agent_config["preprocessing_spec"])

    # Set up python preprocessor.
    scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
    # Set backend to python.
    for spec in preprocessing_spec:
        spec["backend"] = "python"

    python_processor = PreprocessorStack(*preprocessing_spec, backend="python")
    for sub_comp_scope in scopes:
        python_processor.sub_components[sub_comp_scope].create_variables(dict(preprocessing_inputs=in_space))
    python_processor.reset()

    # To have the use case we considered so far, use agent interface for TF backend.
    agent_config.pop("type")
    agent = ApexAgent(state_space=env.state_space, action_space=env.action_space, **agent_config)

    # Generate a few states from random set points. Test if preprocessed states are almost equal.
    states = np.asarray(env.reset_all())
    actions, agent_preprocessed_states = agent.get_action(
        states=states, use_exploration=False, extra_returns="preprocessed_states"
    )
    print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
    python_preprocessed_states = python_processor.preprocess(states)
    print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
    print("Asserting (almost) equal values:")
    for tf_state, python_state in zip(agent_preprocessed_states, python_preprocessed_states):
        flat_tf = np.ndarray.flatten(tf_state)
        flat_python = np.ndarray.flatten(python_state)
        for x, y in zip(flat_tf, flat_python):
            recursive_assert_almost_equal(x, y, decimals=3)

    states, _, _, _ = env.step(actions)
    actions, agent_preprocessed_states = agent.get_action(
        states=states, use_exploration=False, extra_returns="preprocessed_states"
    )
    print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
    python_preprocessed_states = python_processor.preprocess(states)
    print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
    print("Asserting (almost) equal values:")
    recursive_assert_almost_equal(agent_preprocessed_states, python_preprocessed_states, decimals=3)

def test_backend_equivalence(self):
    """
    Tests if Python and TensorFlow backend return the same output
    for a standard DQN-style preprocessing stack.
    """
    in_space = IntBox(256, shape=(210, 160, 3), dtype="uint8", add_batch_rank=True)

    # Regression test: Incrementally add preprocessors.
    to_use = []
    for i, decimals in zip(range_(len(self.preprocessing_spec)), [0, 0, 2, 2]):
        to_use.append(i)
        incremental_spec = []
        incremental_scopes = []
        for index in to_use:
            incremental_spec.append(deepcopy(self.preprocessing_spec[index]))
            incremental_scopes.append(self.preprocessing_spec[index]["scope"])
        print("Comparing incremental spec: {}".format(incremental_scopes))

        # Set up python preprocessor.
        # Set backend to python.
        for spec in incremental_spec:
            spec["backend"] = "python"
        python_preprocessor = PreprocessorStack(*incremental_spec, backend="python")
        for sub_comp_scope in incremental_scopes:
            python_preprocessor.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(preprocessing_inputs=in_space), action_space=None
            )
            python_preprocessor.sub_components[sub_comp_scope].check_input_spaces(
                input_spaces=dict(preprocessing_inputs=in_space), action_space=None
            )
            #build_space = python_processor.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
        python_preprocessor.reset()

        # To compare to tf, use an equivalent tf PreprocessorStack.
        # Switch back to tf.
        for spec in incremental_spec:
            spec["backend"] = "tf"
        tf_preprocessor = PreprocessorStack(*incremental_spec, backend="tf")
        test = ComponentTest(component=tf_preprocessor, input_spaces=dict(inputs=in_space))

        # Generate a few states from random set points. Test if preprocessed states are almost equal.
        states = in_space.sample(size=self.batch_size)
        python_preprocessed_states = python_preprocessor.preprocess(states)
        tf_preprocessed_states = test.test(("preprocess", states), expected_outputs=None)

        print("Asserting (almost) equal values:")
        for tf_state, python_state in zip(tf_preprocessed_states, python_preprocessed_states):
            recursive_assert_almost_equal(tf_state, python_state, decimals=decimals)

        print("Success comparing: {}".format(incremental_scopes))

def test_bernoulli(self):
    # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
    param_space = FloatBox(shape=(5,), add_batch_rank=True)
    values_space = BoolBox(shape=(5,), add_batch_rank=True)

    # The Component to test.
    bernoulli = Bernoulli(switched_off_apis={"kl_divergence"})
    input_spaces = dict(
        parameters=param_space,
        values=values_space,
        deterministic=bool,
    )
    test = ComponentTest(component=bernoulli, input_spaces=input_spaces)

    # Batch of size=6 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(6), True]
    expected = input_[0] > 0.5
    # Sample n times, expect always max value (max likelihood for deterministic draw).
    for _ in range(10):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

    # Batch of size=6 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(6), False]
    outs = []
    for _ in range(20):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", input_[0]))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), 0.5, decimals=1)

    # Test log-likelihood outputs: probability that the result is the given value.
    test.test(
        ("log_prob", [
            np.array([[0.1, 0.2, 0.3, 0.4, 0.5]]),
            np.array([[True, False, False, True, True]])
        ]),
        expected_outputs=np.log(np.array([[0.1, 0.8, 0.7, 0.4, 0.5]]))
    )

    # Test entropy outputs.
    input_ = np.array([[0.1, 0.2, 0.3, 0.4, 0.5]])
    # Binary entropy with natural log.
    expected_entropy = -(input_ * np.log(input_)) - ((1.0 - input_) * np.log(1.0 - input_))
    test.test(("entropy", input_), expected_outputs=expected_entropy)

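
# Cross-check of the binary-entropy formula above against scipy (an illustrative
# sketch; `_demo_bernoulli_entropy` is a hypothetical helper). Both compute
# H(p) = -p*ln(p) - (1-p)*ln(1-p) in nats.
def _demo_bernoulli_entropy():
    import numpy as np
    from scipy.stats import bernoulli as scipy_bernoulli
    p = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    manual = -(p * np.log(p)) - (1.0 - p) * np.log(1.0 - p)
    np.testing.assert_almost_equal(scipy_bernoulli(p).entropy(), manual)
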
def test_python_sequence_preprocessor(self):
    seq_len = 3
    space = FloatBox(shape=(1,), add_batch_rank=True)
    sequencer = Sequence(sequence_length=seq_len, batch_size=4, add_rank=True, backend="python")
    sequencer.create_variables(input_spaces=dict(preprocessing_inputs=space))

    #test = ComponentTest(component=sequencer, input_spaces=dict(apply=space))

    for _ in range_(3):
        sequencer._graph_fn_reset()
        self.assertEqual(sequencer.index, -1)
        input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
        out = sequencer._graph_fn_apply(input_)
        self.assertEqual(sequencer.index, 0)
        recursive_assert_almost_equal(
            out, np.asarray([[[1.0, 1.0, 1.0]], [[2.0, 2.0, 2.0]], [[3.0, 3.0, 3.0]], [[4.0, 4.0, 4.0]]])
        )
        input_ = np.asarray([[1.1], [2.2], [3.3], [4.4]])
        out = sequencer._graph_fn_apply(input_)
        self.assertEqual(sequencer.index, 1)
        recursive_assert_almost_equal(
            out, np.asarray([[[1.0, 1.0, 1.1]], [[2.0, 2.0, 2.2]], [[3.0, 3.0, 3.3]], [[4.0, 4.0, 4.4]]])
        )
        input_ = np.asarray([[1.11], [2.22], [3.33], [4.44]])
        out = sequencer._graph_fn_apply(input_)
        self.assertEqual(sequencer.index, 2)
        recursive_assert_almost_equal(
            out, np.asarray([[[1.0, 1.1, 1.11]], [[2.0, 2.2, 2.22]], [[3.0, 3.3, 3.33]], [[4.0, 4.4, 4.44]]])
        )
        input_ = np.asarray([[10], [20], [30], [40]])
        out = sequencer._graph_fn_apply(input_)
        self.assertEqual(sequencer.index, 0)
        recursive_assert_almost_equal(
            out, np.asarray([[[1.1, 1.11, 10]], [[2.2, 2.22, 20]], [[3.3, 3.33, 30]], [[4.4, 4.44, 40]]])
        )

def test_time_rank_folding_for_large_dense_nn(self):
    vector_dim = 256
    input_space = FloatBox(shape=(vector_dim,), add_batch_rank=True, add_time_rank=True)
    base_config = config_from_path("configs/test_large_dense_nn.json")
    neural_net_wo_folding = NeuralNetwork.from_spec(base_config)

    test = ComponentTest(component=neural_net_wo_folding, input_spaces=dict(nn_input=input_space))

    # Pull a large batch+time ranked sample.
    sample_shape = (256, 200)
    inputs = input_space.sample(sample_shape)

    start = time.monotonic()
    runs = 10
    for _ in range(runs):
        print(".", flush=True, end="")
        test.test(("call", inputs), expected_outputs=None)
    runtime_wo_folding = time.monotonic() - start

    print("\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
          "{}s".format(runs, sample_shape, runtime_wo_folding))

    neural_net_w_folding = NeuralNetwork.from_spec(base_config)

    # Folded space.
    input_space_folded = FloatBox(shape=(vector_dim,), add_batch_rank=True)
    inputs = input_space.sample(sample_shape[0] * sample_shape[1])

    test = ComponentTest(component=neural_net_w_folding, input_spaces=dict(nn_input=input_space_folded))

    start = time.monotonic()
    for _ in range(runs):
        print(".", flush=True, end="")
        test.test(("call", inputs), expected_outputs=None)
    runtime_w_folding = time.monotonic() - start

    print("\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
          "{}s".format(runs, sample_shape, runtime_w_folding))

    recursive_assert_almost_equal(runtime_w_folding, runtime_wo_folding, decimals=0)

def test_python_image_crop(self):
    image_crop = ImageCrop(x=7, y=1, width=8, height=12, backend="python")
    image_crop.create_variables(
        input_spaces=dict(inputs=FloatBox(shape=(16, 16, 3), add_batch_rank=False))
    )

    input_image = cv2.imread(os.path.join(os.path.dirname(__file__), "images/16x16x3_image.bmp"))
    expected = cv2.imread(os.path.join(os.path.dirname(__file__), "images/8x12x3_image_cropped.bmp"))
    assert expected is not None

    out = image_crop._graph_fn_call(input_image)
    recursive_assert_almost_equal(out, expected)

def test_grayscale_python_with_uint8_image(self):
    # Last rank is always the color rank (its dim must match len(grayscale-weights)).
    space = IntBox(256, shape=(1, 1, 3), dtype="uint8", add_batch_rank=True)
    grayscale = GrayScale(keep_rank=False, backend="python")

    # Run the test (batch of 2 images).
    input_ = space.sample(size=2)
    # Weighted sum over the color rank with the standard luminance weights (ITU-R BT.601).
    expected = np.round(np.dot(input_[:, :, :, :3], [0.299, 0.587, 0.114]), 0).astype(dtype=input_.dtype)
    out = grayscale._graph_fn_apply(input_)
    recursive_assert_almost_equal(out, expected)

def test_normal(self):
    # Create 5 normal distributions (2 parameters (mean and stddev) each).
    param_space = Tuple(
        FloatBox(shape=(5,)),  # mean
        FloatBox(shape=(5,)),  # stddev
        add_batch_rank=True
    )
    values_space = FloatBox(shape=(5,), add_batch_rank=True)
    input_spaces = dict(
        parameters=param_space,
        values=values_space,
        deterministic=bool,
    )

    # The Component to test.
    normal = Normal(switched_off_apis={"kl_divergence"})
    test = ComponentTest(component=normal, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [param_space.sample(2), True]
    expected = input_[0][0]  # 0 = mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [param_space.sample(1), False]
    expected = input_[0][0]  # 0 = mean
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

    # Test log-likelihood outputs.
    means = np.array([[0.1, 0.2, 0.3, 0.4, 100.0]])
    stds = np.array([[0.8, 0.2, 0.3, 2.0, 50.0]])
    values = np.array([[1.0, 2.0, 0.4, 10.0, 5.4]])
    test.test(
        ("log_prob", [tuple([means, stds]), values]),
        expected_outputs=np.log(norm.pdf(values, means, stds)),
        decimals=4
    )

def test_apex_weight_syncing(self):
    env = RandomEnv(state_space=spaces.IntBox(2), action_space=spaces.IntBox(2), deterministic=True)
    agent = Agent.from_spec(
        config_from_path("configs/apex_agent_for_random_env.json"),
        state_space=env.state_space,
        action_space=env.action_space
    )

    policy_weights = agent.get_policy_weights()
    print('policy weights: {}'.format(policy_weights))

    # Modify the weight arrays in place (+=  on numpy arrays), then sync them back into the agent.
    for variable, weights in policy_weights.items():
        weights += 0.01
    agent.set_policy_weights(policy_weights)

    new_weights = agent.get_policy_weights()
    recursive_assert_almost_equal(policy_weights, new_weights)

def test_apex_weight_syncing(self):
    agent_config = config_from_path("configs/ray_apex_for_pong.json")
    agent_config["execution_spec"].pop("ray_spec")
    environment = OpenAIGymEnv("Pong-v0", frameskip=4)

    agent = Agent.from_spec(
        agent_config,
        state_space=environment.state_space,
        action_space=environment.action_space
    )

    weights = agent.get_weights()["policy_weights"]
    print("type weights = ", type(weights))
    for variable, value in weights.items():
        print("Type value = ", type(value))
        value += 0.01
    agent.set_weights(weights)

    new_weights = agent.get_weights()["policy_weights"]
    recursive_assert_almost_equal(weights, new_weights)

def test_reshape_python_with_time_rank_unfolding(self):
    # Unfold time rank from batch rank with given time-dimension (2 out of 8 -> batch will be 4 after unfolding).
    in_space = FloatBox(shape=(4, 4), add_batch_rank=True, add_time_rank=False)
    in_space_before_folding = FloatBox(shape=(4, 4), add_batch_rank=True, add_time_rank=True)
    reshape = ReShape(unfold_time_rank=True, backend="python")
    reshape.create_variables(
        dict(preprocessing_inputs=in_space, input_before_time_rank_folding=in_space_before_folding)
    )

    # seq-len=2, batch-size=4 -> unfold from 8.
    inputs = in_space.sample(size=8)
    inputs_before_folding = in_space_before_folding.sample(size=(4, 2))
    expected = np.reshape(inputs, newshape=(4, 2, 4, 4))
    out = reshape._graph_fn_apply(inputs, inputs_before_folding)
    recursive_assert_almost_equal(out, expected)

def test_beta(self):
    # Create 5 beta distributions (2 parameters (alpha and beta) each).
    param_space = Tuple(
        FloatBox(shape=(5,)),  # alpha
        FloatBox(shape=(5,)),  # beta
        add_batch_rank=True
    )
    input_spaces = dict(
        parameters=param_space,
        deterministic=bool,
    )

    # The Component to test.
    beta_distribution = Beta(switched_off_apis={"log_prob", "kl_divergence"})
    test = ComponentTest(component=beta_distribution, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [input_spaces["parameters"].sample(2), True]
    # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
    expected = 1.0 / (1.0 + input_[0][1] / input_[0][0])
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(1), False]
    expected = 1.0 / (1.0 + input_[0][1] / input_[0][0])
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

def test_reverse_apply_decays_to_sequence(self):
    """
    Tests reverse decaying a sequence of 1-step TD errors for GAE.
    """
    sequence_helper = SequenceHelper()
    decay_value = 0.5
    test = ComponentTest(component=sequence_helper, input_spaces=self.input_spaces)

    td_errors = np.asarray([0.1, 0.2, 0.3, 0.4])
    indices = np.array([0, 0, 0, 1])
    expected_output_sequence_manual = np.asarray([
        0.1 + 0.5 * 0.2 + 0.25 * 0.3 + 0.125 * 0.4,
        0.2 + 0.5 * 0.3 + 0.25 * 0.4,
        0.3 + 0.5 * 0.4,
        0.4
    ])
    expected_output_sequence_numpy = self.decay_td_sequence(td_errors, decay=decay_value)
    recursive_assert_almost_equal(expected_output_sequence_manual, expected_output_sequence_numpy)
    test.test(
        ("reverse_apply_decays_to_sequence", [td_errors, indices, decay_value]),
        expected_outputs=expected_output_sequence_manual
    )

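
# Illustrative reference for the reverse decay above (a sketch; `_demo_decay_td_sequence`
# is a hypothetical stand-in for the `decay_td_sequence` helper used in the test):
# out[t] = td[t] + decay * out[t+1], accumulated from the back of the sequence.
def _demo_decay_td_sequence(td_errors, decay=0.5):
    import numpy as np
    out = np.zeros_like(td_errors)
    running = 0.0
    for t in reversed(range(len(td_errors))):
        running = td_errors[t] + decay * running
        out[t] = running
    return out

# E.g. _demo_decay_td_sequence(np.asarray([0.1, 0.2, 0.3, 0.4])) reproduces the
# manually expanded values above: [0.325, 0.45, 0.5, 0.4].
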
def test_with_manual_numbers_and_lambda_0_5(self):
    lambda_ = 0.5
    lg = lambda_ * self.gamma
    gae = GeneralizedAdvantageEstimation(gae_lambda=lambda_, discount=self.gamma)
    test = ComponentTest(component=gae, input_spaces=self.input_spaces)

    # A single sequence of length 3.
    rewards_ = np.array([0.1, 0.2, 0.3])
    baseline_values_ = np.array([1.0, 2.0, 3.0])
    terminals_ = np.array([False, False, False])

    # Final sequence index must always be true.
    sequence_indices = np.array([False, False, True])
    input_ = [baseline_values_, rewards_, terminals_, sequence_indices]

    # Test TD-error outputs.
    td = np.array([1.08, 1.17, 0.27])
    test.test(("calc_td_errors", input_), expected_outputs=td, decimals=5)

    expected_gaes_manual = np.array([
        td[0] + lg * td[1] + lg * lg * td[2],
        td[1] + lg * td[2],
        td[2]
    ])
    expected_gaes_helper = self.gae_helper(
        baseline_values_,
        rewards_,
        self.gamma,
        lambda_,
        terminals_,
        sequence_indices
    )
    recursive_assert_almost_equal(expected_gaes_manual, expected_gaes_helper, decimals=5)
    advantages = test.test(("calc_gae_values", input_), expected_outputs=expected_gaes_manual)

    print("Rewards:", rewards_)
    print("Baseline-values:", baseline_values_)
    print("Terminals:", terminals_)
    print("Expected advantage:", expected_gaes_manual)
    print("Got advantage:", advantages)

    test.terminate()

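
# Where the hard-coded TD errors above come from (an illustrative sketch; it assumes
# self.gamma = 0.99 and that the last, cut-off step bootstraps with its own baseline
# value): td[t] = r[t] + gamma * V[t+1] - V[t].
def _demo_manual_td_errors():
    import numpy as np
    gamma = 0.99  # assumed value of self.gamma
    rewards = np.array([0.1, 0.2, 0.3])
    baseline = np.array([1.0, 2.0, 3.0])
    v_next = np.append(baseline[1:], baseline[-1])  # bootstrap last step with its own value
    td = rewards + gamma * v_next - baseline
    np.testing.assert_almost_equal(td, [1.08, 1.17, 0.27])
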
def test_bootstrapping(self):
    """
    Tests boot-strapping for GAE purposes.
    """
    sequence_helper = SequenceHelper()
    discount = 0.99
    test = ComponentTest(component=sequence_helper, input_spaces=self.input_spaces)

    # No terminals - just boot-strap with final sequence index.
    values = np.asarray([1.0, 2.0, 3.0, 4.0])
    rewards = np.asarray([0, 0, 0, 0])
    sequence_indices = np.asarray([0, 0, 0, 1])
    terminals = np.asarray([0, 0, 0, 0])
    expected_deltas = self.deltas(values, rewards, discount, terminals, sequence_indices)
    deltas = test.test(("bootstrap_values", [rewards, values, terminals, sequence_indices]))
    recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)

    # Final index is also terminal.
    values = np.asarray([1.0, 2.0, 3.0, 4.0])
    rewards = np.asarray([0, 0, 0, 0])
    sequence_indices = np.asarray([0, 0, 0, 1])
    terminals = np.asarray([0, 0, 0, 1])
    expected_deltas = self.deltas(values, rewards, discount, terminals, sequence_indices)
    deltas = test.test(("bootstrap_values", [rewards, values, terminals, sequence_indices]))
    recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)

    # Mixed: i = 1 is also terminal, i = 3 is only sequence.
    values = np.asarray([1.0, 2.0, 3.0, 4.0])
    rewards = np.asarray([0, 0, 0, 0])
    sequence_indices = np.asarray([0, 1, 0, 1])
    terminals = np.asarray([0, 1, 0, 0])
    expected_deltas = self.deltas(values, rewards, discount, terminals, sequence_indices)
    deltas = test.test(("bootstrap_values", [rewards, values, terminals, sequence_indices]))
    recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)

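
# Illustrative reference for the `deltas` helper referenced above (a sketch under the
# usual GAE conventions, not the helper's actual implementation):
# delta[t] = r[t] + discount * V[t+1] - V[t], where V[t+1] is 0 at terminals and
# V[t] itself at non-terminal sequence ends (boot-strap with own value).
def _demo_deltas(values, rewards, discount, terminals, sequence_indices):
    import numpy as np
    n = len(values)
    deltas = np.zeros(n)
    for t in range(n):
        if terminals[t]:
            v_next = 0.0  # true episode end: nothing to boot-strap
        elif sequence_indices[t] or t == n - 1:
            v_next = values[t]  # sequence cut off: boot-strap with own value
        else:
            v_next = values[t + 1]
        deltas[t] = rewards[t] + discount * v_next - values[t]
    return deltas
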
def test_calc_decays(self):
    """
    Tests sequence-length counting and per-step decays based on terminal configurations.
    """
    sequence_helper = SequenceHelper()
    decay_value = 0.5
    test = ComponentTest(component=sequence_helper, input_spaces=self.input_spaces)

    input_ = np.asarray([0, 0, 0, 0])
    expected_decays = [1.0, 0.5, 0.25, 0.125]
    lengths, decays = test.test(("calc_sequence_decays", [input_, decay_value]))
    # Check lengths and decays.
    recursive_assert_almost_equal(x=lengths, y=[4])
    recursive_assert_almost_equal(x=decays, y=expected_decays)

    input_ = np.asarray([0, 0, 1, 0])
    expected_decays = [1.0, 0.5, 0.25, 1.0]
    lengths, decays = test.test(("calc_sequence_decays", [input_, decay_value]))
    recursive_assert_almost_equal(x=lengths, y=[3, 1])
    recursive_assert_almost_equal(x=decays, y=expected_decays)

    input_ = np.asarray([1, 1, 1, 1])
    expected_decays = [1.0, 1.0, 1.0, 1.0]
    lengths, decays = test.test(("calc_sequence_decays", [input_, decay_value]))
    recursive_assert_almost_equal(x=lengths, y=[1, 1, 1, 1])
    recursive_assert_almost_equal(x=decays, y=expected_decays)

def test_joint_cumulative_distribution(self):
    param_space = Dict(
        {
            "a": FloatBox(shape=(4,)),  # 4-discrete
            "b": Dict({
                "ba": Tuple([FloatBox(shape=(3,)), FloatBox(0.1, 1.0, shape=(3,))]),  # 3-variate normal
                "bb": Tuple([FloatBox(shape=(2,)), FloatBox(shape=(2,))]),  # beta -1 to 1
                "bc": Tuple([FloatBox(shape=(4,)), FloatBox(0.1, 1.0, shape=(4,))]),  # normal (dim=4)
            })
        },
        add_batch_rank=True
    )
    values_space = Dict(
        {
            "a": IntBox(4),
            "b": Dict({
                "ba": FloatBox(shape=(3,)),
                "bb": FloatBox(shape=(2,)),
                "bc": FloatBox(shape=(4,))
            })
        },
        add_batch_rank=True
    )
    input_spaces = dict(parameters=param_space, values=values_space, deterministic=bool)

    low, high = -1.0, 1.0
    joint_cumulative_distribution = JointCumulativeDistribution(
        distribution_specs={
            "/a": Categorical(),
            "/b/ba": MultivariateNormal(),
            "/b/bb": Beta(low=low, high=high),
            "/b/bc": Normal()
        },
        switched_off_apis={"kl_divergence"}
    )
    test = ComponentTest(component=joint_cumulative_distribution, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [param_space.sample(2), True]
    input_[0]["a"] = softmax(input_[0]["a"])
    expected_mean = {
        "a": np.argmax(input_[0]["a"], axis=-1),
        "b": {
            "ba": input_[0]["b"]["ba"][0],  # [0]=Mean
            # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
            "bb": (1.0 / (1.0 + input_[0]["b"]["bb"][1] / input_[0]["b"]["bb"][0])) * (high - low) + low,
            "bc": input_[0]["b"]["bc"][0],
        }
    }
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected_mean)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected_mean)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [param_space.sample(1), False]
    input_[0]["a"] = softmax(input_[0]["a"])
    expected_mean = {
        "a": np.sum(input_[0]["a"] * np.array([0, 1, 2, 3])),
        "b": {
            "ba": input_[0]["b"]["ba"][0],  # [0]=Mean
            # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
            "bb": (1.0 / (1.0 + input_[0]["b"]["bb"][1] / input_[0]["b"]["bb"][0])) * (high - low) + low,
            "bc": input_[0]["b"]["bc"][0],
        }
    }

    outs = []
    for _ in range(100):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(
        np.mean(np.stack([o["a"][0] for o in outs], axis=0), axis=0),
        expected_mean["a"], atol=0.2
    )
    recursive_assert_almost_equal(
        np.mean(np.stack([o["b"]["ba"][0] for o in outs], axis=0), axis=0),
        expected_mean["b"]["ba"][0], decimals=1
    )
    recursive_assert_almost_equal(
        np.mean(np.stack([o["b"]["bb"][0] for o in outs], axis=0), axis=0),
        expected_mean["b"]["bb"][0], decimals=1
    )
    recursive_assert_almost_equal(
        np.mean(np.stack([o["b"]["bc"][0] for o in outs], axis=0), axis=0),
        expected_mean["b"]["bc"][0], decimals=1
    )

    # Test log-likelihood outputs.
    params = param_space.sample(1)
    params["a"] = softmax(params["a"])
    # Make sure beta-values are within 0.0 and 1.0 for the numpy calculation (which doesn't have scaling).
    values = values_space.sample(1)
    log_prob_beta = np.log(beta.pdf(values["b"]["bb"], params["b"]["bb"][0], params["b"]["bb"][1]))
    # Now do the scaling for b/bb (beta values).
    values["b"]["bb"] = values["b"]["bb"] * (high - low) + low
    expected_log_llh = np.log(params["a"][0][values["a"][0]]) + \
        np.sum(np.log(norm.pdf(values["b"]["ba"][0], params["b"]["ba"][0], params["b"]["ba"][1]))) + \
        np.sum(log_prob_beta) + \
        np.sum(np.log(norm.pdf(values["b"]["bc"][0], params["b"]["bc"][0], params["b"]["bc"][1])))

    test.test(("log_prob", [params, values]), expected_outputs=expected_log_llh, decimals=1)

def test_squashed_normal(self):
    param_space = Tuple(FloatBox(shape=(5,)), FloatBox(shape=(5,)), add_batch_rank=True)
    values_space = FloatBox(shape=(5,), add_batch_rank=True)
    input_spaces = dict(parameters=param_space, deterministic=bool, values=values_space)

    low, high = -2.0, 1.0
    squashed_distribution = SquashedNormal(switched_off_apis={"kl_divergence"}, low=low, high=high)
    test = ComponentTest(component=squashed_distribution, input_spaces=input_spaces)

    # Batch of size=2 and deterministic (True).
    input_ = [param_space.sample(2), True]
    expected = ((np.tanh(input_[0][0]) + 1.0) / 2.0) * (high - low) + low  # [0] = mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected, decimals=5)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected, decimals=5)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [param_space.sample(1), False]
    expected = ((np.tanh(input_[0][0]) + 1.0) / 2.0) * (high - low) + low  # [0] = mean
    outs = []
    for _ in range(500):
        out = test.test(("draw", input_))
        outs.append(out)
        self.assertTrue(out.max() <= high)
        self.assertTrue(out.min() >= low)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)
        self.assertTrue(out.max() <= high)
        self.assertTrue(out.min() >= low)

    recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)

    # Test log-likelihood outputs.
    means = np.array([[0.1, 0.2, 0.3, 0.4, 5.0]])
    stds = np.array([[0.8, 0.2, 0.3, 2.0, 4.0]])
    # Make sure values are within low and high.
    values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05]])
    # Change of variables (see the note below): unsquash the values, take the regular
    # Gaussian log-likelihood, then subtract the log of the tanh Jacobian.
    unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 - 1.0)
    log_prob_unsquashed = np.log(norm.pdf(unsquashed_values, means, stds))
    log_prob = log_prob_unsquashed - np.sum(np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1, keepdims=True)
    test.test(("log_prob", [tuple([means, stds]), values]), expected_outputs=log_prob, decimals=4)

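
# Note on the log-prob formula above (a sketch of the change-of-variables identity,
# addressing the former TODO): with x ~ Normal(mean, std) and
#
#     y = ((tanh(x) + 1) / 2) * (high - low) + low,
#
# the density transforms as
#
#     log p_Y(y) = log p_X(x) - log|dy/dx|
#                = log p_X(x) - log(1 - tanh(x)^2) - log((high - low) / 2).
#
# The expected value computed in the test keeps only the tanh term of the Jacobian;
# the constant affine factor log((high - low) / 2) is presumably omitted by the
# implementation under test as well, since both sides are asserted equal.
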
def test_mixture(self):
    # Create a mixture distribution consisting of 3 bivariate normals.
    num_distributions = 3
    num_events_per_multivariate = 2  # 2=bivariate
    param_space = Dict(
        {
            "categorical": FloatBox(shape=(num_distributions,), low=-1.5, high=2.3),
            "parameters0": Tuple(
                FloatBox(shape=(num_events_per_multivariate,)),  # mean
                FloatBox(shape=(num_events_per_multivariate,)),  # diag
            ),
            "parameters1": Tuple(
                FloatBox(shape=(num_events_per_multivariate,)),  # mean
                FloatBox(shape=(num_events_per_multivariate,)),  # diag
            ),
            "parameters2": Tuple(
                FloatBox(shape=(num_events_per_multivariate,)),  # mean
                FloatBox(shape=(num_events_per_multivariate,)),  # diag
            ),
        },
        add_batch_rank=True
    )
    values_space = FloatBox(shape=(num_events_per_multivariate,), add_batch_rank=True)
    input_spaces = dict(
        parameters=param_space,
        values=values_space,
        deterministic=bool,
    )

    # The Component to test.
    mixture = MixtureDistribution(
        # Try different spec types.
        MultivariateNormal(), "multi-variate-normal", "multivariate_normal",
        switched_off_apis={"entropy", "kl_divergence"}
    )
    test = ComponentTest(component=mixture, input_spaces=input_spaces)

    # Batch of size=n and deterministic (True).
    input_ = [input_spaces["parameters"].sample(1), True]
    # Make probs for categorical.
    categorical_probs = softmax(input_[0]["categorical"])

    # Note: Usually, the deterministic draw should return the max-likelihood value
    # Max-likelihood for a 3-Mixed Bivariate: mean-of-argmax(categorical)()
    # argmax = np.argmax(input_[0]["categorical"], axis=-1)
    #expected = np.array([input_[0]["parameters{}".format(idx)][0][i] for i, idx in enumerate(argmax)])
    # input_[0]["categorical"][:, 1:2] * input_[0]["parameters1"][0] + \
    # input_[0]["categorical"][:, 2:3] * input_[0]["parameters2"][0]

    # The mean value is a 2D vector (bivariate distribution).
    expected = categorical_probs[:, 0:1] * input_[0]["parameters0"][0] + \
        categorical_probs[:, 1:2] * input_[0]["parameters1"][0] + \
        categorical_probs[:, 2:3] * input_[0]["parameters2"][0]
    for _ in range(50):
        test.test(("draw", input_), expected_outputs=expected)
        test.test(("sample_deterministic", tuple([input_[0]])), expected_outputs=expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = [input_spaces["parameters"].sample(1), False]
    # Make probs for categorical.
    categorical_probs = softmax(input_[0]["categorical"])
    expected = categorical_probs[:, 0:1] * input_[0]["parameters0"][0] + \
        categorical_probs[:, 1:2] * input_[0]["parameters1"][0] + \
        categorical_probs[:, 2:3] * input_[0]["parameters2"][0]
    outs = []
    for _ in range(50):
        out = test.test(("draw", input_))
        outs.append(out)
        out = test.test(("sample_stochastic", tuple([input_[0]])))
        outs.append(out)

    recursive_assert_almost_equal(np.mean(np.array(outs), axis=0), expected, decimals=1)

    # Test log-likelihood outputs (against scipy).
    params = param_space.sample(1)
    # Make sure categorical params are softmaxed.
    category_probs = softmax(params["categorical"][0])
    values = values_space.sample(1)
    expected = \
        category_probs[0] * \
        np.sum(np.log(norm.pdf(values[0], params["parameters0"][0][0], params["parameters0"][1][0])), axis=-1) + \
        category_probs[1] * \
        np.sum(np.log(norm.pdf(values[0], params["parameters1"][0][0], params["parameters1"][1][0])), axis=-1) + \
        category_probs[2] * \
        np.sum(np.log(norm.pdf(values[0], params["parameters2"][0][0], params["parameters2"][1][0])), axis=-1)
    test.test(("log_prob", [params, values]), expected_outputs=np.array([expected]), decimals=1)
