def test_mixture_adapter(self):
    input_space = Float(shape=(16,), main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")
    adapter = MixtureDistributionAdapter(
        "normal-distribution-adapter", "beta-distribution-adapter",
        output_space=output_space,
        activation="relu"  # Don't do this in real life! This is just to test.
    )
    batch_size = 2
    inputs = input_space.sample(batch_size)
    out = adapter(inputs)
    weights = adapter.get_weights()
    params0 = np.split(dense(inputs, weights[2], weights[3]), 2, axis=-1)
    params0[1] = np.exp(np.clip(params0[1], MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT))
    params1 = dense(inputs, weights[4], weights[5])
    params1 = np.clip(params1, np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER))
    params1 = np.log(np.exp(params1) + 1.0) + 1.0
    params1 = np.split(params1, 2, axis=-1)
    expected = {
        "categorical": relu(dense(inputs, weights[0], weights[1])),
        "parameters0": params0,
        "parameters1": params1
    }
    check(out, expected, decimals=5)

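# NOTE: a minimal sketch of the numpy test helpers assumed throughout these tests
# (`dense`, `relu`, `sigmoid`, `softmax`, `one_hot`, plus the `check` comparison utility).
# These are assumed to follow the standard definitions and are shown here for orientation
# only; they are not the actual helper implementations:
#
#     def dense(x, weights, biases):   # fully-connected layer: x @ W + b
#         return np.matmul(x, weights) + biases
#
#     def relu(x):
#         return np.maximum(x, 0.0)
#
#     def one_hot(x, depth):           # index -> one-hot vector along the last axis
#         return np.eye(depth)[x]
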
def test_subclassing_network_with_primitive_int_output_space(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Int(3, main_axes="B")

    # Using keras subclassing.
    network = self.MyModel()
    nn = Network(network=network, output_space=output_space)

    # Simple function call -> Expect output for all int-values.
    input_ = input_space.sample(6)
    result = nn(input_)
    weights = nn.get_weights()
    expected = dense(np.concatenate(
        [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])], axis=-1
    ), weights[4], weights[5])
    check(result, expected)

    # Function call with value -> Expect output for only that int-value.
    input_ = input_space.sample(6)
    values = output_space.sample(6)
    result = nn(input_, values)
    weights = nn.get_weights()
    expected = dense(np.concatenate(
        [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])], axis=-1
    ), weights[4], weights[5])
    expected = np.sum(expected * one_hot(values, depth=output_space.num_categories), axis=-1)
    check(result, expected)

def test_func_api_network_with_automatically_handling_container_input_space(self):
    # Simple vectors plus image as inputs (see e.g. SAC).
    input_space = Dict(A=Float(-1.0, 1.0, shape=(2,)), B=Int(5), C=Float(-1.0, 1.0, shape=(2, 2, 3)),
                       main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")  # simple output

    # Only define a base-core network and let the automation handle the complex input structure via
    # `pre-concat` nets.
    core_nn = tf.keras.models.Sequential()
    core_nn.add(tf.keras.layers.Dense(3, activation="relu"))
    core_nn.add(tf.keras.layers.Dense(3))

    # Use no distributions.
    nn = Network(
        network=core_nn,
        input_space=input_space,
        pre_concat_networks=dict(
            # Leave "A" out -> "A" input will go unaltered into the concat step.
            B=lambda i: tf.one_hot(i, depth=input_space["B"].num_categories, axis=-1),
            C=tf.keras.layers.Flatten()
        ),
        output_space=output_space,
        distributions=False
    )

    # Simple function call.
    input_ = input_space.sample(6)
    result = nn(input_)
    weights = nn.get_weights()
    expected = dense(dense(relu(dense(np.concatenate([
        input_["A"],
        one_hot(input_["B"], depth=input_space["B"].num_categories),
        np.reshape(input_["C"], newshape=(6, -1))
    ], axis=-1), weights[0], weights[1])), weights[2], weights[3]), weights[4], weights[5])
    check(result, expected)

def test_normal(self):
    # Create 5 normal distributions (2 parameters (mean and stddev) each).
    param_space = Tuple(
        Float(shape=(5,)),            # mean
        Float(0.5, 1.0, shape=(5,)),  # stddev
        main_axes="B"
    )
    values_space = Float(shape=(5,), main_axes="B")

    # The Component to test.
    normal = Normal()

    # Batch of size=2 and deterministic (True).
    input_ = param_space.sample(2)
    expected = input_[0]  # 0 = mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        out = normal.sample(input_, deterministic=True)
        check(out, expected)
        out = normal.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    expected = input_[0][0]  # 0 = mean
    outs = []
    for _ in range(100):
        out = normal.sample(input_, deterministic=False)
        outs.append(out)
        out = normal.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs), expected.mean(), decimals=1)

    means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0]])
    log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 10.0]])
    # The normal-adapter does the following line with the NN output (interpreted as log(stddev)).
    # Doesn't really matter here in this test case, though.
    stds = np.exp(np.clip(log_stds, a_min=MIN_LOG_NN_OUTPUT, a_max=MAX_LOG_NN_OUTPUT))
    values = np.array([[1.0, 2.0, 0.4, 10.0, 5.4]])

    # Test log-likelihood outputs.
    out = normal.log_prob((means, stds), values)
    expected_outputs = np.log(norm.pdf(values, means, stds))
    check(out, expected_outputs)

    # Test entropy outputs.
    out = normal.entropy((means, stds))
    # See: https://en.wikipedia.org/wiki/Normal_distribution#Maximum_entropy
    expected_entropy = 0.5 * (1 + np.log(2 * np.square(stds) * np.pi))
    check(out, expected_entropy)

def test_plain_output_adapter(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")
    adapter = PlainOutputAdapter(output_space)

    # Simple function call -> Expect a plain dense-layer output.
    input_ = input_space.sample(6)
    result = adapter(input_)
    weights = adapter.get_weights()
    expected = dense(input_, weights[0], weights[1])
    check(result, expected)

def test_bernoulli_adapter(self):
    input_space = Float(shape=(16,), main_axes="B")
    output_space = Bool(shape=(2,), main_axes="B")
    adapter = BernoulliDistributionAdapter(output_space=output_space, activation="relu")

    batch_size = 32
    inputs = input_space.sample(batch_size)
    out = adapter(inputs)
    weights = adapter.get_weights()
    # Parameters are the plain logits (no sigmoid).
    expected = relu(dense(inputs, weights[0], weights[1]))
    check(out, expected, decimals=5)

def test_categorical_adapter(self):
    input_space = Float(shape=(16,), main_axes="B")
    output_space = Int(2, shape=(3, 2), main_axes="B")
    adapter = CategoricalDistributionAdapter(
        output_space=output_space, kernel_initializer="ones", activation="relu"
    )

    batch_size = 2
    inputs = input_space.sample(batch_size)
    out = adapter(inputs)
    weights = adapter.get_weights()
    expected = np.reshape(relu(dense(inputs, weights[0], weights[1])), newshape=(batch_size, 3, 2, 2))
    check(out, expected, decimals=5)

def test_normal_adapter(self):
    input_space = Float(shape=(8,), main_axes="B")
    output_space = Float(shape=(3, 2), main_axes="B")
    adapter = NormalDistributionAdapter(output_space=output_space, activation="linear")

    batch_size = 3
    inputs = input_space.sample(batch_size)
    out = adapter(inputs)
    weights = adapter.get_weights()
    # Split the NN output into mean and log-stddev halves, then clip and exp the log-stddev half.
    expected = np.split(np.reshape(dense(inputs, weights[0], weights[1]), newshape=(batch_size, 3, 4)), 2, axis=-1)
    expected[1] = np.clip(expected[1], MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)
    expected[1] = np.exp(expected[1])
    check(out, expected, decimals=5)

def test_beta_adapter(self):
    input_space = Float(shape=(8,), main_axes="B")
    output_space = Float(shape=(3, 2), main_axes="B")
    adapter = BetaDistributionAdapter(output_space=output_space)

    batch_size = 5
    inputs = input_space.sample(batch_size)
    out = adapter(inputs)
    weights = adapter.get_weights()
    expected = np.reshape(dense(inputs, weights[0], weights[1]), newshape=(batch_size, 3, 4))
    # Clip, apply softplus, then add 1.0 so that the resulting alpha/beta parameters are > 1.0.
    expected = np.clip(expected, np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER))
    expected = np.log(np.exp(expected) + 1.0) + 1.0
    expected = np.split(expected, 2, axis=-1)
    check(out, expected, decimals=5)

def test_plain_output_adapter_with_pre_network(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")
    adapter = PlainOutputAdapter(output_space, pre_network=tf.keras.models.Sequential(
        tf.keras.layers.Dense(units=10, activation="relu")
    ))

    # Simple function call -> Expect pre-network output followed by the plain dense layer.
    input_ = input_space.sample(6)
    result = adapter(input_)
    weights = adapter.get_weights()
    expected = dense(relu(dense(input_, weights[0], weights[1])), weights[2], weights[3])
    check(result, expected)

def test_categorical(self):
    # Create 5 categorical distributions of 3 categories each.
    param_space = Float(shape=(5, 3), low=-1.0, high=2.0, main_axes="B")
    values_space = Int(3, shape=(5,), main_axes="B")

    # The Component to test.
    categorical = Categorical()

    # Batch of size=3 and deterministic (True).
    input_ = param_space.sample(3)
    expected = np.argmax(input_, axis=-1)
    # Sample n times, expect always max value (max likelihood for deterministic draw).
    for _ in range(10):
        out = categorical.sample(input_, deterministic=True)
        check(out, expected)
        out = categorical.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=3 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(3)
    outs = []
    for _ in range(100):
        out = categorical.sample(input_, deterministic=False)
        outs.append(out)
        out = categorical.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs), 1.0, decimals=0)

    input_ = param_space.sample(1)
    probs = softmax(input_)
    values = values_space.sample(1)

    # Test log-likelihood outputs.
    out = categorical.log_prob(input_, values)
    check(out, np.log(np.array([[
        probs[0][0][values[0][0]],
        probs[0][1][values[0][1]],
        probs[0][2][values[0][2]],
        probs[0][3][values[0][3]],
        probs[0][4][values[0][4]]
    ]])), decimals=4)

    # Test entropy outputs.
    out = categorical.entropy(input_)
    expected_entropy = -np.sum(probs * np.log(probs), axis=-1)
    check(out, expected_entropy)

def test_copying_a_network(self):
    # Using keras layer as network spec.
    layer = tf.keras.layers.Dense(4)
    nn = Network(network=layer, output_space=Dict({"a": Float(shape=(2,)), "b": Int(2)}))

    # Simple call -> Should return dict with "a"->float(2,) and "b"->float(2,).
    input_ = Float(-1.0, 1.0, shape=(5,), main_axes="B").sample(5)
    _ = nn(input_)
    weights = nn.get_weights()
    expected_a = dense(dense(input_, weights[0], weights[1]), weights[2], weights[3])
    expected_b = dense(dense(input_, weights[0], weights[1]), weights[4], weights[5])

    # Do the copy.
    nn_copy = nn.copy()
    result = nn_copy(input_)
    check(result, dict(a=expected_a, b=expected_b))

def test_layer_network_with_container_output_space(self):
    # Using keras layer as network spec.
    layer = tf.keras.layers.Dense(10)
    nn = Network(
        network=layer,
        output_space=Dict({"a": Float(shape=(2, 3)), "b": Int(3)})
    )

    # Simple call -> Should return dict with "a"->float(2,3) and "b"->float(3,).
    input_ = Float(-1.0, 1.0, shape=(5,), main_axes="B").sample(5)
    result = nn(input_)
    weights = nn.get_weights()
    expected_a = np.reshape(dense(dense(input_, weights[0], weights[1]), weights[2], weights[3]),
                            newshape=(-1, 2, 3))
    expected_b = dense(dense(input_, weights[0], weights[1]), weights[4], weights[5])
    check(result, dict(a=expected_a, b=expected_b))

def test_bernoulli(self):
    # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
    param_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")

    # The Component to test.
    bernoulli = Bernoulli()

    # Batch of size=6 and deterministic (True).
    input_ = param_space.sample(6)
    expected = sigmoid(input_) > 0.5
    # Sample n times, expect always max value (max likelihood for deterministic draw).
    for _ in range(10):
        out = bernoulli.sample(input_, deterministic=True)
        check(out, expected)
        out = bernoulli.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=6 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(6)
    outs = []
    for _ in range(100):
        out = bernoulli.sample(input_, deterministic=False)
        outs.append(out)
        out = bernoulli.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs), 0.5, decimals=1)

    logits = np.array([[0.1, -0.2, 0.3, -4.4, 2.0]])
    probs = sigmoid(logits)

    # Test log-likelihood outputs.
    values = np.array([[True, False, False, True, True]])
    out = bernoulli.log_prob(logits, values=values)
    expected_log_probs = np.log(np.where(values, probs, 1.0 - probs))
    check(out, expected_log_probs)

    # Test entropy outputs.
    # Binary entropy with natural log.
    expected_entropy = -(probs * np.log(probs)) - ((1.0 - probs) * np.log(1.0 - probs))
    out = bernoulli.entropy(logits)
    check(out, expected_entropy)

def test_layer_network_with_container_output_space_and_one_distribution(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Dict({"a": Float(shape=(2, 3)), "b": Int(3)}, main_axes="B")

    # Using keras layer as network spec.
    layer = tf.keras.layers.Dense(10)
    nn = Network(
        network=layer,
        output_space=output_space,
        # Only one output component is a distribution, the other not (Int).
        distributions=dict(a=True)
    )

    # Simple call -> Should return sample dict with "a"->float(2,3) and "b"->int(3,).
    input_ = input_space.sample(1000)
    result = nn(input_)
    check(np.mean(result["a"]), 0.0, decimals=0)
    check(np.mean(np.sum(softmax(result["b"]), axis=-1)), 1.0, decimals=5)

    # Call with value -> Should return likelihood of "a"-value and output for "b"-value.
    input_ = input_space.sample(3)
    value = output_space.sample(3)
    result, likelihood = nn(input_, value)
    # "a" is None b/c value was already given for likelihood calculation.
    self.assertTrue(result["a"] is None)
    # "b" is the (batched) output values for the given int-numbers.
    self.assertTrue(result["b"].shape == (3,))
    self.assertTrue(result["b"].dtype == np.float32)
    # The (total) likelihood is some float.
    self.assertTrue(likelihood.shape == (3,))
    self.assertTrue(likelihood.dtype == np.float32)

    # Extract only the "b" value-output (one output for each int category).
    # Also: No likelihood output b/c "a" was invalidated.
    del value["a"]
    value["b"] = None
    result = nn(input_, value)
    self.assertTrue(result["a"] is None)
    self.assertTrue(result["b"].shape == (3, 3))
    self.assertTrue(result["b"].dtype == np.float32)

    value = output_space.sample(3)
    value["a"] = None
    del value["b"]
    result = nn(input_, value)
    self.assertTrue(result is None)

def test_gumbel_softmax_distribution(self):
    # 5-categorical Gumbel-Softmax.
    param_space = Float(shape=(5,), main_axes="B")
    values_space = Float(shape=(5,), main_axes="B")

    gumbel_softmax_distribution = GumbelSoftmax(temperature=1.0)

    # Batch of size=2 and deterministic (True).
    input_ = param_space.sample(2)
    expected = softmax(input_)
    # Sample n times, expect always the softmax'd probs vector (deterministic draw).
    for _ in range(50):
        out = gumbel_softmax_distribution.sample(input_, deterministic=True)
        check(out, expected)
        out = gumbel_softmax_distribution.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the vector of probs.
    input_ = param_space.sample(1)
    expected = softmax(input_)
    outs = []
    for _ in range(100):
        out = gumbel_softmax_distribution.sample(input_)
        outs.append(out)
        out = gumbel_softmax_distribution.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs, axis=0), expected, decimals=1)

    return
    # TODO: Figure out the Gumbel-Softmax log-prob calculation
    #  (our current implementation does not correspond with the paper's formula).
    def gumbel_log_density(y, probs, num_categories, temperature=1.0):
        # https://arxiv.org/pdf/1611.01144.pdf
        density = np.math.factorial(num_categories - 1) * np.math.pow(temperature, num_categories - 1) * \
            (np.sum(probs / np.power(y, temperature), axis=-1) ** -num_categories) * \
            np.prod(probs / np.power(y, temperature + 1.0), axis=-1)
        return np.log(density)

    # Test log-likelihood outputs.
    input_ = param_space.sample(3)
    values = values_space.sample(3)
    expected = gumbel_log_density(values, softmax(input_), num_categories=param_space.shape[0])
    out = gumbel_softmax_distribution.log_prob(input_, values)
    check(out, expected)

def test_func_api_network_with_manually_handling_container_input_space(self):
    # Simple vector plus image as inputs (see e.g. SAC).
    input_space = Dict(A=Float(-1.0, 1.0, shape=(2,)), B=Float(-1.0, 1.0, shape=(2, 2, 3)), main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")  # simple output

    # Using keras functional API to create the network.
    keras_input = input_space.create_keras_input()
    # Simply flatten and concat everything, then output.
    o = tf.keras.layers.Flatten()(keras_input["B"])
    o = tf.concat([keras_input["A"], o], axis=-1)
    network = tf.keras.Model(inputs=keras_input, outputs=o)

    # Use no distributions.
    nn = Network(
        network=network,
        output_space=output_space,
        distributions=False
    )

    # Simple function call.
    input_ = input_space.sample(6)
    result = nn(input_)
    weights = nn.get_weights()
    expected = dense(np.concatenate([input_["A"], np.reshape(input_["B"], newshape=(6, -1))], axis=-1),
                     weights[0], weights[1])
    check(result, expected)

    # Function call with value -> Expect an error as we only have float outputs (w/o distributions).
    input_ = input_space.sample(6)
    values = output_space.sample(6)
    error = True
    try:
        nn(input_, values)
        error = False
    except SurrealError:
        pass
    self.assertTrue(error)

def test_multivariate_normal(self):
    # Create batch0=n (batch-rank), batch1=2 (can be used for m mixed Gaussians), num-events=3 (trivariate)
    # distributions (2 parameters (mean and stddev) each).
    num_events = 3  # 3=trivariate Gaussian
    num_mixed_gaussians = 2  # 2x trivariate Gaussians (mixed)
    param_space = Tuple(
        Float(shape=(num_mixed_gaussians, num_events)),            # mean
        Float(0.5, 1.0, shape=(num_mixed_gaussians, num_events)),  # diag (variance)
        main_axes="B"
    )
    values_space = Float(shape=(num_mixed_gaussians, num_events), main_axes="B")

    # The Component to test.
    distribution = MultivariateNormal()

    input_ = param_space.sample(4)
    expected = input_[0]  # 0=mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        out = distribution.sample(input_, deterministic=True)
        check(out, expected)
        out = distribution.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    expected = input_[0]  # 0=mean
    outs = []
    for _ in range(100):
        out = distribution.sample(input_, deterministic=False)
        outs.append(out)
        out = distribution.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs), expected.mean(), decimals=1)

    means = values_space.sample(2)
    stds = values_space.sample(2)
    values = values_space.sample(2)

    # Test log-likelihood outputs (against scipy).
    out = distribution.log_prob((means, stds), values)
    # Sum up the individual log-probs as we have a diag (independent) covariance matrix.
    check(out, np.sum(np.log(norm.pdf(values, means, stds)), axis=-1), decimals=4)

def test_dueling_network(self):
    input_space = Float(-1.0, 1.0, shape=(2,), main_axes="B")
    output_space = Dict({"A": Float(shape=(4,)), "V": Float()}, main_axes="B")  # V=single node

    # Using keras layer as main network spec.
    layer = tf.keras.layers.Dense(5)
    nn = Network(
        network=layer,
        output_space=output_space,
        # Give each output component ("A" and "V") its own pre-network before its output adapter.
        adapters=dict(A=dict(pre_network=tf.keras.layers.Dense(2)), V=dict(pre_network=tf.keras.layers.Dense(3)))
    )

    # Simple call -> Should return sample dict.
    input_ = input_space.sample(10)
    result = nn(input_)
    weights = nn.get_weights()
    expected_a = dense(dense(dense(input_, weights[0], weights[1]), weights[2], weights[3]), weights[4], weights[5])
    expected_v = np.reshape(
        dense(dense(dense(input_, weights[0], weights[1]), weights[6], weights[7]), weights[8], weights[9]),
        newshape=(10,)
    )
    check(result["A"], expected_a, decimals=5)
    check(result["V"], expected_v, decimals=5)

def test_beta(self):
    # Create 5 beta distributions (2 parameters (alpha and beta) each).
    param_space = Tuple(
        Float(shape=(5,)),  # alpha
        Float(shape=(5,)),  # beta
        main_axes="B"
    )
    values_space = Float(shape=(5,), main_axes="B")

    # The Component to test.
    low, high = -1.0, 2.0
    beta_distribution = Beta(low=low, high=high)

    # Batch of size=2 and deterministic (True).
    input_ = param_space.sample(2)
    # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
    expected = (1.0 / (1.0 + input_[1] / input_[0])) * (high - low) + low
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(100):
        out = beta_distribution.sample(input_, deterministic=True)
        check(out, expected)
        out = beta_distribution.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    expected = (1.0 / (1.0 + input_[1] / input_[0])) * (high - low) + low
    outs = []
    for _ in range(100):
        out = beta_distribution.sample(input_, deterministic=False)
        outs.append(out)
        out = beta_distribution.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(outs), expected.mean(), decimals=1)

    alpha_ = values_space.sample(1)
    beta_ = values_space.sample(1)
    values = values_space.sample(1)
    values_scaled = values * (high - low) + low

    # Test log-likelihood outputs (against scipy).
    out = beta_distribution.log_prob((alpha_, beta_), values_scaled)
    check(out, np.log(beta.pdf(values, alpha_, beta_)), decimals=4)

    # TODO: Test entropy outputs (against scipy).
    out = beta_distribution.entropy((alpha_, beta_))

def test_layer_network_with_container_output_space_and_distributions(self):
    input_space = Float(-1.0, 1.0, shape=(10,), main_axes="B")
    output_space = Dict({"a": Float(shape=(2, 3)), "b": Int(3)}, main_axes="B")

    # Using keras layer as network spec.
    layer = tf.keras.layers.Dense(10)
    nn = Network(
        network=layer,
        output_space=output_space,
        distributions=True
    )

    # Simple call -> Should return sample dict with "a"->float(2,3) and "b"->int(3).
    input_ = input_space.sample(1000)
    result = nn(input_)
    check(np.mean(result["a"]), 0.0, decimals=0)
    check(np.mean(result["b"]), 1, decimals=0)

    # Call with value -> Should return likelihood of value.
    input_ = input_space.sample(3)
    value = output_space.sample(3)
    likelihood = nn(input_, value)
    self.assertTrue(likelihood.shape == (3,))
    self.assertTrue(likelihood.dtype == np.float32)

def test_func_api_network_with_primitive_int_output_space_and_distribution(self):
    input_space = Float(-1.0, 1.0, shape=(3,), main_axes="B")
    output_space = Int(5, main_axes="B")

    # Using keras functional API to create the network.
    i = tf.keras.layers.Input(shape=(3,))
    d = tf.keras.layers.Dense(10)(i)
    e = tf.keras.layers.Dense(5)(i)
    o = tf.concat([d, e], axis=-1)
    network = tf.keras.Model(inputs=i, outputs=o)

    # Use default distributions (i.e. categorical for Int).
    nn = Network(
        network=network,
        output_space=output_space,
        distributions="default"
    )

    input_ = input_space.sample(1000)
    result = nn(input_)
    # Check the sample for a proper mean value.
    check(np.mean(result), 2, decimals=0)

    # Function call with value -> Expect probabilities for the given int-values.
    input_ = input_space.sample(6)
    values = output_space.sample(6)
    result = nn(input_, values)
    weights = nn.get_weights()
    expected = dense(np.concatenate(
        [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])], axis=-1
    ), weights[4], weights[5])
    expected = softmax(expected)
    expected = np.sum(expected * one_hot(values, depth=output_space.num_categories), axis=-1)
    check(result, expected)

    # Function call with "likelihood" option set -> Expect sample plus probabilities for the sampled int-values.
    input_ = input_space.sample(1000)
    sample, probs = nn(input_, likelihood=True)
    check(np.mean(sample), 2, decimals=0)
    check(np.mean(probs), 1.0 / output_space.num_categories, decimals=1)

def test_copying_an_adapter(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Float(shape=(3,), main_axes="B")
    adapter = PlainOutputAdapter(output_space, pre_network=None)

    # Simple function call -> Expect a plain dense-layer output.
    input_ = input_space.sample(3)
    result = adapter(input_)
    weights = adapter.get_weights()
    expected = dense(input_, weights[0], weights[1])
    check(result, expected)

    new_adapter = adapter.copy()
    new_weights = new_adapter.get_weights()
    # Check all weights.
    check(weights, new_weights)
    # Do a pass and double-check.
    result = new_adapter(input_)
    check(result, expected)

def test_mixture(self):
    # Create a mixture distribution consisting of 3 bivariate normals weighted by an internal
    # categorical distribution.
    num_distributions = 3
    num_events_per_multivariate = 2  # 2=bivariate
    param_space = Dict(
        {
            "categorical": Float(shape=(num_distributions,), low=-1.5, high=2.3),
            "parameters0": Tuple(
                Float(shape=(num_events_per_multivariate,)),                     # mean
                Float(shape=(num_events_per_multivariate,), low=0.5, high=1.0),  # diag
            ),
            "parameters1": Tuple(
                Float(shape=(num_events_per_multivariate,)),                     # mean
                Float(shape=(num_events_per_multivariate,), low=0.5, high=1.0),  # diag
            ),
            "parameters2": Tuple(
                Float(shape=(num_events_per_multivariate,)),                     # mean
                Float(shape=(num_events_per_multivariate,), low=0.5, high=1.0),  # diag
            ),
        },
        main_axes="B"
    )
    values_space = Float(shape=(num_events_per_multivariate,), main_axes="B")

    # The Component to test.
    mixture = MixtureDistribution(
        # Try different spec types.
        MultivariateNormal(), "multi-variate-normal", "multivariate_normal"
    )

    # Batch of size=n and deterministic (True).
    input_ = param_space.sample(1)
    # Make probs for categorical.
    categorical_probs = softmax(input_["categorical"])
    # Note: Usually, the deterministic draw should return the max-likelihood value.
    # Max-likelihood for a 3-mixed bivariate: mean-of-argmax(categorical)():
    # argmax = np.argmax(input_[0]["categorical"], axis=-1)
    # expected = np.array([input_[0]["parameters{}".format(idx)][0][i] for i, idx in enumerate(argmax)])
    #   input_[0]["categorical"][:, 1:2] * input_[0]["parameters1"][0] + \
    #   input_[0]["categorical"][:, 2:3] * input_[0]["parameters2"][0]
    # The mean value is a 2D vector (bivariate distribution).
    expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
        categorical_probs[:, 1:2] * input_["parameters1"][0] + \
        categorical_probs[:, 2:3] * input_["parameters2"][0]
    for _ in range(20):
        out = mixture.sample(input_, deterministic=True)
        check(out, expected)
        out = mixture.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    # Make probs for categorical.
    categorical_probs = softmax(input_["categorical"])
    expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
        categorical_probs[:, 1:2] * input_["parameters1"][0] + \
        categorical_probs[:, 2:3] * input_["parameters2"][0]
    outs = []
    for _ in range(500):
        out = mixture.sample(input_, deterministic=False)
        outs.append(out)
        out = mixture.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(np.array(outs), axis=0), expected, decimals=1)

    return
    # TODO: prob/log-prob tests for Mixture.
    # Test log-likelihood outputs (against scipy).
    for i in range(20):
        params = param_space.sample(1)
        # Make sure categorical params are softmaxed.
        category_probs = softmax(params["categorical"][0])
        values = values_space.sample(1)
        expected = 0.0
        v = []
        for j in range(3):
            v.append(multivariate_normal.pdf(
                values[0], mean=params["parameters{}".format(j)][0][0],
                cov=params["parameters{}".format(j)][1][0]
            ))
            expected += category_probs[j] * v[-1]
        out = mixture.prob(params, values)
        check(out[0], expected, atol=0.1)

        expected = np.zeros(shape=(3,))
        for j in range(3):
            expected[j] = np.log(category_probs[j]) + np.log(multivariate_normal.pdf(
                values[0], mean=params["parameters{}".format(j)][0][0],
                cov=params["parameters{}".format(j)][1][0]
            ))
        expected = np.log(np.sum(np.exp(expected)))
        out = mixture.log_prob(params, values)
        print("{}: out={} expected={}".format(i, out, expected))
        check(out, np.array([expected]), atol=0.25)

def test_layer_network_with_container_output_space_and_mix_of_distributions_and_no_distributions(self):
    input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
    output_space = Dict({
        "a": Float(shape=(2, 3)),
        "b": Int(3),
        "c": Float(-0.1, 1.0, shape=(2,)),
        "d": Int(3, shape=(2,))
    }, main_axes="B")

    # Using keras layer as network spec.
    layer = tf.keras.layers.Dense(10)
    nn = Network(
        network=layer,
        output_space=output_space,
        # Only two output components are distributions (a and b), the others not (c=Float, d=Int).
        distributions=dict(a="default", b=True)
    )

    # Simple call -> Should return sample dict.
    input_ = input_space.sample(10000)
    result = nn(input_)
    check(np.mean(result["a"]), 0.0, decimals=0)
    check(np.mean(result["b"]), 1.0, decimals=0)
    check(np.mean(result["d"]), 0.0, decimals=0)
    self.assertTrue(result["d"].shape == (10000, 2, 3))
    self.assertTrue(result["d"].dtype == np.float32)

    # Change limits of input a little to get more chances of reaching extreme float outputs (for "c").
    input_space = Float(-10.0, 10.0, shape=(5,), main_axes="B")
    input_ = input_space.sample(10000)
    result = nn(input_)
    self.assertFalse(np.any(result["c"].numpy() > 1.0))
    self.assertTrue(np.any(result["c"].numpy() > 0.9))
    self.assertFalse(np.any(result["c"].numpy() < -0.1))
    self.assertTrue(np.any(result["c"].numpy() < 0.0))

    # Call with (complete) value -> Should return likelihood of "a"+"b"-values and outputs for "c"/"d"-values.
    input_ = input_space.sample(100)
    value = output_space.sample(100)
    # Delete float value ("c") from value, otherwise this would create an error as we can't get a likelihood
    # value for a non-distribution float output.
    del value["c"]
    result, likelihood = nn(input_, value)
    # "a" is None b/c value was already given for likelihood calculation.
    self.assertTrue(result["a"] is None)
    # "b" is None b/c value was already given for likelihood calculation.
    self.assertTrue(result["b"] is None)
    # "c" is None b/c no value was given for "c" (that would result in an ERROR: float component w/o distribution).
    self.assertTrue(result["c"] is None)
    # "d" is the (batched) output values for the given int-numbers.
    self.assertTrue(result["d"].shape == (100, 2))
    self.assertTrue(result["d"].dtype == np.float32)
    # The (total) likelihood is some float.
    self.assertTrue(likelihood.shape == (100,))
    self.assertTrue(likelihood.dtype == np.float32)

    # Calculate likelihood only for the "a" component.
    del value["b"]
    # Extract only the "c" sample-output.
    value["c"] = None
    # We don't want outputs for "d".
    del value["d"]
    result, likelihood = nn(input_, value)
    # "a" and "b" are None as we are using these for likelihood calculations only.
    self.assertTrue(result["a"] is None)
    self.assertTrue(result["b"] is None)
    # "c" is a float sample.
    self.assertTrue(result["c"].shape == (100, 2))
    self.assertTrue(result["c"].dtype == np.float32)
    self.assertFalse(np.any(result["c"].numpy() > 1.0))
    self.assertFalse(np.any(result["c"].numpy() < -0.1))
    # "d" is None (no output desired).
    self.assertTrue(result["d"] is None)

    value = output_space.sample(100)
    # Calculate likelihood only for the "b" component.
    value["a"] = None
    # Do nothing for "c".
    del value["c"]
    # Leave "d" in and expect output-values for each given int.
    result, likelihood = nn(input_, value)
    self.assertTrue(result["a"] is None)
    self.assertTrue(result["b"] is None)
    self.assertTrue(result["c"] is None)
    # "d" is the (batched) output values for the given int-numbers.
    self.assertTrue(result["d"].shape == (100, 2))
    self.assertTrue(result["d"].dtype == np.float32)
    # The (total) likelihood is some float.
    self.assertTrue(likelihood.shape == (100,))
    self.assertTrue(likelihood.dtype == np.float32)

    # Add "c" again, but as None (will not cause an ERROR then and will return the output).
    value["c"] = None
    result, likelihood = nn(input_, value)
    self.assertTrue(result["c"].shape == (100, 2))
    self.assertTrue(result["c"].dtype == np.float32)
    self.assertFalse(np.any(result["c"].numpy() > 1.0))
    self.assertFalse(np.any(result["c"].numpy() < -0.1))

def test_joint_cumulative_distribution(self):
    param_space = Dict(
        {
            "a": Float(shape=(4,)),  # 4-discrete
            "b": Dict({
                "ba": Tuple([Float(shape=(3,)), Float(0.1, 1.0, shape=(3,))]),  # 3-variate normal
                "bb": Tuple([Float(shape=(2,)), Float(shape=(2,))]),            # beta -1 to 1
                "bc": Tuple([Float(shape=(4,)), Float(0.1, 1.0, shape=(4,))]),  # normal (dim=4)
            })
        },
        main_axes="B"
    )
    values_space = Dict(
        {
            "a": Int(4),
            "b": Dict({
                "ba": Float(shape=(3,)),
                "bb": Float(shape=(2,)),
                "bc": Float(shape=(4,))
            })
        },
        main_axes="B"
    )
    low, high = -1.0, 1.0
    cumulative_distribution = JointCumulativeDistribution(distributions={
        "a": Categorical(),
        "b": {
            "ba": MultivariateNormal(),
            "bb": Beta(low=low, high=high),
            "bc": Normal()
        }
    })

    # Batch of size=2 and deterministic (True).
    input_ = param_space.sample(2)
    input_["a"] = softmax(input_["a"])
    expected_mean = {
        "a": np.argmax(input_["a"], axis=-1),
        "b": {
            "ba": input_["b"]["ba"][0],  # [0]=Mean
            # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
            "bb": (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) * (high - low) + low,
            "bc": input_["b"]["bc"][0],
        }
    }
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(20):
        out = cumulative_distribution.sample(input_, deterministic=True)
        check(out, expected_mean)
        out = cumulative_distribution.sample_deterministic(input_)
        check(out, expected_mean)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    input_["a"] = softmax(input_["a"])
    expected_mean = {
        "a": np.sum(input_["a"] * np.array([0, 1, 2, 3])),
        "b": {
            "ba": input_["b"]["ba"][0],  # [0]=Mean
            # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
            "bb": (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) * (high - low) + low,
            "bc": input_["b"]["bc"][0],
        }
    }
    outs = []
    for _ in range(500):
        out = cumulative_distribution.sample(input_)
        outs.append(out)
        out = cumulative_distribution.sample_stochastic(input_)
        outs.append(out)
    check(np.mean(np.stack([o["a"][0] for o in outs], axis=0), axis=0), expected_mean["a"], atol=0.3)
    check(np.mean(np.stack([o["b"]["ba"][0] for o in outs], axis=0), axis=0), expected_mean["b"]["ba"][0], decimals=1)
    check(np.mean(np.stack([o["b"]["bb"][0] for o in outs], axis=0), axis=0), expected_mean["b"]["bb"][0], decimals=1)
    check(np.mean(np.stack([o["b"]["bc"][0] for o in outs], axis=0), axis=0), expected_mean["b"]["bc"][0], decimals=1)

    # Test log-likelihood outputs.
    params = param_space.sample(1)
    params["a"] = softmax(params["a"])
    # Make sure beta-values are within 0.0 and 1.0 for the numpy calculation (which doesn't have scaling).
    values = values_space.sample(1)
    log_prob_beta = np.log(beta.pdf(values["b"]["bb"], params["b"]["bb"][0], params["b"]["bb"][1]))
    # Now do the scaling for b/bb (beta values).
    values["b"]["bb"] = values["b"]["bb"] * (high - low) + low
    expected_log_llh = np.log(params["a"][0][values["a"][0]]) + \
        np.sum(np.log(norm.pdf(values["b"]["ba"][0], params["b"]["ba"][0], params["b"]["ba"][1]))) + \
        np.sum(log_prob_beta) + \
        np.sum(np.log(norm.pdf(values["b"]["bc"][0], params["b"]["bc"][0], params["b"]["bc"][1])))
    out = cumulative_distribution.log_prob(params, values)
    check(out, expected_log_llh, decimals=0)

def test_squashed_normal(self):
    param_space = Tuple(Float(-1.0, 1.0, shape=(5,)), Float(0.5, 1.0, shape=(5,)), main_axes="B")

    low, high = -2.0, 1.0
    squashed_distribution = SquashedNormal(low=low, high=high)

    # Batch of size=2 and deterministic (True).
    input_ = param_space.sample(2)
    expected = ((np.tanh(input_[0]) + 1.0) / 2.0) * (high - low) + low  # [0] = mean
    # Sample n times, expect always mean value (deterministic draw).
    for _ in range(50):
        out = squashed_distribution.sample(input_, deterministic=True)
        check(out, expected)
        out = squashed_distribution.sample_deterministic(input_)
        check(out, expected)

    # Batch of size=1 and non-deterministic -> expect roughly the mean.
    input_ = param_space.sample(1)
    expected = ((np.tanh(input_[0]) + 1.0) / 2.0) * (high - low) + low  # [0] = mean
    outs = []
    for _ in range(500):
        out = squashed_distribution.sample(input_, deterministic=False)
        outs.append(out)
        self.assertTrue(np.max(out) <= high)
        self.assertTrue(np.min(out) >= low)
        out = squashed_distribution.sample_stochastic(input_)
        outs.append(out)
        self.assertTrue(np.max(out) <= high)
        self.assertTrue(np.min(out) >= low)
    check(np.mean(outs), expected.mean(), decimals=1)

    means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0], [-0.1, -0.2, -0.3, -0.4, -1.0]])
    log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 10.0], [0.7, -0.3, 0.4, -0.9, 8.0]])
    # The normal-adapter does the following line with the NN output (interpreted as log(stddev)).
    # Doesn't really matter here in this test case, though.
    stds = np.exp(np.clip(log_stds, a_min=MIN_LOG_NN_OUTPUT, a_max=MAX_LOG_NN_OUTPUT))
    # Make sure values are within low and high.
    values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05], [-0.9, -0.2, 0.4, -0.1, -1.05]])

    # Test log-likelihood outputs.
    # TODO: understand and comment the following formula to get the log-prob.
    # Unsquash values, then get the log-llh from the regular gaussian.
    unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 - 1.0)
    log_prob_unsquashed = np.log(norm.pdf(unsquashed_values, means, stds))
    log_prob = log_prob_unsquashed - np.sum(np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1, keepdims=True)
    out = squashed_distribution.log_prob((means, stds), values)
    check(out, log_prob)

    # Test entropy outputs.
    # TODO
    return

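# NOTE: the log-prob check in test_squashed_normal follows the usual change-of-variables argument
# for a tanh-squashed Gaussian: with x ~ Normal(mean, std) and y obtained by rescaling tanh(x)
# into [low, high], log p(y) = log p_Normal(x) - log(1 - tanh(x)^2) per dimension (the constant
# rescaling factor (high - low) / 2 is not part of the check above). A minimal per-dimension
# numpy sketch of that correction, assuming `norm` from scipy.stats (illustration only):
#
#     unsquashed = np.arctanh((values - low) / (high - low) * 2.0 - 1.0)
#     log_prob = np.log(norm.pdf(unsquashed, means, stds)) - np.log(1.0 - np.tanh(unsquashed) ** 2)
#
# The test itself sums the tanh-Jacobian term over the event axis (keepdims=True) to match the
# library's output convention.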