  def test_empirical_ensemble_multi_out_dict(self):
    inp = tf.keras.Input(shape=(10,))
    x = tf.keras.layers.Dense(10, activation="relu")(inp)
    out1 = tf.keras.layers.Dense(10, name="a")(x)
    out2 = tf.keras.layers.Dense(10, name="b")(x)
    out3 = tf.keras.layers.Dense(10, name="nolabel")(x)
    # Use the tf.keras functional API with named outputs.
    model = tf.keras.models.Model(inp, [out1, out2, out3])

    orig_weights = model.get_weights()
    # Keras does not provide a good reinit function; just draw random weights:
    weights1 = [np.random.random(w.shape) for w in orig_weights]
    weights2 = [np.random.random(w.shape) for w in orig_weights]

    input_shape = (None, 10)
    ens = ensemble.EmpiricalEnsemble(model, input_shape, [weights1, weights2])
    self.assertLen(ens, 2, msg="Empirical ensemble len wrong.")

    y_true = np.random.choice(10, 20)
    x = np.random.normal(0, 1, (20, 10))
    dataset = tf.data.Dataset.from_tensor_slices((x, {
        "a": y_true,
        "b": y_true
    })).batch(4)

    stat_results = ens.evaluate_ensemble(
        dataset, {
            "a": [stats.Accuracy()],
            "b": [stats.ClassificationLogProb(), stats.Accuracy()],
            "nolabel": [stats.ClassificationLogProb()],
        })

    self.assertLen(
        stat_results, 3,
        msg="Number of returned statistic lists should be 3.")
    self.assertLen(
        stat_results["a"], 1,
        msg="Number of returned statistics should be 1.")
    self.assertEqual(stat_results["b"][0].shape, (len(x), 10),
                     "Statistic result should have valid shape.")
    self.assertEqual(stat_results["nolabel"][0].shape, (len(x), 10),
                     "Statistic result should have valid shape.")

    outputs = ens.predict_ensemble(dataset)
    self.assertLen(outputs, 3)
    for output in outputs:
      self.assertEqual(output.shape, (len(ens), len(x), 10),
                       "Predicted output should have valid shape.")
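
  # Illustrative helper sketch (not part of the original tests): the
  # statistics mapping passed to `evaluate_ensemble` above is keyed by the
  # output-layer names ("a", "b", "nolabel"), so an all-label-free mapping
  # can be derived from the model itself. `model.output_names` is standard
  # tf.keras; everything else mirrors the test above.
  def _log_prob_statistics_for_all_outputs(self, model):
    # Label-free statistics work even for heads, such as "nolabel", that
    # have no entry in the dataset's label dict.
    return {
        name: [stats.ClassificationLogProb()] for name in model.output_names
    }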
  def test_empirical_ensemble(self):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(10, activation="relu"),
        tf.keras.layers.Dense(10)
    ])
    input_shape = (None, 10)
    model.build(input_shape=input_shape)

    orig_weights = model.get_weights()
    # Keras does not provide a good reinit function; just draw random weights:
    weights1 = [np.random.random(w.shape) for w in orig_weights]
    weights2 = [np.random.random(w.shape) for w in orig_weights]

    ens = ensemble.EmpiricalEnsemble(model, input_shape, [weights1, weights2])
    self.assertLen(ens, 2, msg="Empirical ensemble len wrong.")

    y_true = np.random.choice(10, 20)
    x = np.random.normal(0, 1, (20, 10))
    dataset = tf.data.Dataset.from_tensor_slices((x, y_true)).batch(4)

    stat_results = ens.evaluate_ensemble(dataset,
                                         [stats.ClassificationLogProb()])
    self.assertLen(
        stat_results, 1,
        msg="Number of evaluation outputs differs from statistics count.")
    self.assertEqual(stat_results[0].shape, (len(x), 10),
                     "Statistic result should have valid shape.")

    output = ens.predict_ensemble(dataset)
    self.assertEqual(output.shape, (len(ens), len(x), 10),
                     "Output should have valid shape.")
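
  # Minimal cross-check sketch (an assumption about the semantics, not part
  # of the original tests): the (len(ens), len(x), 10) shape above suggests
  # `predict_ensemble` stacks one forward pass per stored member along
  # axis 0, which can be compared against a manual `set_weights`/`predict`
  # loop using only standard tf.keras calls.
  def test_empirical_ensemble_matches_manual_loop_sketch(self):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(10, activation="relu"),
        tf.keras.layers.Dense(10)
    ])
    input_shape = (None, 10)
    model.build(input_shape=input_shape)
    weights_list = [[np.random.random(w.shape) for w in model.get_weights()]
                    for _ in range(2)]
    ens = ensemble.EmpiricalEnsemble(model, input_shape, weights_list)

    y_true = np.random.choice(10, 20)
    x = np.random.normal(0, 1, (20, 10))
    dataset = tf.data.Dataset.from_tensor_slices((x, y_true)).batch(4)
    output = ens.predict_ensemble(dataset)

    # Manual reference: run the model once per member and compare.
    for i, weights in enumerate(weights_list):
      model.set_weights(weights)
      np.testing.assert_allclose(
          output[i], model.predict(x, batch_size=4), rtol=1e-5, atol=1e-5)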
  def test_classification_prob(self):
    cprob = stats.ClassificationLogProb()
    logits1 = tf.math.log([[0.3, 0.7], [0.6, 0.4]])
    logits2 = tf.math.log([[0.2, 0.8], [0.5, 0.5]])
    logits3 = tf.math.log([[0.4, 0.6], [0.4, 0.6]])

    cprob.reset()
    cprob.update(logits1)
    cprob.update(logits2)
    cprob.update(logits3)
    log_prob = cprob.result()

    # The result is the log of the mean member probability, e.g. for entry
    # [0, 0]: log((0.3 + 0.2 + 0.4) / 3) = log(0.3).
    self.assertAlmostEqual(math.log(0.3), float(log_prob[0, 0]), delta=TOL)
    self.assertAlmostEqual(math.log(0.7), float(log_prob[0, 1]), delta=TOL)
    self.assertAlmostEqual(math.log(0.5), float(log_prob[1, 0]), delta=TOL)
    self.assertAlmostEqual(math.log(0.5), float(log_prob[1, 1]), delta=TOL)
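
  # Cross-check sketch (illustrative, not part of the original tests): the
  # hand-computed assertions above imply the identity
  # result == log(mean(member probabilities)), which can also be verified on
  # random probability rows; only numpy and the `update`/`result` calls shown
  # above are used.
  def test_classification_prob_matches_manual_mean_sketch(self):
    probs = np.random.dirichlet(np.ones(5), size=(3, 4))  # 3 members, 4 rows
    cprob = stats.ClassificationLogProb()
    cprob.reset()
    for member_probs in probs:
      # Rows are normalized, so their logs are valid log-probabilities.
      cprob.update(tf.math.log(member_probs))
    np.testing.assert_allclose(
        cprob.result(), np.log(probs.mean(axis=0)), atol=1e-5)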
  def test_fresh_reservoir_ensemble(self):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(10, activation="relu"),
        tf.keras.layers.Dense(10)
    ])
    input_shape = (None, 10)
    model.build(input_shape=input_shape)

    orig_weights = model.get_weights()
    # Keras does not provide a good reinit function; just draw random weights:
    weights1 = [np.random.random(w.shape) for w in orig_weights]
    weights2 = [np.random.random(w.shape) for w in orig_weights]

    y_true = np.random.choice(10, 20)
    x = np.random.normal(0, 1, (20, 10))

    # With freshness=50 the reservoir samples only from the freshest 50% of
    # appended members, so two appends leave a single stored member.
    ens = ensemble.FreshReservoirEnsemble(model, input_shape, capacity=2,
                                          freshness=50)
    ens.append(weights1)
    ens.append(weights2)
    self.assertLen(ens, 1, msg="Fresh reservoir ensemble len wrong.")

    statistics = [stats.ClassificationLogProb()]
    ens_pred = ens.evaluate_ensemble(x, statistics)
    self.assertLen(
        statistics, len(ens_pred),
        msg="Number of prediction outputs differs from statistics count.")
    self.assertLen(
        x, int(ens_pred[0].shape[0]),
        msg="Ensemble prediction statistics output has wrong shape.")

    statistics = [stats.Accuracy(), stats.ClassificationCrossEntropy()]
    ens_eval = ens.evaluate_ensemble((x, y_true), statistics)
    self.assertLen(
        statistics, len(ens_eval),
        msg="Number of evaluation outputs differs from statistics count.")
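
  # Illustrative sketch of the reservoir bound (an assumption about the API,
  # not part of the original tests): however many members are appended, the
  # ensemble should never hold more than `capacity` of them.
  def test_fresh_reservoir_capacity_bound_sketch(self):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(10, activation="relu"),
        tf.keras.layers.Dense(10)
    ])
    input_shape = (None, 10)
    model.build(input_shape=input_shape)
    ens = ensemble.FreshReservoirEnsemble(model, input_shape, capacity=2,
                                          freshness=50)
    for _ in range(10):
      ens.append([np.random.random(w.shape) for w in model.get_weights()])
    self.assertLessEqual(len(ens), 2, msg="Capacity bound violated.")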