def test_configure_callbacks():
  model = training.EnsembleModel(model_builder)
  optimizer = tf.keras.optimizers.SGD()
  loss = 'sparse_categorical_crossentropy'
  n_replicas = 6
  model.compile(optimizer, loss, n_replicas)
  hparams_dict = {
      'learning_rate': np.linspace(0.001, 0.01, n_replicas),
      'dropout_rate': np.linspace(0., 0.6, n_replicas)
  }
  kwargs = {
      'do_validation': True,
      'batch_size': 2,
      'epochs': 2,
      'steps_per_epoch': None,
      'samples': None,
      'verbose': 1,
  }
  callbacklist = cbks.configure_callbacks([], model, **kwargs)

  kwargs.update({
      'metrics': model.metrics_names +
                 ['val_' + m for m in model.metrics_names]
  })
  # the callback params store this value under `steps`, not `steps_per_epoch`
  kwargs['steps'] = kwargs.pop('steps_per_epoch')

  # test that params are stored as intended
  assert kwargs == callbacklist.params


def test_base_hp_exchange_callback():
  tf.compat.v1.keras.backend.clear_session()
  em = training.EnsembleModel(model_builder)
  optimizer = tf.keras.optimizers.SGD()
  loss = 'binary_crossentropy'
  n_replicas = 6
  em.compile(optimizer, loss, n_replicas)
  hparams_dict = {
      'learning_rate': np.linspace(0.001, 0.01, n_replicas),
      'dropout_rate': np.linspace(0., 0.6, n_replicas)
  }
  hpss = training_utils.HyperParamSpace(em, hparams_dict)

  x = np.random.normal(0, 2, (18, 2))
  y = np.random.randint(0, 2, (18, 1))
  clb = cbks.BaseExchangeCallback((x, y), swap_step=10, burn_in=1)
  clb.model = em

  # test get_ordered_losses() and _metrics_sorting_key()
  input_ = ['loss_' + str(i) for i in range(n_replicas)]
  input_ = input_ + ['loss_1_' + str(i) for i in range(n_replicas)]
  logs = {l: np.random.uniform() for l in input_}
  expected = copy.deepcopy(input_)
  random.shuffle(input_)
  actual = clb.get_ordered_losses(logs)
  actual = [pair[0] for pair in actual]
  assert actual == expected

  # test should_exchange property
  em.global_step = 10
  assert clb.should_exchange()
  em.global_step = 9
  assert not clb.should_exchange()


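# A hedged illustration of the ordering asserted in
# `test_base_hp_exchange_callback`: metric names are grouped by metric prefix
# and then by replica index, so all 'loss_*' entries precede 'loss_1_*'. The
# key below is only a sketch of such an ordering, not the callback's actual
# `_metrics_sorting_key` implementation.
def test_ordered_losses_sorting_sketch():
  def example_sorting_key(metric_name):
    # split 'loss_1_3' into ('loss_1', 3) and 'loss_0' into ('loss', 0)
    prefix, _, replica_id = metric_name.rpartition('_')
    return (prefix, int(replica_id))

  names = ['loss_1_3', 'loss_0', 'loss_1_0', 'loss_2']
  expected = ['loss_0', 'loss_2', 'loss_1_0', 'loss_1_3']
  assert sorted(names, key=example_sorting_key) == expected

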
def test_pt_ensemble():
  ensemble = training.EnsembleModel(model_builder)

  # all args are `None`
  optimizers = [None]
  losses = [None]
  hyper_params = [None]
  errors = [ValueError]
  zipped = zip(optimizers, losses, hyper_params, errors)
  for optimizer, loss, hp, error in zipped:
    with pytest.raises(error):
      ensemble.compile(optimizer, loss, n_replicas=2)

  optimizer = tf.keras.optimizers.SGD()
  loss = 'categorical_crossentropy'
  metrics = ['accuracy', tf.keras.metrics.Precision()]
  ensemble.compile(optimizer, loss, 2, metrics=metrics)

  x = np.random.normal(0, 1, (10, 2))
  y = np.random.randint(0, 2, (10, 1))
  hp = {'learning_rate': [0.0, 0.03], 'dropout_rate': [0.0, 0.1]}
  return ensemble.fit(x, y, hyper_params=hp, epochs=3, batch_size=2)


def test_metropolis_callback():
  tf.compat.v1.keras.backend.clear_session()
  em = training.EnsembleModel(model_builder)
  optimizer = tf.keras.optimizers.SGD()
  loss = 'binary_crossentropy'
  n_replicas = 10
  em.compile(optimizer, loss, n_replicas)
  em.global_step = 0
  hparams_dict = {
      'learning_rate': np.linspace(0.001, 0.01, n_replicas),
      'dropout_rate': np.linspace(0.05, 0.6, n_replicas)
  }
  hpspace = training_utils.ScheduledHyperParamSpace(em, hparams_dict)

  x = np.random.normal(0, 0.2, (18, 2))
  y = np.random.randint(0, 2, (18, 1))
  clb = cbks.MetropolisExchangeCallback((x, y), swap_step=10)
  clb.model = em
  em._hp_state_space = hpspace

  losses = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.8]
  hpname = 'dropout_rate'

  # expected state of hyperparams after calling the `exchange()` function
  expected = copy.deepcopy(hpspace.get_current_hyperparams_space().hpspace)
  t = expected[8]['dropout_rate']
  expected[8]['dropout_rate'] = expected[9]['dropout_rate']
  expected[9]['dropout_rate'] = t

  # This pair must exchange with probability one because
  # (beta_i - beta_j) < 0 and losses[i] - losses[j] = 0.8 - 0.9 < 0,
  # so exp((beta_i - beta_j) * (losses[i] - losses[j])) > 1.
  exchange_pair = 9
  clb.exchange(hpname=hpname, exchange_pair=exchange_pair)

  assert hpspace.get_current_hyperparams_space().hpspace == expected


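# The acceptance rule exercised in `test_metropolis_callback` can be written
# as a small standalone sketch. This is illustrative only (not the library's
# implementation); `beta_i`/`beta_j` stand for the inverse "temperatures" of
# the two replicas being considered for a swap.
def test_metropolis_acceptance_sketch():
  def swap_probability(beta_i, beta_j, loss_i, loss_j):
    # Metropolis criterion:
    # accept with probability min(1, exp((beta_i - beta_j) * (loss_i - loss_j)))
    return min(1.0, np.exp((beta_i - beta_j) * (loss_i - loss_j)))

  # Both factors are negative (as in the comment for replicas 8 and 9 above),
  # so the exponent is positive and the swap is accepted with probability 1.
  assert swap_probability(0.4, 0.5, 0.8, 0.9) == 1.0
  # With the loss difference flipped the exponent is negative, so the swap is
  # only accepted with probability < 1.
  assert swap_probability(0.4, 0.5, 0.9, 0.8) < 1.0

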
def test_metrics_and_losses():
  """Tests metrics and losses for `model.fit()` and `model.evaluate()`."""
  # How metrics and losses are tested:
  # 1. Generate two replicas and train them within our model_iteration.
  # 2. Train the same models, initialized with exactly the same weights,
  #    using Keras' API.
  # 3. Compare the final weights and the history values of metrics and losses.

  # test a number of times
  for _ in range(1):
    tf.compat.v1.keras.backend.clear_session()
    x_data, y_data = make_blobs(n_samples=32, centers=[[1, 1], [-1, -1]])
    batch_size = 8
    epochs = 5
    verbose = 1
    init1 = np.random.normal(0, 0.5, (2, 2)).astype('float32')
    init2 = np.random.normal(0, 0.2, (2, 1)).astype('float32')

    def init_fn1(*args, **kwargs):
      return init1

    def init_fn2(*args, **kwargs):
      return init2

    def model_builder2(*args):
      inputs = tf.keras.layers.Input((2,))
      res = tf.keras.layers.Dense(2,
                                  activation=tf.nn.relu,
                                  kernel_initializer=init_fn1)(inputs)
      res = tf.keras.layers.Dense(1,
                                  activation=tf.nn.sigmoid,
                                  kernel_initializer=init_fn2)(res)
      model = tf.keras.models.Model(inputs, res)
      return model

    metrics = [
        'accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(curve='PR'),
        tf.keras.metrics.AUC(curve='ROC'),
    ]

    # reference Keras model
    model = model_builder2()
    model.compile(optimizer=tf.keras.optimizers.SGD(0.003),
                  loss='binary_crossentropy',
                  metrics=metrics)
    hist = model.fit(x_data,
                     y_data,
                     batch_size=batch_size,
                     epochs=epochs,
                     shuffle=False,
                     verbose=verbose)
    expected_loss = hist.history['loss']
    expected_acc = hist.history['acc']
    expected_precision = hist.history['precision']
    expected_recall = hist.history['recall']
    expected_auc = hist.history['auc']
    expected_auc_1 = hist.history['auc_1']
    expected_evaluated = model.evaluate(x_data, y_data, verbose=0)
    expected_predicted = model.predict(x_data)

    # ensemble model
    tf.compat.v1.keras.backend.clear_session()
    metrics = [
        'accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(curve='PR'),
        tf.keras.metrics.AUC(curve='ROC'),
    ]
    em = training.EnsembleModel(model_builder2)
    em.compile(optimizer=tf.keras.optimizers.SGD(0.0),
               loss='binary_crossentropy',
               n_replicas=2,
               metrics=metrics)
    hp = {
        'learning_rate': [0.003, 0.],
    }
    hist2 = em.fit(x_data,
                   y_data,
                   hyper_params=hp,
                   epochs=epochs,
                   batch_size=batch_size,
                   shuffle=False,
                   verbose=verbose)
    actual_evaluated = em.evaluate(x_data, y_data, verbose=0)
    actual_predicted = em.predict(x_data)

    # compare evaluation metrics
    size = len(expected_evaluated)
    np.testing.assert_almost_equal(expected_evaluated,
                                   actual_evaluated[::2][:size])

    # compare predicted outputs
    np.testing.assert_almost_equal(expected_predicted,
                                   actual_predicted[::2][0])

    # compare training history
    loss_0 = hist2.history['loss_0']
    loss_1 = hist2.history['loss_1']
    acc_0 = hist2.history['acc_0']
    acc_1 = hist2.history['acc_1']
    precision_0 = hist2.history['precision_0']
    precision_1 = hist2.history['precision_1']
    recall_0 = hist2.history['recall_0']
    recall_1 = hist2.history['recall_1']
    auc_0 = hist2.history['auc_0']
    auc_1 = hist2.history['auc_1']
    auc_1_0 = hist2.history['auc_1_0']
    auc_1_1 = hist2.history['auc_1_1']

    np.testing.assert_almost_equal(loss_0, expected_loss)
    np.testing.assert_almost_equal(acc_0, expected_acc)
    np.testing.assert_almost_equal(precision_0, expected_precision)
    np.testing.assert_almost_equal(recall_0, expected_recall)
    np.testing.assert_almost_equal(auc_0, expected_auc)
    np.testing.assert_almost_equal(auc_1_0, expected_auc_1)

    # learning rate is 0 - no change is expected
    assert len(set(loss_1)) == 1
    assert len(set(acc_1)) == 1
    assert len(set(precision_1)) == 1
    assert len(set(recall_1)) == 1
    assert len(set(auc_1)) == 1
    assert len(set(auc_1_1)) == 1

    # test that the extraction of the replica (keras model) that corresponds
    # to the minimal loss is correct
    optimal_model = em.optimal_model()
    sess = tf.compat.v1.keras.backend.get_session()
    graph = sess.graph
    optimal_model.compile(optimizer=tf.keras.optimizers.SGD(),
                          loss='binary_crossentropy')
    optimal_loss = optimal_model.evaluate(x_data, y_data)
    min_loss = optimal_loss
    evaluated_losses = em.evaluate(x_data, y_data)[:em.n_replicas]
    np.testing.assert_almost_equal(min_loss, min(evaluated_losses))
    tf.compat.v1.keras.backend.clear_session()


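# The comparisons in `test_metrics_and_losses` rely on the assumption that the
# ensemble's `evaluate()` output interleaves per-replica values metric by
# metric, e.g. [loss_0, loss_1, acc_0, acc_1, ...], so that `[::2]` recovers
# replica 0 and `[:n_replicas]` recovers the per-replica losses. This is a
# minimal sketch of that slicing, inferred from the assertions above rather
# than from the library's documented output layout.
def test_evaluate_output_slicing_sketch():
  interleaved = ['loss_0', 'loss_1', 'acc_0', 'acc_1', 'auc_0', 'auc_1']
  n_replicas = 2
  assert interleaved[::2] == ['loss_0', 'acc_0', 'auc_0']
  assert interleaved[:n_replicas] == ['loss_0', 'loss_1']

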
def test_hp_space_state():
  tf.compat.v1.keras.backend.clear_session()
  em = training.EnsembleModel(model_builder)
  optimizer = tf.keras.optimizers.SGD()
  loss = 'sparse_categorical_crossentropy'
  n_replicas = 6
  em.compile(optimizer, loss, n_replicas)
  hparams_dict = {
      'learning_rate': np.linspace(0.001, 0.01, n_replicas),
      'dropout_rate': np.linspace(0., 0.6, n_replicas)
  }
  hpss = training_utils.HyperParamSpace(em, hparams_dict)

  # test that initial hyper-parameter values are correct
  initial_values = {
      0: {'learning_rate': 0.001, 'dropout_rate': 0.0},
      1: {'learning_rate': 0.0028000000000000004, 'dropout_rate': 0.12},
      2: {'learning_rate': 0.0046, 'dropout_rate': 0.24},
      3: {'learning_rate': 0.0064, 'dropout_rate': 0.36},
      4: {'learning_rate': 0.0082, 'dropout_rate': 0.48},
      5: {'learning_rate': 0.01, 'dropout_rate': 0.6}
  }
  assert initial_values == hpss.hpspace

  # swap learning rate between replicas 0 and 1
  replica_i = 0
  replica_j = 1
  hpss.swap_between(replica_i, replica_j, 'learning_rate')
  expected_values = {
      0: {'learning_rate': 0.0028000000000000004, 'dropout_rate': 0.0},
      1: {'learning_rate': 0.001, 'dropout_rate': 0.12},
      2: {'learning_rate': 0.0046, 'dropout_rate': 0.24},
      3: {'learning_rate': 0.0064, 'dropout_rate': 0.36},
      4: {'learning_rate': 0.0082, 'dropout_rate': 0.48},
      5: {'learning_rate': 0.01, 'dropout_rate': 0.6}
  }
  assert hpss.hpspace == expected_values

  # test that the ordered values represent adjacent temperatures
  expected_values = [(1, 0.001), (0, 0.0028000000000000004), (2, 0.0046),
                     (3, 0.0064), (4, 0.0082), (5, 0.01)]
  assert hpss.get_ordered_hparams('learning_rate') == expected_values

  # test that placeholders are correctly fed
  feed_dict = hpss.prepare_feed_tensors_and_values()
  lr_feed_dict = {
      k: v for k, v in feed_dict.items() if 'learning_rate' in k.name
  }
  lr_items = list(lr_feed_dict.items())
  lr_items.sort(key=lambda x: x[0].name)
  actual = [v[1] for v in lr_items]

  expected_values.sort(key=lambda x: x[0])
  expected = [v[1] for v in expected_values]
  np.testing.assert_almost_equal(actual, expected)


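# The hard-coded `initial_values` in `test_hp_space_state` are simply the
# `np.linspace` grids used to build `hparams_dict`; a quick sanity sketch of
# that arithmetic (illustrative only):
def test_linspace_hyperparam_grid_sketch():
  # step is (0.01 - 0.001) / 5 = 0.0018 for the learning rate grid
  np.testing.assert_almost_equal(
      np.linspace(0.001, 0.01, 6),
      [0.001, 0.0028, 0.0046, 0.0064, 0.0082, 0.01])
  # step is 0.6 / 5 = 0.12 for the dropout rate grid
  np.testing.assert_almost_equal(
      np.linspace(0., 0.6, 6),
      [0.0, 0.12, 0.24, 0.36, 0.48, 0.6])

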
def test_model_iteration_without_exchanges():
  # test that history stores accurate losses
  tf.compat.v1.keras.backend.clear_session()
  model = training.EnsembleModel(model_builder)
  n_replicas = 3
  optimizer = tf.keras.optimizers.SGD()
  loss = 'sparse_categorical_crossentropy'
  model.compile(optimizer, loss, n_replicas)

  x = np.random.normal(0, 1, (6, 2))
  y_train = np.arange(6).astype('float')
  y_test = np.arange(6, 12).astype('float')
  hp = {'learning_rate': [0.01, 0.02, 0.3], 'dropout_rate': [0.0, 0.1, 0.3]}
  batch_size = 3
  epochs = 5
  validation_data = (x, y_test)

  def train_on_batch(x, y):
    return [y[0], y[1], y[2]]

  def test_on_batch(x, y):
    return [y[0], y[1], y[2]]

  model.train_on_batch = train_on_batch
  model.test_on_batch = test_on_batch

  history = model.fit(x,
                      y_train,
                      hyper_params=hp,
                      epochs=epochs,
                      batch_size=batch_size,
                      validation_data=validation_data,
                      shuffle=False,
                      verbose=0)
  expected_hist = {
      'loss_0': [1.5] * epochs,
      'loss_1': [2.5] * epochs,
      'loss_2': [3.5] * epochs,
      'val_loss_0': [7.5] * epochs,
      'val_loss_1': [8.5] * epochs,
      'val_loss_2': [9.5] * epochs
  }
  assert expected_hist == history.history

  # test the case when the last batch size is smaller than others
  model = training.EnsembleModel(model_builder)
  n_replicas = 3
  optimizer = tf.keras.optimizers.SGD()
  loss = 'sparse_categorical_crossentropy'
  model.compile(optimizer, loss, n_replicas)

  x = np.random.normal(0, 1, (5, 2))
  y_train = np.arange(5).astype('float')
  y_test = np.arange(5, 10).astype('float')
  hp = {
      'learning_rate': [0.01, 0.02, 0.3],
      'dropout_rate': [0.0001, 0.1, 0.3]
  }
  batch_size = 3
  epochs = 5
  validation_data = (x, y_test)

  def train_on_batch(x, y):
    if y.shape[0] < 3:
      res = [y[0], y[1], (y[0] + y[1]) / 2]
    else:
      res = [y[0], y[1], y[2]]
    return res

  def test_on_batch(x, y):
    if y.shape[0] < 3:
      res = [y[0], y[1], (y[0] + y[1]) / 2]
    else:
      res = [y[0], y[1], y[2]]
    return res

  model.train_on_batch = train_on_batch
  model.test_on_batch = test_on_batch

  history = model.fit(x,
                      y_train,
                      hyper_params=hp,
                      epochs=epochs,
                      batch_size=batch_size,
                      validation_data=validation_data,
                      shuffle=False)
  expected_hist = {
      'loss_0': [0 * (3 / 5) + 3 * (2 / 5)] * epochs,
      'loss_1': [1 * (3 / 5) + 4 * (2 / 5)] * epochs,
      'loss_2': [2 * (3 / 5) + 3.5 * (2 / 5)] * epochs,
      'val_loss_0': [5 * (3 / 5) + 8 * (2 / 5)] * epochs,
      'val_loss_1': [6 * (3 / 5) + 9 * (2 / 5)] * epochs,
      'val_loss_2': [7 * (3 / 5) + 8.5 * (2 / 5)] * epochs
  }
  for k in expected_hist:
    np.testing.assert_almost_equal(np.squeeze(history.history[k]),
                                   expected_hist[k])


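# The `expected_hist` values in the small-last-batch case above encode
# per-batch losses averaged with batch-size weights (the usual Keras history
# aggregation). A minimal sketch of that arithmetic for the 5-sample case
# (batches of size 3 and 2), illustrative only:
def test_weighted_epoch_loss_sketch():
  def weighted_epoch_loss(batch_losses, batch_sizes):
    total = sum(batch_sizes)
    return sum(l * s for l, s in zip(batch_losses, batch_sizes)) / total

  # replica 0 sees batch losses 0 (size 3) and 3 (size 2)
  np.testing.assert_almost_equal(
      weighted_epoch_loss([0.0, 3.0], [3, 2]), 0 * (3 / 5) + 3 * (2 / 5))
  # replica 2 sees batch losses 2 (size 3) and 3.5 (size 2)
  np.testing.assert_almost_equal(
      weighted_epoch_loss([2.0, 3.5], [3, 2]), 2 * (3 / 5) + 3.5 * (2 / 5))

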
def test_all_exchange_callback():
  # Add testing here when there are multiple exchange callbacks.

  # setting up
  tf.compat.v1.keras.backend.clear_session()
  model = training.EnsembleModel(model_builder)
  n_replicas = 4
  optimizer = tf.keras.optimizers.SGD()
  loss = 'sparse_categorical_crossentropy'
  model.compile(optimizer, loss, n_replicas)

  x = np.random.normal(0, 1, (6, 2))
  y_train = np.arange(6).astype('float')
  y_test = np.arange(6, 12).astype('float')
  hp = {
      'learning_rate': [0.01, 0.02, 0.03, 0.04],
      'dropout_rate': [0.1, 0.2, 0.3, 0.4]
  }
  batch_size = 3
  epochs = 5
  do_validation = False
  validation_data = (x, y_test)
  callbacks = []
  samples = 6
  exchange_data = validation_data
  swap_step = 2
  hpss = training_utils.ScheduledHyperParamSpace(model, hp)
  model._hp_state_space = hpss

  # values of losses that train_on_batch/test_on_batch will return
  def train_on_batch(x, y):
    train_losses = [0.16, 0.15, 0.14, 0.13]
    return train_losses

  def test_on_batch(x, y):
    test_losses = [0.25, 0.24, 0.23, 0.22]
    return test_losses

  model.train_on_batch = train_on_batch
  model.test_on_batch = test_on_batch

  # test that the ExchangeCallback was added correctly
  callbacks_list = cbks.configure_callbacks(
      callbacks,
      model,
      do_validation=do_validation,
      batch_size=batch_size,
      epochs=epochs,
      exchange_data=exchange_data,
  )

  # callbacks_list has an instance of `BaseExchangeCallback`
  assert any(
      isinstance(c, cbks.BaseExchangeCallback)
      for c in callbacks_list.callbacks)

  def get_first_exchange_callback():
    for cbk in callbacks_list.callbacks:
      if isinstance(cbk, cbks.BaseExchangeCallback):
        return cbk

  prev_hpspace = copy.deepcopy(model.hpspace.hpspace)
  get_first_exchange_callback()._safe_exchange(hpname='dropout_rate',
                                               exchange_pair=3)
  # test that the exchange happened
  assert (model.hpspace.hpspace[3]['dropout_rate'] ==
          prev_hpspace[2]['dropout_rate'])

  get_first_exchange_callback()._safe_exchange(hpname='learning_rate',
                                               exchange_pair=3)
  # test that the exchange happened
  assert model.hpspace.hpspace[3] == prev_hpspace[2]

  assert get_first_exchange_callback().exchange_logs['swaped'] == [1, 1]
  assert get_first_exchange_callback().exchange_logs['hpname'] == [
      'dropout_rate', 'learning_rate'
  ]


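# A hedged sketch of the adjacent-pair swap that the assertions above rely
# on: `exchange_pair=i` appears to exchange a hyperparameter between replicas
# i - 1 and i. This is an assumption based on the asserted state, not the
# library's actual `_safe_exchange` implementation.
def test_adjacent_pair_swap_sketch():
  def swap_adjacent(hpspace, hpname, exchange_pair):
    i, j = exchange_pair - 1, exchange_pair
    hpspace[i][hpname], hpspace[j][hpname] = (hpspace[j][hpname],
                                              hpspace[i][hpname])

  space = {2: {'dropout_rate': 0.3}, 3: {'dropout_rate': 0.4}}
  swap_adjacent(space, 'dropout_rate', 3)
  assert space == {2: {'dropout_rate': 0.4}, 3: {'dropout_rate': 0.3}}

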
def test_pbt_callback():
  tf.compat.v1.keras.backend.clear_session()
  em = training.EnsembleModel(model_builder)
  optimizer = tf.keras.optimizers.SGD()
  loss = 'binary_crossentropy'
  n_replicas = 4
  em.compile(optimizer, loss, n_replicas)
  hparams_dict = {
      'learning_rate': np.linspace(0.001, 0.01, n_replicas),
      'dropout_rate': np.linspace(0., 0.6, n_replicas)
  }
  hpss = training_utils.ScheduledHyperParamSpace(em, hparams_dict)
  em._hp_state_space = hpss

  x = np.random.normal(0, 2, (18, 2))
  y = np.random.randint(0, 2, (18, 1))

  # define a dict that maps each hyperparameter to the distribution by which
  # its value will be perturbed (in this test constant values are added)
  hparams_dist_dict = {
      'learning_rate': lambda *x: 0,
      'dropout_rate': lambda *x: 0
  }

  # define the distribution by which the weights of each replica
  # will be perturbed
  weight_dist = lambda shape: np.zeros(shape)

  swap_step = 10
  explore_weights = True
  explore_hyperparams = True
  burn_in = 1

  clb = cbks.PBTExchangeCallback((x, y),
                                 swap_step=swap_step,
                                 explore_weights=explore_weights,
                                 explore_hyperparams=explore_hyperparams,
                                 burn_in=burn_in,
                                 weight_dist_fn=weight_dist,
                                 hyperparams_dist=hparams_dist_dict)
  clb.model = em
  em.global_step = 0

  # test that the values were instantiated correctly
  assert clb.swap_step == swap_step
  assert clb.burn_in == burn_in
  assert explore_weights == clb.should_explore_weights
  assert explore_hyperparams == clb.should_explore_hyperparams
  assert len(hparams_dist_dict) == len(clb.hyperparams_dist)

  # To test the logic we define test losses and check whether replicas have
  # copied weights and hyperparams correctly.
  test_losses = [0.1 * x for x in range(1, n_replicas + 1)]

  # every replica should copy from replica 0
  sess = tf.keras.backend.get_session()
  rid0_weights = sess.run(em.models[0].trainable_variables)
  rid0_hyperparams = hpss.get_current_hyperparams_space().hpspace[0]

  # copy weights and hyperparams from replica 0 to all other replicas
  clb.exploit_and_explore(test_losses=test_losses)

  for i, m in enumerate(em.models[1:]):
    weights = sess.run(m.trainable_variables)
    for w1, w2 in zip(rid0_weights, weights):
      np.testing.assert_almost_equal(w1, w2)

    hparams = hpss.get_current_hyperparams_space().hpspace[i + 1]
    for hname in rid0_hyperparams:
      np.testing.assert_almost_equal(rid0_hyperparams[hname], hparams[hname])


def test_copy_weights():
  """Test that the values are copied as intended."""