def test_misc(self):  # tests of non-main features to improve coverage
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        optimizer_kw = {'total_iterations': 0, 'decay': 1e-3,
                        'amsgrad': optimizer_name == 'AdamW',
                        'nesterov': optimizer_name == 'SGDW'}
        num_batches = 4
        batch_size, timesteps = 16, 8
        batch_shape = (batch_size, timesteps)
        embed_input_dim = 5
        total_iterations = 0

        self.model = self._make_model(batch_shape, total_iterations,
                                      embed_input_dim=embed_input_dim,
                                      dense_constraint=1, l2_reg=1e-4,
                                      bidirectional=False, sparse=True)
        optimizer = self._make_optimizer(optimizer_name, self.model,
                                         **optimizer_kw)
        self.model.compile(optimizer, loss='sparse_categorical_crossentropy')
        X, Y = self._make_data(num_batches, *batch_shape,
                               embed_input_dim=embed_input_dim, sparse=True)

        for batch_num in range(num_batches):
            self.model.train_on_batch(X[batch_num], Y[batch_num])

        self._test_save_load(self.model, X, optimizer_name, optimizer)

        # util test
        dc = {'lstm': 0, 'dense': 0}
        fill_dict_in_order(dc, [1e-4, 2e-4])
        AdamW(model=self.model, zero_penalties=True)
        AdamW(model=self.model, weight_decays={'a': 0})

        # cleanup
        del self.model, optimizer
        reset_seeds(reset_graph_with_backend=K)

        cprint("\n<< {} MISC TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MISC TESTS PASSED >>\n", 'green')

def test_main():  # Save/Load, Warm Restarts (w/ cosine annealing)
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        num_batches, num_epochs = 25, 4
        batch_size, timesteps, num_channels = 16, 8, 4
        batch_shape = (batch_size, timesteps, num_channels)
        total_iterations = num_batches  # due to warm restarts

        model = _make_model(batch_shape, l1_reg=1e-4, l2_reg=1e-4)
        optimizer = _make_optimizer(optimizer_name, model, total_iterations)
        model.compile(optimizer, loss='binary_crossentropy')
        assert _valid_weight_decays(model)
        if hasattr(model, '_make_train_function'):  # graph-mode
            model._make_train_function()  # else K.eval before train may fail

        X, Y = _make_data(num_batches, *batch_shape)
        eta_history = []    # for stop-introspection
        t_cur_history = []  # for stop-introspection

        # // Explanation for "manual option" when autorestart=False
        # eta_t is first applied as-is, and only updated AFTER iteration;
        # setting t_cur does not immediately change eta_t.
        # Thus, t_cur must be reset 1 iteration BEFORE epoch ends
        #     (t, e) = (t_cur_history[-1], eta_history[-1])
        #     (t, e) = (24, 0)    -> RESET -> (-1, 0...) [on epoch end]
        #     (t, e) = (23, 0...) -> RESET -> (-1, 0) -> (0, 1) [before epoch end]
        for epoch in range(num_epochs):
            for batch_num in range(num_batches):
                t_cur_history += [K_eval(model.optimizer.t_cur, K)]
                eta_history += [K_eval(model.optimizer.eta_t, K)]
                model.train_on_batch(X[batch_num], Y[batch_num])
                # if batch_num == (num_batches - 2):  Manual Option
                #     K.set_value(model.optimizer.t_cur, -1)

        assert _valid_cosine_annealing(eta_history, total_iterations, num_epochs)
        assert model.optimizer.get_config()  # ensure value evaluation won't error
        _test_save_load(model, X, optimizer_name, optimizer)

        # cleanup
        del model, optimizer
        reset_seeds(reset_graph_with_backend=K)

        cprint("\n<< {} MAIN TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MAIN TESTS PASSED >>\n", 'green')

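# A minimal reference for the schedule `_valid_cosine_annealing` is meant to
# verify: the SGDR-style cosine-annealed learning-rate multiplier, assuming
# eta_min=0 and eta_max=1. This is an illustrative sketch of the expected
# curve, not the helper's actual implementation.
def _expected_eta_sketch(t_cur, total_iterations, eta_min=0., eta_max=1.):
    import numpy as np  # already imported at module level in this test file
    return eta_min + 0.5 * (eta_max - eta_min) * (
        1 + np.cos(np.pi * t_cur / total_iterations))

# e.g. _expected_eta_sketch(0, 25) == 1.0, _expected_eta_sketch(25, 25) == 0.0;
# after a warm restart t_cur returns to 0, so eta_t jumps back to eta_max.
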
def test_main(self):  # Save/Load, Warm Restarts (w/ cosine annealing)
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        num_batches, num_epochs = 25, 4
        batch_size, timesteps, num_channels = 16, 8, 4
        batch_shape = (batch_size, timesteps, num_channels)
        total_iterations = num_batches  # due to warm restarts

        self.model = self._make_model(batch_shape, total_iterations,
                                      l1_reg=1e-4, l2_reg=1e-4)
        optimizer = self._make_optimizer(optimizer_name, self.model,
                                         total_iterations)
        self.model.compile(optimizer, loss='binary_crossentropy')
        self.assertTrue(self._valid_weight_decays(self.model))
        self.model._make_train_function()  # else K.eval before train may fail

        X, Y = self._make_data(num_batches, *batch_shape)
        self.eta_history = []    # for stop-introspection
        self.t_cur_history = []  # for stop-introspection

        for epoch in range(num_epochs):
            for batch_num in range(num_batches):
                self.t_cur_history += [K_eval(self.model.optimizer.t_cur, K)]
                self.eta_history += [K_eval(self.model.optimizer.eta_t, K)]
                self.model.train_on_batch(X[batch_num], Y[batch_num])
            self.eta_history += [K_eval(self.model.optimizer.eta_t, K)]
            self.eta_history.pop(-(1 + int(tf_eager)))
            K.set_value(self.model.optimizer.t_cur, 0)

        self.assertTrue(self._valid_cosine_annealing(self.eta_history,
                                                     total_iterations,
                                                     num_epochs))
        self._test_save_load(self.model, X, optimizer_name, optimizer)

        # cleanup
        del self.model, optimizer
        reset_seeds(reset_graph_with_backend=K)

        cprint("\n<< {} MAIN TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MAIN TESTS PASSED >>\n", 'green')

def test_misc():  # tests of non-main features to improve coverage
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        optimizer_kw = {'total_iterations': 0, 'decay': 1e-3,
                        'amsgrad': optimizer_name == 'AdamW',
                        'nesterov': optimizer_name == 'SGDW'}
        num_batches = 4
        batch_size, timesteps = 16, 8
        batch_shape = (batch_size, timesteps)
        embed_input_dim = 5

        # arbitrarily select SGDW for coverage testing
        l1_reg = 1e-4 if optimizer_name == 'SGDW' else 0
        l2_reg = 1e-4 if optimizer_name != 'SGDW' else 0
        if optimizer_name == 'SGDW':
            optimizer_kw.update(dict(zero_penalties=False, weight_decays={},
                                     total_iterations=2, momentum=0))

        model = _make_model(batch_shape, embed_input_dim=embed_input_dim,
                            dense_constraint=1, l1_reg=l1_reg, l2_reg=l2_reg,
                            bidirectional=False, sparse=True)
        optimizer = _make_optimizer(optimizer_name, model, **optimizer_kw)
        model.compile(optimizer, loss='sparse_categorical_crossentropy')
        X, Y = _make_data(num_batches, *batch_shape,
                          embed_input_dim=embed_input_dim, sparse=True)

        for batch_num in range(num_batches):
            model.train_on_batch(X[batch_num], Y[batch_num])

        _test_save_load(model, X, optimizer_name, optimizer)

        # util test
        dc = {'lstm': 0, 'dense': 0}
        fill_dict_in_order(dc, [1e-4, 2e-4])
        AdamW(model=model, zero_penalties=False, total_iterations=2)
        AdamW(model=model, weight_decays={'a': 0})
        opt = AdamW(weight_decays={model.layers[1].weights[0].name: (0, 0)},
                    total_iterations=2)
        model.compile(opt, loss='sparse_categorical_crossentropy')
        model.train_on_batch(X[0], Y[0])

        # cleanup
        del model, optimizer
        reset_seeds(reset_graph_with_backend=K)

        try:
            K_eval('x', K)  # for coverage
        except:
            pass

        cprint("\n<< {} MISC TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MISC TESTS PASSED >>\n", 'green')

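# Rough sketch of what the `fill_dict_in_order` utility exercised above is
# assumed to do: assign `values` to the dict's keys in insertion order. This
# is an illustrative assumption for readability, not the library's
# implementation.
def _fill_dict_in_order_sketch(dct, values):
    for key, value in zip(dct, values):  # dict preserves insertion order (3.7+)
        dct[key] = value
    return dct

# e.g. _fill_dict_in_order_sketch({'lstm': 0, 'dense': 0}, [1e-4, 2e-4])
#      -> {'lstm': 1e-4, 'dense': 2e-4}
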
def _test_control(optimizer_name, amsgrad=False, nesterov=False, momentum=.9):
    optimizer_kw = dict(total_iterations=0, decay=1e-3, amsgrad=amsgrad,
                        nesterov=nesterov, momentum=momentum,
                        control_mode=True)
    num_batches = 100
    batch_size, timesteps = 16, 32
    batch_shape = (batch_size, timesteps)
    embed_input_dim = 5
    model_kw = dict(batch_shape=batch_shape, dense_constraint=1,
                    embed_input_dim=embed_input_dim, l1_reg=0, l2_reg=0,
                    bidirectional=False, sparse=True)
    loss_name = 'sparse_categorical_crossentropy'
    reset_seeds(verbose=0)
    X, Y = _make_data(num_batches, *batch_shape,
                      embed_input_dim=embed_input_dim, sparse=True)

    reset_seeds(reset_graph_with_backend=K, verbose=0)
    model_custom = _make_model(**model_kw)
    optimizer_custom = _make_optimizer(optimizer_name, model_custom,
                                       **optimizer_kw)
    model_custom.compile(optimizer_custom, loss=loss_name)
    loss_custom = []  # for introspection
    t0 = time()
    for batch_num in range(num_batches):
        loss_custom += [model_custom.train_on_batch(X[batch_num],
                                                    Y[batch_num])]
    print("\nmodel_custom -- %s batches -- time: %.2f sec" % (num_batches,
                                                              time() - t0))

    reset_seeds(reset_graph_with_backend=K, verbose=0)
    model_control = _make_model(**model_kw)
    optimizer_control = _make_optimizer(optimizer_name[:-1], model_control,
                                        **optimizer_kw)
    model_control.compile(optimizer_control, loss=loss_name)
    loss_control = []  # for introspection
    t0 = time()
    for batch_num in range(num_batches):
        loss_control += [model_control.train_on_batch(X[batch_num],
                                                      Y[batch_num])]
    print("model_control -- %s batches -- time: %.2f sec" % (num_batches,
                                                             time() - t0))

    loss_diff = np.abs(np.array(loss_custom) - np.array(loss_control))
    print("%s max loss diff: %e" % (optimizer_name, np.max(loss_diff)))
    assert np.allclose(loss_custom, loss_control, rtol=0, atol=1e-3)

    # cleanup
    del model_custom, model_control
    del optimizer_custom, optimizer_control
    reset_seeds(reset_graph_with_backend=K, verbose=0)

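# A possible driver for the control comparison above (hypothetical wiring; the
# actual test file may invoke `_test_control` differently). Stripping the
# trailing 'W' from `optimizer_name` selects the stock Keras counterpart
# (e.g. 'AdamW'[:-1] == 'Adam'), so with penalties and warm restarts disabled
# the two runs should agree within atol=1e-3.
def test_control():
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        _test_control(optimizer_name)
        cprint("\n<< {} CONTROL TEST PASSED >>\n".format(optimizer_name),
               'green')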