def test_Updater_CustomUpdate():
    """Check that a ``CustomUpdate`` set on a variable is applied by the optimizer op.

    A ``DummyLayer`` named "output" is created with ``initial_value=4.0`` and a
    custom update that adds 13.0 is set on its ``x`` variable. After a single run
    of the optimizer op the network output is expected to be 17.0, although the
    learning rate was set to 1000.0.
    """
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config
        from returnn.tf.util.basic import CustomUpdate

        cfg = Config()
        net = TFNetwork(extern_data=ExternData(), train_flag=True)
        dummy = net.add_layer(name="output", layer_class=DummyLayer, initial_value=4.0)
        assert isinstance(dummy, DummyLayer)
        net.initialize_params(session=session)

        class CustomUpdateAdd13(CustomUpdate):
            # Update rule used by this test: add the constant 13.0 to the variable.
            def update_var(self, var):
                return tf_compat.v1.assign_add(var, 13.0)

        CustomUpdateAdd13().set_on_var(dummy.x)

        upd = Updater(config=cfg, network=net)
        # The learning rate is expected to have no effect on the custom update.
        upd.set_learning_rate(1000.0, session=session)
        upd.set_trainable_vars(net.get_trainable_params())
        upd.init_optimizer_vars(session=session)
        session.run(upd.get_optim_op())

        # 4.0 + 13.0 = 17.0 if CustomUpdateAdd13 was the update that got applied.
        out_tensor = net.get_default_output_layer().output.placeholder
        assert_almost_equal(session.run(out_tensor), 17.0)
def test_Updater_add_check_numerics_ops():
    """Check the updater with the ``debug_add_check_numerics_ops`` config option enabled.

    The layer loss is ``log(x)`` with ``x`` starting at 1.0. The first optimizer
    step is expected to run cleanly and move ``x`` to 0.0; the second step is
    expected to raise ``tf.errors.InvalidArgumentError``.
    """

    class _Layer(DummyLayer):
        # Loss is log(x), which stops being finite once x reaches 0.
        def _get_loss_value(self):
            return tf_compat.v1.log(self.x)

    from returnn.tf.network import TFNetwork, ExternData
    from returnn.config import Config

    with make_scope() as session:
        cfg = Config()
        cfg.set("debug_add_check_numerics_ops", True)
        net = TFNetwork(extern_data=ExternData(), train_flag=True)
        net.add_layer(name="output", layer_class=_Layer, initial_value=1.0)
        net.initialize_params(session=session)

        upd = Updater(config=cfg, network=net)
        upd.set_learning_rate(1.0, session=session)
        upd.set_trainable_vars(net.get_trainable_params())
        upd.init_optimizer_vars(session=session)

        # First step: all values are still finite, so this must not raise.
        session.run(upd.get_optim_op())
        # Gradient of ln(x) at x = 1.0 is 1.0 / x = 1.0; with learning rate 1.0
        # the new value is 1.0 - 1.0 = 0.0.
        out_tensor = net.get_default_output_layer().output.placeholder
        assert_almost_equal(session.run(out_tensor), 0.0)

        # Second step: starting from x = 0.0, the numerics check is expected to trip.
        raised = False
        try:
            session.run(upd.get_optim_op())
        except tf.errors.InvalidArgumentError as exc:
            print("Expected exception: %r" % exc)
            raised = True
        assert raised, "should have raised an exception"
def test_Updater_simple_batch():
    """Run one optimizer step on a small network fed with a batch from ``Task12AXDataset``.

    The test only checks that building the network, the updater and the feed dict,
    and then running the optimizer op once, works without raising.
    """
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config
        from returnn.datasets.generating import Task12AXDataset
        from returnn.tf.data_pipeline import FeedDictDataProvider

        dataset = Task12AXDataset()
        dataset.init_seq_order(epoch=1)
        extern_data = ExternData()
        extern_data.init_from_dataset(dataset)

        cfg = Config()
        net = TFNetwork(extern_data=extern_data, train_flag=True)
        # Two tanh linear layers followed by a softmax output with cross-entropy loss.
        net_dict = {
            "layer1": {"class": "linear", "activation": "tanh", "n_out": 13, "from": "data:data"},
            "layer2": {"class": "linear", "activation": "tanh", "n_out": 13, "from": ["layer1"]},
            "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer2"]},
        }
        net.construct_from_dict(net_dict)
        net.initialize_params(session=session)

        upd = Updater(config=cfg, network=net)
        upd.set_learning_rate(1.0, session=session)
        upd.set_trainable_vars(net.get_trainable_params())
        upd.init_optimizer_vars(session=session)

        batches = dataset.generate_batches(
            recurrent_net=net.recurrent,
            batch_size=100,
            max_seqs=10,
            max_seq_length=sys.maxsize,
            used_data_keys=net.used_data_keys,
        )
        provider = FeedDictDataProvider(
            tf_session=session,
            extern_data=extern_data,
            data_keys=net.used_data_keys,
            dataset=dataset,
            batches=batches,
        )
        # Only the feed dict is needed; the second returned value is unused here.
        feed_dict, _ = provider.get_feed_dict(single_threaded=True)
        session.run(upd.get_optim_op(), feed_dict=feed_dict)
def test_Updater_GradientDescent():
    """Check one optimizer step with a default ``Config`` and learning rate 1.0.

    The ``DummyLayer`` is created with ``initial_value=5.0`` and
    ``loss_value_factor=3.0``. After one run of the optimizer op the network
    output is expected to be 2.0.
    """
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config

        cfg = Config()
        net = TFNetwork(extern_data=ExternData(), train_flag=True)
        net.add_layer(
            name="output",
            layer_class=DummyLayer,
            initial_value=5.0,
            loss_value_factor=3.0,
        )
        net.initialize_params(session=session)

        upd = Updater(config=cfg, network=net)
        upd.set_learning_rate(1.0, session=session)
        upd.set_trainable_vars(net.get_trainable_params())
        upd.init_optimizer_vars(session=session)
        session.run(upd.get_optim_op())

        # Loss is 3.0 * x, so the gradient is 3; one step from 5 gives 5 - 3 = 2.
        out_tensor = net.get_default_output_layer().output.placeholder
        assert_almost_equal(session.run(out_tensor), 2.0)