Example #1
def test_Updater_CustomUpdate():
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config
        from returnn.tf.util.basic import CustomUpdate

        config = Config()
        network = TFNetwork(extern_data=ExternData(), train_flag=True)
        layer = network.add_layer(name="output",
                                  layer_class=DummyLayer,
                                  initial_value=4.0)
        assert isinstance(layer, DummyLayer)
        network.initialize_params(session=session)

        class CustomUpdateAdd13(CustomUpdate):
            def update_var(self, var):
                return tf_compat.v1.assign_add(var, 13.0)

        CustomUpdateAdd13().set_on_var(layer.x)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1000.0, session=session)  # should be ignored, since the var uses a custom update
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)
        session.run(updater.get_optim_op())
        # Should have applied CustomUpdateAdd13: 4.0 + 13.0 = 17.0.
        assert_almost_equal(
            session.run(network.get_default_output_layer().output.placeholder),
            17.0)
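
These snippets come from a single test module and rely on shared definitions that are not repeated in each example: make_scope, DummyLayer, Updater, tf_compat, tf, sys, and assert_almost_equal are all module-level names there. A minimal sketch of those helpers, reconstructed from how the snippets use them (the bodies in the real test file differ in detail, and the glue that feeds the dummy layer's loss into the network's total loss is omitted here):

import contextlib
import sys

import tensorflow as tf
from numpy.testing import assert_almost_equal

from returnn.tf import compat as tf_compat
from returnn.tf.updater import Updater
from returnn.tf.layers.base import LayerBase


@contextlib.contextmanager
def make_scope():
    """Yield a session bound to its own fresh graph."""
    with tf.Graph().as_default() as graph:
        with tf_compat.v1.Session(graph=graph) as session:
            yield session


class DummyLayer(LayerBase):
    """Holds a single scalar variable x; its loss is loss_value_factor * x."""

    def __init__(self, initial_value=0.0, loss_value_factor=1.0, **kwargs):
        super(DummyLayer, self).__init__(**kwargs)
        self.loss_value_factor = loss_value_factor
        self.x = self.add_param(tf.Variable(initial_value))
        self.output.placeholder = self.x

    def _get_loss_value(self):
        # The real test file wires this into the network's total loss.
        return self.loss_value_factor * self.x

    @classmethod
    def get_out_data_from_opts(cls, name, **kwargs):
        from returnn.tf.util.basic import Data
        return Data(name="%s_output" % name, shape=(), batch_dim_axis=None, dtype="float32")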
Example #2
def test_Updater_add_check_numerics_ops():
    class _Layer(DummyLayer):
        def _get_loss_value(self):
            return tf_compat.v1.log(self.x)

    from returnn.tf.network import TFNetwork, ExternData
    from returnn.config import Config

    with make_scope() as session:
        config = Config()
        config.set("debug_add_check_numerics_ops", True)
        network = TFNetwork(extern_data=ExternData(), train_flag=True)
        network.add_layer(name="output", layer_class=_Layer, initial_value=1.0)
        network.initialize_params(session=session)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1.0, session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)
        # Should succeed.
        session.run(updater.get_optim_op())
        # One gradient descent step on the loss ln(x) at x = 1.0: the gradient is 1.0 / x = 1.0, thus x becomes 1.0 - 1.0 = 0.0.
        assert_almost_equal(
            session.run(network.get_default_output_layer().output.placeholder),
            0.0)

        try:
            # Now, should fail.
            session.run(updater.get_optim_op())
        except tf.errors.InvalidArgumentError as exc:
            print("Expected exception: %r" % exc)
        else:
            assert False, "should have raised an exception"
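
The second run fails by construction: after the first step x is 0.0, so the loss ln(0.0) is -inf and its gradient 1.0 / x is +inf, and the numeric checks injected by debug_add_check_numerics_ops turn that into an InvalidArgumentError. RETURNN's actual helper lives in returnn.tf.util.basic; the following is only a simplified sketch of the idea in plain TensorFlow, written for illustration:

import tensorflow as tf

def add_check_numerics_ops(graph_ops):
    # For every floating-point tensor produced in the graph, add an op that
    # raises InvalidArgumentError if the tensor contains inf or nan.
    checks = []
    for op in graph_ops:
        for output in op.outputs:
            if output.dtype.is_floating:
                checks.append(
                    tf.debugging.check_numerics(output, "inf/nan in op %s" % op.name))
    return tf.group(*checks)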
Example #3
def test_Updater_simple_batch():
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config
        from returnn.datasets.generating import Task12AXDataset
        dataset = Task12AXDataset()
        dataset.init_seq_order(epoch=1)
        extern_data = ExternData()
        extern_data.init_from_dataset(dataset)

        config = Config()
        network = TFNetwork(extern_data=extern_data, train_flag=True)
        network.construct_from_dict({
            "layer1": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13,
                "from": "data:data"
            },
            "layer2": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13,
                "from": ["layer1"]
            },
            "output": {
                "class": "softmax",
                "loss": "ce",
                "target": "classes",
                "from": ["layer2"]
            }
        })
        network.initialize_params(session=session)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1.0, session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)

        from returnn.tf.data_pipeline import FeedDictDataProvider
        batches = dataset.generate_batches(
            recurrent_net=network.recurrent,
            batch_size=100,
            max_seqs=10,
            max_seq_length=sys.maxsize,
            used_data_keys=network.used_data_keys)
        data_provider = FeedDictDataProvider(tf_session=session,
                                             extern_data=extern_data,
                                             data_keys=network.used_data_keys,
                                             dataset=dataset,
                                             batches=batches)
        feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
        session.run(updater.get_optim_op(), feed_dict=feed_dict)
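
The test stops after a single optimizer step on the first batch. Training on the remaining batches would just repeat the last two lines; a plausible continuation, assuming have_more_data behaves as in RETURNN's DataProviderBase:

        # Hypothetical continuation: keep stepping while batches remain.
        while data_provider.have_more_data(session=session):
            feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
            session.run(updater.get_optim_op(), feed_dict=feed_dict)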
Example #4
def test_Updater_GradientDescent():
    with make_scope() as session:
        from returnn.tf.network import TFNetwork, ExternData
        from returnn.config import Config

        config = Config()
        network = TFNetwork(extern_data=ExternData(), train_flag=True)
        network.add_layer(name="output",
                          layer_class=DummyLayer,
                          initial_value=5.0,
                          loss_value_factor=3.0)
        network.initialize_params(session=session)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1.0, session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)
        session.run(updater.get_optim_op())
        # One gradient descent step from 3.0 * x: gradient is 3, thus 5 - 3 = 2.
        assert_almost_equal(
            session.run(network.get_default_output_layer().output.placeholder),
            2.0)
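
With an empty Config, the Updater falls back to plain stochastic gradient descent, which is exactly what the 5 - 3 = 2 arithmetic checks. A different optimizer can be chosen through the config; a sketch, assuming the "optimizer" dict form used by newer RETURNN versions (option names may vary across versions):

config = Config()
# Assumed config key: newer RETURNN selects the optimizer class by name here.
config.set("optimizer", {"class": "adam", "epsilon": 1e-8})
updater = Updater(config=config, network=network)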