Example #1
def test_tensor_learning_rate():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = yogi.Yogi(tf.constant(0.01), initial_accumulator_value=1.0)

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(0.9 ** t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999 ** t, beta2_power)

            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = yogi_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = yogi_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
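The numpy reference `yogi_update_numpy` used in the assertions above is defined elsewhere in the test module and is not shown on this page. A minimal sketch of what such a reference step could look like, assuming the standard Yogi update (sign-based second-moment update plus an Adam-style bias-corrected step size) and ignoring the optional L1/L2 regularization handled in Example #5:

import numpy as np

def yogi_update_numpy(param, g_t, t, m, v, lr=0.01,
                      beta1=0.9, beta2=0.999, epsilon=1e-3):
    # Hypothetical reference implementation of one Yogi step; the
    # hyperparameter defaults are assumptions, not taken from the library.
    m_t = beta1 * m + (1 - beta1) * g_t
    g2_t = g_t * g_t
    # Yogi's additive, sign-based second-moment update.
    v_t = v - (1 - beta2) * np.sign(v - g2_t) * g2_t
    # Adam-style bias-corrected step size (t is 1-based in the loop above).
    lr_t = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
    return param_t, m_t, v_t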
Example #2
def test_sharing():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = lamb.LAMB()

        # Fetch params to validate initial values
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of LAMB, reusing the same optimizer for both variables.
        for t in range(3):
            beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(
                0.9**(t + 1), beta_1_power)
            test_utils.assert_allclose_according_to_type(
                0.999**(t + 1), beta_2_power)

            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = lamb_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = lamb_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
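As with Yogi, the numpy reference `lamb_update_numpy` is defined outside the snippets shown here. A sketch of a LAMB step consistent with how it is called above (Adam-style moments, bias correction with the 0-based step `t`, optional decoupled weight decay, and a layer-wise trust ratio); the defaults are assumptions:

import numpy as np

def lamb_update_numpy(param, g_t, t, m, v, lr=0.001, lamb_wd=0.0,
                      beta1=0.9, beta2=0.999, epsilon=1e-6):
    # Hypothetical reference implementation of one LAMB step.
    m_t = beta1 * m + (1 - beta1) * g_t
    v_t = beta2 * v + (1 - beta2) * g_t * g_t
    m_hat = m_t / (1 - beta1 ** (t + 1))
    v_hat = v_t / (1 - beta2 ** (t + 1))
    update = m_hat / (np.sqrt(v_hat) + epsilon) + lamb_wd * param
    # Layer-wise trust ratio ||param|| / ||update||, falling back to 1.0.
    w_norm, u_norm = np.linalg.norm(param), np.linalg.norm(update)
    ratio = w_norm / u_norm if w_norm > 0 and u_norm > 0 else 1.0
    param_t = param - lr * ratio * update
    return param_t, m_t, v_t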
Example #3
def test_fit_simple_linear_model_mixed_precision():
    if test_utils.is_gpu_available() and LooseVersion(
            tf.__version__) <= "2.2.0":
        pytest.xfail(
            "See https://github.com/tensorflow/tensorflow/issues/39775")
    np.random.seed(0x2019)
    tf.random.set_seed(0x2019)

    x = np.random.standard_normal((10000, 3))
    w = np.random.standard_normal((3, 1))
    y = np.dot(x, w) + np.random.standard_normal((10000, 1)) * 1e-4

    try:
        tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(input_shape=(3, ), units=1))
        model.compile(Lookahead("sgd"), loss="mse")
    finally:
        tf.keras.mixed_precision.experimental.set_policy("float32")
    model.fit(x, y, epochs=3)

    x = np.random.standard_normal((100, 3))
    y = np.dot(x, w)
    predicted = model.predict(x)

    max_abs_diff = np.max(np.abs(predicted - y))
    assert max_abs_diff < 2.3e-3
    assert max_abs_diff >= 1e-3
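Outside of the test harness, the same pattern of wrapping a base optimizer in Lookahead while a mixed-precision policy is active might look roughly like the sketch below. It assumes TensorFlow >= 2.4 (non-experimental policy API) and that `Lookahead` is `tfa.optimizers.Lookahead`:

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

tf.keras.mixed_precision.set_global_policy("mixed_float16")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
# Lookahead wraps any Keras optimizer; a string such as "sgd" is resolved by name.
model.compile(optimizer=tfa.optimizers.Lookahead("sgd"), loss="mse")

x = np.random.standard_normal((1024, 3)).astype("float32")
y = x @ np.array([[0.5], [-1.0], [2.0]], dtype="float32")
model.fit(x, y, epochs=1, verbose=0)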
Example #4
def test_sparse_repeated_indices():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]),
            tf.constant([2, 1]),
        )
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]),
            tf.constant([2, 1]),
        )
        opt1 = yogi.Yogi()
        opt2 = yogi.Yogi()

        np.testing.assert_allclose(
            aggregated_update_var.numpy(), repeated_index_update_var.numpy(),
        )

        for _ in range(3):
            opt1.apply_gradients([(grad_repeated_index, repeated_index_update_var)])
            opt2.apply_gradients([(grad_aggregated, aggregated_update_var)])

        np.testing.assert_allclose(
            aggregated_update_var.numpy(), repeated_index_update_var.numpy(),
        )
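The equivalence exercised here (two gradient rows at the same index versus one pre-summed row) can be made explicit with `tf.math.unsorted_segment_sum`, one way to aggregate duplicate indices, shown only to illustrate the expected behaviour:

import tensorflow as tf

values = tf.constant([[0.1], [0.1]])
indices = tf.constant([1, 1])
aggregated = tf.math.unsorted_segment_sum(values, indices, num_segments=2)
print(aggregated.numpy())  # [[0.], [0.2]] -- the same row 1 as grad_aggregated above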
Example #5
def do_test_sparse(beta1=0.0, l1reg=0.0, l2reg=0.0):
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                  tf.constant(grads0_np_indices),
                                  tf.constant([2]))
        grads1_np_indices = np.array([0, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                  tf.constant(grads1_np_indices),
                                  tf.constant([2]))
        opt = yogi.Yogi(
            beta1=beta1,
            l1_regularization_strength=l1reg,
            l2_regularization_strength=l2reg,
            initial_accumulator_value=1.0,
        )

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(beta1**t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999**t, beta2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = yogi_update_numpy(var0_np,
                                                grads0_np,
                                                t,
                                                m0,
                                                v0,
                                                beta1=beta1,
                                                l1reg=l1reg,
                                                l2reg=l2reg)
            var1_np, m1, v1 = yogi_update_numpy(var1_np,
                                                grads1_np,
                                                t,
                                                m1,
                                                v1,
                                                beta1=beta1,
                                                l1reg=l1reg,
                                                l2reg=l2reg)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
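`_dtypes_to_test` is another module-level helper not shown on this page. Judging from the issue #347 comments elsewhere in these examples (tf.half kernels missing for sparse GPU updates), a plausible sketch is the following; the exact dtype lists are an assumption:

import tensorflow as tf

def _dtypes_to_test(use_gpu):
    # Assumed behaviour: skip tf.half on GPU where the required kernels
    # are not registered (see issue #347), and cover it on CPU.
    if use_gpu:
        return [tf.float32, tf.float64]
    return [tf.half, tf.float32, tf.float64]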
Example #6
def test_basic_with_learning_rate_decay():
    for i, dtype in enumerate(
            _dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5
        lamb_wd = 0.01

        opt = lamb.LAMB(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            weight_decay=lamb_wd,
            decay=decay,
        )

        # Run 3 steps of LAMB
        for t in range(3):
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            lr_np = learning_rate / (1 + decay * t)

            var0_np, m0, v0 = lamb_update_numpy(var0_np,
                                                grads0_np,
                                                t,
                                                m0,
                                                v0,
                                                lr=lr_np,
                                                lamb_wd=lamb_wd)
            var1_np, m1, v1 = lamb_update_numpy(var1_np,
                                                grads1_np,
                                                t,
                                                m1,
                                                v1,
                                                lr=lr_np,
                                                lamb_wd=lamb_wd)

            # Validate updated params
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
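For reference, `lr_np = learning_rate / (1 + decay * t)` mirrors the legacy Keras `decay` argument (inverse-time decay on the iteration count), so the three step sizes used above are simply:

learning_rate, decay = 0.001, 0.5
print([learning_rate / (1 + decay * t) for t in range(3)])
# [0.001, 0.0006666666666666666, 0.0005]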
Example #7
def test_dynamic_decode_tflite_conversion():
    if test_utils.is_gpu_available():
        pytest.skip("cpu-only test")
    units = 10
    vocab_size = 20
    cell = tf.keras.layers.LSTMCell(units)
    sampler = sampler_py.GreedyEmbeddingSampler()
    embeddings = tf.random.uniform([vocab_size, units])
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)

    @tf.function
    def _decode(start_tokens, end_token):
        batch_size = tf.size(start_tokens)
        initial_state = cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
        return decoder.dynamic_decode(
            my_decoder,
            maximum_iterations=5,
            enable_tflite_convertible=True,
            decoder_init_input=embeddings,
            decoder_init_kwargs=dict(
                initial_state=initial_state,
                start_tokens=start_tokens,
                end_token=end_token,
            ),
        )

    concrete_function = _decode.get_concrete_function(
        tf.TensorSpec([1], dtype=tf.int32), tf.TensorSpec([], dtype=tf.int32)
    )
    # String slicing breaks for versions like "2.10"; compare numerically instead.
    if tuple(int(x) for x in tf.__version__.split(".")[:2]) >= (2, 7):
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_function], _decode
        )
    else:
        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_function])
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS,
    ]
    _ = converter.convert()

    with pytest.raises(tf.errors.InvalidArgumentError, match="batch size"):
        # Batch size > 1 should throw an error.
        _decode.get_concrete_function(
            tf.TensorSpec([2], dtype=tf.int32), tf.TensorSpec([], dtype=tf.int32)
        )
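The converted flatbuffer is discarded above (`_ = converter.convert()`). If you wanted to actually run it, a rough continuation using the TFLite interpreter could look like this; the ordering of the two inputs is an assumption and should be checked via `get_input_details()`:

tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
# Assumed order: start_tokens first, end_token second.
interpreter.set_tensor(input_details[0]["index"], np.array([3], dtype=np.int32))
interpreter.set_tensor(input_details[1]["index"], np.array(0, dtype=np.int32))
interpreter.invoke()
outputs = [interpreter.get_tensor(d["index"]) for d in interpreter.get_output_details()]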
Example #8
def do_test_sparse_repeated_indices(dtype, optimizer, **optimizer_kwargs):
    """Test for repeated indices in sparse updates.

    This test verifies that an update with repeated indices is the same as
    an update with two times the gradient.

    Args:
        dtype: The tensorflow dtype used to build the variables.
        optimizer: The tensorflow optimizer class to be tested.
        **optimizer_kwargs: The parameters to pass to the constructor of the
            optimizer. Either a constant or a callable.
    """
    # TODO: remove this skip once issue #347 is fixed.
    if test_utils.is_gpu_available():
        pytest.skip("Skipped on GPU until issue #347 is fixed")

    repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
    aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
    grad_repeated_index = tf.IndexedSlices(
        tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
        tf.constant([1, 1]),
        tf.constant([2, 1]),
    )
    grad_aggregated = tf.IndexedSlices(
        tf.constant([0.2], shape=[1, 1], dtype=dtype),
        tf.constant([1]),
        tf.constant([2, 1]),
    )
    opt_repeated = optimizer(**optimizer_kwargs)
    _ = opt_repeated.apply_gradients([(grad_repeated_index,
                                       repeated_index_update_var)])
    opt_aggregated = optimizer(**optimizer_kwargs)
    _ = opt_aggregated.apply_gradients([(grad_aggregated,
                                         aggregated_update_var)])
    np.testing.assert_allclose(aggregated_update_var.numpy(),
                               repeated_index_update_var.numpy())
    for _ in range(3):
        opt_repeated.apply_gradients([(grad_repeated_index,
                                       repeated_index_update_var)])
        opt_aggregated.apply_gradients([(grad_aggregated,
                                         aggregated_update_var)])
        np.testing.assert_allclose(aggregated_update_var.numpy(),
                                   repeated_index_update_var.numpy())
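A concrete test built on this helper would presumably just bind an optimizer class and its constructor kwargs; a hypothetical example (the optimizer choice and hyperparameters are illustrative only):

import pytest
import tensorflow as tf

@pytest.mark.parametrize("dtype", [tf.float32, tf.float64])
def test_yogi_sparse_repeated_indices(dtype):
    # Hypothetical wrapper; yogi.Yogi stands in for any optimizer under test.
    do_test_sparse_repeated_indices(dtype, yogi.Yogi, learning_rate=1.0)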
Example #9
def test_minimize_sparse_resource_variable_nuclear():
    # TODO: revisit once issues #347 and #36764 are resolved.
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        def loss():
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
            return pred * pred

        # The gradient of the loss above, computed by hand.
        grads0_0 = 32 * 1.0 + 40 * 2.0
        grads0_1 = 40 * 1.0 + 50 * 2.0
        grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)

        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = opt.minimize(loss, var_list=[var0])

        # Validate updated params
        test_utils.assert_allclose_according_to_type(
            [
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                    2.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
                ]
            ],
            var0.numpy(),
        )
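The hand-derived gradient constants (`grads0_0`, `grads0_1`) can be sanity-checked against autodiff with a standalone computation of the same loss:

import tensorflow as tf

var0 = tf.Variable([[1.0, 2.0]])
x = tf.constant([[4.0], [5.0]])
with tf.GradientTape() as tape:
    pred = tf.matmul(var0, x)  # 1*4 + 2*5 = 14
    loss = pred * pred         # 196
grad = tape.gradient(loss, var0)
print(grad.numpy())  # [[112. 140.]] == [[32*1 + 40*2, 40*1 + 50*2]]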
Example #10
def test_resource():
    for i, dtype in enumerate(
            _dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        def learning_rate():
            return 0.001

        opt = lamb.LAMB(learning_rate=learning_rate)

        # Run 3 steps of LAMB
        for t in range(3):
            beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(
                0.9**(t + 1), beta_1_power)
            test_utils.assert_allclose_according_to_type(
                0.999**(t + 1), beta_2_power)

            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = lamb_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = lamb_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
Example #11
def do_test(dtype,
            optimizer,
            update_fn,
            do_sparse=False,
            do_decay_var_list=False,
            **optimizer_kwargs):
    """The major test function.

    Args:
        dtype: A (tensorflow dtype, index) tuple; the dtype is used to build
            the variables and the index to name them.
        optimizer: The tensorflow optimizer class to be tested.
        update_fn: The numpy update function of the optimizer. Its
            signature must be
            update_fn(var: np.array,
                      grad_t: np.array,
                      slot_vars: dict,
                      **kwargs) -> (updated_var, updated_slot_vars)
            Note that slot_vars will be initialized to an empty dictionary
            for each variable; initial values should be handled in the
            update_fn.
        do_sparse: If True, test sparse updates. Defaults to False, i.e.,
            dense updates.
        do_decay_var_list: If True, pass an explicit decay_var_list to
            apply_gradients to ensure variable hashing is handled correctly.
        **optimizer_kwargs: The parameters to pass to the constructor of the
            optimizer. Either a constant or a callable. These are also passed
            to the update_fn as keyword arguments.
    """
    # TODO: remove this skip once issue #347 is fixed.
    if do_sparse and test_utils.is_gpu_available():
        pytest.skip("Sparse updates are skipped on GPU until issue #347 is fixed")

    # Initialize variables for numpy implementation.
    np_slot_vars0, np_slot_vars1 = {}, {}
    var0_np = np.array([1.0, 2.0], dtype=dtype[0].as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype[0].as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype[0].as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype[0].as_numpy_dtype)
    # Create Tensorflow variables.
    var0 = tf.Variable(var0_np, name="var0_%d" % dtype[1])
    var1 = tf.Variable(var1_np, name="var1_%d" % dtype[1])
    if do_sparse:
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np),
            tf.constant(grads0_np_indices),
            tf.constant([2]),
        )
        grads1_np_indices = np.array([0, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np),
            tf.constant(grads1_np_indices),
            tf.constant([2]),
        )
    else:
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
    opt = optimizer(**optimizer_kwargs)
    # Create the update op.
    # Run 3 steps of the optimizer
    for _ in range(3):
        if do_decay_var_list:
            opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]),
                decay_var_list=[var0, var1],
            )
        else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        var0_np, np_slot_vars0 = update_fn(var0_np, grads0_np, np_slot_vars0,
                                           **optimizer_kwargs)
        var1_np, np_slot_vars1 = update_fn(var1_np, grads1_np, np_slot_vars1,
                                           **optimizer_kwargs)
        # Validate updated params
        test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
        test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
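As with the sparse helper above, concrete tests presumably drive `do_test` through parametrization. A hypothetical example with plain SGD and its one-line numpy update (the names below are illustrative, not from the library):

import numpy as np
import pytest
import tensorflow as tf

def sgd_update_numpy(var, grad, slot_vars, learning_rate=0.1, **_):
    # Matches the documented update_fn signature; SGD needs no slot variables.
    return var - learning_rate * grad, slot_vars

@pytest.mark.parametrize("dtype", [(tf.float32, 0), (tf.float64, 1)])
def test_dense_sgd(dtype):
    do_test(dtype, tf.keras.optimizers.SGD, sgd_update_numpy, learning_rate=0.1)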
Example #12
def test_sharing_nuclear():
    # TODO: revisit once issue #36764 is resolved.
    for dtype in _dtypes_with_checking_system(
            use_gpu=test_utils.is_gpu_available(), system=platform.system()):
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1)
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                            lambda_=lambda_,
                                            ord=ord)
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # In eager mode the two cg_update ops execute as soon as they are
        # declared, so we cannot check the parameter values at this point;
        # we can only verify the final values after the second update.

        # Step 2: the second conditional_gradient update incorporates
        # the previous update stored in the slot.
        # Check that the parameters have been updated.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector0[0]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector0[0],
                (2.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector0[1]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector1[0]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[0],
                (4.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector1[1]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
Example #13
def test_sparse_frobenius():
    # TODO: revisit once issue #347 is resolved.
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        norm0 = tf.math.reduce_sum(tf.math.multiply(grads0, grads0))**0.5
        norm1 = tf.math.reduce_sum(tf.math.multiply(grads1, grads1))**0.5
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "fro"
        cg_opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                            lambda_=lambda_,
                                            ord=ord)
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                0 - (1 - learning_rate) * lambda_ * 0 / norm0,
            ]),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * learning_rate -
                (1 - learning_rate) * lambda_ * 0.01 / norm1,
                1.0 * learning_rate -
                (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]),
            var1[2].numpy(),
        )
        # Step 2: the conditional_gradient update incorporates the
        # previous update stored in the slot.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        # Check that the parameters have been updated.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array([
                (0 -
                 (1 - learning_rate) * lambda_ * 0.1 / norm0) * learning_rate -
                (1 - learning_rate) * lambda_ * 0.1 / norm0,
                (0 -
                 (1 - learning_rate) * lambda_ * 0.1 / norm0) * learning_rate -
                (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * 0.01 / norm1) * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                (1.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * 0.01 / norm1) * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]),
            var1[2].numpy(),
        )
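All of the expected values above instantiate the same Frobenius-norm conditional-gradient (Frank-Wolfe style) step. Restated in numpy form (a restatement of the formula used in the assertions, not library code), for the rows that actually receive a gradient:

import numpy as np

def cg_frobenius_update_numpy(var, grad, learning_rate=0.1, lambda_=0.1):
    # var_new = lr * var - (1 - lr) * lambda_ * grad / ||grad||_F
    # Rows absent from a sparse gradient are left unchanged by the optimizer.
    norm = np.sqrt(np.sum(grad * grad))
    return learning_rate * var - (1 - learning_rate) * lambda_ * grad / norm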
Example #14
def test_tensor_learning_rate_and_conditional_gradient_nuclear():
    for dtype in _dtypes_with_checking_system(
            use_gpu=test_utils.is_gpu_available(), system=platform.system()):
        # TODO:
        # Based on issue #36764
        # (https://github.com/tensorflow/tensorflow/issues/36764),
        # tf.half is not registered for tf.linalg.svd on the Windows CPU
        # build, so tf.half has to be excluded when testing on Windows CPU.
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1)
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(learning_rate=tf.constant(0.5),
                                            lambda_=tf.constant(0.01),
                                            ord=ord)
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
        # Step 2: the conditional_gradient update incorporates the
        # previous update stored in the slot.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector0[0],
                (2.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector1[0],
                (4.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
Example #15
def test_basic_nuclear(use_resource):
    # TODO: revisit once issue #36764 is resolved.
    for i, dtype in enumerate(
            _dtypes_with_checking_system(use_gpu=test_utils.is_gpu_available(),
                                         system=platform.system())):

        if use_resource:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
        else:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1)

        def learning_rate():
            return 0.5

        def lambda_():
            return 0.01

        ord = "nuclear"

        cg_opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                            lambda_=lambda_,
                                            ord=ord)
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )

        # Step 2: the conditional_gradient update incorporates the previous update stored in the slot.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector0[0],
                (2.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector1[0],
                (4.0 * 0.5 -
                 (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5 -
                (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
Example #16
def test_sparse_nuclear():
    # TODO: revisit once issues #347 and #36764 are resolved.
    for dtype in _dtypes_with_checking_system(
            use_gpu=test_utils.is_gpu_available(), system=platform.system()):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        top_singular_vector0 = tf.constant(
            [[0.0, 0.0], [0.7071067, 0.7071067], [0.0, 0.0], [0.0, 0.0]],
            dtype=dtype)
        top_singular_vector1 = tf.constant(
            [
                [-4.2146844e-08, -4.2146844e-08],
                [0.0000000e00, 0.0000000e00],
                [4.9999994e-01, 4.9999994e-01],
                [4.9999994e-01, 4.9999994e-01],
            ],
            dtype=dtype,
        )
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                            lambda_=lambda_,
                                            ord=ord)
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
            ]),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                1.0 * learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
            ]),
            var1[2].numpy(),
        )
        # Step 2: the conditional_gradient update incorporates the
        # previous update stored in the slot.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array([
                (0 -
                 (1 - learning_rate) * lambda_ * top_singular_vector0[1][0]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                (0 -
                 (1 - learning_rate) * lambda_ * top_singular_vector0[1][1]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector1[2][0]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                (1.0 * learning_rate -
                 (1 - learning_rate) * lambda_ * top_singular_vector1[2][1]) *
                learning_rate -
                (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
            ]),
            var1[2].numpy(),
        )
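The hard-coded `top_singular_vector0` / `top_singular_vector1` constants are, up to sign, the rank-one matrices u1·v1^T built from the leading singular vectors of the densified gradients. They can be reproduced (approximately) with `tf.linalg.svd`:

import tensorflow as tf

dense_grad0 = tf.scatter_nd([[1]], tf.constant([[0.1, 0.1]]), [4, 2])
s, u, v = tf.linalg.svd(dense_grad0)
rank_one = tf.matmul(u[:, :1], v[:, :1], transpose_b=True)
print(rank_one.numpy())
# ~[[0, 0], [0.7071, 0.7071], [0, 0], [0, 0]] up to a global sign,
# matching the top_singular_vector0 constant above.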