Example #1
        # Note: `optimizer_factory`, `lr` and `g` are supplied by the enclosing
        # scope; see the `check_clip_grad` helper in Example #11 below.
        def f(g):
            a = T.variable([], initializer=123.)
            b = T.variable([], initializer=456.)
            c = T.variable([], initializer=789.)
            T.random.seed(1234)
            optimizer = optimizer_factory([a, b, c], lr)

            with optimizer.capture_grad():
                optimizer.add_loss((a + b)**2)
            g(optimizer)
            optimizer.step()

            return [T.to_numpy(t) for t in (a, b, c, (a + b)**2)]
Example #2
    def test_xavier_initializer(self):
        for dtype, initializer, mode in product(
                float_dtypes,
                (tk.init.xavier_normal, tk.init.xavier_uniform),
                (None, 'fan_in', 'fan_out'),
        ):
            weight = T.variable([n_samples // 50, 50],
                                dtype=dtype,
                                initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))
            mode_arg = {'mode': mode} if mode is not None else {}

            # xavier
            fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
            xavier_std = np.sqrt(2.0 / float(fan_in + fan_out))
            tk.init.apply_initializer(weight, initializer, **mode_arg)
            self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                                 5.0 / xavier_std / np.sqrt(n_samples))

            # xavier with custom gain and fan_in/fan_out
            fan_in, fan_out = 23, 17
            init_gain = 1.5
            xavier_std = init_gain * np.sqrt(2.0 / float(fan_in + fan_out))
            tk.init.apply_initializer(weight,
                                      initializer,
                                      fan_in_and_fan_out=(fan_in, fan_out),
                                      gain=init_gain,
                                      **mode_arg)
            self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                                 5.0 / xavier_std / np.sqrt(n_samples))
Example #3
    def test_fill(self):
        for dtype in float_dtypes:
            weight = T.variable([2, 3, 4], dtype=dtype, initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))
            tk.init.apply_initializer(weight,
                                      partial(tk.init.fill, fill_value=123.))
            assert_equal(weight, T.full_like(weight, 123.))
Example #4
    def test_kaming_initializer(self):
        for dtype, initializer, mode in product(
                float_dtypes,
                (tk.init.kaming_normal, tk.init.kaming_uniform),
                (None, 'fan_in', 'fan_out'),
        ):
            weight = T.variable([n_samples // 50, 50],
                                dtype=dtype,
                                initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))
            mode_arg = {'mode': mode} if mode is not None else {}

            # kaming
            fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
            if mode == 'fan_out':
                kaming_std = np.sqrt(1.0 / np.sqrt(fan_out))
            else:
                kaming_std = np.sqrt(1.0 / np.sqrt(fan_in))
            tk.init.apply_initializer(weight, initializer, **mode_arg)
            self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                                 5.0 / kaming_std / np.sqrt(n_samples))

            # kaming with custom gain and fan_in/fan_out
            fan_in, fan_out = 23, 17
            init_gain = 1.5
            if mode == 'fan_out':
                kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_out))
            else:
                kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_in))
            tk.init.apply_initializer(weight,
                                      initializer,
                                      fan_in_and_fan_out=(fan_in, fan_out),
                                      gain=init_gain,
                                      **mode_arg)
            self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                                 5.0 / kaming_std / np.sqrt(n_samples))

            # test error
            with pytest.raises(
                    ValueError,
                    match='`mode` must be either "fan_in" or "fan_out"'):
                weight = T.variable([n_samples // 50, 50],
                                    dtype=dtype,
                                    initializer=0.)
                tk.init.apply_initializer(weight, initializer, mode='invalid')
Example #5
def full_scan_average_check(ctx, factory, input_x, expected):
    # scan every row of `input_x` through the averaging object, then verify
    # that committing writes the expected average back into the weight
    weight = T.variable(T.shape(input_x)[1:], initializer=tk.init.zeros,
                        requires_grad=False)
    avg = factory([weight])
    for x in input_x:
        T.assign(weight, x)
        avg.update()
    avg.commit()
    assert_allclose(weight, expected, atol=1e-4, rtol=1e-6)
Example #6
    def test_random_init(self):
        for dtype in float_dtypes:
            t = T.variable([n_samples, 2, 3], dtype=dtype)
            for fn, mean, std in [
                (partial(T.random.normal_init, mean=1., std=2.), 1., 2.),
                (partial(T.random.uniform_init, low=0., high=1.),
                 0.5, 1. / math.sqrt(12)),
            ]:
                fn(t)
                # the sample mean should fall within 3 standard errors of the
                # true mean, i.e. 3 * std / sqrt(N) for N = n_samples * 2 * 3
                t_mean = np.mean(T.to_numpy(t))
                self.assertLess(abs(t_mean - mean),
                                3. * std / math.sqrt(n_samples * 2 * 3))
Example #7
    def test_normal(self):
        for dtype in float_dtypes:
            weight = T.variable([n_samples // 50, 50],
                                dtype=dtype,
                                initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))

            # normal with default args
            tk.init.apply_initializer(weight, tk.init.normal)
            self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                                 5.0 / np.sqrt(n_samples))

            # normal with customized args
            tk.init.apply_initializer(weight,
                                      partial(tk.init.normal, mean=1., std=3.))
            self.assertLessEqual(
                np.abs(T.to_numpy(T.reduce_mean(weight)) - 1.),
                5.0 * 3. / np.sqrt(n_samples))
Example #8
    def test_uniform(self):
        for dtype in float_dtypes:
            weight = T.variable([n_samples // 50, 50],
                                dtype=dtype,
                                initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))

            # uniform with default args
            tk.init.apply_initializer(weight, tk.init.uniform)
            self.assertLessEqual(
                np.abs(T.to_numpy(T.reduce_mean(weight)) - 0.5),
                5.0 / np.sqrt(12.) / np.sqrt(n_samples))

            # uniform with customized args:
            # mean = (low + high) / 2 = -0.5, std = (high - low) / sqrt(12)
            tk.init.apply_initializer(
                weight, partial(tk.init.uniform, low=-4., high=3.))
            self.assertLessEqual(
                np.abs(T.to_numpy(T.reduce_mean(weight)) - (-0.5)),
                5.0 * 7.0 / np.sqrt(12.) / np.sqrt(n_samples))
Example #9
    def test_apply_initializer(self):
        for dtype in float_dtypes:
            weight = T.variable([5, 3], dtype=dtype)
            fan_in_and_fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
            initializer = Mock()

            # test by value
            tk.init.apply_initializer(weight, 123)
            assert_equal(weight, T.full_like(weight, 123))
            tk.init.apply_initializer(weight, 124.)
            assert_equal(weight, T.full_like(weight, 124.))
            tk.init.apply_initializer(weight, np.array(125.))
            assert_equal(weight, T.full_like(weight, 125.))

            value = np.random.randn(*T.shape(weight)).astype(dtype)
            tk.init.apply_initializer(weight, value)
            assert_equal(weight, value)

            # test by initializer
            initializer.reset_mock()
            tk.init.apply_initializer(weight, initializer)
            self.assertEqual(initializer.call_args,
                             ((weight, ), {
                                 'gain': 1.0,
                                 'mode': 'fan_in',
                                 'fan_in_and_fan_out': fan_in_and_fan_out,
                             }))

            # test fan_in_and_fan_out
            initializer.reset_mock()
            tk.init.apply_initializer(weight,
                                      initializer,
                                      fan_in_and_fan_out=(2, 3))
            self.assertEqual(initializer.call_args,
                             ((weight, ), {
                                 'gain': 1.0,
                                 'mode': 'fan_in',
                                 'fan_in_and_fan_out': (2, 3),
                             }))

            initializer.reset_mock()
            tk.init.apply_initializer(weight, initializer, mode='fan_out')
            self.assertEqual(initializer.call_args,
                             ((weight, ), {
                                 'gain': 1.0,
                                 'mode': 'fan_out',
                                 'fan_in_and_fan_out': fan_in_and_fan_out,
                             }))

            # test gain
            initializer.reset_mock()
            tk.init.apply_initializer(weight, initializer, gain=1.5)
            self.assertEqual(initializer.call_args,
                             ((weight, ), {
                                 'gain': 1.5,
                                 'mode': 'fan_in',
                                 'fan_in_and_fan_out': fan_in_and_fan_out,
                             }))

            for activation in ['LeakyReLU', tk.layers.ReLU, tk.layers.Tanh()]:
                initializer.reset_mock()
                init_gain = tk.init.get_activation_gain(activation)
                tk.init.apply_initializer(weight,
                                          initializer,
                                          activation=activation)
                self.assertEqual(initializer.call_args,
                                 ((weight, ), {
                                     'gain': init_gain,
                                     'mode': 'fan_in',
                                     'fan_in_and_fan_out': fan_in_and_fan_out,
                                 }))

            # unsupported initializer
            with pytest.raises(TypeError, match='Unsupported initializer'):
                tk.init.apply_initializer(weight, object())
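
For reference, a minimal sketch of a user-defined initializer that conforms to the call signature asserted above (the name `scaled_normal` and its scaling rule are illustrative assumptions, not part of the library; `import math` is assumed):

    def scaled_normal(weight, gain=1.0, mode='fan_in', fan_in_and_fan_out=None):
        # `apply_initializer` invokes the initializer with the tensor plus the
        # `gain`, `mode` and `fan_in_and_fan_out` keyword arguments shown above
        fan_in, fan_out = fan_in_and_fan_out
        fan = fan_in if mode == 'fan_in' else fan_out
        T.random.normal_init(weight, mean=0., std=gain / math.sqrt(fan))

    # usage: tk.init.apply_initializer(weight, scaled_normal, gain=1.5)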
Example #10
    def test_ones(self):
        for dtype in float_dtypes:
            weight = T.variable([2, 3, 4], dtype=dtype, initializer=0.)
            assert_equal(weight, T.full_like(weight, 0.))
            tk.init.apply_initializer(weight, tk.init.ones)
            assert_equal(weight, T.full_like(weight, 1.))
Example #11
def optimizer_standard_check(ctx, optimizer_factory, lr):
    a = T.variable([], initializer=123.)
    b = T.variable([], initializer=456.)

    def calculate_loss(a, b):
        return (a + b)**2

    optimizer = optimizer_factory(iter([a]), lr)
    ctx.assertEqual(optimizer.lr, lr)
    ctx.assertEqual(list(optimizer.iter_params()), [a])

    with pytest.raises(ValueError, match='Duplicated parameter'):
        optimizer.add_params([a])

    with pytest.raises(ValueError, match='Duplicated parameter'):
        _ = optimizer_factory([a, a], lr)

    # test optimizing a
    optimizer.clear_grad()
    with optimizer.capture_grad():
        loss = calculate_loss(a, b)
        optimizer.add_loss(loss)
    optimizer.step()
    ctx.assertLessEqual(calculate_loss(a, b), loss)
    assert_not_equal(a, 123.)
    assert_equal(b, 456.)

    # test optimizing a and b, and also use `set_param_grad`
    # to optimize a0 and b0
    T.random.seed(1234)
    optimizer = optimizer_factory(iter([a]), lr)
    optimizer.add_params(iter([b]))
    ctx.assertEqual(list(optimizer.iter_params()), [a, b])

    T.random.seed(1234)
    a0 = T.variable([], initializer=a)
    b0 = T.variable([], initializer=b)
    optimizer0 = optimizer_factory([a0], lr)
    optimizer0.add_params([b0])

    with optimizer.capture_grad():
        loss = calculate_loss(a, b)
        optimizer.add_loss(loss)

    # copy grads to optimizer0
    params_and_grads = list(optimizer.iter_params_and_grads())
    ctx.assertEqual(len(params_and_grads), 2)
    ctx.assertIs(params_and_grads[0][0], a)
    ctx.assertIs(params_and_grads[1][0], b)
    optimizer0.set_param_grad(
        a0, T.as_tensor(params_and_grads[0][1], force_copy=True))
    optimizer0.set_param_grad(
        b0, T.as_tensor(params_and_grads[1][1], force_copy=True))

    optimizer.step()
    ctx.assertLessEqual(calculate_loss(a, b), loss)
    assert_not_equal(a, 123.)
    assert_not_equal(b, 456.)

    optimizer0.step()
    assert_allclose(calculate_loss(a0, b0), calculate_loss(a, b))
    assert_allclose(a0, a)
    assert_allclose(b0, b)

    # save checkpoint
    with TemporaryDirectory() as temp_dir:
        ckpt_path = os.path.join(temp_dir, 'ckpt')
        checkpoint = tk.train.Checkpoint(optimizer=optimizer)
        checkpoint.save(ckpt_path)

        # test backup and restore the status
        a2 = T.variable([], initializer=a)
        b2 = T.variable([], initializer=b)
        optimizer2 = optimizer_factory([a2], lr)
        optimizer2.add_params([b2])
        checkpoint2 = tk.train.Checkpoint(optimizer=optimizer2)
        checkpoint2.restore(ckpt_path)

        with optimizer2.capture_grad():
            loss = calculate_loss(a2, b2)
            optimizer2.add_loss(loss)
        optimizer2.step()
        ctx.assertLessEqual(calculate_loss(a2, b2), loss)
        assert_not_equal(a2, a)
        assert_not_equal(b2, b)

        # test backup and restore the status, and use maximize instead of minimize
        a3 = T.variable([], initializer=a)
        b3 = T.variable([], initializer=b)
        optimizer3 = optimizer_factory([a3], lr)
        optimizer3.add_params([b3])
        checkpoint3 = tk.train.Checkpoint(optimizer=optimizer3)
        checkpoint3.restore(ckpt_path)

        with optimizer3.capture_grad():
            loss = calculate_loss(a3, b3)
            optimizer3.add_loss(-loss, maximize=True)
        optimizer3.step()
        ctx.assertLessEqual(calculate_loss(a3, b3), loss)
        assert_allclose(a3, a2)
        assert_allclose(b3, b2)
        assert_allclose(calculate_loss(a3, b3), calculate_loss(a2, b2))

        # backup and restore the status, change the learning rate, take another
        # step, and check that the result differs from that of optimizer2
        a4 = T.variable([], initializer=a)
        b4 = T.variable([], initializer=b)
        optimizer4 = optimizer_factory([a4], lr)
        optimizer4.add_params([b4])
        checkpoint4 = tk.train.Checkpoint(optimizer=optimizer4)
        checkpoint4.restore(ckpt_path)

        optimizer4.set_lr(lr * 0.5)
        ctx.assertEqual(optimizer4.lr, lr * 0.5)
        with optimizer4.capture_grad():
            loss = calculate_loss(a4, b4)
            optimizer4.add_loss(loss)
        optimizer4.step()
        assert_not_allclose(a4, a2)
        assert_not_allclose(b4, b2)
        assert_not_allclose(calculate_loss(a4, b4), calculate_loss(a2, b2))

    # now continue the optimization with the first optimizer, and compare
    # the result with that of optimizer2
    optimizer.clear_grad()
    with optimizer.capture_grad():
        loss = calculate_loss(a, b)
        optimizer.add_loss(loss)
    optimizer.step()
    ctx.assertLessEqual(calculate_loss(a, b), loss)
    assert_allclose(a, a2)
    assert_allclose(b, b2)
    assert_allclose(calculate_loss(a, b), calculate_loss(a2, b2))

    # test context
    optimizer.clear_grad()
    with pytest.raises(RuntimeError,
                       match=r'`add_loss\(\)` must be called inside the '
                       r'`capture_grad\(\)` context'):
        optimizer.add_loss(calculate_loss(a, b))

    optimizer.clear_grad()
    with optimizer.capture_grad():
        optimizer.add_loss(calculate_loss(a, b))
        with pytest.raises(RuntimeError,
                           match=r'`step\(\)` must be called outside the '
                           r'`capture_grad\(\)` context'):
            optimizer.step()

    # test clip grads
    def check_clip_grad(optimizer_fn, naive_fn):
        def f(g):
            a = T.variable([], initializer=123.)
            b = T.variable([], initializer=456.)
            c = T.variable([], initializer=789.)
            T.random.seed(1234)
            optimizer = optimizer_factory([a, b, c], lr)

            with optimizer.capture_grad():
                optimizer.add_loss((a + b)**2)
            g(optimizer)
            optimizer.step()

            return [T.to_numpy(t) for t in (a, b, c, (a + b)**2)]

        def h(optimizer):
            params = []
            grads = []
            for param, grad in optimizer.iter_params_and_grads():
                if grad is not None:
                    params.append(param)
                    grads.append(grad)
            grads = naive_fn(grads)
            for param, grad in zip(params, grads):
                optimizer.set_param_grad(param, grad)

        a, b, c, loss = f(lambda optimizer: optimizer_fn(optimizer))
        a0, b0, c0, loss0 = f(h)

        for t, t0 in zip((a, b, c, loss), (a0, b0, c0, loss0)):
            assert_allclose(t, t0, rtol=1e-4, atol=1e-6)

    def naive_clip_by_value(grads, clip_min, clip_max):
        return [T.clip(g, clip_min, clip_max) for g in grads]

    def naive_clip_by_norm(grads, clip_norm):
        return [T.clip_by_norm(g, clip_norm) for g in grads]

    def naive_clip_by_global_norm(grads, clip_norm):
        return T.clip_by_global_norm(grads, clip_norm)

    for v in [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0]:
        check_clip_grad(
            lambda optimizer: optimizer.clip_grad_by_value(-v, v),
            lambda grads: naive_clip_by_value(grads, -v, v),
        )
        check_clip_grad(
            lambda optimizer: optimizer.clip_grad_by_norm(v),
            lambda grads: naive_clip_by_norm(grads, v),
        )
        check_clip_grad(
            lambda optimizer: optimizer.clip_grad_by_global_norm(v),
            lambda grads: naive_clip_by_global_norm(grads, v),
        )
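
For clarity, "clip by global norm" rescales all gradients by a single factor so that their joint L2 norm does not exceed the given threshold. A plain NumPy sketch of that rule (illustrative only; the check above relies on the backend's `T.clip_by_global_norm`):

    def clip_by_global_norm_np(grads, clip_norm):
        # joint L2 norm across all gradient arrays
        global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
        # shrink only when the joint norm exceeds the threshold
        scale = min(1.0, clip_norm / max(global_norm, 1e-12))
        return [g * scale for g in grads]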
Example #12
def stepwise_average_check(ctx, factory, update_fn, get_fn):
    def clone_state(val):
        if isinstance(val, dict):
            return {k: clone_state(v) for k, v in val.items()}
        elif isinstance(val, list):
            return [clone_state(v) for v in val]
        elif isinstance(val, (T.Tensor, T.Variable)):
            return T.copy(val)
        elif isinstance(val, np.ndarray):
            return np.copy(val)
        else:
            return copy.copy(val)

    T.random.seed(1234)
    weights = [
        T.variable(shape=[4], initializer=tk.init.zeros, requires_grad=False),
        T.variable(shape=[3], initializer=tk.init.zeros, requires_grad=False),
    ]
    answers = [clone_state(w) for w in weights]
    inputs_1 = T.random.randn([7, 4])
    inputs_2 = T.random.randn([7, 3])

    # do a scan
    avg = factory(weights)
    the_states = []
    the_outputs = []
    num_updates = 0

    for batch_vals in zip(inputs_1, inputs_2):
        for weight, val in zip(weights, batch_vals):
            T.assign(weight, val)

        the_states.append(clone_state(avg.get_state_dict()))
        avg.update()

        with avg.temporarily_commit():
            the_outputs.extend(clone_state(w) for w in weights)
            for i, val in enumerate(batch_vals):
                answers[i] = update_fn(answers[i], val, num_updates)
            num_updates += 1
            for weight, ans in zip(weights, answers):
                assert_allclose(weight, get_fn(ans, num_updates), rtol=1e-4, atol=1e-6)

        for weight, val in zip(weights, batch_vals):
            assert_allclose(weight, val, rtol=1e-4, atol=1e-6)

    # test enabled = False
    avg = factory(weights, enabled=False)
    for x1, x2, state, output in zip(inputs_1, inputs_2, the_states, the_outputs):
        batch_vals = [x1, x2]
        for weight, val in zip(weights, batch_vals):
            T.assign(weight, val)
        avg.update()

    avg.commit()  # should still affect weights even if enabled is False
    for avg_val in avg.get_state_dict()['averages']:
        assert_allclose(avg_val, T.zeros_like(avg_val), rtol=1e-4, atol=1e-6)
    for weight in weights:
        assert_allclose(weight, T.zeros_like(weight), rtol=1e-4, atol=1e-6)

    # do another scan using the backed-up states
    avg = factory(weights, enabled=False)
    avg.set_enabled(True)
    for x1, x2, state, output in zip(inputs_1, inputs_2, the_states, the_outputs):
        batch_vals = [x1, x2]
        for weight, val in zip(weights, batch_vals):
            T.assign(weight, val)

        avg.set_state_dict(state)
        avg.update()

        with avg.temporarily_commit():
            the_outputs.extend(clone_state(w) for w in weights)
        for weight, val in zip(weights, batch_vals):
            assert_allclose(weight, val, rtol=1e-4, atol=1e-6)

    # try setting a bad state
    avg = factory(weights)
    state = dict(avg.get_state_dict())
    state['averages'] = []
    with pytest.raises(ValueError, match='Bad state'):
        avg.set_state_dict(state)
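
As an illustration of the `update_fn` / `get_fn` contract exercised above, a plain running-sum mean could be written as follows (a hypothetical sketch; the averaging classes actually under test are supplied via `factory`):

    def mean_update_fn(acc, val, num_updates):
        # `acc` holds the running sum; `num_updates` counts previous updates
        return acc + val

    def mean_get_fn(acc, num_updates):
        # arithmetic mean after `num_updates` updates have been applied
        return acc / num_updates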