Example 1
 def _create_variables(self, dev_str):
     """
     Create internal variables for the LSTM layer
     """
     # ToDo: support other initialization mechanisms, via class constructor options
     # ToDo: tidy the construction of these variables, with helper functions
     wlim = (6 / (self._output_channels + self._input_channels))**0.5
     input_weights = dict(
         zip(['layer_' + str(i) for i in range(self._num_layers)], [{
             'w':
             ivy.variable(
                 ivy.random_uniform(
                     -wlim,
                     wlim,
                     (self._input_channels if i == 0 else
                      self._output_channels, 4 * self._output_channels),
                     dev_str=dev_str))
         } for i in range(self._num_layers)]))
     wlim = (6 / (self._output_channels + self._output_channels))**0.5
     recurrent_weights = dict(
         zip(['layer_' + str(i) for i in range(self._num_layers)], [{
             'w':
             ivy.variable(
                 ivy.random_uniform(
                     -wlim,
                     wlim,
                     (self._output_channels, 4 * self._output_channels),
                     dev_str=dev_str))
         } for i in range(self._num_layers)]))
     return {'input': input_weights, 'recurrent': recurrent_weights}
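
The `wlim` bound used above is the Glorot (Xavier) uniform limit, sqrt(6 / (fan_in + fan_out)). A minimal NumPy sketch of the same initialisation scheme, with hypothetical channel sizes:

import numpy as np

def glorot_uniform(fan_in, fan_out, shape):
    # limit = sqrt(6 / (fan_in + fan_out)), matching `wlim` above
    limit = (6 / (fan_in + fan_out)) ** 0.5
    return np.random.uniform(-limit, limit, shape).astype('float32')

# e.g. input weights of the first layer: (input_channels, 4 * output_channels)
w = glorot_uniform(16, 32, (16, 4 * 32))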
Example 2
 def test_gradcheck(self, dev_str, dtype_str, call):
     if call is not helpers.torch_call:
         # ivy gradcheck method not yet implemented
         pytest.skip()
     input_ = ivy.variable(ivy.cast(ivy.random_uniform(shape=(2, 3, 4, 4), dev_str=dev_str), 'float64'))
     kernel = ivy.variable(ivy.cast(ivy.random_uniform(shape=(3, 3), dev_str=dev_str), 'float64'))
     assert gradcheck(top_hat, (input_, kernel), raise_exception=True)
Example 3
def test_linear_layer(bs_ic_oc_target, with_v, dtype_str, tensor_fn, dev_str,
                      call):
    # smoke test
    batch_shape, input_channels, output_channels, target = bs_ic_oc_target
    x = ivy.cast(
        ivy.linspace(ivy.zeros(batch_shape), ivy.ones(batch_shape),
                     input_channels), 'float32')
    if with_v:
        np.random.seed(0)
        wlim = (6 / (output_channels + input_channels))**0.5
        w = ivy.variable(
            ivy.array(
                np.random.uniform(-wlim, wlim,
                                  (output_channels, input_channels)),
                'float32'))
        b = ivy.variable(ivy.zeros([output_channels]))
        v = Container({'w': w, 'b': b})
    else:
        v = None
    linear_layer = ivy.Linear(input_channels, output_channels, v=v)
    ret = linear_layer(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == tuple(batch_shape + [output_channels])
    # value test
    if not with_v:
        return
    assert np.allclose(call(linear_layer, x), np.array(target))
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(linear_layer)
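
When `with_v` is False the layer builds its own 'w' and 'b' variables (see Example 8), so a minimal usage sketch, with hypothetical sizes and assuming a backend has already been selected, is:

import ivy

# Linear creates its own variables when v is None
linear = ivy.Linear(4, 8)
x = ivy.ones([2, 4])   # batch of 2, 4 input channels
y = linear(x)          # shape (2, 8)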
Example 4
def test_lstm(b_t_ic_hc_otf_sctv, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels), 'float32')
    init_h = ivy.ones([b, hidden_channels])
    init_c = ivy.ones([b, hidden_channels])
    kernel = ivy.variable(ivy.ones([input_channels, 4*hidden_channels]))*0.5
    recurrent_kernel = ivy.variable(ivy.ones([hidden_channels, 4*hidden_channels]))*0.5
    output, state_c = ivy.lstm_update(x, init_h, init_c, kernel, recurrent_kernel)
    # type test
    assert ivy.is_array(output)
    assert ivy.is_array(state_c)
    # cardinality test
    assert output.shape == (b, t, hidden_channels)
    assert state_c.shape == (b, hidden_channels)
    # value test
    output_true = np.tile(np.asarray(output_true_flat).reshape((b, t, 1)), (1, 1, hidden_channels))
    state_c_true = np.ones([b, hidden_channels]) * state_c_true_val
    output, state_c = call(ivy.lstm_update, x, init_h, init_c, kernel, recurrent_kernel)
    assert np.allclose(output, output_true, atol=1e-6)
    assert np.allclose(state_c, state_c_true, atol=1e-6)
    # compilation test
    if call in [helpers.torch_call]:
        # this is not a backend implemented function
        pytest.skip()
    helpers.assert_compilable(ivy.lstm_update)
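
Stripped of the assertions, the call pattern for `ivy.lstm_update` is just the five tensors below; the shapes follow the test (kernel is (input_channels, 4 * hidden_channels), recurrent kernel is (hidden_channels, 4 * hidden_channels)). A minimal sketch with hypothetical sizes, assuming a backend is set:

import ivy

b, t, in_ch, hid = 2, 3, 4, 5
x = ivy.ones([b, t, in_ch])
init_h = ivy.zeros([b, hid])
init_c = ivy.zeros([b, hid])
kernel = ivy.variable(ivy.ones([in_ch, 4 * hid]) * 0.5)
recurrent_kernel = ivy.variable(ivy.ones([hid, 4 * hid]) * 0.5)
output, state_c = ivy.lstm_update(x, init_h, init_c, kernel, recurrent_kernel)
# output: (b, t, hid), state_c: (b, hid)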
Example 5
def test_adam_update(ws_n_grads_n_lr_n_wsnew, dtype_str, tensor_fn, dev_str,
                     call):
    # smoke test
    ws_raw, dcdws_raw, lr, ws_raw_new = ws_n_grads_n_lr_n_wsnew
    ws = ws_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    dcdws = dcdws_raw.map(lambda x, _: ivy.array(x))
    ws_true_new = ws_raw_new.map(lambda x, _: ivy.variable(ivy.array(x)))
    mw = dcdws
    vw = dcdws.map(lambda x, _: x**2)
    ws_new, mw_new, vw_new = ivy.adam_update(ws, dcdws, lr, mw, vw,
                                             ivy.array(1))
    # type test
    assert isinstance(ws_new, dict)
    assert isinstance(mw_new, dict)
    assert isinstance(vw_new, dict)
    # cardinality test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert w_new.shape == w_true_new.shape
    for (m_new, m_orig) in zip(mw_new.values(), mw.values()):
        assert m_new.shape == m_orig.shape
    for (v_new, v_orig) in zip(vw_new.values(), vw.values()):
        assert v_new.shape == v_orig.shape
    # value test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert np.allclose(ivy.to_numpy(w_new), ivy.to_numpy(w_true_new))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support internal function definitions
        return
    helpers.assert_compilable(ivy.adam_update)
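
For reference, a NumPy sketch of the textbook Adam rule this test exercises; the exact defaults and epsilon placement inside `ivy.adam_update` may differ:

import numpy as np

def adam_step(w, g, m, v, step, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-7):
    # first and second moment estimates
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g ** 2
    # bias correction
    m_hat = m / (1 - beta1 ** step)
    v_hat = v / (1 - beta2 ** step)
    # parameter update
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    return w, m, v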
Example 6
def test_lstm_layer(b_t_ic_hc_otf_sctv, with_v, with_initial_state, dtype_str,
                    tensor_fn, dev_str, call):
    # smoke test
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(
        ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels),
        'float32')
    if with_initial_state:
        init_h = ivy.ones([b, hidden_channels])
        init_c = ivy.ones([b, hidden_channels])
        initial_state = ([init_h], [init_c])
    else:
        initial_state = None
    if with_v:
        kernel = ivy.variable(
            ivy.ones([input_channels, 4 * hidden_channels]) * 0.5)
        recurrent_kernel = ivy.variable(
            ivy.ones([hidden_channels, 4 * hidden_channels]) * 0.5)
        v = Container({
            'input': {
                'layer_0': {
                    'w': kernel
                }
            },
            'recurrent': {
                'layer_0': {
                    'w': recurrent_kernel
                }
            }
        })
    else:
        v = None
    lstm_layer = ivy.LSTM(input_channels, hidden_channels, v=v)
    output, (state_h, state_c) = lstm_layer(x, initial_state=initial_state)
    # type test
    assert ivy.is_array(output)
    assert ivy.is_array(state_h[0])
    assert ivy.is_array(state_c[0])
    # cardinality test
    assert output.shape == (b, t, hidden_channels)
    assert state_h[0].shape == (b, hidden_channels)
    assert state_c[0].shape == (b, hidden_channels)
    # value test
    if not with_v or not with_initial_state:
        return
    output_true = np.tile(
        np.asarray(output_true_flat).reshape((b, t, 1)),
        (1, 1, hidden_channels))
    state_c_true = np.ones([b, hidden_channels]) * state_c_true_val
    output, (state_h, state_c) = call(lstm_layer,
                                      x,
                                      initial_state=initial_state)
    assert np.allclose(output, output_true, atol=1e-6)
    assert np.allclose(state_c, state_c_true, atol=1e-6)
    # compilation test
    if call in [helpers.torch_call]:
        # this is not a backend implemented function
        pytest.skip()
    helpers.assert_compilable(ivy.lstm_update)
Example 7
def test_sgd_optimizer(bs_ic_oc_target, with_v, dtype_str, tensor_fn, dev_str,
                       call):
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()
    batch_shape, input_channels, output_channels, target = bs_ic_oc_target
    x = ivy.cast(
        ivy.linspace(ivy.zeros(batch_shape), ivy.ones(batch_shape),
                     input_channels), 'float32')
    if with_v:
        np.random.seed(0)
        wlim = (6 / (output_channels + input_channels))**0.5
        w = ivy.variable(
            ivy.array(
                np.random.uniform(-wlim, wlim,
                                  (output_channels, input_channels)),
                'float32'))
        b = ivy.variable(ivy.zeros([output_channels]))
        v = Container({'w': w, 'b': b})
    else:
        v = None
    linear_layer = ivy.Linear(input_channels, output_channels, v=v)

    def loss_fn(v_):
        out = linear_layer(x, v=v_)
        return ivy.reduce_mean(out)[0]

    # optimizer
    optimizer = ivy.SGD()

    # train
    loss_tm1 = 1e12
    loss = None
    grads = None
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, linear_layer.v)
        linear_layer.v = optimizer.step(linear_layer.v, grads)
        assert loss < loss_tm1
        loss_tm1 = loss

    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)
    # cardinality test
    if call is helpers.mx_call:
        # mxnet slicing cannot reduce dimension to zero
        assert loss.shape == (1, )
    else:
        assert loss.shape == ()
    # value test
    assert ivy.reduce_max(ivy.abs(grads.b)) > 0
    assert ivy.reduce_max(ivy.abs(grads.w)) > 0
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(loss_fn)
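
The loop above is the general ivy optimiser pattern: compute the cost and gradient container with `ivy.execute_with_gradients`, then let `optimizer.step` produce the updated variable container. A self-contained sketch with hypothetical sizes and learning rate, assuming a differentiable backend has been set:

import ivy

# the pattern mirrors the test above, with made-up dimensions
layer = ivy.Linear(4, 1)
x = ivy.ones([2, 4])

def loss_fn(v_):
    return ivy.reduce_mean(layer(x, v=v_))

optimizer = ivy.SGD(lr=1e-4)
for _ in range(10):
    loss, grads = ivy.execute_with_gradients(loss_fn, layer.v)
    layer.v = optimizer.step(layer.v, grads)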
Example 8
 def _create_variables(self, dev_str):
     """
     Create internal variables for the Linear layer
     """
     # ToDo: support other initialization mechanisms, via class constructor options
     # ToDo: tidy the construction of these variables, with helper functions
     wlim = (6 / (self._output_channels + self._input_channels))**0.5
     w = ivy.variable(
         ivy.random_uniform(-wlim,
                            wlim,
                            (self._output_channels, self._input_channels),
                            dev_str=dev_str))
     b = ivy.variable(ivy.zeros([self._output_channels], dev_str=dev_str))
     return {'w': w, 'b': b}
Example 9
 def _create_variables(self, dev_str):
     vars_dict = dict()
     wlim = (6 / (2 * self._memory_vector_dim)) ** 0.5
     vars_dict['read_weights'] =\
         dict(zip(['w_' + str(i) for i in range(self._read_head_num)],
                  [ivy.variable(ivy.random_uniform(-wlim, wlim, [self._memory_vector_dim, ], dev_str=dev_str))
                   for _ in range(self._read_head_num)]))
     wlim = (6 / (2 * self._memory_size)) ** 0.5
     vars_dict['write_weights'] =\
         dict(zip(['w_' + str(i) for i in range(self._read_head_num + self._write_head_num)],
                  [ivy.variable(ivy.random_uniform(-wlim, wlim, [self._memory_size, ], dev_str=dev_str))
                   for _ in range(self._read_head_num + self._write_head_num)]))
     vars_dict['memory'] = ivy.variable(
         ivy.ones([self._memory_size, self._memory_vector_dim], dev_str=dev_str) * self._init_value)
     return vars_dict
Example 10
def test_is_variable(object_in, dtype_str, dev_str, call):
    if call is helpers.tf_graph_call:
        # cannot create variables as part of compiled tf graph
        pytest.skip()
    if call in [helpers.mx_call] and dtype_str == 'int16':
        # mxnet does not support int16
        pytest.skip()
    if len(object_in) == 0 and call is helpers.mx_call:
        # mxnet does not support 0-dimensional variables
        pytest.skip()
    # smoke test
    non_var = ivy.array(object_in, dtype_str, dev_str)
    var = ivy.variable(ivy.array(object_in, dtype_str, dev_str))
    non_var_res = ivy.is_variable(non_var)
    var_res = ivy.is_variable(var)
    # type test
    assert ivy.is_array(non_var)
    if call is not helpers.np_call:
        assert ivy.is_variable(var)
    if call in [helpers.np_call, helpers.jnp_call]:
        # numpy and jax do not support flagging variables
        pytest.skip()
    # value test
    assert non_var_res is False
    assert var_res is True
    # compilation test
    helpers.assert_compilable(ivy.is_variable)
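
Outside the test harness, the behaviour being checked is simply that `ivy.variable` wraps an array so the backend flags it as trainable. A minimal sketch, assuming a backend such as torch where variables are distinguishable from plain arrays:

import ivy

x = ivy.array([0., 1., 2.], 'float32')
v = ivy.variable(x)
assert not ivy.is_variable(x)
assert ivy.is_variable(v)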
Example 11
def test_variable(object_in, dtype_str, dev_str, call):
    if call is helpers.tf_graph_call:
        # cannot create variables as part of compiled tf graph
        pytest.skip()
    if call in [helpers.mx_call] and dtype_str == 'int16':
        # mxnet does not support int16
        pytest.skip()
    if len(object_in) == 0 and call is helpers.mx_call:
        # mxnet does not support 0-dimensional variables
        pytest.skip()
    # smoke test
    ret = ivy.variable(ivy.array(object_in, dtype_str, dev_str))
    # type test
    if call is not helpers.np_call:
        assert ivy.is_variable(ret)
    # cardinality test
    assert ret.shape == np.array(object_in).shape
    # value test
    assert np.allclose(
        call(ivy.variable, ivy.array(object_in, dtype_str, dev_str)),
        np.array(object_in).astype(dtype_str))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support string devices
        return
    helpers.assert_compilable(ivy.variable)
Example 12
def test_gradient_descent_update(ws_n_grads_n_lr_n_wsnew, dtype_str, tensor_fn,
                                 dev_str, call):
    # smoke test
    ws_raw, dcdws_raw, lr, ws_raw_new = ws_n_grads_n_lr_n_wsnew
    ws = ws_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    dcdws = dcdws_raw.map(lambda x, _: ivy.array(x))
    ws_true_new = ws_raw_new.map(lambda x, _: ivy.variable(ivy.array(x)))
    ws_new = ivy.gradient_descent_update(ws, dcdws, lr)
    # type test
    assert isinstance(ws_new, dict)
    # cardinality test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert w_new.shape == w_true_new.shape
    # value test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert np.allclose(ivy.to_numpy(w_new), ivy.to_numpy(w_true_new))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support internal function definitions
        return
    helpers.assert_compilable(ivy.gradient_descent_update)
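
The rule under test is plain gradient descent applied element-wise to every weight array in the container; a minimal NumPy sketch:

import numpy as np

def gradient_descent_update(w, dcdw, lr):
    # w_new = w - lr * dC/dw
    return w - lr * dcdw

w_new = gradient_descent_update(np.array([1.0, 2.0]), np.array([0.5, 0.5]), 0.1)
# -> array([0.95, 1.95])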
Example 13
 def restore(self, checkpoint_path):
     checkpoint = ivy.Container.from_disk_as_hdf5(checkpoint_path)
     loaded_v = checkpoint.network.map(
         lambda x, kc: ivy.variable(ivy.to_dev(x, self._net._dev_str)))
     if ivy.exists(self._net.v):
         # if build_mode is 'on_call', the network variables will not have been built yet
         assert (self._net.v.shapes == loaded_v.shapes).all_true(
             assert_is_bool=True)
     self._net.v = loaded_v
     self._optimizer.set_state(
         checkpoint.optimizer.map(
             lambda x, kc: ivy.to_dev(x, self._net.spec.dev_strs[0])))
Example 14
def main(interactive=True, try_use_sim=True, f=None):

    # config
    this_dir = os.path.dirname(os.path.realpath(__file__))
    f = choose_random_framework(excluded=['numpy']) if f is None else f
    set_framework(f)
    sim = Simulator(interactive, try_use_sim)
    lr = 0.5
    num_anchors = 3
    num_sample_points = 100

    # spline start
    anchor_points = ivy.cast(
        ivy.expand_dims(ivy.linspace(0, 1, 2 + num_anchors), -1), 'float32')
    query_points = ivy.cast(
        ivy.expand_dims(ivy.linspace(0, 1, num_sample_points), -1), 'float32')

    # learnable parameters
    robot_start_config = ivy.array(ivy.cast(sim.robot_start_config, 'float32'))
    robot_target_config = ivy.array(
        ivy.cast(sim.robot_target_config, 'float32'))
    learnable_anchor_vals = ivy.variable(
        ivy.cast(
            ivy.transpose(
                ivy.linspace(robot_start_config, robot_target_config,
                             2 + num_anchors)[..., 1:-1], (1, 0)), 'float32'))

    # optimizer
    optimizer = ivy.SGD(lr=lr)

    # optimize
    it = 0
    colliding = True
    clearance = 0
    joint_query_vals = None
    while colliding:
        total_cost, grads, joint_query_vals, link_positions, sdf_vals = ivy.execute_with_gradients(
            lambda xs: compute_cost_and_sdfs(xs[
                'w'], anchor_points, robot_start_config, robot_target_config,
                                             query_points, sim),
            Container({'w': learnable_anchor_vals}))
        colliding = ivy.reduce_min(sdf_vals[2:]) < clearance
        sim.update_path_visualization(
            link_positions, sdf_vals,
            os.path.join(this_dir, 'msp_no_sim', 'path_{}.png'.format(it)))
        learnable_anchor_vals = optimizer.step(
            Container({'w': learnable_anchor_vals}), grads)['w']
        it += 1
    sim.execute_motion(joint_query_vals)
    sim.close()
    unset_framework()
Example 15
def test_execute_with_gradients(func_n_xs_n_ty_n_te_n_tg, dtype_str, tensor_fn,
                                dev_str, call):
    # smoke test
    func, xs_raw, true_y, true_extra, true_dydxs = func_n_xs_n_ty_n_te_n_tg
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if true_extra is None:
        y, dydxs = ivy.execute_with_gradients(func, xs)
        extra_out = None
    else:
        y, dydxs, extra_out = ivy.execute_with_gradients(func, xs)
    # type test
    assert ivy.is_array(y) or isinstance(y, Number)
    if call is not helpers.np_call:
        assert isinstance(dydxs, dict)
    # cardinality test
    if call is not helpers.mx_call:
        # mxnet cannot slice array down to shape (), it remains fixed at size (1,)
        assert y.shape == true_y.shape
    if call is not helpers.np_call:
        for (g, g_true) in zip(dydxs.values(), true_dydxs.values()):
            assert g.shape == g_true.shape
    # value test
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if true_extra is None:
        y, dydxs = call(ivy.execute_with_gradients, func, xs)
    else:
        y, dydxs, extra_out = call(ivy.execute_with_gradients, func, xs)
    assert np.allclose(y, true_y)
    if true_extra:
        assert np.allclose(extra_out, true_extra)
    if call is helpers.np_call:
        # numpy doesn't support autodiff
        assert dydxs is None
    else:
        for (g, g_true) in zip(dydxs.values(), true_dydxs.values()):
            assert np.allclose(ivy.to_numpy(g), g_true)
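
The core call pattern is: wrap the variables in a container, pass a function of that container, and get back the cost plus a matching container of gradients. A minimal sketch, assuming a differentiable backend has been set:

import ivy

xs = ivy.Container({'w': ivy.variable(ivy.array([1., 2., 3.]))})

def func(xs_):
    # scalar cost as a function of the variable container
    return ivy.reduce_mean(xs_['w'])

y, dydxs = ivy.execute_with_gradients(func, xs)
# y: the cost, dydxs: gradients w.r.t. each variable in xs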
Example 16
def var_fn(a, b=None, c=None):
    return ivy.variable(ivy.array(a, b, c))
Example 17
def test_ntm(addressing_mode, batch_shape, dev_str, call):

    # ntm config
    input_dim = 256
    output_dim = 8
    ctrl_output_size = 256
    ctrl_layers = 2
    memory_size = 5
    timesteps = 5
    memory_vector_dim = 2
    read_head_num = 3
    write_head_num = 1
    shift_range = 0
    clip_value = 20
    init_value = 1e-6
    ctrl_input_size = read_head_num * memory_vector_dim + input_dim
    num_heads = read_head_num + write_head_num
    num_parameters_per_head = memory_vector_dim + 1 + 1 + (shift_range * 2 +
                                                           1) + 1
    total_parameter_num = num_parameters_per_head * num_heads + memory_vector_dim * 2 * write_head_num
    usage = ivy.zeros([
        memory_size,
    ])

    # memory object wo vars
    ntm = ivy_mem.NTM(input_dim,
                      output_dim,
                      ctrl_output_size,
                      ctrl_layers,
                      memory_size,
                      memory_vector_dim,
                      read_head_num,
                      write_head_num,
                      addressing_mode=addressing_mode,
                      shift_range=shift_range,
                      clip_value=clip_value,
                      sequential_writing=True,
                      retroactive_updates=False,
                      with_erase=False)

    # test
    x = ivy.ones(batch_shape + [timesteps, input_dim])
    assert call(ntm, x).shape == tuple(batch_shape + [timesteps, output_dim])

    # variables
    variables = dict()
    variables['ntm_cell'] = dict()
    np.random.seed(0)

    # lstm
    in_wlim = (6 / (ctrl_input_size + 4 * ctrl_output_size))**0.5
    rec_wlim = (6 / (ctrl_output_size + 4 * ctrl_output_size))**0.5
    variables['ntm_cell']['controller'] = \
        {'input': {'layer1': {'w': ivy.array(np.random.uniform(
            -in_wlim, in_wlim, size=[ctrl_input_size, 4 * ctrl_output_size]).astype(np.float32))},
                   'layer2': {'w': ivy.array(np.random.uniform(
                       -in_wlim, in_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(np.float32))}},
         'recurrent': {'layer1': {'w': ivy.array(np.random.uniform(
             -rec_wlim, rec_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(np.float32))},
                       'layer2': {'w': ivy.array(np.random.uniform(
                           -rec_wlim, rec_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(
                           np.float32))}}}

    # fully connected
    proj_wlim = (6 / (total_parameter_num + ctrl_output_size))**0.5
    variables['ntm_cell']['controller_proj'] = {
        'w':
        ivy.array(
            np.random.uniform(-proj_wlim,
                              proj_wlim,
                              size=[total_parameter_num,
                                    ctrl_output_size]).astype(np.float32)),
        'b':
        ivy.zeros([total_parameter_num])
    }

    out_wlim = (6 / (total_parameter_num + ctrl_input_size))**0.5
    variables['ntm_cell']['output_proj'] = {
        'w':
        ivy.array(
            np.random.uniform(-out_wlim,
                              out_wlim,
                              size=[
                                  output_dim, ctrl_output_size +
                                  read_head_num * memory_vector_dim
                              ]).astype(np.float32)),
        'b':
        ivy.zeros([output_dim])
    }

    # memory
    wlim = (6 / (2 * memory_vector_dim))**0.5
    variables['ntm_cell']['read_weights'] = dict(
        zip(['w_' + str(i) for i in range(read_head_num)], [
            ivy.variable(
                ivy.array(
                    np.random.uniform(-wlim, wlim, [
                        memory_vector_dim,
                    ]), 'float32')) for _ in range(read_head_num)
        ]))

    wlim = (6 / (2 * memory_size))**0.5
    variables['ntm_cell']['write_weights'] = dict(
        zip(['w_' + str(i) for i in range(read_head_num + write_head_num)], [
            ivy.variable(
                ivy.array(np.random.uniform(-wlim, wlim, [
                    memory_size,
                ]), 'float32')) for _ in range(read_head_num + write_head_num)
        ]))

    variables['ntm_cell']['memory'] = ivy.variable(
        ivy.ones([memory_size, memory_vector_dim]) * init_value)

    # memory object w vars
    ntm = ivy_mem.NTM(input_dim,
                      output_dim,
                      ctrl_output_size,
                      ctrl_layers,
                      memory_size,
                      memory_vector_dim,
                      read_head_num,
                      write_head_num,
                      Container(variables),
                      usage,
                      addressing_mode=addressing_mode,
                      shift_range=shift_range,
                      clip_value=clip_value,
                      init_value=init_value,
                      sequential_writing=True,
                      retroactive_updates=False,
                      with_erase=False)

    # test
    assert np.allclose(call(ntm, x), td.ntm_return[addressing_mode], atol=1e-6)

    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support try/except statements
        return
    helpers.assert_compilable(ntm)
Example 18
def test_lstm_layer_training(b_t_ic_hc_otf_sctv, with_v, dtype_str, tensor_fn,
                             dev_str, call):
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()
    # smoke test
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(
        ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels),
        'float32')
    if with_v:
        kernel = ivy.variable(
            ivy.ones([input_channels, 4 * hidden_channels]) * 0.5)
        recurrent_kernel = ivy.variable(
            ivy.ones([hidden_channels, 4 * hidden_channels]) * 0.5)
        v = Container({
            'input': {
                'layer_0': {
                    'w': kernel
                }
            },
            'recurrent': {
                'layer_0': {
                    'w': recurrent_kernel
                }
            }
        })
    else:
        v = None
    lstm_layer = ivy.LSTM(input_channels, hidden_channels, v=v)

    def loss_fn(v_):
        out, (state_h, state_c) = lstm_layer(x, v=v_)
        return ivy.reduce_mean(out)[0]

    # train
    loss_tm1 = 1e12
    loss = None
    grads = None
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, lstm_layer.v)
        lstm_layer.v = ivy.gradient_descent_update(lstm_layer.v, grads, 1e-3)
        assert loss < loss_tm1
        loss_tm1 = loss

    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)
    # cardinality test
    if call is helpers.mx_call:
        # mxnet slicing cannot reduce dimension to zero
        assert loss.shape == (1, )
    else:
        assert loss.shape == ()
    # value test
    for key, val in grads.to_iterator():
        assert ivy.reduce_max(ivy.abs(val)) > 0
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(loss_fn)