def _create_variables(self, dev_str):
    """
    Create internal variables for the LSTM layer
    """
    # ToDo: support other initialization mechanisms, via class constructor options
    # ToDo: tidy the construction of these variables, with helper functions
    wlim = (6 / (self._output_channels + self._input_channels))**0.5
    input_weights = dict(
        zip(['layer_' + str(i) for i in range(self._num_layers)],
            [{'w': ivy.variable(ivy.random_uniform(
                -wlim, wlim,
                (self._input_channels if i == 0 else self._output_channels, 4 * self._output_channels),
                dev_str=dev_str))}
             for i in range(self._num_layers)]))
    wlim = (6 / (self._output_channels + self._output_channels))**0.5
    recurrent_weights = dict(
        zip(['layer_' + str(i) for i in range(self._num_layers)],
            [{'w': ivy.variable(ivy.random_uniform(
                -wlim, wlim, (self._output_channels, 4 * self._output_channels), dev_str=dev_str))}
             for i in range(self._num_layers)]))
    return {'input': input_weights, 'recurrent': recurrent_weights}
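# The bound above is the Glorot / Xavier uniform limit, wlim = sqrt(6 / (fan_in + fan_out)), and the
# trailing dimension is 4 * output_channels because the four LSTM gate weight blocks are packed into a
# single matrix. A minimal NumPy sketch of the same initialisation, assuming that reading of the code
# (the `glorot_uniform` helper below is illustrative, not part of the ivy API):
import numpy as np

def glorot_uniform(shape, fan_in, fan_out, rng=np.random):
    # sample uniformly from [-wlim, wlim] with wlim = sqrt(6 / (fan_in + fan_out))
    wlim = (6 / (fan_in + fan_out)) ** 0.5
    return rng.uniform(-wlim, wlim, size=shape).astype(np.float32)

input_channels, output_channels = 3, 5
w_in = glorot_uniform((input_channels, 4 * output_channels), input_channels, output_channels)
w_rec = glorot_uniform((output_channels, 4 * output_channels), output_channels, output_channels)
assert w_in.shape == (3, 20) and w_rec.shape == (5, 20)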
def test_gradcheck(self, dev_str, dtype_str, call):
    if call is not helpers.torch_call:
        # ivy gradcheck method not yet implemented
        pytest.skip()
    input_ = ivy.variable(ivy.cast(ivy.random_uniform(shape=(2, 3, 4, 4), dev_str=dev_str), 'float64'))
    kernel = ivy.variable(ivy.cast(ivy.random_uniform(shape=(3, 3), dev_str=dev_str), 'float64'))
    assert gradcheck(top_hat, (input_, kernel), raise_exception=True)
def test_linear_layer(bs_ic_oc_target, with_v, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    batch_shape, input_channels, output_channels, target = bs_ic_oc_target
    x = ivy.cast(ivy.linspace(ivy.zeros(batch_shape), ivy.ones(batch_shape), input_channels), 'float32')
    if with_v:
        np.random.seed(0)
        wlim = (6 / (output_channels + input_channels))**0.5
        w = ivy.variable(ivy.array(
            np.random.uniform(-wlim, wlim, (output_channels, input_channels)), 'float32'))
        b = ivy.variable(ivy.zeros([output_channels]))
        v = Container({'w': w, 'b': b})
    else:
        v = None
    linear_layer = ivy.Linear(input_channels, output_channels, v=v)
    ret = linear_layer(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == tuple(batch_shape + [output_channels])
    # value test
    if not with_v:
        return
    assert np.allclose(call(linear_layer, x), np.array(target))
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(linear_layer)
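# For reference, w is stored as (output_channels, input_channels), which suggests the Linear forward
# pass is y = x @ w.T + b. A minimal NumPy sketch of that assumption (not the ivy implementation):
import numpy as np

def linear_forward(x, w, b):
    # x: (..., input_channels), w: (output_channels, input_channels), b: (output_channels,)
    return np.matmul(x, w.T) + b

x = np.ones((2, 4), dtype=np.float32)        # batch of 2, 4 input channels
w = np.full((3, 4), 0.5, dtype=np.float32)   # 3 output channels
b = np.zeros(3, dtype=np.float32)
assert linear_forward(x, w, b).shape == (2, 3)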
def test_lstm(b_t_ic_hc_otf_sctv, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels), 'float32')
    init_h = ivy.ones([b, hidden_channels])
    init_c = ivy.ones([b, hidden_channels])
    kernel = ivy.variable(ivy.ones([input_channels, 4 * hidden_channels])) * 0.5
    recurrent_kernel = ivy.variable(ivy.ones([hidden_channels, 4 * hidden_channels])) * 0.5
    output, state_c = ivy.lstm_update(x, init_h, init_c, kernel, recurrent_kernel)
    # type test
    assert ivy.is_array(output)
    assert ivy.is_array(state_c)
    # cardinality test
    assert output.shape == (b, t, hidden_channels)
    assert state_c.shape == (b, hidden_channels)
    # value test
    output_true = np.tile(np.asarray(output_true_flat).reshape((b, t, 1)), (1, 1, hidden_channels))
    state_c_true = np.ones([b, hidden_channels]) * state_c_true_val
    output, state_c = call(ivy.lstm_update, x, init_h, init_c, kernel, recurrent_kernel)
    assert np.allclose(output, output_true, atol=1e-6)
    assert np.allclose(state_c, state_c_true, atol=1e-6)
    # compilation test
    if call in [helpers.torch_call]:
        # this is not a backend implemented function
        pytest.skip()
    helpers.assert_compilable(ivy.lstm_update)
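# lstm_update applies the standard LSTM recurrence along the time axis of x. A rough NumPy sketch of a
# single timestep, assuming the common (i, f, g, o) gate packing; the exact gate ordering used by the
# ivy backends is an assumption here, not taken from the source:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def lstm_step(x_t, h, c, kernel, recurrent_kernel):
    # x_t: (b, in), h, c: (b, hidden), kernel: (in, 4*hidden), recurrent_kernel: (hidden, 4*hidden)
    gates = x_t @ kernel + h @ recurrent_kernel
    i, f, g, o = np.split(gates, 4, axis=-1)
    c_new = sigmoid(f) * c + sigmoid(i) * np.tanh(g)   # updated cell state
    h_new = sigmoid(o) * np.tanh(c_new)                # updated hidden state / output
    return h_new, c_new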
def test_adam_update(ws_n_grads_n_lr_n_wsnew, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    ws_raw, dcdws_raw, lr, ws_raw_new = ws_n_grads_n_lr_n_wsnew
    ws = ws_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    dcdws = dcdws_raw.map(lambda x, _: ivy.array(x))
    ws_true_new = ws_raw_new.map(lambda x, _: ivy.variable(ivy.array(x)))
    mw = dcdws
    vw = dcdws.map(lambda x, _: x**2)
    ws_new, mw_new, vw_new = ivy.adam_update(ws, dcdws, lr, mw, vw, ivy.array(1))
    # type test
    assert isinstance(ws_new, dict)
    assert isinstance(mw_new, dict)
    assert isinstance(vw_new, dict)
    # cardinality test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert w_new.shape == w_true_new.shape
    for (m_new, m_orig) in zip(mw_new.values(), mw.values()):
        assert m_new.shape == m_orig.shape
    for (v_new, v_orig) in zip(vw_new.values(), vw.values()):
        assert v_new.shape == v_orig.shape
    # value test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert np.allclose(ivy.to_numpy(w_new), ivy.to_numpy(w_true_new))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support internal function definitions
        return
    helpers.assert_compilable(ivy.adam_update)
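# adam_update combines first and second moment estimates in the textbook way. A minimal NumPy sketch of
# one Adam step; the default beta1, beta2 and epsilon values below are assumptions, not read from ivy:
import numpy as np

def adam_step(w, dcdw, lr, m, v, step, beta1=0.9, beta2=0.999, epsilon=1e-7):
    m = beta1 * m + (1 - beta1) * dcdw              # biased first moment estimate
    v = beta2 * v + (1 - beta2) * dcdw ** 2         # biased second moment estimate
    m_hat = m / (1 - beta1 ** step)                 # bias-corrected first moment
    v_hat = v / (1 - beta2 ** step)                 # bias-corrected second moment
    w_new = w - lr * m_hat / (np.sqrt(v_hat) + epsilon)
    return w_new, m, v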
def test_lstm_layer(b_t_ic_hc_otf_sctv, with_v, with_initial_state, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels), 'float32')
    if with_initial_state:
        init_h = ivy.ones([b, hidden_channels])
        init_c = ivy.ones([b, hidden_channels])
        initial_state = ([init_h], [init_c])
    else:
        initial_state = None
    if with_v:
        kernel = ivy.variable(ivy.ones([input_channels, 4 * hidden_channels]) * 0.5)
        recurrent_kernel = ivy.variable(ivy.ones([hidden_channels, 4 * hidden_channels]) * 0.5)
        v = Container({'input': {'layer_0': {'w': kernel}},
                       'recurrent': {'layer_0': {'w': recurrent_kernel}}})
    else:
        v = None
    lstm_layer = ivy.LSTM(input_channels, hidden_channels, v=v)
    output, (state_h, state_c) = lstm_layer(x, initial_state=initial_state)
    # type test
    assert ivy.is_array(output)
    assert ivy.is_array(state_h[0])
    assert ivy.is_array(state_c[0])
    # cardinality test
    assert output.shape == (b, t, hidden_channels)
    assert state_h[0].shape == (b, hidden_channels)
    assert state_c[0].shape == (b, hidden_channels)
    # value test
    if not with_v or not with_initial_state:
        return
    output_true = np.tile(np.asarray(output_true_flat).reshape((b, t, 1)), (1, 1, hidden_channels))
    state_c_true = np.ones([b, hidden_channels]) * state_c_true_val
    output, (state_h, state_c) = call(lstm_layer, x, initial_state=initial_state)
    assert np.allclose(output, output_true, atol=1e-6)
    assert np.allclose(state_c, state_c_true, atol=1e-6)
    # compilation test
    if call in [helpers.torch_call]:
        # this is not a backend implemented function
        pytest.skip()
    helpers.assert_compilable(ivy.lstm_update)
def test_sgd_optimizer(bs_ic_oc_target, with_v, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()
    batch_shape, input_channels, output_channels, target = bs_ic_oc_target
    x = ivy.cast(ivy.linspace(ivy.zeros(batch_shape), ivy.ones(batch_shape), input_channels), 'float32')
    if with_v:
        np.random.seed(0)
        wlim = (6 / (output_channels + input_channels))**0.5
        w = ivy.variable(ivy.array(
            np.random.uniform(-wlim, wlim, (output_channels, input_channels)), 'float32'))
        b = ivy.variable(ivy.zeros([output_channels]))
        v = Container({'w': w, 'b': b})
    else:
        v = None
    linear_layer = ivy.Linear(input_channels, output_channels, v=v)

    def loss_fn(v_):
        out = linear_layer(x, v=v_)
        return ivy.reduce_mean(out)[0]

    # optimizer
    optimizer = ivy.SGD()
    # train
    loss_tm1 = 1e12
    loss = None
    grads = None
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, linear_layer.v)
        linear_layer.v = optimizer.step(linear_layer.v, grads)
        assert loss < loss_tm1
        loss_tm1 = loss
    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)
    # cardinality test
    if call is helpers.mx_call:
        # mxnet slicing cannot reduce dimension to zero
        assert loss.shape == (1,)
    else:
        assert loss.shape == ()
    # value test
    assert ivy.reduce_max(ivy.abs(grads.b)) > 0
    assert ivy.reduce_max(ivy.abs(grads.w)) > 0
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(loss_fn)
def _create_variables(self, dev_str):
    """
    Create internal variables for the Linear layer
    """
    # ToDo: support other initialization mechanisms, via class constructor options
    # ToDo: tidy the construction of these variables, with helper functions
    wlim = (6 / (self._output_channels + self._input_channels))**0.5
    w = ivy.variable(ivy.random_uniform(
        -wlim, wlim, (self._output_channels, self._input_channels), dev_str=dev_str))
    b = ivy.variable(ivy.zeros([self._output_channels], dev_str=dev_str))
    return {'w': w, 'b': b}
def _create_variables(self, dev_str):
    vars_dict = dict()
    wlim = (6 / (2 * self._memory_vector_dim)) ** 0.5
    vars_dict['read_weights'] = \
        dict(zip(['w_' + str(i) for i in range(self._read_head_num)],
                 [ivy.variable(ivy.random_uniform(
                     -wlim, wlim, [self._memory_vector_dim, ], dev_str=dev_str))
                  for _ in range(self._read_head_num)]))
    wlim = (6 / (2 * self._memory_size)) ** 0.5
    vars_dict['write_weights'] = \
        dict(zip(['w_' + str(i) for i in range(self._read_head_num + self._write_head_num)],
                 [ivy.variable(ivy.random_uniform(
                     -wlim, wlim, [self._memory_size, ], dev_str=dev_str))
                  for _ in range(self._read_head_num + self._write_head_num)]))
    vars_dict['memory'] = ivy.variable(
        ivy.ones([self._memory_size, self._memory_vector_dim], dev_str=dev_str) * self._init_value)
    return vars_dict
def test_is_variable(object_in, dtype_str, dev_str, call):
    if call is helpers.tf_graph_call:
        # cannot create variables as part of compiled tf graph
        pytest.skip()
    if call in [helpers.mx_call] and dtype_str == 'int16':
        # mxnet does not support int16
        pytest.skip()
    if len(object_in) == 0 and call is helpers.mx_call:
        # mxnet does not support 0-dimensional variables
        pytest.skip()
    # smoke test
    non_var = ivy.array(object_in, dtype_str, dev_str)
    var = ivy.variable(ivy.array(object_in, dtype_str, dev_str))
    non_var_res = ivy.is_variable(non_var)
    var_res = ivy.is_variable(var)
    # type test
    assert ivy.is_array(non_var)
    if call is not helpers.np_call:
        assert ivy.is_variable(var)
    if call in [helpers.np_call, helpers.jnp_call]:
        # numpy and jax do not support flagging variables
        pytest.skip()
    # value test
    assert non_var_res is False
    assert var_res is True
    # compilation test
    helpers.assert_compilable(ivy.is_variable)
def test_variable(object_in, dtype_str, dev_str, call):
    if call is helpers.tf_graph_call:
        # cannot create variables as part of compiled tf graph
        pytest.skip()
    if call in [helpers.mx_call] and dtype_str == 'int16':
        # mxnet does not support int16
        pytest.skip()
    if len(object_in) == 0 and call is helpers.mx_call:
        # mxnet does not support 0-dimensional variables
        pytest.skip()
    # smoke test
    ret = ivy.variable(ivy.array(object_in, dtype_str, dev_str))
    # type test
    if call is not helpers.np_call:
        assert ivy.is_variable(ret)
    # cardinality test
    assert ret.shape == np.array(object_in).shape
    # value test
    assert np.allclose(call(ivy.variable, ivy.array(object_in, dtype_str, dev_str)),
                       np.array(object_in).astype(dtype_str))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support string devices
        return
    helpers.assert_compilable(ivy.variable)
def test_gradient_descent_update(ws_n_grads_n_lr_n_wsnew, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    ws_raw, dcdws_raw, lr, ws_raw_new = ws_n_grads_n_lr_n_wsnew
    ws = ws_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    dcdws = dcdws_raw.map(lambda x, _: ivy.array(x))
    ws_true_new = ws_raw_new.map(lambda x, _: ivy.variable(ivy.array(x)))
    ws_new = ivy.gradient_descent_update(ws, dcdws, lr)
    # type test
    assert isinstance(ws_new, dict)
    # cardinality test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert w_new.shape == w_true_new.shape
    # value test
    for (w_new, w_true_new) in zip(ws_new.values(), ws_true_new.values()):
        assert np.allclose(ivy.to_numpy(w_new), ivy.to_numpy(w_true_new))
    # compilation test
    if call in [helpers.torch_call]:
        # pytorch scripting does not support internal function definitions
        return
    helpers.assert_compilable(ivy.gradient_descent_update)
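# gradient_descent_update applies the plain SGD rule leaf-wise across the container of variables.
# A one-line sketch of the per-array update it is expected to perform:
import numpy as np

def sgd_step(w, dcdw, lr):
    # w_new = w - lr * dC/dw
    return w - lr * dcdw

assert np.allclose(sgd_step(np.array([1.0, 2.0]), np.array([0.5, 0.5]), 0.1), [0.95, 1.95])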
def restore(self, checkpoint_path):
    checkpoint = ivy.Container.from_disk_as_hdf5(checkpoint_path)
    loaded_v = checkpoint.network.map(
        lambda x, kc: ivy.variable(ivy.to_dev(x, self._net._dev_str)))
    if ivy.exists(self._net.v):
        # if build_mode is 'on_call', the network variables will not have been built yet
        assert (self._net.v.shapes == loaded_v.shapes).all_true(assert_is_bool=True)
    self._net.v = loaded_v
    self._optimizer.set_state(
        checkpoint.optimizer.map(
            lambda x, kc: ivy.to_dev(x, self._net.spec.dev_strs[0])))
def main(interactive=True, try_use_sim=True, f=None):
    # config
    this_dir = os.path.dirname(os.path.realpath(__file__))
    f = choose_random_framework(excluded=['numpy']) if f is None else f
    set_framework(f)
    sim = Simulator(interactive, try_use_sim)
    lr = 0.5
    num_anchors = 3
    num_sample_points = 100
    # spline start
    anchor_points = ivy.cast(ivy.expand_dims(ivy.linspace(0, 1, 2 + num_anchors), -1), 'float32')
    query_points = ivy.cast(ivy.expand_dims(ivy.linspace(0, 1, num_sample_points), -1), 'float32')
    # learnable parameters
    robot_start_config = ivy.array(ivy.cast(sim.robot_start_config, 'float32'))
    robot_target_config = ivy.array(ivy.cast(sim.robot_target_config, 'float32'))
    learnable_anchor_vals = ivy.variable(ivy.cast(ivy.transpose(
        ivy.linspace(robot_start_config, robot_target_config, 2 + num_anchors)[..., 1:-1],
        (1, 0)), 'float32'))
    # optimizer
    optimizer = ivy.SGD(lr=lr)
    # optimize
    it = 0
    colliding = True
    clearance = 0
    joint_query_vals = None
    while colliding:
        total_cost, grads, joint_query_vals, link_positions, sdf_vals = ivy.execute_with_gradients(
            lambda xs: compute_cost_and_sdfs(
                xs['w'], anchor_points, robot_start_config, robot_target_config, query_points, sim),
            Container({'w': learnable_anchor_vals}))
        colliding = ivy.reduce_min(sdf_vals[2:]) < clearance
        sim.update_path_visualization(
            link_positions, sdf_vals,
            os.path.join(this_dir, 'msp_no_sim', 'path_{}.png'.format(it)))
        learnable_anchor_vals = optimizer.step(Container({'w': learnable_anchor_vals}), grads)['w']
        it += 1
    sim.execute_motion(joint_query_vals)
    sim.close()
    unset_framework()
def test_execute_with_gradients(func_n_xs_n_ty_n_te_n_tg, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    func, xs_raw, true_y, true_extra, true_dydxs = func_n_xs_n_ty_n_te_n_tg
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if true_extra is None:
        y, dydxs = ivy.execute_with_gradients(func, xs)
        extra_out = None
    else:
        y, dydxs, extra_out = ivy.execute_with_gradients(func, xs)
    # type test
    assert ivy.is_array(y) or isinstance(y, Number)
    if call is not helpers.np_call:
        assert isinstance(dydxs, dict)
    # cardinality test
    if call is not helpers.mx_call:
        # mxnet cannot slice array down to shape (), it remains fixed at size (1,)
        assert y.shape == true_y.shape
    if call is not helpers.np_call:
        for (g, g_true) in zip(dydxs.values(), true_dydxs.values()):
            assert g.shape == g_true.shape
    # value test
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if true_extra is None:
        y, dydxs = call(ivy.execute_with_gradients, func, xs)
    else:
        y, dydxs, extra_out = call(ivy.execute_with_gradients, func, xs)
    assert np.allclose(y, true_y)
    if true_extra:
        assert np.allclose(extra_out, true_extra)
    if call is helpers.np_call:
        # numpy doesn't support autodiff
        assert dydxs is None
    else:
        for (g, g_true) in zip(dydxs.values(), true_dydxs.values()):
            assert np.allclose(ivy.to_numpy(g), g_true)
def var_fn(a, b=None, c=None):
    return ivy.variable(ivy.array(a, b, c))
def test_ntm(addressing_mode, batch_shape, dev_str, call):
    # ntm config
    input_dim = 256
    output_dim = 8
    ctrl_output_size = 256
    ctrl_layers = 2
    memory_size = 5
    timesteps = 5
    memory_vector_dim = 2
    read_head_num = 3
    write_head_num = 1
    shift_range = 0
    clip_value = 20
    init_value = 1e-6
    ctrl_input_size = read_head_num * memory_vector_dim + input_dim
    num_heads = read_head_num + write_head_num
    num_parameters_per_head = memory_vector_dim + 1 + 1 + (shift_range * 2 + 1) + 1
    total_parameter_num = num_parameters_per_head * num_heads + memory_vector_dim * 2 * write_head_num
    usage = ivy.zeros([memory_size, ])

    # memory object wo vars
    ntm = ivy_mem.NTM(input_dim, output_dim, ctrl_output_size, ctrl_layers, memory_size,
                      memory_vector_dim, read_head_num, write_head_num,
                      addressing_mode=addressing_mode, shift_range=shift_range, clip_value=clip_value,
                      sequential_writing=True, retroactive_updates=False, with_erase=False)

    # test
    x = ivy.ones(batch_shape + [timesteps, input_dim])
    assert call(ntm, x).shape == tuple(batch_shape + [timesteps, output_dim])

    # variables
    variables = dict()
    variables['ntm_cell'] = dict()
    np.random.seed(0)

    # lstm
    in_wlim = (6 / (ctrl_input_size + 4 * ctrl_output_size))**0.5
    rec_wlim = (6 / (ctrl_output_size + 4 * ctrl_output_size))**0.5
    variables['ntm_cell']['controller'] = \
        {'input': {'layer1': {'w': ivy.array(np.random.uniform(
            -in_wlim, in_wlim, size=[ctrl_input_size, 4 * ctrl_output_size]).astype(np.float32))},
                   'layer2': {'w': ivy.array(np.random.uniform(
                       -in_wlim, in_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(np.float32))}},
         'recurrent': {'layer1': {'w': ivy.array(np.random.uniform(
             -rec_wlim, rec_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(np.float32))},
                       'layer2': {'w': ivy.array(np.random.uniform(
                           -rec_wlim, rec_wlim, size=[ctrl_output_size, 4 * ctrl_output_size]).astype(np.float32))}}}

    # fully connected
    proj_wlim = (6 / (total_parameter_num + ctrl_output_size))**0.5
    variables['ntm_cell']['controller_proj'] = \
        {'w': ivy.array(np.random.uniform(
            -proj_wlim, proj_wlim, size=[total_parameter_num, ctrl_output_size]).astype(np.float32)),
         'b': ivy.zeros([total_parameter_num])}
    out_wlim = (6 / (total_parameter_num + ctrl_input_size))**0.5
    variables['ntm_cell']['output_proj'] = \
        {'w': ivy.array(np.random.uniform(
            -out_wlim, out_wlim,
            size=[output_dim, ctrl_output_size + read_head_num * memory_vector_dim]).astype(np.float32)),
         'b': ivy.zeros([output_dim])}

    # memory
    wlim = (6 / (2 * memory_vector_dim))**0.5
    variables['ntm_cell']['read_weights'] = dict(
        zip(['w_' + str(i) for i in range(read_head_num)],
            [ivy.variable(ivy.array(np.random.uniform(-wlim, wlim, [memory_vector_dim, ]), 'float32'))
             for _ in range(read_head_num)]))
    wlim = (6 / (2 * memory_size))**0.5
    variables['ntm_cell']['write_weights'] = dict(
        zip(['w_' + str(i) for i in range(read_head_num + write_head_num)],
            [ivy.variable(ivy.array(np.random.uniform(-wlim, wlim, [memory_size, ]), 'float32'))
             for _ in range(read_head_num + write_head_num)]))
    variables['ntm_cell']['memory'] = ivy.variable(
        ivy.ones([memory_size, memory_vector_dim]) * init_value)

    # memory object w vars
    ntm = ivy_mem.NTM(input_dim, output_dim, ctrl_output_size, ctrl_layers, memory_size,
                      memory_vector_dim, read_head_num, write_head_num, Container(variables), usage,
                      addressing_mode=addressing_mode, shift_range=shift_range, clip_value=clip_value,
                      init_value=init_value, sequential_writing=True, retroactive_updates=False,
                      with_erase=False)

    # test
    assert np.allclose(call(ntm, x), td.ntm_return[addressing_mode], atol=1e-6)

    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support try-catch statements
        return
    helpers.assert_compilable(ntm)
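# Worked example of the head parameter bookkeeping used in the test above (values copied from the test
# config; the per-term naming follows the NTM paper and is an interpretation, not taken from this code):
memory_vector_dim, shift_range = 2, 0
read_head_num, write_head_num = 3, 1
per_head = memory_vector_dim + 1 + 1 + (shift_range * 2 + 1) + 1   # key + strength + interpolation + shift + sharpening
heads = read_head_num + write_head_num
total = per_head * heads + memory_vector_dim * 2 * write_head_num  # extra terms per write head (erase/add vectors)
assert (per_head, heads, total) == (6, 4, 28)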
def test_lstm_layer_training(b_t_ic_hc_otf_sctv, with_v, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()
    b, t, input_channels, hidden_channels, output_true_flat, state_c_true_val = b_t_ic_hc_otf_sctv
    x = ivy.cast(ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels), 'float32')
    if with_v:
        kernel = ivy.variable(ivy.ones([input_channels, 4 * hidden_channels]) * 0.5)
        recurrent_kernel = ivy.variable(ivy.ones([hidden_channels, 4 * hidden_channels]) * 0.5)
        v = Container({'input': {'layer_0': {'w': kernel}},
                       'recurrent': {'layer_0': {'w': recurrent_kernel}}})
    else:
        v = None
    lstm_layer = ivy.LSTM(input_channels, hidden_channels, v=v)

    def loss_fn(v_):
        out, (state_h, state_c) = lstm_layer(x, v=v_)
        return ivy.reduce_mean(out)[0]

    # train
    loss_tm1 = 1e12
    loss = None
    grads = None
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, lstm_layer.v)
        lstm_layer.v = ivy.gradient_descent_update(lstm_layer.v, grads, 1e-3)
        assert loss < loss_tm1
        loss_tm1 = loss
    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)
    # cardinality test
    if call is helpers.mx_call:
        # mxnet slicing cannot reduce dimension to zero
        assert loss.shape == (1,)
    else:
        assert loss.shape == ()
    # value test
    for key, val in grads.to_iterator():
        assert ivy.reduce_max(ivy.abs(val)) > 0
    # compilation test
    if call is helpers.torch_call:
        # pytorch scripting does not support **kwargs
        return
    helpers.assert_compilable(loss_fn)