def train(self):
    """Optimise ``self._model.v`` on randomly drawn training images.

    Performs ``self._num_iters + 1`` Adam updates. Every update draws one
    image index at random, builds the rays for the matching camera geometry
    with ``self._get_rays`` and steps the model variables along the
    gradients of ``self._loss_fn``.

    The loss is printed every ``self._log_freq`` iterations, and the view of
    ``self._test_cam_geom`` is rendered and saved into ``self._vis_log_dir``
    every ``self._vis_freq`` iterations. A frequency of ``-1`` switches the
    corresponding output off.
    """
    adam = ivy.Adam(self._lr, dev_str=self._dev_str)

    def is_due(step_idx, freq):
        # ``-1`` is the "disabled" sentinel; it has to be tested explicitly
        # because ``step_idx % -1`` is always zero
        return step_idx % freq == 0 and freq != -1

    for step_idx in range(self._num_iters + 1):
        # pick one training view at random
        view_idx = np.random.randint(self._images.shape[0])
        target = self._images[view_idx]
        view_geom = self._cam_geoms.slice(view_idx)
        rays_o, rays_d = self._get_rays(view_geom)

        def objective(v):
            return self._loss_fn(self._model, rays_o, rays_d, target, v=v)

        loss, grads = ivy.execute_with_gradients(objective, self._model.v)
        self._model.v = adam.step(self._model.v, grads)

        if is_due(step_idx, self._log_freq):
            loss_value = ivy.to_numpy(loss).item()
            print('step {}, loss {}'.format(step_idx, loss_value))

        if is_due(step_idx, self._vis_freq):
            # Render the holdout view for logging
            test_rays_o, test_rays_d = self._get_rays(self._test_cam_geom)
            near_plane = ivy.ones(self._img_dims, dev_str=self._dev_str) * 2
            far_plane = ivy.ones(self._img_dims, dev_str=self._dev_str) * 6
            rgb, depth = ivy_vision.render_implicit_features_and_depth(
                self._model, test_rays_o, test_rays_d,
                near=near_plane, far=far_plane,
                samples_per_ray=self._num_samples)
            img_name = 'img_{}.png'.format(str(step_idx).zfill(3))
            img_path = os.path.join(self._vis_log_dir, img_name)
            plt.imsave(img_path, ivy.to_numpy(rgb))

    print('Completed Training')
def _raw_execute_with_grads(self, network, dev_str, batch, network_v):
    """Evaluate the cost and its gradients for a subset of ``network_v``.

    The variables that are differentiated are selected from ``network_v``
    using ``self._net_spec.v_keychains``: they are picked with
    ``at_key_chains`` when ``self._net_spec.keep_v_keychains`` is truthy and
    obtained with ``prune_key_chains`` otherwise. Inside the cost function
    the differentiated subset is written back into the full container with
    ``set_at_key_chains`` before ``self._compute_cost`` is called.

    :param network: network object forwarded to ``self._compute_cost``.
    :param dev_str: device string forwarded to ``self._compute_cost``.
    :param batch: batch forwarded to ``self._compute_cost``.
    :param network_v: full container of network variables.
    :return: tuple ``(cost, gradients)`` from ``ivy.execute_with_gradients``.
    """
    spec = self._net_spec
    if spec.keep_v_keychains:
        selected_v = network_v.at_key_chains(spec.v_keychains, ignore_none=True)
    else:
        selected_v = network_v.prune_key_chains(spec.v_keychains, ignore_none=True)

    def cost_fn(v):
        # merge the differentiated subset back into the complete variable set
        full_v = network_v.set_at_key_chains(v)
        return self._compute_cost(network, batch, dev_str, v=full_v)

    cost, gradients = ivy.execute_with_gradients(cost_fn, selected_v)
    return cost, gradients
def test_sgd_optimizer(bs_ic_oc_target, with_v, dtype_str, tensor_fn, dev_str, call):
    """Check that ``ivy.SGD`` lowers the loss of an ``ivy.Linear`` layer.

    Ten optimiser steps are taken on the mean output of the layer; the loss
    must strictly decrease at every step. Afterwards the types and shapes of
    the loss and gradients are verified, and both the weight and bias
    gradients must be non-zero. The test is skipped for the NumPy backend.
    """
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()

    batch_shape, input_channels, output_channels, target = bs_ic_oc_target
    x = ivy.cast(
        ivy.linspace(ivy.zeros(batch_shape), ivy.ones(batch_shape), input_channels),
        'float32')

    v = None
    if with_v:
        # explicit, reproducibly seeded initial variables
        np.random.seed(0)
        bound = (6 / (output_channels + input_channels)) ** 0.5
        initial_w = np.random.uniform(-bound, bound, (output_channels, input_channels))
        weights = ivy.variable(ivy.array(initial_w, 'float32'))
        bias = ivy.variable(ivy.zeros([output_channels]))
        v = Container({'w': weights, 'b': bias})
    linear_layer = ivy.Linear(input_channels, output_channels, v=v)

    def loss_fn(v_):
        layer_out = linear_layer(x, v=v_)
        return ivy.reduce_mean(layer_out)[0]

    # optimizer
    optimizer = ivy.SGD()

    # train
    previous_loss = 1e12
    loss = None
    grads = None
    for _ in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, linear_layer.v)
        linear_layer.v = optimizer.step(linear_layer.v, grads)
        assert loss < previous_loss
        previous_loss = loss

    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)

    # cardinality test
    # mxnet slicing cannot reduce dimension to zero
    expected_shape = (1,) if call is helpers.mx_call else ()
    assert loss.shape == expected_shape

    # value test
    assert ivy.reduce_max(ivy.abs(grads.b)) > 0
    assert ivy.reduce_max(ivy.abs(grads.w)) > 0

    # compilation test
    # pytest scripting does not support **kwargs, so torch is excluded
    if call is not helpers.torch_call:
        helpers.assert_compilable(loss_fn)
def main(interactive=True, try_use_sim=True, f=None):
    """Run the spline-based path optimisation demo.

    A set of learnable anchor values, initialised on the straight line
    between ``sim.robot_start_config`` and ``sim.robot_target_config``, is
    updated with SGD on the cost returned by ``compute_cost_and_sdfs``. The
    loop runs while ``ivy.reduce_min(sdf_vals[2:])`` is below the clearance
    of ``0``. Each iteration also updates the path visualisation. The joint
    values from the last iteration are then executed in the simulator.

    :param interactive: forwarded to ``Simulator``.
    :param try_use_sim: forwarded to ``Simulator``.
    :param f: framework to use; a random one (excluding numpy) is chosen
        when ``None``.
    """
    # config
    this_dir = os.path.dirname(os.path.realpath(__file__))
    if f is None:
        f = choose_random_framework(excluded=['numpy'])
    set_framework(f)
    sim = Simulator(interactive, try_use_sim)
    learning_rate = 0.5
    anchor_count = 3
    sample_count = 100
    # the learnable anchors plus the fixed start and target points
    knot_count = 2 + anchor_count

    def unit_interval_column(num_points):
        # evenly spaced values in [0, 1] with a trailing unit dimension
        spaced = ivy.linspace(0, 1, num_points)
        return ivy.cast(ivy.expand_dims(spaced, -1), 'float32')

    # spline start
    anchor_points = unit_interval_column(knot_count)
    query_points = unit_interval_column(sample_count)

    # learnable parameters
    robot_start_config = ivy.array(ivy.cast(sim.robot_start_config, 'float32'))
    robot_target_config = ivy.array(ivy.cast(sim.robot_target_config, 'float32'))
    straight_line = ivy.linspace(robot_start_config, robot_target_config, knot_count)
    # drop the first and last entries, which are the fixed start and target
    interior_vals = ivy.transpose(straight_line[..., 1:-1], (1, 0))
    learnable_anchor_vals = ivy.variable(ivy.cast(interior_vals, 'float32'))

    # optimizer
    optimizer = ivy.SGD(lr=learning_rate)

    def cost_fn(xs):
        return compute_cost_and_sdfs(
            xs['w'], anchor_points, robot_start_config, robot_target_config,
            query_points, sim)

    # optimize
    iteration = 0
    colliding = True
    clearance = 0
    joint_query_vals = None
    while colliding:
        outputs = ivy.execute_with_gradients(
            cost_fn, Container({'w': learnable_anchor_vals}))
        total_cost, grads, joint_query_vals, link_positions, sdf_vals = outputs
        colliding = ivy.reduce_min(sdf_vals[2:]) < clearance

        image_path = os.path.join(
            this_dir, 'msp_no_sim', 'path_{}.png'.format(iteration))
        sim.update_path_visualization(link_positions, sdf_vals, image_path)

        updated = optimizer.step(Container({'w': learnable_anchor_vals}), grads)
        learnable_anchor_vals = updated['w']
        iteration += 1

    sim.execute_motion(joint_query_vals)
    sim.close()
    unset_framework()
def test_execute_with_gradients(func_n_xs_n_ty_n_te_n_tg, dtype_str, tensor_fn, dev_str, call):
    """Test ``ivy.execute_with_gradients`` against known values.

    The fixture tuple provides the function, its raw inputs, the expected
    function value, the expected extra output (or ``None`` when the function
    returns no extra output) and the expected gradients. The function is
    executed once directly for type/shape checks and once through ``call``
    for value checks.
    """
    # smoke test
    func, xs_raw, true_y, true_extra, true_dydxs = func_n_xs_n_ty_n_te_n_tg
    has_extra = true_extra is not None
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if has_extra:
        y, dydxs, extra_out = ivy.execute_with_gradients(func, xs)
    else:
        y, dydxs = ivy.execute_with_gradients(func, xs)
        extra_out = None

    # type test
    assert ivy.is_array(y) or isinstance(y, Number)
    if call is not helpers.np_call:
        assert isinstance(dydxs, dict)

    # cardinality test
    if call is not helpers.mx_call:
        # mxnet cannot slice array down to shape (), it remains fixed at size (1,)
        assert y.shape == true_y.shape
    if call is not helpers.np_call:
        for g, g_true in zip(dydxs.values(), true_dydxs.values()):
            assert g.shape == g_true.shape

    # value test
    # fresh variables, so the second execution does not reuse the first ones
    xs = xs_raw.map(lambda x, _: ivy.variable(ivy.array(x)))
    if has_extra:
        y, dydxs, extra_out = call(ivy.execute_with_gradients, func, xs)
    else:
        y, dydxs = call(ivy.execute_with_gradients, func, xs)
    assert np.allclose(y, true_y)
    # Compare against None rather than relying on truthiness: a falsy expected
    # value (e.g. 0) must still be checked, and a multi-element array has no
    # unambiguous truth value.
    if has_extra:
        assert np.allclose(extra_out, true_extra)
    if call is helpers.np_call:
        # numpy doesn't support autodiff
        assert dydxs is None
    else:
        for g, g_true in zip(dydxs.values(), true_dydxs.values()):
            assert np.allclose(ivy.to_numpy(g), g_true)
def train_step(loss_fn_in, optimizer, ntm, total_seq, target_seq, seq_len, mw, vw, step, max_grad_norm):
    """Take one optimiser step on ``ntm.v`` with gradients clipped by global norm.

    The gradients are multiplied by
    ``max_grad_norm / max(global_norm, max_grad_norm)``, where
    ``global_norm`` is the square root of the summed squared gradient
    entries, so they are left unchanged while the global norm does not
    exceed ``max_grad_norm``.

    ``mw``, ``vw`` and ``step`` are accepted but not used by this function.

    :return: tuple ``(loss, pred_vals)`` as produced by ``loss_fn_in``.
    """
    def objective(v_):
        return loss_fn_in(v_, total_seq, target_seq, seq_len)

    # compute loss
    loss, dldv, pred_vals = ivy.execute_with_gradients(objective, ntm.v)

    # global gradient norm over every leaf of the gradient container
    squared_sums = []
    for grad in dldv.to_flat_list():
        squared_sums.append(ivy.reduce_sum(grad ** 2))
    global_norm = ivy.reduce_sum(ivy.stack(squared_sums, 0)) ** 0.5

    # the denominator is the same for every leaf, so evaluate it once
    clip_denominator = ivy.maximum(global_norm, max_grad_norm)
    dldv = dldv.map(lambda x, _: x * max_grad_norm / clip_denominator)

    # update variables
    ntm.v = optimizer.step(ntm.v, dldv)
    return loss, pred_vals
def test_module_training(bs_ic_oc, dev_str, call):
    """Check that gradient descent lowers the loss of ``TrainableModule``.

    Ten ``ivy.gradient_descent_update`` steps with a learning rate of
    ``1e-3`` are taken on the mean module output; the loss must strictly
    decrease at every step. Afterwards the loss/gradient types and shapes
    are verified and the ``w`` and ``b`` gradients of ``linear0``,
    ``linear1`` and ``linear2`` must be non-zero. The test is skipped for
    the NumPy backend.
    """
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()

    batch_shape, input_channels, output_channels = bs_ic_oc
    lower = ivy.zeros(batch_shape)
    upper = ivy.ones(batch_shape)
    x = ivy.cast(ivy.linspace(lower, upper, input_channels), 'float32')
    module = TrainableModule(input_channels, output_channels)

    def loss_fn(v_):
        module_out = module(x, v=v_)
        return ivy.reduce_mean(module_out)[0]

    # train
    previous_loss = 1e12
    loss = None
    grads = None
    for _ in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, module.v)
        module.v = ivy.gradient_descent_update(module.v, grads, 1e-3)
        assert loss < previous_loss
        previous_loss = loss

    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)

    # cardinality test
    # mxnet slicing cannot reduce dimension to zero
    expected_shape = (1,) if call is helpers.mx_call else ()
    assert loss.shape == expected_shape

    # value test
    for layer_name in ('linear0', 'linear1', 'linear2'):
        layer_grads = getattr(grads, layer_name)
        for param_name in ('b', 'w'):
            assert ivy.reduce_max(ivy.abs(getattr(layer_grads, param_name))) > 0

    # compilation test
    # pytest scripting does not support **kwargs, so torch is excluded
    if call is not helpers.torch_call:
        helpers.assert_compilable(loss_fn)
def test_lstm_layer_training(b_t_ic_hc_otf_sctv, with_v, dtype_str, tensor_fn, dev_str, call):
    """Check that gradient descent lowers the loss of an ``ivy.LSTM`` layer.

    Ten ``ivy.gradient_descent_update`` steps with a learning rate of
    ``1e-3`` are taken on the mean layer output; the loss must strictly
    decrease at every step. Afterwards the loss/gradient types and shapes
    are verified and every gradient leaf must be non-zero. The test is
    skipped for the NumPy backend.
    """
    # smoke test
    if call is helpers.np_call:
        # NumPy does not support gradients
        pytest.skip()

    # the last two fixture entries are not needed by this test
    b, t, input_channels, hidden_channels, _, _ = b_t_ic_hc_otf_sctv
    x = ivy.cast(
        ivy.linspace(ivy.zeros([b, t]), ivy.ones([b, t]), input_channels),
        'float32')

    v = None
    if with_v:
        # explicit initial variables, constant 0.5 everywhere
        gates_width = 4 * hidden_channels
        kernel = ivy.variable(ivy.ones([input_channels, gates_width]) * 0.5)
        recurrent_kernel = ivy.variable(ivy.ones([hidden_channels, gates_width]) * 0.5)
        v = Container({
            'input': {'layer_0': {'w': kernel}},
            'recurrent': {'layer_0': {'w': recurrent_kernel}},
        })
    lstm_layer = ivy.LSTM(input_channels, hidden_channels, v=v)

    def loss_fn(v_):
        seq_out, (_state_h, _state_c) = lstm_layer(x, v=v_)
        return ivy.reduce_mean(seq_out)[0]

    # train
    previous_loss = 1e12
    loss = None
    grads = None
    for _ in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, lstm_layer.v)
        lstm_layer.v = ivy.gradient_descent_update(lstm_layer.v, grads, 1e-3)
        assert loss < previous_loss
        previous_loss = loss

    # type test
    assert ivy.is_array(loss)
    assert isinstance(grads, ivy.Container)

    # cardinality test
    # mxnet slicing cannot reduce dimension to zero
    expected_shape = (1,) if call is helpers.mx_call else ()
    assert loss.shape == expected_shape

    # value test
    for _key, grad in grads.to_iterator():
        assert ivy.reduce_max(ivy.abs(grad)) > 0

    # compilation test
    # pytest scripting does not support **kwargs, so torch is excluded
    if call is not helpers.torch_call:
        helpers.assert_compilable(loss_fn)
def train_step(compiled_loss_fn, optimizer, initial_state, policy, f):
    """Take one optimiser step on ``policy.v``.

    :param compiled_loss_fn: callable taking ``(initial_state, policy_variables)``
        and returning the loss.
    :param optimizer: optimiser whose ``step`` produces the updated variables.
    :param initial_state: first argument passed to ``compiled_loss_fn``.
    :param policy: object whose ``v`` attribute is read and overwritten.
    :param f: object providing ``reshape``, used to shape the returned value.
    :return: the negated loss, reshaped to ``(1,)``.
    """
    def objective(pol_vs):
        return compiled_loss_fn(initial_state, pol_vs)

    loss, grads = ivy.execute_with_gradients(objective, policy.v)
    policy.v = optimizer.step(policy.v, grads)
    reshaped_loss = f.reshape(loss, (1,))
    return -reshaped_loss
def main():
    """Run through three small demos of the Ivy memory modules.

    1. An Ivy LSTM inside a ``torch.nn.Module``, trained with ``torch.optim.SGD``.
    2. An Ivy NTM inside a ``tf.keras.Model``, trained with Keras' Adam.
    3. An Ivy ESM inside a pure ``ivy.Module``, trained with ``ivy.SGD``.

    Each demo builds a model, checks the output shape of a forward pass and
    runs ten training steps against an all-zero target, printing the loss.
    """

    # LSTM #
    # -----#

    # using the Ivy LSTM memory module, dual stacked, in a PyTorch model

    class TorchModelWithLSTM(torch.nn.Module):

        def __init__(self, channels_in, channels_out):
            torch.nn.Module.__init__(self)
            self._linear = torch.nn.Linear(channels_in, 64)
            self._lstm = ivy_mem.LSTM(64, channels_out, 2, return_state=False)
            self._assign_variables()

        def _assign_variables(self):
            # register every Ivy variable as a torch parameter, then point the
            # Ivy module at the registered parameters so that both share them
            self._lstm.v.map(
                lambda x, kc: self.register_parameter(
                    name=kc, param=torch.nn.Parameter(x)))
            self._lstm.v = self._lstm.v.map(lambda x, kc: self._parameters[kc])

        def forward(self, x):
            x = self._linear(x)
            return self._lstm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = TorchModelWithLSTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = torch.rand(input_shape)

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = torch.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return torch.sum((pred - target) ** 2)

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

    # train model
    print('\ntraining dummy PyTorch LSTM model...\n')
    for i in range(10):
        # torch accumulates gradients across backward() calls, so they must
        # be cleared before each step; otherwise every update would use the
        # sum of all gradients computed so far
        optimizer.zero_grad()
        loss = loss_fn()
        loss.backward()
        optimizer.step()
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy PyTorch LSTM model trained!\n')
    ivy.unset_framework()

    # NTM #
    # ----#

    # using the Ivy NTM memory module in a TensorFlow model

    class TfModelWithNTM(tf.keras.Model):

        def __init__(self, channels_in, channels_out):
            tf.keras.Model.__init__(self)
            self._linear = tf.keras.layers.Dense(64)
            memory_size = 4
            memory_vector_dim = 1
            self._ntm = ivy_mem.NTM(
                input_dim=64, output_dim=channels_out,
                ctrl_output_size=channels_out, ctrl_layers=1,
                memory_size=memory_size,
                memory_vector_dim=memory_vector_dim,
                read_head_num=1, write_head_num=1)
            self._assign_variables()

        def _assign_variables(self):
            # create one Keras weight per Ivy variable and copy the values over
            self._ntm.v.map(
                lambda x, kc: self.add_weight(name=kc, shape=x.shape))
            self.set_weights(
                [ivy.to_numpy(v) for k, v in self._ntm.v.to_iterator()])
            # index the Keras weights by name; the lookup below appends ':0'
            # to the Ivy key chain to form that name
            self.trainable_weights_dict = dict()
            for weight in self.trainable_weights:
                self.trainable_weights_dict[weight.name] = weight
            self._ntm.v = self._ntm.v.map(
                lambda x, kc: self.trainable_weights_dict[kc + ':0'])

        def call(self, x, **kwargs):
            x = self._linear(x)
            return self._ntm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('tensorflow')
    model = TfModelWithNTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = tf.random.uniform(input_shape)

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = tf.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return tf.reduce_sum((pred - target) ** 2)

    # define optimizer
    optimizer = tf.keras.optimizers.Adam(1e-2)

    # train model
    print('\ntraining dummy TensorFlow NTM model...\n')
    for i in range(10):
        with tf.GradientTape() as tape:
            loss = loss_fn()
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy TensorFlow NTM model trained!\n')
    ivy.unset_framework()

    # ESM #
    # ----#

    # using the Ivy ESM memory module in a pure-Ivy model; the backend
    # selected below is 'torch'
    # ToDo: add pre-ESM conv layers to this demo

    class IvyModelWithESM(ivy.Module):

        def __init__(self, channels_in, channels_out):
            self._channels_in = channels_in
            self._esm = ivy_mem.ESM(omni_image_dims=(16, 32))
            # NOTE(review): Linear is taken from ivy_mem here - confirm that
            # ivy_mem exposes it (ivy.Linear may have been intended)
            self._linear = ivy_mem.Linear(channels_in, channels_out)
            # the sub-modules are created before the base initialiser runs
            ivy.Module.__init__(self, 'cpu')

        def _forward(self, obs):
            mem = self._esm(obs)
            x = ivy.reshape(mem.mean, (-1, self._channels_in))
            return self._linear(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = IvyModelWithESM(in_channels, out_channels)

    # input config
    batch_size = 1
    image_dims = [5, 5]
    num_timesteps = 2
    num_feature_channels = 3
    leading_shape = [batch_size, num_timesteps]

    # create image of pixel co-ordinates
    # NOTE(review): this value is not referenced again in this function
    uniform_pixel_coords = ivy_vision.create_uniform_pixel_coords_image(
        image_dims, [batch_size, num_timesteps])

    # define camera measurement
    depths = ivy.random_uniform(shape=leading_shape + image_dims + [1])
    ds_pixel_coords = ivy_vision.depth_to_ds_pixel_coords(depths)
    inv_calib_mats = ivy.random_uniform(
        shape=[batch_size, num_timesteps, 3, 3])
    cam_coords = ivy_vision.ds_pixel_to_cam_coords(
        ds_pixel_coords, inv_calib_mats)[..., 0:3]
    features = ivy.random_uniform(
        shape=leading_shape + image_dims + [num_feature_channels])
    img_mean = ivy.concatenate((cam_coords, features), -1)
    cam_rel_mat = ivy.identity(
        4, batch_shape=[batch_size, num_timesteps])[..., 0:3, :]

    # place these into an ESM camera measurement container
    esm_cam_meas = ESMCamMeasurement(img_mean=img_mean,
                                     cam_rel_mat=cam_rel_mat)

    # define agent pose transformation
    agent_rel_mat = ivy.identity(
        4, batch_shape=[batch_size, num_timesteps])[..., 0:3, :]

    # collect together into an ESM observation container
    esm_obs = ESMObservation(img_meas={'camera_0': esm_cam_meas},
                             agent_rel_mat=agent_rel_mat)

    # call model and test output
    output = model(esm_obs)
    assert output.shape[-1] == out_channels

    # define loss function
    target = ivy.zeros_like(output)

    def loss_fn(v):
        pred = model(esm_obs, v=v)
        return ivy.reduce_mean((pred - target) ** 2)

    # optimizer
    optimizer = ivy.SGD(lr=1e-4)

    # train model
    print('\ntraining dummy Ivy ESM model...\n')
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, model.v)
        model.v = optimizer.step(model.v, grads)
        print('step {}, loss = {}'.format(i, ivy.to_numpy(loss).item()))
    print('\ndummy Ivy ESM model trained!\n')
    ivy.unset_framework()

    # message
    print('End of Run Through Demo!')