def rollout(model, trajectories, max_timesteps=300):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    predicted_states = [[states[0]] for states, _, _ in trajectories]
    actual_states = [states[:timesteps] for states, _, _ in trajectories]
    for t in range(1, timesteps):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)

        device = next(model.parameters()).device
        pred = model(*utils.to_torch([s, o, c], device=device))
        pred = utils.to_numpy(pred)
        assert pred.shape == (len(trajectories), 2)
        for i in range(len(trajectories)):
            predicted_states[i].append(pred[i])

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
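# Example usage (sketch, not part of the original source): roll out a trained
# dynamics model open-loop against held-out trajectories and report RMSE per
# state dimension. The dataset path and `dynamics_model` object are
# placeholders; only the `rollout()` and `load_trajectories()` signatures in
# this codebase are assumed.
#
#   trajectories = load_trajectories(("data/push_val.hdf5", 10))
#   pred, actual = rollout(dynamics_model, trajectories, max_timesteps=200)
#   rmse = np.sqrt(np.mean((pred - actual) ** 2, axis=(0, 1)))
#   print("Open-loop RMSE per state dimension:", rmse)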
def __init__(self, *paths, stddev=None, samples_per_pair=20, **kwargs):
    """
    Args:
        *paths: paths to dataset hdf5 files
    """

    trajectories = load_trajectories(*paths, **kwargs)

    if stddev is None:
        stddev = self.default_stddev
    self.stddev = np.array(stddev)
    self.samples_per_pair = samples_per_pair
    self.dataset = []
    for i, trajectory in enumerate(tqdm(trajectories)):
        assert len(trajectory) == 3
        states, observations, controls = trajectory

        timesteps = len(states)
        assert type(observations) == dict
        assert len(controls) == timesteps

        for t in range(0, timesteps):
            # Pull out data & labels
            state = states[t]
            observation = utils.DictIterator(observations)[t]

            self.dataset.append((state, observation))

    print("Loaded {} points".format(len(self.dataset)))
def rollout_lstm(model, trajectories, max_timesteps=300):
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    trajectory_count = len(trajectories)
    state_dim = trajectories[0][0].shape[-1]

    actual_states = np.zeros((trajectory_count, timesteps, state_dim))

    batched_observations = {}
    batched_controls = []

    # Trajectories is a list of (states, observations, controls)
    for i, (states, observations, controls) in enumerate(trajectories):
        observations = utils.DictIterator(observations)[1:timesteps]
        utils.DictIterator(batched_observations).append(observations)
        batched_controls.append(controls[1:timesteps])

        assert states.shape == (timesteps, state_dim)
        actual_states[i] = states[:timesteps]  # * 0 + 0.1

    utils.DictIterator(batched_observations).convert_to_numpy()
    batched_controls = np.array(batched_controls)

    # Propagate through model
    # model.reset_hidden_states(utils.to_torch(actual_states[:, 0, :]))
    device = next(model.parameters()).device
    predicted_states = np.concatenate([
        actual_states[:, 0:1, :],
        utils.to_numpy(
            model(
                utils.to_torch(batched_observations, device),
                utils.to_torch(batched_controls, device),
            )),
    ], axis=1)

    # Indexing: batch, sequence length, state
    return predicted_states, actual_states
def split(x):
    if type(x) == np.ndarray:
        new_length = (len(x) // subsequence_length) * subsequence_length
        x = x[:new_length]
        return np.split(x[:new_length], sections)
    elif type(x) == dict:
        output = {}
        for key, value in x.items():
            output[key] = split(value)
        return utils.DictIterator(output)
    else:
        assert False
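# Illustration (sketch, not part of the original source): `split` relies on
# `subsequence_length` and `sections` from its enclosing scope, which is not
# shown here. Assuming sections = new_length // subsequence_length, a
# (10, 2) array with subsequence_length=3 is truncated to 9 rows and split
# into 3 equal subsequences:
#
#   x = np.arange(20).reshape(10, 2)
#   subsequence_length = 3
#   new_length = (len(x) // subsequence_length) * subsequence_length  # 9
#   sections = new_length // subsequence_length                       # 3
#   chunks = np.split(x[:new_length], sections)
#   assert all(chunk.shape == (3, 2) for chunk in chunks)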
def __init__(self, *paths, **kwargs):
    """
    Input:
        *paths: paths to dataset hdf5 files
    """
    trajectories = load_trajectories(*paths, **kwargs)
    active_dataset = []
    inactive_dataset = []
    for trajectory in trajectories:
        assert len(trajectory) == 3
        states, observations, controls = trajectory

        timesteps = len(states)
        assert type(observations) == dict
        assert len(controls) == timesteps

        for t in range(1, timesteps):
            # Pull out data & labels
            prev_state = states[t - 1]
            observation = utils.DictIterator(observations)[t]
            control = controls[t]
            new_state = states[t]

            # Construct sample, bring to torch, & add to dataset
            sample = (prev_state, observation, control, new_state)
            sample = tuple(utils.to_torch(x) for x in sample)
            if np.linalg.norm(new_state - prev_state) > 1e-5:
                active_dataset.append(sample)
            else:
                inactive_dataset.append(sample)

    print("Parsed data: {} active, {} inactive".format(
        len(active_dataset), len(inactive_dataset)))
    keep_count = min(len(active_dataset) // 2, len(inactive_dataset))
    print("Keeping:", keep_count)

    np.random.shuffle(inactive_dataset)
    self.dataset = active_dataset + inactive_dataset[:keep_count]
def _print_normalization(trajectories):
    """ Helper for producing code to normalize inputs
    """
    states = []
    observations = {}
    controls = []
    for t in trajectories:
        states.extend(t[0])
        utils.DictIterator(observations).extend(t[1])
        controls.extend(t[2])

    def print_ranges(**kwargs):
        for k, v in kwargs.items():
            mean = repr(np.mean(v, axis=0, keepdims=True))
            stddev = repr(np.std(v, axis=0, keepdims=True))
            print(f"{k} -= np.{mean}")
            print(f"{k} /= np.{stddev}")

    print_ranges(
        gripper_pos=observations['gripper_pos'],
        gripper_sensors=observations['gripper_sensors'],
        states=states,
        controls=controls,
    )
def rollout_and_eval(pf_model,
                     trajectories,
                     start_time=0,
                     max_timesteps=300,
                     particle_count=100,
                     noisy_dynamics=True,
                     true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.2, size=[N, 1, state_dim])
        particles += np.random.normal(0, 0.2, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the estimate of our particles
    # This is a little hacky
    # (N, t, state_dim)
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    # (N, t, M, state_dim)
    particles_history = []
    # (N, t, M)
    weights_history = []
    for i in range(N):
        particles_history.append([utils.to_numpy(particles[i])])
        weights_history.append([utils.to_numpy(log_weights[i])])

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])
        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            particles_history[i].append(utils.to_numpy(particles[i]))
            weights_history[i].append(np.exp(utils.to_numpy(log_weights[i])))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    ### Eval
    timesteps = len(actual_states[0])

    def color(i):
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        return colors[i % len(colors)]

    state_dim = actual_states.shape[-1]
    for j in range(state_dim):
        plt.figure(figsize=(8, 6))
        for i, (pred, actual, particles, weights) in enumerate(
                zip(predicted_states, actual_states, particles_history,
                    weights_history)):
            predicted_label_arg = {}
            actual_label_arg = {}
            if i == 0:
                predicted_label_arg['label'] = "Predicted"
                actual_label_arg['label'] = "Ground Truth"
            plt.plot(range(timesteps),
                     pred[:, j],
                     c=color(i),
                     alpha=0.5,
                     **predicted_label_arg)
            plt.plot(range(timesteps),
                     actual[:, j],
                     c=color(i),
                     **actual_label_arg)

            for t in range(0, timesteps, 20):
                particle_ys = particles[t][:, j]
                particle_xs = [t for _ in particle_ys]
                plt.scatter(particle_xs, particle_ys, c=color(i), alpha=0.02)
                # particle_alphas = weights[t]
                # particle_alphas /= np.max(particle_alphas)
                # particle_alphas *= 0.3
                # particle_alphas += 0.05
                #
                # for px, py, pa in zip(
                #         particle_xs, particle_ys, particle_alphas):
                #     plt.scatter([px], [py], c=color(i), alpha=pa)

        rmse = np.sqrt(
            np.mean(
                (predicted_states[:, 10:, j] - actual_states[:, 10:, j])**2))
        print(rmse)

        plt.title(f"State #{j} // RMSE = {rmse}")
        plt.xlabel("Timesteps")
        plt.ylabel("Value")
        plt.legend()
        plt.show()
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True,
            true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.1, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the estimate of our particles
    # This is a little hacky
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])
        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
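# Example usage (sketch, not part of the original source): evaluate a trained
# particle filter on validation trajectories. `pf_model` and the dataset path
# are placeholders; only the `rollout()` signature above is assumed.
#
#   trajectories = load_trajectories("data/push_val.hdf5")
#   pred, actual = rollout(pf_model, trajectories,
#                          particle_count=200, true_initial=True)
#   # Discard a short burn-in before computing RMSE
#   rmse = np.sqrt(np.mean((pred[:, 10:] - actual[:, 10:]) ** 2, axis=(0, 1)))
#   print("RMSE per state dimension:", rmse)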
def train_e2e(buddy,
              pf_model,
              dataloader,
              log_interval=2,
              loss_type="mse",
              optim_name="e2e",
              resample=False,
              know_image_blackout=False):
    # Train for 1 epoch
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        batch_particles, batch_states, batch_obs, batch_controls = batch_gpu

        # N = batch size, M = particle count
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        N, M, state_dim = batch_particles.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        # Give all particles equal weights
        particles = batch_particles
        log_weights = torch.ones((N, M), device=buddy._device) * (-np.log(M))

        # Accumulate losses from each timestep
        losses = []
        for t in range(1, timesteps):
            prev_particles = particles
            prev_log_weights = log_weights

            if know_image_blackout:
                state_estimates, new_particles, new_log_weights = pf_model.forward(
                    prev_particles,
                    prev_log_weights,
                    utils.DictIterator(batch_obs)[:, t - 1, :],
                    batch_controls[:, t, :],
                    resample=resample,
                    noisy_dynamics=True,
                    know_image_blackout=True)
            else:
                state_estimates, new_particles, new_log_weights = pf_model.forward(
                    prev_particles,
                    prev_log_weights,
                    utils.DictIterator(batch_obs)[:, t - 1, :],
                    batch_controls[:, t, :],
                    resample=resample,
                    noisy_dynamics=True,
                )

            if loss_type == "gmm":
                loss = dpf.gmm_loss(particles_states=new_particles,
                                    log_weights=new_log_weights,
                                    true_states=batch_states[:, t, :],
                                    gmm_variances=np.array([0.1]))
            elif loss_type == "mse":
                loss = torch.mean(
                    (state_estimates - batch_states[:, t, :])**2)
            else:
                assert False, "Invalid loss"

            losses.append(loss)

            # Enable backprop through time
            particles = new_particles
            log_weights = new_log_weights

            # # Disable backprop through time
            # particles = new_particles.detach()
            # log_weights = new_log_weights.detach()

            # assert state_estimates.shape == batch_states[:, t, :].shape

        buddy.minimize(torch.mean(torch.stack(losses)),
                       optimizer_name=optim_name,
                       checkpoint_interval=1000)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope(optim_name):
                buddy.log("Training loss", np.mean(utils.to_numpy(losses)))

                buddy.log("Log weights mean", log_weights.mean())
                buddy.log("Log weights std", log_weights.std())

                buddy.log("Particle states mean", particles.mean())
                buddy.log("Particle states std", particles.std())

    print("Epoch loss:", np.mean(utils.to_numpy(losses)))
def rollout_kf(kf_model,
               trajectories,
               start_time=0,
               max_timesteps=300,
               noisy_dynamics=False,
               true_initial=False,
               init_state_noise=0.2,
               save_data_name=None):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    print("end time:", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        states_tuple = kf_model.forward(
            initial_states,
            initial_sigmas,
            initial_obs,
            dummy_controls,
        )

        initial_states = states_tuple[0]
        initial_sigmas = states_tuple[1]

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            s, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    predicted_sigmas = np.array(predicted_sigmas)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("RMSE x:\n{}\ny:\n{}".format(rmse_x, rmse_y))

    if save_data_name is not None:
        import h5py
        filename = "rollout/" + save_data_name + ".h5"

        try:
            f = h5py.File(filename, 'w')
        except Exception:
            import os
            new_dest = "rollout/old/{}.h5".format(save_data_name)
            os.rename(filename, new_dest)
            f = h5py.File(filename, 'w')

        f.create_dataset("predicted_states", data=predicted_states)
        f.create_dataset("actual_states", data=actual_states)
        f.create_dataset("predicted_sigmas", data=predicted_sigmas)
        f.close()

    return predicted_states, actual_states, predicted_sigmas, contact_states
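# Example usage (sketch, not part of the original source): roll out a trained
# Kalman filter model on validation trajectories. `kf_model` and the dataset
# path are placeholders; only the `rollout_kf()` signature above is assumed.
#
#   trajectories = load_trajectories("data/push_val.hdf5")
#   pred, actual, sigmas, contacts = rollout_kf(
#       kf_model, trajectories,
#       true_initial=True, init_state_noise=0.2,
#       save_data_name="kf_val")
#   # pred/actual: (N, T, state_dim); sigmas: (N, T, state_dim, state_dim)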
def rollout_kf_full(
        kf_model,
        trajectories,
        start_time=0,
        max_timesteps=300,
        true_initial=False,
        init_state_noise=0.2,
):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    print("end time:", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    actions = get_actions(trajectories, start_time, max_timesteps)

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        print("put in measurement model")
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        state, state_sigma = kf_model.measurement_model.forward(
            initial_obs, initial_states)

        initial_states = state
        initial_sigmas = state_sigma

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    predicted_dyn_states = [[utils.to_numpy(initial_states[i])]
                            for i in range(len(trajectories))]
    predicted_dyn_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                            for i in range(len(trajectories))]
    predicted_meas_states = [[utils.to_numpy(initial_states[i])]
                             for i in range(len(trajectories))]
    predicted_meas_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                             for i in range(len(trajectories))]

    # Jacobian is not initialized
    predicted_jac = [[] for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            s, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        dynamics_states = kf_model.dynamics_states
        dynamics_sigma = kf_model.dynamics_sigma
        measurement_states = kf_model.measurement_states
        measurement_sigma = kf_model.measurement_sigma
        dynamics_jac = kf_model.dynamics_jac

        for i in range(len(trajectories)):
            predicted_dyn_states[i].append(utils.to_numpy(dynamics_states[i]))
            predicted_dyn_sigmas[i].append(utils.to_numpy(dynamics_sigma))
            predicted_meas_states[i].append(
                utils.to_numpy(measurement_states[i]))
            predicted_meas_sigmas[i].append(
                utils.to_numpy(measurement_sigma[i]))
            predicted_jac[i].append(utils.to_numpy(dynamics_jac[i]))
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    results = {}
    results['dyn_states'] = np.array(predicted_dyn_states)
    results['dyn_sigmas'] = np.array(predicted_dyn_sigmas)
    results['meas_states'] = np.array(predicted_meas_states)
    results['meas_sigmas'] = np.array(predicted_meas_sigmas)
    results['dyn_jac'] = np.array(predicted_jac)
    results['predicted_states'] = np.array(predicted_states)
    results['predicted_sigmas'] = np.array(predicted_sigmas)
    results['actual_states'] = np.array(actual_states)
    results['contact_states'] = np.array(contact_states)
    results['actions'] = np.array(actions)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("RMSE x:\n{}\ny:\n{}".format(rmse_x, rmse_y))

    return results
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    predicted_states = [[states[start_time]] for states, _, _ in trajectories]
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    for i in range(N):
        particles[i, :] = predicted_states[i][0]
    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm_notebook(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])
        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def load_trajectories(*paths,
                      use_vision=True,
                      vision_interval=10,
                      use_proprioception=True,
                      use_haptics=True,
                      use_mass=False,
                      use_depth=False,
                      image_blackout_ratio=0,
                      sequential_image_rate=1,
                      start_timestep=0,
                      direction_filter=None,
                      **unused):
    """
    Loads a list of trajectories from a set of input paths, where each
    trajectory is a tuple containing...
        states: a (T, state_dim) array of state vectors
        observations: a key->(T, *) dict of observations
        controls: a (T, control_dim) array of control vectors

    Each path can either be a string or a (string, int) tuple, where the int
    indicates the maximum number of trajectories to import.
    """
    trajectories = []

    assert 1 > image_blackout_ratio >= 0
    assert image_blackout_ratio == 0 or sequential_image_rate == 1

    for path in paths:
        count = float('inf')
        if type(path) == tuple:
            path, count = path
            assert type(count) == int

        with utils.TrajectoriesFile(path) as f:
            # Iterate over each trajectory
            for i, trajectory in enumerate(f):
                if i >= count:
                    break

                timesteps = len(trajectory['Cylinder0_pos'])

                # Define our state: we expect this to be:
                # (x, y, cos theta, sin theta, mass, friction)
                # TODO: add mass, friction
                state_dim = 2
                states = np.full((timesteps, state_dim), np.nan)

                states[:, :2] = trajectory['Cylinder0_pos'][:, :2]  # x, y
                if use_mass:
                    states[:, 3] = trajectory['Cylinder0_mass'][:, 0]

                # states[:, 2] = np.cos(trajectory['object_z_angle'])
                # states[:, 3] = np.sin(trajectory['object_z_angle'])
                # states[:, 5] = trajectory['Cylinder0_friction'][:, 0]

                # Pull out observations
                ## This currently consists of:
                ## > gripper_pos: end effector position
                ## > gripper_sensors: F/T, contact sensors
                ## > image: camera image

                observations = {}
                observations['gripper_pos'] = trajectory['eef_pos']
                assert observations['gripper_pos'].shape == (timesteps, 3)

                observations['gripper_sensors'] = np.concatenate((
                    trajectory['force'],
                    trajectory['contact'][:, np.newaxis],
                ), axis=1)
                assert observations['gripper_sensors'].shape[1] == 7

                if not use_proprioception:
                    observations['gripper_pos'][:] = 0
                if not use_haptics:
                    observations['gripper_sensors'][:] = 0

                if 'raw_image' in trajectory:
                    observations['raw_image'] = trajectory['raw_image']
                observations['image'] = np.zeros_like(trajectory['image'])
                if use_vision:
                    for i in range(len(observations['image'])):
                        index = (i // vision_interval) * vision_interval
                        index = min(index, len(observations['image']))
                        blackout_chance = np.random.uniform()
                        # If the blackout chance exceeds the ratio, fill in
                        # the image; otherwise leave it zeroed out
                        if image_blackout_ratio == 0 and i % sequential_image_rate == 0:
                            observations['image'][i] = trajectory['image'][
                                index]

                        if blackout_chance > image_blackout_ratio and sequential_image_rate == 1:
                            observations['image'][i] = trajectory['image'][
                                index]

                observations['depth'] = np.zeros_like(trajectory['depth'])
                if use_depth:
                    for i in range(len(observations['depth'])):
                        index = (i // vision_interval) * vision_interval
                        index = min(index, len(observations['depth']))
                        observations['depth'][i] = trajectory['depth'][index]

                # Pull out controls
                ## This currently consists of:
                ## > previous end effector position
                ## > end effector position delta
                ## > binary contact reading
                eef_positions = trajectory['eef_pos']
                eef_positions_shifted = np.roll(eef_positions, shift=1, axis=0)
                eef_positions_shifted[0] = eef_positions[0]
                controls = np.concatenate([
                    eef_positions_shifted,
                    eef_positions - eef_positions_shifted,
                    trajectory['contact'][:, np.newaxis],
                ], axis=1)
                assert controls.shape == (timesteps, 7)

                # Normalization
                observations['gripper_pos'] -= np.array(
                    [[0.46806443, -0.0017836, 0.88028437]], dtype=np.float32)
                observations['gripper_pos'] /= np.array(
                    [[0.02410769, 0.02341035, 0.04018243]], dtype=np.float32)
                observations['gripper_sensors'] -= np.array([[
                    4.9182904e-01, 4.5039989e-02, -3.2791464e+00,
                    -3.3874984e-03, 1.1552566e-02, -8.4817986e-04,
                    2.1303751e-01
                ]], dtype=np.float32)
                observations['gripper_sensors'] /= np.array([[
                    1.6152629, 1.666905, 1.9186896, 0.14219016, 0.14232528,
                    0.01675198, 0.40950698
                ]], dtype=np.float32)
                states -= np.array([[0.4970164, -0.00916641]])
                states /= np.array([[0.0572766, 0.06118315]])
                controls -= np.array([[
                    4.6594709e-01, -2.5247163e-03, 8.8094306e-01,
                    1.2939950e-04, -5.4364675e-05, -6.1112235e-04,
                    2.2041667e-01
                ]], dtype=np.float32)
                controls /= np.array([[
                    0.02239027, 0.02356066, 0.0405312, 0.00054858, 0.0005754,
                    0.00046352, 0.41451886
                ]], dtype=np.float32)

                x_delta = states[start_timestep, 0] - states[-1, 0]
                y_delta = states[start_timestep, 1] - states[-1, 1]
                if direction_filter == 'x':
                    if not (abs(x_delta) > 0.55 and abs(y_delta) < 0.2):
                        continue
                if direction_filter == 'y':
                    if not (abs(x_delta) < 0.20 and abs(y_delta) > 0.55):
                        continue

                trajectories.append(
                    (states[start_timestep:],
                     utils.DictIterator(observations)[start_timestep:],
                     controls[start_timestep:]))

    ## Uncomment this line to generate the lines required to normalize data
    # _print_normalization(trajectories)

    return trajectories
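# Example usage (sketch, not part of the original source): the hdf5 paths
# below are placeholders; the keyword arguments are the ones defined in the
# signature above.
#
#   trajectories = load_trajectories(
#       "data/push_train.hdf5",
#       ("data/push_extra.hdf5", 50),  # cap this file at 50 trajectories
#       use_vision=True,
#       vision_interval=10,
#       image_blackout_ratio=0,
#   )
#   states, observations, controls = trajectories[0]
#   print(states.shape, observations['gripper_pos'].shape, controls.shape)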
def train_fusion(buddy,
                 fusion_model,
                 dataloader,
                 log_interval=2,
                 optim_name="fusion",
                 measurement_init=True,
                 init_state_noise=0.2,
                 one_loss=True,
                 nll=False):
    # TODO: change loss to selection/mixed
    for batch_idx, batch in enumerate(dataloader):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        _, batch_states, batch_obs, batch_controls = batch_gpu

        # N = batch size
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        state = batch_states[:, 0, :]
        state_sigma = torch.eye(state.shape[-1],
                                device=buddy._device) * init_state_noise**2
        state_sigma = state_sigma.unsqueeze(0).repeat(N, 1, 1)

        if measurement_init:
            state, state_sigma = fusion_model.measurement_only(
                utils.DictIterator(batch_obs)[:, 0, :], state)
        else:
            dist = torch.distributions.Normal(
                torch.tensor([0.]),
                torch.ones(state.shape) * init_state_noise)
            noise = dist.sample().to(state.device)
            state += noise

        losses_image = []
        losses_force = []
        losses_fused = []
        losses_nll = []
        losses_total = []

        for t in range(1, timesteps - 1):
            prev_state = state
            prev_state_sigma = state_sigma

            state, state_sigma, force_state, image_state = fusion_model.forward(
                prev_state,
                prev_state_sigma,
                utils.DictIterator(batch_obs)[:, t, :],
                batch_controls[:, t, :],
            )

            loss_image = torch.mean((image_state - batch_states[:, t, :])**2)
            loss_force = torch.mean((force_state - batch_states[:, t, :])**2)
            loss_fused = torch.mean((state - batch_states[:, t, :])**2)

            losses_force.append(loss_force.item())
            losses_image.append(loss_image.item())
            losses_fused.append(loss_fused.item())

            if nll:
                loss_nll = torch.mean(
                    -1.0 * utility.gaussian_log_likelihood(
                        state, batch_states[:, t, :], state_sigma))
                losses_nll.append(loss_nll)
                losses_total.append(loss_nll)
            elif one_loss:
                losses_total.append(loss_fused)
            else:
                losses_total.append(loss_image + loss_force + loss_fused)

        loss = torch.mean(torch.stack(losses_total))
        buddy.minimize(loss,
                       optimizer_name=optim_name,
                       checkpoint_interval=10000)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope("fusion"):
                buddy.log("Training loss", loss.item())
                buddy.log("Image loss", np.mean(np.array(losses_image)))
                buddy.log("Force loss", np.mean(np.array(losses_force)))
                buddy.log("Fused loss", np.mean(np.array(losses_fused)))
                buddy.log_model_grad_norm()
def train_e2e(buddy,
              ekf_model,
              dataloader,
              log_interval=2,
              optim_name="ekf",
              measurement_init=True,
              checkpoint_interval=1000,
              init_state_noise=0.2,
              loss_type="mse"):
    # Train for 1 epoch
    for batch_idx, batch in enumerate(dataloader):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        _, batch_states, batch_obs, batch_controls = batch_gpu

        # N = batch size
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        state = batch_states[:, 0, :]
        state_sigma = torch.eye(state.shape[-1],
                                device=buddy._device) * init_state_noise**2
        state_sigma = state_sigma.unsqueeze(0).repeat(N, 1, 1)

        if measurement_init:
            state, state_sigma = ekf_model.measurement_model.forward(
                utils.DictIterator(batch_obs)[:, 0, :], batch_states[:, 0, :])
        else:
            dist = torch.distributions.Normal(
                torch.tensor([0.]),
                torch.ones(state.shape) * init_state_noise)
            noise = dist.sample().to(state.device)
            state += noise

        # Accumulate losses from each timestep
        losses = []
        for t in range(1, timesteps - 1):
            prev_state = state
            prev_state_sigma = state_sigma

            state, state_sigma = ekf_model.forward(
                prev_state,
                prev_state_sigma,
                utils.DictIterator(batch_obs)[:, t, :],
                batch_controls[:, t, :],
            )

            assert state.shape == batch_states[:, t, :].shape
            mse = torch.mean((state - batch_states[:, t, :])**2)

            assert loss_type in ['nll', 'mse', 'mixed']
            if loss_type == 'nll':
                nll = -1.0 * utility.gaussian_log_likelihood(
                    state, batch_states[:, t, :], state_sigma)
                nll = torch.mean(nll)
                loss = nll
            elif loss_type == 'mse':
                loss = mse
            else:
                nll = -1.0 * utility.gaussian_log_likelihood(
                    state, batch_states[:, t, :], state_sigma)
                nll = torch.mean(nll)
                loss = nll + mse

            losses.append(loss)

        loss = torch.mean(torch.stack(losses))
        buddy.minimize(loss,
                       optimizer_name=optim_name,
                       checkpoint_interval=checkpoint_interval)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope(optim_name):
                buddy.log("Training loss", loss.item())
                buddy.log_model_grad_norm()
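# Example usage (sketch, not part of the original source): one way this
# training loop might be driven. `buddy` (an experiment helper exposing
# minimize()/log()/log_scope()), `ekf_model`, and the dataset object are
# placeholders and are not defined in this file.
#
#   from torch.utils.data import DataLoader
#
#   dataloader = DataLoader(subsequence_dataset, batch_size=32, shuffle=True)
#   for epoch in range(5):
#       train_e2e(buddy, ekf_model, dataloader,
#                 loss_type="mse", measurement_init=True)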