def __init__(self, experiment_name, initial_obs, e2e=True):
    """Build the particle filter network, restore weights, and seed particles.

    Args:
        experiment_name: Name forwarded to `utils.TrainingBuddy`.
        initial_obs: Mapping whose `'object-state'` entry is indexed at
            position 1 to obtain the initial door position.
        e2e: Forwarded to the training buddy as `load_checkpoint`. When
            false, the checkpoint labelled "before_e2e_training" is loaded
            explicitly instead.
    """
    super().__init__()

    initial_door_position = initial_obs['object-state'][1]

    # Particle filter = dynamics model + measurement model
    self.pf_model = dpf.ParticleFilterNetwork(
        panda_models.PandaSimpleDynamicsModel(state_noise=0.05),
        panda_models.PandaMeasurementModel(units=32),
    )

    # Per the original author's note, the buddy takes care of loading the
    # latest checkpoint (only requested here when `e2e` is true).
    self.buddy = utils.TrainingBuddy(
        experiment_name,
        self.pf_model,
        optimizer_names=["e2e", "dynamics", "measurement"],
        log_dir="logs/pf",
        checkpoint_dir="checkpoints/pf",
        load_checkpoint=e2e,
    )
    if not e2e:
        self.buddy.load_checkpoint(label="before_e2e_training")

    # Every particle starts at the observed door position, with uniform
    # weights stored in log space: log(1 / particle_count).
    particle_count = 200
    device = self.buddy._device

    initial_particles = np.zeros((1, particle_count, 1))
    initial_particles[...] = initial_door_position

    self.particles = utils.to_torch(initial_particles, device=device)
    self.log_weights = torch.ones(
        (1, particle_count), device=device) * (-np.log(particle_count))
def __getitem__(self, index):
    """Return one noise-perturbed sample built from a (state, observation) pair.

    Every entry of `self.dataset` is expanded into `self.samples_per_pair`
    samples. Within each pair, the first half of the indices perturb the
    state with Gaussian noise of scale `self.stddev`; the remaining indices
    use ten times that scale.

    Returns:
        sample: `utils.to_torch` applied to the tuple
            (noisy_state, observation, log_likelihood, state), where
            `log_likelihood` is the log-density of the first two components
            of `noisy_state` under a Gaussian centred on the first two
            components of `state` with covariance diag(stddev[:2] ** 2).
    """
    pair_index, sample_index = divmod(index, self.samples_per_pair)
    state, observation = self.dataset[pair_index]
    assert self.stddev.shape == state.shape

    # Half of the samples stay close to the true state, the others are
    # scattered much further away.
    close_to_mean = sample_index < self.samples_per_pair * 0.5
    noise_scale = self.stddev if close_to_mean else self.stddev * 10
    noisy_state = state + np.random.normal(
        loc=0., scale=noise_scale, size=state.shape)

    # The label always uses the un-inflated standard deviation
    label_covariance = np.diag(self.stddev[:2]**2)
    log_likelihood = np.asarray(
        scipy.stats.multivariate_normal.logpdf(
            noisy_state[:2], mean=state[:2], cov=label_covariance))

    return utils.to_torch((noisy_state, observation, log_likelihood, state))
def rollout(model, trajectories, max_timesteps=300):
    """Roll a one-step prediction model forward over a batch of trajectories.

    Starting from each trajectory's true first state, the model is called
    once per timestep with the previous *predicted* states plus the
    observations and controls of the current timestep, so prediction errors
    compound over the rollout.

    Args:
        model: Callable module invoked as `model(states, observations,
            controls)`; its first parameter determines the device used.
        trajectories: Sequence of (states, observations, controls) tuples.
            `observations` is accessed through `utils.DictIterator`.
        max_timesteps: Upper bound on the rollout length.

    Returns:
        (predicted_states, actual_states) as numpy arrays, both cut to the
        shortest trajectory length (capped by `max_timesteps`).
    """
    # To make things easier, every trajectory is cut to the same length
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    predicted_states = [[states[0]] for states, _, _ in trajectories]
    actual_states = [states[:timesteps] for states, _, _ in trajectories]

    # The model's device cannot change mid-rollout: look it up once rather
    # than on every timestep.
    device = next(model.parameters()).device

    for t in range(1, timesteps):
        # Batch the observations of timestep t across trajectories
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()

        # Previous predictions and current controls, batched the same way
        s = np.array([history[t - 1] for history in predicted_states])
        c = np.array([controls[t] for _, _, controls in trajectories])

        pred = utils.to_numpy(
            model(*utils.to_torch([s, o, c], device=device)))

        # The model must return one state per trajectory, with the same
        # dimensionality as the states it was fed (previously hard-coded
        # to a state dimension of 2).
        assert pred.shape == s.shape

        for history, prediction in zip(predicted_states, pred):
            history.append(prediction)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def _split_trajectories(trajectories, subsequence_length):
    """Split a set of trajectories into fixed-length subsequences.

    Each trajectory is truncated to a whole multiple of `subsequence_length`
    and cut into consecutive, non-overlapping chunks. Trajectories shorter
    than `subsequence_length` contribute no subsequences (they used to crash
    `np.split`, which cannot divide into zero sections).

    Args:
        trajectories (list): a list of trajectories, which are each tuples of
            the form (states, observations, controls). `states` and
            `controls` must have equal length; each element must be a numpy
            array or a (possibly nested) dict of numpy arrays.
        subsequence_length (int): # of timesteps per output subsequence

    Returns:
        subsequences (list): a list of (states, observations, controls)
            tuples, each passed through `utils.to_torch`; the length of each
            is determined by subsequence_length

    Raises:
        TypeError: if a trajectory element is neither an array nor a dict.
    """

    def split(x, sections):
        # Cut an array (or every array inside a dict) into `sections` chunks
        if isinstance(x, np.ndarray):
            # Drop the tail that does not fill a whole subsequence
            new_length = (len(x) // subsequence_length) * subsequence_length
            return np.split(x[:new_length], sections)
        if isinstance(x, dict):
            return utils.DictIterator(
                {key: split(value, sections) for key, value in x.items()})
        raise TypeError(
            "Cannot split object of type {}".format(type(x).__name__))

    subsequences = []
    for trajectory in trajectories:
        assert len(trajectory) == 3
        states, observation, controls = trajectory
        assert len(states) == len(controls)

        sections = len(states) // subsequence_length
        if sections == 0:
            # Too short to yield even one full subsequence
            continue

        for s, o, c in zip(split(states, sections),
                           split(observation, sections),
                           split(controls, sections)):
            # Numpy => Torch
            subsequences.append(
                (utils.to_torch(s), utils.to_torch(o), utils.to_torch(c)))

    return subsequences
def train_dynamics(buddy,
                   pf_model,
                   dataloader,
                   log_interval=10,
                   optim_name="dynamics"):
    """Train only the dynamics model of a particle filter for one epoch.

    Args:
        buddy: Training helper providing `_device`, `minimize`,
            `optimizer_steps`, `log_scope` and `log`.
        pf_model: Model exposing a `dynamics_model` sub-module.
        dataloader: Iterable of batches that unpack into
            (prev_states, observations, controls, new_states); the
            observations are ignored here.
        log_interval: Statistics are logged whenever the buddy's optimizer
            step count is a multiple of this value.
        optim_name: Optimizer name passed to `buddy.minimize`, also used as
            the logging scope.
    """
    epoch_losses = []
    device = buddy._device

    for batch in tqdm(dataloader):
        # Move the batch to the training device and unpack it
        prev_states, _, controls, new_states = utils.to_device(batch, device)

        # Jitter the input states in place with Gaussian noise (std 0.05)
        jitter = np.random.normal(0, 0.05, size=prev_states.shape)
        prev_states += utils.to_torch(jitter, device=device)

        # Insert a singleton axis after the batch axis for the dynamics
        # model, then strip it from the prediction again.
        prev_states = prev_states[:, np.newaxis, :]
        new_states_pred = pf_model.dynamics_model(
            prev_states, controls, noisy=False).squeeze(dim=1)

        mse_pos = F.mse_loss(new_states_pred, new_states)
        loss = mse_pos
        epoch_losses.append(utils.to_numpy(loss))

        buddy.minimize(
            loss, optimizer_name=optim_name, checkpoint_interval=1000)

        if buddy.optimizer_steps % log_interval != 0:
            continue

        # Index 0 picks the first state component of each statistic
        stats = (
            ("MSE position", mse_pos),
            ("Label pos std", new_states.std(dim=0)[0]),
            ("Predicted pos std", new_states_pred.std(dim=0)[0]),
            ("Label pos mean", new_states.mean(dim=0)[0]),
            ("Predicted pos mean", new_states_pred.mean(dim=0)[0]),
        )
        with buddy.log_scope(optim_name):
            for stat_name, stat_value in stats:
                buddy.log(stat_name, stat_value)

    print("Epoch loss:", np.mean(epoch_losses))
def rollout_lstm(model, trajectories, max_timesteps=300):
    """Run a sequence model over whole trajectories in a single forward pass.

    The model receives the observations and controls of timesteps
    1..timesteps-1 for every trajectory; its output is prepended with the
    true initial state to form the predicted state sequence.

    Args:
        model: Module invoked as `model(observations, controls)`; its first
            parameter determines the device used.
        trajectories: List of (states, observations, controls) tuples.
            `states` must expose `.shape`; `observations` is accessed through
            `utils.DictIterator`.
        max_timesteps: Upper bound on the rollout length.

    Returns:
        (predicted_states, actual_states), both indexed as
        (batch, sequence length, state).
    """
    # Cut every trajectory to the shortest one (capped by max_timesteps)
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    trajectory_count = len(trajectories)
    state_dim = trajectories[0][0].shape[-1]

    actual_states = np.zeros((trajectory_count, timesteps, state_dim))
    batched_observations = {}
    batched_controls = []

    # Trajectories is a list of (states, observations, controls)
    for i, (states, observations, controls) in enumerate(trajectories):
        observations = utils.DictIterator(observations)[1:timesteps]
        utils.DictIterator(batched_observations).append(observations)
        batched_controls.append(controls[1:timesteps])

        # Validate the *truncated* states: checking the raw shape rejected
        # every trajectory longer than the common length, which defeated
        # the truncation above.
        truncated_states = states[:timesteps]
        assert truncated_states.shape == (timesteps, state_dim)
        actual_states[i] = truncated_states

    utils.DictIterator(batched_observations).convert_to_numpy()
    batched_controls = np.array(batched_controls)

    # Propagate through model
    device = next(model.parameters()).device
    model_output = utils.to_numpy(
        model(
            utils.to_torch(batched_observations, device),
            utils.to_torch(batched_controls, device),
        ))

    # The model only predicts from t=1 onwards; t=0 is the true state
    predicted_states = np.concatenate(
        [actual_states[:, 0:1, :], model_output], axis=1)

    # Indexing: batch, sequence length, state
    return predicted_states, actual_states
def _update(self, observations, controls):
    """Advance the estimate by one step using the feed-forward model.

    Args:
        observations: Observations for the current step, converted with
            `utils.to_torch` before being passed to the model.
        controls: Controls for the current step, converted the same way.

    Returns:
        The new state estimate as a numpy array with singleton axes removed.
        The same array is cached in `self.prev_estimate` for the next call.
    """
    # Pre-process model inputs: add two leading singleton axes to the
    # previous estimate.
    states_prev = np.array(self.prev_estimate)[np.newaxis, np.newaxis]

    # Prediction
    with torch.no_grad():
        states_new = self.model(
            *utils.to_torch([states_prev, observations, controls],
                            device=self.buddy._device))

    # Convert to numpy *before* squeezing and caching. `np.squeeze` on a
    # torch tensor returns a tensor, so the cached estimate used to stay on
    # the model's device and `np.array(self.prev_estimate)` failed on the
    # next call whenever that device was not the CPU.
    estimate = np.squeeze(utils.to_numpy(states_new))
    self.prev_estimate = estimate
    return estimate
def _update(self, observations, controls):
    """Advance the particle filter by one step and return its state estimate.

    The stored particles and log-weights are replaced with the ones returned
    by the particle filter network, which is run with resampling and noisy
    dynamics enabled.

    Args:
        observations: Observations for the current step, converted with
            `utils.to_torch` before being passed to the network.
        controls: Controls for the current step, converted the same way.

    Returns:
        The state estimate as a numpy array with singleton axes removed.
    """
    # This is pure inference: without `no_grad`, the particles and weights
    # carried over between calls could keep the autograd history of every
    # previous step alive.
    with torch.no_grad():
        state_estimates, new_particles, new_log_weights = \
            self.pf_model.forward(
                self.particles,
                self.log_weights,
                *utils.to_torch([observations, controls],
                                device=self.buddy._device),
                resample=True,
                noisy_dynamics=True)

    self.particles = new_particles
    self.log_weights = new_log_weights

    return np.squeeze(utils.to_numpy(state_estimates))
def train(buddy, model, dataloader, log_interval=10, state_noise_std=0.2):
    """Train a one-step state prediction model for one epoch.

    Args:
        buddy: Training helper providing `_device`, `minimize`, `_steps`,
            `log_scope` and `log`.
        model: Module invoked as `model(prev_states, observations, controls)`.
        dataloader: Iterable of batches that unpack into
            (prev_states, observations, controls, new_states).
        log_interval: Statistics are logged whenever the buddy's step count
            is a multiple of this value.
        state_noise_std: Standard deviation of the Gaussian noise added in
            place to the input states.
    """
    epoch_losses = []

    for batch in tqdm(dataloader):
        # Move the batch to the training device and unpack it
        prev_states, observations, controls, new_states = utils.to_device(
            batch, buddy._device)

        # Perturb the input states in place
        state_noise = np.random.normal(
            0, state_noise_std, size=prev_states.shape)
        prev_states += utils.to_torch(state_noise, device=buddy._device)

        new_states_pred = model(prev_states, observations, controls)

        # Mean squared error over every element of the batch
        squared_error = (new_states_pred - new_states)**2
        loss = torch.mean(squared_error)
        epoch_losses.append(utils.to_numpy(loss))

        buddy.minimize(loss, checkpoint_interval=10000)

        if buddy._steps % log_interval != 0:
            continue

        # Index 0 picks the first state component of each statistic
        stats = (
            ("Training loss", loss),
            ("Training pos std", new_states.std(dim=0)[0]),
            ("Predicted pos std", new_states_pred.std(dim=0)[0]),
            ("Training pos mean", new_states.mean(dim=0)[0]),
            ("Predicted pos mean", new_states_pred.mean(dim=0)[0]),
        )
        with buddy.log_scope("baseline_training"):
            for stat_name, stat_value in stats:
                buddy.log(stat_name, stat_value)

    print("Epoch loss:", np.mean(epoch_losses))
def gmm_loss(particles_states, log_weights, true_states, gmm_variances=1.):
    """Negative log-likelihood of the true states under a particle GMM.

    A diagonal Gaussian is centred on every particle and the particle
    weights act as the mixture priors:
        p(x) = sum_k p(x | z=k) p(z=k)

    Args:
        particles_states: Tensor of shape (N, M, state_dim).
        log_weights: Log mixture weights, added to the (N, M) per-particle
            log-densities.
        true_states: Tensor of shape (N, state_dim).
        gmm_variances: Per-dimension variances shared by all components:
            either a scalar or something convertible to a tensor of shape
            (state_dim,) (numpy array, tensor, list).

    Returns:
        Scalar tensor: minus the mean log-likelihood over the N samples.
    """
    N, M, state_dim = particles_states.shape
    assert true_states.shape == (N, state_dim)

    # Accept Python/numpy scalars, arrays and tensors alike; a scalar or a
    # (state_dim,) vector both broadcast against (N, M, state_dim).
    variances = torch.as_tensor(
        gmm_variances,
        dtype=particles_states.dtype,
        device=particles_states.device)
    assert variances.shape in ((), (state_dim, )), "Invalid variances"

    particle_squared_errors = (particles_states -
                               true_states[:, None, :])**2
    assert particle_squared_errors.shape == (N, M, state_dim)

    # Log-density of a diagonal Gaussian, summed over state dimensions:
    #   -0.5 * sum_d [log(2 pi) + log(var_d) + err_d^2 / var_d]
    log_pdfs = -0.5 * (torch.log(variances) +
                       particle_squared_errors / variances).sum(dim=2)
    assert log_pdfs.shape == (N, M)

    # The log(2 pi) term appears once per state dimension; it used to be
    # added only once, which was wrong for state_dim > 1.
    log_pdfs = log_pdfs - 0.5 * state_dim * float(np.log(2 * np.pi))

    # Given a Gaussian centered at each particle, `log_pdfs` is now the
    # log-likelihood of the true state; weight it by the mixture priors.
    log_pdfs = log_weights + log_pdfs

    # Marginalise over the mixture components in log space
    log_beliefs = torch.logsumexp(log_pdfs, dim=1)
    assert log_beliefs.shape == (N, )

    return -torch.mean(log_beliefs)
def __init__(self, *paths, **kwargs):
    """Load trajectories and flatten them into single-step samples.

    Each sample is the tuple (prev_state, observation, control, new_state),
    converted with `utils.to_torch`. Samples whose state changes by more
    than 1e-5 (Euclidean norm) count as "active"; the rest are "inactive"
    and are subsampled so that at most half as many inactive as active
    samples are kept.

    Input:
        *paths: paths to dataset hdf5 files
        **kwargs: forwarded to `load_trajectories`
    """
    active_samples = []
    inactive_samples = []

    for trajectory in load_trajectories(*paths, **kwargs):
        assert len(trajectory) == 3
        states, observations, controls = trajectory

        timesteps = len(states)
        assert type(observations) == dict
        assert len(controls) == timesteps

        for t in range(1, timesteps):
            # Pull out data & labels for the transition (t - 1) -> t
            prev_state, new_state = states[t - 1], states[t]
            raw_sample = (
                prev_state,
                utils.DictIterator(observations)[t],
                controls[t],
                new_state,
            )
            sample = tuple(utils.to_torch(x) for x in raw_sample)

            # Sort the sample by whether the state actually moved
            state_moved = np.linalg.norm(new_state - prev_state) > 1e-5
            bucket = active_samples if state_moved else inactive_samples
            bucket.append(sample)

    print("Parsed data: {} active, {} inactive".format(
        len(active_samples), len(inactive_samples)))

    # Keep a random subset of the inactive samples
    keep_count = min(len(active_samples) // 2, len(inactive_samples))
    print("Keeping:", keep_count)

    np.random.shuffle(inactive_samples)
    self.dataset = active_samples + inactive_samples[:keep_count]
def rollout_and_eval(pf_model,
                     trajectories,
                     start_time=0,
                     max_timesteps=300,
                     particle_count=100,
                     noisy_dynamics=True,
                     true_initial=False):
    """Roll a particle filter over trajectories and plot it against truth.

    For every state dimension a figure is shown with the predicted and true
    state of each trajectory, a scatter of the particles every 20 timesteps,
    and the RMSE (computed after skipping the first 10 timesteps) in the
    title. The RMSE is also printed. Nothing is returned.

    Args:
        pf_model: Particle filter network; `forward` is called with
            (particles, log_weights, observations, controls) and must return
            (state_estimates, new_particles, new_log_weights).
        trajectories: Sequence of (states, observations, controls) tuples.
        start_time: First timestep of the rollout window.
        max_timesteps: Maximum length of the rollout window.
        particle_count: Number of particles per trajectory.
        noisy_dynamics: Forwarded to `pf_model.forward`.
        true_initial: If true, particles start around the true state at
            `start_time`; otherwise they are drawn from a unit Gaussian
            around zero.
    """
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        # Seed from the true state at the *start of the window*; index 0
        # was only correct for start_time == 0.
        for i in range(N):
            particles[i, :] = trajectories[i][0][start_time]
        # One shared offset per trajectory, then independent per-particle
        # noise on top.
        particles += np.random.normal(0, 0.2, size=[N, 1, state_dim])
        particles += np.random.normal(0, 0.2, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the mean of our particles
    # This is a little hacky
    # (N, t, state_dim)
    predicted_states = [[np.mean(particles[i], axis=0)] for i in range(N)]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    # (N, t, M, state_dim)
    particles_history = [[utils.to_numpy(particles[i])] for i in range(N)]
    # (N, t, M) -- stored as plain weights at *every* timestep; the first
    # entry used to hold log-weights while all later ones were exponentiated.
    weights_history = [[np.exp(utils.to_numpy(log_weights[i]))]
                       for i in range(N)]

    for t in tqdm(range(start_time + 1, end_time)):
        # Batch the observations and controls of timestep t. The filter
        # carries its own state in the particles, so no state batch is
        # needed here.
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()
        c = np.array([controls[t] for _, _, controls in trajectories])
        (o, c) = utils.to_torch((o, c), device=device)

        state_estimates, particles, log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        for i in range(N):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            particles_history[i].append(utils.to_numpy(particles[i]))
            weights_history[i].append(
                np.exp(utils.to_numpy(log_weights[i])))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    ### Eval
    timesteps = len(actual_states[0])

    def color(i):
        # Cycle through a fixed palette, one colour per trajectory
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        return colors[i % len(colors)]

    state_dim = actual_states.shape[-1]
    for j in range(state_dim):
        plt.figure(figsize=(8, 6))
        for i, (pred, actual, particle_trace, _weights) in enumerate(
                zip(predicted_states, actual_states, particles_history,
                    weights_history)):
            # Only label the first trajectory so the legend has one entry
            # per line type.
            predicted_label_arg = {}
            actual_label_arg = {}
            if i == 0:
                predicted_label_arg['label'] = "Predicted"
                actual_label_arg['label'] = "Ground Truth"
            plt.plot(range(timesteps),
                     pred[:, j],
                     c=color(i),
                     alpha=0.5,
                     **predicted_label_arg)
            plt.plot(range(timesteps),
                     actual[:, j],
                     c=color(i),
                     **actual_label_arg)

            # Show the particle cloud every 20 timesteps
            for t in range(0, timesteps, 20):
                particle_ys = particle_trace[t][:, j]
                particle_xs = [t for _ in particle_ys]
                plt.scatter(particle_xs, particle_ys, c=color(i), alpha=0.02)

        # Skip the first 10 timesteps when scoring
        rmse = np.sqrt(
            np.mean(
                (predicted_states[:, 10:, j] - actual_states[:, 10:, j])**2))
        print(rmse)

        plt.title(f"State #{j} // RMSE = {rmse}")
        plt.xlabel("Timesteps")
        plt.ylabel("Value")
        plt.legend()
        plt.show()
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True,
            true_initial=False):
    """Roll a particle filter forward over a batch of trajectories.

    Args:
        pf_model: Particle filter network; `forward` is called with
            (particles, log_weights, observations, controls) and must return
            (state_estimates, new_particles, new_log_weights).
        trajectories: Sequence of (states, observations, controls) tuples.
        start_time: First timestep of the rollout window.
        max_timesteps: Maximum length of the rollout window.
        particle_count: Number of particles per trajectory.
        noisy_dynamics: Forwarded to `pf_model.forward`.
        true_initial: If true, particles start around the true state at
            `start_time` (Gaussian noise, std 0.1); otherwise they are drawn
            from a unit Gaussian around zero.

    Returns:
        (predicted_states, actual_states) as numpy arrays covering the
        window [start_time, end_time). The first predicted state is the mean
        of the initial particles.
    """
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        # Seed from the true state at the *start of the window*; index 0
        # was only correct for start_time == 0.
        for i in range(N):
            particles[i, :] = trajectories[i][0][start_time]
        particles += np.random.normal(0, 0.1, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the mean of our particles
    # This is a little hacky
    predicted_states = [[np.mean(particles[i], axis=0)] for i in range(N)]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm(range(start_time + 1, end_time)):
        # Batch the observations and controls of timestep t. The filter
        # carries its own state in the particles, so no state batch is
        # built (it used to be assembled and moved to the device, unused).
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()
        c = np.array([controls[t] for _, _, controls in trajectories])
        (o, c) = utils.to_torch((o, c), device=device)

        state_estimates, particles, log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        for i in range(N):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def rollout_kf(kf_model,
               trajectories,
               start_time=0,
               max_timesteps=300,
               noisy_dynamics=False,
               true_initial=False,
               init_state_noise=0.2,
               save_data_name=None):
    """Roll a Kalman filter model over trajectories and report its RMSE.

    Args:
        kf_model: Filter model, switched to eval mode. `forward` is called
            with (states, sigmas, observations, controls) and its first two
            outputs are used as the new state means and covariances.
        trajectories: Sequence of (states, observations, controls) tuples.
        start_time: First timestep of the rollout window.
        max_timesteps: Maximum length of the rollout window.
        noisy_dynamics: Unused; kept for interface compatibility.
        true_initial: If true, start from the true state at `start_time`
            plus Gaussian noise; otherwise run the filter once on the
            observation at `start_time` (zero prior, all-ones controls).
        init_state_noise: Std of the initial noise; its square fills the
            diagonal of the initial covariance when `true_initial` is set.
        save_data_name: If given, results are written to
            "rollout/<save_data_name>.h5".

    Returns:
        (predicted_states, actual_states, predicted_sigmas, contact_states).
        `contact_states` holds the last column of each trajectory's controls
        over the window.
    """
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]
    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        # Seed from the true state at the *start of the window*; index 0
        # was only correct for start_time == 0.
        for i in range(N):
            initial_states[i] = trajectories[i][0][
                start_time] + np.random.normal(
                    0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        # Put into measurement model! The filter is run once on the
        # observation at the start of the window.
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[start_time])
        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        states_tuple = kf_model.forward(
            initial_states,
            initial_sigmas,
            initial_obs,
            dummy_controls,
        )
        initial_states = states_tuple[0]
        initial_sigmas = states_tuple[1]

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(N)]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(N)]

    for t in tqdm(range(start_time + 1, end_time)):
        # Batch the observations and controls of timestep t. The filter
        # state is carried in `states` / `sigmas`, so no state batch is
        # needed.
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()
        c = np.array([controls[t] for _, _, controls in trajectories])
        (o, c) = utils.to_torch((o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        # `.data` drops the autograd history before feeding the estimates
        # back in as the next prior.
        states = estimates[0].data
        sigmas = estimates[1].data

        for i in range(N):
            predicted_states[i].append(utils.to_numpy(states[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigmas[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    predicted_sigmas = np.array(predicted_sigmas)

    # Both arrays already start at `start_time`, so the whole window is
    # scored. Slicing by `start_time` again skipped (or emptied) part of
    # the window whenever start_time > 0.
    rmse_x = np.sqrt(
        np.mean((predicted_states[:, :, 0] - actual_states[:, :, 0])**2))
    rmse_y = np.sqrt(
        np.mean((predicted_states[:, :, 1] - actual_states[:, :, 1])**2))
    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    if save_data_name is not None:
        import os

        import h5py

        filename = "rollout/" + save_data_name + ".h5"
        try:
            f = h5py.File(filename, 'w')
        except OSError:
            # Could not open the target for writing: move the existing file
            # out of the way and retry once.
            new_dest = "rollout/old/{}.h5".format(save_data_name)
            os.rename(filename, new_dest)
            f = h5py.File(filename, 'w')

        # Always release the file handle, even if a write fails
        try:
            f.create_dataset("predicted_states", data=predicted_states)
            f.create_dataset("actual_states", data=actual_states)
            f.create_dataset("predicted_sigmas", data=predicted_sigmas)
        finally:
            f.close()

    return predicted_states, actual_states, predicted_sigmas, contact_states
def rollout_kf_full(
        kf_model,
        trajectories,
        start_time=0,
        max_timesteps=300,
        true_initial=False,
        init_state_noise=0.2,
):
    """Roll a Kalman filter over trajectories, recording its intermediates.

    In addition to the filtered states and covariances, the model
    attributes `dynamics_states`, `dynamics_sigma`, `measurement_states`,
    `measurement_sigma` and `dynamics_jac` are read after every forward
    pass and recorded.

    Args:
        kf_model: Filter model, switched to eval mode. `forward` is called
            with (states, sigmas, observations, controls) and its first two
            outputs are used as the new state means and covariances.
        trajectories: Sequence of (states, observations, controls) tuples.
        start_time: First timestep of the rollout window.
        max_timesteps: Maximum length of the rollout window.
        true_initial: If true, start from the true state at `start_time`
            plus Gaussian noise; otherwise initialise from
            `kf_model.measurement_model` applied to the observation at
            `start_time`.
        init_state_noise: Std of the initial noise; its square fills the
            diagonal of the initial covariance when `true_initial` is set.

    Returns:
        dict of numpy arrays with keys 'dyn_states', 'dyn_sigmas',
        'meas_states', 'meas_sigmas', 'dyn_jac', 'predicted_states',
        'predicted_sigmas', 'actual_states', 'contact_states', 'actions'.
        'dyn_jac' has no entry for the initial timestep.
    """
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]
    # Last column of each trajectory's controls over the window
    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]
    actions = get_actions(trajectories, start_time, max_timesteps)

    state_dim = len(actual_states[0][0])
    N = len(trajectories)

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        # Seed from the true state at the *start of the window*; index 0
        # was only correct for start_time == 0.
        for i in range(N):
            initial_states[i] = trajectories[i][0][
                start_time] + np.random.normal(
                    0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        print("put in measurement model")
        # Put into measurement model! Uses the observation at the start of
        # the window.
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[start_time])
        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        initial_states, initial_sigmas = kf_model.measurement_model.forward(
            initial_obs, initial_states)

    states = initial_states
    sigmas = initial_sigmas

    def initial_history(values):
        # One single-element history per trajectory
        return [[utils.to_numpy(values[i])] for i in range(N)]

    predicted_states = initial_history(initial_states)
    predicted_sigmas = initial_history(initial_sigmas)
    predicted_dyn_states = initial_history(initial_states)
    predicted_dyn_sigmas = initial_history(initial_sigmas)
    predicted_meas_states = initial_history(initial_states)
    predicted_meas_sigmas = initial_history(initial_sigmas)

    # jacobian is not initialized
    predicted_jac = [[] for _ in range(N)]

    for t in tqdm(range(start_time + 1, end_time)):
        # Batch the observations and controls of timestep t. The filter
        # state is carried in `states` / `sigmas`, so no state batch is
        # needed.
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()
        c = np.array([controls[t] for _, _, controls in trajectories])
        (o, c) = utils.to_torch((o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        # `.data` drops the autograd history before feeding the estimates
        # back in as the next prior.
        states = estimates[0].data
        sigmas = estimates[1].data

        # Intermediates the model exposes after its forward pass
        dynamics_states = kf_model.dynamics_states
        dynamics_sigma = kf_model.dynamics_sigma
        measurement_states = kf_model.measurement_states
        measurement_sigma = kf_model.measurement_sigma
        dynamics_jac = kf_model.dynamics_jac

        for i in range(N):
            predicted_dyn_states[i].append(utils.to_numpy(dynamics_states[i]))
            # NOTE(review): unlike every other quantity, `dynamics_sigma` is
            # not indexed by trajectory. Left unchanged because it may be a
            # single covariance shared by the whole batch -- confirm against
            # the model.
            predicted_dyn_sigmas[i].append(utils.to_numpy(dynamics_sigma))
            predicted_meas_states[i].append(
                utils.to_numpy(measurement_states[i]))
            predicted_meas_sigmas[i].append(
                utils.to_numpy(measurement_sigma[i]))
            predicted_jac[i].append(utils.to_numpy(dynamics_jac[i]))
            predicted_states[i].append(utils.to_numpy(states[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigmas[i]))

    results = {}
    results['dyn_states'] = np.array(predicted_dyn_states)
    results['dyn_sigmas'] = np.array(predicted_dyn_sigmas)
    results['meas_states'] = np.array(predicted_meas_states)
    results['meas_sigmas'] = np.array(predicted_meas_sigmas)
    results['dyn_jac'] = np.array(predicted_jac)
    results['predicted_states'] = np.array(predicted_states)
    results['predicted_sigmas'] = np.array(predicted_sigmas)
    results['actual_states'] = np.array(actual_states)
    results['contact_states'] = np.array(contact_states)
    results['actions'] = np.array(actions)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    # Both arrays already start at `start_time`, so the whole window is
    # scored. Slicing by `start_time` again skipped (or emptied) part of
    # the window whenever start_time > 0.
    rmse_x = np.sqrt(
        np.mean((predicted_states[:, :, 0] - actual_states[:, :, 0])**2))
    rmse_y = np.sqrt(
        np.mean((predicted_states[:, :, 1] - actual_states[:, :, 1])**2))
    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    return results
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True):
    """Roll a particle filter forward from the true initial state.

    All particles of a trajectory start exactly at its true state at
    `start_time`, with uniform weights.

    Args:
        pf_model: Particle filter network; `forward` is called with
            (particles, log_weights, observations, controls) and must return
            (state_estimates, new_particles, new_log_weights).
        trajectories: Sequence of (states, observations, controls) tuples.
        start_time: First timestep of the rollout window.
        max_timesteps: Maximum length of the rollout window.
        particle_count: Number of particles per trajectory.
        noisy_dynamics: Forwarded to `pf_model.forward`.

    Returns:
        (predicted_states, actual_states) as numpy arrays covering the
        window [start_time, end_time).
    """
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    predicted_states = [[states[start_time]] for states, _, _ in trajectories]
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    # Every particle starts on the true initial state
    particles = np.zeros((N, M, state_dim))
    for i in range(N):
        particles[i, :] = predicted_states[i][0]
    particles = utils.to_torch(particles, device=device)

    # Uniform weights in log space: log(1 / M)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm_notebook(range(start_time + 1, end_time)):
        # Batch the observations and controls of timestep t. The filter
        # carries its own state in the particles, so no state batch is
        # built (it used to be assembled and moved to the device, unused).
        o = {}
        for _, observations, _ in trajectories:
            utils.DictIterator(o).append(utils.DictIterator(observations)[t])
        utils.DictIterator(o).convert_to_numpy()
        c = np.array([controls[t] for _, _, controls in trajectories])
        (o, c) = utils.to_torch((o, c), device=device)

        state_estimates, particles, log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        for i in range(N):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states