Example #1
    def __init__(self, experiment_name, initial_obs, e2e=True):
        super().__init__()

        door_pos = initial_obs['object-state'][1]

        # Initialize models
        dynamics_model = panda_models.PandaSimpleDynamicsModel(
            state_noise=0.05)
        measurement_model = panda_models.PandaMeasurementModel(units=32)
        self.pf_model = dpf.ParticleFilterNetwork(dynamics_model,
                                                  measurement_model)

        # Create a buddy, who'll automatically load the latest checkpoint, etc
        self.buddy = utils.TrainingBuddy(
            experiment_name,
            self.pf_model,
            optimizer_names=["e2e", "dynamics", "measurement"],
            log_dir="logs/pf",
            checkpoint_dir="checkpoints/pf",
            load_checkpoint=e2e)
        if not e2e:
            self.buddy.load_checkpoint(label="before_e2e_training")

        # Initialize particles
        M = 200
        particles = np.zeros((1, M, 1))
        particles[:] = door_pos
        particles = utils.to_torch(particles, device=self.buddy._device)
        log_weights = torch.ones(
            (1, M), device=self.buddy._device) * (-np.log(M))

        self.particles = particles
        self.log_weights = log_weights
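
The particles above are all initialized at the observed door position, with uniform log-weights of -log(M). A minimal standalone check, using the same M as above, that these log-weights describe a normalized particle set:

import numpy as np
import torch

M = 200
log_weights = torch.ones((1, M)) * (-np.log(M))
# exp(-log M) = 1/M for each particle, so the weights sum to one
assert torch.allclose(log_weights.exp().sum(), torch.tensor(1.0))
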
    def __getitem__(self, index):
        """ Get a subsequence from our dataset

        Returns:
            sample: (prev_state, observation, control, new_state)
        """

        state, observation = self.dataset[index // self.samples_per_pair]

        assert self.stddev.shape == state.shape

        # Generate half of our samples close to the mean, and the other half
        # far away
        if index % self.samples_per_pair < self.samples_per_pair * 0.5:
            noisy_state = state + \
                np.random.normal(
                    loc=0., scale=self.stddev, size=state.shape)
        else:
            noisy_state = state + \
                np.random.normal(
                    loc=0., scale=self.stddev * 10, size=state.shape)

        log_likelihood = np.asarray(
            scipy.stats.multivariate_normal.logpdf(noisy_state[:2],
                                                   mean=state[:2],
                                                   cov=np.diag(
                                                       self.stddev[:2]**2)))

        return utils.to_torch(
            (noisy_state, observation, log_likelihood, state))
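
A standalone sketch of the labeling scheme above, with made-up state and stddev values: a state is perturbed with Gaussian noise, and the label is the log-density of the first two dimensions of the noisy state under that same Gaussian.

import numpy as np
import scipy.stats

state = np.array([0.1, -0.3, 0.7])
stddev = np.array([0.05, 0.05, 0.05])

# Perturb the state, then score the perturbed value under the noise model
noisy_state = state + np.random.normal(loc=0., scale=stddev, size=state.shape)
log_likelihood = scipy.stats.multivariate_normal.logpdf(
    noisy_state[:2], mean=state[:2], cov=np.diag(stddev[:2]**2))
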
Example #3
def rollout(model, trajectories, max_timesteps=300):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    predicted_states = [[states[0]] for states, _, _ in trajectories]
    actual_states = [states[:timesteps] for states, _, _ in trajectories]
    for t in range(1, timesteps):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)

        device = next(model.parameters()).device
        pred = model(*utils.to_torch([s, o, c], device=device))
        pred = utils.to_numpy(pred)
        assert pred.shape == (len(trajectories), 2)
        for i in range(len(trajectories)):
            predicted_states[i].append(pred[i])

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
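
A small illustration of the truncation rule used above (the lengths are made up): every trajectory is rolled out for the length of the shortest one, capped at max_timesteps.

import numpy as np

lengths = [120, 250, 400]
max_timesteps = 300
timesteps = np.min(lengths + [max_timesteps])
assert timesteps == 120
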
def _split_trajectories(trajectories, subsequence_length):
    """Split a set of a trajectories into overlapping subsequences.

    Args:
        trajectories (list): a list of trajectories, which are each tuples of
            the form (states, observations, controls).
        subsequence_length (int): # of timesteps per output subsequence
    Returns:
        subsequences (list): a list of (states, observations, controls)
            tuples; the length of each is determined by subsequence_length
    """
    # Chop up each trajectory into non-overlapping subsequences
    subsequences = []
    for trajectory in trajectories:
        assert len(trajectory) == 3
        states, observations, controls = trajectory

        assert len(states) == len(controls)
        trajectory_length = len(states)

        sections = trajectory_length // subsequence_length

        def split(x):
            if type(x) == np.ndarray:
                new_length = (len(x) // subsequence_length) * \
                    subsequence_length
                return np.split(x[:new_length], sections)
            elif type(x) == dict:
                output = {}
                for key, value in x.items():
                    output[key] = split(value)
                return utils.DictIterator(output)
            else:
                assert False

        for s, o, c in zip(split(states), split(observations), split(controls)):
            # Numpy => Torch
            s = utils.to_torch(s)
            o = utils.to_torch(o)
            c = utils.to_torch(c)

            # Add to subsequences
            subsequences.append((s, o, c))

    return subsequences
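
A NumPy-only sketch of the chopping performed by split() above (the array shapes are hypothetical and utils.* is omitted): the trailing remainder that does not fill a whole subsequence is dropped.

import numpy as np

subsequence_length = 16
states = np.random.randn(100, 2)  # 100 timesteps of a 2-D state
sections = len(states) // subsequence_length
new_length = sections * subsequence_length
subsequences = np.split(states[:new_length], sections)
assert all(len(chunk) == subsequence_length for chunk in subsequences)
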
def train_dynamics(buddy,
                   pf_model,
                   dataloader,
                   log_interval=10,
                   optim_name="dynamics"):
    losses = []

    # Train dynamics only for 1 epoch
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        prev_states, _unused_observations, controls, new_states = batch_gpu

        prev_states += utils.to_torch(np.random.normal(0,
                                                       0.05,
                                                       size=prev_states.shape),
                                      device=buddy._device)
        prev_states = prev_states[:, np.newaxis, :]
        new_states_pred = pf_model.dynamics_model(prev_states,
                                                  controls,
                                                  noisy=False)
        new_states_pred = new_states_pred.squeeze(dim=1)

        mse_pos = F.mse_loss(new_states_pred, new_states)
        # mse_pos = torch.mean((new_states_pred - new_states) ** 2, axis=0)
        loss = mse_pos
        losses.append(utils.to_numpy(loss))

        buddy.minimize(loss,
                       optimizer_name=optim_name,
                       checkpoint_interval=1000)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope(optim_name):
                # buddy.log("Training loss", loss)
                buddy.log("MSE position", mse_pos)

                label_std = new_states.std(dim=0)
                buddy.log("Label pos std", label_std[0])

                pred_std = new_states_pred.std(dim=0)
                buddy.log("Predicted pos std", pred_std[0])

                label_mean = new_states.mean(dim=0)
                buddy.log("Label pos mean", label_mean[0])

                pred_mean = new_states_pred.mean(dim=0)
                buddy.log("Predicted pos mean", pred_mean[0])

            # print(".", end="")
    print("Epoch loss:", np.mean(losses))
Example #6
def rollout_lstm(model, trajectories, max_timesteps=300):
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    trajectory_count = len(trajectories)

    state_dim = trajectories[0][0].shape[-1]
    actual_states = np.zeros((trajectory_count, timesteps, state_dim))

    batched_observations = {}
    batched_controls = []

    # Trajectories is a list of (states, observations, controls)
    for i, (states, observations, controls) in enumerate(trajectories):

        observations = utils.DictIterator(observations)[1:timesteps]
        utils.DictIterator(batched_observations).append(observations)
        batched_controls.append(controls[1:timesteps])

        assert states.shape[0] >= timesteps
        assert states.shape[-1] == state_dim
        actual_states[i] = states[:timesteps]

    utils.DictIterator(batched_observations).convert_to_numpy()
    batched_controls = np.array(batched_controls)

    # Propagate through model
    # model.reset_hidden_states(utils.to_torch(actual_states[:, 0, :]))
    device = next(model.parameters()).device
    predicted_states = np.concatenate([
        actual_states[:, 0:1, :],
        utils.to_numpy(
            model(
                utils.to_torch(batched_observations, device),
                utils.to_torch(batched_controls, device),
            )),
    ],
                                      axis=1)

    # Indexing: batch, sequence length, state
    return predicted_states, actual_states
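
A shape sketch of the concatenation above (sizes are made up): the ground-truth state at t=0 is prepended to the model's (N, T-1, state_dim) output so that the predicted and actual sequences share the same time axis.

import numpy as np

N, T, state_dim = 3, 50, 2
actual_states = np.zeros((N, T, state_dim))
model_output = np.zeros((N, T - 1, state_dim))  # stand-in for the LSTM predictions
predicted_states = np.concatenate([actual_states[:, 0:1, :], model_output], axis=1)
assert predicted_states.shape == (N, T, state_dim)
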
Example #7
    def _update(self, observations, controls):
        # Pre-process model inputs
        states_prev = np.array(self.prev_estimate)[np.newaxis, np.newaxis]

        # Prediction
        with torch.no_grad():
            states_new = self.model(
                *utils.to_torch([states_prev, observations, controls],
                                device=self.buddy._device))

        # Post-process & return
        estimate = np.squeeze(states_new)
        self.prev_estimate = estimate
        return utils.to_numpy(estimate)
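
A small sketch of the pre- and post-processing above (state values are made up): the previous estimate is expanded to a (batch=1, sequence=1, state_dim) array before being fed to the model, and the prediction is squeezed back down.

import numpy as np

prev_estimate = np.array([0.3, -0.1])
states_prev = np.array(prev_estimate)[np.newaxis, np.newaxis]
assert states_prev.shape == (1, 1, 2)
estimate = np.squeeze(states_prev)  # stand-in for squeezing the model output
assert estimate.shape == (2,)
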
Example #8
    def _update(self, observations, controls):
        # Run model
        state_estimates, new_particles, new_log_weights = self.pf_model.forward(
            self.particles,
            self.log_weights,
            *utils.to_torch([
                observations,
                controls,
            ],
                            device=self.buddy._device),
            resample=True,
            noisy_dynamics=True)

        self.particles = new_particles
        self.log_weights = new_log_weights

        return np.squeeze(utils.to_numpy(state_estimates))
Example #9
def train(buddy, model, dataloader, log_interval=10, state_noise_std=0.2):
    losses = []

    # Train for 1 epoch
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        prev_states, observations, controls, new_states = batch_gpu
        prev_states += utils.to_torch(np.random.normal(0,
                                                       state_noise_std,
                                                       size=prev_states.shape),
                                      device=buddy._device)

        new_states_pred = model(prev_states, observations, controls)

        # mse_pos, mse_vel = torch.mean((new_states_pred - new_states) ** 2, axis=0)
        # loss = (mse_pos + mse_vel) / 2
        loss = torch.mean((new_states_pred - new_states)**2)
        losses.append(utils.to_numpy(loss))

        buddy.minimize(loss, checkpoint_interval=10000)

        if buddy._steps % log_interval == 0:
            with buddy.log_scope("baseline_training"):
                buddy.log("Training loss", loss)
                # buddy.log("MSE position", mse_pos)
                # buddy.log("MSE velocity", mse_vel)

                label_std = new_states.std(dim=0)
                buddy.log("Training pos std", label_std[0])
                # buddy.log("Training vel std", label_std[1])

                pred_std = new_states_pred.std(dim=0)
                buddy.log("Predicted pos std", pred_std[0])
                # buddy.log("Predicted vel std", pred_std[1])

                label_mean = new_states.mean(dim=0)
                buddy.log("Training pos mean", label_mean[0])
                # buddy.log("Training vel mean", label_mean[1])

                pred_mean = new_states_pred.mean(dim=0)
                buddy.log("Predicted pos mean", pred_mean[0])
                # buddy.log("Predicted vel mean", pred_mean[1])

    print("Epoch loss:", np.mean(losses))
Example #10
def gmm_loss(particles_states, log_weights, true_states, gmm_variances=1.):

    N, M, state_dim = particles_states.shape
    device = particles_states.device

    assert true_states.shape == (N, state_dim)
    assert type(gmm_variances) == float or gmm_variances.shape == (state_dim, )

    # Gaussian mixture model loss
    # There's probably a better way to do this with torch.distributions?
    if type(gmm_variances) == float:
        gmm_variances = torch.ones(
            (N, state_dim), device=device) * gmm_variances
    elif type(gmm_variances) == np.ndarray:
        new_gmm_variances = torch.ones((N, state_dim), device=device)
        new_gmm_variances[:, :] = utils.to_torch(gmm_variances)
        gmm_variances = new_gmm_variances
    else:
        assert False, "Invalid variances"

    particle_squared_errors = (particles_states -
                               true_states[:, np.newaxis, :])**2
    assert particle_squared_errors.shape == (N, M, state_dim)
    log_pdfs = -0.5 * (
        torch.log(gmm_variances[:, np.newaxis, :]) +
        particle_squared_errors / gmm_variances[:, np.newaxis, :]).sum(axis=2)
    assert log_pdfs.shape == (N, M)
    # Include the full Gaussian normalization constant (one 2*pi factor per dimension)
    log_pdfs = -0.5 * state_dim * np.log(2 * np.pi) + log_pdfs

    # Given a Gaussian centered at each particle,
    # `log_pdf` should now be the likelihoods of the true state

    # Next, let's use the particle weight as our GMM priors
    log_pdfs = log_weights + log_pdfs

    # I think that's it?
    # GMM density function: p(x) = \sum_k p(x|z=k)p(z=k)
    log_beliefs = torch.logsumexp(log_pdfs, axis=1)
    assert log_beliefs.shape == (N, )

    loss = -torch.mean(log_beliefs)

    return loss
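
A numerical sketch of the mixture belief above, restricted to isotropic unit variances and arbitrary particle positions: with particles as component means and particle weights as mixture priors, the log-belief of the true state is logsumexp over log w_k + log N(x; x_k, I). This mirrors the computation in gmm_loss for a single sample, but uses scipy instead of torch.

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

M, state_dim = 5, 2
particles = np.random.randn(M, state_dim)     # component means
true_state = np.zeros(state_dim)
log_weights = np.full(M, -np.log(M))          # uniform mixture priors

component_log_pdfs = np.array([
    multivariate_normal.logpdf(true_state, mean=particles[m], cov=np.eye(state_dim))
    for m in range(M)
])
log_belief = logsumexp(log_weights + component_log_pdfs)
loss = -log_belief  # negative log-likelihood of the true state under the GMM
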
    def __init__(self, *paths, **kwargs):
        """
        Input:
          *paths: paths to dataset hdf5 files
        """

        trajectories = load_trajectories(*paths, **kwargs)
        active_dataset = []
        inactive_dataset = []
        for trajectory in trajectories:
            assert len(trajectory) == 3
            states, observations, controls = trajectory

            timesteps = len(states)
            assert type(observations) == dict
            assert len(controls) == timesteps

            for t in range(1, timesteps):
                # Pull out data & labels
                prev_state = states[t - 1]
                observation = utils.DictIterator(observations)[t]
                control = controls[t]
                new_state = states[t]

                # Construct sample, bring to torch, & add to dataset
                sample = (prev_state, observation, control, new_state)
                sample = tuple(utils.to_torch(x) for x in sample)

                if np.linalg.norm(new_state - prev_state) > 1e-5:
                    active_dataset.append(sample)
                else:
                    inactive_dataset.append(sample)

        print("Parsed data: {} active, {} inactive".format(
            len(active_dataset), len(inactive_dataset)))
        keep_count = min(len(active_dataset) // 2, len(inactive_dataset))
        print("Keeping:", keep_count)
        np.random.shuffle(inactive_dataset)
        self.dataset = active_dataset + inactive_dataset[:keep_count]
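
A toy illustration of the balancing rule above (the counts are invented): at most half as many "inactive" samples as "active" ones are kept, so stationary transitions don't dominate the dataset.

active_count, inactive_count = 1000, 5000
keep_count = min(active_count // 2, inactive_count)
assert keep_count == 500
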
def rollout_and_eval(pf_model,
                     trajectories,
                     start_time=0,
                     max_timesteps=300,
                     particle_count=100,
                     noisy_dynamics=True,
                     true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.2, size=[N, 1, state_dim])
        particles += np.random.normal(0, 0.2, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the mean of our particles
    # This is a little hacky
    # (N, t, state_dim)
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    # (N, t, M, state_dim)
    particles_history = []
    # (N, t, M)
    weights_history = []

    for i in range(N):
        particles_history.append([utils.to_numpy(particles[i])])
        weights_history.append([utils.to_numpy(log_weights[i])])

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

            particles_history[i].append(utils.to_numpy(particles[i]))
            weights_history[i].append(np.exp(utils.to_numpy(log_weights[i])))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    ### Eval
    timesteps = len(actual_states[0])

    def color(i):
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        return colors[i % len(colors)]

    state_dim = actual_states.shape[-1]
    for j in range(state_dim):
        plt.figure(figsize=(8, 6))
        for i, (pred, actual, particles, weights) in enumerate(
                zip(predicted_states, actual_states, particles_history,
                    weights_history)):
            predicted_label_arg = {}
            actual_label_arg = {}
            if i == 0:
                predicted_label_arg['label'] = "Predicted"
                actual_label_arg['label'] = "Ground Truth"
            plt.plot(range(timesteps),
                     pred[:, j],
                     c=color(i),
                     alpha=0.5,
                     **predicted_label_arg)
            plt.plot(range(timesteps),
                     actual[:, j],
                     c=color(i),
                     **actual_label_arg)

            for t in range(0, timesteps, 20):
                particle_ys = particles[t][:, j]
                particle_xs = [t for _ in particle_ys]
                plt.scatter(particle_xs, particle_ys, c=color(i), alpha=0.02)
                # particle_alphas = weights[t]
                # particle_alphas /= np.max(particle_alphas)
                # particle_alphas *= 0.3
                # particle_alphas += 0.05
                #
                # for px, py, pa in zip(
                #         particle_xs, particle_ys, particle_alphas):
                #     plt.scatter([px], [py], c=color(i), alpha=pa)

        rmse = np.sqrt(
            np.mean(
                (predicted_states[:, 10:, j] - actual_states[:, 10:, j])**2))
        print(rmse)

        plt.title(f"State #{j} // RMSE = {rmse}")
        plt.xlabel("Timesteps")
        plt.ylabel("Value")
        plt.legend()
        plt.show()
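
A standalone sketch of the RMSE reported above (shapes and values are made up): the first 10 timesteps are skipped so the metric ignores the initial transient while the filter locks on.

import numpy as np

predicted_states = np.zeros((3, 100, 2))  # (N, timesteps, state_dim)
actual_states = np.ones((3, 100, 2))
j = 0  # state dimension being evaluated
rmse = np.sqrt(
    np.mean((predicted_states[:, 10:, j] - actual_states[:, 10:, j])**2))
assert rmse == 1.0
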
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True,
            true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.1, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the mean of our particles
    # This is a little hacky
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def rollout_kf(kf_model,
               trajectories,
               start_time=0,
               max_timesteps=300,
               noisy_dynamics=False,
               true_initial=False,
               init_state_noise=0.2,
               save_data_name=None):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)

    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        states_tuple = kf_model.forward(
            initial_states,
            initial_sigmas,
            initial_obs,
            dummy_controls,
        )

        initial_states = states_tuple[0]
        initial_sigmas = states_tuple[1]

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []

        for i, traj in enumerate(trajectories):
            _states, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    predicted_sigmas = np.array(predicted_sigmas)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    if save_data_name is not None:
        import h5py
        filename = "rollout/" + save_data_name + ".h5"

        try:
            f = h5py.File(filename, 'w')
        except OSError:
            import os
            new_dest = "rollout/old/{}.h5".format(save_data_name)
            os.rename(filename, new_dest)
            f = h5py.File(filename, 'w')
        f.create_dataset("predicted_states", data=predicted_states)
        f.create_dataset("actual_states", data=actual_states)
        f.create_dataset("predicted_sigmas", data=predicted_sigmas)
        f.close()

    return predicted_states, actual_states, predicted_sigmas, contact_states
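
A hypothetical read-back of the rollout file written above; the path assumes save_data_name="example" was passed, and the dataset names match the create_dataset calls.

import h5py

with h5py.File("rollout/example.h5", "r") as f:
    predicted_states = f["predicted_states"][:]
    actual_states = f["actual_states"][:]
    predicted_sigmas = f["predicted_sigmas"][:]
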
def rollout_kf_full(
    kf_model,
    trajectories,
    start_time=0,
    max_timesteps=300,
    true_initial=False,
    init_state_noise=0.2,
):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)

    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    actions = get_actions(trajectories, start_time, max_timesteps)

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        print("put in measurement model")
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        state, state_sigma = kf_model.measurement_model.forward(
            initial_obs, initial_states)
        initial_states = state
        initial_sigmas = state_sigma

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    predicted_dyn_states = [[utils.to_numpy(initial_states[i])]
                            for i in range(len(trajectories))]
    predicted_dyn_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                            for i in range(len(trajectories))]

    predicted_meas_states = [[utils.to_numpy(initial_states[i])]
                             for i in range(len(trajectories))]
    predicted_meas_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                             for i in range(len(trajectories))]

    # jacobian is not initialized
    predicted_jac = [[] for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []

        for i, traj in enumerate(trajectories):
            _states, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        dynamics_states = kf_model.dynamics_states
        dynamics_sigma = kf_model.dynamics_sigma
        measurement_states = kf_model.measurement_states
        measurement_sigma = kf_model.measurement_sigma
        dynamics_jac = kf_model.dynamics_jac

        for i in range(len(trajectories)):
            predicted_dyn_states[i].append(utils.to_numpy(dynamics_states[i]))
            predicted_dyn_sigmas[i].append(utils.to_numpy(dynamics_sigma))
            predicted_meas_states[i].append(
                utils.to_numpy(measurement_states[i]))
            predicted_meas_sigmas[i].append(
                utils.to_numpy(measurement_sigma[i]))
            predicted_jac[i].append(utils.to_numpy(dynamics_jac[i]))
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    results = {}

    results['dyn_states'] = np.array(predicted_dyn_states)
    results['dyn_sigmas'] = np.array(predicted_dyn_sigmas)
    results['meas_states'] = np.array(predicted_meas_states)
    results['meas_sigmas'] = np.array(predicted_meas_sigmas)
    results['dyn_jac'] = np.array(predicted_jac)
    results['predicted_states'] = np.array(predicted_states)
    results['predicted_sigmas'] = np.array(predicted_sigmas)
    results['actual_states'] = np.array(actual_states)
    results['contact_states'] = np.array(contact_states)
    results['actions'] = np.array(actions)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    return results
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    predicted_states = [[states[start_time]] for states, _, _ in trajectories]
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    for i in range(N):
        particles[i, :] = predicted_states[i][0]
    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm_notebook(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states