Exemple #1
0
def rollout(model, trajectories, max_timesteps=300):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    predicted_states = [[states[0]] for states, _, _ in trajectories]
    actual_states = [states[:timesteps] for states, _, _ in trajectories]
    for t in range(1, timesteps):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)

        device = next(model.parameters()).device
        pred = model(*utils.to_torch([s, o, c], device=device))
        pred = utils.to_numpy(pred)
        assert pred.shape == (len(trajectories), 2)
        for i in range(len(trajectories)):
            predicted_states[i].append(pred[i])

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
    def __init__(self, *paths, stddev=None, samples_per_pair=20, **kwargs):
        """
        Args:
          *paths: paths to dataset hdf5 files
        """

        trajectories = load_trajectories(*paths, **kwargs)

        if stddev is None:
            stddev = self.default_stddev
        self.stddev = np.array(stddev)
        self.samples_per_pair = samples_per_pair
        self.dataset = []
        for i, trajectory in enumerate(tqdm(trajectories)):
            assert len(trajectory) == 3
            states, observations, controls = trajectory

            timesteps = len(states)
            assert type(observations) == dict
            assert len(controls) == timesteps

            for t in range(0, timesteps):
                # Pull out data & labels
                state = states[t]
                observation = utils.DictIterator(observations)[t]

                self.dataset.append((state, observation))

        print("Loaded {} points".format(len(self.dataset)))
Exemple #3
0
def rollout_lstm(model, trajectories, max_timesteps=300):
    timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])

    trajectory_count = len(trajectories)

    state_dim = trajectories[0][0].shape[-1]
    actual_states = np.zeros((trajectory_count, timesteps, state_dim))

    batched_observations = {}
    batched_controls = []

    # Trajectories is a list of (states, observations, controls)
    for i, (states, observations, controls) in enumerate(trajectories):

        observations = utils.DictIterator(observations)[1:timesteps]
        utils.DictIterator(batched_observations).append(observations)
        batched_controls.append(controls[1:timesteps])

        assert states.shape == (timesteps, state_dim)
        actual_states[i] = states[:timesteps]  # * 0 + 0.1

    utils.DictIterator(batched_observations).convert_to_numpy()
    batched_controls = np.array(batched_controls)

    # Propagate through model
    # model.reset_hidden_states(utils.to_torch(actual_states[:, 0, :]))
    device = next(model.parameters()).device
    predicted_states = np.concatenate([
        actual_states[:, 0:1, :],
        utils.to_numpy(
            model(
                utils.to_torch(batched_observations, device),
                utils.to_torch(batched_controls, device),
            )),
    ],
                                      axis=1)

    # Indexing: batch, sequence length, state
    return predicted_states, actual_states
 def split(x):
     if type(x) == np.ndarray:
         new_length = (len(x) // subsequence_length) * \
             subsequence_length
         x = x[:new_length]
         return np.split(x[:new_length], sections)
     elif type(x) == dict:
         output = {}
         for key, value in x.items():
             output[key] = split(value)
         return utils.DictIterator(output)
     else:
         assert False
    def __init__(self, *paths, **kwargs):
        """
        Input:
          *paths: paths to dataset hdf5 files
        """

        trajectories = load_trajectories(*paths, **kwargs)
        active_dataset = []
        inactive_dataset = []
        for trajectory in trajectories:
            assert len(trajectory) == 3
            states, observations, controls = trajectory

            timesteps = len(states)
            assert type(observations) == dict
            assert len(controls) == timesteps

            for t in range(1, timesteps):
                # Pull out data & labels
                prev_state = states[t - 1]
                observation = utils.DictIterator(observations)[t]
                control = controls[t]
                new_state = states[t]

                # Construct sample, bring to torch, & add to dataset
                sample = (prev_state, observation, control, new_state)
                sample = tuple(utils.to_torch(x) for x in sample)

                if np.linalg.norm(new_state - prev_state) > 1e-5:
                    active_dataset.append(sample)
                else:
                    inactive_dataset.append(sample)

        print("Parsed data: {} active, {} inactive".format(
            len(active_dataset), len(inactive_dataset)))
        keep_count = min(len(active_dataset) // 2, len(inactive_dataset))
        print("Keeping:", keep_count)
        np.random.shuffle(inactive_dataset)
        self.dataset = active_dataset + inactive_dataset[:keep_count]
def _print_normalization(trajectories):
    """ Helper for producing code to normalize inputs
    """
    states = []
    observations = {}
    controls = []
    for t in trajectories:
        states.extend(t[0])
        utils.DictIterator(observations).extend(t[1])
        controls.extend(t[2])

    def print_ranges(**kwargs):
        for k, v in kwargs.items():
            mean = repr(np.mean(v, axis=0, keepdims=True))
            stddev = repr(np.std(v, axis=0, keepdims=True))
            print(f"{k} -= np.{mean}")
            print(f"{k} /= np.{stddev}")

    print_ranges(
        gripper_pos=observations['gripper_pos'],
        gripper_sensors=observations['gripper_sensors'],
        states=states,
        controls=controls,
    )
def rollout_and_eval(pf_model,
                     trajectories,
                     start_time=0,
                     max_timesteps=300,
                     particle_count=100,
                     noisy_dynamics=True,
                     true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.2, size=[N, 1, state_dim])
        particles += np.random.normal(0, 0.2, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the estimate of our particles
    # This is a little hacky
    # (N, t, state_dim)
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    # (N, t, M, state_dim)
    particles_history = []
    # (N, t, M)
    weights_history = []

    for i in range(N):
        particles_history.append([utils.to_numpy(particles[i])])
        weights_history.append([utils.to_numpy(log_weights[i])])

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

            particles_history[i].append(utils.to_numpy(particles[i]))
            weights_history[i].append(np.exp(utils.to_numpy(log_weights[i])))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    ### Eval
    timesteps = len(actual_states[0])

    def color(i):
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        return colors[i % len(colors)]

    state_dim = actual_states.shape[-1]
    for j in range(state_dim):
        plt.figure(figsize=(8, 6))
        for i, (pred, actual, particles, weights) in enumerate(
                zip(predicted_states, actual_states, particles_history,
                    weights_history)):
            predicted_label_arg = {}
            actual_label_arg = {}
            if i == 0:
                predicted_label_arg['label'] = "Predicted"
                actual_label_arg['label'] = "Ground Truth"
            plt.plot(range(timesteps),
                     pred[:, j],
                     c=color(i),
                     alpha=0.5,
                     **predicted_label_arg)
            plt.plot(range(timesteps),
                     actual[:, j],
                     c=color(i),
                     **actual_label_arg)

            for t in range(0, timesteps, 20):
                particle_ys = particles[t][:, j]
                particle_xs = [t for _ in particle_ys]
                plt.scatter(particle_xs, particle_ys, c=color(i), alpha=0.02)
                # particle_alphas = weights[t]
                # particle_alphas /= np.max(particle_alphas)
                # particle_alphas *= 0.3
                # particle_alphas += 0.05
                #
                # for px, py, pa in zip(
                #         particle_xs, particle_ys, particle_alphas):
                #     plt.scatter([px], [py], c=color(i), alpha=pa)

        rmse = np.sqrt(
            np.mean(
                (predicted_states[:, 10:, j] - actual_states[:, 10:, j])**2))
        print(rmse)

        plt.title(f"State #{j} // RMSE = {rmse}")
        plt.xlabel("Timesteps")
        plt.ylabel("Value")
        plt.legend()
        plt.show()
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True,
            true_initial=False):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    if true_initial:
        for i in range(N):
            particles[i, :] = trajectories[i][0][0]
        particles += np.random.normal(0, 0.1, size=particles.shape)
    else:
        # Distribute initial particles randomly
        particles += np.random.normal(0, 1.0, size=particles.shape)

    # Populate the initial state estimate as just the estimate of our particles
    # This is a little hacky
    predicted_states = [[np.mean(particles[i], axis=0)]
                        for i in range(len(trajectories))]

    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def train_e2e(buddy,
              pf_model,
              dataloader,
              log_interval=2,
              loss_type="mse",
              optim_name="e2e",
              resample=False,
              know_image_blackout=False):
    # Train for 1 epoch
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        batch_particles, batch_states, batch_obs, batch_controls = batch_gpu

        # N = batch size, M = particle count
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        N, M, state_dim = batch_particles.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        # Give all particle equal weights
        particles = batch_particles
        log_weights = torch.ones((N, M), device=buddy._device) * (-np.log(M))

        # Accumulate losses from each timestep
        losses = []
        for t in range(1, timesteps):
            prev_particles = particles
            prev_log_weights = log_weights

            if know_image_blackout:
                state_estimates, new_particles, new_log_weights = pf_model.forward(
                    prev_particles,
                    prev_log_weights,
                    utils.DictIterator(batch_obs)[:, t - 1, :],
                    batch_controls[:, t, :],
                    resample=resample,
                    noisy_dynamics=True,
                    know_image_blackout=True)
            else:
                state_estimates, new_particles, new_log_weights = pf_model.forward(
                    prev_particles,
                    prev_log_weights,
                    utils.DictIterator(batch_obs)[:, t - 1, :],
                    batch_controls[:, t, :],
                    resample=resample,
                    noisy_dynamics=True,
                )

            if loss_type == "gmm":
                loss = dpf.gmm_loss(particles_states=new_particles,
                                    log_weights=new_log_weights,
                                    true_states=batch_states[:, t, :],
                                    gmm_variances=np.array([0.1]))
            elif loss_type == "mse":
                loss = torch.mean((state_estimates - batch_states[:, t, :])**2)
            else:
                assert False, "Invalid loss"

            losses.append(loss)

            # Enable backprop through time
            particles = new_particles
            log_weights = new_log_weights

            # # Disable backprop through time
            # particles = new_particles.detach()
            # log_weights = new_log_weights.detach()

            # assert state_estimates.shape == batch_states[:, t, :].shape

        buddy.minimize(torch.mean(torch.stack(losses)),
                       optimizer_name=optim_name,
                       checkpoint_interval=1000)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope(optim_name):
                buddy.log("Training loss", np.mean(utils.to_numpy(losses)))
                buddy.log("Log weights mean", log_weights.mean())
                buddy.log("Log weights std", log_weights.std())
                buddy.log("Particle states mean", particles.mean())
                buddy.log("particle states std", particles.std())

    print("Epoch loss:", np.mean(utils.to_numpy(losses)))
def rollout_kf(kf_model,
               trajectories,
               start_time=0,
               max_timesteps=300,
               noisy_dynamics=False,
               true_initial=False,
               init_state_noise=0.2,
               save_data_name=None):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)

    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        states_tuple = kf_model.forward(
            initial_states,
            initial_sigmas,
            initial_obs,
            dummy_controls,
        )

        initial_states = states_tuple[0]
        initial_sigmas = states_tuple[1]
        predicted_states = [[utils.to_numpy(initial_states[i])]
                            for i in range(len(trajectories))]

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []

        for i, traj in enumerate(trajectories):
            s, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    predicted_sigmas = np.array(predicted_sigmas)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    if save_data_name is not None:
        import h5py
        filename = "rollout/" + save_data_name + ".h5"

        try:
            f = h5py.File(filename, 'w')
        except:
            import os
            new_dest = "rollout/old/{}.h5".format(save_data_name)
            os.rename(filename, new_dest)
            f = h5py.File(filename, 'w')
        f.create_dataset("predicted_states", data=predicted_states)
        f.create_dataset("actual_states", data=actual_states)
        f.create_dataset("predicted_sigmas", data=predicted_sigmas)
        f.close()

    return predicted_states, actual_states, predicted_sigmas, contact_states
def rollout_kf_full(
    kf_model,
    trajectories,
    start_time=0,
    max_timesteps=300,
    true_initial=False,
    init_state_noise=0.2,
):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)

    kf_model.eval()
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])

    print("endtime: ", end_time)

    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    contact_states = [
        action[start_time:end_time][:, -1]
        for states, obs, action in trajectories
    ]

    actions = get_actions(trajectories, start_time, max_timesteps)

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    controls_dim = trajectories[0][2][0].shape

    device = next(kf_model.parameters()).device

    initial_states = np.zeros((N, state_dim))
    initial_sigmas = np.zeros((N, state_dim, state_dim))
    initial_obs = {}

    if true_initial:
        for i in range(N):
            initial_states[i] = trajectories[i][0][0] + np.random.normal(
                0.0, scale=init_state_noise, size=initial_states[i].shape)
            initial_sigmas[i] = np.eye(state_dim) * init_state_noise**2
        (initial_states, initial_sigmas) = utils.to_torch(
            (initial_states, initial_sigmas), device=device)
    else:
        print("put in measurement model")
        # Put into measurement model!
        dummy_controls = torch.ones((N, ) + controls_dim, ).to(device)
        for i in range(N):
            utils.DictIterator(initial_obs).append(
                utils.DictIterator(trajectories[i][1])[0])

        utils.DictIterator(initial_obs).convert_to_numpy()

        (initial_obs, initial_states, initial_sigmas) = utils.to_torch(
            (initial_obs, initial_states, initial_sigmas), device=device)

        state, state_sigma = kf_model.measurement_model.forward(
            initial_obs, initial_states)
        initial_states = state
        initial_sigmas = state_sigma
        predicted_states = [[utils.to_numpy(initial_states[i])]
                            for i in range(len(trajectories))]

    states = initial_states
    sigmas = initial_sigmas

    predicted_states = [[utils.to_numpy(initial_states[i])]
                        for i in range(len(trajectories))]
    predicted_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                        for i in range(len(trajectories))]

    predicted_dyn_states = [[utils.to_numpy(initial_states[i])]
                            for i in range(len(trajectories))]
    predicted_dyn_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                            for i in range(len(trajectories))]

    predicted_meas_states = [[utils.to_numpy(initial_states[i])]
                             for i in range(len(trajectories))]
    predicted_meas_sigmas = [[utils.to_numpy(initial_sigmas[i])]
                             for i in range(len(trajectories))]

    # jacobian is not initialized
    predicted_jac = [[] for i in range(len(trajectories))]

    for t in tqdm(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []

        for i, traj in enumerate(trajectories):
            s, observations, controls = traj

            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        estimates = kf_model.forward(
            states,
            sigmas,
            o,
            c,
        )

        state_estimates = estimates[0].data
        sigma_estimates = estimates[1].data

        states = state_estimates
        sigmas = sigma_estimates

        dynamics_states = kf_model.dynamics_states
        dynamics_sigma = kf_model.dynamics_sigma
        measurement_states = kf_model.measurement_states
        measurement_sigma = kf_model.measurement_sigma
        dynamics_jac = kf_model.dynamics_jac

        for i in range(len(trajectories)):
            predicted_dyn_states[i].append(utils.to_numpy(dynamics_states[i]))
            predicted_dyn_sigmas[i].append(utils.to_numpy(dynamics_sigma))
            predicted_meas_states[i].append(
                utils.to_numpy(measurement_states[i]))
            predicted_meas_sigmas[i].append(
                utils.to_numpy(measurement_sigma[i]))
            predicted_jac[i].append(utils.to_numpy(dynamics_jac[i]))
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))
            predicted_sigmas[i].append(utils.to_numpy(sigma_estimates[i]))

    results = {}

    results['dyn_states'] = np.array(predicted_dyn_states)
    results['dyn_sigmas'] = np.array(predicted_dyn_sigmas)
    results['meas_states'] = np.array(predicted_meas_states)
    results['meas_sigmas'] = np.array(predicted_meas_sigmas)
    results['dyn_jac'] = np.array(predicted_jac)
    results['predicted_states'] = np.array(predicted_states)
    results['predicted_sigmas'] = np.array(predicted_sigmas)
    results['actual_states'] = np.array(actual_states)
    results['contact_states'] = np.array(contact_states)
    results['actions'] = np.array(actions)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    rmse_x = np.sqrt(
        np.mean((predicted_states[:, start_time:, 0] -
                 actual_states[:, start_time:, 0])**2))

    rmse_y = np.sqrt(
        np.mean((predicted_states[:, start_time:, 1] -
                 actual_states[:, start_time:, 1])**2))

    print("rsme x: \n{} \n y:\n{}".format(rmse_x, rmse_y))

    return results
def rollout(pf_model,
            trajectories,
            start_time=0,
            max_timesteps=300,
            particle_count=100,
            noisy_dynamics=True):
    # To make things easier, we're going to cut all our trajectories to the
    # same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] +
                      [start_time + max_timesteps])
    predicted_states = [[states[start_time]] for states, _, _ in trajectories]
    actual_states = [
        states[start_time:end_time] for states, _, _ in trajectories
    ]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = particle_count

    device = next(pf_model.parameters()).device

    particles = np.zeros((N, M, state_dim))
    for i in range(N):
        particles[i, :] = predicted_states[i][0]
    particles = utils.to_torch(particles, device=device)
    log_weights = torch.ones((N, M), device=device) * (-np.log(M))

    for t in tqdm_notebook(range(start_time + 1, end_time)):
        s = []
        o = {}
        c = []
        for i, traj in enumerate(trajectories):
            states, observations, controls = traj

            s.append(predicted_states[i][t - start_time - 1])
            o_t = utils.DictIterator(observations)[t]
            utils.DictIterator(o).append(o_t)
            c.append(controls[t])

        s = np.array(s)
        utils.DictIterator(o).convert_to_numpy()
        c = np.array(c)
        (s, o, c) = utils.to_torch((s, o, c), device=device)

        state_estimates, new_particles, new_log_weights = pf_model.forward(
            particles,
            log_weights,
            o,
            c,
            resample=True,
            noisy_dynamics=noisy_dynamics)

        particles = new_particles
        log_weights = new_log_weights

        for i in range(len(trajectories)):
            predicted_states[i].append(utils.to_numpy(state_estimates[i]))

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states
def load_trajectories(*paths,
                      use_vision=True,
                      vision_interval=10,
                      use_proprioception=True,
                      use_haptics=True,
                      use_mass=False,
                      use_depth=False,
                      image_blackout_ratio=0,
                      sequential_image_rate=1,
                      start_timestep=0,
                      direction_filter=None,
                      **unused):
    """
    Loads a list of trajectories from a set of input paths, where each
    trajectory is a tuple containing...
        states: an (T, state_dim) array of state vectors
        observations: a key->(T, *) dict of observations
        controls: an (T, control_dim) array of control vectors

    Each path can either be a string or a (string, int) tuple, where int
    indicates the maximum number of trajectories to import.
    """
    trajectories = []

    assert 1 > image_blackout_ratio >= 0
    assert image_blackout_ratio == 0 or sequential_image_rate == 1

    for path in paths:
        count = np.float('inf')
        if type(path) == tuple:
            path, count = path
            assert type(count) == int

        with utils.TrajectoriesFile(path) as f:
            # Iterate over each trajectory
            for i, trajectory in enumerate(f):
                if i >= count:
                    break

                timesteps = len(trajectory['Cylinder0_pos'])

                # Define our state:  we expect this to be:
                # (x, y, cos theta, sin theta, mass, friction)
                # TODO: add mass, friction
                state_dim = 2
                states = np.full((timesteps, state_dim), np.nan)

                states[:, :2] = trajectory['Cylinder0_pos'][:, :2]  # x, y
                if use_mass:
                    states[:, 3] = trajectory['Cylinder0_mass'][:, 0]

                # states[:, 2] = np.cos(trajectory['object_z_angle'])
                # states[:, 3] = np.sin(trajectory['object_z_angle'])
                # states[:, 5] = trajectory['Cylinder0_friction'][:, 0]

                # Pull out observations
                ## This is currently consisted of:
                ## > gripper_pos: end effector position
                ## > gripper_sensors: F/T, contact sensors
                ## > image: camera image

                observations = {}
                observations['gripper_pos'] = trajectory['eef_pos']
                assert observations['gripper_pos'].shape == (timesteps, 3)

                observations['gripper_sensors'] = np.concatenate((
                    trajectory['force'],
                    trajectory['contact'][:, np.newaxis],
                ),
                                                                 axis=1)
                assert observations['gripper_sensors'].shape[1] == 7

                if not use_proprioception:
                    observations['gripper_pos'][:] = 0
                if not use_haptics:
                    observations['gripper_sensors'][:] = 0

                if 'raw_image' in trajectory:
                    observations['raw_image'] = trajectory['raw_image']
                observations['image'] = np.zeros_like(trajectory['image'])
                if use_vision:
                    for i in range(len(observations['image'])):
                        index = (i // vision_interval) * vision_interval
                        index = min(index, len(observations['image']))
                        blackout_chance = np.random.uniform()
                        # if blackout chance > ratio, then fill image
                        # otherwise zero
                        if image_blackout_ratio == 0 and i % sequential_image_rate == 0:
                            observations['image'][i] = trajectory['image'][
                                index]

                        if blackout_chance > image_blackout_ratio and sequential_image_rate == 1:
                            observations['image'][i] = trajectory['image'][
                                index]

                observations['depth'] = np.zeros_like(trajectory['depth'])
                if use_depth:
                    for i in range(len(observations['depth'])):
                        index = (i // vision_interval) * vision_interval
                        index = min(index, len(observations['depth']))
                        observations['depth'][i] = trajectory['depth'][index]

                # Pull out controls
                ## This is currently consisted of:
                ## > previous end effector position
                ## > end effector position delta
                ## > binary contact reading
                eef_positions = trajectory['eef_pos']
                eef_positions_shifted = np.roll(eef_positions, shift=1, axis=0)
                eef_positions_shifted[0] = eef_positions[0]
                controls = np.concatenate([
                    eef_positions_shifted,
                    eef_positions - eef_positions_shifted,
                    trajectory['contact'][:, np.newaxis],
                ],
                                          axis=1)
                assert controls.shape == (timesteps, 7)

                # Normalization

                observations['gripper_pos'] -= np.array(
                    [[0.46806443, -0.0017836, 0.88028437]], dtype=np.float32)
                observations['gripper_pos'] /= np.array(
                    [[0.02410769, 0.02341035, 0.04018243]], dtype=np.float32)
                observations['gripper_sensors'] -= np.array([[
                    4.9182904e-01, 4.5039989e-02, -3.2791464e+00,
                    -3.3874984e-03, 1.1552566e-02, -8.4817986e-04,
                    2.1303751e-01
                ]],
                                                            dtype=np.float32)
                observations['gripper_sensors'] /= np.array([[
                    1.6152629, 1.666905, 1.9186896, 0.14219016, 0.14232528,
                    0.01675198, 0.40950698
                ]],
                                                            dtype=np.float32)
                states -= np.array([[0.4970164, -0.00916641]])
                states /= np.array([[0.0572766, 0.06118315]])
                controls -= np.array([[
                    4.6594709e-01, -2.5247163e-03, 8.8094306e-01,
                    1.2939950e-04, -5.4364675e-05, -6.1112235e-04,
                    2.2041667e-01
                ]],
                                     dtype=np.float32)
                controls /= np.array([[
                    0.02239027, 0.02356066, 0.0405312, 0.00054858, 0.0005754,
                    0.00046352, 0.41451886
                ]],
                                     dtype=np.float32)

                x_delta = states[start_timestep, 0] - states[-1, 0]
                y_delta = states[start_timestep, 1] - states[-1, 1]
                if direction_filter == 'x':
                    if not (abs(x_delta) > 0.55 and abs(y_delta) < 0.2):

                        continue
                if direction_filter == 'y':
                    if not (abs(x_delta) < 0.20 and abs(y_delta) > 0.55):
                        continue

                trajectories.append(
                    (states[start_timestep:],
                     utils.DictIterator(observations)[start_timestep:],
                     controls[start_timestep:]))

    ## Uncomment this line to generate the lines required to normalize data
    # _print_normalization(trajectories)

    return trajectories
def train_fusion(buddy,
                 fusion_model,
                 dataloader,
                 log_interval=2,
                 optim_name="fusion",
                 measurement_init=True,
                 init_state_noise=0.2,
                 one_loss=True,
                 nll=False):
    # todo: change loss to selection/mixed
    for batch_idx, batch in enumerate(dataloader):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        _, batch_states, batch_obs, batch_controls = batch_gpu
        # N = batch size
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        state = batch_states[:, 0, :]
        state_sigma = torch.eye(state.shape[-1],
                                device=buddy._device) * init_state_noise**2
        state_sigma = state_sigma.unsqueeze(0).repeat(N, 1, 1)

        if measurement_init:
            state, state_sigma = fusion_model.measurement_only(
                utils.DictIterator(batch_obs)[:, 0, :], state)

        else:
            dist = torch.distributions.Normal(
                torch.tensor([0.]),
                torch.ones(state.shape) * init_state_noise)
            noise = dist.sample().to(state.device)
            state += noise

        losses_image = []
        losses_force = []
        losses_fused = []
        losses_nll = []
        losses_total = []

        for t in range(1, timesteps - 1):
            prev_state = state
            prev_state_sigma = state_sigma

            state, state_sigma, force_state, image_state = fusion_model.forward(
                prev_state,
                prev_state_sigma,
                utils.DictIterator(batch_obs)[:, t, :],
                batch_controls[:, t, :],
            )

            loss_image = torch.mean((image_state - batch_states[:, t, :])**2)
            loss_force = torch.mean((force_state - batch_states[:, t, :])**2)
            loss_fused = torch.mean((state - batch_states[:, t, :])**2)

            losses_force.append(loss_force.item())
            losses_image.append(loss_image.item())
            losses_fused.append(loss_fused.item())

            if nll:
                loss_nll = torch.mean(-1.0 * utility.gaussian_log_likelihood(
                    state, batch_states[:, t, :], state_sigma))
                losses_nll.append(loss_nll)
                losses_total.append(loss_nll)

            elif one_loss:
                losses_total.append(loss_fused)
            else:
                losses_total.append(loss_image + loss_force + loss_fused)

        loss = torch.mean(torch.stack(losses_total))

        buddy.minimize(loss,
                       optimizer_name=optim_name,
                       checkpoint_interval=10000)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope("fusion"):
                buddy.log("Training loss", loss.item())
                buddy.log("Image loss", np.mean(np.array(losses_image)))
                buddy.log("Force loss", np.mean(np.array(losses_force)))
                buddy.log("Fused loss", np.mean(np.array(losses_fused)))
                buddy.log_model_grad_norm()
def train_e2e(buddy,
              ekf_model,
              dataloader,
              log_interval=2,
              optim_name="ekf",
              measurement_init=True,
              checkpoint_interval=1000,
              init_state_noise=0.2,
              loss_type="mse"):
    # Train for 1 epoch
    for batch_idx, batch in enumerate(dataloader):
        # Transfer to GPU and pull out batch data
        batch_gpu = utils.to_device(batch, buddy._device)
        _, batch_states, batch_obs, batch_controls = batch_gpu
        # N = batch size, M = particle count
        N, timesteps, control_dim = batch_controls.shape
        N, timesteps, state_dim = batch_states.shape
        assert batch_controls.shape == (N, timesteps, control_dim)

        state = batch_states[:, 0, :]
        state_sigma = torch.eye(state.shape[-1],
                                device=buddy._device) * init_state_noise**2
        state_sigma = state_sigma.unsqueeze(0).repeat(N, 1, 1)

        if measurement_init:
            state, state_sigma = ekf_model.measurement_model.forward(
                utils.DictIterator(batch_obs)[:, 0, :], batch_states[:, 0, :])
        else:
            dist = torch.distributions.Normal(
                torch.tensor([0.]),
                torch.ones(state.shape) * init_state_noise)
            noise = dist.sample().to(state.device)
            state += noise

        # Accumulate losses from each timestep
        losses = []
        for t in range(1, timesteps - 1):
            prev_state = state
            prev_state_sigma = state_sigma

            state, state_sigma = ekf_model.forward(
                prev_state,
                prev_state_sigma,
                utils.DictIterator(batch_obs)[:, t, :],
                batch_controls[:, t, :],
            )

            assert state.shape == batch_states[:, t, :].shape

            mse = torch.mean((state - batch_states[:, t, :])**2)

            assert loss_type in ['nll', 'mse', 'mixed']
            if loss_type == 'nll':
                nll = -1.0 * utility.gaussian_log_likelihood(
                    state, batch_states[:, t, :], state_sigma)
                nll = torch.mean(nll)
                # import ipdb;ipdb.set_trace()
                loss = nll
            elif loss_type == 'mse':
                loss = mse
            else:
                nll = -1.0 * utility.gaussian_log_likelihood(
                    state, batch_states[:, t, :], state_sigma)
                nll = torch.mean(nll)
                loss = nll + mse

            losses.append(loss)

        loss = torch.mean(torch.stack(losses))
        buddy.minimize(loss,
                       optimizer_name=optim_name,
                       checkpoint_interval=checkpoint_interval)

        if buddy.optimizer_steps % log_interval == 0:
            with buddy.log_scope(optim_name):
                buddy.log("Training loss", loss.item())
                buddy.log_model_grad_norm()