コード例 #1
0
        delta_state_traj = state_traj[1:] - state_traj[:-1]

        (pred_gp_mean, pred_gp_variance, rollout_gp, pred_gp_mean_trajs,
         pred_gp_variance_trajs,
         rollout_gp_trajs) = predict_gp(train_x, train_y, state_traj[0],
                                        action_traj)

        for i in range(len(state_traj) - 1):
            vis.set_gt_cartpole_state(state_traj[i][3], state_traj[i][2])
            vis.set_gt_delta_state_trajectory(ts[:i + 1],
                                              delta_state_traj[:i + 1])

            if i == 0:
                vis.set_gp_cartpole_state(state_traj[i][3], state_traj[i][2])
                vis.set_gp_cartpole_rollout_state(
                    [state_traj[i][3]] * NUM_TRAJ_SAMPLES,
                    [state_traj[i][2]] * NUM_TRAJ_SAMPLES)
            else:
                vis.set_gp_cartpole_state(rollout_gp[i - 1][3],
                                          rollout_gp[i - 1][2])
                vis.set_gp_cartpole_rollout_state(
                    rollout_gp_trajs[:, i - 1, 3], rollout_gp_trajs[:, i - 1,
                                                                    2])

            vis.set_gp_delta_state_trajectory(ts[:i + 1], pred_gp_mean[:i + 1],
                                              pred_gp_variance[:i + 1])

            if policy == swingup_policy:
                policy_type = 'swing up'
            else:
                policy_type = 'random'
コード例 #2
0
        (pred_gp_mean,
         pred_gp_variance,
         rollout_gp,
         pred_gp_mean_trajs,
         pred_gp_variance_trajs,
         rollout_gp_trajs) = predict_dpf(model, img_vis, state_traj, action_traj)
        error.append(np.power(state_traj[1:] - rollout_gp, 2).mean(axis = 0))


        for i in range(len(state_traj) - 1):
            vis.set_gt_cartpole_state(state_traj[i][3], state_traj[i][2])
            vis.set_gt_delta_state_trajectory(ts[:i+1], delta_state_traj[:i+1])

            if i == 0:
                vis.set_gp_cartpole_state(state_traj[i][3], state_traj[i][2])
                vis.set_gp_cartpole_rollout_state([state_traj[i][3]] * NUM_TRAJ_SAMPLES,
                                                  [state_traj[i][2]] * NUM_TRAJ_SAMPLES)
            else:
                vis.set_gp_cartpole_state(rollout_gp[i-1][3], rollout_gp[i-1][2])
                #vis.set_gp_cartpole_rollout_state(rollout_gp_trajs[:, i-1, 3], rollout_gp_trajs[:, i-1, 2])

            vis.set_gp_delta_state_trajectory(ts[:i+1], pred_gp_mean[:i+1], pred_gp_variance[:i+1])

            if policy == swingup_policy:
                policy_type = 'swing up'
            else:
                policy_type = 'random'

            vis.set_info_text('epoch: %d\npolicy: %s' % (epoch, policy_type))

            vis_img = vis.draw(redraw=(i==0))
            cv2.imshow('vis', vis_img)