delta_state_traj = state_traj[1:] - state_traj[:-1] (pred_gp_mean, pred_gp_variance, rollout_gp, pred_gp_mean_trajs, pred_gp_variance_trajs, rollout_gp_trajs) = predict_gp(train_x, train_y, state_traj[0], action_traj) for i in range(len(state_traj) - 1): vis.set_gt_cartpole_state(state_traj[i][3], state_traj[i][2]) vis.set_gt_delta_state_trajectory(ts[:i + 1], delta_state_traj[:i + 1]) if i == 0: vis.set_gp_cartpole_state(state_traj[i][3], state_traj[i][2]) vis.set_gp_cartpole_rollout_state( [state_traj[i][3]] * NUM_TRAJ_SAMPLES, [state_traj[i][2]] * NUM_TRAJ_SAMPLES) else: vis.set_gp_cartpole_state(rollout_gp[i - 1][3], rollout_gp[i - 1][2]) vis.set_gp_cartpole_rollout_state( rollout_gp_trajs[:, i - 1, 3], rollout_gp_trajs[:, i - 1, 2]) vis.set_gp_delta_state_trajectory(ts[:i + 1], pred_gp_mean[:i + 1], pred_gp_variance[:i + 1]) if policy == swingup_policy: policy_type = 'swing up' else: policy_type = 'random'
(pred_gp_mean, pred_gp_variance, rollout_gp, pred_gp_mean_trajs, pred_gp_variance_trajs, rollout_gp_trajs) = predict_dpf(model, img_vis, state_traj, action_traj) error.append(np.power(state_traj[1:] - rollout_gp, 2).mean(axis = 0)) for i in range(len(state_traj) - 1): vis.set_gt_cartpole_state(state_traj[i][3], state_traj[i][2]) vis.set_gt_delta_state_trajectory(ts[:i+1], delta_state_traj[:i+1]) if i == 0: vis.set_gp_cartpole_state(state_traj[i][3], state_traj[i][2]) vis.set_gp_cartpole_rollout_state([state_traj[i][3]] * NUM_TRAJ_SAMPLES, [state_traj[i][2]] * NUM_TRAJ_SAMPLES) else: vis.set_gp_cartpole_state(rollout_gp[i-1][3], rollout_gp[i-1][2]) #vis.set_gp_cartpole_rollout_state(rollout_gp_trajs[:, i-1, 3], rollout_gp_trajs[:, i-1, 2]) vis.set_gp_delta_state_trajectory(ts[:i+1], pred_gp_mean[:i+1], pred_gp_variance[:i+1]) if policy == swingup_policy: policy_type = 'swing up' else: policy_type = 'random' vis.set_info_text('epoch: %d\npolicy: %s' % (epoch, policy_type)) vis_img = vis.draw(redraw=(i==0)) cv2.imshow('vis', vis_img)