# NOTE(review): this fragment was collapsed onto one physical line in the
# source; the statement grouping below is reconstructed from the visible
# `else:` and the `i - 1` indexing — confirm against the original layout.
# The matching `if` branch (presumably the i == 0 case, given the
# `redraw=(i == 0)` below) lies outside this chunk.
else:
    # Past the first frame: show the GP model's rollout prediction for the
    # previous step. Indices 3 and 2 are presumably cart position and pole
    # angle — TODO confirm against the visualizer's API.
    vis.set_gp_cartpole_state(rollout_gp[i - 1][3], rollout_gp[i - 1][2])
    vis.set_gp_cartpole_rollout_state(
        rollout_gp_trajs[:, i - 1, 3], rollout_gp_trajs[:, i - 1, 2])
# Plot the GP's predicted state-delta mean/variance up to the current step.
vis.set_gp_delta_state_trajectory(ts[:i + 1], pred_gp_mean[:i + 1],
                                  pred_gp_variance[:i + 1])
# Label the overlay with the current epoch and which policy produced it.
if policy == swingup_policy:
    policy_type = 'swing up'
else:
    policy_type = 'random'
vis.set_info_text('epoch: %d\npolicy: %s' % (epoch, policy_type))
# Full redraw only on the first frame of a rollout; later frames update
# incrementally.
vis_img = vis.draw(redraw=(i == 0))
cv2.imshow('vis', vis_img)
if epoch == 0 and i == 0:  # First frame
    # Open the output video lazily, sized from the first rendered frame.
    video_out = cv2.VideoWriter(
        'cartpole.mp4',
        cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
        int(1.0 / DELTA_T),  # fps derived from the control timestep
        (vis_img.shape[1], vis_img.shape[0]))
video_out.write(vis_img)
# Pace playback roughly in real time (waitKey takes milliseconds).
cv2.waitKey(int(1000 * DELTA_T))

# Augment training data
# NOTE(review): this call is truncated in the visible source — its argument
# list continues beyond the end of this chunk.
new_train_x, new_train_y = make_training_data(state_traj[:-1],
# NOTE(review): this fragment was collapsed onto one physical line in the
# source (and appears to duplicate the fragment above, reformatted with
# double quotes); the statement grouping below is reconstructed from the
# visible `else:` and the `i - 1` indexing — confirm against the original
# layout. The matching `if` branch lies outside this chunk.
else:
    # Past the first frame: show the GP model's rollout prediction for the
    # previous step. Indices 3 and 2 are presumably cart position and pole
    # angle — TODO confirm against the visualizer's API.
    vis.set_gp_cartpole_state(rollout_gp[i - 1][3], rollout_gp[i - 1][2])
    vis.set_gp_cartpole_rollout_state(
        rollout_gp_trajs[:, i - 1, 3], rollout_gp_trajs[:, i - 1, 2])
# Plot the GP's predicted state-delta mean/variance up to the current step.
vis.set_gp_delta_state_trajectory(ts[:i + 1], pred_gp_mean[:i + 1],
                                  pred_gp_variance[:i + 1])
# Label the overlay with the current epoch and which policy produced it.
if policy == swingup_policy:
    policy_type = "swing up"
else:
    policy_type = "random"
vis.set_info_text("epoch: %d\npolicy: %s" % (epoch, policy_type))
# Full redraw only on the first frame of a rollout; later frames update
# incrementally.
vis_img = vis.draw(redraw=(i == 0))
cv2.imshow("vis", vis_img)
if epoch == 0 and i == 0:  # First frame
    # Open the output video lazily, sized from the first rendered frame.
    video_out = cv2.VideoWriter(
        "cartpole.mp4",
        cv2.VideoWriter_fourcc("m", "p", "4", "v"),
        int(1.0 / DELTA_T),  # fps derived from the control timestep
        (vis_img.shape[1], vis_img.shape[0]),
    )
video_out.write(vis_img)
# Pace playback roughly in real time (waitKey takes milliseconds).
cv2.waitKey(int(1000 * DELTA_T))