def train(self):
    """Value-iteration-style training loop: repeatedly compute the next value
    function until the stop condition or the iteration budget is reached,
    periodically logging rollout statistics and rendering value-function contours."""
    # Assumes module-level imports: rollout, plot_contour, plot_returns, logger,
    # moviepy.editor as mpy, matplotlib.pyplot as plt.
    next_v = 1e6
    v = self.value_fun.get_values()
    itr = 0
    videos = []
    contours = []
    returns = []
    delay_cs = []
    fig = None
    # `_stop_condition` is defined elsewhere in the class; a sketch of a typical
    # convergence test follows this method.
    while not self._stop_condition(itr, next_v, v) and itr < self.max_itr:
        log = itr % self.log_itr == 0
        render = (itr % self.render_itr == 0) and self.render
        if log:
            # Extract the current policy from the value function and evaluate it.
            next_pi = self.get_next_policy()
            self.policy.update(next_pi)
            average_return, avg_delay_cost, video = rollout(
                self.env, self.policy, render=render,
                num_rollouts=self.num_rollouts,
                max_path_length=self.max_path_length, iteration=itr)
            if render:
                contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                contours += [contour] * len(video)
                videos += video
            returns.append(average_return)
            delay_cs.append(avg_delay_cost)
            logger.logkv('Iteration', itr)
            logger.logkv('Average Returns', average_return)
            logger.logkv('Average Delayed Costs', avg_delay_cost)
            logger.dumpkvs()
        # Compute the next value-function estimate and commit it.
        next_v = self.get_next_values()
        self.value_fun.update(next_v)
        itr += 1

    # Final policy update, evaluation, and rendering after the loop ends.
    next_pi = self.get_next_policy()
    self.policy.update(next_pi)
    contour, fig = plot_contour(self.env, self.value_fun, save=True, fig=fig, iteration=itr)
    average_return, avg_delay_cost, video = rollout(
        self.env, self.policy, render=True,
        num_rollouts=self.num_rollouts,
        max_path_length=self.max_path_length, iteration=itr)
    self.env.close()
    plot_returns(returns)
    plot_returns(delay_cs, 'delayed_cost')
    videos += video
    if self.render:
        contours += [contour]
    logger.logkv('Iteration', itr)
    logger.logkv('Average Returns', average_return)
    logger.logkv('Average Delayed Costs', avg_delay_cost)

    # Stitch the collected frames into videos.
    fps = int(4 / getattr(self.env, 'dt', 0.1))
    if contours and contours[0] is not None:
        clip = mpy.ImageSequenceClip(contours, fps=fps)
        clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())
    if videos:
        clip = mpy.ImageSequenceClip(videos, fps=fps)
        clip.write_videofile('%s/roll_outs.mp4' % logger.get_dir())
    plt.close()
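# The `_stop_condition(itr, next_v, v)` used above is part of the trainer class
# and is not shown here. The standalone helper below is a minimal sketch of the
# kind of convergence test such a method typically implements -- stop once one
# backup changes no state's value by more than a small tolerance. The function
# name and the `eps` default are illustrative assumptions, not the project's
# actual implementation.
import numpy as np

def value_iteration_converged(next_v, v, eps=1e-3):
    # Sup-norm of the change produced by one backup over all states.
    return np.max(np.abs(next_v - v)) < eps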
def train(self):
    """Fit a parametric value function by gradient descent: one optimizer step
    on the objective per iteration, with periodic rollouts and contour plots."""
    # Assumes module-level imports: np, upsample, rollout, plot_contour,
    # plot_returns, logger, moviepy.editor as mpy, matplotlib.pyplot as plt.
    params = self.value_fun._params
    videos = []
    contours = []
    returns = []
    fig = None
    for itr in range(self.max_itr):
        # One gradient step on the fitting objective; commit the new parameters.
        params = self.optimizer.grad_step(self.objective, params)
        self.value_fun.update(params)
        log = itr % self.log_itr == 0 or itr == self.max_itr - 1
        render = (itr % self.render_itr == 0) and self.render
        if log:
            average_return, video = rollout(self.env, self.policy, render=render, iteration=itr)
            if render:
                contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                contours += [contour]
                videos += video
            returns.append(average_return)
            logger.logkv('Iteration', itr)
            logger.logkv('Average Returns', average_return)
            logger.dumpkvs()

    plot_returns(returns)
    plot_contour(self.env, self.value_fun, save=True, fig=fig)

    if contours and contours[0] is not None:
        # `upsample` comes from the project's utilities; a sketch of the assumed
        # behavior follows this method.
        contours = list(upsample(np.array(contours), 10))
        clip = mpy.ImageSequenceClip(contours, fps=10)
        clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())

    if videos:
        fps = int(10 / getattr(self.env, 'dt', 0.1))
        clip = mpy.ImageSequenceClip(videos, fps=fps)
        clip.write_videofile('%s/learning_progress.mp4' % logger.get_dir())
    plt.close()
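# `upsample` is imported from the project's utilities and is not shown above.
# The sketch below illustrates one plausible implementation, assuming it simply
# repeats each logged contour frame along the time axis so the short sequence
# plays back as a watchable clip; the real helper may interpolate instead. The
# name `upsample_frames` is chosen here to mark it as a hypothetical stand-in.
import numpy as np

def upsample_frames(frames, factor):
    # frames: array of shape (num_frames, height, width, channels).
    # Returns the same frames with each one duplicated `factor` times.
    return np.repeat(frames, factor, axis=0)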
def train(self):
    """Training loop that times each value-function update and each policy
    rollout; contour plotting is left commented out, and a final, longer
    rollout is rendered and saved separately."""
    # Assumes module-level imports: time, rollout, plot_returns, logger,
    # moviepy.editor as mpy, matplotlib.pyplot as plt.
    # params = self.value_fun._params
    videos = []
    contours = []
    returns = []
    delay_cs = []
    fig = None
    for itr in range(self.max_itr):
        itr_starttime = time.time()
        self.value_fun_update()
        itr_time = time.time() - itr_starttime
        log = itr % self.log_itr == 0 or itr == self.max_itr - 1
        render = (itr % self.render_itr == 0) and self.render
        if log:
            rollout_starttime = time.time()
            average_return, avg_delay_cost, video = rollout(
                self.env, self.policy, num_rollouts=self.num_rollouts,
                render=render, iteration=itr,
                max_path_length=self.max_path_length)
            rollout_time = time.time() - rollout_starttime
            if render:
                # contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                # contours += [contour]
                videos += video
            returns.append(average_return)
            delay_cs.append(avg_delay_cost)
            logger.logkv('Iteration', itr)
            logger.logkv('Average Returns', average_return)
            logger.logkv('Average Delayed Costs', avg_delay_cost)
            logger.logkv('Iteration Time', itr_time)
            logger.logkv('Policy Rollout Time', rollout_time)
            logger.dumpkvs()

    plot_returns(returns)
    plot_returns(delay_cs, 'delayed_cost')
    # plot_contour(self.env, self.value_fun, save=True, fig=fig)

    # if contours and contours[0] is not None:
    #     contours = list(upsample(np.array(contours), 10))
    #     clip = mpy.ImageSequenceClip(contours, fps=10)
    #     clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())

    if videos:
        fps = int(4 / getattr(self.env, 'dt', 0.1))
        clip = mpy.ImageSequenceClip(videos, fps=fps)
        clip.write_videofile('%s/learning_progress.mp4' % logger.get_dir())

    # Render one last, longer evaluation rollout and save it on its own.
    itr = self.max_itr
    average_return, avg_delay_cost, final_itr_video = rollout(
        self.env, self.policy, num_rollouts=2, render=True, iteration=itr,
        last_max_path_length=self.last_max_path_length, last_iteration=True)
    final_clip = mpy.ImageSequenceClip(final_itr_video, fps=40)
    final_clip.write_videofile('%s/final_rollout.mp4' % logger.get_dir())
    plt.close()
from envs.asrs_env import ASRSEnv
from utils.utils import RandomPolicy
from utils.plot import rollout
import moviepy.editor as mpy

videos = []
contours = []
returns = []
fig = None
itr = 0

env = ASRSEnv((2, 3), dist_param=[0.01, 0.2, 0.4, 0.5, 0.7, 0.9])
policy = RandomPolicy(env)
average_return, video = rollout(env, policy, render=True, num_rollouts=1)
videos += video

fps = int(4 / getattr(env, 'dt', 0.1))
if videos:
    clip = mpy.ImageSequenceClip(videos, fps=fps)
    clip.write_videofile('data/roll_outs.mp4')
env.close()