def train(self) -> None:
    """Run value iteration until the stop condition or ``max_itr`` is hit.

    Alternates value-function backups with periodic greedy-policy rollouts
    for logging/rendering, then performs a final rendered evaluation and
    writes contour/rollout videos.  Relies on module-level helpers
    ``rollout``, ``plot_contour``, ``plot_returns``, ``logger``,
    ``mpy`` (moviepy) and ``plt`` (matplotlib).
    """
    next_v = 1e6  # sentinel so the first _stop_condition check cannot pass
    v = self.value_fun.get_values()
    # NOTE(review): `v` is never reassigned inside the loop, so
    # _stop_condition compares `next_v` against the *initial* values —
    # unless get_values() returns a live array that value_fun.update()
    # mutates in place.  Confirm which is intended.
    itr = 0
    videos = []    # rendered rollout frames accumulated across iterations
    contours = []  # value-contour frames, kept aligned with `videos`
    returns = []   # average return per logged iteration
    delay_cs = []  # average delayed cost per logged iteration
    fig = None     # matplotlib figure reused by plot_contour
    while not self._stop_condition(itr, next_v, v) and itr < self.max_itr:
        log = itr % self.log_itr == 0
        render = (itr % self.render_itr == 0) and self.render
        if log:
            # Evaluate the greedy policy w.r.t. the current value function.
            next_pi = self.get_next_policy()
            self.policy.update(next_pi)
            average_return, avg_delay_cost, video = rollout(self.env, self.policy, render=render,
                                                            num_rollouts=self.num_rollouts,
                                                            max_path_length=self.max_path_length, iteration=itr)
            if render:
                contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                # Repeat the contour frame so both videos stay frame-aligned.
                contours += [contour] * len(video)
                videos += video
            returns.append(average_return)
            delay_cs.append(avg_delay_cost)
            logger.logkv('Iteration', itr)
            logger.logkv('Average Returns', average_return)
            logger.logkv('Average Delayed Costs', avg_delay_cost)
            logger.dumpkvs()
        # One backup of the value function, then commit it.
        next_v = self.get_next_values()
        self.value_fun.update(next_v)
        itr += 1
    # Final greedy policy and one last rendered evaluation.
    next_pi = self.get_next_policy()
    self.policy.update(next_pi)
    contour, fig = plot_contour(self.env, self.value_fun, save=True, fig=fig, iteration=itr)
    average_return, avg_delay_cost, video = rollout(self.env, self.policy, render=True,
                                                    num_rollouts=self.num_rollouts,
                                                    max_path_length=self.max_path_length, iteration=itr)
    self.env.close()
    plot_returns(returns)
    plot_returns(delay_cs, 'delayed_cost')
    videos += video
    if self.render:
        contours += [contour]
    logger.logkv('Iteration', itr)
    logger.logkv('Average Returns', average_return)
    logger.logkv('Average Delayed Costs', avg_delay_cost)
    # NOTE(review): no logger.dumpkvs() follows these final logkv calls,
    # so the last iteration's stats may never be flushed — confirm.
    # Frame rate scales inversely with the env timestep (0.1 if `dt` absent).
    fps = int(4 / getattr(self.env, 'dt', 0.1))
    if contours and contours[0] is not None:
        clip = mpy.ImageSequenceClip(contours, fps=fps)
        clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())
    if videos:
        clip = mpy.ImageSequenceClip(videos, fps=fps)
        clip.write_videofile('%s/roll_outs.mp4' % logger.get_dir())
    plt.close()
def train(self):
    """Fit the value function by iterated gradient steps on ``self.objective``.

    Performs ``max_itr`` optimizer updates, periodically evaluating the
    policy with rollouts for logging/rendering, then saves the returns
    plot, a final contour plot, and progress videos.
    """
    theta = self.value_fun._params
    frame_buffer = []      # rollout frames gathered at render iterations
    contour_frames = []    # one value-contour image per render iteration
    avg_returns = []       # average return at every logged iteration
    figure = None          # matplotlib figure reused by plot_contour
    for step in range(self.max_itr):
        # One gradient step on the objective, committed to the value fun.
        theta = self.optimizer.grad_step(self.objective, theta)
        self.value_fun.update(theta)
        last_step = step == self.max_itr - 1
        if step % self.log_itr != 0 and not last_step:
            continue  # nothing to log this iteration
        do_render = (step % self.render_itr == 0) and self.render
        average_return, video = rollout(self.env, self.policy, render=do_render, iteration=step)
        if do_render:
            contour, figure = plot_contour(self.env, self.value_fun, fig=figure, iteration=step)
            contour_frames.append(contour)
            frame_buffer.extend(video)
        avg_returns.append(average_return)
        logger.logkv('Iteration', step)
        logger.logkv('Average Returns', average_return)
        logger.dumpkvs()
    plot_returns(avg_returns)
    plot_contour(self.env, self.value_fun, save=True, fig=figure)
    if contour_frames and contour_frames[0] is not None:
        # Temporally upsample the contour sequence for a smoother video.
        smoothed = list(upsample(np.array(contour_frames), 10))
        movie = mpy.ImageSequenceClip(smoothed, fps=10)
        movie.write_videofile('%s/contours_progress.mp4' % logger.get_dir())
    if frame_buffer:
        # Frame rate scales inversely with the env timestep (0.1 if absent).
        frame_rate = int(10 / getattr(self.env, 'dt', 0.1))
        movie = mpy.ImageSequenceClip(frame_buffer, fps=frame_rate)
        movie.write_videofile('%s/learning_progress.mp4' % logger.get_dir())
    plt.close()
# Post-process one optimization snapshot: load the pickled state and plot
# it over the mesh.  `filename`, `counter`, the mesh objects and the
# plot_* helpers are defined earlier in the file (outside this view).
with open(filename, 'rb') as f:
    # SECURITY: pickle.load executes arbitrary code from the file —
    # only load trusted snapshot files.
    raw = pickle.load(f)
plot_mesh(num_nodes_x, num_nodes_y, length_x, length_y)
if quad_order is not None:
    # Gauss-point resolution: quad_order values per element per axis.
    # assumes raw['fill'] / raw['boundary'] are flat arrays of that size
    # — TODO confirm against the writer of the pickle.
    fill = raw['fill'].reshape((
        (num_nodes_y - 1) * quad_order,
        (num_nodes_x - 1) * quad_order,
    ))
    boundary = raw['boundary'].reshape((
        (num_nodes_y - 1) * quad_order,
        (num_nodes_x - 1) * quad_order,
    ))
    plot_contour(gpt_mesh, fill, plot_fill=True)
    plot_contour(gpt_mesh, boundary, plot_boundary=True)
elif 0:  # param
    # NOTE(review): dead branch (`elif 0`) — apparently a manual toggle
    # between nodal ("param") and element-wise (SIMP) multiplier layouts.
    multipliers = raw['multipliers'].reshape((num_nodes_y, num_nodes_x))
    plot_contour(mesh, multipliers, plot_fill=True)
else:  # SIMP
    # One multiplier per element (nodes - 1 along each axis).
    multipliers = raw['multipliers'].reshape((num_nodes_y-1, num_nodes_x-1))
    plot_contour(mesh, multipliers, plot_fill=True)
plot_save(save='save/save%03i.png'%counter)
# Advance to the next numbered snapshot file.
counter += 1
filename = 'save/data%03i.pkl' % counter
# import movie