def train(self):
        """Run value iteration, periodically evaluating the greedy policy.

        Each loop iteration computes the next value estimates with
        ``self.get_next_values()`` and stores them in ``self.value_fun``.
        The loop ends when ``self._stop_condition`` returns true or after
        ``self.max_itr`` iterations.  Every ``self.log_itr`` iterations the
        policy is refreshed from ``self.get_next_policy()``, rolled out, and
        the average return and average delayed cost are logged.  When
        ``self.render`` is set, rollout frames and value-function contours are
        collected on iterations that are also multiples of ``self.render_itr``.

        After the loop a final policy update, contour plot (saved) and rendered
        rollout are performed, the environment is closed, the return and
        delayed-cost curves are plotted, and the collected frames are written
        as ``contours_progress.mp4`` and ``roll_outs.mp4`` in
        ``logger.get_dir()``.

        Returns:
            None.
        """
        # Sentinel "next" value so the first stop-condition check sees a
        # large difference from the initial values.
        next_v = 1e6
        v = self.value_fun.get_values()
        itr = 0
        videos = []
        contours = []
        returns = []
        delay_cs = []
        fig = None

        while not self._stop_condition(itr, next_v, v) and itr < self.max_itr:
            log = itr % self.log_itr == 0
            render = (itr % self.render_itr == 0) and self.render
            if log:
                next_pi = self.get_next_policy()
                self.policy.update(next_pi)
                average_return, avg_delay_cost, video = rollout(
                    self.env, self.policy, render=render,
                    num_rollouts=self.num_rollouts,
                    max_path_length=self.max_path_length, iteration=itr)
                if render:
                    contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                    # Repeat the contour once per rollout frame so both
                    # frame lists grow by the same amount.
                    contours += [contour] * len(video)
                    videos += video
                returns.append(average_return)
                delay_cs.append(avg_delay_cost)
                logger.logkv('Iteration', itr)
                logger.logkv('Average Returns', average_return)
                logger.logkv('Average Delayed Costs', avg_delay_cost)
                logger.dumpkvs()
            # Keep the previous sweep's values so the stop condition compares
            # consecutive iterates instead of always comparing against the
            # values the value function started with.
            v = next_v
            next_v = self.get_next_values()
            self.value_fun.update(next_v)
            itr += 1

        # Final evaluation with the converged (or last) value function.
        next_pi = self.get_next_policy()
        self.policy.update(next_pi)
        contour, fig = plot_contour(self.env, self.value_fun, save=True, fig=fig, iteration=itr)
        average_return, avg_delay_cost, video = rollout(
            self.env, self.policy, render=True,
            num_rollouts=self.num_rollouts,
            max_path_length=self.max_path_length, iteration=itr)
        self.env.close()
        plot_returns(returns)
        plot_returns(delay_cs, 'delayed_cost')
        videos += video
        if self.render:
            contours += [contour]
        logger.logkv('Iteration', itr)
        logger.logkv('Average Returns', average_return)
        logger.logkv('Average Delayed Costs', avg_delay_cost)
        # Flush the final statistics; without this the last logkv group is
        # recorded but never written out, unlike the groups inside the loop.
        logger.dumpkvs()

        # Frame rate is derived from the environment time step (0.1 when the
        # environment does not define ``dt``).
        fps = int(4 / getattr(self.env, 'dt', 0.1))
        if contours and contours[0] is not None:
            clip = mpy.ImageSequenceClip(contours, fps=fps)
            clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())

        if videos:
            clip = mpy.ImageSequenceClip(videos, fps=fps)
            clip.write_videofile('%s/roll_outs.mp4' % logger.get_dir())

        plt.close()
Esempio n. 2
0
    def train(self):
        """Fit the value function with repeated optimizer gradient steps.

        Runs ``self.max_itr`` iterations.  Each one takes a single
        ``self.optimizer.grad_step`` on ``self.objective`` and stores the
        resulting parameters in ``self.value_fun``.  On every
        ``self.log_itr``-th iteration, and on the last iteration, the policy
        is rolled out and the average return is logged.  On those logged
        iterations that are also multiples of ``self.render_itr`` (and when
        ``self.render`` is set), rollout frames and a value-function contour
        are collected.

        Afterwards the return curve is plotted, a final contour is saved, and
        any collected frames are written as ``contours_progress.mp4`` and
        ``learning_progress.mp4`` in ``logger.get_dir()``.

        Returns:
            None.
        """
        weights = self.value_fun._params
        rollout_frames = []
        contour_frames = []
        avg_returns = []
        figure = None

        for itr in range(self.max_itr):
            weights = self.optimizer.grad_step(self.objective, weights)
            self.value_fun.update(weights)

            is_log_step = itr % self.log_itr == 0 or itr == self.max_itr - 1
            do_render = self.render if itr % self.render_itr == 0 else False
            if not is_log_step:
                continue

            mean_return, frames = rollout(self.env, self.policy,
                                          render=do_render, iteration=itr)
            if do_render:
                frame, figure = plot_contour(self.env, self.value_fun,
                                             fig=figure, iteration=itr)
                contour_frames.append(frame)
                rollout_frames.extend(frames)
            avg_returns.append(mean_return)
            for key, value in (('Iteration', itr),
                               ('Average Returns', mean_return)):
                logger.logkv(key, value)
            logger.dumpkvs()

        plot_returns(avg_returns)
        plot_contour(self.env, self.value_fun, save=True, fig=figure)

        have_contours = bool(contour_frames) and contour_frames[0] is not None
        if have_contours:
            # Each contour is upsampled by a factor of 10 before encoding.
            upsampled = list(upsample(np.array(contour_frames), 10))
            mpy.ImageSequenceClip(upsampled, fps=10).write_videofile(
                '%s/contours_progress.mp4' % logger.get_dir())

        if rollout_frames:
            # Frame rate follows the environment time step (0.1 if undefined).
            frame_rate = int(10 / getattr(self.env, 'dt', 0.1))
            mpy.ImageSequenceClip(rollout_frames, fps=frame_rate).write_videofile(
                '%s/learning_progress.mp4' % logger.get_dir())

        plt.close()
Esempio n. 3
0
    # Load one pickled result snapshot from `filename`.
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only use this on files produced by a trusted run.
    with open(filename, 'rb') as f:
        raw = pickle.load(f)

    # plot_mesh is defined elsewhere; presumably it draws the background mesh
    # for the given node counts and domain lengths -- TODO confirm.
    plot_mesh(num_nodes_x, num_nodes_y, length_x, length_y)

    if quad_order is not None:
        # 'fill' and 'boundary' are reshaped to a 2-D grid with
        # (num_nodes_y - 1) * quad_order rows and
        # (num_nodes_x - 1) * quad_order columns, then plotted on `gpt_mesh`.
        fill = raw['fill'].reshape((
            (num_nodes_y - 1) * quad_order,
            (num_nodes_x - 1) * quad_order,
        ))
        boundary = raw['boundary'].reshape((
            (num_nodes_y - 1) * quad_order,
            (num_nodes_x - 1) * quad_order,
        ))
        plot_contour(gpt_mesh, fill, plot_fill=True)
        plot_contour(gpt_mesh, boundary, plot_boundary=True)
    elif 0: # param -- condition is the constant 0, so this branch never runs
        # Would reshape 'multipliers' to one value per node
        # (num_nodes_y x num_nodes_x).
        multipliers = raw['multipliers'].reshape((num_nodes_y, num_nodes_x))
        plot_contour(mesh, multipliers, plot_fill=True)
    else: # SIMP
        # 'multipliers' is reshaped to (num_nodes_y - 1) x (num_nodes_x - 1),
        # i.e. one fewer entry than nodes along each axis.
        multipliers = raw['multipliers'].reshape((num_nodes_y-1, num_nodes_x-1))
        plot_contour(mesh, multipliers, plot_fill=True)

    # Save the current plot under a zero-padded, counter-numbered file name.
    plot_save(save='save/save%03i.png'%counter)

    # Advance to the next snapshot file name; presumably an enclosing loop
    # (not visible here) picks it up on the next pass -- TODO confirm.
    counter += 1
    filename = 'save/data%03i.pkl' % counter

# import movie