def train(self):
        """Run the value-update loop, evaluating the policy as it goes.

        Each iteration replaces the value function with
        ``self.get_next_values()``. The loop ends when
        ``self._stop_condition`` returns true or after ``self.max_itr``
        iterations. Every ``self.log_itr`` iterations the policy is refreshed
        from ``self.get_next_policy()``, evaluated with ``rollout``, and the
        average return and average delayed cost are logged. When
        ``self.render`` is set and the iteration is also a multiple of
        ``self.render_itr``, the rollout frames and a contour image are
        collected.

        After the loop the policy is refreshed once more, a final contour is
        plotted with ``save=True``, a final rendered rollout is run, the
        environment is closed, the return and delayed-cost curves are plotted,
        the final metrics are logged, and the collected frames are written as
        videos into ``logger.get_dir()``.
        """
        next_v = 1e6
        # NOTE(review): `v` is read once here and never reassigned in the loop,
        # so `_stop_condition` keeps receiving this same object. Confirm that
        # this is intended (e.g. get_values() returns a live view).
        v = self.value_fun.get_values()
        itr = 0
        videos = []
        contours = []
        returns = []
        delay_cs = []
        fig = None

        while not self._stop_condition(itr, next_v, v) and itr < self.max_itr:
            log = itr % self.log_itr == 0
            render = (itr % self.render_itr == 0) and self.render
            if log:
                # The policy is only brought up to date when it is about to be
                # evaluated.
                next_pi = self.get_next_policy()
                self.policy.update(next_pi)
                average_return, avg_delay_cost, video = rollout(
                    self.env,
                    self.policy,
                    render=render,
                    num_rollouts=self.num_rollouts,
                    max_path_length=self.max_path_length,
                    iteration=itr)
                if render:
                    contour, fig = plot_contour(self.env, self.value_fun, fig=fig, iteration=itr)
                    # Repeat the contour once per rollout frame so both frame
                    # lists grow by the same amount.
                    contours += [contour] * len(video)
                    videos += video
                returns.append(average_return)
                delay_cs.append(avg_delay_cost)
                logger.logkv('Iteration', itr)
                logger.logkv('Average Returns', average_return)
                logger.logkv('Average Delayed Costs', avg_delay_cost)
                logger.dumpkvs()
            next_v = self.get_next_values()
            self.value_fun.update(next_v)
            itr += 1

        # Final evaluation with the policy derived from the last value update.
        next_pi = self.get_next_policy()
        self.policy.update(next_pi)
        contour, fig = plot_contour(self.env, self.value_fun, save=True, fig=fig, iteration=itr)
        average_return, avg_delay_cost, video = rollout(
            self.env,
            self.policy,
            render=True,
            num_rollouts=self.num_rollouts,
            max_path_length=self.max_path_length,
            iteration=itr)
        self.env.close()
        plot_returns(returns)
        plot_returns(delay_cs, 'delayed_cost')
        videos += video
        if self.render:
            contours += [contour]
        logger.logkv('Iteration', itr)
        logger.logkv('Average Returns', average_return)
        logger.logkv('Average Delayed Costs', avg_delay_cost)
        # Flush the final metrics; without this the values recorded above are
        # never emitted, unlike the rows logged inside the loop.
        logger.dumpkvs()

        # `dt` falls back to 0.1 when the environment does not define it.
        fps = int(4 / getattr(self.env, 'dt', 0.1))
        if contours and contours[0] is not None:
            clip = mpy.ImageSequenceClip(contours, fps=fps)
            clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())

        if videos:
            clip = mpy.ImageSequenceClip(videos, fps=fps)
            clip.write_videofile('%s/roll_outs.mp4' % logger.get_dir())

        plt.close()
Beispiel #2
0
    def train(self):
        """Fit the value function by gradient steps and record progress.

        Performs ``self.max_itr`` optimizer steps on ``self.objective``,
        pushing the updated parameters into ``self.value_fun`` after each one.
        On every ``self.log_itr``-th iteration, and on the last iteration, the
        policy is rolled out and its average return is logged. If that
        iteration is also a multiple of ``self.render_itr`` and
        ``self.render`` is set, the rollout frames and a contour image are
        kept. Afterwards the return curve and a final contour are plotted and
        any collected frames are written as videos under ``logger.get_dir()``.
        """
        weights = self.value_fun._params
        rollout_frames = []
        contour_frames = []
        avg_returns = []
        figure = None

        for itr in range(self.max_itr):
            weights = self.optimizer.grad_step(self.objective, weights)
            self.value_fun.update(weights)

            is_log_itr = itr % self.log_itr == 0 or itr == self.max_itr - 1
            do_render = (itr % self.render_itr == 0) and self.render
            if not is_log_itr:
                continue

            avg_return, frames = rollout(self.env, self.policy, render=do_render, iteration=itr)
            if do_render:
                contour_img, figure = plot_contour(self.env, self.value_fun, fig=figure, iteration=itr)
                contour_frames.append(contour_img)
                rollout_frames.extend(frames)
            avg_returns.append(avg_return)

            for key, value in (('Iteration', itr), ('Average Returns', avg_return)):
                logger.logkv(key, value)
            logger.dumpkvs()

        plot_returns(avg_returns)
        plot_contour(self.env, self.value_fun, save=True, fig=figure)

        # Only build the contour video when the first collected entry is not None.
        if contour_frames and contour_frames[0] is not None:
            contour_frames = list(upsample(np.array(contour_frames), 10))
            contour_clip = mpy.ImageSequenceClip(contour_frames, fps=10)
            contour_clip.write_videofile('%s/contours_progress.mp4' % logger.get_dir())

        if rollout_frames:
            # `dt` falls back to 0.1 when the environment does not define it.
            frame_rate = int(10 / getattr(self.env, 'dt', 0.1))
            rollout_clip = mpy.ImageSequenceClip(rollout_frames, fps=frame_rate)
            rollout_clip.write_videofile('%s/learning_progress.mp4' % logger.get_dir())

        plt.close()
    def train(self):
        """Run ``self.max_itr`` value-function updates with timed evaluation.

        Each iteration calls ``self.value_fun_update()`` and measures how long
        it took. On every ``self.log_itr``-th iteration, and on the last one,
        the policy is rolled out and the average return, average delayed cost,
        update time and rollout time are logged. Frames are kept only when the
        iteration is also a multiple of ``self.render_itr`` and
        ``self.render`` is set.

        After the loop the return and delayed-cost curves are plotted, the
        collected frames are written to ``learning_progress.mp4``, and one
        extra rendered rollout (two episodes, ``last_iteration=True``) is
        written to ``final_rollout.mp4``; both files go into
        ``logger.get_dir()``. No contour plots are produced by this method.
        """
        videos = []
        returns = []
        delay_cs = []
        for itr in range(self.max_itr):
            itr_starttime = time.time()
            self.value_fun_update()
            itr_time = time.time() - itr_starttime
            log = itr % self.log_itr == 0 or itr == self.max_itr - 1
            render = (itr % self.render_itr == 0) and self.render
            if log:
                rollout_starttime = time.time()
                average_return, avg_delay_cost, video = rollout(
                    self.env,
                    self.policy,
                    num_rollouts=self.num_rollouts,
                    render=render,
                    iteration=itr,
                    max_path_length=self.max_path_length)
                rollout_time = time.time() - rollout_starttime
                if render:
                    videos += video
                returns.append(average_return)
                delay_cs.append(avg_delay_cost)
                logger.logkv('Iteration', itr)
                logger.logkv('Average Returns', average_return)
                logger.logkv('Average Delayed Costs', avg_delay_cost)
                logger.logkv('Iteration Time', itr_time)
                logger.logkv('Policy Rollout Time', rollout_time)
                logger.dumpkvs()

        plot_returns(returns)
        plot_returns(delay_cs, 'delayed_cost')

        if videos:
            # `dt` falls back to 0.1 when the environment does not define it.
            fps = int(4 / getattr(self.env, 'dt', 0.1))
            clip = mpy.ImageSequenceClip(videos, fps=fps)
            clip.write_videofile('%s/learning_progress.mp4' % logger.get_dir())

        # Final showcase rollout; only its frames are used, the returned
        # metrics are intentionally discarded.
        itr = self.max_itr
        _, _, final_itr_video = rollout(
            self.env,
            self.policy,
            num_rollouts=2,
            render=True,
            iteration=itr,
            last_max_path_length=self.last_max_path_length,
            last_iteration=True)

        # Same guard as for the progress video: skip encoding when the rollout
        # produced no frames instead of failing on an empty sequence.
        if final_itr_video:
            final_clip = mpy.ImageSequenceClip(final_itr_video, fps=40)
            final_clip.write_videofile('%s/final_rollout.mp4' % logger.get_dir())
        plt.close()
Beispiel #4
0
from envs.asrs_env import ASRSEnv
from utils.utils import RandomPolicy
from utils.plot import rollout
import moviepy.editor as mpy

# Demo script: roll out a random policy once on an ASRSEnv instance with
# rendering enabled and save the resulting frames as a video.
videos = []
env = ASRSEnv((2, 3), dist_param=[0.01, 0.2, 0.4, 0.5, 0.7, 0.9])

try:
    policy = RandomPolicy(env)

    average_return, video = rollout(env, policy, render=True, num_rollouts=1)
    videos += video

    # `dt` falls back to 0.1 when the environment does not define it.
    fps = int(4 / getattr(env, 'dt', 0.1))
    if videos:
        clip = mpy.ImageSequenceClip(videos, fps=fps)
        clip.write_videofile('data/roll_outs.mp4')
finally:
    # Close the environment even if the rollout or the video encoding fails.
    env.close()