Code example #1
import os

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv

# make_env(rank) is assumed to be defined elsewhere in the project; it should
# return a thunk that builds one environment instance for SubprocVecEnv.


def main():
    num_cpu = 1
    load_version = ''
    save_version = '1b_v0'
    load_dir = '../models'
    save_dir = '../models'
    timesteps_per_checkpoint = int(1e6)
    num_checkpoints = int(1e1)  # controlling performance level of agent

    # create the checkpoint directory if it does not already exist
    try:
        os.mkdir(save_dir)
    except FileExistsError:
        pass

    alg_env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    print('created alg env')

    train_policy = 'MlpPolicy'
    load_path = '{}/alg_v{}.zip'.format(load_dir, load_version)
    if os.path.exists(load_path):
        # build a fresh model, then load only the saved weights into it,
        # keeping the newly created vectorized environment
        alg = PPO(train_policy, alg_env, verbose=0)
        alg.set_parameters(load_path, exact_match=True)
        # alternative: alg = PPO.load(load_path, env=alg_env)
        print('loaded alg checkpoint ' + load_path)
    else:
        alg = PPO(train_policy, alg_env, verbose=0)
        print('created alg model')

    save_path = '{}/alg_v{}.zip'.format(save_dir, save_version)
    # alternate training and checkpointing; each learn() call continues training
    # the same model (its timestep counter restarts unless reset_num_timesteps=False)
    for _ in range(num_checkpoints):
        alg.learn(total_timesteps=timesteps_per_checkpoint)
        alg.save(save_path)
        print('saved alg checkpoint ' + save_path)
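
The script references a make_env helper that is not shown. A minimal sketch of what it might look like, assuming the SokobanEnv constructor arguments used in code example #5, together with the __main__ guard that SubprocVecEnv needs when worker processes are spawned:

# Hypothetical helper; the real make_env is defined elsewhere in the project.
# SokobanEnv and its arguments are assumptions borrowed from code example #5.
def make_env(rank):
    def _init():
        # rank is unused here, but could be used to seed each worker differently
        return SokobanEnv(dim_room=(7, 7), max_steps=20, num_boxes=1,
                          train_mode='mlp', log_train_info=False)
    return _init


if __name__ == '__main__':
    # SubprocVecEnv starts worker processes, so the entry point must be guarded
    main()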
Code example #2
import os

import gym
import imageio
from stable_baselines3 import PPO

# room_to_rgb (the Sokoban board renderer) is assumed to be importable from the
# project's Sokoban environment package.


class AgentDemoWrapper(gym.Wrapper):
    def __init__(self, env, agent_path=None, tempdir_path=None):
        self.alg = PPO('MlpPolicy', env, verbose=0)
        if agent_path is not None:
            load_path = agent_path
            self.alg.set_parameters(load_path, exact_match=True)

        if tempdir_path is None:
            tempdir_path = 'temp'

        # create the output directory if it does not already exist
        try:
            os.mkdir(tempdir_path)
        except FileExistsError:
            pass
        self.save_dir = tempdir_path
        self.max_attempt = 1000
        super(AgentDemoWrapper, self).__init__(env)

    def reset(self):
        obs = self.env.reset()
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return obs, reward, done, info

    def generate_episode_gif(self, init_map):
        # roll out the loaded agent from a fixed initial map and save the
        # rendered frames as a GIF
        images = []
        done = False
        obs = self.env.manual_reset(init_map)
        im = room_to_rgb(obs)
        images.append(im)
        while not done:
            action, _ = self.alg.predict(obs, deterministic=True)
            obs, _, done, _ = self.env.step(action)
            im = room_to_rgb(obs)
            images.append(im)

        im_name = '{}/agent_episode.gif'.format(self.save_dir)
        imageio.mimsave(im_name, images, 'GIF', fps=2)
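
A usage sketch for the wrapper; the checkpoint path and environment arguments are hypothetical and borrowed from the other code examples:

# Paths and constructor arguments are assumptions; adjust to the project layout.
env = SokobanEnv(dim_room=(7, 7), max_steps=20, num_boxes=1,
                 train_mode='mlp', log_train_info=False)
demo = AgentDemoWrapper(env, agent_path='../demo_checkpoints/agent_v1b_0.zip')

# init_map would typically come from the room generator used in code examples #3 and #5.
_, init_map, _ = room_utils.generate_room(dim=(7, 7), num_steps=int(1.7 * (7 + 7)),
                                          num_boxes=1, second_player=False)
demo.generate_episode_gif(init_map)  # writes temp/agent_episode.gif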
Code example #3
        unique_solver_idx = -1
        # keep sampling until the room generator succeeds; generate_room can
        # raise when it fails to build a solvable room within its step budget
        success = False
        while not success:
            success = True
            try:
                fix_room = room_utils.generate_room(dim=dim_room,
                                                    num_steps=num_gen_steps,
                                                    num_boxes=num_boxes,
                                                    second_player=False)
                _, state, _ = fix_room
            except Exception:
                success = False
        for i in range(len(version_li)):
            version = version_li[i]
            load_path = '{}/agent_v{}.zip'.format(load_dir, version)
            # load this version's weights into the shared PPO model
            agent.set_parameters(load_path, exact_match=True)
            # agent = agent_li[i]
            done = False
            # manual_reset is invoked through the vectorized env; add a batch
            # dimension so the observation matches the vec-env layout
            obs = np.expand_dims(soko_env.env_method('manual_reset', state)[0],
                                 axis=0)
            while not done:
                action, _ = agent.predict(obs, deterministic=True)
                obs, _, done, info = soko_env.step(action)

            # solved: count it, and track whether exactly one version solved this room
            if info[0]["all_boxes_on_target"]:
                num_solved_li[i] += 1
                if unique_solver_idx == -1:
                    unique_solver_idx = i
                else:
                    unique_solver_idx = -1
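
This fragment assumes that soko_env is a vectorized environment (hence the env_method call and the added batch dimension) and that agent, np, room_utils, and the counters are defined in the surrounding function, much as in code example #5. A minimal setup sketch under those assumptions:

import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# SokobanEnv and its arguments are project-local assumptions (see code example #5).
soko_env = DummyVecEnv([lambda: SokobanEnv(dim_room=(7, 7), max_steps=20,
                                           num_boxes=1, train_mode='mlp',
                                           log_train_info=False)])
agent = PPO('MlpPolicy', soko_env, verbose=0)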
Code example #4
# imports as in code example #2 (os, gym, imageio, PPO, and the Sokoban renderer)
class ALGDemoWrapper(gym.Wrapper):
    def __init__(self, env, alg_path=None, alg_version=0, tempdir_path=None):
        self.alg = PPO('MlpPolicy', env, verbose=0)
        if alg_path is not None:
            load_path = alg_path + str(alg_version)
            self.alg.set_parameters(load_path, exact_match=True)

        if tempdir_path is None:
            tempdir_path = 'temp'

        # create the output directory if it does not already exist
        try:
            os.mkdir(tempdir_path)
        except FileExistsError:
            pass
        self.save_dir = tempdir_path
        self.max_attempt = 1000
        self.version = alg_version
        super(ALGDemoWrapper, self).__init__(env)

    def reset(self):
        obs = self.env.reset()
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return obs, reward, done, info

    def generate_level(self):
        # roll out the generator policy until it produces a level with no failure flag
        while True:
            done = False
            obs = self.env.reset()
            while not done:
                action, _ = self.alg.predict(obs, deterministic=True)
                obs, _, done, info = self.env.step(action)

            if info['fail_type'] == -1:
                return obs

    def generate_episode_gif(self):
        # retry until an episode finishes with train_result == 0 (a usable map),
        # then save its rendered frames as a GIF
        attempt = 0
        while True:
            images = []
            done = False
            obs = self.env.reset()
            im = room_to_rgb(obs)
            images.append(im)
            while not done:
                action, _ = self.alg.predict(obs, deterministic=True)
                obs, _, done, info = self.env.step(action)
                im = room_to_rgb(obs)
                images.append(im)

            if info['train_result'] == 0:
                im_name = '{}/alg_episode_v{}.gif'.format(
                    self.save_dir, self.version)
                imageio.mimsave(im_name, images, 'GIF', fps=2)
                return True, obs

            attempt += 1
            if attempt >= self.max_attempt:
                print("Timed out: wasn't able to generate a good map.")
                return False, None
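
A usage sketch for the generator wrapper; the ALGEnv constructor arguments follow code example #5, and the checkpoint path prefix is hypothetical:

# ALGEnv arguments are taken from code example #5; the path is an assumption.
alg_env = ALGEnv(dim_room=(7, 7), num_boxes=1, train_mode='mlp', alg_version=0,
                 agent_lb_path=None, agent_ub_path=None,
                 init_probs=[0.5, 0.5, 0.5], log_interval=0)
alg_demo = ALGDemoWrapper(alg_env, alg_path='../models/alg_v', alg_version=0)
level = alg_demo.generate_level()                # a generated room state
ok, final_obs = alg_demo.generate_episode_gif()  # writes temp/alg_episode_v0.gif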
Code example #5
from stable_baselines3 import PPO
from tqdm import tqdm

# SokobanEnv, ALGEnv, room_utils, and ALGDemoWrapper (code example #4) are
# assumed to come from the project's own modules.


def evaluate_agents(version_li=['1b_0', '1b_1'], num_boxes=1, dim_room=(7, 7),
                    max_steps=20, num_tests=1000, train_mode='mlp',
                    load_dir='../demo_checkpoints', alg_path=None, alg_version=0):

    num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
    # env_li = [lambda: SokobanEnv(dim_room=dim_room, max_steps=max_steps,
    #                             num_boxes=num_boxes, train_mode=train_mode, log_train_info=False)]
    soko_env = SokobanEnv(dim_room=dim_room, max_steps=max_steps,
                          num_boxes=num_boxes, train_mode=train_mode, log_train_info=False)
    print('created soko env')

    # agent_li = []
    if train_mode == 'cnn':
        train_policy = 'CnnPolicy'
    else:
        train_policy = 'MlpPolicy'
    # a single PPO model is reused for every version; each version's weights
    # are loaded into it inside the evaluation loop below
    # for version in version_li:
    agent = PPO(train_policy, soko_env, verbose=0)
    # agent = PPO.load('../models/soko_v' + version + '.zip', env=soko_env)
    # print('loaded', version, 'model')
    # agent_li.append(agent)

    if alg_path is not None:
        # use the trained generator (ALG) to produce the evaluation levels
        alg_env = ALGEnv(dim_room=dim_room, num_boxes=num_boxes,
                         train_mode=train_mode, alg_version=0,
                         agent_lb_path=None, agent_ub_path=None,
                         init_probs=[0.5, 0.5, 0.5], log_interval=0)
        alg_demo = ALGDemoWrapper(alg_env, alg_path=alg_path,
                                  alg_version=alg_version)

    for _ in range(1):  # single evaluation pass
        # per-version counters: total rooms solved, and rooms solved by only that version
        num_solved_li = [0] * len(version_li)
        num_unique_solved_li = [0] * len(version_li)
        for _ in tqdm(range(num_tests)):
            unique_solver_idx = -1

            # use the random room generator: retry until a room is produced,
            # since generate_room can raise when it fails within its step budget
            if alg_path is None:
                while True:
                    try:
                        fix_room = room_utils.generate_room(
                            dim=dim_room,
                            num_steps=num_gen_steps,
                            num_boxes=num_boxes,
                            second_player=False
                        )
                        _, state, _ = fix_room
                        break
                    except Exception:
                        pass
            else:
                state = alg_demo.generate_level()

            for i in range(len(version_li)):
                version = version_li[i]
                load_path = '{}/agent_v{}.zip'.format(load_dir, version)
                # load this version's weights into the shared PPO model
                agent.set_parameters(load_path, exact_match=True)
                # agent = agent_li[i]
                done = False
                obs = soko_env.manual_reset(state)
                while not done:
                    action, _ = agent.predict(obs, deterministic=True)
                    obs, _, done, info = soko_env.step(action)

                # solved: count it, and track whether exactly one version solved this room
                if info["all_boxes_on_target"]:
                    num_solved_li[i] += 1
                    if unique_solver_idx == -1:
                        unique_solver_idx = i
                    else:
                        unique_solver_idx = -1

            if unique_solver_idx != -1:
                num_unique_solved_li[unique_solver_idx] += 1

        for i in range(len(version_li)):
            print('{} solved {}, uniquely solved {}'.format(
                version_li[i], num_solved_li[i], num_unique_solved_li[i]))
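
A sketch of how the two evaluation modes might be invoked; the checkpoint directory and version names mirror the defaults above, and the generator path prefix is hypothetical:

# Compare two agent versions on randomly generated rooms.
evaluate_agents(version_li=['1b_0', '1b_1'], num_tests=1000,
                load_dir='../demo_checkpoints')

# Compare the same versions on rooms produced by the trained generator (ALG);
# alg_path + str(alg_version) must point at a saved PPO checkpoint, as in
# code example #4.
evaluate_agents(version_li=['1b_0', '1b_1'], num_tests=1000,
                load_dir='../demo_checkpoints',
                alg_path='../models/alg_v', alg_version=0)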