Example #1
def batch_evaluate(agent, env_name, seed, episodes, return_obss_actions=False, pixel=False):
    num_envs = min(256, episodes)

    envs = []
    for i in range(num_envs):
        env = gym.make(env_name)
        if pixel:
            env = RGBImgPartialObsWrapper(env)
        envs.append(env)
    env = ManyEnvs(envs)

    logs = {
        "num_frames_per_episode": [],
        "return_per_episode": [],
        "observations_per_episode": [],
        "actions_per_episode": [],
        "seed_per_episode": []
    }

    for i in tqdm(range((episodes + num_envs - 1) // num_envs)):
        seeds = range(seed + i * num_envs, seed + (i + 1) * num_envs)
        env.seed(seeds)

        # Reset agent.
        if hasattr(agent, 'reset'):
            agent.reset()

        many_obs = env.reset()

        cur_num_frames = 0
        num_frames = np.zeros((num_envs,), dtype='int64')
        returns = np.zeros((num_envs,))
        already_done = np.zeros((num_envs,), dtype='bool')
        if return_obss_actions:
            obss = [[] for _ in range(num_envs)]
            actions = [[] for _ in range(num_envs)]
        while (num_frames == 0).any():
            action = agent.act_batch(many_obs)['action']
            if return_obss_actions:
                for j in range(num_envs):
                    if not already_done[j]:
                        obss[j].append(many_obs[j])
                        actions[j].append(action[j].item())
            many_obs, reward, done, _ = env.step(action)
            agent.analyze_feedback(reward, done)
            done = np.array(done)
            just_done = done & (~already_done)
            returns += reward * just_done
            cur_num_frames += 1
            num_frames[just_done] = cur_num_frames
            already_done[done] = True

        logs["num_frames_per_episode"].extend(list(num_frames))
        logs["return_per_episode"].extend(list(returns))
        logs["seed_per_episode"].extend(list(seeds))
        if return_obss_actions:
            logs["observations_per_episode"].extend(obss)
            logs["actions_per_episode"].extend(actions)

    return logs
Example #2
    def __init__(self, name, horizon=None, gamma=0.99, history_length=4,
                 fixed_seed=None, use_pixels=False):
        """
        Constructor.

        Args:
             name (str): name of the environment;
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             history_length (int, 4): number of frames to form a state;
             fixed_seed (int, None): if passed, it fixes the seed of the
                environment at every reset. This way, the environment is fixed
                rather than procedurally generated;
             use_pixels (bool, False): if True, MiniGrid's default 7x7x3
                observation is converted to an image of resolution 56x56x3.

        """
        # MDP creation
        self._not_pybullet = True
        self._first = True

        env = gym.make(name)
        obs_high = 10.
        if use_pixels:
            env = RGBImgPartialObsWrapper(env) # Get pixel observations
            obs_high = 255.
        env = ImgObsWrapper(env) # Get rid of the 'mission' field
        self.env = env

        self._fixed_seed = fixed_seed

        self._img_size = env.observation_space.shape[0:2]
        self._history_length = history_length

        # Get the default horizon
        if horizon is None:
            horizon = self.env.max_steps

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(
            low=0., high=obs_high, shape=(history_length, self._img_size[1], self._img_size[0]))
        self.env.max_steps = horizon + 1 # Hack to ignore gym time limit (do not use np.inf, since MiniGrid returns r(t) = 1 - 0.9t/T)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        Environment.__init__(self, mdp_info)

        self._state = None
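The use_pixels note in the docstring above follows from the wrapper defaults: RGBImgPartialObsWrapper renders the 7x7 agent view at its default tile_size of 8, giving a 56x56x3 image. A minimal check of the shapes (not part of the original source; the environment name is an arbitrary choice, assuming gym_minigrid is installed):

import gym
from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

env = gym.make('MiniGrid-Empty-8x8-v0')
print(env.observation_space['image'].shape)   # (7, 7, 3) compact encoding
env = ImgObsWrapper(RGBImgPartialObsWrapper(env))  # same wrapping as above
print(env.observation_space.shape)            # (56, 56, 3) RGB pixels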
Example #3
parser.add_argument("--save-interval", type=int, default=50,
                    help="number of updates between two saves (default: 50, 0 means no saving)")
args = parser.parse_args()

utils.seed(args.seed)

if os.environ.get("TORCH_DETECT_ANOMALY", None):
    torch.set_anomaly_enabled(True)

# Generate environments
envs = []
use_pixel = 'pixel' in args.arch
for i in range(args.procs):
    env = gym.make(args.env)
    if use_pixel:
        env = RGBImgPartialObsWrapper(env)
    env.seed(100 * args.seed + i)
    envs.append(env)

# Define model name
suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
instr = args.instr_arch if args.instr_arch else "noinstr"
mem = "mem" if not args.no_mem else "nomem"
model_name_parts = {
    'env': args.env,
    'algo': args.algo,
    'arch': args.arch,
    'instr': instr,
    'mem': mem,
    'seed': args.seed,
    'info': '',
Example #4
    "pagedown": "drop",
    " ": "toggle"
}

assert args.model is not None or args.demos is not None, "--model or --demos must be specified."
# if args.seed is None:
#     args.seed = 0 if args.model is not None else 1

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment
env = gym.make(args.env)
if args.model is not None and 'pixel' in args.model:
    env = RGBImgPartialObsWrapper(env)
env.seed(args.seed)

global obs
obs = env.reset()
print("Mission: {}".format(obs["mission"]))

# Define agent
agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                         args.argmax, args.env)

# Run the agent

done = True

action = None
Example #5
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True

    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                             args.gqn_h_dim, args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")
        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)

    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv
        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right and bottom edge
            # with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

        # Wrap in RNN to add features to observation
        if wrap_rnn:
            # noinspection PyUnboundLocalVariable
            env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
Example #6
            if reward_mean > 500:
                break

    def play(self, num_episodes, render=True):
        """Test the trained agent.
        """
        for episode in range(num_episodes):
            state = self.env.reset()
            total_reward = 0.0
            while True:
                if render:
                    self.env.render()
                action = self.get_action(state)
                state, reward, done, _ = self.env.step(action)
                total_reward += reward
                if done:
                    print(
                        f"Total reward: {total_reward} in episode {episode + 1}"
                    )
                    break


if __name__ == "__main__":
    env = gym.make("MiniGrid-Empty-8x8-v0")
    env = RGBImgPartialObsWrapper(env)  # Get pixel observations
    env = ImgObsWrapper(env)  # Get rid of the 'mission' field
    agent = Agent(env)
    print("Number of actions: ", agent.actions)
    agent.train(percentile=99.9, num_iterations=64, num_episodes=128)
    agent.play(num_episodes=3)
Example #7
)
parser.add_argument(
    "--tile_size",
    type=int,
    help="size at which to render tiles",
    default=32
)
parser.add_argument(
    '--agent_view',
    default=False,
    help="draw the agent sees (partially observable view)",
    action='store_true'
)

args = parser.parse_args()

env = gym.make(args.env)

if args.agent_view:
    env = RGBImgPartialObsWrapper(env)
    env = ImgObsWrapper(env)

print(dijkstras(env))
window = Window('gym_minigrid - ' + args.env)
window.reg_key_handler(key_handler)

reset()

# Blocking event loop
window.show(block=True)
Example #8
def generate_demos(n_episodes, valid, seed, shift=0):
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])
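The demos above store each episode's frames with blosc.pack_array. A minimal round-trip sketch (the array contents are arbitrary), assuming the python-blosc package is installed:

import blosc
import numpy as np

# Arbitrary stand-in for the stacked observation images collected above.
frames = np.zeros((10, 7, 7, 3), dtype=np.uint8)
packed = blosc.pack_array(frames)      # compressed bytes, as stored in each demo tuple
restored = blosc.unpack_array(packed)  # lossless round trip
assert (restored == frames).all()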
Example #9
def batch_evaluate(agent,
                   env_name,
                   seed,
                   episodes,
                   return_obss_actions=False,
                   pixel=False,
                   monitor_gym=False,
                   pairs_dict=None,
                   model_path=None):

    num_envs = min(256, episodes)

    envs = []
    for i in range(num_envs):
        if '_c' in env_name:
            env = gym.make(env_name, pairs_dict=pairs_dict, test_mode=True)
        else:
            env = gym.make(env_name)
        if pixel:
            env = RGBImgPartialObsWrapper(env)

        if monitor_gym:
            demo_path = os.path.join(model_path, 'gym_demos')

            if not i % 64:
                env = Monitor(
                    env,
                    demo_path,
                    video_callable=lambda episode_id: episode_id == 1,
                    force=True)
            else:
                env = Monitor(env, demo_path, video_callable=False, force=True)

        envs.append(env)
    env = ManyEnvs(envs)

    logs = {
        "num_frames_per_episode": [],
        "return_per_episode": [],
        "observations_per_episode": [],
        "actions_per_episode": [],
        "seed_per_episode": [],
        "seen_missions": [env.mission for env in envs]
    }

    for i in range((episodes + num_envs - 1) // num_envs):
        seeds = range(seed + i * num_envs, seed + (i + 1) * num_envs)
        env.seed(seeds)

        many_obs = env.reset()

        cur_num_frames = 0
        num_frames = np.zeros((num_envs, ), dtype='int64')
        returns = np.zeros((num_envs, ))
        already_done = np.zeros((num_envs, ), dtype='bool')
        if return_obss_actions:
            obss = [[] for _ in range(num_envs)]
            actions = [[] for _ in range(num_envs)]
        while (num_frames == 0).any():
            action = agent.act_batch(many_obs)['action']
            if return_obss_actions:
                for j in range(num_envs):
                    if not already_done[j]:
                        obss[j].append(many_obs[j])
                        actions[j].append(action[j].item())
            many_obs, reward, done, _ = env.step(action)
            agent.analyze_feedback(reward, done)
            done = np.array(done)
            just_done = done & (~already_done)
            returns += reward * just_done
            cur_num_frames += 1
            num_frames[just_done] = cur_num_frames
            already_done[done] = True

        logs["num_frames_per_episode"].extend(list(num_frames))
        logs["return_per_episode"].extend(list(returns))
        logs["seed_per_episode"].extend(list(seeds))
        if return_obss_actions:
            logs["observations_per_episode"].extend(obss)
            logs["actions_per_episode"].extend(actions)

    return logs
Example #10
def BobEnv(size):
    return ImgObsWrapper(RGBImgPartialObsWrapper(_BobEnv(size)))
Example #11
    def __init__(
        self,
        args,
    ):
        self.args = args

        utils.seed(self.args.seed)
        self.val_seed = self.args.val_seed
        self.use_pixel = 'pixels' in args.model

        # args.multi_env is a list when training on multiple environments
        if getattr(args, 'multi_env', None):
            self.env = [gym.make(item) for item in args.multi_env]
            if self.use_pixel:
                self.env = [RGBImgPartialObsWrapper(e) for e in self.env]

            self.train_demos = []
            for demos, episodes in zip(args.multi_demos, args.multi_episodes):
                demos_path = utils.get_demos_path(demos,
                                                  None,
                                                  None,
                                                  valid=False)
                logger.info('loading {} of {} demos'.format(episodes, demos))
                train_demos = utils.load_demos(demos_path)
                logger.info('loaded demos')
                if episodes > len(train_demos):
                    raise ValueError(
                        "there are only {} train demos in {}".format(
                            len(train_demos), demos))
                self.train_demos.extend(train_demos[:episodes])
                logger.info('So far, {} demos loaded'.format(
                    len(self.train_demos)))

            self.val_demos = []
            for demos, episodes in zip(args.multi_demos, [args.val_episodes] *
                                       len(args.multi_demos)):
                demos_path_valid = utils.get_demos_path(demos,
                                                        None,
                                                        None,
                                                        valid=True)
                logger.info('loading {} of {} valid demos'.format(
                    episodes, demos))
                valid_demos = utils.load_demos(demos_path_valid)
                logger.info('loaded demos')
                if episodes > len(valid_demos):
                    logger.info(
                        'Using all the available {} demos to evaluate valid. accuracy'
                        .format(len(valid_demos)))
                self.val_demos.extend(valid_demos[:episodes])
                logger.info('So far, {} valid demos loaded'.format(
                    len(self.val_demos)))

            logger.info('Loaded all demos')

            observation_space = self.env[0].observation_space
            action_space = self.env[0].action_space

        else:
            self.env = gym.make(self.args.env)
            if self.use_pixel:
                self.env = RGBImgPartialObsWrapper(self.env)

            demos_path = utils.get_demos_path(args.demos,
                                              args.env,
                                              args.demos_origin,
                                              valid=False)
            demos_path_valid = utils.get_demos_path(args.demos,
                                                    args.env,
                                                    args.demos_origin,
                                                    valid=True)

            logger.info('loading demos')
            self.train_demos = utils.load_demos(demos_path)
            logger.info('loaded demos')
            if args.episodes:
                if args.episodes > len(self.train_demos):
                    raise ValueError("there are only {} train demos".format(
                        len(self.train_demos)))
                self.train_demos = self.train_demos[:args.episodes]

            self.val_demos = utils.load_demos(demos_path_valid)
            if args.val_episodes > len(self.val_demos):
                logger.info(
                    'Using all the available {} demos to evaluate valid. accuracy'
                    .format(len(self.val_demos)))
            self.val_demos = self.val_demos[:self.args.val_episodes]

            observation_space = self.env.observation_space
            action_space = self.env.action_space

        self.obss_preprocessor = utils.select_obss_preprocessor(
            args.model, observation_space,
            getattr(self.args, 'pretrained_model', None))

        # Define actor-critic model
        self.acmodel = utils.load_model(args.model, raise_not_found=False)
        if self.acmodel is None:
            if getattr(self.args, 'pretrained_model', None):
                self.acmodel = utils.load_model(args.pretrained_model,
                                                raise_not_found=True)
            else:
                logger.info('Creating new model')
                self.acmodel = ACModel(self.obss_preprocessor.obs_space,
                                       action_space, args.image_dim,
                                       args.memory_dim, args.instr_dim,
                                       not self.args.no_instr,
                                       self.args.instr_arch,
                                       not self.args.no_mem, self.args.arch)
        if self.obss_preprocessor.vocab is not None:
            self.obss_preprocessor.vocab.save()
        utils.save_model(self.acmodel, args.model)

        self.acmodel.train()
        if torch.cuda.is_available():
            self.acmodel.cuda()

        self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
Example #12
def partial_rgb_train(env):
    return RGBImgPartialObsWrapper(env, tile_size=6)
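The tile_size argument sets the rendered resolution of the 7x7 partial view: the image side is 7 * tile_size, so tile_size=6 here gives 42x42x3 (and tile_size=9 in Example #5 gives the 63x63 image noted there). A minimal check, assuming gym_minigrid is installed (the environment name is an arbitrary choice):

import gym
from gym_minigrid.wrappers import RGBImgPartialObsWrapper

env = RGBImgPartialObsWrapper(gym.make('MiniGrid-Empty-8x8-v0'), tile_size=6)
print(env.observation_space['image'].shape)  # (42, 42, 3)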