Example 1
import argparse
from typing import Optional

import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter


def run_environment(args: argparse.Namespace,
                    device: str = 'cpu',
                    logger: Optional[SummaryWriter] = None):
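    """Train an agent on a flattened MiniGrid environment, optionally
    logging episode returns and videos to TensorBoard.

    `MiniGridFlatWrapper` and `init_agent` are assumed to be defined
    elsewhere in the project.
    """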

    # ==
    # Set up environment
    env = gym.make(args.env_name)
    env = MiniGridFlatWrapper(env,
                              use_tensor=False,
                              scale_observation=True,
                              scale_min=0,
                              scale_max=10)

    # ==
    # Set up agent
    agent = init_agent(args, env, device=device)

    # ==
    # Start training
    print(f'Starting training, {args.num_episode} episodes')
    for episode_idx in range(args.num_episode):
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # (optional) Record video
        video = None
        max_vid_len = 200
        if args.video_freq is not None:
            if episode_idx % int(args.video_freq) == 0:
                # Render first frame and insert into the video buffer
                # (assumes render() returns a channel-first (C, H, W) image array)
                frame = env.render()
                video = np.zeros(shape=((max_vid_len, ) + frame.shape),
                                 dtype=np.uint8)  # (max_vid_len, C, H, W)
                video[0] = frame

        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < max_vid_len:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if logger is not None:
                    # Add reward
                    logger.add_scalar('Reward',
                                      cumu_reward,
                                      global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Keep only the frames actually recorded
                        # (frames 0..timestep, capped at the buffer size)
                        last_frame_idx = min(timestep + 1, max_vid_len)

                        # Convert to tensor; add_video expects shape
                        # (N, T, C, H, W), so add a batch dimension
                        vid_tensor = torch.tensor(
                            video[:last_frame_idx], dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)

                        # Add to tensorboard
                        logger.add_video('Run_Video',
                                         vid_tensor,
                                         global_step=episode_idx,
                                         fps=8)

                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(
                            f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                        )

                else:
                    print(
                        f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                    )

                # Agent-side logging (TODO: revisit whether this belongs here)
                agent.report(logger=logger, episode_idx=episode_idx)
                break

            # TODO: add a periodic debugging print-out (e.g. every 100 episodes)
            # to verify timing and confirm training is progressing

    env.close()
    if logger is not None:
        logger.close()
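A minimal sketch of how this variant might be invoked. The flag names mirror the attributes the function reads (`env_name`, `num_episode`, `video_freq`, `log_dir`); the MiniGrid environment id and the default values are illustrative assumptions, not part of the original:

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='MiniGrid-Empty-5x5-v0')
    parser.add_argument('--num_episode', type=int, default=1000)
    parser.add_argument('--video_freq', type=int, default=None)  # None disables video
    parser.add_argument('--log_dir', type=str, default=None)     # None disables TensorBoard
    args = parser.parse_args()

    # Only construct a SummaryWriter when a log directory is given
    logger = SummaryWriter(log_dir=args.log_dir) if args.log_dir is not None else None
    run_environment(args, device='cpu', logger=logger)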
Example 2
import configparser
from typing import Optional

import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter


def run_environment(config: configparser.ConfigParser,
                    device: str = 'cpu',
                    logger: Optional[SummaryWriter] = None):
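    """Config-driven variant of the training loop: hyperparameters are
    read from a ConfigParser instead of argparse flags.

    `MiniGridFlatWrapper` and `init_agent` are assumed to be defined
    elsewhere in the project.
    """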
    # =========
    # Set up environment
    config_env_name = config['Training']['env_name']
    config_seed = config['Training'].getint('seed')
    env = gym.make(config_env_name)
    env = MiniGridFlatWrapper(env,
                              use_tensor=False,
                              scale_observation=False,
                              scale_min=0,
                              scale_max=10)
    env.seed(config_seed)

    # =========
    # Set up agent
    agent = init_agent(config, env, device=device)

    # =========
    # Start training

    # Extract training variables
    config_num_episodes = config['Training'].getint('num_episode')
    config_record_video = config['Video'].getboolean('record')
    config_video_freq = config['Video'].getint('frequency')
    config_video_maxlen = config['Video'].getint('max_length')
    config_video_fps = config['Video'].getint('fps')

    # Train
    print(f'Starting training, {config_num_episodes} episodes')
    for episode_idx in range(config_num_episodes):
        # ==
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # ==
        # (optional) Record video
        video = None
        if config_record_video:
            if episode_idx % config_video_freq == 0:
                # Render first frame and insert into the video buffer
                # (assumes render() returns a channel-first (C, H, W) image array)
                frame = env.render()
                video = np.zeros(shape=((config_video_maxlen, ) + frame.shape),
                                 dtype=np.uint8)  # (config_video_maxlen, C, H, W)
                video[0] = frame

        # ==
        # Run episode
        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < config_video_maxlen:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if logger is not None:
                    # Add reward
                    logger.add_scalar('Reward',
                                      cumu_reward,
                                      global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Keep only the frames actually recorded
                        # (frames 0..timestep, capped at the buffer size)
                        last_frame_idx = min(timestep + 1, config_video_maxlen)

                        # Convert to tensor; add_video expects shape
                        # (N, T, C, H, W), so add a batch dimension
                        vid_tensor = torch.tensor(
                            video[:last_frame_idx], dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)

                        # Add to tensorboard
                        logger.add_video('Run_Video',
                                         vid_tensor,
                                         global_step=episode_idx,
                                         fps=config_video_fps)

                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(
                            f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                        )

                else:
                    print(
                        f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                    )

                # Agent-side logging (TODO: revisit whether this belongs here)
                agent.report(logger=logger, episode_idx=episode_idx)
                break

            # TODO: add a periodic debugging print-out (e.g. every 100 episodes)
            # to verify timing and confirm training is progressing

    env.close()
    if logger is not None:
        logger.close()
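A minimal sketch of a matching configuration file and call site. The section and key names mirror exactly what the function reads via `config['Training']` and `config['Video']`; the values, the `config.ini` filename, and the `./runs` log directory are illustrative assumptions:

# Contents of config.ini (illustrative values):
#
#   [Training]
#   env_name = MiniGrid-Empty-5x5-v0
#   seed = 0
#   num_episode = 1000
#
#   [Video]
#   record = yes
#   frequency = 100
#   max_length = 200
#   fps = 8

if __name__ == '__main__':
    config = configparser.ConfigParser()
    config.read('config.ini')

    # Pass logger=None to fall back to plain printing instead of TensorBoard
    logger = SummaryWriter(log_dir='./runs')
    run_environment(config, device='cpu', logger=logger)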