Example #1
def train_mem(ctx, path, vae_path):
    """Train MDN-RNN model as specified in .json config with data at `PATH`."""

    from third_party.torchtrainer import EarlyStopping, LambdaCallback, ModelCheckpoint, CSVLogger, RandomBatchSampler, evaluate
    from torch.utils.data import DataLoader
    config = obtain_config(ctx)

    env = hrl.create_gym(config.general['game_name'])

    # Create checkpoint directory, if it doesn't exist
    create_directory(os.path.dirname(config.rnn['ckpt_path']))

    # Create training DataLoader
    dataset = MDNDataset(path, config.rnn['sequence_len'],
                         config.rnn['terminal_prob'],
                         config.rnn['dataset_fraction'])
    data_loader = DataLoader(dataset,
                             batch_sampler=RandomBatchSampler(
                                 dataset, config.rnn['batch_size']),
                             pin_memory=True)

    # Build model
    rnn = build_rnn_model(config.rnn, config.vae['latent_space_dim'],
                          env.action_space)

    # Create callbacks
    callbacks = [
        EarlyStopping(metric='loss',
                      patience=config.rnn['patience'],
                      verbose=1),
        LambdaCallback(on_batch_begin=lambda _, batch_size:
                       rnn.model.init_hidden(batch_size)),
        ModelCheckpoint(config.rnn['ckpt_path'], metric='loss',
                        save_best=True),
        CSVLogger(
            filename=os.path.join(config.rnn['logs_dir'], 'train_mem.csv'))
    ]

    # Evaluate and visualize memory progress if render allowed
    if config.allow_render:
        if vae_path is None:
            raise ValueError("To render provide valid path to VAE checkpoint!")

        # Build VAE model and load checkpoint
        _, _, decoder = build_vae_model(config.vae,
                                        config.general['state_shape'],
                                        vae_path)

        callbacks += [
            MemoryVisualization(config, decoder, rnn.model, dataset,
                                'mdn_plots')
        ]

    # Fit MDN-RNN model!
    rnn.fit_loader(data_loader,
                   epochs=config.rnn['epochs'],
                   callbacks=callbacks)

    dataset.close()
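
# --- Hypothetical sketch (not part of the project) ----------------------------
# RandomBatchSampler comes from third_party.torchtrainer and is not shown above.
# Assuming it only needs to yield shuffled, fixed-size batches of dataset indices
# (so the LambdaCallback can re-initialize the RNN hidden state with a constant
# batch size), a minimal stand-in might look like this:
import numpy as np
from torch.utils.data import Sampler


class ShuffledBatchSampler(Sampler):
    def __init__(self, dataset, batch_size):
        self.num_samples = len(dataset)
        self.batch_size = batch_size

    def __iter__(self):
        indices = np.random.permutation(self.num_samples)
        # Drop the last incomplete batch to keep the batch size constant.
        for start in range(0, self.num_samples - self.batch_size + 1,
                           self.batch_size):
            yield indices[start:start + self.batch_size].tolist()

    def __len__(self):
        return self.num_samples // self.batch_size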
Example #2
def train_ctrl(ctx, vae_path, mdn_path):
    """Plays chosen game and trains Controller on preprocessed states with VAE and MDN-RNN
    (loaded from `vae_path` or `mdn_path`)."""

    # We will spawn multiple workers, we don't want them to access GPU
    config = obtain_config(ctx, use_gpu=False)

    # Bookkeeping variables
    best_return = float('-inf')

    # Get number of workers to run
    processes = config.es['processes']
    processes = processes if processes > 0 else None

    # Get action space size
    env = hrl.create_gym(config.general['game_name'])
    action_space = env.action_space
    del env

    input_dim = config.vae['latent_space_dim'] + config.rnn['hidden_units']
    out_dim = action_space.num
    n_params = (input_dim + 1) * out_dim
    # Build CMA-ES solver
    solver = build_es_model(config.es, n_params=n_params)
    best_return = solver.best_score

    # Train for N epochs
    pbar = tqdm(range(config.es['epochs']), ascii=True)
    pbar.set_postfix(current=best_return)
    for _ in pbar:
        # Get new population
        population = solver.ask()

        # Evaluate population in parallel
        hists = hrl.pool(Evaluator(
            config,
            config.vae['latent_space_dim'] + config.rnn['hidden_units'],
            action_space, vae_path, mdn_path),
                         jobs=population,
                         processes=processes,
                         n_episodes=config.es['n_episodes'],
                         render_mode=config.allow_render,
                         verbose=0)
        returns = [np.mean(hist['return']) for hist in hists]

        # Print logs and update best return
        pbar.set_postfix(best=best_return, current=max(returns))
        best_return = max(best_return, max(returns))

        # Update solver
        solver.tell(returns)

        # Save solver in given path
        solver.save_es_ckpt_and_mind_weights(config.es['ckpt_path'],
                                             config.es['mind_path'],
                                             score=best_return)
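
# --- Hypothetical sketch (not part of the project) ----------------------------
# build_es_model wraps a CMA-ES solver whose code is not shown above. The
# ask/tell loop follows the standard CMA-ES interface; with the pycma package
# (an assumption about the backend) the core pattern is:
import cma
import numpy as np

n_params = 10                                    # illustrative problem size
es = cma.CMAEvolutionStrategy(np.zeros(n_params), 0.5)
for _ in range(5):
    candidates = es.ask()                        # sample a population of weight vectors
    returns = [-np.sum(np.square(x)) for x in candidates]  # toy return to maximize
    es.tell(candidates, [-r for r in returns])   # pycma minimizes, so negate returns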
Example #3
    def test_qualitative_reward_env(self):
        """Test for QualitativeRewardEnvironment wrapper.
        There should be no reward during game and -1/1 reward at the end of game."""

        env = QualitativeRewardEnvironment(create_gym("Sokoban-v0"))
        env.reset()
        reward, is_done = 0, False
        while not is_done:
            assert reward == 0
            _, reward, is_done, _ = env.step(env.sample_action())
        assert reward == -1 or reward == 1
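
# --- Hypothetical sketch (not part of the project) ----------------------------
# QualitativeRewardEnvironment's implementation is not shown above. The wrapper
# below only illustrates the contract the test checks (zero reward while the
# game is running, -1/1 on the terminal step); names and the win/loss rule are
# assumptions.
class SignOnlyRewardWrapper:
    def __init__(self, env):
        self.env = env
        self._total = 0.0

    def reset(self):
        self._total = 0.0
        return self.env.reset()

    def step(self, action):
        state, reward, done, info = self.env.step(action)
        self._total += reward
        # Report only win (+1) or loss (-1), and only at the end of the game.
        qualitative = (1.0 if self._total > 0 else -1.0) if done else 0.0
        return state, qualitative, done, info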
Example #4
    def test_random_maze_env(self):
        """Test random maze environment parameters."""

        env = create_gym("MazeEnv-v0")

        assert isinstance(env.action_space, Discrete)
        assert np.all(env.state_space.shape == (21, 21, 2))
        assert np.all(env.valid_actions == np.array(range(4)))
        assert env.action_space.num == 4

        state = env.reset()
        assert np.all(env.current_state == state)

        for action in env.valid_actions:
            state, reward, done, info = env.step(action)
            assert np.all(env.current_state == state)
Example #5
    def test_sokoban_env(self):
        """Tests box state space and discrete action space handling,
        all properties, reset, step and create_gym methods."""

        env = create_gym("Sokoban-v0")

        assert isinstance(env.action_space, Discrete)
        assert np.all(env.state_space.shape == (160, 160, 3, 2))
        assert np.all(env.valid_actions == np.array(range(8)))
        assert env.action_space.num == 8

        state = env.reset()
        assert np.all(env.current_state == state)

        for action in env.valid_actions:
            state, reward, done, info = env.step(action)
            assert np.all(env.current_state == state)
Example #6
    def test_frozenlake_env(self):
        """Tests discrete state space and discrete action space handling,
        all properties, reset, step and create_gym methods."""

        env = create_gym("FrozenLake-v0")

        assert isinstance(env.action_space, Discrete)
        assert np.all(env.state_space == 16)
        assert np.all(env.valid_actions == np.array([0, 1, 2, 3]))
        assert env.action_space.num == 4

        state = env.reset()
        assert np.all(env.current_state == state)

        for action in env.valid_actions:
            state, reward, done, info = env.step(action)
            assert np.all(env.current_state == state)
Example #7
def record_data(ctx, path, n_games, chunk_size, state_dtype):
    """Plays chosen game randomly and records transitions to hdf5 file in `PATH`."""

    config = obtain_config(ctx)

    # Create Gym environment, random agent and store to hdf5 callback
    env = hrl.create_gym(config.general['game_name'])
    mind, agent_callbacks = create_generating_agent(
        config.general['generating_agent'], env)
    store_callback = StoreTransitions(path,
                                      config.general['state_shape'],
                                      env.action_space,
                                      chunk_size=chunk_size,
                                      state_dtype=state_dtype,
                                      reward_dtype=np.float32)
    callbacks = agent_callbacks + [store_callback]

    if store_callback.game_count >= n_games:
        log.warning(
            "Data is already fully present in dataset you specified! If you wish to create"
            " a new dataset, please remove the one under this path or specify a different"
            " path. If you wish to gather more data, increase the number of games to "
            " record with --n-games parameter.")
        return
    elif 0 < store_callback.game_count < n_games:
        diff = n_games - store_callback.game_count
        log.info(
            "{}/{} games were already recorded in specified dataset. {} more game will be"
            " added!".format(store_callback.game_count, n_games, diff))
        n_games = diff

    # Resizes states to `state_shape` with cropping
    interpreter = BasicInterpreter(state_shape=config.general['state_shape'],
                                   crop_range=config.general['crop_range'],
                                   scale=255)

    # Play `N` random games and gather data as it goes
    hrl.loop(env,
             mind,
             interpreter,
             n_episodes=n_games,
             verbose=1,
             callbacks=callbacks,
             render_mode=config.allow_render)
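
# --- Hypothetical sketch (not part of the project) ----------------------------
# BasicInterpreter itself is not shown above. Assuming `crop_range` is a pair of
# (row, column) index ranges and the goal is "crop, then resize to state_shape",
# a dependency-free nearest-neighbour version might look like this:
import numpy as np


def crop_and_resize(frame, crop_range, state_shape):
    (y0, y1), (x0, x1) = crop_range
    cropped = frame[y0:y1, x0:x1]
    # Nearest-neighbour resize via index striding.
    rows = np.linspace(0, cropped.shape[0] - 1, state_shape[0]).astype(int)
    cols = np.linspace(0, cropped.shape[1] - 1, state_shape[1]).astype(int)
    return cropped[rows][:, cols]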
Example #8
    def test_mountain_car_continuous_env(self):
        """Tests box state space and discrete action space handling,
        all properties, reset, step and create_gym methods."""

        env = create_gym("MountainCarContinuous-v0")

        assert isinstance(env.action_space, Continuous)
        assert np.all(env.state_space.shape == (2, 2))
        assert isinstance(env.valid_actions, Continuous)
        assert env.valid_actions == env.action_space
        assert env.action_space.num == 1

        state = env.reset()
        assert np.all(env.current_state == state)

        for action in [[env.action_space.low], [0], [env.action_space.high],
                       env.sample_action()]:
            state, reward, done, info = env.step(action)
            assert np.all(env.current_state == state)
Example #9
    def test_cartpole_env(self):
        """Tests continuous state space and discrete action space handling,
        all properties, reset, step and create_gym methods."""

        env = create_gym("CartPole-v0")

        assert isinstance(env.action_space, Discrete)
        assert np.allclose(env.state_space,
                           np.array([[-4.8, 4.8],
                                     [-3.40282347e+38, 3.40282347e+38],
                                     [-0.419, 0.419],
                                     [-3.40282347e+38, 3.40282347e+38]]),
                           atol=1e-3)
        assert np.all(env.valid_actions == np.array([0, 1]))
        assert env.action_space.num == 2

        state = env.reset()
        assert np.all(env.current_state == state)

        for action in env.valid_actions:
            state, reward, done, info = env.step(action)
            assert np.all(env.current_state == state)
Example #10
def eval(ctx, controller_path, vae_path, mdn_path, n_games):
    """Plays chosen game testing whole pipeline: VAE -> MDN-RNN -> Controller
    (loaded from `vae_path`, `mdn_path` and `controller_path`)."""

    config = obtain_config(ctx)

    # Get action space size
    env = hrl.create_gym(config.general['game_name'])

    # Create VAE + MDN-RNN interpreter
    _, encoder, _ = build_vae_model(config.vae, config.general['state_shape'],
                                    vae_path)

    rnn = build_rnn_model(config.rnn, config.vae['latent_space_dim'],
                          env.action_space, mdn_path)

    basic_interpreter = BasicInterpreter(
        state_shape=config.general['state_shape'],
        crop_range=config.general['crop_range'])
    mdn_interpreter = MDNInterpreter(encoder, rnn.model,
                                     config.vae['latent_space_dim'])

    # Build CMA-ES solver and linear model
    mind = build_mind(
        config.es, config.vae['latent_space_dim'] + config.rnn['hidden_units'],
        env.action_space, controller_path)

    hist = hrl.loop(env,
                    mind,
                    ChainInterpreter(basic_interpreter, mdn_interpreter),
                    n_episodes=n_games,
                    render_mode=config.allow_render,
                    verbose=1,
                    callbacks=[ReturnTracker(), mdn_interpreter])

    print("Returns:", *hist['return'])
    print("Avg. return:", np.mean(hist['return']))
Example #11
    def initialize(self):
        self._env = hrl.create_gym(self.config.general['game_name'])
        self._basic_interpreter, self._mdn_interpreter = \
            self._interpreter_factory()
Example #12
    @property
    def metrics(self):
        return {"avg. return": self.running_avg}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='HumbleRL tabular Q-Learning sample')
    parser.add_argument('--episodes', type=int, default=865, metavar='N',
                        help='number of episodes to train (default: 865)')
    parser.add_argument('--lr', type=float, default=0.75, metavar='LR',
                        help='learning rate (default: 0.75)')
    parser.add_argument('--decay', type=int, default=400, metavar='N',
                        help='exploration decay steps (default: 400)')
    parser.add_argument('--gamma', type=float, default=0.95, metavar='G',
                        help='discount factor (default: 0.95)')
    args = parser.parse_args()

    # Create environment and q-learning agent
    env = hrl.create_gym("FrozenLake-v0")
    mind = TabularQLearning(env.state_space, env.action_space.num,
                            learning_rate=args.lr,
                            decay_steps=args.decay,
                            discount_factor=args.gamma)

    # Seed env and numpy
    np.random.seed(7)
    env.env.seed(7)

    # Run training
    hrl.loop(env, mind, n_episodes=args.episodes, callbacks=[mind])
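
# --- Hypothetical sketch (not part of the project) ----------------------------
# TabularQLearning's internals are not shown above. The classic tabular update
# it presumably performs after every (state, action, reward, next_state)
# transition is:
import numpy as np


def q_update(Q, state, action, reward, next_state, done, lr=0.75, gamma=0.95):
    target = reward if done else reward + gamma * np.max(Q[next_state])
    Q[state, action] += lr * (target - Q[state, action])

# Usage: Q = np.zeros((n_states, n_actions)); call q_update once per step.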
Example #13
    def initialize(self):
        self._env = hrl.create_gym("CartPole-v0")
Example #14
                        help='checkpoint path to load from/save to model (default: None)')
    parser.add_argument('--render', action='store_true', default=False,
                        help='enable visual play after each epoch (default: False)')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='enable debug logging (default: False)')
    args = parser.parse_args()

    # Configure logger
    log.basicConfig(level=log.DEBUG if args.debug else log.WARNING,
                    format="[%(levelname)s]: %(message)s")

    # Bookkeeping variables
    best_return = float('-inf')

    # Create environment and mind
    env = hrl.create_gym("CartPole-v0")
    mind = LinearModel(env.state_space.shape[0], len(env.valid_actions))

    # Load CMA-ES solver if ckpt available
    if args.ckpt and os.path.isfile(args.ckpt):
        solver = CMAES.load_ckpt(args.ckpt)
        log.info("Loaded solver from ckpt (NOTE: pop. size and l2 decay was also loaded).")
    else:
        solver = CMAES(mind.n_weights, popsize=args.popsize, weight_decay=args.decay)
        log.info("Created solver with pop. size: %d and l2 decay: %f.", args.popsize, args.decay)

    # Train for N epochs
    pbar = tqdm(range(args.epochs))
    for epoch in pbar:
        # Get new population
        population = solver.ask()
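        # NOTE: the snippet is truncated here. A typical continuation of the
        # ask/tell loop (mirroring Example #2) would evaluate each candidate's
        # weights, e.g. returns = [run_episode(env, mind, weights) for weights
        # in population] with a hypothetical run_episode helper, then update
        # best_return, call solver.tell(returns) and checkpoint to args.ckpt.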