Example #1
def experiment(n_epochs, n_iterations, ep_per_run, save_states_to_disk):
    np.random.seed()

    logger = Logger('plot_and_norm_example', results_dir=None)
    logger.strong_line()
    logger.info('Plotting and normalization example')

    # MDP
    mdp = LQR.generate(dimensions=2, max_pos=10., max_action=5., episodic=True)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    optimizer = AdaptiveOptimizer(eps=.01)
    algorithm_params = dict(optimizer=optimizer)
    agent = REINFORCE(mdp.info, policy, **algorithm_params)

    # normalization preprocessor
    prepro = MinMaxPreprocessor(mdp_info=mdp.info)

    # plotting callback
    plotter = PlotDataset(mdp.info, obs_normalized=True)

    # Train
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[prepro])

    # training loop
    for n in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset = core.evaluate(n_episodes=ep_per_run, render=False)
        J = np.mean(compute_J(dataset, mdp.info.gamma))
        logger.epoch_info(n + 1, J=J)

    if save_states_to_disk:
        # save normalization and plotting state to disk
        logger.info('Saving plotting and normalization data')
        os.makedirs("./logs/plot_and_norm", exist_ok=True)
        prepro.save("./logs/plot_and_norm/preprocessor.msh")
        plotter.save_state("./logs/plot_and_norm/plotting_state")

        # load the saved state back from disk
        logger.info('Loading preprocessor and plotter')
        prepro = MinMaxPreprocessor.load(
            "./logs/plot_and_norm/preprocessor.msh")
        plotter.load_state("./logs/plot_and_norm/plotting_state")
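A minimal entry point for this example could look like the sketch below; the argument values are illustrative assumptions, not taken from the original script.

if __name__ == '__main__':
    # Hypothetical invocation: 10 epochs, 4 fits per epoch,
    # 25 episodes per fit and per evaluation, nothing saved to disk.
    experiment(n_epochs=10, n_iterations=4, ep_per_run=25,
               save_states_to_disk=False)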
Example #2
def experiment(goal, use_muscles, n_epochs, n_steps, n_episodes_test):
    np.random.seed(1)

    # MDP
    gamma = 0.99
    horizon = 2000
    mdp = create_mdp(gamma, horizon, goal, use_muscles=use_muscles)

    # Agent
    agent = create_SAC_agent(mdp)

    # normalization preprocessor
    normalizer = MinMaxPreprocessor(mdp_info=mdp.info)

    # plotting callback
    plotter = PlotDataset(mdp.info)

    # Algorithm (with normalization and plotting)
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[normalizer])

    # training loop
    for n in range(n_epochs):
        core.learn(n_steps=n_steps, n_steps_per_fit=1)
        dataset = core.evaluate(n_episodes=n_episodes_test, render=True)
        print('Epoch: ', n, '  J: ', np.mean(compute_J(dataset, gamma)),
              '  Len_ep: ', int(np.round(np.mean(episodes_length(dataset)))))

    print('Press ENTER to visualize the humanoid')
    input()
    core.evaluate(n_episodes=10, render=True)
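If this experiment is driven from the command line, a small argparse wrapper along the following lines could be used; the defaults, and in particular the goal string, are placeholders rather than values documented by the environment.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Humanoid SAC experiment')
    parser.add_argument('--goal', type=str, default='walk')
    parser.add_argument('--use-muscles', action='store_true')
    parser.add_argument('--n-epochs', type=int, default=10)
    parser.add_argument('--n-steps', type=int, default=1000)
    parser.add_argument('--n-episodes-test', type=int, default=5)
    args = parser.parse_args()

    experiment(args.goal, args.use_muscles, args.n_epochs,
               args.n_steps, args.n_episodes_test)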
Example #3
def experiment(goal, use_muscles, n_epochs, n_steps, n_episodes_test):
    np.random.seed(1)

    logger = Logger('SAC', results_dir=None)
    logger.strong_line()
    logger.info('Humanoid Experiment, Algorithm: SAC')

    # MDP
    gamma = 0.99
    horizon = 2000
    mdp = create_mdp(gamma, horizon, goal, use_muscles=use_muscles)

    # Agent
    agent = create_SAC_agent(mdp)

    # normalization preprocessor
    normalizer = MinMaxPreprocessor(mdp_info=mdp.info)

    # plotting callback
    plotter = PlotDataset(mdp.info)

    # Algorithm (with normalization and plotting)
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[normalizer])
    dataset = core.evaluate(n_episodes=n_episodes_test, render=True)

    J = np.mean(compute_J(dataset, gamma))
    L = int(np.round(np.mean(episodes_length(dataset))))

    logger.epoch_info(0, J=J, episode_length=L)

    # training loop
    for n in trange(n_epochs, leave=False):
        core.learn(n_steps=n_steps, n_steps_per_fit=1)
        dataset = core.evaluate(n_episodes=n_episodes_test, render=True)

        J = np.mean(compute_J(dataset, gamma))
        L = int(np.round(np.mean(episodes_length(dataset))))

        logger.epoch_info(n + 1, J=J, episode_length=L)

    logger.info('Press ENTER to visualize the humanoid')
    input()
    core.evaluate(n_episodes=10, render=True)
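After training, the agent itself can be checkpointed in the same way the preprocessor is saved in the first example. A rough sketch, assuming the standard MushroomRL Serializable interface and a hypothetical output path:

import os

from mushroom_rl.core import Agent

def checkpoint_agent(agent, path='./logs/humanoid_sac/agent.msh'):
    # Serialize the trained agent and reload it, e.g. for later evaluation.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    agent.save(path)
    return Agent.load(path)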
Example #4
def experiment(n_epochs, n_iterations, ep_per_run, save_states_to_disk):
    np.random.seed()

    # MDP
    mdp = LQR.generate(dimensions=2, max_pos=10., max_action=5., episodic=True)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    agent = REINFORCE(mdp.info, policy, **algorithm_params)

    # normalization preprocessor
    prepro = MinMaxPreprocessor(mdp_info=mdp.info)

    # plotting callback
    plotter = PlotDataset(mdp.info, obs_normalized=True)

    # Train
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[prepro])

    # training loop
    for n in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset = core.evaluate(n_episodes=ep_per_run, render=False)
        print('Epoch: ', n, '  J: ', np.mean(compute_J(dataset,
                                                       mdp.info.gamma)))

    if save_states_to_disk:
        # save normalization and plotting state to disk
        os.makedirs("./temp/", exist_ok=True)
        prepro.save_state("./temp/normalization_state")
        plotter.save_state("./temp/plotting_state")

        # load the saved state back from disk
        prepro.load_state("./temp/normalization_state")
        plotter.load_state("./temp/plotting_state")
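For reference, the J printed above is the mean discounted return over the evaluation episodes. A standalone sketch of that quantity (not MushroomRL's own compute_J implementation):

def discounted_return(rewards, gamma):
    # Sum of gamma**t * r_t over one episode.
    return sum(gamma ** t * r for t, r in enumerate(rewards))

# e.g. a three-step episode with rewards [1, 0, 1] and gamma = 0.9
print(discounted_return([1., 0., 1.], 0.9))  # 1.81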
Example #5
def test_normalizing_preprocessor(tmpdir):
    np.random.seed(88)

    class Network(nn.Module):
        def __init__(self, input_shape, output_shape, **kwargs):
            super().__init__()

            n_input = input_shape[-1]
            n_output = output_shape[0]

            self._h1 = nn.Linear(n_input, n_output)

            nn.init.xavier_uniform_(self._h1.weight,
                                    gain=nn.init.calculate_gain('relu'))

        def forward(self, state, action=None):
            q = F.relu(self._h1(torch.squeeze(state, 1).float()))
            if action is None:
                return q
            else:
                action = action.long()
                q_acted = torch.squeeze(q.gather(1, action))
                return q_acted

    mdp = Gym('CartPole-v0', horizon=500, gamma=.99)

    # Policy
    epsilon_random = Parameter(value=1.)
    pi = EpsGreedy(epsilon=epsilon_random)

    # Approximator
    input_shape = mdp.info.observation_space.shape

    approximator_params = dict(network=Network,
                               optimizer={'class':  optim.Adam,
                                          'params': {'lr': .001}},
                               loss=F.smooth_l1_loss,
                               input_shape=input_shape,
                               output_shape=mdp.info.action_space.size,
                               n_actions=mdp.info.action_space.n,
                               n_features=2, use_cuda=False)

    alg_params = dict(batch_size=5, initial_replay_size=10,
                      max_replay_size=500, target_update_frequency=50)

    agent = DQN(mdp.info, pi, TorchApproximator,
                approximator_params=approximator_params, **alg_params)

    norm_box = MinMaxPreprocessor(mdp_info=mdp.info,
                                  clip_obs=5.0, alpha=0.001)

    core = Core(agent, mdp, preprocessors=[norm_box])

    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    # observations seen during training stay within the clipping range
    assert (core._state.min() >= -norm_box._clip_obs
            and core._state.max() <= norm_box._clip_obs)

    # saving and reloading preserves the normalization state
    state_dict1 = norm_box.get_state()
    norm_box.save(tmpdir / 'norm_box.msh')

    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    norm_box = MinMaxPreprocessor.load(tmpdir / 'norm_box.msh')
    state_dict2 = norm_box.get_state()

    assert ((state_dict1["mean"] == state_dict2["mean"]).all()
            and (state_dict1["var"] == state_dict2["var"]).all()
            and state_dict1["count"] == state_dict2["count"])

    core = Core(agent, mdp, preprocessors=[norm_box])
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)
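The state dictionary compared in this test (mean, var, count) holds running statistics of the observations. A simplified, self-contained sketch of such a tracker (not the library's exact code) is shown below to make the assertion easier to follow.

import numpy as np

class RunningStatsSketch:
    """Toy running-statistics tracker with the same state fields
    (mean, var, count) that the test above compares."""

    def __init__(self, shape, alpha=0.001):
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)
        self.count = 0
        self._alpha = alpha

    def update(self, obs):
        # Exponential moving estimates of the observation mean and variance.
        self.count += 1
        self.mean = (1 - self._alpha) * self.mean + self._alpha * obs
        self.var = (1 - self._alpha) * self.var \
            + self._alpha * (obs - self.mean) ** 2

    def normalize(self, obs, clip=5.0):
        # Standardize and clip, mirroring what a normalization preprocessor
        # does before the observation reaches the agent.
        return np.clip((obs - self.mean) / np.sqrt(self.var + 1e-8),
                       -clip, clip)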