Example #1
def main(args):
    # environment
    env = MuJoCoWrapper(gym.make(args.env), args.reward_scale, args.render)
    env.seed(args.seed)
    eval_env = MuJoCoWrapper(gym.make(args.env))
    eval_env.seed(args.seed)
    num_actions = env.action_space.shape[0]

    # network parameters
    params = TD3NetworkParams(fcs=args.layers,
                              concat_index=args.concat_index,
                              state_shape=env.observation_space.shape,
                              num_actions=num_actions,
                              gamma=args.gamma,
                              tau=args.tau,
                              actor_lr=args.actor_lr,
                              critic_lr=args.critic_lr,
                              target_noise_sigma=args.target_noise_sigma,
                              target_noise_clip=args.target_noise_clip)

    # deep neural network
    network = TD3Network(params)

    # replay buffer
    buffer = Buffer(args.buffer_size)

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # exploration noise
    noise = NormalActionNoise(np.zeros(num_actions),
                              np.ones(num_actions) * 0.1)

    # controller
    controller = TD3Controller(network, buffer, metrics, noise, num_actions,
                               args.batch_size, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episode)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view)
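
For context, every example here expects an `args` namespace produced by an argument parser. The snippet below is a minimal, hypothetical sketch of how such a parser could be wired up for Example #1; the flag names are inferred from the attributes the example reads (args.env, args.seed, args.layers, ...), and the defaults are assumptions, not the project's actual CLI.

import argparse

# Hypothetical sketch: builds an `args` namespace with the attributes that
# Example #1 reads. Flag names and defaults are assumptions, not the
# project's actual command-line interface.
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Pendulum-v0')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--reward-scale', type=float, default=1.0)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--layers', type=int, nargs='+', default=[64, 64])
    parser.add_argument('--concat-index', type=int, default=1)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--tau', type=float, default=0.005)
    parser.add_argument('--actor-lr', type=float, default=1e-3)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    parser.add_argument('--target-noise-sigma', type=float, default=0.2)
    parser.add_argument('--target-noise-clip', type=float, default=0.5)
    parser.add_argument('--buffer-size', type=int, default=10 ** 6)
    parser.add_argument('--batch-size', type=int, default=100)
    parser.add_argument('--final-steps', type=int, default=10 ** 6)
    parser.add_argument('--log-interval', type=int, default=1000)
    parser.add_argument('--save-interval', type=int, default=10 ** 5)
    parser.add_argument('--eval-interval', type=int, default=10 ** 4)
    parser.add_argument('--eval-episode', type=int, default=10)
    parser.add_argument('--name', type=str, default='td3')
    parser.add_argument('--log-adapter', type=str, default=None)
    parser.add_argument('--load', type=str, default=None)
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())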
Example #2
def main(args):
    # environments
    env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale), args.render)
    env.seed(args.seed)
    eval_env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale))
    eval_env.seed(args.seed)
    num_actions = env.action_space.shape[0]

    # network parameters
    params = PPONetworkParams(fcs=args.layers,
                              num_actions=num_actions,
                              state_shape=env.observation_space.shape,
                              num_envs=args.num_envs,
                              batch_size=args.batch_size,
                              epsilon=args.epsilon,
                              learning_rate=args.lr,
                              grad_clip=args.grad_clip,
                              value_factor=args.value_factor,
                              entropy_factor=args.entropy_factor)

    # deep neural network
    network = PPONetwork(params)

    # rollout buffer
    rollout = Rollout()

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # controller
    controller = PPOController(network, rollout, metrics, args.num_envs,
                               args.time_horizon, args.epoch, args.batch_size,
                               args.gamma, args.lam, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episodes)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view, batch=True)
Example #3
def main(args):
    # environment
    env = MuJoCoWrapper(gym.make(args.env), args.reward_scale, args.render)
    eval_env = MuJoCoWrapper(gym.make(args.env))
    num_actions = env.action_space.shape[0]

    # deep neural network
    network = SACNetwork(args.layers, args.concat_index,
                         env.observation_space.shape, num_actions, args.gamma,
                         args.tau, args.pi_lr, args.q_lr, args.v_lr, args.reg)

    # replay buffer
    buffer = Buffer(args.buffer_size)

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # exploration noise
    noise = EmptyNoise()

    # controller
    controller = SACController(network, buffer, metrics, noise, num_actions,
                               args.batch_size, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episode)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view)
Example #4
def main(args):
    # environments
    env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale), args.render)
    eval_env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale))
    num_actions = env.action_space.shape[0]

    # deep neural network
    network = PPONetwork(args.layers, env.observation_space.shape,
                         args.num_envs, num_actions, args.batch_size,
                         args.epsilon, args.lr, args.grad_clip,
                         args.value_factor, args.entropy_factor)

    # rollout buffer
    rollout = Rollout()

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # controller
    controller = PPOController(network, rollout, metrics, args.num_envs,
                               args.time_horizon, args.epoch, args.batch_size,
                               args.gamma, args.lam, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episodes)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        batch_interact(env, view, eval_env, eval_view)
Example #5
def setup_method(self):
    self.network = DummyNetwork()
    self.metrics = DummyMetrics()
    self.metrics.has = MagicMock(return_value=True)
    self.controller = EvalController(self.network, self.metrics, 10)
Example #6
class TestEvalController:
    def setup_method(self):
        self.network = DummyNetwork()
        self.metrics = DummyMetrics()
        self.metrics.has = MagicMock(return_value=True)
        self.controller = EvalController(self.network, self.metrics, 10)

    @pytest.mark.parametrize("batch", [True, False])
    def test_step(self, batch):
        output = make_output()
        self.network.infer = MagicMock(return_value=output)
        self.metrics.get = MagicMock(return_value=0)

        if batch:
            inpt = list(make_input(batch_size=4, batch=True))
            inpt[2] = np.zeros((4, ))
        else:
            inpt = list(make_input())
            inpt[2] = 0.0
        step_output = self.controller.step(*inpt)

        assert step_output is output.action
        assert self.network.infer.call_count == 1
        if batch:
            assert self.metrics.get.call_count == 4
        else:
            assert self.metrics.get.call_count == 0

    @pytest.mark.parametrize("batch", [True, False])
    def test_step_with_done(self, batch):
        output = make_output()
        self.network.infer = MagicMock(return_value=output)
        self.metrics.add = MagicMock()
        self.metrics.get = MagicMock(return_value=1)

        reward = np.random.random()
        if batch:
            inpt = list(make_input(batch_size=4, batch=True))
            index = np.random.randint(4)
            inpt[2] = np.zeros((4, ))
            inpt[2][index] = 1.0
            inpt[3][index]['reward'] = reward
        else:
            inpt = list(make_input())
            inpt[2] = 1.0
            inpt[3]['reward'] = reward
        self.controller.step(*inpt)

        if batch:
            assert self.metrics.add.call_count == 2
            assert list(self.metrics.add.mock_calls[1])[1] == ('eval_reward',
                                                               reward)
            assert list(self.metrics.add.mock_calls[0])[1] == ('eval_episode',
                                                               1)
        else:
            self.metrics.add.assert_not_called()

    def test_step_with_eval_episode_over_limit(self):
        output = make_output()
        self.network.infer = MagicMock(return_value=output)
        self.metrics.add = MagicMock(side_effect=Exception)
        self.metrics.get = MagicMock(return_value=10)

        inpt = list(make_input(batch_size=4, batch=True))
        index = np.random.randint(4)
        reward = np.random.random()
        inpt[2] = np.zeros((4, ))
        inpt[2][index] = 1.0
        inpt[3][index]['reward'] = reward
        self.controller.step(*inpt)

    def test_stop_episode(self):
        self.metrics.add = MagicMock()

        obs, reward, _, info = make_input()
        self.controller.stop_episode(obs, reward, info)

        assert self.metrics.add.call_count == 2
        assert list(self.metrics.add.mock_calls[0])[1] == ('eval_reward',
                                                           info['reward'])
        assert list(self.metrics.add.mock_calls[1])[1] == ('eval_episode', 1)

    def test_should_update(self):
        assert not self.controller.should_update()

    def test_update(self):
        with pytest.raises(Exception):
            self.controller.update()

    def test_should_log(self):
        assert not self.controller.should_log()

    def test_log(self):
        with pytest.raises(Exception):
            self.controller.log()

    def test_is_finished(self):
        self.metrics.get = MagicMock(return_value=5)
        self.metrics.reset = MagicMock()
        self.metrics.log_metric = MagicMock()

        assert not self.controller.is_finished()
        self.metrics.reset.assert_not_called()
        self.metrics.log_metric.assert_not_called()

        self.metrics.get = MagicMock(return_value=10)
        assert self.controller.is_finished()
        assert list(self.metrics.reset.mock_calls[0])[1] == ('eval_episode', )
        assert list(self.metrics.reset.mock_calls[1])[1] == ('eval_reward', )
        self.metrics.log_metric.assert_called_once_with('eval_reward', 10)

    def test_should_save(self):
        assert not self.controller.should_save()

    def test_save(self):
        self.metrics.save_model = MagicMock()
        self.controller.save()
        self.metrics.save_model.assert_not_called()
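
The tests in Examples #5 and #6 rely on fixtures such as make_input, make_output, DummyNetwork, and DummyMetrics that are not shown here. The snippet below is a hypothetical reconstruction of what minimal versions could look like, inferred only from how the tests index and call them; the project's real fixtures may differ.

import numpy as np


class DummyNetwork:
    # Stand-in for the real network; the tests replace `infer` with a MagicMock.
    def infer(self, *args, **kwargs):
        raise NotImplementedError


class DummyMetrics:
    # Stand-in for the real metrics object; the tests patch the methods they need.
    def has(self, name):
        raise NotImplementedError

    def get(self, name):
        raise NotImplementedError

    def add(self, name, value):
        raise NotImplementedError


def make_output(action_size=4):
    # The tests only read `.action` from the inference output.
    class Output:
        action = np.random.random((action_size,))
    return Output()


def make_input(batch_size=1, batch=False):
    # Returns (obs, reward, done, info); the tests overwrite done (index 2)
    # and write into info['reward'] (index 3).
    if batch:
        obs = np.random.random((batch_size, 8))
        reward = np.zeros((batch_size,))
        done = np.zeros((batch_size,))
        info = [{'reward': 0.0} for _ in range(batch_size)]
    else:
        obs = np.random.random((8,))
        reward = 0.0
        done = 0.0
        info = {'reward': 0.0}
    return obs, reward, done, info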