Example #1
def main(args):
    # environment
    env = MuJoCoWrapper(gym.make(args.env), args.reward_scale, args.render)
    env.seed(args.seed)
    eval_env = MuJoCoWrapper(gym.make(args.env))
    eval_env.seed(args.seed)
    num_actions = env.action_space.shape[0]

    # network parameters
    params = TD3NetworkParams(fcs=args.layers,
                              concat_index=args.concat_index,
                              state_shape=env.observation_space.shape,
                              num_actions=num_actions,
                              gamma=args.gamma,
                              tau=args.tau,
                              actor_lr=args.actor_lr,
                              critic_lr=args.critic_lr,
                              target_noise_sigma=args.target_noise_sigma,
                              target_noise_clip=args.target_noise_clip)

    # deep neural network
    network = TD3Network(params)

    # replay buffer
    buffer = Buffer(args.buffer_size)

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # exploration noise
    noise = NormalActionNoise(np.zeros(num_actions),
                              np.ones(num_actions) * 0.1)

    # controller
    controller = TD3Controller(network, buffer, metrics, noise, num_actions,
                               args.batch_size, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episode)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view)
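The interact function driving these scripts is not shown anywhere in the listing. Below is a minimal sketch of the episode loop it implies, inferred from how env and view are used here and from the View contract exercised in Examples #3 through #7; it is an assumption, not the repository's implementation, and the batch=True path used in Example #2 is omitted:

def interact(env, view, eval_env, eval_view):
    # hypothetical driver loop: run training episodes until the view
    # reports completion, evaluating when the controller asks for it
    while not view.is_finished():
        obs = env.reset()
        reward, done, info = 0.0, False, {}
        while not done:
            action = view.step(obs, reward, done, info)
            obs, reward, done, info = env.step(action)
        view.stop_episode(obs, reward, info)
        if view.should_eval():
            # single evaluation episode through the evaluation view
            obs = eval_env.reset()
            reward, done, info = 0.0, False, {}
            while not done:
                action = eval_view.step(obs, reward, done, info)
                obs, reward, done, info = eval_env.step(action)
            eval_view.stop_episode(obs, reward, info)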
Example #2
def main(args):
    # environments
    env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale), args.render)
    env.seed(args.seed)
    eval_env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale))
    eval_env.seed(args.seed)
    num_actions = env.action_space.shape[0]

    # network parameters
    params = PPONetworkParams(fcs=args.layers,
                              num_actions=num_actions,
                              state_shape=env.observation_space.shape,
                              num_envs=args.num_envs,
                              batch_size=args.batch_size,
                              epsilon=args.epsilon,
                              learning_rate=args.lr,
                              grad_clip=args.grad_clip,
                              value_factor=args.value_factor,
                              entropy_factor=args.entropy_factor)

    # deep neural network
    network = PPONetwork(params)

    # rollout buffer
    rollout = Rollout()

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # controller
    controller = PPOController(network, rollout, metrics, args.num_envs,
                               args.time_horizon, args.epoch, args.batch_size,
                               args.gamma, args.lam, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episodes)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view, batch=True)
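make_envs is referenced in Examples #2 and #9 but never shown. A plausible sketch, assuming it builds one independently wrapped environment per parallel worker; the use of MuJoCoWrapper here is borrowed from Example #1 and is an assumption:

def make_envs(env_name, num_envs, reward_scale):
    # hypothetical helper: one wrapped gym environment per worker,
    # all sharing the same reward scaling
    return [MuJoCoWrapper(gym.make(env_name), reward_scale)
            for _ in range(num_envs)]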
Example #3
    def test_stop_episode(self):
        controller = DummyController()
        view = View(controller)
        controller.stop_episode = MagicMock()

        self.assertEqual(view.stop_episode('obs', 'reward', 'info'), None)
        controller.stop_episode.assert_called_once_with(
            'obs', 'reward', 'info')
Example #4
    def test_should_eval(self):
        controller = DummyController()
        view = View(controller)

        controller.should_eval = MagicMock(return_value=False)
        assert not view.should_eval()

        controller.should_eval = MagicMock(return_value=True)
        assert view.should_eval()
Example #5
    def test_step_without_update(self):
        controller = DummyController()
        view = View(controller)
        controller.should_update = MagicMock(return_value=False)
        controller.update = MagicMock(side_effect=Exception)
        controller.step = MagicMock(return_value='action')

        self.assertEqual(view.step('obs', 'reward', 'done', 'info'), 'action')
        controller.step.assert_called_once_with('obs', 'reward', 'done',
                                                'info')
Example #6
    def test_step_with_save(self):
        controller = DummyController()
        view = View(controller)
        controller.should_save = MagicMock(return_value=True)
        controller.save = MagicMock(unsafe=True)
        controller.step = MagicMock(return_value='action')

        self.assertEqual(view.step('obs', 'reward', 'done', 'info'), 'action')
        controller.step.assert_called_once_with('obs', 'reward', 'done',
                                                'info')
        controller.save.assert_called_once_with()
Example #7
    def test_is_finished(self):
        controller = DummyController()
        view = View(controller)
        controller.save = MagicMock()

        controller.is_finished = MagicMock(return_value=False)
        assert not view.is_finished()
        controller.save.assert_not_called()

        controller.is_finished = MagicMock(return_value=True)
        assert view.is_finished()
        controller.save.assert_called_once_with()
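Taken together, Examples #3 through #7 pin down the View contract. A minimal implementation consistent with those tests follows; the order of the update and save checks inside step is an assumption (the tests do not constrain it), as is the premise that DummyController's unmocked methods return falsy values:

class View:
    def __init__(self, controller):
        self.controller = controller

    def step(self, obs, reward, done, info):
        # delegate the action decision to the controller
        action = self.controller.step(obs, reward, done, info)
        # Example #5: update() must not run when should_update() is False
        if self.controller.should_update():
            self.controller.update()
        # Example #6: save() runs exactly once when should_save() is True
        if self.controller.should_save():
            self.controller.save()
        return action

    def stop_episode(self, obs, reward, info):
        # Example #3: plain delegation
        return self.controller.stop_episode(obs, reward, info)

    def should_eval(self):
        # Example #4: plain delegation
        return self.controller.should_eval()

    def is_finished(self):
        # Example #7: save exactly once when training has finished
        finished = self.controller.is_finished()
        if finished:
            self.controller.save()
        return finished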
Example #8
def main(args):
    # environment
    env = MuJoCoWrapper(gym.make(args.env), args.reward_scale, args.render)
    eval_env = MuJoCoWrapper(gym.make(args.env))
    num_actions = env.action_space.shape[0]

    # deep neural network
    network = SACNetwork(args.layers, args.concat_index,
                         env.observation_space.shape, num_actions, args.gamma,
                         args.tau, args.pi_lr, args.q_lr, args.v_lr, args.reg)

    # replay buffer
    buffer = Buffer(args.buffer_size)

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # exploration noise
    noise = EmptyNoise()

    # controller
    controller = SACController(network, buffer, metrics, noise, num_actions,
                               args.batch_size, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episode)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        interact(env, view, eval_env, eval_view)
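None of the main() examples show their argument parsers. A sketch for Example #8, with flag names inferred from the args attributes it reads (argparse maps --foo-bar to args.foo_bar); every default below is illustrative, not taken from the repository:

import argparse

def parse_args():
    # hypothetical CLI matching the attributes accessed in Example #8
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Pendulum-v0')
    parser.add_argument('--reward-scale', type=float, default=1.0)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--layers', type=int, nargs='+', default=[64, 64])
    parser.add_argument('--concat-index', type=int, default=1)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--tau', type=float, default=0.005)
    parser.add_argument('--pi-lr', type=float, default=3e-4)
    parser.add_argument('--q-lr', type=float, default=3e-4)
    parser.add_argument('--v-lr', type=float, default=3e-4)
    parser.add_argument('--reg', type=float, default=1e-3)
    parser.add_argument('--buffer-size', type=int, default=10 ** 6)
    parser.add_argument('--batch-size', type=int, default=100)
    parser.add_argument('--final-steps', type=int, default=10 ** 6)
    parser.add_argument('--log-interval', type=int, default=1000)
    parser.add_argument('--save-interval', type=int, default=10 ** 5)
    parser.add_argument('--eval-interval', type=int, default=10 ** 4)
    parser.add_argument('--eval-episode', type=int, default=10)
    parser.add_argument('--name', type=str, default='sac')
    parser.add_argument('--log-adapter', type=str, default=None)
    parser.add_argument('--load', type=str, default=None)
    return parser.parse_args()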
Example #9
def main(args):
    # environments
    env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale), args.render)
    eval_env = BatchEnvWrapper(
        make_envs(args.env, args.num_envs, args.reward_scale))
    num_actions = env.action_space.shape[0]

    # deep neural network
    network = PPONetwork(args.layers, env.observation_space.shape,
                         args.num_envs, num_actions, args.batch_size,
                         args.epsilon, args.lr, args.grad_clip,
                         args.value_factor, args.entropy_factor)

    # rollout buffer
    rollout = Rollout()

    # metrics
    saver = tf.train.Saver()
    metrics = Metrics(args.name, args.log_adapter, saver)

    # controller
    controller = PPOController(network, rollout, metrics, args.num_envs,
                               args.time_horizon, args.epoch, args.batch_size,
                               args.gamma, args.lam, args.final_steps,
                               args.log_interval, args.save_interval,
                               args.eval_interval)

    # view
    view = View(controller)

    # evaluation
    eval_controller = EvalController(network, metrics, args.eval_episodes)
    eval_view = View(eval_controller)

    # save hyperparameters
    metrics.log_parameters(vars(args))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # save model graph for debugging
        metrics.set_model_graph(sess.graph)

        if args.load is not None:
            saver.restore(sess, args.load)

        batch_interact(env, view, eval_env, eval_view)
Example #10
    def __init__(self):
        self.controller = Controller()
        self.models = Models(self.controller)
        self.view = View(self.models)