Code example #1
    def __init__(
        self,
        env_name,
        save_dir=None,
        max_episodes=constants.DEFAULT_MAX_EPISODES,
        gamma_discount=constants.DEFAULT_GAMMA,
        learning_rate=constants.DEFAULT_LEARNING_RATE,
        thread_count=None,
        worker_update_frequency=constants.DEFAULT_UPDATE_FREQUENCY,
    ):
        """Set up the global A3C model, optimizer and training parameters."""
        super().__init__(env_name, "a3c", save_dir=save_dir)

        # Training hyperparameters.
        self.max_episodes = max_episodes
        self.gamma_discount = gamma_discount
        self.worker_update_frequency = worker_update_frequency
        # Default to one worker thread per CPU core when not specified.
        self.thread_count = thread_count or multiprocessing.cpu_count()

        # Globally shared optimizer and model; use_locking makes concurrent
        # gradient applications from worker threads safe.
        self.optimizer = tf.train.AdamOptimizer(use_locking=True,
                                                learning_rate=learning_rate)
        self.model = model.A3CModel(self.env)

        # Run one forward pass on a zero observation so the model learns its
        # input shape and creates all variables and weights up front;
        # without this, worker threads cannot update the global model.
        dummy_state = np.zeros((1, ) + self.env.observation_space.shape)
        self.model(tf.convert_to_tensor(dummy_state, dtype=tf.float32))
Code example #2
    def __init__(
        self,
        env,
        global_model,
        global_optimizer,
        tracker,
        index,
        max_episodes,
        max_steps,
        gamma_discount,
        update_frequency,
    ):
        """Create one worker with its own local copy of the A3C model."""
        super().__init__()

        # Shared training state.
        self.env = env
        self.global_model = global_model
        self.global_optimizer = global_optimizer
        self.tracker = tracker

        # Per-worker configuration.
        self.index = index
        self.max_episodes = max_episodes
        self.max_steps = max_steps
        self.gamma_discount = gamma_discount
        self.update_frequency = update_frequency

        # Local model; presumably synced against global_model during
        # training — confirm in the worker's run loop.
        self.model = model.A3CModel(env)

        # TODO: Make this configurable
        self.terminal_reward = -1
Code example #3
def test_rescale_mean(pendulum_env):
    """rescale_mean maps values in [-1, 1] linearly onto [-2, 2]."""
    a3c_model = model.A3CModel(pendulum_env)

    expected = {0: 0, 0.5: 1, 1: 2, -1: -2, -0.5: -1}
    for raw, rescaled in expected.items():
        assert actor.rescale_mean(a3c_model, raw) == rescaled
Code example #4
def test_model_call_discrete(cartpole_env):
    """Calling the model on a CartPole state yields logits and a value."""
    a3c_model = model.A3CModel(cartpole_env)

    # Cartpole has two actions.
    state_tensor = tf.convert_to_tensor([cartpole_env.reset()],
                                        dtype=tf.float32)

    # The model must be callable.
    logits, values = a3c_model(state_tensor)

    # One logit per action, and a single scalar value estimate.
    assert logits.shape == (1, 2)
    assert values.shape == (1, 1)
Code example #5
def test_model_call_continuous(pendulum_env):
    """Calling the model on a Pendulum state yields paired logits and a value."""
    a3c_model = model.A3CModel(pendulum_env)

    # Pendulum has a single continuous action dimension.
    state_tensor = tf.convert_to_tensor([pendulum_env.reset()],
                                        dtype=tf.float32)

    # For a continuous action space the actor output is a tuple of
    # two tensors rather than a single logits tensor.
    (logits1, logits2), values = a3c_model(state_tensor)

    assert logits1.shape == (1, 1)
    assert logits2.shape == (1, 1)
    assert values.shape == (1, 1)
Code example #6
def test_model_variable_scope(cartpole_env):
    """Trainable weights are split between the value and actor name scopes."""
    a3c_model = model.A3CModel(cartpole_env)
    state_tensor = tf.convert_to_tensor([cartpole_env.reset()],
                                        dtype=tf.float32)
    # A forward pass builds the layers and creates the weights.
    a3c_model(state_tensor)

    def weights_in_scope(scope):
        return [w for w in a3c_model.trainable_weights if scope in w.name]

    # Two sets of weight layers per scope, each with a bias: four variables.
    assert len(weights_in_scope("value_scope")) == 4
    assert len(weights_in_scope("actor_scope")) == 4