    def init_agent_env(self, proc_id, role, role_id):
        class Agent(A2C.Agent):
            def build_model(self, name):
                # randUni = tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)
                inputs = tf.keras.layers.Input(shape=self.state_size,
                                               name='inputs')
                # gru = tf.keras.layers.GRU(128, activation = 'tanh')(inputs)
                common = tf.keras.layers.Dense(128, activation="relu")(inputs)
                common = tf.keras.layers.Dense(128, activation="relu")(common)
                action = tf.keras.layers.Dense(self.num_action,
                                               activation="softmax",
                                               name='action_outputs')(common)
                critic = tf.keras.layers.Dense(1, name='value_output')(common)

                # Use the name argument so the two-headed model is identifiable.
                model = tf.keras.Model(inputs=inputs, outputs=[action, critic], name=name)

                return model

        # env = remote.RemoteEnv()
        env = cartPole.CartPoleEnv()
        # env = flappyBird.FlappyBirdEnv()
        NUM_STATE_FEATURES = env.get_num_state_features()
        NUM_ACTIONS = env.get_num_actions()
        PRINT_EVERY_EPISODE = 20
        LEARNING_RATE = 0.03
        REWARD_DISCOUNT = 0.99
        COEF_VALUE = 1
        COEF_ENTROPY = 0
        # agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)
        agent = Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT,
                      LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)

        return agent, env
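The two-headed model above shares a dense trunk between a softmax policy head and a scalar value head. The A2C.Agent training code is not part of this excerpt; the sketch below only illustrates how COEF_VALUE and COEF_ENTROPY conventionally weight a combined actor-critic loss for such a model (all names in it are illustrative, not the project's API):

import tensorflow as tf

def a2c_loss(model, states, actions, returns, coef_value=1.0, coef_entropy=0.0):
    # Forward pass through the two-headed model.
    probs, values = model(states)                  # shapes (B, A) and (B, 1)
    values = tf.squeeze(values, axis=-1)           # shape (B,)
    advantages = returns - values

    # Log-probability of the actions actually taken.
    idx = tf.stack([tf.range(tf.shape(actions)[0]), actions], axis=1)
    log_probs = tf.math.log(tf.gather_nd(probs, idx) + 1e-8)

    # The policy-gradient term uses advantages without backprop into the critic.
    policy_loss = -tf.reduce_mean(log_probs * tf.stop_gradient(advantages))
    value_loss = tf.reduce_mean(tf.square(advantages))
    entropy = -tf.reduce_mean(
        tf.reduce_sum(probs * tf.math.log(probs + 1e-8), axis=1))

    # COEF_VALUE weights the critic loss; COEF_ENTROPY rewards exploration.
    return policy_loss + coef_value * value_loss - coef_entropy * entropy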
Example #2
    def init_agent_env(self, proc_id, role, role_id):
        env = cartPole.CartPoleEnv()
        # env = flappyBird.FlappyBirdEnv()
        NUM_STATE_FEATURES = env.get_num_state_features()
        NUM_ACTIONS = env.get_num_actions()
        PRINT_EVERY_EPISODE = 20
        LEARNING_RATE = 0.003
        REWARD_DISCOUNT = 0.99
        COEF_VALUE = 1
        COEF_ENTROPY = 0
        agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)

        return agent, env
Example #3
# A2C, EPSG, Util, and cartPole are project-local modules; their imports
# are omitted from this excerpt.
import numpy as np
# To run tqdm in a notebook, import it from tqdm.notebook instead:
# from tqdm.notebook import tqdm
# Plain-Python run:
from tqdm import tqdm

# Configure the logging format
# logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
# Configure the logging module to work in a notebook
# logger = logging.getLogger()
# logger.setLevel(logging.DEBUG)

# Test GPU and show the available logical & physical GPUs
Util.test_gpu()

env = cartPole.CartPoleEnv()
NUM_STATE_FEATURES = env.get_num_state_features()
NUM_ACTIONS = env.get_num_actions()
EPISODE_NUM = 2000
PRINT_EVERY_EPISODE = 20
LEARNING_RATE = 0.003
REWARD_DISCOUNT = 0.99

exp_stg = EPSG.EpsilonGreedy(0.2, NUM_ACTIONS)
# agent = Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)

agent_params = ((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT,
                LEARNING_RATE, exp_stg)

init_local_agent_funct = lambda: A2C.Agent(
    (NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE,
    exp_stg)
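EPSG.EpsilonGreedy(0.2, NUM_ACTIONS) is constructed from an exploration rate and the action count; its implementation is not shown here. A minimal sketch of the standard epsilon-greedy rule such a strategy implements (the class and method names below are assumptions for illustration):

import numpy as np

class EpsilonGreedySketch:
    def __init__(self, epsilon, num_actions):
        self.epsilon = epsilon          # probability of acting at random
        self.num_actions = num_actions

    def select_action(self, action_probs):
        # Explore uniformly with probability epsilon, otherwise exploit
        # the policy's most probable action.
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.num_actions)
        return int(np.argmax(action_probs))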
Example #4
# A2C, EPSG, Util, and cartPole are project-local modules; their imports
# are omitted from this excerpt.
import numpy as np
# To run tqdm in a notebook, import it from tqdm.notebook instead:
# from tqdm.notebook import tqdm
# Plain-Python run:
from tqdm import tqdm

# Configure the logging format
# logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
# Configure the logging module to work in a notebook
# logger = logging.getLogger()
# logger.setLevel(logging.DEBUG)

# Test GPU and show the available logical & physical GPUs
Util.test_gpu()

env = cartPole.CartPoleEnv()
NUM_STATE_FEATURES = env.get_num_state_features()
NUM_ACTIONS = env.get_num_actions()
EPISODE_NUM = 200
PRINT_EVERY_EPISODE = 20
LEARNING_RATE = 0.03
REWARD_DISCOUNT = 0.99
COEF_VALUE = 1
COEF_ENTROPY = 0

exp_stg = EPSG.EpsilonGreedy(0.2, NUM_ACTIONS)
agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT,
                  LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)

# agent_params = ((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
# init_local_agent_funct = lambda: Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
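REWARD_DISCOUNT = 0.99 is the discount factor applied when episode rewards are turned into returns for the advantage estimate. A small self-contained sketch of that standard computation (independent of the A2C.Agent internals, which this excerpt does not show):

import numpy as np

def discounted_returns(rewards, gamma=0.99):
    # G_t = r_t + gamma * G_{t+1}, computed backwards over one episode.
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Three steps of reward 1.0 yield [2.9701, 1.99, 1.0].
print(discounted_returns([1.0, 1.0, 1.0]))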