def init_agent_env(self, proc_id, role, role_id):
    # Local Agent subclass that overrides the network architecture.
    class Agent(A2C.Agent):
        def build_model(self, name):
            # randUni = tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)
            inputs = tf.keras.layers.Input(shape=self.state_size, name='inputs')
            # gru = tf.keras.layers.GRU(128, activation='tanh')(inputs)
            # Shared trunk feeding both the actor and critic heads.
            common = tf.keras.layers.Dense(128, activation="relu")(inputs)
            common = tf.keras.layers.Dense(128, activation="relu")(common)
            # Actor head: softmax distribution over the discrete actions.
            action = tf.keras.layers.Dense(self.num_action, activation="softmax", name='action_outputs')(common)
            # Critic head: scalar state-value estimate.
            critic = tf.keras.layers.Dense(1, name='value_output')(common)
            model = tf.keras.Model(inputs=inputs, outputs=[action, critic])
            return model

    # env = remote.RemoteEnv()
    env = cartPole.CartPoleEnv()
    # env = flappyBird.FlappyBirdEnv()
    NUM_STATE_FEATURES = env.get_num_state_features()
    NUM_ACTIONS = env.get_num_actions()
    PRINT_EVERY_EPISODE = 20
    LEARNING_RATE = 0.03  # must stay defined: it is used in the constructor below
    REWARD_DISCOUNT = 0.99
    COEF_VALUE = 1
    COEF_ENTROPY = 0

    # agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)
    agent = Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)
    return agent, env
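# A minimal sketch of how the (agent, env) pair returned above might be
# driven for one episode. The method names on agent and env used here
# (reset, step, select_action, remember, learn) are assumptions for
# illustration only; the actual A2C.Agent and CartPoleEnv interfaces are
# not shown in this snippet.
def run_one_episode(agent, env):
    state = env.reset()                      # hypothetical: initial observation
    done, episode_reward = False, 0.0
    while not done:
        action = agent.select_action(state)  # hypothetical: sample from softmax policy
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward
    agent.learn()                            # hypothetical: one A2C update from the episode
    return episode_reward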
def init_agent_env(self, proc_id, role, role_id):
    env = cartPole.CartPoleEnv()
    # env = flappyBird.FlappyBirdEnv()
    NUM_STATE_FEATURES = env.get_num_state_features()
    NUM_ACTIONS = env.get_num_actions()
    PRINT_EVERY_EPISODE = 20
    LEARNING_RATE = 0.003
    REWARD_DISCOUNT = 0.99
    COEF_VALUE = 1
    COEF_ENTROPY = 0

    agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)
    return agent, env
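# The (proc_id, role, role_id) signature suggests this factory is invoked
# once per worker in a multi-process (A3C-style) training setup. A rough
# sketch of that call pattern; the Trainer class, role names, and the
# one-learner-plus-actors split are assumptions, not the actual framework:
class Trainer:
    def spawn_workers(self, num_workers):
        workers = []
        for proc_id in range(num_workers):
            # hypothetical role assignment: process 0 learns, the rest act
            role = 'learner' if proc_id == 0 else 'actor'
            agent, env = self.init_agent_env(proc_id, role, proc_id)
            workers.append((agent, env))
        return workers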
import numpy as np
# To run tqdm in a notebook, import tqdm.notebook
# from tqdm.notebook import tqdm
# Run in pure Python
from tqdm import tqdm

# Configure the logging format
# logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
# Configure the logging module to enable it in a notebook
# logger = logging.getLogger()
# logger.setLevel(logging.DEBUG)

# Test GPU and show the available logical & physical GPUs
Util.test_gpu()

env = cartPole.CartPoleEnv()
NUM_STATE_FEATURES = env.get_num_state_features()
NUM_ACTIONS = env.get_num_actions()
EPISODE_NUM = 2000
PRINT_EVERY_EPISODE = 20
LEARNING_RATE = 0.003
REWARD_DISCOUNT = 0.99

exp_stg = EPSG.EpsilonGreedy(0.2, NUM_ACTIONS)
# agent = Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
agent_params = ((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
init_local_agent_funct = lambda: A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
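# EPSG.EpsilonGreedy(0.2, NUM_ACTIONS) is constructed with an epsilon of 0.2
# and the action count. Its internals are not shown here; below is a minimal
# sketch of what a standard epsilon-greedy strategy does, as an assumed
# stand-in rather than the actual EPSG implementation:
import numpy as np

class EpsilonGreedySketch:
    def __init__(self, epsilon, num_actions):
        self.epsilon = epsilon
        self.num_actions = num_actions

    def select(self, action_values):
        # With probability epsilon, explore uniformly at random;
        # otherwise exploit the current action-value estimates.
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.num_actions)
        return int(np.argmax(action_values))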
import numpy as np
# To run tqdm in a notebook, import tqdm.notebook
# from tqdm.notebook import tqdm
# Run in pure Python
from tqdm import tqdm

# Configure the logging format
# logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
# Configure the logging module to enable it in a notebook
# logger = logging.getLogger()
# logger.setLevel(logging.DEBUG)

# Test GPU and show the available logical & physical GPUs
Util.test_gpu()

env = cartPole.CartPoleEnv()
NUM_STATE_FEATURES = env.get_num_state_features()
NUM_ACTIONS = env.get_num_actions()
EPISODE_NUM = 200
PRINT_EVERY_EPISODE = 20
LEARNING_RATE = 0.03
REWARD_DISCOUNT = 0.99
COEF_VALUE = 1
COEF_ENTROPY = 0

exp_stg = EPSG.EpsilonGreedy(0.2, NUM_ACTIONS)
agent = A2C.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, COEF_VALUE, COEF_ENTROPY)
# agent_params = ((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
# init_local_agent_funct = lambda: Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)
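# COEF_VALUE and COEF_ENTROPY weight the value loss and the entropy bonus in
# the standard A2C objective:
#   loss = policy_loss + COEF_VALUE * value_loss - COEF_ENTROPY * entropy
# A minimal sketch of that combined loss; this is the textbook formulation
# and is only assumed to match what A2C.Agent computes internally:
import tensorflow as tf

def a2c_loss(action_probs, actions, returns, values, coef_value, coef_entropy):
    advantages = returns - values                           # A(s,a) = R - V(s)
    taken = tf.gather(action_probs, actions, batch_dims=1)  # pi(a_t | s_t)
    log_probs = tf.math.log(taken + 1e-8)
    # Policy gradient term; advantages are treated as constants here.
    policy_loss = -tf.reduce_mean(log_probs * tf.stop_gradient(advantages))
    # Critic regression toward the observed returns.
    value_loss = tf.reduce_mean(tf.square(advantages))
    # Entropy of the policy, encouraging exploration when weighted in.
    entropy = -tf.reduce_mean(
        tf.reduce_sum(action_probs * tf.math.log(action_probs + 1e-8), axis=1))
    return policy_loss + coef_value * value_loss - coef_entropy * entropy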