def run_a2c_experiment(entropy_reg, run: int):
    """
    Run a single run of A2C on CartPole using the specified parameters.

    :param entropy_reg: Entropy regularization on the policy loss function,
        higher means a more random policy
    :param run: Run number, used in the filename of the output file
    """
    # Function-local imports keep module load cheap when experiments are
    # dispatched to separate worker processes.
    import keras as ks
    import numpy as np
    from agents.actor_critic import ActorCriticAgent
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from p_network import PNetwork
    from experiment_util import Logger

    # Critic: state (4,) -> Q-value per action (2 actions, linear output).
    # Note: the original passed input_shape to the second layer as well;
    # Keras ignores input_shape on non-first layers, so it is dropped here.
    value_network = ks.models.Sequential()
    value_network.add(ks.layers.Dense(150, activation='relu', input_shape=(4,)))
    value_network.add(ks.layers.Dense(50, activation='relu'))
    value_network.add(ks.layers.Dense(2, activation='linear'))
    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    # Actor: state (4,) -> action probabilities (softmax over 2 actions).
    # Not compiled here; PNetwork presumably handles its own training step.
    policy_network = ks.models.Sequential()
    policy_network.add(ks.layers.Dense(150, activation='relu', input_shape=(4,)))
    policy_network.add(ks.layers.Dense(50, activation='relu'))
    policy_network.add(ks.layers.Dense(2, activation='softmax'))

    logger = Logger(filename="../results/AC_VS_SL_cartpole_a2c_%.5f_%d.h5"
                    % (entropy_reg, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network, actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)
    pn = PNetwork(policy_network, actions,
                  lambda x: np.array(x.state),
                  fixed_steps=100,
                  entropy_regularization=entropy_reg,
                  alpha=0.001,
                  use_advantage=True)
    ac = ActorCriticAgent(env, dn, pn, replay_memory_size=1000)

    experiment = logger.start_experiment(ac.get_configuration())
    # learn() result is intentionally discarded; episode results are
    # persisted through the experiment logger's result_handler.
    ac.learn(num_episodes=250, result_handler=experiment.log)
def snake_conv_sarsa(episodes=10000, file_name='snek'):
    """
    Train deep SARSA(lambda) with a convolutional Q-network on the visual
    snake environment and log results plus final weights.

    :param episodes: Number of episodes to train for
    :param file_name: Output filename passed to the Logger
    """
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # Restrict per-process GPU memory so NUM_RUNS processes can share one
    # GPU; the 0.05 margin leaves headroom for TF overhead.
    # NOTE(review): this fraction goes non-positive when NUM_RUNS is large
    # (>= 20) -- assumed NUM_RUNS stays small; confirm at the call site.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))

    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeVisual
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)
    env = SnakeVisual(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()
    # Input shape is taken from an actual reset state so the net matches
    # whatever grid representation SnakeVisual produces.
    size = np.shape(env.reset().state)

    # Conv stack over the raw grid, flattened into 3 linear Q-outputs
    # (one per action).
    nn = ks.models.Sequential()
    nn.add(ks.layers.Conv2D(filters=16, kernel_size=(3, 3),
                            activation='sigmoid', input_shape=size))
    nn.add(ks.layers.Conv2D(filters=24, kernel_size=(3, 3),
                            activation='sigmoid'))
    nn.add(ks.layers.Conv2D(filters=32, kernel_size=(3, 3),
                            activation='sigmoid'))
    nn.add(ks.layers.Flatten())
    nn.add(ks.layers.Dense(units=16, activation='sigmoid'))
    nn.add(ks.layers.Dense(units=3, activation='linear'))
    nn.compile(optimizer=ks.optimizers.Adam(lr=0.0001), loss='mse')

    def normalize_state(s):
        # Prepend a batch dimension of 1 for Keras prediction.
        return np.reshape(s.state, newshape=(1,) + size)

    dqn = QNetworkSL(nn, actions, normalize_state,
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)
    dql = DeepSarsa(env, dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)

    experiment = logger.start_experiment(dql.get_configuration())
    # learn() result is intentionally discarded; episode results go through
    # the experiment logger, and weights are saved separately below.
    dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", nn.get_weights())
def run_saraslambda_experiment(epsilon_start, epsilon_min, epsilon_decay,
                               run: int):
    """
    Run deep SARSA(lambda) on CartPole.

    :param epsilon_start: Starting epsilon value
    :param epsilon_min: Minimum epsilon value
    :param epsilon_decay: Factor multiplied with epsilon each step
    :param run: Run identifier used in the output filename
    """
    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger

    # Value network: state (4,) -> Q-value per action (2 actions).
    # Note: the original passed input_shape to the second layer as well;
    # Keras ignores input_shape on non-first layers, so it is dropped here.
    value_network = ks.models.Sequential()
    value_network.add(ks.layers.Dense(150, activation='relu', input_shape=(4,)))
    value_network.add(ks.layers.Dense(50, activation='relu'))
    value_network.add(ks.layers.Dense(2, activation='linear'))
    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    logger = Logger(filename="../results/AC_VS_SL_cartpole_sl_%.4f_%.4f_%f_%d.h5"
                    % (epsilon_start, epsilon_min, epsilon_decay, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network, actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)
    sarsa = DeepSarsa(env, dn,
                      replay_memory_size=1000,
                      epsilon_min=epsilon_min,
                      epsilon_step_factor=epsilon_decay,
                      epsilon=epsilon_start)

    experiment = logger.start_experiment(sarsa.get_configuration())
    # learn() result is intentionally discarded; episode results are
    # persisted through the experiment logger's result_handler.
    sarsa.learn(num_episodes=250, result_handler=experiment.log)
def snake_deep_sarsa(episodes=10000, file_name='snek'):
    """
    Train deep SARSA(lambda) with a dense Q-network on the SnakeContinuous
    environment (9-dimensional state) and log results plus final weights.

    :param episodes: Number of episodes to train for
    :param file_name: Output filename passed to the Logger
    """
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # Restrict per-process GPU memory so NUM_RUNS processes can share one
    # GPU; the 0.05 margin leaves headroom for TF overhead.
    # NOTE(review): this fraction goes non-positive when NUM_RUNS is large
    # (>= 20) -- assumed NUM_RUNS stays small; confirm at the call site.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))

    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeContinuous
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)

    # Dense net over the 9-dimensional state; 3 linear Q-outputs, one per
    # action.
    neural_network = ks.models.Sequential()
    neural_network.add(ks.layers.Dense(150, activation='relu',
                                       input_shape=(9,)))
    neural_network.add(ks.layers.Dense(50, activation='relu'))
    neural_network.add(ks.layers.Dense(3, activation='linear'))
    neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    env = SnakeContinuous(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()

    dqn = QNetworkSL(neural_network, actions,
                     lambda x: np.reshape(x.state, newshape=(1, 9)),
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)
    dql = DeepSarsa(env, dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)

    experiment = logger.start_experiment(dql.get_configuration())
    # learn() result is intentionally discarded; episode results go through
    # the experiment logger, and weights are saved separately below.
    dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", neural_network.get_weights())
if __name__ == '__main__': import keras as ks import numpy as np from agents.deep_sarsa import DeepSarsa from environments.cartpole import CartPole from q_network_sarsa_lambda import QNetworkSL from experiment_util import Logger l = Logger() lambd = [1.0] for i in range(len(lambd)): neural_network = ks.models.Sequential() neural_network.add( ks.layers.Dense(150, activation='relu', input_shape=(4, ))) neural_network.add(ks.layers.Dense(50, activation='relu')) neural_network.add(ks.layers.Dense(2, activation='linear')) neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse') env = CartPole(render=False) actions = env.valid_actions() dqn = QNetworkSL(neural_network, actions, lambda x: np.reshape(x.state, newshape=(1, 4)), lambd=lambd[i], lambda_min=1e-3, gamma=1.0, reward_factor=0.01,
def experiment(run_n, episodes, sigmas, lambda_parameter):
    """
    Runs a single experiment for each sigma value of Deep SARSA(lambda) on
    CartPole.

    :param run_n: The run number, used in the filename of the experiment
    :param episodes: Number of episodes to run
    :param sigmas: Values of sigma (noise standard deviation)
    :param lambda_parameter: The lambda value for this experiment
    :return: The filename of the output file
    """
    import tensorflow as tf
    # Stop tensorflow from allocating all GPU memory at once; this allows
    # more runs on one GPU.  These settings are ignored when running on CPU
    # (which is often faster for this experiment).
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    set_session(tf.Session(config=config))

    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import NoisyCartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger

    filename = ("results/cartpole_deepsarsalambda_lambda_%1.2f_%d.h5"
                % (lambda_parameter, run_n))
    logger = Logger(filename=filename)

    for sigma in sigmas:
        # Fresh network per sigma so runs do not share learned weights.
        neural_network = ks.models.Sequential()
        neural_network.add(ks.layers.Dense(150, activation='relu',
                                           input_shape=(4,)))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))
        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = NoisyCartPole(std=sigma, render=False)
        actions = env.valid_actions()
        dqn = QNetworkSL(neural_network, actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambda_parameter,
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)
        dql = DeepSarsa(env, dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995,
                        epsilon_min=0.0,
                        replay_memory_size=1000)

        # Renamed from `experiment` to avoid shadowing this function's name.
        exp_log = logger.start_experiment(dql.get_configuration())
        # learn() result intentionally discarded; episode data goes to the
        # log and final weights are saved as an attribute.
        dql.learn(num_episodes=episodes, result_handler=exp_log.log)
        exp_log.save_attribute("weights", neural_network.get_weights())
        print("%s finished sigma=%1.2f, run=%i" % (filename, sigma, run_n))

    return filename
def experiment(run, episodes, sigmas, lambda_parameter):
    """
    Runs a single experiment for each sigma value of SARSA(lambda) on
    CartPole.

    :param run: The run number, used in the filename of the experiment
    :param episodes: Number of episodes to run
    :param sigmas: Values of sigma (noise standard deviation)
    :param lambda_parameter: The lambda value for this experiment
    :return: The filename of the output file
    """
    import numpy as np
    from agents.sarsalambda import SarsaLambda
    from environments.cartpole import NoisyCartPole
    from experiment_util import Logger

    filename = ("../results/cartpole_sarsalambda_lambda_%1.2f_%d.h5"
                % (lambda_parameter, run))
    logger = Logger(filename=filename)

    def cartpole_discretization(x):
        """
        The discretization function used for this experiment.

        :param x: The state object
        :return: A tuple representing the discrete state
        """
        s = x.state
        if s.shape != (4,):
            # BUG FIX: the original formatted x.shape, but x is the state
            # wrapper (the array is s) -- that would raise AttributeError
            # instead of the intended ValueError.
            raise ValueError("Expected array of shape (4,). Instead got: %s"
                             % (str(s.shape)))
        out = np.zeros((4,))
        # Cart position, range [-2.4, 2.4]: normalized to [0, 1], times 5.
        out[0] = np.round(5 * (s[0] + 2.4) / 4.8, 0)
        # Cart velocity, unbounded: signed square root compresses the tails.
        out[1] = np.round(5 * np.sqrt(np.abs(s[1])) * np.sign(s[1]), 0)
        # Pole angle, range [-0.26, 0.26] rad: normalized to [0, 1], times 10.
        out[2] = np.round(10 * (s[2] + .26) / .52, 0)
        # Pole velocity at tip: signed square root, scaled by 7.
        out[3] = np.round(7 * np.sqrt(np.abs(s[3])) * np.sign(s[3]), 0)
        return tuple(out)

    # NOTE: a legacy alternative discretization (`transform_state`) was
    # removed here; it was never referenced.

    for sigma in sigmas:
        env = NoisyCartPole(std=sigma, render=False)
        sl = SarsaLambda(env,
                         lam=lambda_parameter,
                         gamma=1.0,
                         epsilon=0.7,
                         epsilon_step_factor=0.99998,
                         epsilon_min=0.0,
                         fex=cartpole_discretization)
        # Renamed from `experiment` to avoid shadowing this function's name.
        exp_log = logger.start_experiment(sl.get_configuration())
        pi = sl.learn(num_iter=episodes, result_handler=exp_log.log)
        exp_log.save_attribute("pi", pi)
        print("%s finished sigma=%1.2f, run=%i" % (filename, sigma, run))

    return filename