Example #1
    def __init__(self,
                 env: FiniteActionEnvironment,
                 model: QNetworkSL,
                 replay_memory_size: int = 3000,
                 minibatch_size: int = 32,
                 epsilon=0.05,
                 epsilon_step_factor=1.0,
                 epsilon_min=0.0):
        """
        Initializes the Deep SARSA(λ) agent
        :param env: The FiniteActionEnvironment that should be learned by the agent
        :param model: The function approximator used to estimate and learn Q(s,a)
        :param replay_memory_size: The size of the replay memory (in trajectories)
        :param minibatch_size: The size of the minibatches sampled from the replay memory each training step
        :param epsilon: The probability of performing a random move, used for exploration
        :param epsilon_step_factor: The epsilon decay parameter; epsilon is multiplied by this factor each step
        :param epsilon_min: The minimum value of epsilon; it will not decay below this value
        """
        super().__init__(env)
        self.epsilon_step_factor = epsilon_step_factor
        self.epsilon_min = epsilon_min
        self.qnetwork = model
        self.epsilon_v = epsilon
        self.policy = model.derive_policy(EpsilonGreedyPolicy,
                                          env.valid_actions_from,
                                          epsilon=self.epsilon)
        self.replay_memory = deque(maxlen=replay_memory_size)
        self.minibatch_size = minibatch_size
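The last three constructor parameters define a multiplicative epsilon decay clamped at a lower bound. A minimal sketch of how such a schedule evolves per step (decay_epsilon is a hypothetical helper; the agent's actual update code is not part of this excerpt):

def decay_epsilon(epsilon, step_factor, epsilon_min):
    # Multiply epsilon by the decay factor once per step, never below the minimum.
    return max(epsilon_min, epsilon * step_factor)

eps = 0.3
for step in range(3):
    eps = decay_epsilon(eps, step_factor=0.9999, epsilon_min=0.005)
    # eps ≈ 0.29997, 0.29994, 0.29991, ...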
Example #2
def run_a2c_experiment(entropy_reg, run: int):
    """
    This function runs a single run of a2c on cartpole using the specified parameters
    :param entropy_reg: Entropy regularization on the policy loss function, higher means a more random policy
    :param run: Specifies the run number, this is used in the filename of the output file
    """
    import keras as ks
    import numpy as np
    from agents.actor_critic import ActorCriticAgent
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from p_network import PNetwork
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    policy_network = ks.models.Sequential()
    policy_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    policy_network.add(
        ks.layers.Dense(50, activation='relu', input_shape=(4, )))
    policy_network.add(ks.layers.Dense(2, activation='softmax'))

    l = Logger(filename="../results/AC_VS_SL_cartpole_a2c_%.5f_%d.h5" %
               (entropy_reg, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    pn = PNetwork(policy_network,
                  actions,
                  lambda x: np.array(x.state),
                  fixed_steps=100,
                  entropy_regularization=entropy_reg,
                  alpha=0.001,
                  use_advantage=True)

    ac = ActorCriticAgent(env, dn, pn, replay_memory_size=1000)

    c = ac.get_configuration()
    experiment = l.start_experiment(c)
    q = ac.learn(num_episodes=250, result_handler=experiment.log)
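Because every import happens inside run_a2c_experiment, the function can be shipped to worker processes as-is. A minimal dispatch sketch over an illustrative grid of entropy_reg values and run indices (the grid itself is an assumption, not from the source):

from multiprocessing import Pool

if __name__ == '__main__':
    # Illustrative parameter grid; the values actually swept are not in this excerpt.
    jobs = [(entropy, run) for entropy in (0.0, 0.01, 0.1) for run in range(3)]
    with Pool(processes=3) as pool:
        pool.starmap(run_a2c_experiment, jobs)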
Example #3
def snake_conv_sarsa(episodes=10000, file_name='snek'):
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))
    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeVisual
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)

    env = SnakeVisual(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()
    size = np.shape(env.reset().state)

    nn = ks.models.Sequential()
    nn.add(
        ks.layers.Conv2D(filters=16,
                         kernel_size=(3, 3),
                         activation='sigmoid',
                         input_shape=size))
    nn.add(
        ks.layers.Conv2D(filters=24, kernel_size=(3, 3), activation='sigmoid'))
    nn.add(
        ks.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='sigmoid'))
    nn.add(ks.layers.Flatten())
    nn.add(ks.layers.Dense(units=16, activation='sigmoid'))
    nn.add(ks.layers.Dense(units=3, activation='linear'))

    nn.compile(optimizer=ks.optimizers.Adam(lr=0.0001), loss='mse')

    def normalize_state(s):
        return np.reshape(s.state, newshape=(1, ) + size)

    dqn = QNetworkSL(nn,
                     actions,
                     normalize_state,
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)

    dql = DeepSarsa(env,
                    dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)

    experiment = logger.start_experiment(dql.get_configuration())
    q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", nn.get_weights())
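The learned parameters are stored with experiment.save_attribute("weights", ...). Once retrieved, they can be loaded into an identical architecture for greedy evaluation; a minimal sketch reusing nn, env, actions and normalize_state from above, and assuming `weights` holds the stored list (how it is read back from the Logger file is not shown in this excerpt):

    # `weights` is assumed to be the list saved above via experiment.save_attribute.
    eval_net = ks.models.clone_model(nn)   # same architecture, freshly initialized
    eval_net.set_weights(weights)
    state = env.reset()
    q_values = eval_net.predict(normalize_state(state))
    greedy_action = actions[int(np.argmax(q_values))]   # assumes actions is index-addressable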
Example #4
def run_saraslambda_experiment(epsilon_start, epsilon_min, epsilon_decay,
                               run: int):
    """
    Runs deep sarasa lambda on cartpole
    :param epsilon_start: Starting epsilon value
    :param epsilon_min: Minimum epsilon value
    :param epsilon_decay: Factor multiplied with epsilon each step
    :param run: Run identifier used in the output filename
    :return:
    """

    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    l = Logger(filename="../results/AC_VS_SL_cartpole_sl_%.4f_%.4f_%f_%d.h5" %
               (epsilon_start, epsilon_min, epsilon_decay, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    sarsa = DeepSarsa(env,
                      dn,
                      replay_memory_size=1000,
                      epsilon_min=epsilon_min,
                      epsilon_step_factor=epsilon_decay,
                      epsilon=epsilon_start)

    c = sarsa.get_configuration()
    experiment = l.start_experiment(c)
    q = sarsa.learn(num_episodes=250, result_handler=experiment.log)
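A quick sanity check for an (epsilon_start, epsilon_min, epsilon_decay) triple is how many steps the schedule needs to reach its floor. A minimal sketch of that calculation (the example values are illustrative, not from the source):

import math

def steps_until_floor(epsilon_start, epsilon_min, epsilon_decay):
    # Smallest n with epsilon_start * epsilon_decay**n <= epsilon_min
    return math.ceil(math.log(epsilon_min / epsilon_start) / math.log(epsilon_decay))

print(steps_until_floor(1.0, 0.05, 0.9995))   # ≈ 5990 steps for these illustrative values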
Example #5
def snake_deep_sarsa(episodes=10000, file_name='snek'):
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))
    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeContinuous
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)

    neural_network = ks.models.Sequential()
    neural_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(9, )))
    neural_network.add(ks.layers.Dense(50, activation='relu'))
    neural_network.add(ks.layers.Dense(3, activation='linear'))

    neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    env = SnakeContinuous(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()

    dqn = QNetworkSL(neural_network,
                     actions,
                     lambda x: np.reshape(x.state, newshape=(1, 9)),
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)

    dql = DeepSarsa(env,
                    dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)
    experiment = logger.start_experiment(dql.get_configuration())
    q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", neural_network.get_weights())
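The lambda_min argument presumably truncates the λ-weighted return once λ**k drops below it. Under that assumed interpretation, a minimal sketch of how many recent steps still carry non-negligible weight:

import math

def effective_horizon(lambd, lambda_min):
    # Smallest k with lambd**k < lambda_min (assumed reading of lambda_min).
    return math.ceil(math.log(lambda_min) / math.log(lambd))

print(effective_horizon(0.9, 1e-3))   # 66 steps for the snake settings above
print(effective_horizon(0.9, 1e-2))   # 44 steps for the CartPole settings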
Example #6
    def normalize_state(s):
        o = np.zeros(shape=(1, 8))
        o[0, 0] = s.state['player_y'] / height
        o[0, 1] = s.state['player_vel']
        o[0, 2] = s.state['next_pipe_dist_to_player'] / width
        o[0, 3] = s.state['next_pipe_top_y'] / (height / 2)
        o[0, 4] = s.state['next_pipe_bottom_y'] / (height / 2)
        o[0, 5] = s.state['next_next_pipe_dist_to_player'] / width
        o[0, 6] = s.state['next_next_pipe_top_y'] / (height / 2)
        o[0, 7] = s.state['next_next_pipe_bottom_y'] / (height / 2)
        return o

    vn = QNetworkSL(neural_network,
                    actions,
                    normalize_state,
                    lambd=0.9,
                    gamma=0.9,
                    reward_factor=1,
                    fixed_length=100,
                    lambda_min=1e-2)

    pn = PNetwork(policy_network,
                  actions,
                  lambda x: normalize_state(x)[0],
                  fixed_steps=100,
                  entropy_regularization=0.1,
                  alpha=0.001,
                  use_advantage=True)

    dql = ActorCriticAgent(env, vn, pn, replay_memory_size=1000)

    q = dql.learn()
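normalize_state flattens the pipe-game state dictionary (the keys match PLE's FlappyBird observation) into a (1, 8) row, scaling positions by the screen dimensions. A minimal usage sketch reusing that helper; the SimpleNamespace wrapper, screen size and feature values are illustrative assumptions:

from types import SimpleNamespace
import numpy as np

width, height = 288, 512   # illustrative screen size, not taken from the source

s = SimpleNamespace(state={
    'player_y': 256, 'player_vel': -8,
    'next_pipe_dist_to_player': 120, 'next_pipe_top_y': 150,
    'next_pipe_bottom_y': 250, 'next_next_pipe_dist_to_player': 260,
    'next_next_pipe_top_y': 180, 'next_next_pipe_bottom_y': 280,
})
print(normalize_state(s).shape)   # -> (1, 8)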
Example #7
        neural_network = ks.models.Sequential()
        neural_network.add(
            ks.layers.Dense(150, activation='relu', input_shape=(4, )))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))

        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = CartPole(render=False)
        actions = env.valid_actions()

        dqn = QNetworkSL(neural_network,
                         actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambd[i],
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)

        dql = DeepSarsa(env,
                        dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995,
                        epsilon_min=0.0,
                        replay_memory_size=1000)

        c = dql.get_configuration()
        print(c)
        experiment = l.start_experiment(c)
        try:
Example #8
    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeContinuous
    from q_network_sarsa_lambda import QNetworkSL

    neural_network = ks.models.Sequential()
    neural_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(9, )))
    neural_network.add(ks.layers.Dense(50, activation='relu'))
    neural_network.add(ks.layers.Dense(3, activation='linear'))

    neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    env = SnakeContinuous(grid_size=[8, 8], render=True, render_freq=10)
    actions = env.valid_actions()

    dqn = QNetworkSL(neural_network,
                     actions,
                     lambda x: np.reshape(x.state, newshape=(1, 9)),
                     lambd=0.9,
                     gamma=0.9,
                     reward_factor=0.01,
                     fixed_length=100)

    dql = DeepSarsa(env,
                    dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.999,
                    epsilon_min=0.05,
                    replay_memory_size=1000)

    q = dql.learn()


def experiment(run_n, episodes, sigmas, lambda_parameter):
    """
    Runs a single experiment for each sigma value of Deep SARSA lambda on Cartpole
    :param run_n: The run number, used in the filename of the experiment
    :param episodes: Number of epsiodes to run
    :param sigmas: Values of sigma (noise standard deviation)
    :param lambda_parameter: The lambda value for this experiment
    :return: The filename of the output file
    """
    import tensorflow as tf

    # This code stops TensorFlow from allocating all GPU memory at once, which allows more runs on one GPU.
    # These settings are ignored when running on CPU (which is often faster for this experiment).
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    set_session(tf.Session(config=config))
    import keras as ks

    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import NoisyCartPole
    from q_network_sarsa_lambda import QNetworkSL

    from experiment_util import Logger

    filename = ("results/cartpole_deepsarsalambda_lambda_%1.2f_%d.h5" %
                (lambda_parameter, run_n))
    l = Logger(filename=filename)

    for sigma in sigmas:

        neural_network = ks.models.Sequential()
        neural_network.add(
            ks.layers.Dense(150, activation='relu', input_shape=(4, )))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))

        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = NoisyCartPole(std=sigma, render=False)
        actions = env.valid_actions()

        dqn = QNetworkSL(neural_network,
                         actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambda_parameter,
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)

        dql = DeepSarsa(env,
                        dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995,
                        epsilon_min=0.0,
                        replay_memory_size=1000)

        c = dql.get_configuration()
        experiment = l.start_experiment(c)
        q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
        experiment.save_attribute("weights", neural_network.get_weights())
        print("%s finished sigma=%1.2f, run=%i" % (filename, sigma, run_n))
    return filename
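A minimal invocation sketch for this function (the sigma grid and episode count are illustrative, not from the source):

if __name__ == '__main__':
    out = experiment(run_n=0, episodes=250,
                     sigmas=[0.0, 0.05, 0.1], lambda_parameter=0.9)
    print("results written to", out)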