Example No. 1
def run_a2c_experiment(entropy_reg, run: int):
    """
    This function runs a single run of a2c on cartpole using the specified parameters
    :param entropy_reg: Entropy regularization on the policy loss function, higher means a more random policy
    :param run: Specifies the run number, this is used in the filename of the output file
    """
    import keras as ks
    import numpy as np
    from agents.actor_critic import ActorCriticAgent
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from p_network import PNetwork
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    policy_network = ks.models.Sequential()
    policy_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    policy_network.add(
        ks.layers.Dense(50, activation='relu', input_shape=(4, )))
    policy_network.add(ks.layers.Dense(2, activation='softmax'))

    l = Logger(filename="../results/AC_VS_SL_cartpole_a2c_%.5f_%d.h5" %
               (entropy_reg, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    pn = PNetwork(policy_network,
                  actions,
                  lambda x: np.array(x.state),
                  fixed_steps=100,
                  entropy_regularization=entropy_reg,
                  alpha=0.001,
                  use_advantage=True)

    ac = ActorCriticAgent(env, dn, pn, replay_memory_size=1000)

    c = ac.get_configuration()
    experiment = l.start_experiment(c)
    q = ac.learn(num_episodes=250, result_handler=experiment.log)
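A driver for the function above would typically sweep entropy_reg over several values and repeat each setting; the sketch below is a minimal example of such a sweep, and the specific values and repetition count are assumptions rather than part of the original code.

# Hypothetical sweep driver for run_a2c_experiment; the entropy values and the
# number of repetitions per setting are illustrative assumptions.
if __name__ == '__main__':
    for entropy_reg in [0.0, 0.01, 0.1]:   # assumed sweep values
        for run in range(5):               # assumed repetitions per setting
            run_a2c_experiment(entropy_reg, run)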
Example No. 2
def snake_conv_sarsa(episodes=10000, file_name='snek'):
    """
    Trains a Deep SARSA agent with a convolutional Q-network on the visual Snake environment.
    """
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # Limit per-process GPU memory so several runs can share one GPU
    # (NUM_RUNS is assumed to be a module-level constant defined elsewhere in the script).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))
    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeVisual
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)

    env = SnakeVisual(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()
    size = np.shape(env.reset().state)

    nn = ks.models.Sequential()
    nn.add(
        ks.layers.Conv2D(filters=16,
                         kernel_size=(3, 3),
                         activation='sigmoid',
                         input_shape=size))
    nn.add(
        ks.layers.Conv2D(filters=24, kernel_size=(3, 3), activation='sigmoid'))
    nn.add(
        ks.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='sigmoid'))
    nn.add(ks.layers.Flatten())
    nn.add(ks.layers.Dense(units=16, activation='sigmoid'))
    nn.add(ks.layers.Dense(units=3, activation='linear'))

    nn.compile(optimizer=ks.optimizers.Adam(lr=0.0001), loss='mse')

    def normalize_state(s):
        return np.reshape(s.state, newshape=(1, ) + size)

    dqn = QNetworkSL(nn,
                     actions,
                     normalize_state,
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)

    dql = DeepSarsa(env,
                    dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)

    experiment = logger.start_experiment(dql.get_configuration())
    q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", nn.get_weights())
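The per_process_gpu_memory_fraction of 1 / NUM_RUNS - 0.05 only makes sense when NUM_RUNS processes share one GPU; a minimal launcher sketch, assuming NUM_RUNS = 4 and a multiprocessing setup that is not shown in the original, would look roughly like this.

# Hypothetical launcher matching the per-process GPU fraction used above.
# NUM_RUNS, the output filenames, and the use of multiprocessing are assumptions.
from multiprocessing import Process

NUM_RUNS = 4  # each process then requests 1/4 - 0.05 = 0.20 of GPU memory

if __name__ == '__main__':
    procs = [Process(target=snake_conv_sarsa,
                     kwargs={'episodes': 10000, 'file_name': 'snek_%d' % i})
             for i in range(NUM_RUNS)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()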
Example No. 3
def run_saraslambda_experiment(epsilon_start, epsilon_min, epsilon_decay,
                               run: int):
    """
    Runs deep sarasa lambda on cartpole
    :param epsilon_start: Starting epsilon value
    :param epsilon_min: Minimum epsilon value
    :param epsilon_decay: Factor multiplied with epsilon each step
    :param run: Run identifier used in the output filename
    :return:
    """

    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    l = Logger(filename="../results/AC_VS_SL_cartpole_sl_%.4f_%.4f_%f_%d.h5" %
               (epsilon_start, epsilon_min, epsilon_decay, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    sarsa = DeepSarsa(env,
                      dn,
                      replay_memory_size=1000,
                      epsilon_min=epsilon_min,
                      epsilon_step_factor=epsilon_decay,
                      epsilon=epsilon_start)

    c = sarsa.get_configuration()
    experiment = l.start_experiment(c)
    q = sarsa.learn(num_episodes=250, result_handler=experiment.log)
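Because epsilon_decay is applied multiplicatively at every step, the exploration schedule can be sanity-checked with a small helper; the function below and the example numbers are assumptions for illustration, not part of the experiment code.

# Hypothetical helper: how many steps until epsilon_start decays to epsilon_min.
import math

def steps_until_epsilon_min(epsilon_start, epsilon_min, epsilon_decay):
    # Smallest n with epsilon_start * epsilon_decay**n <= epsilon_min.
    return math.ceil(math.log(epsilon_min / epsilon_start) / math.log(epsilon_decay))

print(steps_until_epsilon_min(1.0, 0.01, 0.999))  # roughly 4603 steps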
Example No. 4
def snake_deep_sarsa(episodes=10000, file_name='snek'):
    """
    Trains a Deep SARSA agent with a dense Q-network on the SnakeContinuous environment.
    """
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # Limit per-process GPU memory so several runs can share one GPU
    # (NUM_RUNS is assumed to be a module-level constant defined elsewhere in the script).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1 / NUM_RUNS - 0.05
    set_session(tf.Session(config=config))
    import keras as ks
    import numpy as np
    from experiment_util import Logger
    from agents.deep_sarsa import DeepSarsa
    from environments.snake import SnakeContinuous
    from q_network_sarsa_lambda import QNetworkSL

    logger = Logger(filename=file_name)

    neural_network = ks.models.Sequential()
    neural_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(9, )))
    neural_network.add(ks.layers.Dense(50, activation='relu'))
    neural_network.add(ks.layers.Dense(3, activation='linear'))

    neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    env = SnakeContinuous(grid_size=[8, 8], render=False, render_freq=10)
    actions = env.valid_actions()

    dqn = QNetworkSL(neural_network,
                     actions,
                     lambda x: np.reshape(x.state, newshape=(1, 9)),
                     lambd=0.9,
                     lambda_min=1e-3,
                     gamma=0.9,
                     reward_factor=1,
                     fixed_length=100)

    dql = DeepSarsa(env,
                    dqn,
                    epsilon=0.3,
                    epsilon_step_factor=0.9999,
                    epsilon_min=0.005,
                    replay_memory_size=1000)
    experiment = logger.start_experiment(dql.get_configuration())
    q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
    experiment.save_attribute("weights", neural_network.get_weights())
Example No. 5
if __name__ == '__main__':
    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL

    from experiment_util import Logger
    l = Logger()

    lambd = [1.0]
    for i in range(len(lambd)):
        neural_network = ks.models.Sequential()
        neural_network.add(
            ks.layers.Dense(150, activation='relu', input_shape=(4, )))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))

        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = CartPole(render=False)
        actions = env.valid_actions()

        dqn = QNetworkSL(neural_network,
                         actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambd[i],
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)

        # Agent construction and training (hyperparameters assumed, following the
        # other DeepSarsa CartPole examples).
        dql = DeepSarsa(env,
                        dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995,
                        epsilon_min=0.0,
                        replay_memory_size=1000)

        experiment = l.start_experiment(dql.get_configuration())
        q = dql.learn(num_episodes=250, result_handler=experiment.log)
Example No. 6
def experiment(run_n, episodes, sigmas, lambda_parameter):
    """
    Runs a single experiment for each sigma value of Deep SARSA lambda on Cartpole
    :param run_n: The run number, used in the filename of the experiment
    :param episodes: Number of epsiodes to run
    :param sigmas: Values of sigma (noise standard deviation)
    :param lambda_parameter: The lambda value for this experiment
    :return: The filename of the output file
    """
    import tensorflow as tf

    # Stop TensorFlow from allocating all GPU memory at once, which allows multiple runs on one GPU.
    # These settings are ignored when running on CPU (which is often faster for this experiment).
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    set_session(tf.Session(config=config))
    import keras as ks

    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import NoisyCartPole
    from q_network_sarsa_lambda import QNetworkSL

    from experiment_util import Logger

    filename = ("results/cartpole_deepsarsalambda_lambda_%1.2f_%d.h5" %
                (lambda_parameter, run_n))
    l = Logger(filename=filename)

    for sigma in sigmas:

        neural_network = ks.models.Sequential()
        neural_network.add(
            ks.layers.Dense(150, activation='relu', input_shape=(4, )))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))

        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = NoisyCartPole(std=sigma, render=False)
        actions = env.valid_actions()

        dqn = QNetworkSL(neural_network,
                         actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambda_parameter,
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)

        dql = DeepSarsa(env,
                        dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995,
                        epsilon_min=0.0,
                        replay_memory_size=1000)

        c = dql.get_configuration()
        experiment = l.start_experiment(c)
        q = dql.learn(num_episodes=episodes, result_handler=experiment.log)
        experiment.save_attribute("weights", neural_network.get_weights())
        print("%s finished sigma=%1.2f, run=%i" % (filename, sigma, run_n))
    return filename
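A dispatcher for the experiment above might fan runs out over several lambda values in parallel; the sketch below is one possible way to do that, and the lambda values, sigma list, episode count, and pool size are all assumptions.

# Hypothetical dispatcher for the Deep SARSA(lambda) experiment above; all
# hyperparameter values and the pool size are illustrative assumptions.
from multiprocessing import Pool

def _dispatch(args):
    run_n, lambda_parameter = args
    return experiment(run_n, episodes=1000, sigmas=[0.0, 0.05, 0.1],
                      lambda_parameter=lambda_parameter)

if __name__ == '__main__':
    jobs = [(run_n, lam) for lam in [0.0, 0.5, 0.9] for run_n in range(3)]
    with Pool(processes=4) as pool:
        for finished in pool.map(_dispatch, jobs):
            print("finished", finished)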
Example No. 7
def experiment(run, episodes, sigmas, lambda_parameter):
    """
    Runs one SARSA(lambda) experiment with a discretized state on CartPole for each sigma value.
    :param run: The run number, used in the filename of the experiment
    :param episodes: Number of episodes to run per sigma value
    :param sigmas: Values of sigma (noise standard deviation)
    :param lambda_parameter: The lambda value for this experiment
    :return: The filename of the output file
    """
    import numpy as np
    from agents.sarsalambda import SarsaLambda
    from environments.cartpole import NoisyCartPole

    from experiment_util import Logger

    filename = ("../results/cartpole_sarsalambda_lambda_%1.2f_%d.h5" %
                (lambda_parameter, run))
    l = Logger(filename=filename)

    def cartpole_discretization(x):
        """
        The discretization function used for this experiment
        :param x: The state object
        :return: A tuple representing the discrete state
        """
        s = x.state
        if s.shape != (4, ):
            raise ValueError("Expected array of shape (4,). Instead got: %s" %
                             (str(s.shape)))

        out = np.zeros((4, ))

        # cart position range [-2.4, 2.4]
        out[0] = np.round(5 * (s[0] + 2.4) / 4.8, 0)  # normalized to [0, 1] then multiplied by 5

        # cart velocity [-inf +inf]
        out[1] = np.round(5 * np.sqrt(np.abs(s[1])) * np.sign(s[1]), 0)
        # pole angle : [-.26rad .26rad]
        out[2] = np.round(10 * (s[2] + .26) / .52,
                          0)  # normalized to [0 1] then multiplied by 10
        # pole velocity at tip
        out[3] = np.round(7 * np.sqrt(np.abs(s[3])) * np.sign(s[3]), 0)
        return tuple(out)

    def transform_state(s):
        # This is a different discretization used in earlier stages, similar to cartpole_discretization
        s = s.state
        s *= np.array([1, 1, 10, 1])
        s *= 2
        s = np.round(s)
        return str(s)

    for sigma in sigmas:
        env = NoisyCartPole(std=sigma, render=False)
        actions = env.valid_actions()

        sl = SarsaLambda(env,
                         lam=lambda_parameter,
                         gamma=1.0,
                         epsilon=0.7,
                         epsilon_step_factor=0.99998,
                         epsilon_min=0.0,
                         fex=cartpole_discretization)

        c = sl.get_configuration()
        experiment = l.start_experiment(c)
        pi = sl.learn(num_iter=episodes, result_handler=experiment.log)
        experiment.save_attribute("pi", pi)
        print("%s finished sigma=%1.2f, run=%i" % (filename, sigma, run))
    return filename
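The discretization is easiest to check on a concrete observation; the demo below assumes cartpole_discretization has been lifted to module scope (in the source it is nested inside experiment) and uses types.SimpleNamespace as a stand-in for the environment's state object.

# Hypothetical check of cartpole_discretization on a sample observation.
import numpy as np
from types import SimpleNamespace

sample = SimpleNamespace(state=np.array([0.1, 0.5, 0.05, -1.0]))
# position  0.1  -> round(5 * 2.5 / 4.8)      =  3.0
# velocity  0.5  -> round(5 * sqrt(0.5))      =  4.0
# angle     0.05 -> round(10 * 0.31 / 0.52)   =  6.0
# tip vel  -1.0  -> round(7 * sqrt(1.0) * -1) = -7.0
print(cartpole_discretization(sample))  # (3.0, 4.0, 6.0, -7.0)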