Example #1
def run_a2c_experiment(entropy_reg, run: int):
    """
    Runs a single A2C run on CartPole with the specified parameters
    :param entropy_reg: Entropy regularization coefficient in the policy loss; higher values give a more random policy
    :param run: Run number, used in the output filename
    """
    import keras as ks
    import numpy as np
    from agents.actor_critic import ActorCriticAgent
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from p_network import PNetwork
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    policy_network = ks.models.Sequential()
    policy_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    policy_network.add(
        ks.layers.Dense(50, activation='relu', input_shape=(4, )))
    policy_network.add(ks.layers.Dense(2, activation='softmax'))

    l = Logger(filename="../results/AC_VS_SL_cartpole_a2c_%.5f_%d.h5" %
               (entropy_reg, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    pn = PNetwork(policy_network,
                  actions,
                  lambda x: np.array(x.state),
                  fixed_steps=100,
                  entropy_regularization=entropy_reg,
                  alpha=0.001,
                  use_advantage=True)

    ac = ActorCriticAgent(env, dn, pn, replay_memory_size=1000)

    c = ac.get_configuration()
    experiment = l.start_experiment(c)
    q = ac.learn(num_episodes=250, result_handler=experiment.log)
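
The entropy_reg parameter above weights an entropy bonus in the policy loss: higher values penalize confident action distributions and keep exploration up. A minimal, hypothetical sketch of how such a bonus is usually combined with a policy-gradient loss (the function below is illustrative and not part of PNetwork):

import numpy as np

def policy_loss_with_entropy(probs, action, advantage, entropy_reg):
    # Illustrative only: policy-gradient loss for one transition plus an
    # entropy bonus. 'probs' is the action distribution predicted for the
    # state, 'action' the index actually taken, 'advantage' its estimate.
    log_prob = np.log(probs[action] + 1e-8)
    entropy = -np.sum(probs * np.log(probs + 1e-8))
    # A larger entropy_reg subtracts a larger bonus, favouring more random policies.
    return -(log_prob * advantage) - entropy_reg * entropy

# The entropy bonus reduces the loss more for the near-uniform policy.
print(policy_loss_with_entropy(np.array([0.5, 0.5]), 0, 1.0, entropy_reg=0.1))
print(policy_loss_with_entropy(np.array([0.9, 0.1]), 0, 1.0, entropy_reg=0.1))
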
Example #2
def run_saraslambda_experiment(epsilon_start, epsilon_min, epsilon_decay,
                               run: int):
    """
    Runs deep SARSA(lambda) on CartPole
    :param epsilon_start: Starting epsilon value
    :param epsilon_min: Minimum epsilon value
    :param epsilon_decay: Decay factor multiplied into epsilon each step
    :param run: Run identifier used in the output filename
    """

    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger

    value_network = ks.models.Sequential()
    value_network.add(
        ks.layers.Dense(150, activation='relu', input_shape=(4, )))
    value_network.add(ks.layers.Dense(50, activation='relu'))

    value_network.add(ks.layers.Dense(2, activation='linear'))

    value_network.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    l = Logger(filename="../results/AC_VS_SL_cartpole_sl_%.4f_%.4f_%f_%d.h5" %
               (epsilon_start, epsilon_min, epsilon_decay, run))
    env = CartPole(render=False)
    actions = env.valid_actions()

    dn = QNetworkSL(value_network,
                    actions,
                    lambda x: np.reshape(x.state, newshape=(1, 4)),
                    lambd=0.9,
                    gamma=1.0,
                    reward_factor=0.01,
                    fixed_length=100,
                    lambda_min=1e-2)

    sarsa = DeepSarsa(env,
                      dn,
                      replay_memory_size=1000,
                      epsilon_min=epsilon_min,
                      epsilon_step_factor=epsilon_decay,
                      epsilon=epsilon_start)

    c = sarsa.get_configuration()
    experiment = l.start_experiment(c)
    q = sarsa.learn(num_episodes=250, result_handler=experiment.log)
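
The epsilon parameters documented above define a multiplicative decay schedule. Assuming DeepSarsa multiplies epsilon by epsilon_decay once per step and clips it at epsilon_min, as the docstring suggests, the schedule can be sketched as:

def epsilon_schedule(epsilon_start, epsilon_min, epsilon_decay, num_steps):
    # Sketch of a per-step multiplicative epsilon decay with a lower bound.
    epsilon = epsilon_start
    values = []
    for _ in range(num_steps):
        values.append(epsilon)
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
    return values

# Starting at 1.0 with decay 0.999, epsilon is roughly 0.37 after 1000 steps.
print(epsilon_schedule(1.0, 0.05, 0.999, 1000)[-1])
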
Example #3
if __name__ == '__main__':
    import keras as ks
    import numpy as np
    from agents.deep_q import DeepQLearning
    from environments.cartpole import CartPole
    from q_network import QNetwork

    nn = ks.models.Sequential()
    nn.add(ks.layers.Dense(32, activation='sigmoid', input_shape=(4, )))
    nn.add(ks.layers.Dense(32, activation='sigmoid'))
    nn.add(ks.layers.Dense(2, activation='linear'))

    nn.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse')

    env = CartPole(render=True)
    actions = env.valid_actions()

    dqn = QNetwork(nn, actions, lambda x: np.reshape(x.state, newshape=(1, 4)))

    dql = DeepQLearning(env, dqn)

    q = dql.learn()
Example #4
    import keras as ks
    import numpy as np
    from agents.deep_sarsa import DeepSarsa
    from environments.cartpole import CartPole
    from q_network_sarsa_lambda import QNetworkSL
    from experiment_util import Logger
    l = Logger()

    lambd = [1.0]
    for i in range(len(lambd)):
        neural_network = ks.models.Sequential()
        neural_network.add(
            ks.layers.Dense(150, activation='relu', input_shape=(4, )))
        neural_network.add(ks.layers.Dense(50, activation='relu'))
        neural_network.add(ks.layers.Dense(2, activation='linear'))

        neural_network.compile(optimizer=ks.optimizers.Adam(lr=0.001),
                               loss='mse')

        env = CartPole(render=False)
        actions = env.valid_actions()

        dqn = QNetworkSL(neural_network,
                         actions,
                         lambda x: np.reshape(x.state, newshape=(1, 4)),
                         lambd=lambd[i],
                         lambda_min=1e-3,
                         gamma=1.0,
                         reward_factor=0.01,
                         fixed_length=100)

        dql = DeepSarsa(env,
                        dqn,
                        epsilon=1.0,
                        epsilon_step_factor=0.9995)
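
In the sweep above, lambd sets the SARSA(lambda) trace decay and lambda_min truncates weights that have become negligible. A rough, illustrative sketch of the weighting this implies (assuming weights lambd**k are dropped once they fall below lambda_min or after fixed_length steps):

def lambda_weights(lambd, lambda_min, max_len=100):
    # Illustrative only: weights lambd**k for increasingly delayed targets,
    # truncated at lambda_min or at max_len (cf. fixed_length=100 above).
    weights = []
    w = 1.0
    while w >= lambda_min and len(weights) < max_len:
        weights.append(w)
        w *= lambd
    return weights

# With lambd=0.9 and lambda_min=1e-3, 66 delayed targets keep a usable weight.
print(len(lambda_weights(0.9, 1e-3)))
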
Example #5
        """
        Add one sample to the replay memory
        :param s: State
        :param a: Action performed on that state
        :param r: Reward obtained from performing the action
        :param sp: Resulting state
        """
        self.replay_memory.append((s, a, r, sp))


if __name__ == '__main__':
    import keras as ks
    import numpy as np
    from environments.cartpole import CartPole

    nn = ks.models.Sequential()
    nn.add(ks.layers.Dense(32, activation='sigmoid', input_shape=(4,)))
    nn.add(ks.layers.Dense(32, activation='sigmoid'))
    nn.add(ks.layers.Dense(2, activation='linear'))

    nn.compile(optimizer=ks.optimizers.Adam(lr=0.001),
               loss='mse')

    _e = CartPole(render=True)
    _out_map = _e.valid_actions()

    dqn = QNetwork(nn, _out_map, lambda x: np.reshape(x.observation, newshape=(1, 4)))

    dql = DeepQLearning(_e, dqn)

    q = dql.learn()
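
Example #5 only shows how transitions are appended to the replay memory. A self-contained, illustrative counterpart showing how such a buffer is typically capped and sampled for minibatch updates (the class below is not the repository's API):

import random
from collections import deque

class ReplayMemory:
    # Illustrative buffer storing (s, a, r, sp) tuples with a fixed capacity,
    # mirroring the replay_memory_size arguments used in the examples above.
    def __init__(self, capacity=1000):
        self.memory = deque(maxlen=capacity)

    def append(self, s, a, r, sp):
        self.memory.append((s, a, r, sp))

    def sample(self, batch_size):
        # Uniform random minibatch, capped at the current buffer size.
        return random.sample(list(self.memory), min(batch_size, len(self.memory)))

memory = ReplayMemory(capacity=1000)
for i in range(10):
    memory.append(i, 0, 1.0, i + 1)
print(memory.sample(4))
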
Example #6
from sacx.multi_task_logger import PlottingMultiTaskLogger

if __name__ == '__main__':
    import keras as ks
    from environments.cartpole import CartPole
    from environments.wrappers.MultiTaskWrapper import MultiTaskWrapper
    from sacx.tasked_q_network import QNetwork
    from sacx.tasked_p_network import PolicyNetwork
    from sacx.sacu import SACU
    from sacx.extcore import Task

    senv = CartPole(render=True)
    tasks = [Task("MAIN_TASK")]
    env = MultiTaskWrapper(senv, lambda s, a, r, t: {tasks[0]: r}, tasks)
    actions = env.valid_actions()

    listeners = [PlottingMultiTaskLogger(tasks, 1000, ['red'])]

    q_network = QNetwork((4, ),
                         actions,
                         tasks,
                         ks.layers.Dense(100, activation='relu'),
                         ks.layers.Dense(2, activation='linear'),
                         lambda x: x.state,
                         gamma=1.0,
                         alpha=0.001,
                         fixed_steps=100,
                         reward_scale=0.01)
    p_network = PolicyNetwork((4, ),
                              actions,
                              tasks,