Esempio n. 1
0
def create(env):
    """Build and compile a SARSA agent for the given environment.

    Seeds numpy and the environment from the current domain config so
    runs are reproducible, then constructs a small fully-connected
    Q-network over the flattened observation.
    """
    np.random.seed(config.current.domain_seed)
    env.seed(config.current.domain_seed)
    nb_actions = env.action_space.n

    # Simple MLP: three hidden layers of equal width, linear Q-value head.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(config.current.agent_vfn_complexity))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # Keep a handle on the default TF graph so the model can be used
    # from other threads later.
    global graph
    graph = tf.get_default_graph()

    # SARSA is on-policy and does not require a replay memory.
    agent = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=BoltzmannQPolicy())
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent
def init_sarsa(env, nb_actions, lr=1e-3):
    """ Initialize the Sarsa agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :param lr: learning rate
    :return: Sarsa Agent
    """
    # Next, we build a very simple model: a funnel-shaped MLP
    # (512 -> 256 -> 64 hidden units) with a linear Q-value head.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for units in (512, 256, 64):
        model.add(Dense(units))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=policy)
    # Fix: the original used an f-string with no placeholders.
    sarsa.model_name = "SARSA"
    sarsa.compile(Adam(lr=lr), metrics=['mae'])
    return sarsa
Esempio n. 3
0
def main():
    """Train a SARSA agent on MyEnv and save its weights next to this file."""
    # Small MLP over a (1, 7) observation window with 6 discrete actions.
    model = Sequential()
    model.add(Flatten(input_shape=(1, 7)))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dense(units=6, activation='linear'))
    logger.info(model.summary())

    # Bug fix: 1E9 is a float literal; keras-rl's nb_steps is a step
    # counter and should be an int.
    steps = int(1e9)

    # policy = MyPolicy()
    policy = BoltzmannQPolicy()
    agent = SARSAAgent(model=model, nb_actions=6, policy=policy, train_interval=10, nb_steps_warmup=10)

    # The original also built an unused SGD optimizer and an unused
    # `interval` variable; only Adam is actually compiled.
    agent.compile(optimizer=Adam(), metrics=['mse'])

    env = MyEnv()
    agent.fit(env, steps, verbose=2, visualize=True)

    # Persist the trained weights next to this script.
    fp = Path(__file__).resolve().parent / 'sarsa_weights.h5f'
    agent.save_weights(fp, overwrite=True)

    logger.info('Done')
Esempio n. 4
0
def run_sarsa():
    """Train SARSA on SnakeGymDiscrete, save the weights, then demo 5 episodes.

    Reads the module-level N_NODE_NETWORK constant for the hidden-layer width.
    """
    # Fix: no `global` statement needed — N_NODE_NETWORK is only read,
    # never assigned, so the global declaration was redundant.
    env = SnakeGymDiscrete()
    nb_actions = env.action_space.n

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # create model: three hidden layers of N_NODE_NETWORK ReLU units
    # feeding a linear Q-value head.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(N_NODE_NETWORK))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)
    sarsa.save_weights('sarsa_SnakeGymDiscrete_weights.h5f', overwrite=True)

    sarsa.test(env, nb_episodes=5, visualize=True)
Esempio n. 5
0
def create_sarsa_agent(env):
    """Create and compile a SARSA agent for *env*.

    :param env: the gym environment to build the agent for; if None,
        a default environment is created via create_environment()
    :return: a compiled SARSAAgent
    """
    # Bug fix: the original ignored the *env* argument and always
    # overwrote it with create_environment(); respect the caller's env
    # and only fall back when none is supplied.
    if env is None:
        env = create_environment()
    model = create_deep_model(env)
    nb_actions = env.action_space.n
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
    return sarsa
Esempio n. 6
0
def main():
    """Train a SARSA trading agent on BinanceEnv, evaluate it, then replay greedily."""
    env = BinanceEnv()

    # Q-network over the flattened observation; one output per discrete action.
    model = agent(env.observation_space.shape[0], env.action_space.n)
    policy = EpsGreedyQPolicy()
    sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)
    sarsa.compile('adam', metrics=['mse', 'accuracy'])
    # sarsa.load_weights('sarsa_weights_bnb_07.h5f')

    # Train on the simulated market, then persist the weights.
    env.is_testing = False
    sarsa.fit(env, nb_steps=100000, visualize=False, verbose=1)
    sarsa.save_weights('sarsa_weights_bnb_07_1.h5f', overwrite=True)

    # Evaluate. Fix: the original printed "over 100 test games" while
    # running a single episode; report the actual episode count instead.
    env.is_testing = True
    scores = sarsa.test(env, nb_episodes=1, visualize=False)
    print('Average score over {} test games:{}'.format(
        len(scores.history['episode_reward']),
        np.mean(scores.history['episode_reward'])))

    _ = sarsa.test(env, nb_episodes=10, visualize=True)

    # Greedy rollout. Bug fix: a Keras model's predict() returns an array
    # of Q-values, not an (action, state) tuple — the original
    # `action, _states = model.predict(obs)` (stable-baselines idiom)
    # would raise. Pick the argmax action instead.
    # assumes obs is a flat vector matching the (1, obs_dim) input — TODO confirm
    obs = env.reset()
    for _ in range(2000):
        q_values = model.predict(np.expand_dims(np.expand_dims(obs, 0), 0))
        obs, reward, done, info = env.step(int(np.argmax(q_values[0])))
        env.render()
Esempio n. 7
0
class SarsaAgent(Agent):
    """Thin wrapper around keras-rl's SARSAAgent with eps-greedy policies."""

    def __init__(self, state_dim, action_space, epsilon, gamma, lr):
        # Build the Q-network and wrap it in a keras-rl SARSA agent.
        self._model = self._get_model(state_dim, action_space)
        train_policy = EpsGreedyQPolicy(epsilon)
        # Near-greedy policy for evaluation runs.
        eval_policy = EpsGreedyQPolicy(eps=0.01)
        self.agent = SARSAAgent(self._model,
                                nb_actions=action_space,
                                gamma=gamma,
                                policy=train_policy,
                                test_policy=eval_policy)
        self.agent.compile(Adam(lr))

    def model_summary(self):
        """Print the Keras summary of the underlying Q-network."""
        print(self._model.summary())
Esempio n. 8
0
def main():
    """Build a SARSA agent for the bact2 cart-pole and run it via the RunEngine."""
    # nb_actions = cpst._action_space
    nb_actions = 2
    # Next, we build a very simple model.
    model = Sequential()
    #n_os = cpst._observation_space.shape
    n_os = 4
    model.add(Flatten(input_shape=[1] + [n_os]))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    print(model.summary())
    # Freeze the predict function so the model can be called from the
    # RunEngine's worker threads (Keras threading workaround).
    model._make_predict_function()

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    cart_pole = CartPole(name='cp')

    RE = RunEngine({})
    RE.log.setLevel('DEBUG')
    cart_pole.log = RE.log

    # State is the classic cart-pole 4-tuple read from the device motors.
    stm = [cart_pole.x, cart_pole.x_dot, cart_pole.theta, cart_pole.theta_dot]
    cpst = CartPoleEnv(detectors=[cart_pole],
                       motors=[cart_pole],
                       state_motors=stm,
                       user_kwargs={'mode_var': cart_pole.rl_mode})

    np.random.seed(123)
    cpst.seed(123)

    # The original also created an unused logging.getLogger('bact2');
    # all logging goes through RE.log instead.
    run = functools.partial(run_test, sarsa, cpst, log=RE.log)
    RE(run_environement(cpst, run, log=RE.log))
Esempio n. 9
0
def main():
    """Train/run SARSA against a cart-pole served through a client proxy.

    The original began with a dead `if True: pass` placeholder (left over
    from a commented-out ServerProxy context manager); it has been removed.
    """
    # with ServerProxy("http://127.0.0.1:8000/", verbose=False, allow_none=True) as proxy:

    #D:\Devel\github\keras-rl;D:\Devel\github\Devel\hz-b\naus
    # set PYTHONPATH=D:\Devel\github\keras-rl;D:\Devel\github\Devel\hz-b\naus
    # & python d:\Devel\github\Devel\hz-b\naus\examples\rl\cart_pole\sarsa_cartpole.py

    def stop_my_application():
        # Placeholder shutdown hook for allow_interrupt().
        print('Stopping application')

    with allow_interrupt():
        # main polling loop.
        env = EnvironmentProxyForClient(receiver=None)
        np.random.seed(1974)
        env.seed(1974)

        env.reset()

        # nb_actions = cpst._action_space
        nb_actions = 2
        # Next, we build a very simple model.
        model = Sequential()
        #n_os = cpst._observation_space.shape
        n_os = 4
        model.add(Flatten(input_shape=[1] + [n_os]))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))
        print(model.summary())

        # SARSA does not require a memory.
        policy = BoltzmannQPolicy()
        sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
        sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

        # NOTE(review): `log` is not defined in this function — presumably a
        # module-level logger; verify against the enclosing module.
        run_test(sarsa, env, log=log)
Esempio n. 10
0
def run_sarsa_agent(driver, queries, candidate_indices, tuning_config):
    """Train and evaluate a SARSA agent on the UDO optimization environment."""
    # Build the environment from the tuning configuration.
    env = gym.make("udo_optimization-v0", driver=driver, queries=queries, candidate_indices=candidate_indices,
                   config=tuning_config)
    env.horizon = tuning_config['horizon']

    nb_actions = env.action_space.n
    logging.info(f"nr action: {nb_actions}")
    logging.info(f"observation space: {env.observation_space.shape}")

    # MLP Q-network with a widening-then-narrowing stack of hidden layers.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for width in (52, 252, 526, 252):
        model.add(Dense(width))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    logging.info(model.summary())

    # SARSA is on-policy, so no replay memory is needed.
    agent = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=BoltzmannQPolicy())
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    # Train briefly (rendering off — it slows training considerably);
    # training can be aborted safely with Ctrl+C.
    agent.fit(env, nb_steps=500, visualize=False, verbose=2)

    # Evaluate for 5 episodes and report the best state found.
    agent.test(env, nb_episodes=5, visualize=False)
    env.print_state_summary(env.best_state)
Esempio n. 11
0
def test_sarsa():
    """End-to-end check: SARSA reaches the optimal mean reward of 3.0."""
    env = TwoRoundDeterministicRewardEnv()
    # Seed every RNG involved so the run is deterministic.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Minimal Q-network: a single 16-unit hidden layer.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    agent = SARSAAgent(model=model, nb_actions=nb_actions,
                       nb_steps_warmup=50, policy=policy)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=20000, visualize=False, verbose=0)
    # Greedy evaluation: disable exploration entirely.
    policy.eps = 0.
    history = agent.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
Esempio n. 12
0
def test_sarsa():
    """Verify SARSA converges to the optimal reward (3.0) on the toy env."""
    env = TwoRoundDeterministicRewardEnv()
    # Fixed seeds keep the training run reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    n_actions = env.action_space.n

    # A tiny one-hidden-layer Q-network suffices for this environment.
    net = Sequential()
    net.add(Dense(16, input_shape=(1,)))
    net.add(Activation('relu'))
    net.add(Dense(n_actions, activation='linear'))

    explore = EpsGreedyQPolicy(eps=.1)
    sarsa = SARSAAgent(model=net, nb_actions=n_actions,
                       nb_steps_warmup=50, policy=explore)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)
    # Switch off exploration for the assertion run.
    explore.eps = 0.
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Esempio n. 13
0
def get_agent(agent_type, model_type, lr):
    """Build and compile an RL agent of the requested type.

    :param agent_type: one of "sarsa", "dqn", "a2c" ("ppo" is unimplemented)
    :param model_type: forwarded to get_model() to select the Q-network
    :param lr: learning rate for the Adam optimizer
    :return: a compiled agent, or None for the unimplemented "ppo" stub
    :raises ValueError: for an unknown agent_type
    """
    if agent_type == "sarsa":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        agent = SARSAAgent(model=model,
                           policy=policy,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           gamma=0.99)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    if agent_type == "dqn":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        memory = SequentialMemory(limit=50000, window_length=1)
        agent = DQNAgent(model=model,
                         policy=policy,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         enable_double_dqn=True)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    if agent_type == "a2c":
        agent = A2CAgent(nb_actions,
                         len(env.observation_space.high),
                         nb_steps_warmup=10,
                         actor_lr=0.001,
                         critic_lr=0.005)
        agent.compile(Adam(lr))
        return agent
    if agent_type == "ppo":
        # TODO: PPO is not implemented; the original silently fell through
        # and returned None — made explicit here.
        return None
    # Fix: raise instead of print() + exit(1) so callers get a catchable
    # error and library code does not terminate the interpreter.
    raise ValueError("Unsupported model: {}".format(agent_type))
Esempio n. 14
0
    # NOTE(review): this chunk is the interior of a larger function whose
    # header is not visible here — names like fmap, nb_actions,
    # observation_shape, inverse_model and the *_fname paths come from the
    # enclosing scope. Kept byte-identical; comments only.
    #    print(inverse_model.summary())

    # predicts future state from current state and action
    forward_model = build_forward_model(fmap, nb_actions)
    forward_model.compile(Adam(lr=1e-3), loss='mse', metrics=['mse'])
    #    print(forward_model.summary())

    # Policy/actor network over a (1,) + observation_shape input window.
    model = build_actor_model((1, ) + observation_shape, nb_actions)
    #    print(model.summary())

    # SARSA agent with Boltzmann exploration; very short warmup (3 steps).
    policy = BoltzmannQPolicy()
    agent = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=3,
                       policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    agent.reset_states()

    #=========================================================================#

    # re-use weights if possible
    if (os.path.isfile(inv_weights_fname)):
        inverse_model.load_weights(inv_weights_fname)

    if (os.path.isfile(fwd_weights_fname)):
        forward_model.load_weights(fwd_weights_fname)

    if (os.path.isfile(agent_weights_fname)):
        agent.load_weights(agent_weights_fname)
#    else:
# FIXME: this bit is necessary or agent does nothing???
Esempio n. 15
0
# NOTE(review): the top of this script is cut off — `x`, the first layers of
# `y`, `nb_actions` and `env` are defined above the visible portion.
# Functional-API tail of the Q-network: two more 24-unit ReLU layers and a
# linear Q-value head.
y = Activation('relu')(y)
y = Dense(24)(y)
y = Activation('relu')(y)
y = Dense(24)(y)
y = Activation('relu')(y)
y = Dense(nb_actions)(y)
y = Activation('linear')(y)
model = Model(x, y)

# SARSA with eps-greedy exploration, a long warmup and a 0.85 discount.
policy = EpsGreedyQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10000,
                   policy=policy,
                   gamma=.85)
# Aggressive learning rate (0.3) with decay — unusual for Adam; kept as-is.
sarsa.compile(Adam(lr=.3, decay=.001), metrics=['mae'])

# Collect per-episode rewards from training and plot the learning curve.
rewards = []
hist = sarsa.fit(env, nb_steps=100000, visualize=False, verbose=2)
rewards.extend(hist.history.get('episode_reward'))
plt.plot(rewards)

sarsa.test(env, nb_episodes=5, visualize=True)

# Manual rollout. NOTE(review): the action is sampled once and never
# updated inside the loop (the update line is commented out), so every
# step replays the same action — presumably intentional for a demo.
state = env.reset()
action = env.action_space.sample()
print(action)
for i in range(300):
    # action = np.argmax(sarsa.model.predict(np.expand_dims(np.expand_dims(state, 0), 0))[0])
    state, reward, done, _ = env.step(action)
    env.render()
Esempio n. 16
0
class DistopiaSARSA:
    """SARSA agent for the Distopia districting environments (keras-rl).

    Builds (or reloads) a small MLP Q-network, wraps the gym environment
    with a recording Monitor, and exposes train()/test() entry points.
    """

    def __init__(self,
                 env_name='distopia-initial4-v0',
                 in_path=None,
                 out_path=None,
                 terminate_on_fail=False,
                 reconstruct=False):
        self.ENV_NAME = env_name
        self.filename = self.ENV_NAME
        self.init_paths(in_path, out_path)
        self.init_env(terminate_on_fail)
        self.init_model(reconstruct)
        self.compile_agent()

    def init_paths(self, in_path, out_path):
        """Record input/output paths and create a fresh timestamped log dir."""
        self.in_path = in_path  #if self.in_path != None else './'
        # Fix: compare against None with `is not None`, not `!=`.
        self.out_path = out_path if out_path is not None else './'
        self.log_path = "./logs/{}".format(time.time())
        os.mkdir(self.log_path)

    def init_env(self, terminate_on_fail):
        """Create, wrap and seed the gym environment; cache action geometry."""
        self.env = gym.make(self.ENV_NAME)
        self.env.terminate_on_fail = terminate_on_fail
        self.env.record_path = "{}/ep_".format(self.log_path)
        self.env = gym.wrappers.Monitor(self.env, "recording", force=True)
        np.random.seed(234)
        self.env.seed(234)
        # Flattened action count over the MultiDiscrete action space.
        self.nb_actions = np.sum(self.env.action_space.nvec)
        self.num_actions = self.env.NUM_DIRECTIONS
        self.num_blocks = self.env.NUM_DISTRICTS * self.env.BLOCKS_PER_DISTRICT

    def init_model(self, reconstruct=False):
        """Build the Q-network, or reload architecture/weights from in_path."""
        if self.in_path is not None:
            if reconstruct:
                self.construct_model()
            else:
                # Fix: use a context manager so the file is closed even
                # if reading or parsing raises.
                with open("{}/{}.yaml".format(self.in_path, self.filename),
                          'r') as yaml_file:
                    model_yaml = yaml_file.read()
                self.model = model_from_yaml(model_yaml)
            self.model.load_weights("{}/{}.h5".format(self.in_path,
                                                      self.filename))
        else:
            # Next, we build a very simple model.
            self.construct_model()
        self.save_model()
        print(self.model.summary())

    def construct_model(self):
        """Two hidden layers of 64 ReLU units and a linear Q-value head."""
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.env.observation_space.shape))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))

    def save_model(self):
        """Persist the model architecture (YAML) and weights (HDF5)."""
        if self.out_path is not None:
            # NOTE(review): the YAML is written to the CWD while the weights
            # go to out_path; init_model() later reads both from in_path.
            # Kept as-is — changing paths would alter behavior.
            with open(self.filename + ".yaml", 'w+') as yaml_file:
                yaml_file.write(self.model.to_yaml())
            self.model.save_weights('{}/{}.h5'.format(self.out_path,
                                                      self.ENV_NAME))

    def compile_agent(self):
        """Configure and compile the SARSA agent.

        Boltzmann policy for training, greedy policy for evaluation;
        SARSA needs no replay memory.
        """
        processor = DistopiaProcessor(self.num_blocks, self.num_actions)
        policy = BoltzmannQPolicy()
        test_policy = GreedyQPolicy()
        self.sarsa = SARSAAgent(model=self.model,
                                processor=processor,
                                nb_actions=self.nb_actions,
                                nb_steps_warmup=1000,
                                policy=policy,
                                test_policy=test_policy,
                                gamma=0.9)
        self.sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    def train(self, max_steps=100, episodes=100):
        """Train for max_steps * episodes steps with JSON file logging,
        then save the final weights. Training can be aborted with Ctrl+C."""
        self.env._max_steps = max_steps
        self.env.current_step = 0
        n_steps = max_steps * episodes
        logger = FileLogger(
            filepath='{}/{}.json'.format(self.out_path, self.ENV_NAME))
        self.sarsa.fit(self.env,
                       nb_steps=n_steps,
                       nb_max_episode_steps=max_steps,
                       visualize=False,
                       verbose=1,
                       callbacks=[logger])
        # After training, persist the final weights.
        self.sarsa.save_weights('{}/{}.h5'.format(self.out_path,
                                                  self.ENV_NAME),
                                overwrite=True)

    def test(self):
        """Evaluate the trained agent for 5 rendered episodes."""
        self.sarsa.test(self.env,
                        nb_episodes=5,
                        nb_max_start_steps=0,
                        visualize=True)
Esempio n. 17
0
# Three identical 16-unit ReLU hidden layers, then a linear Q-value head.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
for _ in range(3):
    model.add(Dense(16))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA is on-policy, so no replay memory is needed.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train with rendering off — visualization slows training considerably.
# The run can always be aborted safely with Ctrl+C.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

# Persist the trained weights.
sarsa.save_weights(f'sarsa_{ENV_NAME}_weights.h5f', overwrite=True)

# Evaluate for 5 rendered episodes.
sarsa.test(env, nb_episodes=5, visualize=True)
Esempio n. 18
0
def train():
    """Train a SARSA agent on ENV_NAME under a selectable reward scheme.

    The reward mode is chosen by the module-level REWARD constant
    ("normal" | "noisy" | "surrogate"); SMOOTH toggles reward smoothing
    for the noisy/surrogate processors. History and weights are written
    into LOG_DIR.
    """
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Let TF allocate GPU memory on demand instead of reserving it all.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Simple MLP Q-network: three hidden layers of 16 ReLU units.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    # Simplification: the original duplicated these constructors in an
    # if/else over SMOOTH; passing smooth=SMOOTH directly is equivalent.
    processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=SMOOTH, surrogate=False)
    processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=SMOOTH, surrogate=True)

    # File-name suffix shared by the noisy/surrogate branches.
    suffix = '_smooth' if SMOOTH else ''

    if REWARD == "normal":
        sarsa_normal = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                                  policy=policy)
        sarsa_normal.compile(Adam(lr=1e-3), metrics=['mae'])
        history_normal = sarsa_normal.fit(env, nb_steps=50000, visualize=False, verbose=2)
        sarsa_normal.save_weights(os.path.join(LOG_DIR, 'sarsa_normal_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
        sarsa_normal.test(env, nb_episodes=10, visualize=False, verbose=2)

        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        sarsa_noisy = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                                 policy=policy, processor=processor_noisy)
        sarsa_noisy.compile(Adam(lr=1e-3), metrics=['mae'])
        history_noisy = sarsa_noisy.fit(env, nb_steps=50000, visualize=False, verbose=2)
        sarsa_noisy.save_weights(
            os.path.join(LOG_DIR, 'sarsa_noisy{}_{}_weights.h5f'.format(suffix, ENV_NAME)),
            overwrite=True)
        pandas.DataFrame(history_noisy.history).to_csv(
            os.path.join(LOG_DIR, "noisy{}.csv".format(suffix)))

        sarsa_noisy.test(env, nb_episodes=10, visualize=False)

    elif REWARD == "surrogate":
        sarsa_surrogate = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                                     policy=policy, processor=processor_surrogate)
        sarsa_surrogate.compile(Adam(lr=1e-3), metrics=['mae'])
        history_surrogate = sarsa_surrogate.fit(env, nb_steps=50000, visualize=False, verbose=2)
        sarsa_surrogate.save_weights(
            os.path.join(LOG_DIR, 'sarsa_surrogate{}_{}_weights.h5f'.format(suffix, ENV_NAME)),
            overwrite=True)
        pandas.DataFrame(history_surrogate.history).to_csv(
            os.path.join(LOG_DIR, "surrogate{}.csv".format(suffix)))

        sarsa_surrogate.test(env, nb_episodes=10, visualize=False)
Esempio n. 19
0
nb_actions = env.action_space.n

# Q-network: three 16-unit ReLU layers feeding one linear output per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for _ in range(3):
    model.add(Dense(16))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA is on-policy and therefore needs no replay memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Train without rendering (visualization slows training considerably);
# the run can be aborted safely with Ctrl+C at any time.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

# Persist the final weights.
sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Evaluate for 5 rendered episodes.
sarsa.test(env, nb_episodes=5, visualize=True)
Esempio n. 20
0
model.summary()

#%%
from rl.agents import SARSAAgent
from rl.policy import EpsGreedyQPolicy

policy = EpsGreedyQPolicy()

sarsa = SARSAAgent(
    model=model,
    policy=policy,
    nb_actions=env.action_space.n  # from env !
)

sarsa.compile('adam', metrics=['mse'])  # just model.compile(...)

# Bug fix: nb_steps must be an int — 5e4 is a float literal and keras-rl
# uses it directly as a step counter.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=1)

#%%
# Evaluate and report the mean episode reward over 100 games.
scores = sarsa.test(env, nb_episodes=100, visualize=False)
mean_score = np.mean(scores.history['episode_reward'])
print('Average score over 100 test games: {}'.format(mean_score))

#%%
sarsa.save_weights('sarsa_weights.h5f', overwrite=True)

#%%
sarsa.load_weights('sarsa_weights.h5f')

#%%
Esempio n. 21
0
class KerasSarsaAgent(AbstractAgent):
    """Wraps keras-rl's SARSAAgent behind the project's AbstractAgent interface."""

    def __init__(self, env, timesteps_per_episode=10001):
        super().__init__(env, timesteps_per_episode)
        self.num_episodes = 400
        self.evaluating = False
        # Cache environment geometry used by the model and the agent.
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        self.agent = SARSAAgent(model=self.model,
                                nb_actions=self.action_size,
                                policy=EpsGreedyQPolicy())

    def run(self) -> {str: float}:
        """
        The agent's training method.
        Returns: a dictionary - {"episode_reward_mean": __, "episode_reward_min": __, "episode_reward_max": __,
        "episode_len_mean": __}
        """
        self.agent.compile(Adam(lr=0.001), metrics=["mse"])
        history = self.agent.fit(self.env,
                                 nb_steps=ITER_NUM,
                                 visualize=False,
                                 verbose=1)
        # keras-rl returns per-episode series; fall back to zeros when fit
        # produced no episodes.
        if len(history.history) > 0:
            episode_reward = history.history["episode_reward"]
            nb_episode_steps = history.history["nb_episode_steps"]
        else:
            episode_reward, nb_episode_steps = [0], [0]  # TODO - placeholder
        # min/max/variance are not tracked by keras-rl; empty placeholders.
        result = {
            EPISODE_REWARD_MEAN: np.array(episode_reward),
            EPISODE_STEP_NUM_MEAN: np.array(nb_episode_steps),
            EPISODE_REWARD_MIN: np.empty([]),
            EPISODE_REWARD_MAX: np.empty([]),
            EPISODE_VARIANCE: np.empty([])
        }
        return result

    def _build_compile_model(self):
        """Build the Q-network: an embedding over discrete states feeding an MLP."""
        model = Sequential()
        # model.add(Flatten(input_shape=(1, self.action_size)))
        model.add(Embedding(self.state_size, 10, input_length=1))  # 600000
        model.add(Reshape((10, )))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(16, activation='relu'))
        model.add(Dense(16, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        return model

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: a int that represents the best action.
        """
        # Shape (1, 1): batch of one single-integer state for the embedding.
        state = np.array([[state]])
        return int(np.argmax(self.model.predict(state)))

    def stop_episode(self):
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        pass

    def evaluate(self, visualize=False):
        """Run 5 capped evaluation episodes through keras-rl's test loop."""
        self.agent.test(self.env,
                        nb_episodes=5,
                        visualize=visualize,
                        nb_max_episode_steps=60)

    def replay_experiences(self):
        pass
        # NOTE(review): everything below the `pass` looks like scrape
        # corruption — statements from an unrelated `agent(states, actions)`
        # helper were appended here. `model`, `states` and `actions` are
        # undefined in this scope, so these lines would raise NameError if
        # ever reached. Kept byte-identical; confirm against the original
        # source before relying on this method.
        model.add(Flatten(input_shape=(1, states)))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(actions, activation="linear"))
        return model

    # NOTE(review): the following class-level statements also appear to be
    # scrape corruption — they reference `agent`, `env` and `ready`, none of
    # which are defined at class scope, and would run at class-definition
    # time. Kept byte-identical.
    model = agent(env.observation_space.shape[0], env.action_space.n)



    policy = EpsGreedyQPolicy()

    sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)

    sarsa.compile("adam", metrics=["mse"])

    sarsa.fit(env, nb_steps=10000, visualize=False, verbose=1)


    ready()


    scores = sarsa.test(env, nb_episodes=5, visualize=True)

    print('Average score over 100 test games:{}'.format(np.mean(scores.history['episode_reward'])))

    #sarsa.save_weights('sarsa_weights.h5f', overwrite=True) # save trained weights

    # sarsa.load_weights('sarsa_weights.h5f') # can be used to load trained weights
# Q-network factory: a small fully-connected net with 3 hidden layers.
def agent(states, actions):
    """Build an MLP mapping a flattened (1, states) observation to one
    linear Q-value per action."""
    net = Sequential()
    net.add(Flatten(input_shape=(1, states)))
    for width in (24, 24, 24):
        net.add(Dense(width, activation='relu'))
    net.add(Dense(actions, activation='linear'))
    return net


# Build the Q-network sized to the CartPole observation/action spaces.
model = agent(env.observation_space.shape[0], env.action_space.n)

policy = EpsGreedyQPolicy()
# Create a keras-rl SARSA agent (on-policy [state > action > reward] learner).
sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)
# Compile before load_weights: keras-rl requires a compiled agent.
sarsa.compile('adam', metrics=['mse'])

# Training is disabled; pre-trained weights are loaded instead.
# sarsa.fit(env, nb_steps = 50000, visualize = False, verbose = 1)
sarsa.load_weights('cartpolekerassarsa.h5f')

scores = sarsa.test(env, nb_episodes=10, visualize=False)
print('Average score over 10 test games: {}'.format(
    np.mean(scores.history['episode_reward'])))

# NOTE(review): this re-saves the weights that were just loaded (no
# training happened in between) -- the save is redundant as written.
sarsa.save_weights('cartpolekerassarsa.h5f', overwrite=True)
sarsa.test(env, nb_episodes=2, visualize=True)
# ---- Esempio n. 24 (example separator from the scraped page; score: 0) ----
# Softmax (Boltzmann) exploration over Q-values.
policy = BoltzmannQPolicy()
# Alternative exploration schedule kept from the original example:
# policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
#                               nb_steps=10000)

if args.use_sarsa:
    # On-policy SARSA: no replay memory is required.
    agent = SARSAAgent(
        model=model,
        nb_actions=nb_actions,
        nb_steps_warmup=10,
        policy=policy,
    )
else:
    # Off-policy DQN: sample transitions from a sliding-window replay buffer.
    memory = SequentialMemory(limit=50000, window_length=1)
    agent = DQNAgent(
        model=model,
        memory=memory,
        nb_actions=nb_actions,
        nb_steps_warmup=50,
        policy=policy,
    )

agent.compile(Adam(lr=args.learning_rate), metrics=['mae'])

# Train without rendering -- visualization slows training considerably and
# the run can always be aborted safely with Ctrl + C.
agent.fit(env, nb_steps=args.n_steps, visualize=False, verbose=2)

# Weight saving and final evaluation were disabled in the original example:
#sarsa.save_weights('sarsa_osc_weights.h5f', overwrite=True)
#sarsa.test(env, nb_episodes=5, visualize=True)
# ---- Esempio n. 25 (example separator from the scraped page; score: 0) ----
        hiddenLayer = Dense(nHiddenLayerNodes,
                            activation='relu',
                            kernel_initializer=weight_initializer)(hiddenLayer)

# Linear output head: one Q-value per action, on top of the hidden stack.
outputLayer = Dense(nb_actions, activation='linear')(hiddenLayer)

model = Model(inputLayer, outputLayer)
print(model.summary())

# SARSA is on-policy and does not require a replay memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

if loadFromExisting:
    # Reuse previously trained weights.
    sarsa.load_weights(file_path)
else:
    # Train from scratch, timing the run, then persist the weights.
    startTime = time.time()
    sarsa.fit(env, nb_steps=nSteps, visualize=True, verbose=1)
    endTime = time.time()
    sarsa.save_weights(file_path, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=True)

if not loadFromExisting:
    # NOTE(review): the body of this branch was lost when the example was
    # scraped (the source truncates here -- presumably it reported the
    # startTime/endTime training duration); `pass` keeps the file parseable.
    pass
# ---- Esempio n. 26 (example separator from the scraped page; score: 0) ----
class DQN:
    """Reinforcement-learning wrapper around keras-rl's SARSAAgent.

    Despite the class name, the learner built in ``__init__`` is a SARSA
    agent (see ``self.agent``), not a DQN.  Supports two environments:
    gym's CartPole-v1 and a custom Dobot robot environment, builds an
    LSTM-based Q-network, a linearly-annealed exploration policy, and
    exposes fit/test/load helpers plus throttled weight checkpointing.
    """
    def __init__(
            self,
            env="CartPole-v1",
            emulateOculus=True,
            visualize=True,
            teachingFilesPath=None,
            # NOTE(review): mutable default argument -- this dict (and the
            # EpsGreedyQPolicy instance inside it) is created once at
            # function-definition time and shared by every call that omits
            # policyValues.
            policyValues={
                "inner_policy": EpsGreedyQPolicy(),
                "attr": "eps",
                "value_max": 0.75,
                "value_min": .01,
                "value_test": .0,
                "nb_steps": 50000
            },
            dobotEmulation=False):
        """Build the environment, Q-network, policy and SARSA agent.

        :param env: "CartPole-v1" or "Dobot"; anything else raises TypeError
        :param emulateOculus: forwarded to dobotGym (Dobot env only)
        :param visualize: forwarded to dobotGym (Dobot env only)
        :param teachingFilesPath: forwarded to dobotGym (Dobot env only)
        :param policyValues: parameters for the LinearAnnealedPolicy wrapper
        :param dobotEmulation: forwarded to dobotGym (Dobot env only)
        :raises TypeError: for an unrecognized ``env`` name
        """
        self.policyValues = policyValues
        # Make Graphviz reachable for keras plotting utilities (Windows path).
        os.environ[
            "PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        print("physical_devices-------------", len(physical_devices))
        # NOTE(review): raises IndexError on machines with no GPU, because
        # physical_devices is then empty and [0] is indexed unconditionally.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        self.episodeLength = 25
        if env == "CartPole-v1":
            self.env = gym.make('CartPole-v1')
            self.states = self.env.observation_space.shape[0]
            self.actions = self.env.action_space.n
            self.saveFileName = 'sarsa_weights.h5f'
            logdir = "logs/CartPoleV1/" + datetime.now().strftime(
                "%Y%m%d-%H%M%S")
            self.tensorboard_callback = keras.callbacks.TensorBoard(
                log_dir=logdir)
            self.visualize = True
        elif env == "Dobot":
            self.env = dobotGym.dobotGym(emulateOculus=emulateOculus,
                                         episodeLength=self.episodeLength,
                                         visualize=visualize,
                                         teachingFilesPath=teachingFilesPath,
                                         dobotEmulation=dobotEmulation)
            self.states = self.env.observation_space.shape[0]
            # NOTE(review): the Dobot branch reads action_space.shape[0]
            # (continuous space) while CartPole uses .n (discrete) --
            # confirm SARSAAgent actually supports this action space.
            self.actions = self.env.action_space.shape[0]
            self.saveFileName = 'sarsa_weights_dobot.h5f'
            logdir = "logs/Dobot/" + datetime.now().strftime("%Y%m%d-%H%M%S")
            self.tensorboard_callback = keras.callbacks.TensorBoard(
                log_dir=logdir)
            # NOTE(review): overrides the `visualize` parameter -- both
            # branches hard-code self.visualize = True.
            self.visualize = True
        else:
            raise TypeError("Wrong env")

        print(
            'States', self.states
        )  # To get an idea about the number of variables affecting the environment
        print(
            'Actions', self.actions
        )  # To get an idea about the number of possible actions in the environment, do [right,left]

        #

        # Manual random-play sanity check, kept disabled:
        # episodes = 10
        # for episode in range(1, episodes + 1):
        #     # At each begining reset the game
        #     state = self.env.reset()
        #     # set done to False
        #     done = False
        #     # set score to 0
        #     score = 0
        #     # while the game is not finished
        #     while not done:
        #         # visualize each step
        #         self.env.render()
        #         # choose a random action
        #         action = random.choice([0, 1])
        #         # execute the action
        #         n_state, reward, done, info = self.env.step(action)
        #         # keep track of rewards
        #         score += reward
        #     print('episode {} score {}'.format(episode, score))

        # not working :(
        # self.agent = self.agentDDP(self.states, self.actions)
        # self.agent = self.NAFAgent(self.states, self.actions)

        # self.policy = EpsGreedyQPolicy()

        # Throttle for saveAgentWeights: only every (savingFreq+1)-th call
        # actually writes a checkpoint.
        self.savingFreq = 100
        self.actualSaving = 0

        self.model = self.agentSarsa(self.states, self.actions)
        # Exploration anneals linearly from value_max to value_min over
        # nb_steps environment steps; value_test is used during .test().
        self.policy = LinearAnnealedPolicy(
            inner_policy=self.policyValues["inner_policy"],
            attr=self.policyValues["attr"],
            value_max=self.policyValues["value_max"],
            value_min=self.policyValues["value_min"],
            value_test=self.policyValues["value_test"],
            nb_steps=self.policyValues["nb_steps"])
        self.agent = SARSAAgent(model=self.model,
                                policy=self.policy,
                                nb_actions=self.actions)

        # HACK: monkey-patch keras internals so ModelCheckpoint-style
        # callbacks treat the keras-rl agent like a regular keras model.
        self.agent._is_graph_network = True

        def t():
            return False

        self.agent._in_multi_worker_mode = t

        # Redirect keras-style .save() to the throttled weight saver below.
        self.agent.save = self.saveAgentWeights

        def lenmeh():
            return self.actions

        # self.agent.__len__ = lenmeh

    def saveAgentWeights(self, path, overwrite=True):
        """Throttled checkpointing: skip until savingFreq calls accumulate,
        then write the weights and reset the counter.

        NOTE(review): the ``path`` argument is ignored -- a timestamped path
        under model/checkpoint/ is always used instead.
        """
        if self.actualSaving < self.savingFreq:
            self.actualSaving += 1
            return None
        else:
            self.actualSaving = 0
        path = 'model/checkpoint/' + datetime.now().strftime(
            "%Y%m%d-%H%M%S") + self.saveFileName
        self.agent.save_weights(path, overwrite)

    def agentSarsa(self, states, actions):
        """Build the Q-network: an LSTM front end over a (1, states) input,
        four sigmoid Dense layers, and a linear head of size ``actions``.

        The architecture (as JSON) is also written to model\\checkpoint for
        later inspection; the model is stored on self.model and returned.
        """
        self.model = Sequential()
        self.model.add(LSTM(42, activation='sigmoid', input_shape=(1, states)))
        self.model.add(Dense(42, activation='sigmoid'))
        self.model.add(Dense(42, activation='sigmoid'))
        self.model.add(Dense(24, activation='sigmoid'))
        self.model.add(Dense(12, activation='sigmoid'))
        self.model.add(Dense(actions, activation='linear'))
        self.path = fileOperation.saveToFolder(self.model.to_json(),
                                               name='modelShape',
                                               folder="model\\checkpoint")

        # , stateful=False states are resetted together after each batch.
        # model.add(Flatten(input_shape=(1, states)))
        # dot_img_file = '/model_1.png'
        # keras.utils.plot_model(self.model, to_file=dot_img_file, show_shapes=True)
        # model.reset_states()
        return self.model

    def load(self):
        """Load agent weights from a .h5f file chosen via a file dialog.

        NOTE(review): compile() is called both before and after
        load_weights -- keras-rl requires compiling before loading, but the
        second compile looks redundant; confirm before removing.
        """
        path = fileOperation.openDialogFunction(".h5f")
        self.agent.compile('adam', metrics=['mse'])
        self.agent.load_weights(path)
        self.agent.compile('adam', metrics=['mse'])

    def test(self, nb_episodes=2):
        """Run ``nb_episodes`` evaluation episodes with the trained agent."""
        _ = self.agent.test(self.env,
                            nb_episodes=nb_episodes,
                            visualize=self.visualize)

    def fit(self, visualize=False):
        """Compile and train the agent, then report a 5-episode evaluation.

        Training logs go to TensorBoard; a full-model checkpoint is written
        every 25 batches via ModelCheckpoint.
        """
        checkpoint_filepath = 'model/checkpoint/'
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=False,
            save_freq=25)
        self.agent.compile('adam', metrics=['mse'])
        self.agent.fit(
            self.env,
            nb_steps=self.policyValues["nb_steps"],
            log_interval=self.episodeLength,
            visualize=visualize,
            verbose=1,
            nb_max_start_steps=1,
            # presumably resets the LSTM state at each episode start --
            # TODO confirm against keras-rl's start_step_policy contract
            start_step_policy=self.model.reset_states,

            # callbacks=[PlotLossesKeras()])
            callbacks=[self.tensorboard_callback, model_checkpoint_callback],
        )

        scores = self.agent.test(self.env, nb_episodes=5, visualize=visualize)
        print('Average score over 5 test games:{}'.format(
            np.mean(scores.history['episode_reward'])))