def run_sarsa():

    global N_NODE_NETWORK

    env = SnakeGymDiscrete()
    nb_actions = env.action_space.n

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # create model
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)
    sarsa.save_weights('sarsa_SnakeGymDiscrete_weights.h5f', overwrite=True)

    sarsa.test(env, nb_episodes=5, visualize=True)
Beispiel #2
0
def main():

    model = Sequential()
    model.add(Flatten(input_shape=(1, 7)))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dense(units=6, activation='linear'))
    logger.info(model.summary())

    steps = 1E9
    interval = steps // 100

    # policy = MyPolicy()
    policy = BoltzmannQPolicy()
    agent = SARSAAgent(model=model, nb_actions=6, policy=policy, train_interval=10, nb_steps_warmup=10)

    adam = Adam()
    sgd = SGD(lr=1e-3, momentum=0, decay=0, nesterov=False)
    agent.compile(optimizer=adam, metrics=['mse'])

    env = MyEnv()
    agent.fit(env, steps, verbose=2, visualize=True)

    fp = Path(__file__).resolve().parent / 'sarsa_weights.h5f'
    agent.save_weights(fp, overwrite=True)

    logger.info('Done')
Beispiel #3
0
def main():
    # binance = DataReader()
    env = BinanceEnv()
    # binance.get_recent_trades()
    # env.next_observation()
    # binance_market = BinanceMarket()
    # binance_market.long()
    # time.sleep(3)
    # binance_market.close_long()
    # time.sleep(3)
    # binance_market.short()
    # time.sleep(3)
    # binance_market.close_short()
    # binance_market.update_positions()
    # print(binance_market.balance)

    # episodes = 10
    # for episode in range(1, episodes + 1):
    #     # At each begining reset the game
    #     state = env.reset()
    #     # set done to False
    #     done = False
    #     # set score to 0
    #     score = 0
    #     # while the game is not finished
    #     while not done:
    #         # visualize each step
    #         env.render()
    #         # choose a random action
    #         action = random.randint(0, 5)
    #         # execute the action
    #         n_state, reward, done, info = env.step(action)
    #         # keep track of rewards
    #         score += reward
    #     print('episode {} score {}'.format(episode, score))

    model = agent(env.observation_space.shape[0], env.action_space.n)
    policy = EpsGreedyQPolicy()
    sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)
    sarsa.compile('adam', metrics=['mse', 'accuracy'])
    # sarsa.load_weights('sarsa_weights_bnb_07.h5f')
    env.is_testing = False
    sarsa.fit(env, nb_steps=100000, visualize=False, verbose=1)
    sarsa.save_weights('sarsa_weights_bnb_07_1.h5f', overwrite=True)
    # sarsa.load_weights('sarsa_weights_bnb_07_1.h5f')
    # env.simulator = False
    env.is_testing = True
    scores = sarsa.test(env, nb_episodes=1, visualize=False)
    print('Average score over 100 test games:{}'.format(np.mean(scores.history['episode_reward'])))

    _ = sarsa.test(env, nb_episodes=10, visualize=True)
    obs = env.reset()
    for i in range(2000):
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        env.render()
# Make a neural net with 3 hidden layers
def agent(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1, states)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


# Actually make a neural net with 3 hidden layers
model = agent(env.observation_space.shape[0], env.action_space.n)

policy = EpsGreedyQPolicy()
# Create a tensorflow reinforcement learning agent using the [state > action > reward] system
sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)
# Choose how we calculate reward and modify the model
sarsa.compile('adam', metrics=['mse'])

# sarsa.fit(env, nb_steps = 50000, visualize = False, verbose = 1)
sarsa.load_weights('cartpolekerassarsa.h5f')

scores = sarsa.test(env, nb_episodes=10, visualize=False)
print('Average score over 10 test games: {}'.format(
    np.mean(scores.history['episode_reward'])))

sarsa.save_weights('cartpolekerassarsa.h5f', overwrite=True)
sarsa.test(env, nb_episodes=2, visualize=True)
Beispiel #5
0
sarsa.compile(Adam(lr=lrn_rate), metrics=['mae'])

# setting up callbacks for result collection and realtime visualization of the results through tensorboard
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
tpl = TrainEpisodeLogger()

# finally perform the training----- visualize=False enables training without visualizing the game which speeds up the training process
sarsa.fit(env,
          nb_steps=nb_steps,
          visualize=False,
          verbose=2,
          callbacks=[tensorboard, tpl],
          nb_max_episode_steps=nb_max_episode_steps)

# save the model weights
sarsa.save_weights('sarsa_%d_%s_weights.h5f' % (scale, ENV_NAME),
                   overwrite=True)

# save the training results
metrics = []


def dict_to_list(dc):
    re = []
    for key in dc:
        re.append(dc[key])
    return re


tt = dict_to_list(tpl.rewards_mean)
mm = np.array(tt[:-1])
kk = dict_to_list(tpl.metrics_at_end)
Beispiel #6
0
class DistopiaSARSA:
    def __init__(self,
                 env_name='distopia-initial4-v0',
                 in_path=None,
                 out_path=None,
                 terminate_on_fail=False,
                 reconstruct=False):
        self.ENV_NAME = env_name
        self.filename = self.ENV_NAME
        self.init_paths(in_path, out_path)
        self.init_env(terminate_on_fail)
        self.init_model(reconstruct)
        self.compile_agent()

    def init_paths(self, in_path, out_path):
        self.in_path = in_path  #if self.in_path != None else './'
        self.out_path = out_path if out_path != None else './'
        self.log_path = "./logs/{}".format(time.time())
        os.mkdir(self.log_path)

    def init_env(self, terminate_on_fail):
        self.env = gym.make(self.ENV_NAME)
        self.env.terminate_on_fail = terminate_on_fail
        self.env.record_path = "{}/ep_".format(self.log_path)
        self.env = gym.wrappers.Monitor(self.env, "recording", force=True)
        np.random.seed(234)
        self.env.seed(234)
        self.nb_actions = np.sum(self.env.action_space.nvec)
        self.num_actions = self.env.NUM_DIRECTIONS
        self.num_blocks = self.env.NUM_DISTRICTS * self.env.BLOCKS_PER_DISTRICT

    def init_model(self, reconstruct=False):
        if self.in_path != None:
            if reconstruct == True:
                self.construct_model()
            else:
                yaml_file = open(
                    "{}/{}.yaml".format(self.in_path, self.filename), 'r')
                model_yaml = yaml_file.read()
                yaml_file.close()
                self.model = model_from_yaml(model_yaml)
            self.model.load_weights("{}/{}.h5".format(self.in_path,
                                                      self.filename))
        else:
            # Next, we build a very simple model.
            self.construct_model()
        self.save_model()
        print(self.model.summary())

    def construct_model(self):
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.env.observation_space.shape))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        # self.model.add(Dense(16))
        # self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))

    def save_model(self):
        if self.out_path != None:
            with open(self.filename + ".yaml", 'w+') as yaml_file:
                yaml_file.write(self.model.to_yaml())
            self.model.save_weights('{}/{}.h5'.format(self.out_path,
                                                      self.ENV_NAME))

    def compile_agent(self):
        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        processor = DistopiaProcessor(self.num_blocks, self.num_actions)
        #memory = SequentialMemory(limit=50000, window_length=1)
        #policy = PatchedBoltzmannQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
        #test_policy = PatchedGreedyQPolicy(num_actions = self.num_actions, num_blocks = self.num_blocks)
        policy = BoltzmannQPolicy()
        test_policy = GreedyQPolicy()
        self.sarsa = SARSAAgent(model=self.model,
                                processor=processor,
                                nb_actions=self.nb_actions,
                                nb_steps_warmup=1000,
                                policy=policy,
                                test_policy=test_policy,
                                gamma=0.9)
        self.sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    def train(self, max_steps=100, episodes=100):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.env._max_steps = max_steps
        #for i in range(episodes):
        self.env.current_step = 0
        n_steps = max_steps * episodes
        logger = FileLogger(
            filepath='{}/{}.json'.format(self.out_path, self.ENV_NAME))
        self.sarsa.fit(self.env,
                       nb_steps=n_steps,
                       nb_max_episode_steps=max_steps,
                       visualize=False,
                       verbose=1,
                       callbacks=[logger])
        #self.env.reset()

        # After episode is done, we save the final weights.
        self.sarsa.save_weights('{}/{}.h5'.format(self.out_path,
                                                  self.ENV_NAME),
                                overwrite=True)

    def test(self):
        # Finally, evaluate our algorithm for 5 episodes.
        self.sarsa.test(self.env,
                        nb_episodes=5,
                        nb_max_start_steps=0,
                        visualize=True)
Beispiel #7
0
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

# After training is done, we save the final weights.
sarsa.save_weights(f'sarsa_{ENV_NAME}_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=True)
Beispiel #8
0
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    # processor_noisy = CartpoleSurrogateProcessor(e_= ERR_N, e=ERR_P, surrogate=False)
    # processor_surrogate = CartpoleSurrogateProcessor(e_= ERR_N, e=ERR_P, surrogate=True)
    if not SMOOTH:
        processor_noisy = CartpoleProcessor(e_= ERR_N, e=ERR_P, smooth=False, surrogate=False)
        processor_surrogate = CartpoleProcessor(e_= ERR_N, e=ERR_P, smooth=False, surrogate=True)
    else:
        processor_noisy = CartpoleProcessor(e_= ERR_N, e=ERR_P, smooth=True, surrogate=False)
        processor_surrogate = CartpoleProcessor(e_= ERR_N, e=ERR_P, smooth=True, surrogate=True)        

    if REWARD == "normal":
        sarsa_normal = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, 
                                  policy=policy)
        sarsa_normal.compile(Adam(lr=1e-3), metrics=['mae'])
        history_normal = sarsa_normal.fit(env, nb_steps=50000, visualize=False, verbose=2)
        sarsa_normal.save_weights(os.path.join(LOG_DIR, 'sarsa_normal_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
        sarsa_normal.test(env, nb_episodes=10, visualize=False, verbose=2)

        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))


    elif REWARD == "noisy":
        sarsa_noisy = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, 
                                 policy=policy, processor=processor_noisy)
        sarsa_noisy.compile(Adam(lr=1e-3), metrics=['mae'])
        history_noisy = sarsa_noisy.fit(env, nb_steps=50000, visualize=False, verbose=2)
        if not SMOOTH:
            sarsa_noisy.save_weights(os.path.join(LOG_DIR, 'sarsa_noisy_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))
        else:
            sarsa_noisy.save_weights(os.path.join(LOG_DIR, 'sarsa_noisy_smooth_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy_smooth.csv"))

        sarsa_noisy.test(env, nb_episodes=10, visualize=False)


    elif REWARD == "surrogate":
        sarsa_surrogate = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, 
                                     policy=policy, processor=processor_surrogate)
        sarsa_surrogate.compile(Adam(lr=1e-3), metrics=['mae'])
        history_surrogate = sarsa_surrogate.fit(env, nb_steps=50000, visualize=False, verbose=2)
        if not SMOOTH:
            sarsa_surrogate.save_weights(os.path.join(LOG_DIR, 'sarsa_surrogate_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))

        else:
            sarsa_surrogate.save_weights(os.path.join(LOG_DIR, 'sarsa_surrogate_smooth_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate_smooth.csv"))

        sarsa_surrogate.test(env, nb_episodes=10, visualize=False)
Beispiel #9
0
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
sarsa.fit(env, nb_steps=500000, visualize=False, verbose=2)

# After training is done, we save the final weights.
sarsa.save_weights(
    'learning_tools/learning_nn/keras-rl/sarsa_{}_weights.h5f'.format(
        CONFIG_FILE[7:-4]),
    overwrite=True)

env.close()
del env

# Finally, evaluate our algorithm for 5 episodes.
# sarsa.test(env, nb_episodes=5, visualize=True)
Beispiel #10
0
def agent(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1, states)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


model = agent(env.observation_space.shape[0], env.action_space.n)

from rl.agents import SARSAAgent
from rl.policy import EpsGreedyQPolicy
policy = EpsGreedyQPolicy()
sarsa = SARSAAgent(model=model, policy=policy, nb_actions=env.action_space.n)
sarsa.compile('adam', metrics=['mse'])

sarsa.fit(env, nb_steps=50000, visualize=False, verbose=1)

scores = sarsa.test(env, nb_episodes=100, visualize=False)
print('Average score over 100 test games:{}'.format(
    np.mean(scores.history['episode_reward'])))

sarsa.save_weights('sarsa_weights.h5f', overwrite=True)

_ = sarsa.test(env, nb_episodes=2, visualize=True)

env.close()
Beispiel #11
0
class DQN:
    def __init__(
            self,
            env="CartPole-v1",
            emulateOculus=True,
            visualize=True,
            teachingFilesPath=None,
            policyValues={
                "inner_policy": EpsGreedyQPolicy(),
                "attr": "eps",
                "value_max": 0.75,
                "value_min": .01,
                "value_test": .0,
                "nb_steps": 50000
            },
            dobotEmulation=False):
        self.policyValues = policyValues
        os.environ[
            "PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        print("physical_devices-------------", len(physical_devices))
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        self.episodeLength = 25
        if env == "CartPole-v1":
            self.env = gym.make('CartPole-v1')
            self.states = self.env.observation_space.shape[0]
            self.actions = self.env.action_space.n
            self.saveFileName = 'sarsa_weights.h5f'
            logdir = "logs/CartPoleV1/" + datetime.now().strftime(
                "%Y%m%d-%H%M%S")
            self.tensorboard_callback = keras.callbacks.TensorBoard(
                log_dir=logdir)
            self.visualize = True
        elif env == "Dobot":
            self.env = dobotGym.dobotGym(emulateOculus=emulateOculus,
                                         episodeLength=self.episodeLength,
                                         visualize=visualize,
                                         teachingFilesPath=teachingFilesPath,
                                         dobotEmulation=dobotEmulation)
            self.states = self.env.observation_space.shape[0]
            self.actions = self.env.action_space.shape[0]
            self.saveFileName = 'sarsa_weights_dobot.h5f'
            logdir = "logs/Dobot/" + datetime.now().strftime("%Y%m%d-%H%M%S")
            self.tensorboard_callback = keras.callbacks.TensorBoard(
                log_dir=logdir)
            self.visualize = True
        else:
            raise TypeError("Wrong env")

        print(
            'States', self.states
        )  # To get an idea about the number of variables affecting the environment
        print(
            'Actions', self.actions
        )  # To get an idea about the number of possible actions in the environment, do [right,left]

        #

        # episodes = 10
        # for episode in range(1, episodes + 1):
        #     # At each begining reset the game
        #     state = self.env.reset()
        #     # set done to False
        #     done = False
        #     # set score to 0
        #     score = 0
        #     # while the game is not finished
        #     while not done:
        #         # visualize each step
        #         self.env.render()
        #         # choose a random action
        #         action = random.choice([0, 1])
        #         # execute the action
        #         n_state, reward, done, info = self.env.step(action)
        #         # keep track of rewards
        #         score += reward
        #     print('episode {} score {}'.format(episode, score))

        # not working :(
        # self.agent = self.agentDDP(self.states, self.actions)
        # self.agent = self.NAFAgent(self.states, self.actions)

        # self.policy = EpsGreedyQPolicy()

        self.savingFreq = 100
        self.actualSaving = 0

        self.model = self.agentSarsa(self.states, self.actions)
        self.policy = LinearAnnealedPolicy(
            inner_policy=self.policyValues["inner_policy"],
            attr=self.policyValues["attr"],
            value_max=self.policyValues["value_max"],
            value_min=self.policyValues["value_min"],
            value_test=self.policyValues["value_test"],
            nb_steps=self.policyValues["nb_steps"])
        self.agent = SARSAAgent(model=self.model,
                                policy=self.policy,
                                nb_actions=self.actions)

        self.agent._is_graph_network = True

        def t():
            return False

        self.agent._in_multi_worker_mode = t

        self.agent.save = self.saveAgentWeights

        def lenmeh():
            return self.actions

        # self.agent.__len__ = lenmeh

    def saveAgentWeights(self, path, overwrite=True):
        if self.actualSaving < self.savingFreq:
            self.actualSaving += 1
            return None
        else:
            self.actualSaving = 0
        path = 'model/checkpoint/' + datetime.now().strftime(
            "%Y%m%d-%H%M%S") + self.saveFileName
        self.agent.save_weights(path, overwrite)

    def agentSarsa(self, states, actions):
        self.model = Sequential()
        self.model.add(LSTM(42, activation='sigmoid', input_shape=(1, states)))
        self.model.add(Dense(42, activation='sigmoid'))
        self.model.add(Dense(42, activation='sigmoid'))
        self.model.add(Dense(24, activation='sigmoid'))
        self.model.add(Dense(12, activation='sigmoid'))
        self.model.add(Dense(actions, activation='linear'))
        self.path = fileOperation.saveToFolder(self.model.to_json(),
                                               name='modelShape',
                                               folder="model\\checkpoint")

        # , stateful=False states are resetted together after each batch.
        # model.add(Flatten(input_shape=(1, states)))
        # dot_img_file = '/model_1.png'
        # keras.utils.plot_model(self.model, to_file=dot_img_file, show_shapes=True)
        # model.reset_states()
        return self.model

    def load(self):
        path = fileOperation.openDialogFunction(".h5f")
        self.agent.compile('adam', metrics=['mse'])
        self.agent.load_weights(path)
        self.agent.compile('adam', metrics=['mse'])

    def test(self, nb_episodes=2):
        _ = self.agent.test(self.env,
                            nb_episodes=nb_episodes,
                            visualize=self.visualize)

    def fit(self, visualize=False):
        checkpoint_filepath = 'model/checkpoint/'
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=False,
            save_freq=25)
        self.agent.compile('adam', metrics=['mse'])
        self.agent.fit(
            self.env,
            nb_steps=self.policyValues["nb_steps"],
            log_interval=self.episodeLength,
            visualize=visualize,
            verbose=1,
            nb_max_start_steps=1,
            start_step_policy=self.model.reset_states,

            # callbacks=[PlotLossesKeras()])
            callbacks=[self.tensorboard_callback, model_checkpoint_callback],
        )

        scores = self.agent.test(self.env, nb_episodes=5, visualize=visualize)
        print('Average score over 5 test games:{}'.format(
            np.mean(scores.history['episode_reward'])))
Beispiel #12
0
                     verbose=2,
                     nb_max_episode_steps=500,
                     callbacks=[tb])  # 20s episodes

    # print history
    print("history contents : ",
          hist.history.keys())  # episode_reward, nb_episode_steps, nb_steps
    # summarize history for accuracy
    import matplotlib.pyplot as plt
    plt.plot(hist.history['episode_reward'])
    plt.plot(hist.history['nb_episode_steps'])
    plt.title('learning')
    plt.xlabel('episode')
    plt.legend(['episode_reward', 'nb_episode_steps'], loc='upper left')
    plt.show()

    # save history
    with open('_experiments/history_' + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    sarsa.save_weights('h5f_files/dqn_{}_weights.h5f'.format(filename),
                       overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    sarsa.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)

if mode == 'test':
    sarsa.load_weights('h5f_files/dqn_{}_weights.h5f'.format(filename))
    sarsa.test(env, nb_episodes=10, visualize=True,
               nb_max_episode_steps=400)  # 40 seconds episodes
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

# After training is done, we save the final weights.
sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=True)
Beispiel #14
0
            # calculate agent reward based on forward model loss
            r_intr = (fwd_loss[0]**0.5) / 100
            #            print "r_intr = %d" % r_intr

            # TODO: could use a ratio for intrinsic vs environment reward
            reward = r_intr  # + env_reward
            #            print "reward = %f" % reward

            # apply reward
            action = agent.backward(reward, done)

            if done:
                inverse_model.save_weights(inv_weights_fname, overwrite=True)
                forward_model.save_weights(fwd_weights_fname, overwrite=True)
                agent.save_weights(agent_weights_fname, overwrite=True)
                break
    env.close()

    exit()

#=========================================================================#
# NOTES:
# x start with random actions + test observations
# x train/test inverse model
# x train/test forward model
# x calculate agent reward (based on forward model)
# x change agent to sarsa
# x add forward pass
# x add backward pass
# x save/load weights
Beispiel #15
0
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

# After training is done, we save the final weights.
sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=10, visualize=True)
Beispiel #16
0
                            kernel_initializer=weight_initializer)(hiddenLayer)

outputLayer = Dense(nb_actions, activation='linear')(hiddenLayer)

model = Model(inputLayer, outputLayer)
print(model.summary())

# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

if loadFromExisting:
    sarsa.load_weights(file_path)
else:
    startTime = time.time()
    sarsa.fit(env, nb_steps=nSteps, visualize=True, verbose=1)
    endTime = time.time()
    sarsa.save_weights(file_path, overwrite=True)

# After training is done, we save the final weights.

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=True)

if not loadFromExisting:
    print("Time taken to trian: {0}".format(endTime - startTime))
Beispiel #17
0
                   policy=policy,
                   test_policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Load weights
try:
    #dqn.load_weights(weights_filename)
    sarsa.load_weights(weights_filename)
except OSError:
    print("no saved weights found")
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
#dqn.fit(env, nb_steps=5000000, visualize=False, verbose=2)
sarsa.fit(env,
          nb_steps=50000,
          visualize=False,
          verbose=1,
          callbacks=[WandbCallback()])

# After training is done, we save the final weights.
#dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
#dqn.test(env, nb_episodes=5, visualize=True)
sarsa.test(env, nb_episodes=5, visualize=True)

# Save weights
#dqn.save_weights(weights_filename, overwrite=True)
sarsa.save_weights(weights_filename, overwrite=True)