Python DQNAgent Beispiele, rl.agents.dqn.DQNAgent Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_dqn.py Projekt: matthiasplappert/keras-rl

def test_single_dqn_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = SequentialMemory(limit=10, window_length=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)

Beispiel #2

0

Datei anzeigen

Datei: dqn.py Projekt: petrosgk/RL_experiments

class DQN(BaseAgent):
  def __init__(self, model, processor, policy, test_policy, num_actions):
    # Replay memory
    memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                              window_length=opt.dqn_window_length)
    self.agent = DQNAgent(model=model,
                          nb_actions=num_actions,
                          policy=policy,
                          test_policy=test_policy,
                          memory=memory,
                          processor=processor,
                          batch_size=opt.dqn_batch_size,
                          nb_steps_warmup=opt.dqn_nb_steps_warmup,
                          gamma=opt.dqn_gamma,
                          target_model_update=opt.dqn_target_model_update,
                          enable_double_dqn=opt.enable_double_dqn,
                          enable_dueling_network=opt.enable_dueling_network,
                          train_interval=opt.dqn_train_interval,
                          delta_clip=opt.dqn_delta_clip)
    self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

  def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
      callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)

  def test(self, env, num_episodes, visualize=False):
    self.agent.test(env=env,
                    nb_episodes=num_episodes,
                    action_repetition=opt.dqn_action_repetition,
                    verbose=2,
                    visualize=visualize)

  def save(self, out_dir):
    self.agent.save_weights(out_dir, overwrite=True)

  def load(self, out_dir):
    self.agent.load_weights(out_dir)

Beispiel #3

0

Datei anzeigen

Datei: test_dqn.py Projekt: matthiasplappert/keras-rl

def test_multi_dqn_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         processor=processor, enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)

Beispiel #4

0

Datei anzeigen

Datei: keras-rl-test.py Projekt: mortennp/misc

def main():
    np.random.seed(123)    
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    #model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1e-2, policy=policy)
    optimizer=RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

Beispiel #5

0

Datei anzeigen

Datei: keras-rl-test.py Projekt: mortennp/misc

def main():
    # Create env
    np.random.seed(SEED)    
    env = PentagoEnv(SIZE, agent_starts = AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile  agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1000, policy=policy)
    optimizer=RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))

Beispiel #6

0

Datei anzeigen

Datei: dqn.py Projekt: petrosgk/RL_experiments

 def __init__(self, model, processor, policy, test_policy, num_actions):
   # Replay memory
   memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                             window_length=opt.dqn_window_length)
   self.agent = DQNAgent(model=model,
                         nb_actions=num_actions,
                         policy=policy,
                         test_policy=test_policy,
                         memory=memory,
                         processor=processor,
                         batch_size=opt.dqn_batch_size,
                         nb_steps_warmup=opt.dqn_nb_steps_warmup,
                         gamma=opt.dqn_gamma,
                         target_model_update=opt.dqn_target_model_update,
                         enable_double_dqn=opt.enable_double_dqn,
                         enable_dueling_network=opt.enable_dueling_network,
                         train_interval=opt.dqn_train_interval,
                         delta_clip=opt.dqn_delta_clip)
   self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

Beispiel #7

0

Datei anzeigen

Datei: train_agent_kerasrl.py Projekt: olivierh59500/gym-malware

def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model((window_length,) + env.observation_space.shape, layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    policy = BoltzmannQPolicy()

    # memory can help a model during training
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False, window_length=window_length)

    # DQN agent as described in Mnih (2013) and Mnih (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
                     enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    # keras-rl allows one to use and built-in keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)

    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test

Beispiel #8

0

Datei anzeigen

Datei: openAI_variableLengthPendulum_train.py Projekt: DocVaughan/CRAWLAB-Code-Snippets

# Output layer
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
# train_policy = BoltzmannQPolicy(tau=0.05)
train_policy = EpsGreedyQPolicy()
test_policy = GreedyQPolicy()

if DUEL_DQN:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights

Beispiel #9

0

Datei anzeigen

class DQNSecretary:
    # 重み保存先
    weightdir = './data'
    weightfile = './data/dqn_{}_weights.h5'

    # モデルの初期化
    def __init__(self, n=100, recycle=True):
        print('モデルを作成します。')
        self.train_interval_logger = None

        # Get the environment and extract the number of actions.
        self.env = Secretary(n=n)
        self.env_name = 'secretary'
        self.weightfile = self.__class__.weightfile.format(self.env_name)
        self.nb_actions = self.env.action_space.n

        # Next, we build a very simple model.
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.env.observation_space.shape))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))
        #print(self.model.summary())

        # Finally, we configure and compile our agent.
        # You can use every built-in Keras optimizer and even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy(tau=1.)
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=1000,
                            target_model_update=1e-2,
                            policy=policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=[])

        self.__istrained = False
        print('モデルを作成しました。')

        if recycle:
            if exists(self.weightfile):
                try:
                    print('訓練済み重みを読み込みます。')
                    self.dqn.load_weights(self.weightfile)
                    self.__istrained = True
                    print('訓練済み重みを読み込みました。')
                    return None
                except:
                    print('訓練済み重みの読み込み中にエラーが発生しました。')
                    print('Unexpected error:', exc_info()[0])
                    raise
            else:
                print('訓練済み重みが存在しません。訓練を行ってください。')

    # 訓練
    def train(self,
              nb_steps=30000,
              verbose=1,
              visualize=False,
              log_interval=3000):
        if self.__istrained:
            raise RuntimeError('このモデルは既に訓練済みです。')

        print('訓練を行うので、お待ちください。')

        # 訓練実施
        # Okay, now it's time to learn something!
        # We visualize the training here for show, but this slows down training quite a lot.
        # You can always safely abort the training prematurely using Ctrl + C.
        callbacks = []
        if verbose == 1:
            self.train_interval_logger = TrainIntervalLogger2(
                interval=log_interval)
            callbacks.append(self.train_interval_logger)
            verbose = 0
        elif verbose > 1:
            callbacks.append(TrainEpisodeLogger())
            verbose = 0

        hist = self.dqn.fit(self.env,
                            nb_steps=nb_steps,
                            callbacks=callbacks,
                            verbose=verbose,
                            visualize=visualize,
                            log_interval=log_interval)
        self.__istrained = True

        if self.train_interval_logger is not None:
            # 訓練状況の可視化
            interval = self.train_interval_logger.records['interval']
            episode_reward = self.train_interval_logger.records[
                'episode_reward']
            mean_q = self.train_interval_logger.records['mean_q']
            if len(interval) > len(mean_q):
                mean_q = np.pad(mean_q, [len(interval) - len(mean_q), 0],
                                "constant")
            plt.figure()
            plt.plot(interval, episode_reward, marker='.', label='報酬')
            plt.plot(interval, mean_q, marker='.', label='Q値')
            plt.legend(loc='best', fontsize=10)
            plt.grid()
            plt.xlabel('interval')
            plt.ylabel('score')
            plt.title('訓練状況')
            plt.xticks(
                np.arange(min(interval),
                          max(interval) + 1,
                          (max(interval) - min(interval)) // 7))
            plt.show()

        # 重みの保存
        if not exists(self.__class__.weightdir):
            try:
                mkdir(self.__class__.weightdir)
            except:
                print('重み保存フォルダの作成中にエラーが発生しました。')
                print('Unexpected error:', exc_info()[0])
                raise
        try:
            # After training is done, we save the final weights.
            self.dqn.save_weights(self.weightfile, overwrite=True)
        except:
            print('重みの保存中にエラーが発生しました。')
            print('Unexpected error:', exc_info()[0])
            raise

        return hist

    # テスト
    def test(self, nb_episodes=10, visualize=True, verbose=1):
        # Finally, evaluate our algorithm for 5 episodes.
        hist = self.dqn.test(self.env,
                             nb_episodes=nb_episodes,
                             verbose=verbose,
                             visualize=visualize)
        return hist

Beispiel #10

0

Datei anzeigen

                              attr='eps',
                              value_max=1.,
                              value_min=.1,
                              value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=policy,
               memory=memory,
               processor=processor,
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    save_dir = "./saved_model/" + args.rl_agent
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    weights_filename = 'dqn_{}_weights.h5f'.format(env_name_ram)
    checkpoint_weights_filename = 'dqn_' + env_name_ram + '_weights_{step}.h5f'

Beispiel #11

0

Datei anzeigen

Datei: RL_Mastermind.py Projekt: herrayush/Delta_Onsite


env = MastermindEnv()
np.random.seed(123)
env.seed(123)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(
    Adam(lr=1e-3), metrics=['mae']
)  #There is an error here, which is not letting me to import this module both in Colab and local Jupyter

dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)

Beispiel #12

0

Datei anzeigen

Datei: rl_with_open_ai_gym_wrapper.py Projekt: Vectorshot/Pok-bot

        attr="eps",
        value_max=1.0,
        value_min=0.05,
        value_test=0,
        nb_steps=10000,
    )
    #loaded_model = tf.keras.models.load_model('model_20000')
    #model.load_weights('weights_DQN.h5')

    # Defining our DQN
    dqn = DQNAgent(
        model=model,
        nb_actions=len(env_player.action_space),
        policy=policy,
        memory=memory,
        nb_steps_warmup=1000,
        gamma=0.5,
        target_model_update=1,
        delta_clip=0.01,
        enable_double_dqn=True,
    )

    dqn.compile(Adam(lr=0.00025), metrics=["mae"])

    # Training
    env_player.play_against(
        env_algorithm=dqn_training,
        opponent=opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_steps": NB_TRAINING_STEPS

Beispiel #13

0

Datei anzeigen

                              value_max=1.,
                              value_min=.1,
                              value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=policy,
               memory=memory,
               processor=processor,
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(learning_rate=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in tensorflow.keras callbacks!
    weights_filename = f'dqn_{args.env_name}_weights.h5f'
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = f'dqn_{args.env_name}_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]

Beispiel #14

0

Datei anzeigen

nb_actions = env.action_space.n

input_shape = (WINDOW_LENGTH, ) + INPUT_SHAPE
model = Sequential()
model.add(Permute((2, 3, 1), input_shape=input_shape))
model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1), activation='relu'))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(nb_actions, activation='relu'))
'''
policy = LinearAnnealedPolicy(GreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=10000)
'''
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=GreedyQPolicy(),
               memory=memory,
               processor=processor,
               nb_steps_warmup=500,
               gamma=.99,
               target_model_update=1e-2,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])
dqn.load_weights(weights_filename)
dqn.test(env, nb_episodes=10, visualize=True)

Beispiel #15

0

Datei anzeigen

Datei: agent_keras_rl.py Projekt: xavier66/ctc-executioner

import gym_ctc_marketmaker
#env = gym.make("ctc-marketmaker-v0")
env.setOrderbook(orderbook)

#model = loadModel(name='model-sell-artificial-2')
model = loadModel(name='model-sell-artificial-sine')
model = createModel()
nrTrain = 100000
nrTest = 10

policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=5000, window_length=1)
# nb_steps_warmup: the default value for that in the DQN OpenAI baselines implementation is 1000
dqn = DQNAgent(model=model,
               nb_actions=len(env.levels),
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# cbs_train = []
# cbs_train = [LivePlotCallback(nb_episodes=20000, avgwindow=20)]
# dqn.fit(env, nb_steps=nrTrain, visualize=True, verbose=2, callbacks=cbs_train)
# saveModel(model=model, name='model-sell-artificial-sine')

cbs_train = []
cbs_test = []
cbs_test = [ActionPlotCallback(nb_episodes=nrTest)]
dqn.test(env,
         nb_episodes=nrTest,
         visualize=True,

Beispiel #16

0

Datei anzeigen

buttons = NoisyNetDense(nb_actions, activation='linear')(dense)
model = Model(inputs=frame,outputs=buttons)
print(model.summary())

memory = PrioritizedMemory(limit=1000000, alpha=.6, start_beta=.4, end_beta=1., steps_annealed=30000000, window_length=WINDOW_LENGTH)

processor = AtariProcessor()

#This is the important difference. Rather than using an E Greedy approach, where
#we keep the network consistent but randomize the way we interpret its predictions,
#in NoisyNet we are adding noise to the network and simply choosing the best value.
policy = GreedyQPolicy()

#N-step loss with n of 3
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1., n_step=3)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025/4), metrics=['mae'])

folder_path = '../model_saves/NoisyNstepPDD/'

if args.mode == 'train':
    dqn.load_weights(folder_path + 'noisynet_pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f')
    weights_filename = folder_path + 'final_noisynet_nstep_pdd_dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = folder_path + 'final_noisynet_nstep_pdd_dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'final_noisynet_nstep_pdd_dqn_' + args.env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000000)]
    callbacks += [TrainEpisodeLogger(log_filename)]
    dqn.fit(env, callbacks=callbacks, nb_steps=30000000, verbose=0, nb_max_episode_steps=20000)

Beispiel #17

0

Datei anzeigen

model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
#print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy,
               enable_dueling_network=True)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.test(env, nb_episodes=5, visualize=True)

for i in range(3, 0, -1):
    print('Sentient agent in', i)
    sleep(1)

fname = f'dqn_{ENV_NAME}_weights.h5f'.lower()
if not os.path.isfile(fname):
    fname = f'dqn_{ENV_NAME}_weights_20190321.h5f'.lower()

Beispiel #18

0

Datei anzeigen

def main():

    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    environment = DiscreteWrapper(environment)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH, ) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = DuckieTownProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        #nb_steps=1000000
        nb_steps=400000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(
        environment,
        callbacks=callbacks,
        #nb_steps=1750000,
        nb_steps=500000,
        log_interval=10000,
        visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(environment, nb_episodes=10, visualize=False)

Beispiel #19

0

Datei anzeigen

Datei: train_dqn.py Projekt: mudita11/DE-DDQN-bbob

model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))
print("Model Summary: ", model.summary())

memory = SequentialMemory(limit=100000, window_length=1)  #100000

# Boltzmann Q Policy
policy = EpsGreedyQPolicy()

# DQN Agent: Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1e4,
               target_model_update=1e3,
               policy=policy,
               enable_double_dqn=True,
               batch_size=64)  # nb_steps_warmup >= nb_steps 2000
# DQN stores the experience in the memory buffer for the first nb_steps_warmup. This is done to get the required size of batch during experience replay.
# When number of steps exceeds nb_steps_warmup then the neural network would learn and update the weight.

# Neural Compilation
dqn.compile(Adam(lr=1e-4), metrics=['mae'])

callbacks = [ModelCheckpoint('dqn_ea_weights.h5f', 1620)]

# Fit the model: training for nb_steps = number of generations
dqn.fit(env,
        callbacks=callbacks,
        nb_steps=162e30,

Beispiel #20

0

Datei anzeigen

# 下一步，我们创建一个简单的单隐层神经网络模型。

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# 接下来，配置并编译我们的代理端。我们将策略设成ε-贪心算法，并且将存储设置成顺序存储方式因为我们想要存储执行操作的结果和每一操作得到的奖励。

print 'preparing'
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

print 'fitting'
dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)

# 现在测试强化学习模型

print 'dtesting'
dqn.test(env, nb_episodes=50, visualize=True)

print 'done'

Beispiel #21

0

Datei anzeigen

model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
#print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=300000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# After training is done, we save the final weights.
dqn.load_weights('dqn_{}_weights_model4.h5f'.format(ENV_NAME))

# Redirect stdout to capture test results
old_stdout = sys.stdout
sys.stdout = mystdout = io.StringIO()

# Evaluate our algorithm for a few episodes.
dqn.test(env, nb_episodes=200, visualize=False)

# Reset stdout
sys.stdout = old_stdout

Beispiel #22

0

Datei anzeigen

Datei: openAI_planarCrane_test.py Projekt: DocVaughan/CRAWLAB-Code-Snippets


# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
# train_policy = BoltzmannQPolicy(tau=0.05)
train_policy = EpsGreedyQPolicy()
test_policy = GreedyQPolicy()


# Compile the agent based on method specified. We use .upper() to convert to 
# upper case for comparison
if METHOD.upper() == 'DUEL_DQN': 
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'SARSA':
     # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
    
elif METHOD.upper() == 'CEM':

Beispiel #23

0

Datei anzeigen

            del self.rewardbuf[0]
        self.rewards[self.episode] = rw
        self.avgrewards[self.episode] = np.mean(self.rewardbuf)
        self.plot()
        self.episode += 1

    def plot(self):
        self.grphinst.set_ydata(self.rewards)
        self.grphavg.set_ydata(self.avgrewards)
        plt.draw()
        plt.pause(0.01)


dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy,
               enable_double_dqn=False)
dqn.compile(Adam(lr=0.002, decay=2.25e-05), metrics=['mse'])

cbs = [EpsDecayCallback(eps_poilcy=policy, decay_rate=0.975)]
cbs += [LivePlotCallback(nb_episodes=4000, avgwindow=20)]
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=2, callbacks=cbs)

dqn.save_weights('{}/dqn_{}_weights.h5f'.format(outdir, ENV_NAME),
                 overwrite=True)

# evaluate the algorithm for 100 episodes.
#dqn.test(env, nb_episodes=100, visualize=True)

Beispiel #24

0

Datei anzeigen

model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add((Dense(HLS)))
model.add(Activation('relu'))
model.add((Dense(HLS)))
model.add(Activation('relu'))
model.add((Dense(HLS)))
model.add(Activation('relu'))
model.add(Dense(n_action))
model.add(Activation('linear'))

print model.summary()  # How did it go? * ( ' ^')*

# Configure the RL agent
memory = SequentialMemory(limit=50000)
policy = BoltzmannQPolicy()

dqn = DQNAgent(model=model,
               nb_actions=n_action,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(optimizer=RMSprop(), metrics=['mae'])

# Training
dqn.fit(env, nb_steps=10000, verbose=2, visualize=False)

# Visualize testing
dqn.test(env, nb_episodes=5, visualize=True)

Beispiel #25

0

Datei anzeigen

            Dense(nb_hidden),
            Activation("relu"),
            Dense(nb_hidden),
            Activation("relu"),
            Dense(nb_actions),
            Activation("linear")
        ])

        memory = SequentialMemory(limit=memory_size,
                                  window_length=window_length)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=nb_steps_warmup,
                       target_model_update=target_model_update,
                       policy=policy,
                       gamma=gamma,
                       batch_size=batch_size,
                       train_interval=train_interval,
                       enable_double_dqn=True,
                       delta_clip=delta_clip)

        dqn.compile(Adam(), metrics=["mae"])

        # Whether to use existing model with the same hyper-parameters, skip its training and only do testing.
        if use_previous and logger_file.format(
                type="train",
                model_name=model_name).split("/")[-1] in existing_models:
            print("Model exists, skipping training...\n")
            dqn.load_weights(
                model_file_prev.format(model_name=model_name, type="weights"))

Beispiel #26

0

Datei anzeigen

Datei: dqn_open_ai_gym_wrapper.py Projekt: vuhcl/poke-env

policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr="eps",
    value_max=1.0,
    value_min=0.05,
    value_test=0,
    nb_steps=10000,
)

# load saved model into DQNAgent class
trained_dqn_agent = DQNAgent(
    model=loaded_model,
    nb_actions=18,
    policy=policy,
    memory=memory,
    nb_steps_warmup=NB_STEPS_WARMUP,
    gamma=0.5,
    target_model_update=1,
    delta_clip=0.01,
    enable_double_dqn=True,
)

##############################################################################

# set random seeds
tf.random.set_seed(0)
np.random.seed(0)


# This is the function that will be used to train the dqn
def dqn_training(player, dqn, nb_steps, filename):

Beispiel #27

0

Datei anzeigen

Datei: test_keras_rl_continious.py Projekt: vxgu86/gym_co2_ventilation

nb_episodes_memory = 1000

try:
    memory = pickle.load(open("memory.pkl", "rb"))
except (FileNotFoundError, EOFError):
    memory = SequentialMemory(limit=nb_episode_steps * nb_episodes_memory,
                              window_length=1)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!

#policy = BoltzmannQPolicy()
policy = EpsGreedyQPolicy(eps=0.01)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

try:
    dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
except (OSError):
    logger.warning("File not found")

n = 0
while True:
    n += 1
    logger.info(f'Iteration #{n}')

    # Run some training

Beispiel #28

0

Datei anzeigen

# In[ ]:

model.summary()

# In[ ]:

memory = SequentialMemory(limit=2000, window_length=1)
policy = BoltzmannQPolicy(tau=1.)
#dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#               target_model_update=1e-2, policy=policy)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=policy,
               memory=memory,
               nb_steps_warmup=1000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# In[ ]:

ENV_NAME = "aftersubmissionv19"
weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'

log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)

Beispiel #29

0

Datei anzeigen

Datei: openAI_planarCrane_train.py Projekt: DocVaughan/CRAWLAB-Code-Snippets

print(model.summary())


# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!

# train_policy = BoltzmannQPolicy()
train_policy = EpsGreedyQPolicy(eps=1.0)
test_policy = GreedyQPolicy()

# Compile the agent based on method specified. We use .upper() to convert to 
# upper case for comparison
if METHOD.upper() == 'DUEL_DQN': 
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'SARSA':
     # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
    
elif METHOD.upper() == 'CEM':

Beispiel #30

0

Datei anzeigen

processor = AtariProcessor()

policy = policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                       attr='eps',
                                       value_max=1.,
                                       value_min=.1,
                                       value_test=.05,
                                       nb_steps=1250000)

dqn = DQNAgent(model=model,
               nb_actions=n_actions,
               policy=policy,
               memory=memory,
               processor=processor,
               enable_double_dqn=False,
               enable_dueling_network=False,
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

# folder_path = './model_saves/Vanilla/'

weights_filename = 'dqn_{}_weights.h5f'.format(env_name)
checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
log_filename = 'dqn_{}_log.json'.format(env_name)
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)

Beispiel #31

0

Datei anzeigen

Datei: dqn_video.py Projekt: mnuke/se499

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    import cProfile
    cProfile.run('dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)')
    # dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

Beispiel #32

0

Datei anzeigen

Datei: train_rl.py Projekt: zQuantz/zLunarLander

memory = SequentialMemory(limit=memory_limit, window_length=window_length)

####################################################################

policy = EpsGreedyQPolicy(eps=eps)
policy = LinearAnnealedPolicy(policy, attr='eps', value_max=eps, 
							  value_min=0, value_test = 0, nb_steps=nb_steps-2000)
test_policy = GreedyQPolicy()

####################################################################

dqn = DQNAgent(model=model, 
			   nb_actions=nb_actions,
			   memory=memory,
			   nb_steps_warmup=window_length+batch_size,
               target_model_update=target_model_update,
               policy=policy, test_policy = test_policy,
               batch_size=batch_size,
               train_interval=train_interval,
               gamma = gamma)

dqn.compile(Adam(lr=lr), metrics=['mae'])

####################################################################

history = dqn.fit(env, nb_steps=nb_steps, visualize=False, verbose=1, action_repetition=action_repetition)

print('\n\n {} \n\n'.format(len(history.history['episode_reward'])))

####################################################################

Beispiel #33

0

Datei anzeigen

Datei: main2.py Projekt: olmin-dev/projetRL

def main():
    env = CarRacing()
    env.reset()

    # load json and create model
    json_file = open('./saved/model2.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = tf.keras.models.model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights("./saved/model2.h5")
    print("Loaded model from disk")
    loaded_model.compile(loss='mse', optimizer='adam')
    model = loaded_model

    # print("State Space {}".format(env.P[331]))

    # model = Sequential(name='rvoum')
    # model.add(Reshape((96,96,3),input_shape = (1,96,96,3)))
    # model.add(Conv2D(filters=32, kernel_size=(3, 3), strides = 3,activation="relu", input_shape=(1,96,96,3)))
    # model.add(Flatten())
    # model.add(Dense(3,activation="selu"))
    # #model.add(Embedding(500,6,input_length = 1, name='Embedding'))
    # model.add(Reshape((3,),name='Reshape'))
    # model = Sequential()
    # model.add(Reshape((96,96,3),input_shape = (1,96,96,3)))
    # model.add(Conv2D(filters=6, kernel_size=(7, 7), strides=3, activation='relu', input_shape=(96, 96, 3)))
    # model.add(MaxPooling2D(pool_size=(2, 2)))
    # model.add(Conv2D(filters=12, kernel_size=(4, 4), activation='relu'))
    # model.add(MaxPooling2D(pool_size=(2, 2)))
    # model.add(Flatten())
    # model.add(Dense(216, activation='relu'))
    # model.add(Dense(16, activation=None))
    # model.compile(loss='mean_squared_error', optimizer=Adam(lr=0.0001, epsilon=1e-8))
    # model.summary()

    policy = EpsGreedyQPolicy()
    memory = SequentialMemory(limit=5000, window_length=1)
    nb_actions = 16

    dqn = DQNAgent(model=model,
                   memory=memory,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   target_model_update=1e-5,
                   policy=policy)
    dqn.compile(Adam(lr=0.0001), metrics=['mse'])

    log_interval = 1e4
    for i in range(0):
        print(i, "\n")
        dqn.fit(env,
                nb_steps=3000,
                log_interval=log_interval,
                verbose=1,
                nb_max_episode_steps=1000,
                action_repetition=3)
        env.close()

        model_json = model.to_json()
        with open("model2.json", "w") as json_file:
            json_file.write(model_json)
            model.save_weights("model2.h5")
            print("Saved model to disk")

    env.init_test()

    dqn.test(env,
             nb_episodes=5,
             visualize=True,
             nb_max_episode_steps=10000,
             action_repetition=2)
    env.close()

Beispiel #34

0

Datei anzeigen

Datei: dqn_line.py Projekt: hatanaka-akihiro/dqn

model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=300)

import rl.callbacks
class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):

Beispiel #35

0

Datei anzeigen

        motor_parameter=dict(r_a=15e-3, r_e=15e-3, l_a=1e-3, l_e=1e-3),
        load_parameter=dict(a=0, b=.1, c=.1, j_load=0.04),
        # Pass a string (with extra parameters)
        ode_solver='euler', solver_kwargs={},
        # Pass a Class with extra parameters
        reference_generator=WienerProcessReferenceGenerator(reference_state='i', sigma_range=(3e-3, 3e-2))
    )
    env = FlattenObservation(env)

    nb_actions = env.action_space.n
    window_length = 1

    model = Sequential()
    model.add(Flatten(input_shape=(window_length,) + env.observation_space.shape))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=15000, window_length=window_length)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(eps=0.2), 'eps', 0.2, 0.01, 0, 20000)
    dqn = DQNAgent(
        model=model, policy=policy, nb_actions=nb_actions, memory=memory, gamma=0.9, batch_size=128,
        train_interval=1, memory_interval=1
    )

    dqn.compile(Adam(lr=1e-4), metrics=['mse'])
    dqn.fit(env, nb_steps=200000, action_repetition=1, verbose=2, visualize=True, nb_max_episode_steps=50000,
            log_interval=10000)
    dqn.test(env, nb_episodes=3, nb_max_episode_steps=50000, visualize=True)

Beispiel #36

0

Datei anzeigen

Datei: evaluation.py Projekt: Denbergvanthijs/DRL-For-imbalanced-Classification

            print_stats=False)
        env = ClassifyEnv(MODE, imb_rate, X_train, y_train)
        memory = SequentialMemory(limit=MEMORY_SIZE, window_length=1)
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                      attr="eps",
                                      value_max=EPS_MAX,
                                      value_min=EPS_MIN,
                                      value_test=0.05,
                                      nb_steps=EPS_STEPS)

        dqn = DQNAgent(model=model,
                       policy=policy,
                       nb_actions=2,
                       memory=memory,
                       processor=processor,
                       nb_steps_warmup=WARMUP_STEPS,
                       gamma=GAMMA,
                       target_model_update=TARGET_MODEL_UPDATE,
                       train_interval=4,
                       delta_clip=1,
                       batch_size=BATCH_SIZE,
                       enable_double_dqn=DOUBLE_DQN)
        dqn.compile(Adam(lr=LR))

        metrics = Metrics(X_val, y_val)
        dqn.fit(env,
                nb_steps=TRAINING_STEPS,
                log_interval=LOG_INTERVAL,
                callbacks=[metrics],
                verbose=0)
        y_pred = make_predictions(dqn.target_model, X_test)
        stats = calculate_metrics(y_test, y_pred)  # Get stats as dictionairy

Beispiel #37

0

Datei anzeigen

Datei: openAI_variableLengthPendulum_test.py Projekt: DocVaughan/CRAWLAB-Code-Snippets

# Output layer
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
# train_policy = BoltzmannQPolicy(tau=0.05)
train_policy = EpsGreedyQPolicy()
test_policy = GreedyQPolicy()

if DUEL_DQN:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load the model weights
dqn.load_weights(FILENAME)

Beispiel #38

0

Datei anzeigen

Datei: rl_worker.py Projekt: kuwabaray/dqn_for_slam

    nb_actions = env.action_space.n

    model = tf.keras.Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(nb_actions))
    print(model.summary())

    memory = SequentialMemory(limit=200000, window_length=1)
    policy = CustomEpsGreedy(max_eps=0.6, min_eps=0.1, eps_decay=0.9997)

    agent = DQNAgent(
        nb_actions=nb_actions,
        model=model,
        memory=memory,
        policy=policy,
        gamma=0.99,
        batch_size=64)

    agent.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])
   
    if mode == 'train':

        #tensorboard_callback = TensorBoard(log_dir="~/tflog/")
        # early_stopping = EarlyStopping(monitor='episode_reward', patience=0, verbose=1)
        history = agent.fit(env,
                            nb_steps=200000,
                            visualize=False,
                            nb_max_episode_steps=500,
                            log_interval=500,

Beispiel #39

0

Datei anzeigen

np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# create model
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent
policy = EpsGreedyQPolicy(eps=0.01)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])

# run agent
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

plt.plot(history.history['episode_reward'])
plt.show()

Beispiel #40

0

Datei anzeigen

Datei: jakegrigsby-pacman-pdd.py Projekt: sunmingtao/sample-code

dense = Flatten()(cv3)
dense = Dense(512, activation='relu')(dense)
buttons = Dense(nb_actions, activation='linear')(dense)
model = Model(inputs=frame,outputs=buttons)
print(model.summary())

#PER
memory = PrioritizedMemory(limit=1000000, alpha=.6, start_beta=.4, end_beta=1., steps_annealed=10000000, window_length=WINDOW_LENGTH)

processor = AtariProcessor()

policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=1250000)


dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True, nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025/4), metrics=['mae'])

folder_path = './'

mode = 'train'

if mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]

Beispiel #41

0

Datei anzeigen

Datei: gym_keras_rl.py Projekt: yuji96/reinforcement-learning-billiard

nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

model.summary()

memory = SequentialMemory(limit=50000, window_length=window_length)
policy = BoltzmannQPolicy()
agent = DQNAgent(model=model,
                 nb_actions=nb_actions,
                 memory=memory,
                 nb_steps_warmup=10,
                 target_model_update=1e-2,
                 policy=policy)
agent.compile(Adam())

import matplotlib.pyplot as plt

# fit の結果を取得しておく
history = agent.fit(env, nb_steps=10000, visualize=False, verbose=1)
# agent.test(env, nb_episodes=5, visualize=True)

# 結果を表示
plt.subplot(2, 1, 1)
plt.plot(history.history["nb_episode_steps"])
plt.ylabel("step")

Beispiel #42

0

Datei anzeigen

Datei: dqn_cartpole.py Projekt: matthiasplappert/keras-rl

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)

Beispiel #43

0

Datei anzeigen

model.add(Dense(16))
# model.add(Dropout(d))
model.add(Activation('relu'))
model.add(Dense(nb_actions, activation='linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2500, window_length=1)
policy = BoltzmannQPolicy()
# enable the dueling network
# you can specify the dueling_type to one of {'avg','max','naive'}
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=2,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Start from a great state
dqn.load_weights('duel_dqn_MountainCar-v0_weights_201806252113.h5f')

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# dqn.fit(env, nb_steps=10000000, visualize=False, verbose=2)

# After training is done, we save the final weights.
# dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

Beispiel #44

0

Datei anzeigen

Datei: dqn_atari.py Projekt: Jaystings/keras-rl

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
    nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
    processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.), reward_range=(-1., 1.),
    target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

Beispiel #45

0

Datei anzeigen

Datei: DuelingDQNCartpole.py Projekt: sts-sadr/Python-Machine-Learning-Cookbook---Second-Edition

model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=1000, visualize=True, verbose=2)

dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

dqn.test(env, nb_episodes=5, visualize=True)

Beispiel #46

0

Datei anzeigen

Datei: test.py Projekt: NibuTake/Qlearning

from keras.optimizers import Adam
import gym
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = gym.make('MountainCar-v0')
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=30000, window_length=1)

policy = EpsGreedyQPolicy(eps=0.001)
dqn = DQNAgent(model=model, nb_actions=nb_actions,gamma=0.99, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

history = dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

dqn.test(env, nb_episodes=1, visualize=True)

Beispiel #47

0

Datei anzeigen

Datei: dqn_snake.py Projekt: bde-slither/gym_pygame_envs

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=1000000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    #policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=20000, gamma=.99, target_model_update=10000,
                   train_interval=1, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])
    agents.append(dqn)

mdqn = IndieMultiAgent(agents)
callback_multi_test={}
for id,agent in enumerate(agents):
        callbacks = [(TestLogger())]
        history=History()
        callbacks += [history]
        callback_multi_test[agent]=callbacks
if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can the built-in Keras callbacks!
    callback_multi={}