Example #1
def main():
    # Create env
    np.random.seed(SEED)    
    env = PentagoEnv(SIZE, agent_starts = AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile the agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1000, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))    
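Examples #1 and #2 presuppose the usual keras-rl/Keras imports plus a few module-level names (PentagoEnv, SIZE, SEED, AGENT_STARTS, TAG) that come from the surrounding project. A minimal, assumption-laden sketch of the header such a script would need:

# Sketch only: PentagoEnv, SIZE, SEED, AGENT_STARTS and TAG are project-specific assumptions.
import datetime

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import RMSprop

from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

from pentago_env import PentagoEnv  # hypothetical module providing the environment

SIZE = 6             # board size (assumption)
SEED = 123           # random seed (assumption)
AGENT_STARTS = True  # whether the agent moves first (assumption)
TAG = 'pentago'      # tag used in the weights filename (assumption)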
Example #2
def main():
    np.random.seed(123)    
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    #model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1e-2, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)    
Example #3
def test_single_dqn_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = SequentialMemory(limit=10, window_length=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #4
class DQN(BaseAgent):
  def __init__(self, model, processor, policy, test_policy, num_actions):
    # Replay memory
    memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                              window_length=opt.dqn_window_length)
    self.agent = DQNAgent(model=model,
                          nb_actions=num_actions,
                          policy=policy,
                          test_policy=test_policy,
                          memory=memory,
                          processor=processor,
                          batch_size=opt.dqn_batch_size,
                          nb_steps_warmup=opt.dqn_nb_steps_warmup,
                          gamma=opt.dqn_gamma,
                          target_model_update=opt.dqn_target_model_update,
                          enable_double_dqn=opt.enable_double_dqn,
                          enable_dueling_network=opt.enable_dueling_network,
                          train_interval=opt.dqn_train_interval,
                          delta_clip=opt.dqn_delta_clip)
    self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

  def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
      callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)

  def test(self, env, num_episodes, visualize=False):
    self.agent.test(env=env,
                    nb_episodes=num_episodes,
                    action_repetition=opt.dqn_action_repetition,
                    verbose=2,
                    visualize=visualize)

  def save(self, out_dir):
    self.agent.save_weights(out_dir, overwrite=True)

  def load(self, out_dir):
    self.agent.load_weights(out_dir)
Example #5
def test_multi_dqn_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         processor=processor, enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #6
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model((window_length,) + env.observation_space.shape, layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    policy = BoltzmannQPolicy()

    # memory can help a model during training
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False, window_length=window_length)

    # DQN agent as described in Mnih et al. (2013), with the Double DQN extension of van Hasselt et al. (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
                     enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    # keras-rl allows one to use any built-in Keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)

    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
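The comment above describes BoltzmannQPolicy as soft-maxing the Q-values. A standalone sketch of that selection rule (the temperature tau and the example Q-values are illustrative, not taken from the snippet):

import numpy as np

def boltzmann_select(q_values, tau=1.0):
    """Sample an action with probability proportional to exp(Q / tau)."""
    q_values = np.asarray(q_values, dtype='float64')
    exp_q = np.exp((q_values - q_values.max()) / tau)  # subtract the max for numerical stability
    probs = exp_q / exp_q.sum()
    return np.random.choice(len(q_values), p=probs)

# e.g. action 1 is picked most often here, but not always:
print(boltzmann_select([0.1, 0.9, 0.3], tau=0.5))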
Example #7
    env = gym.make("Breakout-v0")
    env.seed(1)
    env.reset()
    nb_actions = env.action_space.n
    model = create_model(nb_actions, 4)
    memory = SequentialMemory(limit=1000000, window_length=4)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=1000)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=1000,
                   gamma=.99,
                   target_model_update=100,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])
    dqn.fit(env,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False,
            verbose=2)
    model.save_weights('policy.h5', overwrite=True)
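create_model and AtariProcessor are project code that the snippet does not show. A hedged sketch of what such a processor typically does, modeled on keras-rl's Atari example (the 84x84 frame size and the reward clipping are assumptions):

import numpy as np
from PIL import Image
from rl.core import Processor

INPUT_SHAPE = (84, 84)  # assumption: target frame size

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # RGB frame -> resized grayscale uint8, to keep replay memory small
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Convert the stored uint8 frames back to floats in [0, 1] for the network
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)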
Example #8
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               gamma=.9,
               enable_dueling_network=False,
               dueling_type='avg',
               target_model_update=1e-2,
               policy=policy)
# dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
#                enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=.001, decay=.001), metrics=['mae'])

rewards = []
callbacks = [TrainEpisodeLogger(), History()]
hist = dqn.fit(env, nb_steps=10000, visualize=False, verbose=2, callbacks=callbacks)
rewards.extend(hist.history.get('episode_reward'))
plt.plot(rewards)

dqn.test(env, nb_episodes=5, visualize=True)

state = env.reset()
action = env.action_space.sample()
print(action)
state_list = []
for i in range(300):
    state_list.append(state)
    # action = np.argmax(dqn.model.predict(np.expand_dims(np.expand_dims(state, 0), 0))[0])
    state, reward, done, _ = env.step(2)
    env.render()
env.render(close=True)
Example #9
    checkpoint_weights_filename = 'dqn_' + Snake_env.name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(Snake_env.name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)
    ]
    callbacks += [FileLogger(log_filename, interval=1000)]

    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    #if weights:
    #    weights_filename_1 = weights
    #dqn.load_weights(weights_filename_1)

    # Start training
    dqn.fit(Snake_env,
            callbacks=callbacks,
            nb_steps=step,
            log_interval=1000,
            verbose=1)

    # Save the weights
    dqn.save_weights(weights_filename, overwrite=True)

elif mode == 'test':

    # Load the weights
    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    if weights:
        weights_filename = weights
    dqn.load_weights(weights_filename)
    dqn.test(Snake_env, nb_episodes=10, visualize=True)
Example #10
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
Example #11
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=25310, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)
Example #12
def fit_dqn(env,
            force: bool = False,
            dueling: bool = False,
            root_dir: str = ""):
    nb_actions = env.action_space.n

    loaded = False
    model_weights_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5")
    model_history_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5f.log")

    if not force and model_weights_path.exists():
        model = load_model(str(model_weights_path))
        with open(model_history_path, "rb") as f:
            history = pickle.load(f)
        loaded = True
    else:
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()

    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=1000,
        target_model_update=1e-2,
        policy=policy,
        enable_dueling_network=dueling,
        dueling_type="avg",
    )

    dqn.compile(Adam(lr=1e-3), metrics=["mae"])

    if loaded:
        return dqn, history

    metrics = Metrics(dqn)
    history = dqn.fit(
        env,
        nb_steps=10000,
        start_step_policy=env.start_step_policy,
        nb_max_start_steps=10,
        nb_max_episode_steps=100,
        callbacks=[metrics],
    )

    model.save(str(model_weights_path))
    with open(model_history_path, "wb") as f:
        history = history.history
        history.update(metrics.metrics)
        pickle.dump(history, f)

    return dqn, history
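The Metrics callback passed to dqn.fit() above is not defined in the snippet; history.update(metrics.metrics) only tells us it exposes a dict named metrics. A minimal sketch under that assumption:

from rl.callbacks import Callback

class Metrics(Callback):
    """Hypothetical callback: collect per-episode statistics for the given agent."""
    def __init__(self, agent):
        super().__init__()
        self.agent = agent
        self.metrics = {'episode_reward': [], 'nb_episode_steps': []}

    def on_episode_end(self, episode, logs={}):
        # keras-rl passes episode_reward and nb_episode_steps in the episode logs
        self.metrics['episode_reward'].append(logs.get('episode_reward'))
        self.metrics['nb_episode_steps'].append(logs.get('nb_episode_steps'))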
Example #13
if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt
    # exception so that training can be prematurely aborted. Notice
    # that you can use the built-in Keras callbacks!

    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
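FileLogger dumps its per-episode data to the JSON file configured above. A hedged sketch of inspecting that log after training (assumes the file contains 'episode' and 'episode_reward' series and that matplotlib is available):

import json

import matplotlib.pyplot as plt

with open('dqn_{}_log.json'.format(args.env_name)) as f:
    log = json.load(f)

plt.plot(log['episode'], log['episode_reward'])
plt.xlabel('episode')
plt.ylabel('episode reward')
plt.show()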
Example #14
# Configure
memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
# policy = EpsGreedyQPolicy(0.5)
#policy = RlPolicy(0.5, 0.2)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

# Compile
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env,
        nb_steps=100000,
        visualize=False,
        verbose=2,
        nb_max_episode_steps=200)

# Persist
dqn.save_weights('dqn_{}_weights.h5f'.format("roborl"), overwrite=True)

# Test
while True:
    dqn.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)

print("FINISHED!")
Example #15
               target_model_update=1,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]

    # Let's just keep training the same model
    dqn.load_weights(weights_filename)
    dqn.fit(env, callbacks=callbacks, nb_steps=10000, log_interval=5000)
    dqn.save_weights(weights_filename, overwrite=True)
    env.reset()
    dqn.test(env, nb_episodes=1, visualize=True)

    # After training is done, we save the final weights one more time.
    # Finally, evaluate our algorithm for 10 episodes.

elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.fit(env, nb_steps=1000000, visualize=True)
Example #16
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

weights_filename = 'callbacks/dqn_flappy_weights.h5f'
checkpoint_weights_filename = 'callbacks/dqn_flappy_weights_{step}.h5f'
log_filename = 'dqn_flappy_log.json'

callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
]
callbacks += [FileLogger(log_filename, interval=100)]

history = dqn.fit(env,
                  callbacks=callbacks,
                  nb_steps=1500000,
                  log_interval=10000,
                  verbose=2)
#44,856.951 seconds

plt.plot(history.history["episode_reward"])
plt.savefig('images/episode_reward_with_preprocessing.png', dpi=100)
plt.show()

plt.plot(history.history["nb_episode_steps"])
plt.savefig('images/nb_episode_steps_with_preprocessing.png', dpi=100)
plt.show()

env.reset()
dqn.test(env, nb_episodes=10, visualize=True)
Example #17
def main():
    # OPTIONS
    ENV_NAME = 'OcNewActionSpaceEnv-v0'
    TIME_STEP = 100
    set_gpu_option()
    # Get the environment and extract the number of actions.
    '''
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    '''
    PATH_TRAIN = '/home/data/training_x_150.h5'
    PATH_TEST = '/home/data/test_x_150.h5'
    """
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)
    """
    store = pd.HDFStore(PATH_TRAIN, mode='r')
    varieties_list = store.keys()
    variety = 'I'
    print('variety: ', variety)
    env = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TRAIN)
    env_test = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    print('nb_actions: ', nb_actions)
    print('env.shape: ', env.shape)
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=200,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy,
                   processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    tbCallBack = TensorBoard(histogram_freq=0,
                             write_grads=True,
                             write_images=True)

    while True:
        # train
        '''
        for e in range(500):
            print('epoch: {}'.format(e))
            if os.path.isfile('weights'):
                print('weight file exist')
                print('load weights')
                dqn.load_weights('weights')
            else:
                print('weight file does not exist')
        '''
        dqn.fit(env,
                nb_steps=70000,
                nb_max_episode_steps=None,
                visualize=False,
                verbose=2,
                callbacks=[tbCallBack])
        #dqn.save_weights('weights', overwrite=True)
        #print('fit: ', fit)

        try:
            # validate
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            n_long, n_short, total_reward, portfolio = info['n_trades'][
                'long'], info['n_trades']['short'], info['total_reward'], int(
                    info['portfolio'])
            np.array([info]).dump(
                './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward))
            print('info saved')
            dqn.save_weights(
                './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward),
                overwrite=True)
            print('weight saved')
        except KeyboardInterrupt:
            continue
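NormalizerProcessor is project code that is not shown. A hedged sketch of a processor that standardizes each state batch before it reaches the network (the z-scoring scheme is an assumption, not the original implementation):

import numpy as np
from rl.core import Processor

class NormalizerProcessor(Processor):
    """Hypothetical processor: z-score every state batch handed to the model."""
    def process_state_batch(self, batch):
        batch = batch.astype('float32')
        mean = batch.mean(axis=0, keepdims=True)
        std = batch.std(axis=0, keepdims=True) + 1e-8  # avoid division by zero
        return (batch - mean) / std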
Example #18
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=target_model_update,
               policy=policy,
               batch_size=32)

lr_06_05_20_49 = 1e-3
lr_06_05_22_18 = 1e-2
lr_06_13_19_07 = 5e-4
lr = lr_06_05_20_49
dqn.compile(Adam(lr=lr), metrics=['mae'])

if if_learn:
    dqn.fit(env,
            nb_steps=nb_steps,
            visualize=True,
            verbose=2,
            nb_max_episode_steps=nb_max_episode_steps)

    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
else:
    dqn.load_weights(weights_name + '.h5f')
    env.steps_before_rendering = 0

dqn.test(env,
         nb_episodes=30,
         visualize=True,
         nb_max_episode_steps=nb_max_episode_steps)
Example #19
    weights_filename = 'wts/phy_dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'wts/phy_dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'phy_dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    if os.path.isfile(checkpoint_weights_filename):
        print("Loading previous checkpoint weights...")
        dqn.load_weights(checkpoint_weights_filename)
    elif os.path.isfile(weights_filename):
        print("Loading previous weights...")
        dqn.load_weights(weights_filename)
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=20000000,
            log_interval=10000,
            nb_max_start_steps=20)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=1, nb_max_start_steps=20, visualize=False)
elif args.mode == 'test':
    weights_filename = 'wts/phy_dqn_BreakoutDeterministic-v4_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    np.random.seed(None)
    env.seed(None)
Example #20
memory = SequentialMemory(limit=50000, window_length=1)
policy = CustomEpsGreedyQPolicy(automataEnv=env, eps=.9)
dqn_only_embedding = DQNAgent(gamma=.999,
                              model=model,
                              nb_actions=action_size,
                              memory=memory,
                              nb_steps_warmup=500,
                              target_model_update=1e-2,
                              policy=policy,
                              test_policy=policy)
dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
dqn_only_embedding.fit(env,
                       nb_steps=50000,
                       visualize=False,
                       verbose=1,
                       nb_max_episode_steps=100,
                       log_interval=10000,
                       start_step_policy=policy)
q_values = dqn_only_embedding.compute_batch_q_values([0])
for i in range(1, state_size):
    q_values = np.vstack(
        (q_values, dqn_only_embedding.compute_batch_q_values([i])))

#dqn_only_embedding.test(env, nb_episodes=5, visualize=False, verbose=1, nb_max_episode_steps=100,
#                     start_step_policy=policy)

# Path for car 0 to MI
env.reset()
env.step(21)
env.step(4)
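The loop above stacks one row of Q-values per state, so q_values ends up with shape (state_size, nb_actions). A short illustrative follow-up that reads off the greedy action for each state:

import numpy as np

greedy_actions = np.argmax(q_values, axis=1)  # best action per state under the learned Q-function
for state, action in enumerate(greedy_actions):
    print('state {:3d} -> action {}'.format(state, action))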
Example #21
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=50000)

dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

dqn.test(env, nb_episodes=5)
Example #22
from poloniex.gym_mikasa import MikasaLast4Env

# create Mikasa gym env
env = MikasaLast4Env()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# create model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent
policy = EpsGreedyQPolicy(eps=0.01)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])

# run agent
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

plt.plot(history.history['episode_reward'])
plt.show()
Example #23
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

folder_path = '../model_saves/Vanilla/'

if args.mode == 'train':
    weights_filename = folder_path + 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = folder_path + 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'dqn_' + args.env_name + '_REWARD_DATA.txt'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)
    ]
    callbacks += [TrainEpisodeLogger(log_filename)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=10000000,
            verbose=0,
            nb_max_episode_steps=20000)

elif args.mode == 'test':
    weights_filename = folder_path + 'dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)
Example #24
class Agent(object):
    name = 'DQN'

    def __init__(
            self,
            step_size=1,
            window_size=20,
            max_position=5,
            fitting_file='ETH-USD_2018-12-31.xz',
            testing_file='ETH-USD_2018-01-01.xz',
            env='market-maker-v0',
            seed=1,
            action_repeats=4,
            number_of_training_steps=1e5,
            gamma=0.999,
            format_3d=False,  # add 3rd dimension for CNNs
            train=True,
            weights=True,
            z_score=True,
            visualize=False,
            dueling_network=True,
            double_dqn=True):
        """
        Agent constructor
        :param step_size: int, number of steps to take in env for a given simulation step
        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        self.env_name = env
        self.env = gym.make(self.env_name,
                            fitting_file=fitting_file,
                            testing_file=testing_file,
                            step_size=step_size,
                            max_position=max_position,
                            window_size=window_size,
                            seed=seed,
                            action_repeats=action_repeats,
                            training=train,
                            z_score=z_score,
                            format_3d=format_3d)
        # Number of frames to stack e.g., 1.
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1
        self.model = self.create_model()
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = train
        self.number_of_training_steps = number_of_training_steps
        self.weights = weights
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.visualize = visualize

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=3e-4), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self):
        """
        Create a Convolutional neural network with dense layer at the end
        :return: keras model
        """
        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D

        model.add(
            conv(input_shape=features_shape,
                 filters=16,
                 kernel_size=[10, 1],
                 padding='same',
                 activation='relu',
                 strides=[5, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[6, 1],
                 padding='same',
                 activation='relu',
                 strides=[3, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[4, 1],
                 padding='same',
                 activation='relu',
                 strides=[2, 1],
                 data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('linear'))
        model.add(Dense(self.env.action_space.n))
        model.add(Activation('softmax'))

        print(model.summary())
        return model

    def start(self):
        """
        Entry point for agent training and testing
        :return: (void)
        """
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)

        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_{}'.format(self.env_name) + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
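A hypothetical way to drive this class; the argument values are illustrative, and weights=False avoids loading a checkpoint that may not exist yet:

if __name__ == '__main__':
    agent = Agent(env='market-maker-v0',
                  train=True,
                  weights=False,
                  number_of_training_steps=1e5,
                  visualize=False)
    print(agent)
    agent.start()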
Example #25
env = gym.make(ENV_NAME)

nb_actions = env.action_space.n

model = Sequential()

model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))

model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))

model.add(Dense(nb_actions, activation='linear'))

policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=50000, verbose=2)

dqn.test(env, nb_episodes=10, visualize=True)
Example #26
    action = layers.Dense(actions, activation="linear")(layer5)
    return K.Model(inputs=inputs, outputs=action)


# This will automatically use a saved model!
if path.exists("policy.h5"):
    print("Using saved model!")
    model = K.models.load_model('policy.h5')
else:
    print("Using new model!")
    model = create_q_model(actions)

# setting up the DQN agent and keras-rl stuff
memory = SequentialMemory(limit=1000000, window_length=4)
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
    value_test=.05, nb_steps=850000)
stateprocess = AtariProcessor()
dqn = DQNAgent(
    model=model, nb_actions=actions, memory=memory,
    nb_steps_warmup=35, target_model_update=1e-2, policy=policy,
    processor=stateprocess, enable_double_dqn=True)
dqn.compile(
    optimizer=Adam(lr=.00025, clipnorm=1.0),
    metrics=['mae', 'accuracy'])
dqn.fit(env, nb_steps=1750000, callbacks=[
    ModelIntervalCheck('policy.h5', 1000, 1, model)], visualize=True)

# Saving the policy network
model.save("policy.h5")
Example #27
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.

    # callbacks = []
    # if model_checkpoints:
    #     callbacks += [
    #         ModelIntervalCheckpoint(
    #             './checkpoints/checkpoint_weights.h5f',
    #             interval=checkpoint_interval)
    #     ]
    # if tensorboard:
    #     callbacks += [TensorBoard(log_dir='./logs')]
    # dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2,callbacks=callbacks)

    dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('double_dqn_{}_weights.h5f'.format(ENV_NAME),
                     overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=args.visualize)

# TESTING BASED ON SAVED WEIGHTS
if args.mode == 'test':
    weights_filename = 'double_dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    if args.callbacks == True:
Example #28
print(model.summary())

# Compile
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env, nb_steps=10000, visualize=True, verbose=1, log_interval=1)

#plot
plt.plot(env.x_plot, env.reward_plot, color='blue')
plt.title("Learning curve")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.show()

# Save the learned parameters
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Test
dqn.test(env, nb_episodes=5, visualize=True)
Example #29
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   enable_dueling_network=False,
                   dueling_type='avg',
                   target_model_update=1e-4,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)

else:
    # SARSA
    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(state_size // 2))
Example #30
def attempt(lr, numTrainSteps, fnamePrefix, activation, exportVid, visualize):
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    print("env.observation_space.shape: " + str(env.observation_space.shape))

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation(activation))
    model.add(Dense(13))
    model.add(Activation(activation))
    model.add(Dense(10))
    model.add(Activation(activation))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=lr), metrics=['mae'])
    if not os.path.exists(fnamePrefix):
        os.makedirs(fnamePrefix)
    weights_fname = '%s/weights.h5f' % fnamePrefix
    if os.path.isfile(weights_fname):
        print("Loading weights from before")
        print("Skipping training")
        dqn.load_weights(weights_fname)
    else:
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        dqn.fit(env, nb_steps=numTrainSteps, visualize=False, verbose=1)

        # After training is done, we save the final weights.
        dqn.save_weights(weights_fname, overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    env.reset()
    env.close()
    if exportVid:
        if not visualize:
            # print to stderr, since trainAll redirects stdout
            eprint(
                "Error: I don't think the video export works unless you choose visualize=True"
            )
        videoFname = fnamePrefix + '/videos/' + str(time())
        if not os.path.exists(videoFname):
            os.makedirs(videoFname)
        env = wrappers.Monitor(env, videoFname, force=True)
    result = dqn.test(env, nb_episodes=1, visualize=visualize)
    if exportVid:
        print("Video saved to %s" % videoFname)
    means = {'reward': mean(result.history['episode_reward'])}
    json_fname = fnamePrefix + '/result.json'
    with open(json_fname, "w") as f:
        json.dump(result.history, f)
    return (means)
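A hypothetical invocation of attempt(); the learning rate, step count and output directory are illustrative:

means = attempt(lr=1e-3,
                numTrainSteps=50000,
                fnamePrefix='runs/relu_1e-3',
                activation='relu',
                exportVid=False,
                visualize=False)
print(means)  # e.g. {'reward': ...}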
Example #31
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
Example #32
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=300)

import rl.callbacks
class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []

    def on_step_end(self, step, logs):
        episode = logs['episode']
Example #33
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)


# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)

Example #34
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True, nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025/4), metrics=['mae'])

folder_path = './'

mode = 'train'

if mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=0, nb_max_episode_steps=20000)


elif mode == 'test':
    weights_filename = folder_path + 'pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)

Example #35
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
# # # Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5, visualize=False, verbose=1, log_interval=LOG_INTERVAL, nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a simply-named version to make running a test immediately
# after training easier.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True,  action_repetition=5) #nb_max_episode_steps=500,
Example #36
# memory = EpisodeParameterMemory(limit=20, window_length=window_length)  # Non-episodic
memory = SequentialMemory(limit=20, window_length=window_length)

policy = EpsGreedyQPolicy(eps=0.1)

agent = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=train_step,  # A3C TRPO
    target_model_update=1e-2,
    policy=policy)
agent.compile(Adam(lr=1e-2), metrics=['mae'])

print("Fit model")
st = time.time()
agent.fit(env, train_step)
ed = time.time()
print("Training: %f [s]" % (ed - st))

# Reset environment
# env.rewind()
env = GraphEnv(graph, query, cond, base_step, test_step, time_limit,
               window_length)

st = time.time()
agent.test(env, nb_episodes=1)
ed = time.time()
print("Testing: %f [s]" % (ed - st))
from models import LordTateKanti

env = SuperiorEnv(
    env=halite_env.Env(),
    tiles_num=16,
)
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", replay=False)
nb_actions = env.action_space.n

model = LordTateKanti.make_model(env)
print(model.summary())

memory = SequentialMemory(limit=10_000, window_length=1)
policy = BoltzmannGumbelQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               target_model_update=1e-2,
               policy=policy,
               gamma=0.99)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

callbacks = [
    #ModelIntervalCheckpoint('dqn_PlanetCaptureBot_weights_{step}.h5f', interval=100),
    TrainEpisodeLogger()
]

dqn.fit(env, nb_steps=100_000, visualize=False, verbose=0, callbacks=callbacks)
dqn.save_weights('dqn_SuperiorBot_weights_final.h5f', overwrite=True)
Example #38
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #39
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=500,
               target_model_update=1e-2,
               policy=policy,
               enable_double_dqn=False,
               batch_size=512)
dqn.compile(Adam())

try:
    dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
except Exception as e:
    print(e)
    pass

temp_folder = tempfile.mkdtemp()
# env.monitor.start(temp_folder)

dqn.fit(env, nb_steps=1e5, visualize=False, verbose=1, log_interval=10000)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=20, visualize=False)
# env.monitor.close()

# upload = input("Upload? (y/n)")
# if upload == "y":
#     gym.upload(temp_folder, api_key='YOUR_OWN_KEY')
Example #40
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
    processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.), reward_range=(-1., 1.),
    target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #41
memory = SequentialMemory(limit=2000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10000,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Define 'test' for testing an existing network weights or 'train' to train a new one!
mode = 'test'

if mode == 'train':
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
    hist = dqn.fit(env, nb_steps=300000, visualize=False, verbose=2)
    with open(
            'C:/Users/JMPF/PycharmProjects/ShipAI/ShipAI/_experiments/history_dqn_test_'
            + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    dqn.save_weights('h5f_files/dqn_{}_weights.h5f'.format(filename),
                     overwrite=True)
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)

if mode == 'test':
    env.set_test_performace()  # Define the initialization as performance test
    env.set_save_experice()  # Save the test to plot the results after
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
Example #42
from keras.optimizers import Adam
import gym
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = gym.make('MountainCar-v0')
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=30000, window_length=1)

policy = EpsGreedyQPolicy(eps=0.001)
dqn = DQNAgent(model=model, nb_actions=nb_actions, gamma=0.99, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

history = dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

dqn.test(env, nb_episodes=1, visualize=True)
Example #43
def training_game():
    env = Environment(
        map_name="DefeatRoaches",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.2,
                                  value_test=.0,
                                  nb_steps=1e2)

    # Agent

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   enable_double_dqn=True,
                   enable_dueling_network=True,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor,
                   delta_clip=1)

    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback

    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph',
                                            histogram_freq=0,
                                            write_graph=True,
                                            write_images=False)

    # Save the parameters and upload them when needed

    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    dqn.fit(env,
            callbacks=[callbacks, s, logs],
            nb_steps=600,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)