Example #1
def main():
    # Create env
    np.random.seed(SEED)    
    env = PentagoEnv(SIZE, agent_starts = AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile the agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1000, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))    
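Examples #1 and #2 presuppose the usual keras-rl/Keras imports plus a few module-level names (PentagoEnv, SIZE, SEED, AGENT_STARTS, TAG) that come from the surrounding project. A minimal, assumption-laden sketch of the header such a script would need:

# Sketch only: PentagoEnv, SIZE, SEED, AGENT_STARTS and TAG are project-specific assumptions.
import datetime

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import RMSprop

from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

from pentago_env import PentagoEnv  # hypothetical module providing the environment

SIZE = 6             # board size (assumption)
SEED = 123           # random seed (assumption)
AGENT_STARTS = True  # whether the agent moves first (assumption)
TAG = 'pentago'      # tag used in the weights filename (assumption)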
Example #2
def main():
    np.random.seed(123)    
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    #model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                    target_model_update=1e-2, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)    
Example #3
def test_single_dqn_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = SequentialMemory(limit=10, window_length=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #4
class DQN(BaseAgent):
  def __init__(self, model, processor, policy, test_policy, num_actions):
    # Replay memory
    memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                              window_length=opt.dqn_window_length)
    self.agent = DQNAgent(model=model,
                          nb_actions=num_actions,
                          policy=policy,
                          test_policy=test_policy,
                          memory=memory,
                          processor=processor,
                          batch_size=opt.dqn_batch_size,
                          nb_steps_warmup=opt.dqn_nb_steps_warmup,
                          gamma=opt.dqn_gamma,
                          target_model_update=opt.dqn_target_model_update,
                          enable_double_dqn=opt.enable_double_dqn,
                          enable_dueling_network=opt.enable_dueling_network,
                          train_interval=opt.dqn_train_interval,
                          delta_clip=opt.dqn_delta_clip)
    self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate), metrics=['mae'])

  def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
      callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)

  def test(self, env, num_episodes, visualize=False):
    self.agent.test(env=env,
                    nb_episodes=num_episodes,
                    action_repetition=opt.dqn_action_repetition,
                    verbose=2,
                    visualize=visualize)

  def save(self, out_dir):
    self.agent.save_weights(out_dir, overwrite=True)

  def load(self, out_dir):
    self.agent.load_weights(out_dir)
Example #5
def test_multi_dqn_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                         processor=processor, enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #6
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model((window_length,) + env.observation_space.shape, layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    policy = BoltzmannQPolicy()

    # memory can help a model during training
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False, window_length=window_length)

    # DQN agent as described in Mnih et al. (2013), with the Double DQN extension of van Hasselt et al. (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
                     enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    # keras-rl allows one to use any built-in Keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)

    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
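The comment above describes BoltzmannQPolicy as soft-maxing the Q-values. A standalone sketch of that selection rule (the temperature tau and the example Q-values are illustrative, not taken from the snippet):

import numpy as np

def boltzmann_select(q_values, tau=1.0):
    """Sample an action with probability proportional to exp(Q / tau)."""
    q_values = np.asarray(q_values, dtype='float64')
    exp_q = np.exp((q_values - q_values.max()) / tau)  # subtract the max for numerical stability
    probs = exp_q / exp_q.sum()
    return np.random.choice(len(q_values), p=probs)

# e.g. action 1 is picked most often here, but not always:
print(boltzmann_select([0.1, 0.9, 0.3], tau=0.5))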
Example #7
    env = gym.make("Breakout-v0")
    env.seed(1)
    env.reset()
    nb_actions = env.action_space.n
    model = create_model(nb_actions, 4)
    memory = SequentialMemory(limit=1000000, window_length=4)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=1000)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=1000,
                   gamma=.99,
                   target_model_update=100,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])
    dqn.fit(env,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False,
            verbose=2)
    model.save_weights('policy.h5', overwrite=True)
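create_model and AtariProcessor are project code that the snippet does not show. A hedged sketch of what such a processor typically does, modeled on keras-rl's Atari example (the 84x84 frame size and the reward clipping are assumptions):

import numpy as np
from PIL import Image
from rl.core import Processor

INPUT_SHAPE = (84, 84)  # assumption: target frame size

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # RGB frame -> resized grayscale uint8, to keep replay memory small
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Convert the stored uint8 frames back to floats in [0, 1] for the network
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)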
Example #8
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               gamma=.9,
               enable_dueling_network=False,
               dueling_type='avg',
               target_model_update=1e-2,
               policy=policy)
# dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
#                enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=.001, decay=.001), metrics=['mae'])

rewards = []
callbacks = [TrainEpisodeLogger(), History()]
hist = dqn.fit(env, nb_steps=10000, visualize=False, verbose=2, callbacks=callbacks)
rewards.extend(hist.history.get('episode_reward'))
plt.plot(rewards)

dqn.test(env, nb_episodes=5, visualize=True)

state = env.reset()
action = env.action_space.sample()
print(action)
state_list = []
for i in range(300):
    state_list.append(state)
    # action = np.argmax(dqn.model.predict(np.expand_dims(np.expand_dims(state, 0), 0))[0])
    state, reward, done, _ = env.step(2)
    env.render()
env.render(close=True)
Example #9
    checkpoint_weights_filename = 'dqn_' + Snake_env.name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(Snake_env.name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)
    ]
    callbacks += [FileLogger(log_filename, interval=1000)]

    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    #if weights:
    #    weights_filename_1 = weights
    #dqn.load_weights(weights_filename_1)

    # Start training
    dqn.fit(Snake_env,
            callbacks=callbacks,
            nb_steps=step,
            log_interval=1000,
            verbose=1)

    # Save the weights
    dqn.save_weights(weights_filename, overwrite=True)

elif mode == 'test':

    # Load the weights
    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    if weights:
        weights_filename = weights
    dqn.load_weights(weights_filename)
    dqn.test(Snake_env, nb_episodes=10, visualize=True)
Example #10
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
Example #11
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=25310, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)
Example #12
def fit_dqn(env,
            force: bool = False,
            dueling: bool = False,
            root_dir: str = ""):
    nb_actions = env.action_space.n

    loaded = False
    model_weights_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5")
    model_history_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5f.log")

    if not force and model_weights_path.exists():
        model = load_model(str(model_weights_path))
        with open(model_history_path, "rb") as f:
            history = pickle.load(f)
        loaded = True
    else:
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()

    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=1000,
        target_model_update=1e-2,
        policy=policy,
        enable_dueling_network=dueling,
        dueling_type="avg",
    )

    dqn.compile(Adam(lr=1e-3), metrics=["mae"])

    if loaded:
        return dqn, history

    metrics = Metrics(dqn)
    history = dqn.fit(
        env,
        nb_steps=10000,
        start_step_policy=env.start_step_policy,
        nb_max_start_steps=10,
        nb_max_episode_steps=100,
        callbacks=[metrics],
    )

    model.save(str(model_weights_path))
    with open(model_history_path, "wb") as f:
        history = history.history
        history.update(metrics.metrics)
        pickle.dump(history, f)

    return dqn, history
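The Metrics callback passed to dqn.fit() above is not defined in the snippet; history.update(metrics.metrics) only tells us it exposes a dict named metrics. A minimal sketch under that assumption:

from rl.callbacks import Callback

class Metrics(Callback):
    """Hypothetical callback: collect per-episode statistics for the given agent."""
    def __init__(self, agent):
        super().__init__()
        self.agent = agent
        self.metrics = {'episode_reward': [], 'nb_episode_steps': []}

    def on_episode_end(self, episode, logs={}):
        # keras-rl passes episode_reward and nb_episode_steps in the episode logs
        self.metrics['episode_reward'].append(logs.get('episode_reward'))
        self.metrics['nb_episode_steps'].append(logs.get('nb_episode_steps'))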
Example #13
if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt
    # exception so that training can be prematurely aborted. Notice
    # that you can use the built-in Keras callbacks!

    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
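FileLogger dumps its per-episode data to the JSON file configured above. A hedged sketch of inspecting that log after training (assumes the file contains 'episode' and 'episode_reward' series and that matplotlib is available):

import json

import matplotlib.pyplot as plt

with open('dqn_{}_log.json'.format(args.env_name)) as f:
    log = json.load(f)

plt.plot(log['episode'], log['episode_reward'])
plt.xlabel('episode')
plt.ylabel('episode reward')
plt.show()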
Example #14
# Configure
memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
# policy = EpsGreedyQPolicy(0.5)
#policy = RlPolicy(0.5, 0.2)
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

# Compile
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env,
        nb_steps=100000,
        visualize=False,
        verbose=2,
        nb_max_episode_steps=200)

# Persist
dqn.save_weights('dqn_{}_weights.h5f'.format("roborl"), overwrite=True)

# Test
while True:
    dqn.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)

print("FINISHED!")
Example #15
               target_model_update=1,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]

    # Let's just keep training the same model
    dqn.load_weights(weights_filename)
    dqn.fit(env, callbacks=callbacks, nb_steps=10000, log_interval=5000)
    dqn.save_weights(weights_filename, overwrite=True)
    env.reset()
    dqn.test(env, nb_episodes=1, visualize=True)

    # After training is done, we save the final weights one more time.
    # Finally, evaluate our algorithm for 10 episodes.

elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.fit(env, nb_steps=1000000, visualize=True)
Example #16
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

weights_filename = 'callbacks/dqn_flappy_weights.h5f'
checkpoint_weights_filename = 'callbacks/dqn_flappy_weights_{step}.h5f'
log_filename = 'dqn_flappy_log.json'

callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
]
callbacks += [FileLogger(log_filename, interval=100)]

history = dqn.fit(env,
                  callbacks=callbacks,
                  nb_steps=1500000,
                  log_interval=10000,
                  verbose=2)
#44,856.951 seconds

plt.plot(history.history["episode_reward"])
plt.savefig('images/episode_reward_with_preprocessing.png', dpi=100)
plt.show()

plt.plot(history.history["nb_episode_steps"])
plt.savefig('images/nb_episode_steps_with_preprocessing.png', dpi=100)
plt.show()

env.reset()
dqn.test(env, nb_episodes=10, visualize=True)
Example #17
def main():
    # OPTIONS
    ENV_NAME = 'OcNewActionSpaceEnv-v0'
    TIME_STEP = 100
    set_gpu_option()
    # Get the environment and extract the number of actions.
    '''
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    '''
    PATH_TRAIN = '/home/data/training_x_150.h5'
    PATH_TEST = '/home/data/test_x_150.h5'
    """
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)
    """
    store = pd.HDFStore(PATH_TRAIN, mode='r')
    varieties_list = store.keys()
    variety = 'I'
    print('variety: ', variety)
    env = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TRAIN)
    env_test = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    print('nb_actions: ', nb_actions)
    print('env.shape: ', env.shape)
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=200,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=1e-2,
                   policy=policy,
                   processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    tbCallBack = TensorBoard(histogram_freq=0,
                             write_grads=True,
                             write_images=True)

    while True:
        # train
        '''
        for e in range(500):
            print('epoch: {}'.format(e))
            if os.path.isfile('weights'):
                print('weight file exist')
                print('load weights')
                dqn.load_weights('weights')
            else:
                print('weight file does not exist')
        '''
        dqn.fit(env,
                nb_steps=70000,
                nb_max_episode_steps=None,
                visualize=False,
                verbose=2,
                callbacks=[tbCallBack])
        #dqn.save_weights('weights', overwrite=True)
        #print('fit: ', fit)

        try:
            # validate
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            n_long, n_short, total_reward, portfolio = info['n_trades'][
                'long'], info['n_trades']['short'], info['total_reward'], int(
                    info['portfolio'])
            np.array([info]).dump(
                './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward))
            print('info saved')
            dqn.save_weights(
                './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward),
                overwrite=True)
            print('weight saved')
        except KeyboardInterrupt:
            continue
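NormalizerProcessor is project code that is not shown. A hedged sketch of a processor that standardizes each state batch before it reaches the network (the z-scoring scheme is an assumption, not the original implementation):

import numpy as np
from rl.core import Processor

class NormalizerProcessor(Processor):
    """Hypothetical processor: z-score every state batch handed to the model."""
    def process_state_batch(self, batch):
        batch = batch.astype('float32')
        mean = batch.mean(axis=0, keepdims=True)
        std = batch.std(axis=0, keepdims=True) + 1e-8  # avoid division by zero
        return (batch - mean) / std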
Example #18
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=target_model_update,
               policy=policy,
               batch_size=32)

lr_06_05_20_49 = 1e-3
lr_06_05_22_18 = 1e-2
lr_06_13_19_07 = 5e-4
lr = lr_06_05_20_49
dqn.compile(Adam(lr=lr), metrics=['mae'])

if if_learn:
    dqn.fit(env,
            nb_steps=nb_steps,
            visualize=True,
            verbose=2,
            nb_max_episode_steps=nb_max_episode_steps)

    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
else:
    dqn.load_weights(weights_name + '.h5f')
    env.steps_before_rendering = 0

dqn.test(env,
         nb_episodes=30,
         visualize=True,
         nb_max_episode_steps=nb_max_episode_steps)
Example #19
    weights_filename = 'wts/phy_dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'wts/phy_dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'phy_dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    if os.path.isfile(checkpoint_weights_filename):
        print("Loading previous checkpoint weights...")
        dqn.load_weights(checkpoint_weights_filename)
    elif os.path.isfile(weights_filename):
        print("Loading previous weights...")
        dqn.load_weights(weights_filename)
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=20000000,
            log_interval=10000,
            nb_max_start_steps=20)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=1, nb_max_start_steps=20, visualize=False)
elif args.mode == 'test':
    weights_filename = 'wts/phy_dqn_BreakoutDeterministic-v4_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    np.random.seed(None)
    env.seed(None)
Example #20
memory = SequentialMemory(limit=50000, window_length=1)
policy = CustomEpsGreedyQPolicy(automataEnv=env, eps=.9)
dqn_only_embedding = DQNAgent(gamma=.999,
                              model=model,
                              nb_actions=action_size,
                              memory=memory,
                              nb_steps_warmup=500,
                              target_model_update=1e-2,
                              policy=policy,
                              test_policy=policy)
dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
dqn_only_embedding.fit(env,
                       nb_steps=50000,
                       visualize=False,
                       verbose=1,
                       nb_max_episode_steps=100,
                       log_interval=10000,
                       start_step_policy=policy)
q_values = dqn_only_embedding.compute_batch_q_values([0])
for i in range(1, state_size):
    q_values = np.vstack(
        (q_values, dqn_only_embedding.compute_batch_q_values([i])))

#dqn_only_embedding.test(env, nb_episodes=5, visualize=False, verbose=1, nb_max_episode_steps=100,
#                     start_step_policy=policy)

# Path for car 0 to MI
env.reset()
env.step(21)
env.step(4)
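The loop above stacks one row of Q-values per state, so q_values ends up with shape (state_size, nb_actions). A short illustrative follow-up that reads off the greedy action for each state:

import numpy as np

greedy_actions = np.argmax(q_values, axis=1)  # best action per state under the learned Q-function
for state, action in enumerate(greedy_actions):
    print('state {:3d} -> action {}'.format(state, action))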
Example #21
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=50000)

dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

dqn.test(env, nb_episodes=5)
Example #22
from poloniex.gym_mikasa import MikasaLast4Env

# create Mikasa gym env
env = MikasaLast4Env()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# create model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent
policy = EpsGreedyQPolicy(eps=0.01)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])

# run agent
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

plt.plot(history.history['episode_reward'])
plt.show()
Example #23
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)

dqn.compile(Adam(lr=.00025), metrics=['mae'])

folder_path = '../model_saves/Vanilla/'

if args.mode == 'train':
    weights_filename = folder_path + 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = folder_path + 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'dqn_' + args.env_name + '_REWARD_DATA.txt'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)
    ]
    callbacks += [TrainEpisodeLogger(log_filename)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=10000000,
            verbose=0,
            nb_max_episode_steps=20000)

elif args.mode == 'test':
    weights_filename = folder_path + 'dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)
Example #24
class Agent(object):
    name = 'DQN'

    def __init__(
            self,
            step_size=1,
            window_size=20,
            max_position=5,
            fitting_file='ETH-USD_2018-12-31.xz',
            testing_file='ETH-USD_2018-01-01.xz',
            env='market-maker-v0',
            seed=1,
            action_repeats=4,
            number_of_training_steps=1e5,
            gamma=0.999,
            format_3d=False,  # add 3rd dimension for CNNs
            train=True,
            weights=True,
            z_score=True,
            visualize=False,
            dueling_network=True,
            double_dqn=True):
        """
        Agent constructor
        :param step_size: int, number of steps to take in env for a given simulation step
        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        self.env_name = env
        self.env = gym.make(self.env_name,
                            fitting_file=fitting_file,
                            testing_file=testing_file,
                            step_size=step_size,
                            max_position=max_position,
                            window_size=window_size,
                            seed=seed,
                            action_repeats=action_repeats,
                            training=train,
                            z_score=z_score,
                            format_3d=format_3d)
        # Number of frames to stack e.g., 1.
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1
        self.model = self.create_model()
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = train
        self.number_of_training_steps = number_of_training_steps
        self.weights = weights
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.visualize = visualize

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=3e-4), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self):
        """
        Create a Convolutional neural network with dense layer at the end
        :return: keras model
        """
        features_shape = (self.memory_frame_stack,
                          *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D

        model.add(
            conv(input_shape=features_shape,
                 filters=16,
                 kernel_size=[10, 1],
                 padding='same',
                 activation='relu',
                 strides=[5, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[6, 1],
                 padding='same',
                 activation='relu',
                 strides=[3, 1],
                 data_format='channels_first'))
        model.add(
            conv(filters=16,
                 kernel_size=[4, 1],
                 padding='same',
                 activation='relu',
                 strides=[2, 1],
                 data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('linear'))
        model.add(Dense(self.env.action_space.n))
        model.add(Activation('softmax'))

        print(model.summary())
        return model

    def start(self):
        """
        Entry point for agent training and testing
        :return: (void)
        """
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)

        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_{}'.format(self.env_name) + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
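A hypothetical way to drive this class; the argument values are illustrative, and weights=False avoids loading a checkpoint that may not exist yet:

if __name__ == '__main__':
    agent = Agent(env='market-maker-v0',
                  train=True,
                  weights=False,
                  number_of_training_steps=1e5,
                  visualize=False)
    print(agent)
    agent.start()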
Example #25
env = gym.make(ENV_NAME)

nb_actions = env.action_space.n

model = Sequential()

model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))

model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))

model.add(Dense(nb_actions, activation='linear'))

policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=50000, verbose=2)

dqn.test(env, nb_episodes=10, visualize=True)
Example #26
    action = layers.Dense(actions, activation="linear")(layer5)
    return K.Model(inputs=inputs, outputs=action)


# This will automatically use a saved model!
if path.exists("policy.h5"):
    print("Using saved model!")
    model = K.models.load_model('policy.h5')
else:
    print("Using new model!")
    model = create_q_model(actions)

# setting up the DQN agent and keras-rl stuff
memory = SequentialMemory(limit=1000000, window_length=4)
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
    value_test=.05, nb_steps=850000)
stateprocess = AtariProcessor()
dqn = DQNAgent(
    model=model, nb_actions=actions, memory=memory,
    nb_steps_warmup=35, target_model_update=1e-2, policy=policy,
    processor=stateprocess, enable_double_dqn=True)
dqn.compile(
    optimizer=Adam(lr=.00025, clipnorm=1.0),
    metrics=['mae', 'accuracy'])
dqn.fit(env, nb_steps=1750000, callbacks=[
    ModelIntervalCheck('policy.h5', 1000, 1, model)], visualize=True)

# Saving the policy network
model.save("policy.h5")
Example #27
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.

    # callbacks = []
    # if model_checkpoints:
    #     callbacks += [
    #         ModelIntervalCheckpoint(
    #             './checkpoints/checkpoint_weights.h5f',
    #             interval=checkpoint_interval)
    #     ]
    # if tensorboard:
    #     callbacks += [TensorBoard(log_dir='./logs')]
    # dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2,callbacks=callbacks)

    dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('double_dqn_{}_weights.h5f'.format(ENV_NAME),
                     overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=args.visualize)

# TESTING BASED ON SAVED WEIGHTS
if args.mode == 'test':
    weights_filename = 'double_dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    if args.callbacks == True:
Example #28
print(model.summary())

# Compile
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=1e-2,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env, nb_steps=10000, visualize=True, verbose=1, log_interval=1)

#plot
plt.plot(env.x_plot, env.reward_plot, color='blue')
plt.title("Learning curve")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.show()

# Save the learned parameters
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Test
dqn.test(env, nb_episodes=5, visualize=True)
Example #29
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   enable_dueling_network=False,
                   dueling_type='avg',
                   target_model_update=1e-4,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)

else:
    # SARSA
    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(state_size // 2))
Example #30
def attempt(lr, numTrainSteps, fnamePrefix, activation, exportVid, visualize):
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    print("env.observation_space.shape: " + str(env.observation_space.shape))

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation(activation))
    model.add(Dense(13))
    model.add(Activation(activation))
    model.add(Dense(10))
    model.add(Activation(activation))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=lr), metrics=['mae'])
    if not os.path.exists(fnamePrefix):
        os.makedirs(fnamePrefix)
    weights_fname = '%s/weights.h5f' % fnamePrefix
    if os.path.isfile(weights_fname):
        print("Loading weights from before")
        print("Skipping training")
        dqn.load_weights(weights_fname)
    else:
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        dqn.fit(env, nb_steps=numTrainSteps, visualize=False, verbose=1)

        # After training is done, we save the final weights.
        dqn.save_weights(weights_fname, overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    env.reset()
    env.close()
    if exportVid:
        if not visualize:
            # print to stderr, since trainAll redirects stdout
            eprint(
                "Error: I don't think the video export works unless you choose visualize=True"
            )
        videoFname = fnamePrefix + '/videos/' + str(time())
        if not os.path.exists(videoFname):
            os.makedirs(videoFname)
        env = wrappers.Monitor(env, videoFname, force=True)
    result = dqn.test(env, nb_episodes=1, visualize=visualize)
    if exportVid:
        print("Video saved to %s" % videoFname)
    means = {'reward': mean(result.history['episode_reward'])}
    json_fname = fnamePrefix + '/result.json'
    with open(json_fname, "w") as f:
        json.dump(result.history, f)
    return (means)
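A hypothetical invocation of attempt(); the learning rate, step count and output directory are illustrative:

means = attempt(lr=1e-3,
                numTrainSteps=50000,
                fnamePrefix='runs/relu_1e-3',
                activation='relu',
                exportVid=False,
                visualize=False)
print(means)  # e.g. {'reward': ...}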
Example #31
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
Example #32
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=300)

import rl.callbacks
class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []

    def on_step_end(self, step, logs):
        episode = logs['episode']
Example #33
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, 
               policy=train_policy, test_policy=test_policy)
              
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE,  NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)


dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)


# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)

Example #34
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True, nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

#Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025/4), metrics=['mae'])

folder_path = './'

mode = 'train'

if mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=0, nb_max_episode_steps=20000)


elif mode == 'test':
    weights_filename = folder_path + 'pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)

Example #35
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')


callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
# # # Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5, visualize=False, verbose=1, log_interval=LOG_INTERVAL, nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a simply-named version to make running a test immediately
# after training easier.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True,  action_repetition=5) #nb_max_episode_steps=500,
Example #36
# memory = EpisodeParameterMemory(limit=20, window_length=window_length)  # Non-episodic
memory = SequentialMemory(limit=20, window_length=window_length)

policy = EpsGreedyQPolicy(eps=0.1)

agent = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=train_step,  # A3C TRPO
    target_model_update=1e-2,
    policy=policy)
agent.compile(Adam(lr=1e-2), metrics=['mae'])

print("Fit model")
st = time.time()
agent.fit(env, train_step)
ed = time.time()
print("Training: %f [s]" % (ed - st))

# Reset environment
# env.rewind()
env = GraphEnv(graph, query, cond, base_step, test_step, time_limit,
               window_length)

st = time.time()
agent.test(env, nb_episodes=1)
ed = time.time()
print("Testing: %f [s]" % (ed - st))
from models import LordTateKanti

env = SuperiorEnv(
    env=halite_env.Env(),
    tiles_num=16,
)
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", replay=False)
nb_actions = env.action_space.n

model = LordTateKanti.make_model(env)
print(model.summary())

memory = SequentialMemory(limit=10_000, window_length=1)
policy = BoltzmannGumbelQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               target_model_update=1e-2,
               policy=policy,
               gamma=0.99)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

callbacks = [
    #ModelIntervalCheckpoint('dqn_PlanetCaptureBot_weights_{step}.h5f', interval=100),
    TrainEpisodeLogger()
]

dqn.fit(env, nb_steps=100_000, visualize=False, verbose=0, callbacks=callbacks)
dqn.save_weights('dqn_SuperiorBot_weights_final.h5f', overwrite=True)
Example #38
               nb_steps_warmup=50000,
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #39
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=500,
               target_model_update=1e-2,
               policy=policy,
               enable_double_dqn=False,
               batch_size=512)
dqn.compile(Adam())

try:
    dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
except Exception as e:
    print(e)
    pass

temp_folder = tempfile.mkdtemp()
# env.monitor.start(temp_folder)

dqn.fit(env, nb_steps=1e5, visualize=False, verbose=1, log_interval=10000)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=20, visualize=False)
# env.monitor.close()

# upload = input("Upload? (y/n)")
# if upload == "y":
#     gym.upload(temp_folder, api_key='YOUR_OWN_KEY')
Example #40
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
    processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.), reward_range=(-1., 1.),
    target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #41
memory = SequentialMemory(limit=2000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10000,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Define 'test' for testing an existing network weights or 'train' to train a new one!
mode = 'test'

if mode == 'train':
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
    hist = dqn.fit(env, nb_steps=300000, visualize=False, verbose=2)
    with open(
            'C:/Users/JMPF/PycharmProjects/ShipAI/ShipAI/_experiments/history_dqn_test_'
            + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    dqn.save_weights('h5f_files/dqn_{}_weights.h5f'.format(filename),
                     overwrite=True)
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)

if mode == 'test':
    env.set_test_performace()  # Define the initialization as performance test
    env.set_save_experice()  # Save the test to plot the results after
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
Example #42
from keras.optimizers import Adam
import gym
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = gym.make('MountainCar-v0')
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=30000, window_length=1)

policy = EpsGreedyQPolicy(eps=0.001)
dqn = DQNAgent(model=model, nb_actions=nb_actions, gamma=0.99, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

history = dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

dqn.test(env, nb_episodes=1, visualize=True)
Example #43
def training_game():
    env = Environment(
        map_name="DefeatRoaches",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.2,
                                  value_test=.0,
                                  nb_steps=1e2)

    # Agent

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   enable_double_dqn=True,
                   enable_dueling_network=True,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor,
                   delta_clip=1)

    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback

    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph',
                                            histogram_freq=0,
                                            write_graph=True,
                                            write_images=False)

    # Save the parameters and upload them when needed

    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    dqn.fit(env,
            callbacks=[callbacks, s, logs],
            nb_steps=600,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)