Example no. 1
def play_it():
    #ENV_NAME = 'CartPole-v0'
    #ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'
    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = build_model(nb_actions,env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy,)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! Visualizing the training slows it down quite a lot,
    # so we keep visualize=False here. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME+ datetime.now().strftime("%Y%m%d-%H%M%S"))), overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME)))
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
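
Example 1 calls a build_model helper that is not shown. A minimal sketch of what it might look like, assuming a small fully connected network over the flattened observation (the layer sizes are assumptions, not the original code):

# Hypothetical reconstruction of the missing build_model helper (not the original).
from keras.models import Sequential
from keras.layers import Dense, Flatten

def build_model(nb_actions, observation_space):
    # keras-rl feeds a (window_length, *obs_shape) tensor, hence the Flatten.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + observation_space.shape))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model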
Example no. 2
def main():
    parser = argparse.ArgumentParser(description='Run DQN on Atari SpaceInvaders')
    parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name')
    parser.add_argument(
        '-o', '--output', default='SpaceInvaders-v0', help='Directory to save data to')
    parser.add_argument('--seed', default=0, type=int, help='Random seed')
    # parser.add_argument('--input_shape', default=(84, 84, 4), type=tuple, help='Size of each frame')
    args = parser.parse_args()  # args.env is used below, so the parser must actually run
    # args.output = get_output_folder(args.output, args.env)

    # Vehicle network
    veh_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=g1)
    # Attacker network
    att_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=gym.make(
                                        args.env).action_space.n)
    veh_agent = DQNAgent(q_network=veh_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    att_agent = DQNAgent(q_network=att_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    veh_agent.compile('Adam', 'mse')
    att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env, 10 ** 6)
        att_agent.fit(env, 10 ** 6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
Example no. 3
def main():
    # vehicle_network
    veh_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    # Attacker network
    # att_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    veh_agent = DQNAgent(q_network=veh_network,
                         q_network2=veh_network,
                         preprocessor=core.Preprocessor(),
                         RLmemory=core.ReplayMemory(),
                         SLmemory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    # att_agent = DQNAgent(q_network=att_network,
    #                      q_network2=att_network,
    #                      preprocessor=core.Preprocessor(),
    #                      memory=core.ReplayMemory(),
    #                      policy=1,
    #                      gamma=0.1,
    #                      target_update_freq=100,
    #                      num_burn_in=100,
    #                      train_freq=20,
    #                      batch_size=32)
    veh_agent.compile('Adam', 'mse')
    # att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env=env, num_iterations=10 ** 6)
        # att_agent.fit(env, 10 ** 6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
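
Examples 2 and 3 depend on a create_lstm_model helper that is not shown. A minimal sketch, assuming a Keras LSTM over (nb_time_steps, nb_input_vector) sequences with one linear Q-value output per action (the hidden size is an assumption):

# Hypothetical reconstruction of the missing create_lstm_model helper (not the original).
from keras.models import Sequential
from keras.layers import LSTM, Dense

def create_lstm_model(nb_time_steps, nb_input_vector, num_actions):
    # Estimate one Q-value per action from a sequence of observations.
    model = Sequential()
    model.add(LSTM(64, input_shape=(nb_time_steps, nb_input_vector)))
    model.add(Dense(num_actions, activation='linear'))
    return model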
Example no. 4
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0,
                              value_min=0.1,
                              value_test=0.05,
                              nb_steps=1000000)

dqn = DQNAgent(model=model,
               nb_actions=3,
               policy=policy,
               memory=memory,
               nb_steps_warmup=2000,
               gamma=0.95,
               target_model_update=2000,
               train_interval=1,
               delta_clip=1.0)

dqn.compile(optimizer, metrics=['mae'])

env = Tetris()

start = time.time()

weights_filename = 'dqn_{}_tetris_weights.h5'.format(start)
checkpoint_weights_filename = 'dqn_{}_tetris_weights_{{step}}.h5'.format(start)  # {step} is filled in by ModelIntervalCheckpoint
log_filename = 'dqn_tetris_log.json'
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
]
callbacks += [FileLogger(log_filename, interval=100)]
callbacks += [WandbLogger()]
dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)
dqn.save_weights(weights_filename, overwrite=True)
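
This snippet starts mid-script: model, memory, and optimizer are defined earlier in the original file. A plausible minimal setup, assuming a small dense network over an engineered Tetris state (every shape and size here is an assumption):

# Hypothetical definitions for the names assumed above (not the original code).
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
from rl.memory import SequentialMemory

STATE_SHAPE = (4,)  # assumed feature vector describing the board
model = Sequential()
model.add(Flatten(input_shape=(1,) + STATE_SHAPE))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='linear'))  # matches nb_actions=3 in the DQNAgent above
memory = SequentialMemory(limit=1000000, window_length=1)
optimizer = Adam(lr=1e-3)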
Example no. 5
# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

# creating both agents
hider_dqn = DQNAgent(model=hider_model,
                     nb_actions=nb_actions,
                     policy=policy,
                     memory=memory,
                     nb_steps_warmup=100,
                     gamma=.99,
                     target_model_update=10,
                     train_interval=2,
                     delta_clip=1.)
hider_dqn.compile(Adam(lr=.00025), metrics=['mae'])
seeker_dqn = DQNAgent(model=seeker_model,
                      nb_actions=nb_actions,
                      policy=policy,
                      memory=memory,
                      nb_steps_warmup=100,
                      gamma=.99,
                      target_model_update=10,
                      train_interval=2,
                      delta_clip=1.)
seeker_dqn.compile(Adam(lr=.00025), metrics=['mae'])

# passing both agents to framework
framework = MultiAgentFramework(dqagents=[hider_dqn, seeker_dqn])

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
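
The Atari-style block above also assumes processor, memory, and WINDOW_LENGTH from earlier in the script. A sketch of the standard keras-rl Atari setup these names usually refer to (an assumption about this particular script):

# Hypothetical definitions for the names assumed above (standard keras-rl Atari pattern).
import numpy as np
from rl.core import Processor
from rl.memory import SequentialMemory

WINDOW_LENGTH = 4  # stack of 4 frames, as in the original DQN paper

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # Grayscale conversion and resizing would normally happen here.
        return np.array(observation, dtype='uint8')

    def process_reward(self, reward):
        # Clip rewards to [-1, 1] for training stability.
        return np.clip(reward, -1., 1.)

processor = AtariProcessor()
# Note: older keras-rl versions passed window_length to the agent instead of the memory.
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)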
Example no. 7
# build model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(432, activation='relu'))
model.add(Dense(216, activation='relu'))
model.add(Dense(144, activation='linear'))  # note: a linear layer feeding the linear output collapses to a single linear map
model.add(Dense(nb_actions))
print(model.summary())

memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-2), metrics=['mae'])
#dqn.load_weights('E:\\git\\TensorFlow\\dqn_Snake_weights_2_weights.h5f')
dqn.fit(env,
        nb_steps=500000,
        visualize=visualize_training,
        verbose=2,
        callbacks=[tbCallBack])
# After training is done, save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format("Snake"), overwrite=True)

# Evaluate our model for 500 episodes.
dqn.test(env, nb_episodes=500, visualize=True)
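
This snippet assumes env, nb_actions, visualize_training, and tbCallBack are defined earlier. A plausible setup, assuming a TensorBoard callback and a Gym-style Snake environment (all names here are assumptions):

# Hypothetical setup for the undefined names above (not the original code).
import gym
from keras.callbacks import TensorBoard

env = gym.make('Snake-v0')  # assumed custom Gym environment
nb_actions = env.action_space.n
visualize_training = False  # rendering slows training down considerably
tbCallBack = TensorBoard(log_dir='./logs', write_graph=True)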
Example no. 8
nb_actions = env.action_space.n
actions = np.arange(nb_actions)
policy = EpsGreedyQPolicy(1.0, 0.999)
memory = Memory(limit=50000, maxlen=1)
obs = env.reset()
agent = DQNAgent(actions=actions,
                 memory=memory,
                 update_interval=500,
                 train_interval=1,
                 batch_size=32,
                 memory_interval=1,
                 observation=obs,
                 input_shape=[len(obs)],
                 training=True,
                 policy=policy)
agent.compile()

result = []
for episode in range(500):  # run 500 episodes
    agent.reset()
    observation = env.reset()  # initialize the environment
    # observation, _, _, _ = env.step(env.action_space.sample())
    observation = deepcopy(observation)
    agent.observe(observation)
    for t in range(250):  # run up to 250 steps per episode
        # env.render()  # display
        action = agent.act()
        # env.step returns the resulting state, the reward, whether the episode
        # is finished, and auxiliary info.
        observation, reward, done, info = env.step(action)
        observation = deepcopy(observation)
        agent.observe(observation, reward, done)

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)  # window_length is a required argument
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
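
The second half of this example assumes env and ENV_NAME from earlier in the original script. A minimal sketch, assuming the classic CartPole setup used throughout the keras-rl examples:

# Hypothetical setup for the undefined names above (not the original code).
import gym

ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
nb_actions = env.action_space.n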