Example #1
def play_it():
    #ENV_NAME = 'CartPole-v0'
    #ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'
    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = build_model(nb_actions,env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy,)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! Visualization is disabled here because
    # rendering every step slows down training quite a lot. You can always safely abort
    # the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    weights_filename = 'dqn_{}_weights.h5f'.format(
        ENV_NAME + datetime.now().strftime("%Y%m%d-%H%M%S"))
    dqn.save_weights(os.path.join('models_weights_logs', weights_filename), overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME)))
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
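# The helpers `make` and `build_model` used above are not shown in this example; `make` is
# presumably `gym.make` (or a project-specific factory that registers the custom
# 'Single_virtual-v0' environment) and `build_model` returns a Keras network with one output
# per action. A minimal sketch of such a `build_model`, assuming a flat observation space
# and the Keras Sequential API (layer sizes are illustrative, not the original ones):
from keras.models import Sequential
from keras.layers import Dense, Flatten

def build_model(nb_actions, observation_space):
    # Flatten the (window_length, *obs_shape) input and map it to one Q-value per action.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model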
Example #2
def optimize_agent(trial, args):
    "Optimize the model."

    model_name = args.study_name + "_" + str(trial.number)
    env_kwargs = dict()
    callback_checkpoint_kwargs = dict()
    save_dir = args.save_dir
    log_interval = args.log_interval
    num_cpus = args.num_cpus
    eval_episodes = args.eval_episodes
    n_steps = args.n_steps
    layer_normalization = args.layer_normalization
    layers = args.layers
    env_kwargs["board_size"] = 4
    env_kwargs["binary"] = not args.no_binary
    env_kwargs["extractor"] = args.extractor
    env_kwargs["seed"] = args.seed
    env_kwargs["penalty"] = args.penalty
    callback_checkpoint_kwargs["save_freq"] = args.save_freq
    callback_checkpoint_kwargs["save_path"] = args.save_dir
    callback_checkpoint_kwargs["name_prefix"] = model_name

    if args.agent == "ppo2":
        model_kwargs = trial_hiperparameter_ppo2(trial)
        model_kwargs["agent"] = "ppo2"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model = PPO2Agent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "dqn":
        # model_kwargs = trial_hiperparameter_dqn(trial)
        model_kwargs = {}
        model_kwargs["learning_rate"] = 0.0001
        model_kwargs["batch_size"] = 10000
        model_kwargs["learning_starts"] = 10000
        model_kwargs["target_network_update_freq"] = 1000
        model_kwargs["train_freq"] = 4
        model_kwargs["agent"] = "dqn"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["double_q"] = True
        model_kwargs["prioritized_replay"] = True
        model_kwargs["param_noise"] = True
        print(model_kwargs)
        model = DQNAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            layers,
            args.load_path,
            args.num_timesteps_log,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "acer":
        model_kwargs = trial_hiperparameter_acer(trial)
        model_kwargs["agent"] = "acer"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["replay_start"] = 2000
        model = ACERAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    else:
        raise ValueError("Choose a valid agent model: 'ppo2', 'dqn' or 'acer'")

    model.train()
    total_score = model.test()

    return total_score
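# The `trial` argument and the `trial_hiperparameter_*` helpers indicate that this objective
# is meant to be driven by Optuna. A minimal, hypothetical driver (the `args` object and its
# `n_trials` attribute are assumptions, not part of the original script):
import optuna

def run_study(args):
    # Maximize the score returned by model.test() across trials.
    study = optuna.create_study(study_name=args.study_name, direction="maximize")
    study.optimize(lambda trial: optimize_agent(trial, args), n_trials=args.n_trials)
    print("Best score:", study.best_value)
    print("Best hyperparameters:", study.best_params)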
Example #3
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!
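# The `memory`, `policy` and `processor` objects passed to the agent below are created
# earlier in the original script and are not shown here. A rough sketch of what they could
# look like for an Atari-style setup with keras-rl (WINDOW_LENGTH, the frame size and the
# AtariLikeProcessor class are assumptions, not the original code):
import numpy as np
from PIL import Image
from rl.core import Processor
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

WINDOW_LENGTH = 4
INPUT_SHAPE = (84, 84)

class AtariLikeProcessor(Processor):
    def process_observation(self, observation):
        # Resize the raw RGB frame and convert it to 8-bit grayscale.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img).astype('uint8')

    def process_state_batch(self, batch):
        # Stored frames are uint8; rescale to [0, 1] only when they are fed to the network.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], as in the original DQN paper.
        return np.clip(reward, -1., 1.)

processor = AtariLikeProcessor()
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
# Anneal epsilon from 1.0 down to 0.1 over the first million steps, then keep it fixed.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                              value_test=.05, nb_steps=1000000)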

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #4
# build model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(432, activation='relu'))
model.add(Dense(216, activation='relu'))
model.add(Dense(144, activation='linear'))
model.add(Dense(nb_actions))
print(model.summary())

memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-2), metrics=['mae'])
#dqn.load_weights('E:\\git\\TensorFlow\\dqn_Snake_weights_2_weights.h5f')
dqn.fit(env,
        nb_steps=500000,
        visualize=visualize_training,
        verbose=2,
        callbacks=[tbCallBack])
# After training is done, save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format("Snake"), overwrite=True)

# Evaluate the trained agent for 500 episodes.
dqn.test(env, nb_episodes=500, visualize=True)
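# `visualize_training` and `tbCallBack`, used in dqn.fit() above, are defined earlier in the
# original script. A plausible setup, assuming the standard Keras TensorBoard callback
# (the log directory is an arbitrary choice):
from keras.callbacks import TensorBoard

visualize_training = False  # rendering every step slows training down considerably
# Training metrics can then be inspected with `tensorboard --logdir ./logs`.
tbCallBack = TensorBoard(log_dir='./logs')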
Example #5
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
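# This last snippet omits its setup: `ENV_NAME`, `env` and `nb_actions` have to be defined
# before the model is built. The usual boilerplate, assuming a standard Gym environment with
# a discrete action space such as CartPole:
import gym
import numpy as np

ENV_NAME = 'CartPole-v0'  # assumed; any discrete-action Gym environment works
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n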