Beispiel #1
0
# Filter/grid constants; nothing in the visible part of this script reads them.
nb_filters, nb_rows, nb_cols = (32, 3, 3)

# Network: two ReLU hidden layers of width args.hidden over an input of
# args.grid ** 2 features, followed by a Dense layer with num_actions units.
keras_model = Sequential([
    Dense(args.hidden, activation="relu", input_dim=args.grid ** 2),
    Dense(args.hidden, activation="relu"),
    Dense(num_actions),
])

# X wrapper for Keras
model = KerasModel(keras_model)

# Experience-replay memory; its length comes from args.memory
M = ExperienceReplay(memory_length=args.memory)

# Agent built from the wrapped model and the replay memory
A = DiscreteAgent(model, M)
# SGD optimizer + MSE cost + MAX policy = Q-learning as we know it
A.compile(
    optimizer=SGD(lr=0.2),
    loss="mse",
    policy_rule="max",
)

# To run an experiment, the agent needs an environment to interact with
catcher = Catcher(grid_size=args.grid)
A.learn(catcher, epoch=args.epoch, batch_size=args.batch)

# Test the agent following the learned policy; the visualize options carry
# the output path (args.output), a frame count and a grayscale flag.
A.play(
    catcher,
    epoch=100,
    visualize=dict(filepath=args.output, n_frames=270, gray=True),
)
Beispiel #2
0
from x.environment import Catcher
from x.models import KerasModel
from x.memory import ExperienceReplay
from x.agent import DiscreteAgent

num_actions = 3
# Filter/grid constants; not read anywhere in the visible part of this script.
nb_filters = 32
nb_rows = nb_cols = 3

# Keras model: input of args.grid ** 2 features, two ReLU hidden layers of
# args.hidden units each, and a final Dense layer with one unit per action.
keras_model = Sequential(
    [
        Dense(args.hidden, activation="relu", input_dim=args.grid ** 2),
        Dense(args.hidden, activation="relu"),
        Dense(num_actions),
    ]
)

# X wrapper for Keras
model = KerasModel(keras_model)

# Memory (length taken from the command-line arguments)
M = ExperienceReplay(memory_length=args.memory)

# Agent
A = DiscreteAgent(model, M)
# SGD optimizer + MSE cost + MAX policy = Q-learning as we know it
A.compile(loss="mse", optimizer=SGD(lr=0.2), policy_rule="max")

# To run an experiment, the agent needs an environment to interact with
catcher = Catcher(grid_size=args.grid)
A.learn(catcher, batch_size=args.batch, epoch=args.epoch)

# Test the agent following the learned policy
A.play(
    catcher,
    epoch=100,
    visualize=dict(filepath=args.output, n_frames=270, gray=True),
)
Beispiel #3
0
from x.memory import ExperienceReplay
from x.agent import DiscreteAgent

import numpy as np

num_actions = 3
nb_rows = nb_cols = args.grid
state_dim = 3
# Admissible values for each of the three state components; handed to
# agent.compile() further down when the table model is built.
state_dim_values = (
    np.arange(0, nb_rows),
    np.arange(0, nb_cols),
    np.arange(1, nb_cols),
)

# To run an experiment, the agent needs an environment to interact with
env = Catcher(grid_size=args.grid, output_type='position')

# Create the table model with one dimension per state component
model = TableModel(state_dim=state_dim, num_actions=num_actions)

# Memory
M = ExperienceReplay(memory_length=args.memory)

# Agent
agent = DiscreteAgent(model, M)

# Configure and build the table model
agent.compile(state_dim_values, lr=0.2, policy_rule="maxrand")

agent.learn(env, epoch=args.epoch, batch_size=args.batch)

# Test the agent following the learned policy; the number of recorded frames
# scales with the number of play epochs and the grid height.
pl_epoch = 5
agent.play(
    env,
    epoch=pl_epoch,
    visualize=dict(
        filepath=args.output,
        n_frames=pl_epoch * (nb_rows - 1),
        gray=True,
    ),
)
Beispiel #4
0
              activation="softplus",
              input_dim=agent_env.n_features))
    keras_model.add(Dropout(0.5))
    # Two more hidden stages, each a softplus Dense layer followed by dropout.
    for _ in range(2):
        keras_model.add(Dense(hidden_dim, activation="softplus"))
        keras_model.add(Dropout(0.5))
    # Output layer: softmax over agent_env.n_actions units.
    keras_model.add(Dense(agent_env.n_actions, activation="softmax"))
    agent_model = KerasModel(keras_model)

    # experience memory
    agent_mem = ExperienceReplay(memory_length=memory_len)

    # Build the agent; the epsilon callback ignores its arguments and always
    # returns the enclosing `epsilon` value.
    agent = DiscreteAgent(agent_model, agent_mem, epsilon=lambda *args: epsilon)
    # NOTE: this configuration uses RMSprop with categorical cross-entropy and
    # the 'max' policy rule. An MSE-loss variant of the same call was tried
    # previously and is no longer active.
    agent.compile(
        optimizer=RMSprop(lr=0.001),
        loss='categorical_crossentropy',
        policy_rule='max',
    )

    # train agent
    agent.learn(agent_env, epoch=epochs, batch_size=batch_size, gamma=gamma)

    # Save the trained architecture and weights side by side.
    # NOTE(review): to_json() presumably already returns a JSON string, so the
    # .json file holds a JSON-encoded string; readers would need json.load()
    # before rebuilding the model. Confirm against the loading code.
    pre = "model-04-slow"
    with open(pre + ".json", 'w') as f:
        f.write(json.dumps(keras_model.to_json()))
    keras_model.save_weights(pre + ".h5", overwrite=True)
Beispiel #5
0
    # Learning model: three softplus hidden layers of hidden_dim units, each
    # followed by dropout, and a softmax output with agent_env.n_actions units.
    keras_model = Sequential([
        Dense(hidden_dim, activation="softplus", input_dim=agent_env.n_features),
        Dropout(0.5),
        Dense(hidden_dim, activation="softplus"),
        Dropout(0.5),
        Dense(hidden_dim, activation="softplus"),
        Dropout(0.5),
        Dense(agent_env.n_actions, activation="softmax"),
    ])
    agent_model = KerasModel(keras_model)

    # experience memory
    agent_mem = ExperienceReplay(memory_length=memory_len)

    # Build the agent; the epsilon callback ignores its arguments and always
    # returns the enclosing `epsilon` value.
    agent = DiscreteAgent(
        agent_model,
        agent_mem,
        epsilon=lambda *args: epsilon,
    )
    # NOTE: the active configuration is RMSprop + categorical cross-entropy
    # with the 'max' policy rule; an MSE-loss variant of this call was used
    # before and has been disabled.
    agent.compile(
        optimizer=RMSprop(lr=0.001),
        loss='categorical_crossentropy',
        policy_rule='max',
    )

    # train agent
    agent.learn(agent_env, epoch=epochs, batch_size=batch_size, gamma=gamma)

    # Save the trained architecture and weights side by side.
    # NOTE(review): to_json() presumably already returns a JSON string, so the
    # .json file holds a JSON-encoded string; readers would need json.load()
    # before rebuilding the model. Confirm against the loading code.
    pre = "model-04-slow"
    with open(pre + ".json", 'w') as f:
        f.write(json.dumps(keras_model.to_json()))
    keras_model.save_weights(pre + ".h5", overwrite=True)

    # Evaluation step, currently disabled:
    #agent.play(agent_env, epoch=100)