from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

from x.environment import Catcher
from x.models import KerasModel
from x.memory import ExperienceReplay
from x.agent import DiscreteAgent

# `args` (hidden, grid, memory, epoch, batch, output) is assumed to come from
# the script's command-line parsing.
num_actions = 3
nb_filters, nb_rows, nb_cols = 32, 3, 3

# Keras model: a small MLP over the flattened grid
keras_model = Sequential()
keras_model.add(Dense(args.hidden, activation="relu", input_dim=args.grid**2))
keras_model.add(Dense(args.hidden, activation="relu"))
keras_model.add(Dense(num_actions))

# X wrapper for Keras
model = KerasModel(keras_model)

# Memory
M = ExperienceReplay(memory_length=args.memory)

# Agent
A = DiscreteAgent(model, M)

# SGD optimizer + MSE cost + max policy = Q-learning as we know it
A.compile(optimizer=SGD(lr=0.2), loss="mse", policy_rule="max")

# To run an experiment, the Agent needs an Environment to interact with
catcher = Catcher(grid_size=args.grid)
A.learn(catcher, epoch=args.epoch, batch_size=args.batch)

# Test the agent following the learned policy
A.play(catcher, epoch=100,
       visualize={'filepath': args.output, 'n_frames': 270, 'gray': True})
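The snippet above reads its hyperparameters from an `args` object. Below is a minimal sketch of the command-line parsing that could supply those fields, assuming argparse; the option names match the attributes used in the example, but the defaults are made up for illustration and may differ from the original script.

import argparse

parser = argparse.ArgumentParser(description="Train a Q-learning agent on Catcher")
parser.add_argument("--grid", type=int, default=10, help="side length of the square grid")
parser.add_argument("--hidden", type=int, default=100, help="units per hidden Dense layer")
parser.add_argument("--memory", type=int, default=500, help="experience replay capacity")
parser.add_argument("--epoch", type=int, default=1000, help="training epochs for Agent.learn")
parser.add_argument("--batch", type=int, default=32, help="minibatch size sampled from replay memory")
parser.add_argument("--output", type=str, default="catcher.gif", help="where Agent.play writes its visualization")
args = parser.parse_args()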
from x.environment import Catcher
from x.models import TableModel
from x.memory import ExperienceReplay
from x.agent import DiscreteAgent
import numpy as np

# `args` (grid, memory, epoch, batch, output) is assumed to come from the
# script's command-line parsing.
num_actions = 3
nb_rows, nb_cols = args.grid, args.grid

# Discrete values each of the three state components can take
state_dim = 3
state_dim_values = (np.r_[0:nb_rows], np.r_[0:nb_cols], np.r_[1:nb_cols])

# To run an experiment, the Agent needs an Environment to interact with
env = Catcher(grid_size=args.grid, output_type='position')

# Create table model
model = TableModel(state_dim=state_dim, num_actions=num_actions)

# Memory
M = ExperienceReplay(memory_length=args.memory)

# Agent
agent = DiscreteAgent(model, M)

# Configure and build the table model
agent.compile(state_dim_values, lr=0.2, policy_rule="maxrand")
agent.learn(env, epoch=args.epoch, batch_size=args.batch)

# Test the agent following the learned policy
pl_epoch = 5
agent.play(env, epoch=pl_epoch,
           visualize={'filepath': args.output,
                      'n_frames': pl_epoch * (nb_rows - 1),
                      'gray': True})
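The tabular variant is only feasible because the 'position' output of Catcher is a small discrete state. A quick sanity check of how many (state, action) entries such a table has to hold, using the same state_dim_values construction as above with an illustrative 10x10 grid (how TableModel lays the table out internally may differ):

import numpy as np

nb_rows = nb_cols = 10  # illustrative grid size; the script uses args.grid
state_dim_values = (np.r_[0:nb_rows], np.r_[0:nb_cols], np.r_[1:nb_cols])
num_actions = 3

n_states = np.prod([len(v) for v in state_dim_values])
print(n_states)                # 10 * 10 * 9 = 900 discrete states
print(n_states * num_actions)  # 2700 Q-values, tiny compared to a neural network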
activation="softplus", input_dim=agent_env.n_features)) keras_model.add(Dropout(0.5)) keras_model.add(Dense(hidden_dim, activation="softplus")) keras_model.add(Dropout(0.5)) keras_model.add(Dense(hidden_dim, activation="softplus")) keras_model.add(Dropout(0.5)) keras_model.add(Dense(agent_env.n_actions, activation="softmax")) agent_model = KerasModel(keras_model) # experience memory agent_mem = ExperienceReplay(memory_length=memory_len) # compile agent agent = DiscreteAgent(agent_model, agent_mem, epsilon=lambda *args: epsilon) # SGD optimizer + MSE cost + MAX policy = Q-learning as we know it #agent.compile(optimizer=RMSprop(lr=0.001), loss='mse', policy_rule='max') agent.compile(optimizer=RMSprop(lr=0.001), loss='categorical_crossentropy', policy_rule='max') # train agent agent.learn(agent_env, epoch=epochs, batch_size=batch_size, gamma=gamma) # save trained model and weights pre = "model-04-slow" with open(pre + ".json", 'w') as f: json.dump(keras_model.to_json(), f) keras_model.save_weights(pre + ".h5", overwrite=True)
import json

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

from x.models import KerasModel
from x.memory import ExperienceReplay
from x.agent import DiscreteAgent

# `agent_env` and the hyperparameters (hidden_dim, memory_len, epsilon,
# epochs, batch_size, gamma) are assumed to be defined earlier in the script.

# learning model
keras_model = Sequential()
keras_model.add(Dense(hidden_dim, activation="softplus",
                      input_dim=agent_env.n_features))
keras_model.add(Dropout(0.5))
keras_model.add(Dense(hidden_dim, activation="softplus"))
keras_model.add(Dropout(0.5))
keras_model.add(Dense(hidden_dim, activation="softplus"))
keras_model.add(Dropout(0.5))
keras_model.add(Dense(agent_env.n_actions, activation="softmax"))
agent_model = KerasModel(keras_model)

# experience memory
agent_mem = ExperienceReplay(memory_length=memory_len)

# agent with a fixed exploration rate
agent = DiscreteAgent(agent_model, agent_mem, epsilon=lambda *args: epsilon)

# SGD + MSE + max policy would be Q-learning as we know it; here the agent is
# compiled with RMSprop and a categorical cross-entropy loss instead.
# agent.compile(optimizer=RMSprop(lr=0.001), loss='mse', policy_rule='max')
agent.compile(optimizer=RMSprop(lr=0.001), loss='categorical_crossentropy',
              policy_rule='max')

# train agent
agent.learn(agent_env, epoch=epochs, batch_size=batch_size, gamma=gamma)

# save trained model architecture and weights
pre = "model-04-slow"
with open(pre + ".json", 'w') as f:
    json.dump(keras_model.to_json(), f)
keras_model.save_weights(pre + ".h5", overwrite=True)

# test agent
# agent.play(agent_env, epoch=100)
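Because the script persists the architecture as JSON and the weights as HDF5, the trained network can be rebuilt later without retraining. A minimal reload sketch, assuming the same Keras version and the file prefix used above:

import json
from keras.models import model_from_json

pre = "model-04-slow"
with open(pre + ".json") as f:
    restored = model_from_json(json.load(f))  # rebuild the architecture
restored.load_weights(pre + ".h5")            # restore the trained weights
# wrap it again, e.g. KerasModel(restored), to hand it back to a DiscreteAgent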