# --- Example: toy environment (Toy_env) ---
import numpy as np

from deer.agent import NeuralAgent
from deer.q_networks.q_net_theano import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

if __name__ == "__main__":
    rng = np.random.RandomState(123456)

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate, as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (i.e. not after every training step), the average
    # Bellman residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
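    # --- Hedged sketch (not part of the original example): launching training ---
    # Assuming deer's NeuralAgent exposes run(n_epochs, epoch_length), training with the controllers
    # attached above would typically be started as follows; the epoch counts are illustrative only.
    n_epochs = 100        # hypothetical value
    epoch_length = 1000   # hypothetical value
    agent.run(n_epochs, epoch_length)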
# --- Example: microgrid with two storage devices (MG_two_storages_env) ---
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = MG_two_storages_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "MG2S_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))
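    # --- Hedged sketch (not part of the original example): deriving a unique filename ---
    # The hash() helper used above is assumed to come from this script's imports, which are omitted
    # in this excerpt. The idea can be illustrated with the standard library alone: hash a stable
    # textual representation of the parameter set and prefix the digest.
    import hashlib

    def unique_fname(params_dict, prefix="MG2S_"):
        """Illustrative only: build a filename that uniquely identifies a parameter set."""
        digest = hashlib.sha1(repr(sorted(params_dict.items())).encode("utf-8")).hexdigest()
        return prefix + digest

    # e.g. unique_fname({"batch_size": 32, "update_rule": "rmsprop"}) -> "MG2S_<40-char sha1 digest>"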
# --- Example: pendulum (pendulum_env) ---
parameters = process_args(sys.argv[1:], Defaults)
if parameters.deterministic:
    rng = np.random.RandomState(12345)
else:
    rng = np.random.RandomState()

# --- Instantiate environment ---
env = pendulum_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(
    env,
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng,
    DoubleQ=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
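# --- Hedged sketch (not part of the original examples): what the max(...) expression computes ---
# In deer, inputDimensions() is taken here to describe each observation input, with the first
# element of each tuple being the history length kept for that input; the agent is then given the
# longest such history. A small illustration with made-up dimensions:
input_dims = [(1, 6), (12,), (1,)]   # hypothetical inputDimensions() return value
longest_history = max(input_dims[i][0] for i in range(len(input_dims)))
print(longest_history)               # -> 12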