if __name__ == "__main__":
    rng = np.random.RandomState(123456)

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        environment=env,
        random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate, as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (not after every training step), the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
    # agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this
    parameters.momentum,
    parameters.clip_norm,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng,
    high_int_dim=False,
    internal_dim=2)

test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# As for the discount factor and the learning rate, one can periodically update the parameter of the epsilon-greedy
# policy implemented by the agent. This controller has a few more capabilities, as it allows one to choose more
# precisely when to update epsilon: after every X actions, episodes or epochs. This parameter can also be reset every
# episode or epoch (or never, hence resetEvery='none').
agent.attach(bc.EpsilonController(
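# (Illustrative sketch, not part of the original script.) A fully written-out attachment of the EpsilonController
# described above might look like the following. The numeric values and the exact keyword names (snake_case here;
# older deer versions use camelCase such as initialE/resetEvery) are assumptions.
agent.attach(bc.EpsilonController(
    initial_e=1.,          # start fully exploratory
    e_decays=10000,        # number of decay steps to go from initial_e to e_min
    e_min=0.1,             # final epsilon value
    evaluate_on='action',  # decay epsilon after every action (could be 'episode' or 'epoch')
    periodicity=1,
    reset_every='none'))   # never reset epsilon back to initial_e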
rng = np.random.RandomState(123456)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         double_Q=True,
                         random_state=rng)
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env,
                          batch_size=32,
                          random_state=rng)

agent = NeuralAgent(env,
                    network,
                    train_policy=EpsilonGreedyPolicy(network, env.nActions(), rng, 0.0),
                    replay_memory_size=1000,
                    batch_size=32,
                    random_state=rng)
# agent.attach(bc.VerboseController())

if args.fname == 'baseline':
    agent = EmpiricalTreatmentAgent(env)
else:
    agent.setNetwork(args.fname)

count = 0
length_success = []
avg_rad = []
avg_h_cell_killed = []
avg_percentage = []
    env, rng, double_Q=True, high_int_dim=HIGH_INT_DIM, internal_dim=3, div_entrop_loss=1.)

train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    train_policy=train_policy,
    test_policy=test_policy)

# --- Load saved network and test ---
# agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
agent.setNetwork(input_nnet)

avg = agent._total_mode_reward
print(" _total_mode_reward: ", agent._total_mode_reward,
      ", nmbr of episode: ", agent._totalModeNbrEpisode,
      ", average per episode: ", avg)

Epoch_length = 500
mode = parameters.mode  # mode 3 has planning depth 6; mode 2 has planning depth 3
agent.startMode(mode, Epoch_length)
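# (Illustrative follow-up, mirroring the pattern used in the other scripts of this collection.) Once the test
# mode has been started, a single test epoch of Epoch_length steps is typically launched like this:
agent.run(1, Epoch_length)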
    parameters.batch_size, parameters.update_rule, rng, double_Q=True)

train_policy = LongerExplorationPolicy(qnetwork, env.nActions(), rng, 1.0)  # EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)
test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)

# --- Instantiate agent ---
agent = NeuralAgent(env,
                    qnetwork,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    exp_priority=1.,
                    train_policy=train_policy,
                    test_policy=test_policy)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
agent.attach(
    bc.TrainerController(evaluate_on='action',
                         periodicity=parameters.update_frequency,
                         show_episode_avg_V_value=True,
                         show_avg_Bellman_residual=True))
env = CellEnvironment(args.obs_type, args.resize, args.reward, args.network, args.special)
rng = np.random.RandomState(777)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         freeze_interval=args.epochs[1],
                         double_Q=True,
                         random_state=rng)

agent = NeuralAgent(env,
                    network,
                    replay_memory_size=min(int(args.epochs[0] * args.epochs[1] * 1.1), 100000),
                    batch_size=32,
                    random_state=rng)
agent.setDiscountFactor(0.95)

agent.attach(bc.FindBestController(validationID=0, unique_fname=args.fname))
agent.attach(bc.VerboseController())
agent.attach(bc.TrainerController())
agent.attach(
    bc.EpsilonController(initial_e=0.8,
                         e_decays=args.epochs[0] * args.epochs[1],
                         e_min=0.2))
agent.attach(
    bc.LearningRateController(args.learning_rate[0],
                              args.learning_rate[1],
                              args.learning_rate[2]))
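# (Illustrative sketch, not part of the original script.) With the controllers attached, training is usually
# launched with NeuralAgent.run(n_epochs, epoch_length); mapping args.epochs onto these two arguments is an
# assumption about how this particular script is parametrized.
agent.run(n_epochs=args.epochs[0], epoch_length=args.epochs[1])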
def run(self):
    if self.params.rng == -1:
        seed = random.randrange(2**32 - 1)
    else:
        seed = int(self.params.rng)
    rng = np.random.RandomState(seed)
    np.random.seed(seed)

    # --- Environment ---
    conf_env_dir = "cfgs/env/" + self.params.env_module + "/" + self.params.env_conf_file
    env_params = parse_conf(conf_env_dir)
    env_params["rng"] = rng
    env = get_mod_object("envs", self.params.env_module, "env", (rng,), env_params, mode=1)

    # --- Train policy ---
    pol_train = get_mod_class("pols", self.params.pol_train_module, "pol")
    self.params.pol_train_args = flatten(self.params.pol_train_args) if self.params.pol_train_args is not None else []
    pol_train_args = (parse_conf("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0])
                      if len(self.params.pol_train_args) > 0 and isfile("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0])
                      else parse_conf("cfgs/pol/" + self.params.pol_train_module + "/default"))
    pol_train_args_2 = erase_dict_from_keyword_list(pol_train_args, self.params.pol_train_args)
    pol_train_args = revalidate_dict_from_conf_module(pol_train_args_2, "pol", self.params.pol_train_module)

    # --- Test policy ---
    pol_test = get_mod_class("pols", self.params.pol_test_module, "pol")
    self.params.pol_test_args = flatten(self.params.pol_test_args) if self.params.pol_test_args is not None else []
    pol_test_args = (parse_conf("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0])
                     if len(self.params.pol_test_args) > 0 and isfile("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0])
                     else parse_conf("cfgs/pol/" + self.params.pol_test_module + "/default"))
    pol_test_args_2 = erase_dict_from_keyword_list(pol_test_args, self.params.pol_test_args)
    pol_test_args = revalidate_dict_from_conf_module(pol_test_args_2, "pol", self.params.pol_test_module)

    # --- Backend neural network ---
    self.params.backend_nnet_conf_file = flatten(self.params.backend_nnet_conf_file) if self.params.backend_nnet_conf_file is not None else []
    backend_nnet_params = (parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0])
                           if len(self.params.backend_nnet_conf_file) > 0 and isfile("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0])
                           else parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/default"))
    backend_nnet_params_2 = erase_dict_from_keyword_list(backend_nnet_params, self.params.backend_nnet_conf_file)
    backend_nnet_params = revalidate_dict_from_conf_module(backend_nnet_params_2, "backend_nnet", self.params.backend_nnet)
    neural_net = get_mod_class("neural_nets", self.params.backend_nnet, "neural_net")

    # --- Controller neural network (learning algorithm) ---
    self.params.ctrl_neural_nets_conf_file = flatten(self.params.ctrl_neural_nets_conf_file) if self.params.ctrl_neural_nets_conf_file is not None else []
    ctrl_neural_nets_params = (parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0])
                               if len(self.params.ctrl_neural_nets_conf_file) > 0 and isfile("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0])
                               else parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/DEFAULT"))
    ctrl_neural_nets_params_2 = erase_dict_from_keyword_list(ctrl_neural_nets_params, self.params.ctrl_neural_nets_conf_file)
    ctrl_neural_nets_params = revalidate_dict_from_conf_module(ctrl_neural_nets_params_2, "ctrl_neural_net", self.params.qnetw_module)
    ctrl_neural_nets_params["neural_network"] = neural_net
    ctrl_neural_nets_params["neural_network_kwargs"] = backend_nnet_params
    ctrl_neural_nets_params["batch_size"] = self.params.batch_size
    ctrl_neural_net = get_mod_object("ctrl_neural_nets", self.params.qnetw_module, "ctrl_neural_net",
                                     (env,), ctrl_neural_nets_params, mode=0)

    # --- Agent ---
    agent = NeuralAgent([env], [ctrl_neural_net],
                        replay_memory_size=self.params.replay_memory_size,
                        replay_start_size=None,
                        batch_size=self.params.batch_size,
                        random_state=rng,
                        exp_priority=self.params.exp_priority,
                        train_policy=pol_train, train_policy_kwargs=pol_train_args,
                        test_policy=pol_test, test_policy_kwargs=pol_test_args,
                        only_full_history=self.params.only_full_history)

    # --- Controllers ---
    for tc in self.params.controllers:
        len_tc = len(tc)
        s = tc[0]
        redo_conf = False
        if len_tc >= 2:
            # Test if tc[1] is a config file or an argument to override
            if '=' not in tc[1]:
                # This is a config file
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/" + tc[1])
            else:
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                sc = tc[1].split("=")
                if sc[0] in conf_ctrl.keys():
                    conf_ctrl[sc[0]] = sc[1]
                    redo_conf = True
                else:
                    print("Warning: parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
            if len_tc > 2:
                remainder = tc[2:]
                for a in remainder:
                    sc = a.split("=")
                    if len(sc) != 2:
                        print("Warning: arg " + a + " for controller parametrization is ill-formed. It needs to be in the form key=value.")
                    else:
                        redo_conf = True
                        if sc[0] in conf_ctrl.keys():
                            conf_ctrl[sc[0]] = sc[1]
                        else:
                            print("Warning: parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
            # Create a temporary config file with the overridden parameters and go through parse_conf again
            if redo_conf:
                write_conf(conf_ctrl, "cfgs/ctrl/" + s + "/temp")
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/temp")
                os.remove("cfgs/ctrl/" + s + "/temp")
        else:
            conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
        controller = get_mod_object("ctrls", s, "ctrl", tuple(), conf_ctrl, mode=0)
        agent.attach(controller)

    agent.run(self.params.epochs, self.params.max_size_episode)
    parameters.batch_size, parameters.update_rule, rng,
    double_Q=True, high_int_dim=HIGH_INT_DIM, internal_dim=3)

train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
agent = NeuralAgent(env,
                    learning_algo,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    train_policy=train_policy,
                    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng,
    neural_network=myNN)

test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.00)

# --- Instantiate agent ---
agent = NeuralAgent(env,
                    qnetwork,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "PLE_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng,
)

test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.05)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy,
)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "ALE_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on="epoch", periodicity=1))
    parameters.momentum,
    parameters.clip_norm,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng,
    high_int_dim=False,
    internal_dim=1)  # todo MAKE 1

test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

# --- Instantiate agent ---
agent = NeuralAgent(env,
                    learning_algo,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# test saved network
# --- load saved network and test
agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
avg = agent._total_mode_reward
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng,
    DoubleQ=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluateOn='epoch',
    periodicity=1))
agent.attach(bc.TrainerController(
    evaluateOn='action',
    periodicity=parameters.update_frequency,
    showEpisodeAvgVValue=False,
    showAvgBellmanResidual=False))
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    random_state=rng,
    neural_network_actor=NN_keras,
    neural_network_critic=NN_keras)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluate_on='epoch',
    periodicity=1))
agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))
from deer.q_networks.q_net_theano import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

if __name__ == "__main__":
    rng = np.random.RandomState(123456)

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate, as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (not after every training step), the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
    # agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this
    # way, using the controllersToDisable argument of the InterleavedTestEpochController. The value of this argument
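    # (Illustrative sketch, not part of the original script.) The truncated comment above refers to the
    # InterleavedTestEpochController. A typical attachment looks roughly like the one below; the test-epoch
    # length of 500 and the controller indices [0, 1] (the VerboseController and TrainerController attached
    # above) are assumptions, and recent deer versions use the snake_case keywords shown here rather than the
    # camelCase names mentioned in the comment.
    agent.attach(bc.InterleavedTestEpochController(
        id=0,                            # environment mode used for the interleaved test epochs
        epoch_length=500,                # number of steps in each test epoch
        controllers_to_disable=[0, 1]))  # disable the controllers attached above during testing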
rng = np.random.RandomState()

# --- Instantiate environment ---
env = pendulum_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(
    env,
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(evaluateOn='epoch', periodicity=1))
agent.attach(
    bc.TrainerController(evaluateOn='action',
                         periodicity=parameters.update_frequency,
                         showEpisodeAvgVValue=True,
                         showAvgBellmanResidual=True))
agent.attach(
    bc.LearningRateController(
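# (Illustrative sketch, not part of the original script.) The truncated LearningRateController attachment above
# typically takes an initial learning rate, a multiplicative decay factor and a periodicity, as in the
# CellEnvironment script of this collection; the parameter names (parameters.learning_rate,
# parameters.learning_rate_decay) and the periodicity value used here are assumptions.
agent.attach(
    bc.LearningRateController(parameters.learning_rate,
                              parameters.learning_rate_decay,
                              10000))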
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng,
    double_Q=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    exp_priority=1.)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluate_on='epoch',
    periodicity=1))
agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))
    show_game=True)

# --- Instantiate learning algorithm ---
learning_algo = CRAR(
    env,
    rng,
    double_Q=True,
    high_int_dim=HIGH_INT_DIM,
    internal_dim=3)

test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)  # 1.)

# --- Instantiate agent ---
agent = NeuralAgent(env,
                    learning_algo,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    test_policy=test_policy)

# set name of nnet and planning depth:
agent.setNetwork("test_71c8fc5b085cd8aa090e8e8e63d0a9450a3b7a27.epoch=35")
# agent.setNetwork("test_964ccb7a9490cf3c3309a90d07485a77c3ec6486")

# just running to check its behaviour:
Epoch_length = 200
mode = 3  # mode 3 has planning depth 6
agent.startMode(mode, Epoch_length)
agent.run(1, Epoch_length)
else:
    rng = np.random.RandomState()

# --- Instantiate environment ---
env = MG_two_storages_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(
    env,
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "MG2S_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes.
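# (Illustrative sketch.) The TrainerController described in the comment above is attached the same way as in
# the other snake_case scripts of this collection; treating this file as using the same API is an assumption.
agent.attach(bc.TrainerController(evaluate_on='action',
                                  periodicity=parameters.update_frequency,
                                  show_episode_avg_V_value=True,
                                  show_avg_Bellman_residual=True))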
    high_int_dim=HIGH_INT_DIM, internal_dim=3, div_entrop_loss=1.)

train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)
train_policy2 = EpsilonGreedyPolicy(learning_algo2, env.nActions(), rng, 1.)
test_policy2 = EpsilonGreedyPolicy(learning_algo2, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    train_policy=train_policy,
    test_policy=test_policy)
agent2 = NeuralAgent(
    env,
    learning_algo2,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    train_policy=train_policy2,
    test_policy=test_policy2)