discount_factor_max=parameters.discount_max,
periodicity=1))
# NOTE(review): this chunk begins mid-way through an agent.attach(bc.DiscountFactorController(...))
# call whose opening lines are outside this view, and ends mid-way through a TrainerController call.

# As for the discount factor and the learning rate, one can periodically update the parameter of the
# epsilon-greedy policy implemented by the agent. This controller has a bit more capabilities, as it
# allows one to choose more precisely when to update epsilon: after every X actions, episodes or
# epochs. This parameter can also be reset every episode or epoch (or never, hence reset_every='none').
agent.attach(
    bc.EpsilonController(initial_e=parameters.epsilon_start,
                         e_decays=parameters.epsilon_decay,
                         e_min=parameters.epsilon_min,
                         evaluate_on='action',
                         periodicity=1,
                         reset_every='none'))

# Run one epoch of N_SAMPLES steps to gather experience into the agent's dataset.
agent.run(1, N_SAMPLES)
#print (agent._dataset._rewards._data[0:500])
#print (agent._dataset._terminals._data[0:500])
print("end gathering data")

# Keep references to the gathered replay-memory arrays. NOTE(review): this reaches into
# private attributes of the agent's dataset — brittle if the library internals change.
old_rewards = agent._dataset._rewards._data
old_terminals = agent._dataset._terminals._data
old_actions = agent._dataset._actions._data
old_observations = agent._dataset._observations[0]._data

# During training epochs, we want to train the agent after every [parameters.update_frequency]
# actions it takes. Plus, we also want to display after each training episode (!= than after every
# training) the average bellman residual and the average of the V values obtained during the last
# episode, hence the two last arguments.
agent.attach(
    bc.TrainerController(evaluate_on='action',
                         periodicity=parameters.update_frequency,
# Interleave a test epoch (the environment's TEST_MODE) every second epoch; the listed
# controller indices are disabled while it runs so testing does not interfere with training.
agent.attach(
    bc.InterleavedTestEpochController(
        id=MG_two_storages_env.TEST_MODE,
        epoch_length=parameters.steps_per_test,
        controllers_to_disable=[0, 1, 2, 3, 4, 6],
        periodicity=2,
        show_score=True,
        summarize_every=parameters.period_btw_summary_perfs))

# --- Run the experiment ---
# Best-effort creation of the output directory (ignored if it already exists or fails).
try:
    os.mkdir("params")
except Exception:
    pass
dump(vars(parameters), "params/" + fname + ".jldump")

agent.run(parameters.epochs, parameters.steps_per_epoch)

# --- Show results ---
# Load the scores dumped during the run and plot both series against epoch number.
scores = load("scores/" + fname + "_scores.jldump")
for key, label, colour in (('vs', "VS", 'b'), ('ts', "TS", 'r')):
    series = scores[key]
    plt.plot(range(1, len(series) + 1), series, label=label, color=colour)
plt.legend()
# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Controller 0: before every training epoch, print a summary of the agent's epsilon,
# discount and learning rate, plus the training epoch number.
verbose = bc.VerboseController()

# Controller 1: train the agent after every action it takes; it also reports, once per
# training episode (!= after every single training step), the average bellman residual
# and the average of the V values obtained during that episode.
trainer = bc.TrainerController()

# Controller 2: interleave a 500-step "test epoch" between training epochs. Controllers
# 0 and 1 are disabled for its whole duration (indices reflect attach order) so the test
# epochs never interfere with training.
tester = bc.InterleavedTestEpochController(epoch_length=500,
                                           controllers_to_disable=[0, 1])

for ctrl in (verbose, trainer, tester):
    agent.attach(ctrl)

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
# Attach the four schedule/evaluation controllers in a single pass.
for ctrl in (
        # Decay the learning rate after every training epoch (periodicity=1).
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1),
        # Grow the discount factor towards its maximum after every training epoch.
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1),
        # Decay epsilon after every single action, never resetting it.
        bc.EpsilonController(
            initial_e=parameters.epsilon_start,
            e_decays=parameters.epsilon_decay,
            e_min=parameters.epsilon_min,
            evaluate_on='action',
            periodicity=1,
            reset_every='none'),
        # Every second epoch, run a test epoch (mode id 0) with controllers 0-4
        # disabled so testing does not interfere with training.
        bc.InterleavedTestEpochController(
            id=0,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4],
            periodicity=2,
            show_score=True,
            summarize_every=parameters.period_btw_summary_perfs)):
    agent.attach(ctrl)

# --- Run the experiment ---
agent.run(parameters.epochs, parameters.steps_per_epoch)
# Epsilon-greedy schedule: decay epsilon after every single action, never reset it.
agent.attach(
    bc.EpsilonController(initial_e=parameters.epsilon_start,
                         e_decays=parameters.epsilon_decay,
                         e_min=parameters.epsilon_min,
                         evaluate_on='action',
                         periodicity=1,
                         reset_every='none'))

# Restore a previously trained network from disk, then freeze every layer except the
# encoder and re-initialise the encoder (method names suggest a transfer-learning
# setup — confirm against the learning algo's implementation).
agent.setNetwork(
    "./backup_maze_lowdim/test_70460bbfb88bb08e2c4c9f4352805f62760b7d2d.epoch=48"
)
agent._learning_algo.freezeAllLayersExceptEncoder()
agent._learning_algo.resetEncoder()

#TODO compare transfer training time with for instance relearning
agent.run(10, 500)  # 10 epochs with 500 steps, so 5000 steps in total
print("end gathering data")

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon,
# discount and learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

# Every epoch end, one has the possibility to modify the learning rate using a LearningRateController.
# Here we wish to update the learning rate after every training epoch (periodicity=1), according to
# the parameters given.
agent.attach(
    bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))
environment=env,
random_state=rng)
# NOTE(review): this chunk begins mid-way through a constructor call (a qnetwork,
# judging by the keyword names) whose opening lines are outside this view.

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# Plus, we also want to display after each training episode (!= than after every training) the average bellman
# residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epoch to interfere with the training of the
# agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this
# way, using the controllersToDisable argument of the InterleavedTestEpochController. The value of this argument
# is a list of the indexes of all controllers to disable, their index reflecting in which order they were added.
agent.attach(bc.InterleavedTestEpochController(
    epoch_length=500,
    controllers_to_disable=[0, 1]))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
# Exploration policies: fully random (eps=1.) for training, eps=0.1 for testing.
train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    # Longest history length over all of the environment's inputs
    # (idiomatic rewrite of max over range(len(...)) indexing).
    max(dims[0] for dims in env.inputDimensions()),
    parameters.batch_size,
    rng,
    train_policy=train_policy,
    test_policy=test_policy)

# --- load saved network and test
# agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
agent.setNetwork(input_nnet)

#tesot01
# BUGFIX: the original printed the raw total reward as "average per episode" (it never
# divided). Also guard the division: no test episode has run yet, so the episode count
# may be zero here.
avg = (agent._total_mode_reward / agent._totalModeNbrEpisode
       if agent._totalModeNbrEpisode else agent._total_mode_reward)
print(" _total_mode_reward: ", agent._total_mode_reward, ", nmbr of episode: ",
      agent._totalModeNbrEpisode, ", average per episode: ", avg)

Epoch_length = 500
mode = parameters.mode  # mode 3 has planning depth 6; mode 2 has planning depth 3
agent.startMode(mode, Epoch_length)
agent.run(1, Epoch_length)

# Average reward per test episode over the epoch that just ran (guard against a
# ZeroDivisionError in case no episode terminated during the epoch).
avg = (agent._total_mode_reward / agent._totalModeNbrEpisode
       if agent._totalModeNbrEpisode else agent._total_mode_reward)
print(" _total_mode_reward: ", agent._total_mode_reward, ", nmbr of episode: ",
      agent._totalModeNbrEpisode, ", average per episode: ", avg)

#just testing the saved nnet (possibly by visualizing the actions in the env)
network,
# Exploration policy: Gaussian noise on the actions when 'gauss' is requested,
# otherwise 0.1-epsilon-greedy.
train_policy=GaussianNoiseExplorationPolicy(
    network, env.nActions(), rng, .5) if args.exploration == 'gauss' else
EpsilonGreedyPolicy(network, env.nActions(), rng, 0.1),
# Replay memory sized at twice the total number of training steps, capped at 100000.
replay_memory_size=min(args.epochs[0] * args.epochs[1] * 2, 100000),
batch_size=32,
random_state=rng)
# NOTE(review): this chunk begins mid-way through the NeuralAgent(...) constructor
# call whose opening lines are outside this view.

agent.setDiscountFactor(0.95)

# Controllers (indices reflect attach order, referenced by controllers_to_disable below):
# 0: track the best-scoring network on validation id 0 (name-based; confirm semantics).
agent.attach(bc.FindBestController(validationID=0, unique_fname=args.fname))
# 1: per-epoch progress summary.
agent.attach(bc.VerboseController())
# 2: train after every action.
agent.attach(bc.TrainerController())
# 3: decay schedule matching whichever exploration policy was chosen above.
if args.exploration == 'gauss':
    agent.attach(
        GaussianNoiseController(initial_std_dev=0.5,
                                n_decays=args.epochs[0] * args.epochs[1],
                                final_std_dev=0.005))
else:
    agent.attach(
        bc.EpsilonController(initial_e=0.8,
                             e_decays=args.epochs[0] * args.epochs[1],
                             e_min=0.05))
# 4: learning-rate schedule (initial value, decay, periodicity taken from the CLI).
agent.attach(
    bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                              args.learning_rate[2]))
# Interleaved 1000-step test epoch; controllers 1-4 are disabled while it runs.
agent.attach(
    bc.InterleavedTestEpochController(epoch_length=1000,
                                      controllers_to_disable=[1, 2, 3, 4]))

agent.run(n_epochs=args.epochs[0], epoch_length=args.epochs[1])
def run(self):
    """Build the environment, policies, networks and agent from the experiment's
    config files, attach the requested controllers, and run the experiment.

    Everything is driven by ``self.params``. Side effects: reads config files under
    ``cfgs/`` (and temporarily writes/removes ``cfgs/ctrl/<name>/temp``), seeds the
    global numpy RNG, and runs the agent for ``self.params.epochs`` epochs.
    """
    # --- Seeding: -1 means draw a fresh random seed, otherwise use the given one.
    if self.params.rng == -1:
        seed = random.randrange(2**32 - 1)
    else:
        seed = int(self.params.rng)
    rng = np.random.RandomState(seed)
    np.random.seed(seed)

    # --- Environment: parse its config file and instantiate it with the seeded rng.
    conf_env_dir = "cfgs/env/" + self.params.env_module + "/" + self.params.env_conf_file
    env_params = parse_conf(conf_env_dir)
    env_params["rng"] = rng
    env = get_mod_object("envs",self.params.env_module,"env",(rng,), env_params,mode=1)

    # --- Training policy: load the class, then resolve its kwargs. If the first CLI
    # arg names an existing config file it is parsed, otherwise the module's
    # "default" config is used; remaining key=value args override parsed entries.
    pol_train = get_mod_class("pols",self.params.pol_train_module, "pol")
    self.params.pol_train_args = flatten(self.params.pol_train_args) if self.params.pol_train_args is not None else []
    pol_train_args = parse_conf("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0]) if len(self.params.pol_train_args) > 0 and isfile("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0]) else parse_conf("cfgs/pol/" + self.params.pol_train_module + "/default")
    pol_train_args_2 = erase_dict_from_keyword_list(pol_train_args, self.params.pol_train_args)
    pol_train_args = revalidate_dict_from_conf_module(pol_train_args_2, "pol", self.params.pol_train_module)

    # --- Test policy: same resolution scheme as the training policy.
    pol_test = get_mod_class("pols",self.params.pol_test_module, "pol")
    self.params.pol_test_args = flatten(self.params.pol_test_args) if self.params.pol_test_args is not None else []
    pol_test_args = parse_conf("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0]) if len(self.params.pol_test_args) > 0 and isfile("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0]) else parse_conf("cfgs/pol/" + self.params.pol_test_module + "/default")
    pol_test_args_2 = erase_dict_from_keyword_list(pol_test_args, self.params.pol_test_args)
    pol_test_args = revalidate_dict_from_conf_module(pol_test_args_2, "pol", self.params.pol_test_module)

    # --- Backend neural network: same config-file-or-default resolution.
    self.params.backend_nnet_conf_file= flatten(self.params.backend_nnet_conf_file) if self.params.backend_nnet_conf_file is not None else []
    backend_nnet_params = parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0]) if len(self.params.backend_nnet_conf_file) > 0 and isfile("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0]) else parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/default")
    backend_nnet_params_2 = erase_dict_from_keyword_list(backend_nnet_params,self.params.backend_nnet_conf_file)
    backend_nnet_params = revalidate_dict_from_conf_module(backend_nnet_params_2, "backend_nnet", self.params.backend_nnet)
    neural_net = get_mod_class("neural_nets", self.params.backend_nnet,"neural_net")

    # --- Controller neural net wrapping the backend network. NOTE(review): the
    # fallback file here is uppercase "DEFAULT", unlike the lowercase "default"
    # used everywhere above — confirm this asymmetry is intentional.
    self.params.ctrl_neural_nets_conf_file = flatten(self.params.ctrl_neural_nets_conf_file) if self.params.ctrl_neural_nets_conf_file is not None else []
    ctrl_neural_nets_params = parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0]) if len(self.params.ctrl_neural_nets_conf_file) > 0 and isfile("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0]) else parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/DEFAULT")
    ctrl_neural_nets_params_2 = erase_dict_from_keyword_list(ctrl_neural_nets_params,self.params.ctrl_neural_nets_conf_file)
    ctrl_neural_nets_params = revalidate_dict_from_conf_module(ctrl_neural_nets_params_2, "ctrl_neural_net", self.params.qnetw_module)
    ctrl_neural_nets_params["neural_network"] = neural_net
    ctrl_neural_nets_params["neural_network_kwargs"] = backend_nnet_params
    ctrl_neural_nets_params["batch_size"] = self.params.batch_size
    ctrl_neural_net = get_mod_object("ctrl_neural_nets", self.params.qnetw_module, "ctrl_neural_net", (env,),ctrl_neural_nets_params, mode=0)

    # --- Agent assembled from all of the above.
    agent = NeuralAgent([env], [ctrl_neural_net], replay_memory_size=self.params.replay_memory_size, replay_start_size=None, batch_size=self.params.batch_size, random_state=rng, exp_priority=self.params.exp_priority, train_policy=pol_train,train_policy_kwargs=pol_train_args, test_policy=pol_test, test_policy_kwargs=pol_test_args, only_full_history=self.params.only_full_history)

    # --- Controllers: each spec is [name, conf-file-or-key=value, key=value ...].
    for tc in self.params.controllers:
        len_tc = len(tc)
        s = tc[0]
        redo_conf = False
        if len_tc >= 2:
            #Test if sc is a config file or an argument to override
            if '=' not in tc[1]:
                #This is a config file
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/" + tc[1])
            else:
                # No config file given: start from the default, then apply the override.
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                sc = tc[1].split("=")
                if sc[0] in conf_ctrl.keys():
                    conf_ctrl[sc[0]] = sc[1]
                    redo_conf = True
                else:
                    print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
            if len_tc > 2:
                # Remaining args must all be key=value overrides of config entries.
                remainder = tc[2:]
                for a in remainder:
                    sc = a.split("=")
                    if len(sc) != 2:
                        print ("Warning : arg " + a + " for controller parametrization is ill formed. It needs to be in the form key=value.")
                    else:
                        redo_conf = True
                        if sc[0] in conf_ctrl.keys():
                            conf_ctrl[sc[0]] = sc[1]
                        else:
                            print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
            #Create a temporary config file with the erased parameter and go through parse_conf again
            if redo_conf:
                write_conf(conf_ctrl, "cfgs/ctrl/" + s + "/temp")
                conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/temp")
                os.remove("cfgs/ctrl/" + s + "/temp")
        else:
            # No extra args at all: just use the controller's default config.
            conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
        controller = get_mod_object("ctrls",s,"ctrl",tuple(),conf_ctrl,mode=0)
        agent.attach(controller)

    agent.run(self.params.epochs, self.params.max_size_episode)