Example #1
if __name__ == "__main__":

    rng = np.random.RandomState(123456)
    
    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        environment=env,
        random_state=rng)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and 
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (not after every training step), the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())
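
    # For reference, a sketch of the same two attachments with their keyword
    # arguments spelled out, using the names that appear in Example #5 further
    # down; the values here are illustrative, not taken from this file:
    # agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
    # agent.attach(bc.TrainerController(evaluate_on='action',
    #                                   periodicity=1,
    #                                   show_episode_avg_V_value=True,
    #                                   show_avg_Bellman_residual=True))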

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a 
    # "test epoch" between each training epoch. We do not want these test epoch to interfere with the training of the 
    # agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this 
Example #2
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        high_int_dim=False,
        internal_dim=2)
    
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # As for the discount factor and the learning rate, one can periodically update the parameter of the epsilon-greedy
    # policy implemented by the agent. This controller has a few more capabilities, as it allows one to choose more
    # precisely when to update epsilon: after every X actions, episodes or epochs. This parameter can also be reset every
    # episode or epoch (or never, hence the resetEvery='none').
    agent.attach(bc.EpsilonController(
Example #3
rng = np.random.RandomState(123456)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         double_Q=True,
                         random_state=rng)
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env, batch_size=32, random_state=rng)

agent = NeuralAgent(env,
                    network,
                    train_policy=EpsilonGreedyPolicy(network, env.nActions(),
                                                     rng, 0.0),
                    replay_memory_size=1000,
                    batch_size=32,
                    random_state=rng)

#agent.attach(bc.VerboseController())
if args.fname == 'baseline':
    agent = EmpiricalTreatmentAgent(env)
else:
    agent.setNetwork(args.fname)

count = 0
length_success = []
avg_rad = []
avg_h_cell_killed = []
avg_percentage = []
Example #4
        env,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
        div_entrop_loss=1.)
    
    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy,
        test_policy=test_policy)

    # --- load saved network and test
    # agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    agent.setNetwork(input_nnet)

    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward, ", number of episodes: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)

    Epoch_length = 500
    mode = parameters.mode  # mode 3 has planning depth 6; mode 2 has planning depth 3
    agent.startMode(mode, Epoch_length)
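
    # After switching to a test mode with startMode, the excerpt would typically
    # run the agent for one epoch of that mode, as done in Example #17 further down:
    # agent.run(1, Epoch_length)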
Example #5
                          parameters.batch_size,
                          parameters.update_rule,
                          rng,
                          double_Q=True)

    train_policy = LongerExplorationPolicy(
        qnetwork, env.nActions(), rng,
        1.0)  #EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)
    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        qnetwork,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        exp_priority=1.,
                        train_policy=train_policy,
                        test_policy=test_policy)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))
Example #6
env = CellEnvironment(args.obs_type, args.resize, args.reward, args.network,
                      args.special)

rng = np.random.RandomState(777)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         freeze_interval=args.epochs[1],
                         double_Q=True,
                         random_state=rng)
    agent = NeuralAgent(env,
                        network,
                        replay_memory_size=min(
                            int(args.epochs[0] * args.epochs[1] * 1.1),
                            100000),
                        batch_size=32,
                        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    agent.attach(
        bc.EpsilonController(initial_e=0.8,
                             e_decays=args.epochs[0] * args.epochs[1],
                             e_min=0.2))
    agent.attach(
        bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                                  args.learning_rate[2]))
Example #7
File: run.py, Project: epochstamp/mdrli
    def run(self):
        if self.params.rng == -1:
            seed = random.randrange(2**32 - 1)
        else:
            seed = int(self.params.rng)
        rng = np.random.RandomState(seed)
        np.random.seed(seed)

        conf_env_dir = "cfgs/env/" + self.params.env_module + "/" + self.params.env_conf_file
        env_params = parse_conf(conf_env_dir)
        env_params["rng"] = rng
        env = get_mod_object("envs",self.params.env_module,"env",(rng,), env_params,mode=1)

        pol_train = get_mod_class("pols",self.params.pol_train_module, "pol")
        self.params.pol_train_args = flatten(self.params.pol_train_args) if self.params.pol_train_args is not None else [] 
        pol_train_args = parse_conf("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0]) if len(self.params.pol_train_args) > 0 and isfile("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0]) else parse_conf("cfgs/pol/" + self.params.pol_train_module + "/default")
        pol_train_args_2 = erase_dict_from_keyword_list(pol_train_args, self.params.pol_train_args)
        pol_train_args = revalidate_dict_from_conf_module(pol_train_args_2, "pol", self.params.pol_train_module)

        pol_test = get_mod_class("pols",self.params.pol_test_module, "pol")
        self.params.pol_test_args = flatten(self.params.pol_test_args) if self.params.pol_test_args is not None else [] 
        pol_test_args = parse_conf("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0]) if len(self.params.pol_test_args) > 0 and isfile("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0]) else parse_conf("cfgs/pol/" + self.params.pol_test_module + "/default")
        pol_test_args_2 = erase_dict_from_keyword_list(pol_test_args, self.params.pol_test_args)
        pol_test_args = revalidate_dict_from_conf_module(pol_test_args_2, "pol", self.params.pol_test_module)

        self.params.backend_nnet_conf_file= flatten(self.params.backend_nnet_conf_file) if self.params.backend_nnet_conf_file is not None else [] 
        backend_nnet_params = parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0]) if len(self.params.backend_nnet_conf_file) > 0 and isfile("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0]) else parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/default")
        backend_nnet_params_2 = erase_dict_from_keyword_list(backend_nnet_params,self.params.backend_nnet_conf_file)
        backend_nnet_params = revalidate_dict_from_conf_module(backend_nnet_params_2, "backend_nnet", self.params.backend_nnet)
        
        neural_net = get_mod_class("neural_nets", self.params.backend_nnet,"neural_net")
        
        self.params.ctrl_neural_nets_conf_file = flatten(self.params.ctrl_neural_nets_conf_file) if self.params.ctrl_neural_nets_conf_file is not None else [] 
        ctrl_neural_nets_params = parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0]) if len(self.params.ctrl_neural_nets_conf_file) > 0 and isfile("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0]) else parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/DEFAULT")
        ctrl_neural_nets_params_2 = erase_dict_from_keyword_list(ctrl_neural_nets_params,self.params.ctrl_neural_nets_conf_file)
        ctrl_neural_nets_params = revalidate_dict_from_conf_module(ctrl_neural_nets_params_2, "ctrl_neural_net", self.params.qnetw_module)

        ctrl_neural_nets_params["neural_network"] = neural_net
        ctrl_neural_nets_params["neural_network_kwargs"] = backend_nnet_params
        ctrl_neural_nets_params["batch_size"] = self.params.batch_size
        ctrl_neural_net = get_mod_object("ctrl_neural_nets", self.params.qnetw_module, "ctrl_neural_net", (env,),ctrl_neural_nets_params, mode=0)
        
        agent = NeuralAgent([env], [ctrl_neural_net], replay_memory_size=self.params.replay_memory_size, replay_start_size=None, batch_size=self.params.batch_size, random_state=rng, exp_priority=self.params.exp_priority, train_policy=pol_train,train_policy_kwargs=pol_train_args, test_policy=pol_test, test_policy_kwargs=pol_test_args, only_full_history=self.params.only_full_history)
       

        for tc in self.params.controllers:
                len_tc = len(tc)                
                s = tc[0]
                redo_conf = False
                if len_tc >= 2:
                    
                    # Test whether tc[1] is a config file or an argument to override
                    # (see the format sketch after this loop)
                    if '=' not in tc[1]:
                        # This is a config file
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/" + tc[1])
                    else:
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                        sc = tc[1].split("=")
                        if sc[0] in conf_ctrl.keys():
                            conf_ctrl[sc[0]] = sc[1]
                            redo_conf = True
                        else:
                            print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)

                    if len_tc > 2:
                        remainder = tc[2:]
                        for a in remainder:
                             sc = a.split("=")
                             if len(sc) != 2:
                                 print ("Warning : arg " + a + " for controller parametrization is ill formed. It needs to be in the form key=value.") 
                             else:
                                 redo_conf = True
                                 if sc[0] in conf_ctrl.keys():
                                    conf_ctrl[sc[0]] = sc[1]
                                 else:
                                    print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
                    #Create a temporary config file with the erased parameter and go through parse_conf again
                    if redo_conf:
                        write_conf(conf_ctrl, "cfgs/ctrl/" + s + "/temp")
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/temp")
                        os.remove("cfgs/ctrl/" + s + "/temp") 
                    
                else:
                    conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                controller = get_mod_object("ctrls",s,"ctrl",tuple(),conf_ctrl,mode=0)
                agent.attach(controller)
        agent.run(self.params.epochs, self.params.max_size_episode)
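
        # A hedged illustration of what self.params.controllers is expected to
        # contain, given the parsing loop above; the controller names and keys
        # below are hypothetical. Each entry is [<ctrl name>, <conf file or key=value>, ...]:
        #   [["verbose"],                                 # -> cfgs/ctrl/verbose/default
        #    ["trainer", "my_conf"],                      # -> cfgs/ctrl/trainer/my_conf
        #    ["epsilon", "e_min=0.1", "e_decays=10000"]]  # default conf + key=value overrides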
Example #8
                         parameters.batch_size,
                         parameters.update_rule,
                         rng,
                         double_Q=True,
                         high_int_dim=HIGH_INT_DIM,
                         internal_dim=3)

    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        learning_algo,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        train_policy=train_policy,
                        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
Example #9
                          parameters.rms_epsilon,
                          parameters.momentum,
                          parameters.clip_delta,
                          parameters.freeze_interval,
                          parameters.batch_size,
                          parameters.update_rule,
                          rng,
                          neural_network=myNN)

    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.00)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        qnetwork,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "PLE_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
Example #10
File: run_ALE.py, Project: VinF/deer
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
    )

    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.05)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        test_policy=test_policy,
    )

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "ALE_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on="epoch", periodicity=1))
Example #11
                         parameters.momentum,
                         parameters.clip_norm,
                         parameters.freeze_interval,
                         parameters.batch_size,
                         parameters.update_rule,
                         rng,
                         high_int_dim=False,
                         internal_dim=1)  #todo MAKE 1

    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        learning_algo,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # test saved network
    # --- load saved network and test
    agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")

    avg = agent._total_mode_reward
Example #12
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.network_type,
        parameters.update_rule,
        parameters.batch_accumulator,
        rng,
        DoubleQ=True)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluateOn='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluateOn='action', 
        periodicity=parameters.update_frequency, 
        showEpisodeAvgVValue=False, 
        showAvgBellmanResidual=False))
Example #13
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        random_state=rng,
        neural_network_actor=NN_keras,
        neural_network_critic=NN_keras)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluate_on='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluate_on='action', 
        periodicity=parameters.update_frequency, 
        show_episode_avg_V_value=True, 
        show_avg_Bellman_residual=True))
Example #14
from deer.q_networks.q_net_theano import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

if __name__ == "__main__":

    rng = np.random.RandomState(123456)

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (not after every training step), the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch. We do not want these test epoch to interfere with the training of the
    # agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this
    # way, using the controllersToDisable argument of the InterleavedTestEpochController. The value of this argument
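
    # A sketch of the attachment this comment is building up to; controllersToDisable
    # is named in the comment above, while the other keyword names and values are
    # assumptions for illustration only:
    # agent.attach(bc.InterleavedTestEpochController(
    #     id=0, epochLength=500, controllersToDisable=[0, 1]))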
Example #15
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = pendulum_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.network_type, parameters.update_rule,
                          parameters.batch_accumulator, rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(evaluateOn='epoch', periodicity=1))

    agent.attach(
        bc.TrainerController(evaluateOn='action',
                             periodicity=parameters.update_frequency,
                             showEpisodeAvgVValue=True,
                             showAvgBellmanResidual=True))

    agent.attach(
        bc.LearningRateController(
Example #16
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.network_type,
        parameters.update_rule,
        parameters.batch_accumulator,
        rng,
        double_Q=True)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        exp_priority=1.)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluate_on='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluate_on='action', 
        periodicity=parameters.update_frequency, 
        show_episode_avg_V_value=True, 
        show_avg_Bellman_residual=True))
Example #17
                      show_game=True)

    # --- Instantiate learning algorithm ---
    learning_algo = CRAR(env,
                         rng,
                         double_Q=True,
                         high_int_dim=HIGH_INT_DIM,
                         internal_dim=3)

    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng,
                                      0.1)  #1.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        learning_algo,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    #set name of nnet and planning depth:
    agent.setNetwork("test_71c8fc5b085cd8aa090e8e8e63d0a9450a3b7a27.epoch=35")
    # agent.setNetwork("test_964ccb7a9490cf3c3309a90d07485a77c3ec6486")

    #just running to check its behaviour:
    Epoch_length = 200
    mode = 3  #mode 3 has planning depth 6
    agent.startMode(mode, Epoch_length)
    agent.run(1, Epoch_length)
Example #18
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = MG_two_storages_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.update_rule, rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "MG2S_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes.
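
    # The attachment this comment introduces is shown in full in Example #5 above;
    # a sketch of it here:
    # agent.attach(bc.TrainerController(evaluate_on='action',
    #                                   periodicity=parameters.update_frequency,
    #                                   show_episode_avg_V_value=True,
    #                                   show_avg_Bellman_residual=True))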
Example #19
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
        div_entrop_loss=1.)

    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    train_policy2 = EpsilonGreedyPolicy(learning_algo2, env.nActions(), rng, 1.)
    test_policy2 = EpsilonGreedyPolicy(learning_algo2, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy,
        test_policy=test_policy)

    agent2 = NeuralAgent(
        env,
        learning_algo2,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy2,
        test_policy=test_policy2)