Code example #1
            discount_factor_max=parameters.discount_max,
            periodicity=1))

    # As for the discount factor and the learning rate, one can periodically update the parameter of the epsilon-greedy
    # policy implemented by the agent. This controller has a few more capabilities, as it allows one to choose more
    # precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also be reset every
    # episode or epoch (or never, hence reset_every='none').
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))
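    # A hypothetical variant (not in the original script): decay epsilon once per episode
    # instead of once per action, and reset it at the start of every epoch.
    # agent.attach(
    #     bc.EpsilonController(initial_e=parameters.epsilon_start,
    #                          e_decays=parameters.epsilon_decay,
    #                          e_min=parameters.epsilon_min,
    #                          evaluate_on='episode',
    #                          periodicity=1,
    #                          reset_every='epoch'))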

    agent.run(1, N_SAMPLES)

    #print (agent._dataset._rewards._data[0:500])
    #print (agent._dataset._terminals._data[0:500])
    print("end gathering data")
    old_rewards = agent._dataset._rewards._data
    old_terminals = agent._dataset._terminals._data
    old_actions = agent._dataset._actions._data
    old_observations = agent._dataset._observations[0]._data

    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
    # In addition, after each training episode (not after every training step) we want to display the average Bellman
    # residual and the average of the V values obtained during the last episode, hence the last two arguments.
    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             # last two arguments assumed here; they enable the Bellman-residual
                             # and V-value summaries described above
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))
Code example #2
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.TEST_MODE,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4, 6],
            periodicity=2,
            show_score=True,
            summarize_every=parameters.period_btw_summary_perfs))

    # --- Run the experiment ---
    try:
        os.mkdir("params")
    except Exception:
        pass
    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.run(parameters.epochs, parameters.steps_per_epoch)

    # --- Show results ---
    basename = "scores/" + fname
    scores = load(basename + "_scores.jldump")
    plt.plot(range(1,
                   len(scores['vs']) + 1),
             scores['vs'],
             label="VS",
             color='b')
    plt.plot(range(1,
                   len(scores['ts']) + 1),
             scores['ts'],
             label="TS",
             color='r')
    plt.legend()
Code example #3
    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # In addition, after each training episode (not after every training step) we want to display the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All the previous controllers act on the agent during the training epochs it goes through. However, we want to
    # interleave a "test epoch" between training epochs. We do not want these test epochs to interfere with the
    # training of the agent. Therefore, we disable those controllers for the whole duration of the interleaved test
    # epochs, using the controllers_to_disable argument of the InterleavedTestEpochController. The value of this
    # argument is a list of the indexes of the controllers to disable, each index reflecting the order in which the
    # controllers were attached.
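    # In this script, index 0 is the VerboseController and index 1 is the TrainerController attached above.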
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=500,
                                          controllers_to_disable=[0, 1]))

    # --- Run the experiment ---
    agent.run(n_epochs=100, epoch_length=1000)
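For context, this toy-environment snippet assumes the deer package and the example Toy_env module are importable. A minimal set of imports and the random-number generator it relies on could look roughly as follows (module paths vary between deer versions, so treat this as a sketch):

import numpy as np
import deer.experiment.base_controllers as bc
from deer.agent import NeuralAgent
# The Q-network lives under deer.learning_algos in newer deer versions, deer.q_networks in older ones.
from deer.learning_algos.q_net_keras import MyQNetwork
from Toy_env import MyEnv as Toy_env  # example environment shipped with deer's examples

rng = np.random.RandomState(123456)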
Code example #4
File: run_mountain_car.py  Project: Moshiasri/deer
    agent.attach(bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))

    agent.attach(bc.DiscountFactorController(
        initial_discount_factor=parameters.discount,
        discount_factor_growth=parameters.discount_inc,
        discount_factor_max=parameters.discount_max,
        periodicity=1))

    agent.attach(bc.EpsilonController(
        initial_e=parameters.epsilon_start, 
        e_decays=parameters.epsilon_decay, 
        e_min=parameters.epsilon_min,
        evaluate_on='action', 
        periodicity=1, 
        reset_every='none'))

    agent.attach(bc.InterleavedTestEpochController(
        id=0, 
        epoch_length=parameters.steps_per_test, 
        controllers_to_disable=[0, 1, 2, 3, 4], 
        periodicity=2, 
        show_score=True,
        summarize_every=parameters.period_btw_summary_perfs))
    
    # --- Run the experiment ---
    agent.run(parameters.epochs, parameters.steps_per_epoch)
Code example #5
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    agent.setNetwork(
        "./backup_maze_lowdim/test_70460bbfb88bb08e2c4c9f4352805f62760b7d2d.epoch=48"
    )
    agent._learning_algo.freezeAllLayersExceptEncoder()
    agent._learning_algo.resetEncoder()
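    # With every layer frozen except the encoder and the encoder re-initialized (as the method names
    # suggest), only the state-representation part of the loaded network is retrained on the new task.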
    # TODO: compare transfer training time with, for instance, relearning from scratch

    agent.run(10, 500)  #10 epochs with 500 steps, so 5000 random steps
    print("end gathering data")

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # At the end of every epoch, one can modify the learning rate using a LearningRateController. Here we
    # wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given.
    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))
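If, as is typical for deer's LearningRateController, the current rate is multiplied by learning_rate_decay at each periodic update, the rate after n epochs is simply initial_learning_rate * learning_rate_decay**n. A small illustration with hypothetical values (the real ones come from the parameters object above):

initial_learning_rate = 0.005  # hypothetical value
learning_rate_decay = 0.99     # hypothetical value
for epoch in range(5):
    # Print the learning rate the agent would use at the start of each epoch.
    print(epoch, initial_learning_rate * learning_rate_decay ** epoch)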
Code example #6
File: run_toy_env_simple.py  Project: Moshiasri/deer
        environment=env,
        random_state=rng)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and 
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # In addition, after each training episode (not after every training step) we want to display the average Bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All the previous controllers act on the agent during the training epochs it goes through. However, we want to
    # interleave a "test epoch" between training epochs. We do not want these test epochs to interfere with the
    # training of the agent. Therefore, we disable those controllers for the whole duration of the interleaved test
    # epochs, using the controllers_to_disable argument of the InterleavedTestEpochController. The value of this
    # argument is a list of the indexes of the controllers to disable, each index reflecting the order in which the
    # controllers were attached.
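    # In this script, index 0 is the VerboseController and index 1 is the TrainerController attached above.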
    agent.attach(bc.InterleavedTestEpochController(
        epoch_length=500, 
        controllers_to_disable=[0, 1]))
        
    # --- Run the experiment ---
    agent.run(n_epochs=100, epoch_length=1000)
Code example #7
    
    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy,
        test_policy=test_policy)

    # --- load saved network and test
    # agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    agent.setNetwork(input_nnet)

    avg = agent._total_mode_reward
    print(" _total_mode_reward: ", agent._total_mode_reward, ", number of episodes: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)

    Epoch_length = 500
    mode = parameters.mode  # mode 3 has planning depth 6; mode 2 has planning depth 3
    agent.startMode(mode, Epoch_length)
    agent.run(1, Epoch_length)

    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward, ", number of episodes: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)
    
    #just testing the saved nnet (possibly by visualizing the actions in the env)
Code example #8
        network,
        train_policy=GaussianNoiseExplorationPolicy(
            network, env.nActions(), rng, .5) if args.exploration == 'gauss'
        else EpsilonGreedyPolicy(network, env.nActions(), rng, 0.1),
        replay_memory_size=min(args.epochs[0] * args.epochs[1] * 2, 100000),
        batch_size=32,
        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
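    # Attach an annealing controller that matches the chosen exploration policy:
    # a Gaussian-noise standard-deviation decay for 'gauss', an epsilon decay otherwise.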
    if args.exploration == 'gauss':
        agent.attach(
            GaussianNoiseController(initial_std_dev=0.5,
                                    n_decays=args.epochs[0] * args.epochs[1],
                                    final_std_dev=0.005))
    else:
        agent.attach(
            bc.EpsilonController(initial_e=0.8,
                                 e_decays=args.epochs[0] * args.epochs[1],
                                 e_min=0.05))
    agent.attach(
        bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                                  args.learning_rate[2]))
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=1000,
                                          controllers_to_disable=[1, 2, 3, 4]))

agent.run(n_epochs=args.epochs[0], epoch_length=args.epochs[1])
Code example #9
File: run.py  Project: epochstamp/mdrli
    def run(self):
        if self.params.rng == -1:
                seed = random.randrange(2**32 - 1)
        else:
                seed = int(self.params.rng)
        rng = np.random.RandomState(seed)
        np.random.seed(seed)

        conf_env_dir = "cfgs/env/" + self.params.env_module + "/" + self.params.env_conf_file
        env_params = parse_conf(conf_env_dir)
        env_params["rng"] = rng
        env = get_mod_object("envs",self.params.env_module,"env",(rng,), env_params,mode=1)

        

        pol_train = get_mod_class("pols",self.params.pol_train_module, "pol")
        self.params.pol_train_args = flatten(self.params.pol_train_args) if self.params.pol_train_args is not None else [] 
        if len(self.params.pol_train_args) > 0 and isfile("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0]):
            pol_train_args = parse_conf("cfgs/pol/" + self.params.pol_train_module + "/" + self.params.pol_train_args[0])
        else:
            pol_train_args = parse_conf("cfgs/pol/" + self.params.pol_train_module + "/default")
        pol_train_args_2 = erase_dict_from_keyword_list(pol_train_args, self.params.pol_train_args)
        pol_train_args = revalidate_dict_from_conf_module(pol_train_args_2, "pol", self.params.pol_train_module)

        pol_test = get_mod_class("pols",self.params.pol_test_module, "pol")
        self.params.pol_test_args = flatten(self.params.pol_test_args) if self.params.pol_test_args is not None else [] 
        if len(self.params.pol_test_args) > 0 and isfile("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0]):
            pol_test_args = parse_conf("cfgs/pol/" + self.params.pol_test_module + "/" + self.params.pol_test_args[0])
        else:
            pol_test_args = parse_conf("cfgs/pol/" + self.params.pol_test_module + "/default")
        pol_test_args_2 = erase_dict_from_keyword_list(pol_test_args, self.params.pol_test_args)
        pol_test_args = revalidate_dict_from_conf_module(pol_test_args_2, "pol", self.params.pol_test_module)

        self.params.backend_nnet_conf_file= flatten(self.params.backend_nnet_conf_file) if self.params.backend_nnet_conf_file is not None else [] 
        if len(self.params.backend_nnet_conf_file) > 0 and isfile("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0]):
            backend_nnet_params = parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/" + self.params.backend_nnet_conf_file[0])
        else:
            backend_nnet_params = parse_conf("cfgs/backend_nnet/" + self.params.backend_nnet + "/default")
        backend_nnet_params_2 = erase_dict_from_keyword_list(backend_nnet_params,self.params.backend_nnet_conf_file)
        backend_nnet_params = revalidate_dict_from_conf_module(backend_nnet_params_2, "backend_nnet", self.params.backend_nnet)
        
        neural_net = get_mod_class("neural_nets", self.params.backend_nnet,"neural_net")
        
        self.params.ctrl_neural_nets_conf_file = flatten(self.params.ctrl_neural_nets_conf_file) if self.params.ctrl_neural_nets_conf_file is not None else [] 
        if len(self.params.ctrl_neural_nets_conf_file) > 0 and isfile("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0]):
            ctrl_neural_nets_params = parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/" + self.params.ctrl_neural_nets_conf_file[0])
        else:
            ctrl_neural_nets_params = parse_conf("cfgs/ctrl_nnet/" + self.params.qnetw_module + "/DEFAULT")
        ctrl_neural_nets_params_2 = erase_dict_from_keyword_list(ctrl_neural_nets_params,self.params.ctrl_neural_nets_conf_file)
        ctrl_neural_nets_params = revalidate_dict_from_conf_module(ctrl_neural_nets_params_2, "ctrl_neural_net", self.params.qnetw_module)

        ctrl_neural_nets_params["neural_network"] = neural_net
        ctrl_neural_nets_params["neural_network_kwargs"] = backend_nnet_params
        ctrl_neural_nets_params["batch_size"] = self.params.batch_size
        ctrl_neural_net = get_mod_object("ctrl_neural_nets", self.params.qnetw_module, "ctrl_neural_net", (env,),ctrl_neural_nets_params, mode=0)
        
        agent = NeuralAgent([env], [ctrl_neural_net],
                            replay_memory_size=self.params.replay_memory_size,
                            replay_start_size=None,
                            batch_size=self.params.batch_size,
                            random_state=rng,
                            exp_priority=self.params.exp_priority,
                            train_policy=pol_train,
                            train_policy_kwargs=pol_train_args,
                            test_policy=pol_test,
                            test_policy_kwargs=pol_test_args,
                            only_full_history=self.params.only_full_history)
       

        for tc in self.params.controllers:
                len_tc = len(tc)                
                s = tc[0]
                redo_conf = False
                if len_tc >= 2:
                    
                    #Test if sc is a config file or an argument to override
                    if '=' not in tc[1]:
                        #This is a config file
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/" + tc[1])
                    else:
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                        sc = tc[1].split("=")
                        if sc[0] in conf_ctrl.keys():
                            conf_ctrl[sc[0]] = sc[1]
                            redo_conf = True
                        else:
                            print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)

                    if len_tc > 2:
                        remainder = tc[2:]
                        for a in remainder:
                             sc = a.split("=")
                             if len(sc) != 2:
                                 print ("Warning : arg " + a + " for controller parametrization is ill formed. It needs to be in the form key=value.") 
                             else:
                                 redo_conf = True
                                 if sc[0] in conf_ctrl.keys():
                                    conf_ctrl[sc[0]] = sc[1]
                                 else:
                                    print ("Warning : parameter " + str(sc[0]) + " is not included in config specs for the controller " + s)
                    #Create a temporary config file with the erased parameter and go through parse_conf again
                    if redo_conf:
                        write_conf(conf_ctrl, "cfgs/ctrl/" + s + "/temp")
                        conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/temp")
                        os.remove("cfgs/ctrl/" + s + "/temp") 
                    
                else:
                    conf_ctrl = parse_conf("cfgs/ctrl/" + s + "/default")
                controller = get_mod_object("ctrls",s,"ctrl",tuple(),conf_ctrl,mode=0)
                agent.attach(controller)
        agent.run(self.params.epochs, self.params.max_size_episode)
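The controller loop above mixes config-file lookup with key=value overrides; the core override logic is easier to follow in isolation. The sketch below uses a hypothetical helper apply_overrides (not part of the mdrli code) to show the idea: tokens of the form key=value replace entries of the default controller configuration, and anything else triggers a warning.

def apply_overrides(defaults, tokens):
    """Return a copy of `defaults` with key=value tokens applied on top."""
    conf = dict(defaults)
    for tok in tokens:
        parts = tok.split("=")
        if len(parts) != 2:
            print("Warning: arg " + tok + " is ill-formed; expected key=value.")
        elif parts[0] in conf:
            conf[parts[0]] = parts[1]
        else:
            print("Warning: parameter " + parts[0] + " is not in the config specs.")
    return conf

# Example: override the periodicity of a default controller configuration.
print(apply_overrides({"evaluate_on": "epoch", "periodicity": "1"}, ["periodicity=2"]))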