Beispiel #1
0
    def __init__(self, training_directory, config):
        """Set up training state: environment, model, reward, and the
        CMA-style covariance square root ``A``.

        Args:
            training_directory: base path for this run's artifacts; model
                parameters are saved under ``<training_directory>params/``.
            config: dict of run settings (environment, model, reward,
                hyper-parameters, ...).
        """
        self.config = config
        self.training_directory = training_directory
        self.model_save_directory = self.training_directory + 'params/'
        # Instantiate the environment class with the selected test case.
        self.env = resolve_env(self.config['environment'])(test_cases[self.config['environment']][self.config['environment_index']](), self.training_directory, self.config)
        self.env.pre_processing()
        self.model = resolve_model(self.config['model'])(self.config)
        # BUG FIX: self.reward was never assigned, so the
        # inspect.getsource(self.reward) call at the end of this method
        # raised AttributeError. Resolve it the same way the sibling
        # initializers do.
        self.reward = resolve_reward(self.config['reward'])
        self.MOR_flag = self.config['MOR_flag']
        if self.MOR_flag:
            # Multi-objective mode: track per-objective running min/max.
            self.multiple_rewards = resolve_multiple_rewards(self.config['multiple_rewards'])
            self.reward_mins = np.zeros(len(self.multiple_rewards))
            self.reward_maxs = np.zeros(len(self.multiple_rewards))
        self.master_params = self.model.init_master_params(self.config['from_file'], self.config['params_file'])
        self.learning_rate = self.config['learning_rate'] * 20
        self.A = np.sqrt(self.config['noise_std_dev']) * np.eye(len(self.master_params))  # sqrt of cov matrix
        # NOTE(review): the original assigned self.mu twice
        # (n_individuals/4, then config['mu']); only the final value was
        # ever observable, so the dead first assignment was dropped.
        self.mu = self.config['mu']

        # Randomly perturb the upper triangle of A so the initial
        # covariance square root is not exactly diagonal.
        for i in range(len(self.master_params)):
            for j in range(i, len(self.master_params)):
                self.A[i][j] += np.random.normal() * np.sqrt(self.config['noise_std_dev']) * 0.05
        if self.config['from_file']:
            logging.info("\nLoaded Master Params from:")
            logging.info(self.config['params_file'])
        logging.info("\nReward:")
        logging.info(inspect.getsource(self.reward) + "\n")
Beispiel #2
0
 def __init__(self, training_directory, config):
     """Initialize the trainer: build the environment, model and reward
     function, then load master parameters and hyper-parameters.

     Args:
         training_directory: root directory for this run; parameters are
             written under ``<training_directory>params/``.
         config: dict of run settings.
     """
     self.training_directory = training_directory
     self.config = config
     self.model_save_directory = self.training_directory + 'params/'
     # Look up the environment class and instantiate it with the chosen
     # test case for that environment.
     env_name = self.config['environment']
     test_case = test_cases[env_name][self.config['environment_index']]()
     self.env = resolve_env(env_name)(test_case, self.training_directory, self.config)
     self.env.pre_processing()
     self.model = resolve_model(self.config['model'])(self.config)
     self.reward = resolve_reward(self.config['reward'])
     self.MOR_flag = self.config['MOR_flag']
     if self.MOR_flag:
         # Multi-objective mode: track a running min/max per objective.
         self.multiple_rewards = resolve_multiple_rewards(self.config['multiple_rewards'])
         n_objectives = len(self.multiple_rewards)
         self.reward_mins = np.zeros(n_objectives)
         self.reward_maxs = np.zeros(n_objectives)
     self.master_params = self.model.init_master_params(self.config['from_file'], self.config['params_file'])
     self.mu = self.config['n_individuals'] / 4
     self.learning_rate = self.config['learning_rate']
     self.noise_std_dev = self.config['noise_std_dev']
     self.visualize = self.config['visualize']
     self.visualize_every = self.config['visualize_every']
     self.moving_success_rate = 0
     if self.config['from_file']:
         logging.info("\nLoaded Master Params from:")
         logging.info(self.config['params_file'])
     logging.info("\nReward:")
     logging.info(inspect.getsource(self.reward) + "\n")
Beispiel #3
0
 def __init__(self, training_directory, config):
     """Minimal initializer: set up environment, model, reward and the
     two evolution-strategy hyper-parameters.

     Args:
         training_directory: root directory for this run's artifacts.
         config: dict of run settings.
     """
     self.training_directory = training_directory
     self.config = config
     env_key = self.config['environment']
     # NOTE(review): unlike the sibling initializers, the test case here
     # is passed uninstantiated (no trailing '()') — confirm this is the
     # intended contract for this environment variant.
     self.env = resolve_env(env_key)(
         test_cases[env_key][self.config['environment_index']],
         self.training_directory, self.config)
     self.env.pre_processing()
     self.model = resolve_model(self.config['model'])(self.config)
     self.reward = resolve_reward(self.config['reward'])
     # This model variant initializes its parameters without arguments.
     self.master_params = self.model.init_master_params()
     self.learning_rate = self.config['learning_rate']
     self.noise_std_dev = self.config['noise_std_dev']
     logging.info("\nReward:")
     logging.info(inspect.getsource(self.reward) + "\n")