import inspect
import logging

import numpy as np

# Variant 1: multi-objective-reward (MOR) trainer that samples noise through a
# full (non-diagonal) square-root covariance factor A.
def __init__(self, training_directory, config):
    self.config = config
    self.training_directory = training_directory
    self.model_save_directory = self.training_directory + 'params/'
    self.env = resolve_env(self.config['environment'])(
        test_cases[self.config['environment']][self.config['environment_index']](),
        self.training_directory, self.config)
    self.env.pre_processing()
    self.model = resolve_model(self.config['model'])(self.config)
    self.reward = resolve_reward(self.config['reward'])  # used by inspect.getsource below
    self.MOR_flag = self.config['MOR_flag']
    if self.MOR_flag:
        self.multiple_rewards = resolve_multiple_rewards(self.config['multiple_rewards'])
        self.reward_mins = np.zeros(len(self.multiple_rewards))
        self.reward_maxs = np.zeros(len(self.multiple_rewards))
    self.master_params = self.model.init_master_params(self.config['from_file'],
                                                       self.config['params_file'])
    self.learning_rate = self.config['learning_rate'] * 20
    # Square root of the noise covariance matrix: isotropic base of scale sqrt(noise_std_dev).
    self.A = np.sqrt(self.config['noise_std_dev']) * np.eye(len(self.master_params))
    self.mu = self.config['mu']
    # Perturb the upper triangle so the sampling covariance A A^T is not purely diagonal.
    for i in range(len(self.master_params)):
        for j in range(i, len(self.master_params)):
            self.A[i, j] += np.random.normal() * np.sqrt(self.config['noise_std_dev']) * 0.05
    if self.config['from_file']:
        logging.info("\nLoaded Master Params from:")
        logging.info(self.config['params_file'])
    logging.info("\nReward:")
    logging.info(inspect.getsource(self.reward) + "\n")
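# A minimal, self-contained sketch (not taken from the source) of how a
# square-root covariance factor like self.A above could be used: if A
# satisfies C = A @ A.T, then A @ z with z ~ N(0, I) is distributed N(0, C).
# The function name and the way candidates are formed around master_params
# are illustrative assumptions.
def sample_correlated_candidates(A, master_params, n_individuals):
    dim = len(master_params)
    candidates = []
    for _ in range(n_individuals):
        z = np.random.randn(dim)   # standard normal draw
        epsilon = A @ z            # correlated perturbation ~ N(0, A A^T)
        candidates.append(np.asarray(master_params) + epsilon)
    return candidates

# Example: a 3-parameter model with the isotropic base factor used above.
A = np.sqrt(0.1) * np.eye(3)
print(sample_correlated_candidates(A, np.zeros(3), n_individuals=2))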
# Variant 2: MOR trainer with isotropic noise, visualization settings, and a
# moving success rate.
def __init__(self, training_directory, config):
    self.config = config
    self.training_directory = training_directory
    self.model_save_directory = self.training_directory + 'params/'
    self.env = resolve_env(self.config['environment'])(
        test_cases[self.config['environment']][self.config['environment_index']](),
        self.training_directory, self.config)
    self.env.pre_processing()
    self.model = resolve_model(self.config['model'])(self.config)
    self.reward = resolve_reward(self.config['reward'])
    self.MOR_flag = self.config['MOR_flag']
    if self.MOR_flag:
        self.multiple_rewards = resolve_multiple_rewards(self.config['multiple_rewards'])
        self.reward_mins = np.zeros(len(self.multiple_rewards))
        self.reward_maxs = np.zeros(len(self.multiple_rewards))
    self.master_params = self.model.init_master_params(self.config['from_file'],
                                                       self.config['params_file'])
    self.mu = self.config['n_individuals'] / 4  # elite count: top quarter of the population
    self.learning_rate = self.config['learning_rate']
    self.noise_std_dev = self.config['noise_std_dev']
    self.visualize = self.config['visualize']
    self.visualize_every = self.config['visualize_every']
    self.moving_success_rate = 0
    if self.config['from_file']:
        logging.info("\nLoaded Master Params from:")
        logging.info(self.config['params_file'])
    logging.info("\nReward:")
    logging.info(inspect.getsource(self.reward) + "\n")
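# A hypothetical sketch of what reward_mins / reward_maxs above could support
# when MOR_flag is set: min-max normalizing several reward signals onto a
# common [0, 1] scale before combining them. The in-place running-bound
# updates and the plain averaging are assumptions, not the source's method.
def normalize_and_combine(raw_rewards, reward_mins, reward_maxs):
    raw = np.asarray(raw_rewards, dtype=float)
    np.minimum(reward_mins, raw, out=reward_mins)  # widen running minima
    np.maximum(reward_maxs, raw, out=reward_maxs)  # widen running maxima
    span = np.where(reward_maxs > reward_mins, reward_maxs - reward_mins, 1.0)
    return float(np.mean((raw - reward_mins) / span))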
# Variant 3: minimal trainer without MOR support or a parameter save directory;
# the test case is passed uninstantiated in this variant.
def __init__(self, training_directory, config):
    self.config = config
    self.training_directory = training_directory
    self.env = resolve_env(self.config['environment'])(
        test_cases[self.config['environment']][self.config['environment_index']],
        self.training_directory, self.config)
    self.env.pre_processing()
    self.model = resolve_model(self.config['model'])(self.config)
    self.reward = resolve_reward(self.config['reward'])
    self.master_params = self.model.init_master_params()
    self.learning_rate = self.config['learning_rate']
    self.noise_std_dev = self.config['noise_std_dev']
    logging.info("\nReward:")
    logging.info(inspect.getsource(self.reward) + "\n")
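# The resolve_* helpers used in all three variants suggest a registry pattern:
# dict lookups keyed by config strings. A hypothetical sketch, shown here for
# resolve_reward only; the registry contents and the config key are
# illustrative assumptions, not the project's actual API.
def squared_distance_reward(state, goal):
    return -float(np.sum((np.asarray(state) - np.asarray(goal)) ** 2))

REWARDS = {'squared_distance': squared_distance_reward}

def resolve_reward(name):
    return REWARDS[name]  # raises KeyError for an unregistered reward

reward = resolve_reward('squared_distance')
print(reward([0.0, 1.0], [1.0, 1.0]))  # -1.0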