def init_search(self):
    '''
    Initialize the random search internal variables.
    Sets trial budget, search geometry, bookkeeping state, and the
    (initially random) best point, then resumes from saved history if
    an experiment_id_override was given.
    '''
    self.max_evals = self.experiment_spec['param']['max_evals']
    self.num_of_trials = self.max_evals
    self.search_dim = len(self.param_range_keys)
    self.precision = 4  # decimal roundoff for biject_continuous
    self.search_radius = self.init_search_radius = 0.5
    self.search_count = 0  # number of times search() has ran
    self.search_exhausted = False
    self.search_path = []
    # seed best_point with a random sample so comparisons have a baseline
    self.best_point = {
        'trial_num': None,
        'param': None,
        'x': self.sample_cube(),
        'fitness_score': float('-inf'),
    }
    problem = PROBLEMS.get(self.experiment_spec['problem'])
    self.ideal_fitness_score = ideal_fitness_score(problem)
    # fixed typo in log message: 'scrore' -> 'score'
    logger.info('ideal_fitness_score: {}'.format(
        self.ideal_fitness_score))
    self.filename = './data/{}/random_search_history.json'.format(
        self.experiment_id)
    if self.experiment_id_override is not None:
        self.load()  # resume
def compile_model(self):
    # Build the DDPG-style training graph, then compile the Keras models.
    # NOTE(review): statement order matters — the TF gradient ops must be
    # created against the actor/critic graphs before the target networks
    # are compiled.
    # symbolic state input of the actor network
    self.actor_state = self.actor.inputs[0]
    # placeholder for the action-gradient (dQ/da) fed from the critic
    self.action_gradient = self.K.placeholder(
        shape=(None, self.env_spec['action_dim']))
    # gradients of actor weights, weighted by -dQ/da
    # (negated so that applying the optimizer ASCENDS the Q-value)
    self.actor_grads = self.K.tf.gradients(
        self.actor.output, self.actor.trainable_weights,
        -self.action_gradient)
    # op that applies the deterministic policy gradient to the actor
    self.actor_optimize = self.K.tf.train.AdamOptimizer(
        self.lr).apply_gradients(
        zip(self.actor_grads, self.actor.trainable_weights))
    # symbolic state and action inputs of the critic network
    self.critic_state = self.critic.inputs[0]
    self.critic_action = self.critic.inputs[1]
    # dQ/da op, evaluated at runtime to feed action_gradient above
    self.critic_action_grads = self.K.tf.gradients(
        self.critic.output, self.critic_action)
    # the actor is trained via self.actor_optimize, not Keras compile
    # self.actor.compile(
    #     loss='mse',
    #     optimizer=self.optimizer.actor_keras_optimizer)
    self.target_actor.compile(
        loss='mse',
        optimizer=self.optimizer.target_actor_keras_optimizer)
    logger.info("Actor Models compiled")
    # critic uses the custom Bellman-target loss
    self.critic.compile(
        loss=self.custom_critic_loss,
        optimizer=self.optimizer.critic_keras_optimizer)
    self.target_critic.compile(
        loss='mse',
        optimizer=self.optimizer.target_critic_keras_optimizer)
    logger.info("Critic Models compiled")
def init_search(self):
    '''
    Initialize the random search internal variables: the evaluation
    budget, search-space bookkeeping, and the starting best point.
    Resumes from saved history when experiment_id_override is set.
    '''
    self.max_evals = self.experiment_spec['param']['max_evals']
    self.num_of_trials = self.max_evals
    self.search_dim = len(self.param_range_keys)
    self.precision = 4  # decimal roundoff for biject_continuous
    self.search_radius = self.init_search_radius = 0.5
    self.search_count = 0  # number of times search() has ran
    self.search_exhausted = False
    self.search_path = []
    self.best_point = {
        'trial_num': None,
        'param': None,
        'x': self.sample_cube(),  # random starting point in the unit cube
        'fitness_score': float('-inf'),
    }
    problem = PROBLEMS.get(self.experiment_spec['problem'])
    self.ideal_fitness_score = ideal_fitness_score(problem)
    # fixed typo in log message: 'scrore' -> 'score'
    logger.info(
        'ideal_fitness_score: {}'.format(self.ideal_fitness_score))
    self.filename = './data/{}/random_search_history.json'.format(
        self.experiment_id)
    if self.experiment_id_override is not None:
        self.load()  # resume
def build_critic_models(self):
    '''
    Assemble the critic: separate state and action branches merged by
    concatenation, the remaining hidden layers, and a linear Q head.
    '''
    def _branch(input_dim):
        # one-hidden-layer branch for a single input modality
        seq = self.Sequential()
        seq.add(self.Dense(self.hidden_layers[0],
                           input_shape=(input_dim,),
                           activation=self.hidden_layers_activation,
                           init='lecun_uniform'))
        return seq

    state_branch = _branch(self.env_spec['state_dim'])
    action_branch = _branch(self.env_spec['action_dim'])
    model = self.Sequential()
    model.add(self.Merge([state_branch, action_branch], mode='concat'))
    # remaining hidden layers after the merge, if any
    for width in self.hidden_layers[1:]:
        model.add(self.Dense(width,
                             init='lecun_uniform',
                             activation=self.hidden_layers_activation))
    # fixed linear output for the scalar Q-value
    model.add(self.Dense(1, init='lecun_uniform', activation='linear'))
    logger.info('Critic model summary')
    model.summary()
    self.model = model
    logger.info("Model built")
    return self.model
def compile_model(self):
    '''Compile both networks with MSE loss and the shared optimizer.'''
    keras_optimizer = self.optimizer.keras_optimizer
    for network in (self.actor, self.critic):
        network.compile(loss='mse', optimizer=keras_optimizer)
    logger.info("Actor and critic compiled")
def build_actor(self):
    '''Build the actor: shared hidden stack plus an action-dim output.'''
    network = self.Sequential()
    super(ActorCritic, self).build_hidden_layers(network)
    output_layer = self.Dense(self.env_spec['action_dim'],
                              init='lecun_uniform',
                              activation=self.output_layer_activation)
    network.add(output_layer)
    logger.info("Actor summary")
    network.summary()
    self.actor = network
def build_critic(self):
    '''Build the critic: shared hidden stack plus a single-unit output.'''
    network = self.Sequential()
    super(ActorCritic, self).build_hidden_layers(network)
    output_layer = self.Dense(1,
                              init='lecun_uniform',
                              activation=self.output_layer_activation)
    network.add(output_layer)
    logger.info("Critic summary")
    network.summary()
    self.critic = network
def build_actor(self):
    '''Construct the actor network on top of the shared hidden layers.'''
    net = self.Sequential()
    super(ActorCritic, self).build_hidden_layers(net)
    # output layer sized to the action space
    net.add(self.Dense(
        self.env_spec['action_dim'],
        init='lecun_uniform',
        activation=self.output_layer_activation))
    logger.info("Actor summary")
    net.summary()
    self.actor = net
def build_critic(self):
    '''Construct the critic network on top of the shared hidden layers.'''
    net = self.Sequential()
    super(ActorCritic, self).build_hidden_layers(net)
    # scalar value head
    net.add(self.Dense(
        1,
        init='lecun_uniform',
        activation=self.output_layer_activation))
    logger.info("Critic summary")
    net.summary()
    self.critic = net
def build_model(self):
    '''Build the primary network via the parent class, then clone it as
    the second network for Double DQN.'''
    super(DoubleDQN, self).build_model()
    self.model_2 = clone_model(self.model)
    logger.info("Model 2 summary")
    self.model_2.summary()
    logger.info("Models 1 and 2 built")
    return self.model, self.model_2
def compile_model(self):
    '''Compile both DQN networks, giving the second a cloned optimizer
    so their internal optimizer states stay independent.'''
    optimizer_1 = self.optimizer.keras_optimizer
    self.optimizer.keras_optimizer_2 = clone_optimizer(optimizer_1)
    self.model.compile(loss='mse', optimizer=optimizer_1)
    self.model_2.compile(
        loss='mse', optimizer=self.optimizer.keras_optimizer_2)
    logger.info("Models 1 and 2 compiled")
def __init__(self, Trial, **kwargs):
    '''Set up the hyperoptimizer: store the Trial class, declare the
    required kwargs, configure parallelism, and start the search.'''
    self.Trial = Trial
    # kwargs that set_keys() is expected to receive
    self.REQUIRED_ARGS = [
        'experiment_spec', 'experiment_id_override', 'times']
    # parallel worker bookkeeping
    self.PARALLEL_PROCESS_NUM = PARALLEL_PROCESS_NUM
    self.free_cpu = self.PARALLEL_PROCESS_NUM  # for parallel run
    logger.info('Initialize {}'.format(self.__class__.__name__))
    self.set_keys(**kwargs)
    self.init_search()
def load(self):
    '''
    Restore a previous search history from self.filename so a resumed
    experiment can continue where it left off.
    Always returns None; on read/parse failure the in-memory state is
    left untouched (fresh start).
    '''
    try:
        # context manager closes the handle; the previous
        # open(...).read() leaked the file descriptor
        with open(self.filename) as f:
            search_history = json.load(f)
        self.search_path = search_history['search_path']
        self.best_point = search_history['best_point']
        self.param_search_list = search_history['param_search_list']
        # NOTE(review): 'search_count' is saved by save() but not
        # restored here — confirm that is intended
        logger.info('Load search history from {}'.format(self.filename))
    except (FileNotFoundError, json.JSONDecodeError):
        logger.info('Fail to load search history from {}'.format(
            self.filename))
    return None
def save(self):
    '''Persist the current search state to self.filename as JSON.'''
    snapshot = {
        'search_path': self.search_path,
        'search_count': self.search_count,
        'best_point': self.best_point,
        'param_search_list': self.param_search_list,
    }
    with open(self.filename, 'w') as f:
        f.write(to_json(snapshot))
    logger.info('Save search history to {}'.format(self.filename))
    return
def load(self):
    '''
    Load saved search history (search path, best point, params tried)
    from self.filename. Returns None; failures are logged and ignored
    so a missing/corrupt file just means a fresh search.
    '''
    try:
        # 'with' ensures the file handle is closed — the original
        # open(self.filename).read() never closed it
        with open(self.filename) as f:
            search_history = json.load(f)
        self.search_path = search_history['search_path']
        self.best_point = search_history['best_point']
        self.param_search_list = search_history['param_search_list']
        logger.info('Load search history from {}'.format(self.filename))
    except (FileNotFoundError, json.JSONDecodeError):
        logger.info(
            'Fail to load search history from {}'.format(self.filename))
    return None
def build_actor_models(self):
    '''Build the actor network and an identical target-actor clone.'''
    actor = self.Sequential()
    self.build_hidden_layers(actor)
    actor.add(self.Dense(self.env_spec['action_dim'],
                         init='lecun_uniform',
                         activation=self.output_layer_activation))
    logger.info('Actor model summary')
    actor.summary()
    self.actor = actor
    # target network starts as an exact architectural copy
    self.target_actor = clone_model(self.actor)
def select_action(self, state):
    '''
    Pick an action for the given state.
    Continuous action spaces: deterministic actor output plus
    exploration noise from self.sample(). Discrete: greedy argmax over
    the actor's per-action values.
    '''
    agent = self.agent
    batched_state = np.expand_dims(state, axis=0)
    if self.env_spec['actions'] == 'continuous':
        noise = self.sample()
        return agent.actor.predict(batched_state)[0] + noise
    Q_state = agent.actor.predict(batched_state)[0]
    assert Q_state.ndim == 1
    action = np.argmax(Q_state)
    logger.info(str(Q_state) + ' ' + str(action))
    return action
def build_model(self):
    '''Build model 1 via the parent class, then rebuild an identical
    model 2 from its config and compile it (Double DQN).'''
    super(DoubleDQN, self).build_model()
    twin = Sequential.from_config(self.model.get_config())
    logger.info("Model 2 summary")
    twin.summary()
    self.model2 = twin
    self.model2.compile(
        loss='mean_squared_error', optimizer=self.optimizer)
    logger.info("Models built and compiled")
    return self.model, self.model2
def build_model(self):
    '''Assemble the network: shared hidden stack plus the output layer
    sized to the action space.'''
    network = self.Sequential()
    self.build_hidden_layers(network)
    network.add(self.Dense(self.env_spec['action_dim'],
                           init='lecun_uniform',
                           activation=self.output_layer_activation))
    logger.info("Model summary")
    network.summary()
    self.model = network
    logger.info("Model built")
    return self.model
def save(self):
    '''Write the search state (path, counter, best point, params tried)
    to self.filename as JSON.'''
    keys = ('search_path', 'search_count', 'best_point',
            'param_search_list')
    search_history = {k: getattr(self, k) for k in keys}
    with open(self.filename, 'w') as f:
        f.write(to_json(search_history))
    logger.info('Save search history to {}'.format(self.filename))
    return
def recompile_model(self, sys_vars):
    '''
    Option to change model optimizer settings
    Currently only used for changing the learning rate
    Compiling does not affect the model weights
    '''
    should_change = (
        self.epi_change_lr is not None and
        sys_vars['epi'] == self.epi_change_lr and
        sys_vars['t'] == 0)
    if should_change:
        # decay learning rate by an order of magnitude
        self.lr = self.lr / 10.0
        self.optimizer.change_optim_param(**{'lr': self.lr})
        self.model.compile(
            loss='mse', optimizer=self.optimizer.keras_optimizer)
        logger.info('Model recompiled with new settings: '
                    'Learning rate: {}'.format(self.lr))
    return self.model
def build_model(self):
    '''Build and compile the Q-network: hidden stack followed by an
    output layer carrying a max-norm(3) weight constraint.'''
    network = Sequential()
    self.build_hidden_layers(network)
    network.add(Dense(self.env_spec['action_dim'],
                      init='lecun_uniform',
                      activation=self.output_layer_activation,
                      W_constraint=maxnorm(3)))
    logger.info("Model summary")
    network.summary()
    self.model = network
    self.build_optimizer()
    self.model.compile(
        loss='mean_squared_error', optimizer=self.optimizer)
    logger.info("Model built and compiled")
    return self.model
def recompile_model(self, sys_vars):
    '''
    Option to change model optimizer settings
    Currently only used for changing the learning rate
    Compiling does not affect the model weights
    '''
    trigger = (
        self.epi_change_learning_rate is not None and
        sys_vars['epi'] == self.epi_change_learning_rate and
        sys_vars['t'] == 0)
    if trigger:
        # decay learning rate 10x and rebuild the optimizer with it
        self.learning_rate = self.learning_rate / 10.0
        self.build_optimizer()
        self.model.compile(
            loss='mean_squared_error', optimizer=self.optimizer)
        logger.info('Model recompiled with new settings: '
                    'Learning rate: {}'.format(self.learning_rate))
    return self.model
def satisfy_fitness(self):
    '''break on the first strong solution'''
    best_fitness_score = self.best_point['fitness_score']
    # wait for at least one full batch of parallel trials first
    if self.next_trial_num < self.PARALLEL_PROCESS_NUM:
        return False
    if best_fitness_score > self.ideal_fitness_score:
        logger.info('fitness_score {} > ideal_fitness_score {}, '
                    'could terminate early'.format(
                        best_fitness_score, self.ideal_fitness_score))
        # early termination disabled for now
        # return True  # TODO fix ideal_fitness_score
    return False
def satisfy_fitness(self):
    '''break on the first strong solution'''
    score = self.best_point['fitness_score']
    # not enough trials finished yet to judge
    if self.next_trial_num < self.PARALLEL_PROCESS_NUM:
        return False
    if score > self.ideal_fitness_score:
        logger.info(
            'fitness_score {} > ideal_fitness_score {}, '
            'could terminate early'.format(
                score, self.ideal_fitness_score))
        # intentionally still returns False
        # return True  # TODO fix ideal_fitness_score
    return False
def recompile_model(self, sys_vars):
    '''
    Option to change model optimizer settings
    Currently only used for changing the learning rate
    Compiling does not affect the model weights
    '''
    if self.epi_change_lr is not None:
        at_change_point = (sys_vars['epi'] == self.epi_change_lr and
                           sys_vars['t'] == 0)
        if at_change_point:
            # drop learning rate by 10x and push it into the optimizer
            self.lr = self.lr / 10.0
            self.optimizer.change_optim_param(lr=self.lr)
            self.model.compile(
                loss='mse', optimizer=self.optimizer.keras_optimizer)
            logger.info('Model recompiled with new settings: '
                        'Learning rate: {}'.format(self.lr))
    return self.model
def compile(self, memory, optimizer, policy, preprocessor):
    '''Attach the agent components with two-way references, then
    compile the underlying model(s).'''
    components = {
        'memory': memory,
        'optimizer': optimizer,
        'policy': policy,
        'preprocessor': preprocessor,
    }
    # forward references: agent -> component
    for name, comp in components.items():
        setattr(self, name, comp)
    # back references: component -> agent
    for comp in components.values():
        setattr(comp, 'agent', self)
    self.compile_model()
    logger.info(
        'Compiled:\nAgent, Memory, Optimizer, Policy, '
        'Preprocessor:\n{}'.format(', '.join([
            comp.__class__.__name__
            for comp in [self, memory, optimizer, policy, preprocessor]
        ])))
def compile(self, memory, optimizer, policy, preprocessor):
    '''Wire agent components together (two-way references) and compile
    the model(s).'''
    self.memory = memory
    self.optimizer = optimizer
    self.policy = policy
    self.preprocessor = preprocessor
    # give every component a handle back to the agent
    for component in (memory, optimizer, policy, preprocessor):
        component.agent = self
    self.compile_model()
    logger.info(
        'Compiled:\nAgent, Memory, Optimizer, Policy, '
        'Preprocessor:\n{}'.format(
            ', '.join([comp.__class__.__name__
                       for comp in [self, memory, optimizer,
                                    policy, preprocessor]])
        ))
def build_critic_models(self):
    '''Critic network: state and action input branches concatenated,
    fed through the remaining hidden layers to a linear Q head.'''
    first_width = self.hidden_layers[0]
    state_branch = self.Sequential()
    state_branch.add(self.Dense(
        first_width,
        input_shape=(self.env_spec['state_dim'],),
        activation=self.hidden_layers_activation,
        init='lecun_uniform'))
    action_branch = self.Sequential()
    action_branch.add(self.Dense(
        first_width,
        input_shape=(self.env_spec['action_dim'],),
        activation=self.hidden_layers_activation,
        init='lecun_uniform'))
    merged = self.Merge([state_branch, action_branch], mode='concat')
    model = self.Sequential()
    model.add(merged)
    # hidden layers after the merge, if configured
    for width in self.hidden_layers[1:]:
        model.add(self.Dense(
            width,
            init='lecun_uniform',
            activation=self.hidden_layers_activation))
    model.add(self.Dense(1, init='lecun_uniform',
                         activation='linear'))  # fixed
    logger.info('Critic model summary')
    model.summary()
    self.model = model
    logger.info("Model built")
    return self.model
def run(self):
    '''
    top level method to run the entire hyperoptimizer
    will gather and compose experiment_data, then return it
    '''
    logger.info('Run {}'.format(self.__class__.__name__))
    # crucial maxtasksperchild to free up memory by respawning worker
    pool = mp.Pool(self.PARALLEL_PROCESS_NUM,
                   initializer=self.pool_init, maxtasksperchild=1)
    while (not self.to_terminate()):
        # dispatch one trial whenever a worker slot is free;
        # NOTE(review): presumably post_search releases the slot by
        # incrementing free_cpu — confirm in its definition
        if self.free_cpu > 0:
            self.free_cpu -= 1  # update
            self.search()  # add to self.param_search_list
            trial_num, param = self.next_param()
            pool.apply_async(
                self.run_trial, (trial_num, param),
                callback=self.post_search,
                error_callback=self.raise_error)
        else:
            pass  # keep looping till free_cpu available
        time.sleep(0.02)  # prevent cpu overwork from while loop
    # drain remaining workers before returning results
    pool.close()
    pool.join()
    return self.experiment_data
def build_model(self):
    # Construct both halves of the actor-critic pair by delegating to
    # their dedicated builders.
    self.build_actor()
    self.build_critic()
    logger.info("Actor and critic models built")
def save(self, model_path, global_step=None):
    '''Save the model weights to model_path.

    global_step is accepted but unused here — NOTE(review): presumably
    kept for interface compatibility with other savers; confirm.
    '''
    logger.info('Saving model checkpoint')
    self.model.save_weights(model_path)
def change_optim_param(self, **new_param):
    '''Update optimizer hyperparameters, then re-initialize the
    optimizer so the new values take effect.'''
    self.update_optim_param(**new_param)
    self.init_optimizer()
    logger.info("Optimizer param changed")
    # dump the updated state for debugging
    log_self(self)
def compile_model(self):
    '''Compile the model with MSE loss and the configured optimizer.'''
    keras_optimizer = self.optimizer.keras_optimizer
    self.model.compile(loss='mse', optimizer=keras_optimizer)
    logger.info("Model compiled")
def compile_model(self):
    '''Compile actor and critic with identical loss/optimizer settings.'''
    compile_kwargs = {
        'loss': 'mse',
        'optimizer': self.optimizer.keras_optimizer,
    }
    self.actor.compile(**compile_kwargs)
    self.critic.compile(**compile_kwargs)
    logger.info("Actor and critic compiled")