def __init__(self, settings):
    self.settings = update_settings(DEFAULT_SETTINGS, settings)

    # network and training
    self.q_network = parse_block(settings["model"])
    self.optimizer = parse_optimizer(settings["optimizer"])
    out_sh = self.q_network.output_shape()
    assert len(out_sh) == 2 and out_sh[0] is None, \
        "Output of the Discrete DeepQ must be (None, num_actions), where None corresponds to batch_size"
    self.num_actions = out_sh[1]
    self.minibatch_size = self.settings["minibatch_size"]
    self.train_every_nth = self.settings["train_every_nth"]
    self.discount_rate = self.settings["discount_rate"]
    self.transitions_so_far = 0

    # exploration schedule
    self.exploration_period = self.settings["exploration_period"]
    self.random_action_probability = self.settings["random_action_probability"]

    # experience replay
    self.replay_buffer = deque()
    self.store_every_nth = self.settings["store_every_nth"]
    self.replay_buffer_size = self.settings["replay_buffer_size"]

    # target network updates
    self.target_network_update_rate = self.settings["target_network_update_rate"]

    self.summary_writer = None

    # build the TensorFlow graph and initialize both networks' variables
    self.s = tf.Session()
    self.create_variables()
    self.s.run(tf.initialize_variables(
        self.q_network.variables() + self.target_q_network.variables()))
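# A minimal, hypothetical sketch of the settings dict this constructor reads.
# The keys mirror the lookups above; the numeric values are illustrative
# assumptions, and the "model" / "optimizer" entries (consumed by parse_block
# and parse_optimizer) are omitted because their schema is not shown in this snippet.
EXAMPLE_DEEPQ_SETTINGS = {
    "minibatch_size":             32,
    "train_every_nth":            4,
    "discount_rate":              0.95,
    "exploration_period":         5000,
    "random_action_probability":  0.05,
    "store_every_nth":            1,
    "replay_buffer_size":         10000,
    "target_network_update_rate": 0.01,
}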
def make_apply_gradients_fun(settings, model):
    #### CREATE ALL THE OPTIMIZERS
    # one optimizer per variable-name prefix, as declared in the model settings
    optimizers = {
        name: parse_optimizer(optimizer_settings)
        for name, optimizer_settings in settings['model']['settings']['optimizers'].items()
    }

    update_ops = []
    for var, grad in zip(model.variables(), model.gradients()):
        # match each variable to the optimizer whose name prefixes the variable's name
        var_optimizer = None
        for optimizer_name, optimizer in optimizers.items():
            if var.name.startswith(optimizer_name):
                var_optimizer = optimizer
                break
        if var_optimizer is None:
            raise Exception("Could not match optimizer for variable %s" % (var.name,))
        update_op = var_optimizer.apply_gradients([(grad.value(), var)])
        update_ops.append(update_op)

    # group all per-variable updates into a single op and return a callable that runs it
    combined_update_op = tf.group(*update_ops)
    return lambda: model.get_session().run(combined_update_op)
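# A hypothetical usage sketch for make_apply_gradients_fun. The nesting of the
# settings matches the lookups above (per-prefix optimizer configs under
# settings['model']['settings']['optimizers']); the prefix names, the optimizer
# configs themselves, and the `model` object are assumptions for illustration.
#
#   settings = {
#       'model': {
#           'settings': {
#               'optimizers': {
#                   'q_network/convolution': {...},      # parsed by parse_optimizer
#                   'q_network/fully_connected': {...},
#               }
#           }
#       }
#   }
#   apply_gradients = make_apply_gradients_fun(settings, model)
#   apply_gradients()  # runs one grouped gradient-application step in the model's session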