# Example #1
    def __init__(self, settings):
        """Set up the Discrete DeepQ controller.

        Builds the Q-network and optimizer from the configuration, creates
        the TensorFlow graph variables and a session, and initializes the
        Q-network / target-network variables.

        Args:
            settings: dict of overrides merged on top of DEFAULT_SETTINGS.
        """
        self.settings       = update_settings(DEFAULT_SETTINGS, settings)

        # network and training
        # NOTE(review): read from the merged self.settings so that
        # DEFAULT_SETTINGS can supply "model"/"optimizer" when the caller
        # omits them — the original read the raw `settings` dict, which
        # bypassed the defaults applied just above.
        self.q_network = parse_block(self.settings["model"])
        self.optimizer = parse_optimizer(self.settings["optimizer"])

        out_sh = self.q_network.output_shape()
        assert len(out_sh) == 2 and out_sh[0] is None, \
                "Output of the Discrete DeepQ must be (None, num_actions), where None corresponds to batch_size"
        self.num_actions      = out_sh[1]
        # NOTE(review): attribute name keeps the historical "minipatch" typo
        # so sibling methods that read it keep working; the value is the
        # minibatch size.
        self.minipatch_size   = self.settings["minibatch_size"]

        # train once every `train_every_nth` observed transitions
        self.train_every_nth              = self.settings['train_every_nth']
        self.discount_rate    = self.settings["discount_rate"]

        # epsilon-greedy exploration schedule state
        self.transitions_so_far        = 0
        self.exploration_period        = self.settings['exploration_period']
        self.random_action_probability = self.settings['random_action_probability']

        # experience replay: store once every `store_every_nth` transitions,
        # capped at `replay_buffer_size` entries
        self.replay_buffer                = deque()
        self.store_every_nth              = self.settings['store_every_nth']
        self.replay_buffer_size           = self.settings['replay_buffer_size']

        # soft-update coefficient for the target network
        self.target_network_update_rate   = self.settings['target_network_update_rate']

        self.summary_writer = None

        self.s = tf.Session()

        # create_variables is expected to define self.target_q_network
        # (presumably a copy of q_network) — TODO confirm against the class.
        self.create_variables()
        self.s.run(tf.initialize_variables(
                self.q_network.variables() + self.target_q_network.variables()))
# Example #2
def make_apply_gradients_fun(settings, model):
    """Build a zero-argument function that applies per-variable optimizers.

    Each optimizer declared under settings['model']['settings']['optimizers']
    is matched to model variables by name prefix; every variable must match
    exactly one (the first matching) optimizer.

    Args:
        settings: parsed config dict containing the optimizers sub-dict.
        model: object exposing variables(), gradients() and get_session().

    Returns:
        Callable that runs the combined gradient-application op once.

    Raises:
        Exception: if some model variable matches no optimizer name prefix.
    """
    #### CREATE ALL THE OPTIMIZERS
    optimizer_specs = settings['model']['settings']['optimizers']
    optimizers = {
        name: parse_optimizer(spec)
        for name, spec in optimizer_specs.items()
    }

    update_ops = []
    for var, grad in zip(model.variables(), model.gradients()):
        # Match the variable to an optimizer by name prefix.
        var_optimizer = None
        for optimizer_name, optimizer in optimizers.items():
            if var.name.startswith(optimizer_name):
                var_optimizer = optimizer
                break
        # BUG FIX: the original tested and used `optimizer` (the inner-loop
        # leftover) instead of `var_optimizer`, so an unmatched variable
        # silently received the last-iterated optimizer and this error could
        # never fire correctly (and raised NameError on an empty dict).
        if var_optimizer is None:
            raise Exception("Could not match optimizer for variable %s" % (var.name))

        update_op = var_optimizer.apply_gradients([(grad.value(), var)])
        update_ops.append(update_op)
    combined_update_op = tf.group(*update_ops)

    return lambda: model.get_session().run(combined_update_op)
# Example #3
    def __init__(self, settings):
        """Set up the Discrete DeepQ controller.

        Builds the Q-network and optimizer from the configuration, creates
        the TensorFlow graph variables and a session, and initializes the
        Q-network / target-network variables.

        Args:
            settings: dict of overrides merged on top of DEFAULT_SETTINGS.
        """
        self.settings = update_settings(DEFAULT_SETTINGS, settings)

        # network and training
        # NOTE(review): use the merged self.settings (defaults applied) rather
        # than the raw `settings` argument, so default "model"/"optimizer"
        # entries are honored when the caller omits them.
        self.q_network = parse_block(self.settings["model"])
        self.optimizer = parse_optimizer(self.settings["optimizer"])

        out_sh = self.q_network.output_shape()
        assert len(out_sh) == 2 and out_sh[0] is None, \
                "Output of the Discrete DeepQ must be (None, num_actions), where None corresponds to batch_size"
        self.num_actions = out_sh[1]
        # NOTE(review): attribute name keeps the historical "minipatch" typo
        # so sibling methods that read it keep working; the value is the
        # minibatch size.
        self.minipatch_size = self.settings["minibatch_size"]

        # train once every `train_every_nth` observed transitions
        self.train_every_nth = self.settings['train_every_nth']
        self.discount_rate = self.settings["discount_rate"]

        # epsilon-greedy exploration schedule state
        self.transitions_so_far = 0
        self.exploration_period = self.settings['exploration_period']
        self.random_action_probability = self.settings[
            'random_action_probability']

        # experience replay: store once every `store_every_nth` transitions,
        # capped at `replay_buffer_size` entries
        self.replay_buffer = deque()
        self.store_every_nth = self.settings['store_every_nth']
        self.replay_buffer_size = self.settings['replay_buffer_size']

        # soft-update coefficient for the target network
        self.target_network_update_rate = self.settings[
            'target_network_update_rate']

        self.summary_writer = None

        self.s = tf.Session()

        # create_variables is expected to define self.target_q_network
        # (presumably a copy of q_network) — TODO confirm against the class.
        self.create_variables()
        self.s.run(
            tf.initialize_variables(self.q_network.variables() +
                                    self.target_q_network.variables()))