Example 1
    def _build_module(self, input_layer):
        # DND based Q head (assumes tensorflow is imported as tf at module level)
        from memories import differentiable_neural_dictionary

        if self.tp.checkpoint_restore_dir:
            self.DND = differentiable_neural_dictionary.load_dnd(
                self.tp.checkpoint_restore_dir)
        else:
            self.DND = differentiable_neural_dictionary.QDND(
                self.DND_size,
                input_layer.get_shape()[-1],
                self.num_actions,
                self.new_value_shift_coefficient,
                key_error_threshold=self.DND_key_error_threshold)

        # Retrieve info from DND dictionary
        self.action = tf.placeholder(tf.int8, [None], name="action")
        self.input = self.action
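        # Query the DND off-graph: for each state embedding, py_func returns
        # the keys (embeddings) and values of its number_of_nn nearest entries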
        result = tf.py_func(self.DND.query,
                            [input_layer, self.action, self.number_of_nn],
                            [tf.float64, tf.float64])
        self.dnd_embeddings = tf.to_float(result[0])
        self.dnd_values = tf.to_float(result[1])

        # DND calculation: inverse squared-distance kernel over the neighbours
        square_diff = tf.square(self.dnd_embeddings -
                                tf.expand_dims(input_layer, 1))
        # A small delta keeps the distance strictly positive for exact matches
        distances = tf.reduce_sum(square_diff,
                                  axis=2) + self.l2_norm_added_delta
        weights = 1.0 / distances
        normalised_weights = weights / tf.reduce_sum(
            weights, axis=1, keep_dims=True)
        # The Q-value is the kernel-weighted average of the stored values
        self.output = tf.reduce_sum(self.dnd_values * normalised_weights,
                                    axis=1)
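
The block above is the standard NEC-style DND read: the Q-value is a
weighted average of the values stored for the nearest keys, with weights
proportional to the inverse squared distance to each key. A minimal NumPy
sketch of that kernel (the names dnd_read, embeddings, values and delta are
illustrative, not taken from the snippet):

    import numpy as np

    def dnd_read(query, embeddings, values, delta=1e-3):
        """Inverse-distance-weighted read, mirroring the TF graph above.

        query:      (d,)   state embedding
        embeddings: (k, d) keys of the k nearest neighbours
        values:     (k,)   stored Q-values for those keys
        """
        distances = np.sum((embeddings - query) ** 2, axis=1) + delta
        weights = 1.0 / distances
        weights /= weights.sum()        # normalise to a convex combination
        return np.dot(weights, values)  # kernel-weighted Q estimate

    # A query that exactly matches a key is dominated by that key's value
    keys = np.array([[0.0, 0.0], [1.0, 1.0]])
    vals = np.array([5.0, -1.0])
    print(dnd_read(np.array([0.0, 0.0]), keys, vals))  # ~4.997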
Example 2
    def _build_module(self, input_layer):
        # DND based Q head
        from memories import differentiable_neural_dictionary

        if self.tp.checkpoint_restore_dir:
            self.DND = differentiable_neural_dictionary.load_dnd(self.tp.checkpoint_restore_dir)
        else:
            self.DND = differentiable_neural_dictionary.QDND(
                self.DND_size, input_layer.get_shape()[-1], self.num_actions, self.new_value_shift_coefficient,
                key_error_threshold=self.DND_key_error_threshold)

        # Retrieve info from DND dictionary
        # self.action = tf.placeholder(tf.int8, [None], name="action")
        # self.input = self.action
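        # Build one DND-based Q-value tensor per action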
        self.output = [
            self._q_value(input_layer, action)
            for action in range(self.num_actions)
        ]
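
Examples 2 and 3 move the query into a per-action helper. _q_value is not
shown in these snippets; a plausible sketch, assuming it performs the same
py_func query and inverse-distance weighting as Example 1, but for a single
action (a reconstruction, not the snippets' actual code):

    def _q_value(self, input_layer, action):
        # Hypothetical reconstruction of the unseen helper
        actions = tf.fill([tf.shape(input_layer)[0]], action)
        result = tf.py_func(self.DND.query,
                            [input_layer, actions, self.number_of_nn],
                            [tf.float64, tf.float64])
        dnd_embeddings = tf.to_float(result[0])
        dnd_values = tf.to_float(result[1])
        square_diff = tf.square(dnd_embeddings -
                                tf.expand_dims(input_layer, 1))
        distances = tf.reduce_sum(square_diff,
                                  axis=2) + self.l2_norm_added_delta
        weights = 1.0 / distances
        normalised_weights = weights / tf.reduce_sum(
            weights, axis=1, keep_dims=True)
        return tf.reduce_sum(dnd_values * normalised_weights, axis=1)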
Example 3
    def _build_module(self, input_layer):
        # DND based Q head
        from memories import differentiable_neural_dictionary

        if self.tp.checkpoint_restore_dir:
            self.DND = differentiable_neural_dictionary.load_dnd(
                self.tp.checkpoint_restore_dir)
        else:
            self.DND = differentiable_neural_dictionary.QDND(
                self.DND_size,
                input_layer.get_shape()[-1],
                self.num_actions,
                self.new_value_shift_coefficient,
                key_error_threshold=self.DND_key_error_threshold,
                learning_rate=self.tp.learning_rate)

        # Retrieve info from DND dictionary
        # We assume that all actions have enough entries in the DND
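        # Stack the per-action Q-values and transpose to [batch_size, num_actions]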
        self.output = tf.transpose([
            self._q_value(input_layer, action)
            for action in range(self.num_actions)
        ])
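
Compared with Example 2, this version also threads self.tp.learning_rate into
the QDND constructor and fixes the output layout: the list comprehension
yields num_actions tensors of shape [batch_size], which tf.transpose first
stacks into a [num_actions, batch_size] tensor and then flips to the
conventional Q-head shape [batch_size, num_actions]. A quick illustrative
check (shapes only, names hypothetical):

    import tensorflow as tf

    per_action = [tf.zeros([32]) for _ in range(4)]  # 4 actions, batch of 32
    q = tf.transpose(per_action)
    print(q.shape)  # [32, 4]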