Example #1
    def call(self, inputs, training):
        """
        Forward pass of the One-Gate Mixture of Experts model.

        Parameters
        ----------
        inputs: np.array or tf.Tensor
          Input to the model

        training: bool
          True during training, False otherwise

        Returns
        -------
        outputs: list of tf.Tensor
          Outputs of forward pass for each task
        """
        outputs = []
        if self.base_layer:
            if has_arg(self.base_layer, "training"):
                inputs = self.base_layer(inputs, training)
            else:
                inputs = self.base_layer(inputs)
        # the one-gate model has a single MoE layer shared by all tasks
        moe = self.moe_layers[0](inputs, training)
        for task in self.task_layers:
            if has_arg(task, "training"):
                outputs.append(task(moe, training))
            else:
                outputs.append(task(moe))
        return outputs
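For orientation, here is a minimal NumPy sketch of the math this forward pass performs, with hypothetical shapes (batch 4, input dim 8, 3 experts, expert dim 16); every name below is illustrative and not part of the model above:

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

x = np.random.randn(4, 8)                                 # batch of 4, dim 8
expert_out = np.stack([x @ np.random.randn(8, 16)
                       for _ in range(3)], axis=1)        # (4, 3 experts, 16)
gate = np.random.randn(8, 3)                              # one gate shared by all tasks

probs = softmax(x @ gate)                                 # (4, 3)
moe = (probs[..., None] * expert_out).sum(axis=1)         # (4, 16)
# every task head then consumes this same `moe` tensor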
Example #2
    def call(self, inputs, training):
        """
        Forward pass of the Multi-Gate Mixture of Experts model.

        Parameters
        ----------
        inputs: np.array or tf.Tensor
          Input to the model

        training: bool
          If True, runs the model in training mode; otherwise in
          prediction mode.

        Returns
        -------
        outputs: list of tf.Tensor
          Outputs of forward pass for each task
        """
        outputs = []
        if self.base_layer:
            if has_arg(self.base_layer, "training"):
                inputs = self.base_layer(inputs, training)
            else:
                inputs = self.base_layer(inputs)
        moes = [moe(inputs, training) for moe in self.moe_layers]
        for task, moe in zip(self.task_layers, moes):
            if has_arg(task, "training"):
                outputs.append(task(moe, training))
            else:
                outputs.append(task(moe))
        return outputs
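The only difference from the one-gate variant above is that each task mixes the experts with its own gate. A self-contained NumPy sketch with hypothetical shapes (3 experts, 2 tasks):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

x = np.random.randn(4, 8)                                 # batch of 4, dim 8
expert_out = np.stack([x @ np.random.randn(8, 16)
                       for _ in range(3)], axis=1)        # (4, 3 experts, 16)
gates = [np.random.randn(8, 3) for _ in range(2)]         # one gate per task

# each task head receives its own gated mixture of the shared experts
task_inputs = [(softmax(x @ g)[..., None] * expert_out).sum(axis=1)
               for g in gates]                            # two (4, 16) tensors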
Example #3
    def test_has_arg(self):

        def test_func_diff(a, b):
            return a - b

        def test_func_sum(a=2, b=3):
            return a + b

        self.assertTrue(has_arg(test_func_diff, "a"))
        self.assertTrue(has_arg(test_func_sum, "b"))
        self.assertFalse(has_arg(test_func_sum, "c"))
        self.assertFalse(has_arg(test_func_diff, "z"))
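These assertions hold for an implementation along the following lines. This is only a sketch using inspect, assuming has_arg checks the callable's signature directly; the project's actual helper may additionally unwrap Keras layers (whose __call__ is (*args, **kwargs)) by inspecting their call method:

import inspect

def has_arg(fn, name):
    # sketch: report whether the callable `fn` accepts an argument `name`
    try:
        sig = inspect.signature(fn)
    except (TypeError, ValueError):
        return False
    return name in sig.parameters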
Example #4
    def call(self, inputs, training):
        """
        Defines the set of computations performed in the MOE layer.
        The MOE layer can accept a single tensor (in which case the same
        input is assumed for every expert) or a collection/sequence of
        tensors (in which case each tensor corresponds to its own expert).

        Parameters
        ----------
        inputs: np.array, tf.Tensor, or list/tuple of np.arrays or tf.Tensors
          Inputs to the MOE layer

        training: bool
          True if the layer is called in training mode, False otherwise

        Returns
        -------
        moe_output: tf.Tensor
          Output of the mixture-of-experts layer (linearly weighted sum
          of the expert outputs).
        """
        # compute each expert's output (pass the training argument only when
        # the expert accepts it, since some experts take it and some do not)
        experts_output = []
        for expert in self.expert_layers:
            if has_arg(expert, "training"):
                experts_output.append(expert(inputs, training))
            else:
                experts_output.append(expert(inputs))

        # compute the utilization probability of each expert for the given
        # inputs
        if self.base_expert_prob_layer:
            inputs = self.base_expert_prob_layer(inputs)
        expert_utilization_prob = self.expert_probs(inputs)
        if self.add_dropout:
            expert_utilization_prob = self.drop_expert_layer(
                expert_utilization_prob, training)

        # compute weighted output of experts
        moe_output = 0
        for i, expert_output in enumerate(experts_output):
            moe_output += (
                expert_output *
                tf.expand_dims(expert_utilization_prob[:, i], axis=-1))
        return moe_output
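The only shape manipulation in the weighted sum above is the expand_dims broadcast, which turns the per-expert probability column of shape (batch,) into (batch, 1) so it scales every unit of the expert output. A small standalone check with hypothetical values:

import tensorflow as tf

probs = tf.constant([[0.2, 0.5, 0.3],
                     [0.6, 0.1, 0.3]])                    # (batch=2, n_experts=3)
experts_output = [tf.ones((2, 4)) * k for k in range(3)]  # each (batch, units=4)

moe_output = tf.zeros((2, 4))
for i, expert_output in enumerate(experts_output):
    # (batch,) -> (batch, 1), broadcasts across the units axis
    moe_output += expert_output * tf.expand_dims(probs[:, i], axis=-1)
# row 0 is 0*0.2 + 1*0.5 + 2*0.3 = 1.1 in every unit, row 1 is 0.7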
    def call(self, inputs, training):
        """
        Forward pass through the constraining layer. The constraining layer
        can accept a single tensor (in which case the same input is assumed
        for every expert) or a collection/sequence of tensors (in which case
        each tensor corresponds to its own expert).

        Parameters
        ----------
        inputs: tf.Tensor, np.array or List/Tuple of tf.Tensors/np.arrays
            Input tensor

        training: bool
            True in case of training, False otherwise

        Returns
        -------
        outputs: list of tf.Tensor
            Output of each constrained column
        """
        # compute output of the layer
        outputs = [None] * len(self.layers)
        for i, layer in enumerate(self.layers):
            if has_arg(layer, "training"):
                if isinstance(inputs, Sequence):
                    outputs[i] = layer(inputs[i], training)
                else:
                    outputs[i] = layer(inputs, training)
            else:
                if isinstance(inputs, Sequence):
                    outputs[i] = layer(inputs[i])
                else:
                    outputs[i] = layer(inputs)

        # gather the trainable variables of every column of the MTL network
        trainable_vars = [layer.trainable_variables for layer in self.layers]

        # add constraining loss
        sharing_loss = 0.
        if self.l2_regularizer > 0.:
            sharing_loss += self.l2_regularizer * regularize_norm_diff(
                trainable_vars, "L2")
        if self.l1_regularizer > 0.:
            sharing_loss += self.l1_regularizer * regularize_norm_diff(
                trainable_vars, "L1")

        # add sharing loss and return outputs
        self.add_loss(sharing_loss)
        return outputs
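regularize_norm_diff is not shown in these examples. A plausible sketch of such a soft-sharing penalty, assuming it sums the chosen norm of the pairwise differences between corresponding variables of the columns; the actual helper in the codebase may be defined differently:

import tensorflow as tf

def regularize_norm_diff(trainable_vars, norm):
    # sketch: penalize how far the columns' weights drift apart;
    # assumes all columns share the same architecture, so corresponding
    # variables have identical shapes
    loss = 0.
    n_columns = len(trainable_vars)
    for i in range(n_columns):
        for j in range(i + 1, n_columns):
            for v_i, v_j in zip(trainable_vars[i], trainable_vars[j]):
                diff = v_i - v_j
                if norm == "L2":
                    loss += tf.reduce_sum(tf.square(diff))
                else:  # "L1"
                    loss += tf.reduce_sum(tf.abs(diff))
    return loss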