def test_softmax_noncausal_attention_block_output(self):
        """Compares FAVOR softmax attention against exact softmax attention.

        Random query/key/value tensors of shape
        [batch_size, length, num_heads, dim] are cast to float64 and passed
        to `favor.favor_attention` with the softmax kernel transformation and
        a random projection matrix. The same inputs are then run through
        explicitly computed scaled dot-product softmax attention, and the
        largest element-wise relative error (measured against the exact
        result) must stay below `max_error`.
        """
        batch_size = 1
        length = 2
        num_heads = 1
        dim = 8
        num_random_features = 1000
        query = tf.random.normal([batch_size, length, num_heads, dim])
        key = tf.random.normal([batch_size, length, num_heads, dim])
        value = tf.random.normal([batch_size, length, num_heads, dim])
        kernel_transformation = favor.softmax_kernel_transformation
        projection_matrix = favor.create_projection_matrix(
            num_random_features, dim)
        query = tf.cast(query, tf.float64)
        key = tf.cast(key, tf.float64)
        value = tf.cast(value, tf.float64)
        projection_matrix = tf.cast(projection_matrix, tf.float64)
        attention_block_output = favor.favor_attention(query, key, value, None,
                                                       kernel_transformation,
                                                       False,
                                                       projection_matrix)

        # Reference: softmax(Q K^T / sqrt(dim)) V, normalised over the key
        # axis (axis 2 of the "BXYH" score tensor).
        query = tf.multiply(query, 1.0 / math.sqrt(float(dim)))
        attention_scores = tf.einsum("BXHD,BYHD->BXYH", query, key)
        attention_scores = tf.nn.softmax(attention_scores, axis=2)
        exact_attention_block_output = tf.einsum("BXYH,BYHD->BXHD",
                                                 attention_scores, value)
        max_error = 0.5
        with self.session(use_gpu=False) as sess:
            # The fetch order must match the unpacking order: the FAVOR
            # approximation first, the exact result second. Otherwise the
            # relative error below is normalised by the approximation
            # instead of the ground truth.
            favor_output, groundtruth_output = sess.run(
                [attention_block_output, exact_attention_block_output])
            error = np.max(
                np.abs(
                    (groundtruth_output - favor_output) / groundtruth_output))
            self.assertLess(error, max_error)
Beispiel #2
0
def apply_mask(x, scope=''):
    """Return `x` multiplied element-wise by its pruning mask.

    The mask and threshold for `x` come from
    `pruning_utils.weight_mask_variable` and
    `pruning_utils.weight_threshold_variable`. The first time a given mask
    is seen, the threshold, the mask, the masked weights and `x` itself are
    appended to their respective graph collections.

    Args:
      x: Input weight tensor.
      scope: The current variable scope. Defaults to "".

    Returns:
      Tensor representing the masked weights.
    """
    weight_mask = pruning_utils.weight_mask_variable(x, scope)
    weight_threshold = pruning_utils.weight_threshold_variable(x, scope)

    # The product is created in the weights name scope so that the
    # quantization library can attach quant ops to it more easily.
    masked = tf.multiply(weight_mask, x, name=_MASKED_WEIGHT_NAME)

    # A mask that is already registered must not be added again; this
    # matters in particular when masking RNN weight variables.
    if weight_mask in tf.get_collection_ref(_MASK_COLLECTION):
        return masked

    registrations = (
        (_THRESHOLD_COLLECTION, weight_threshold),
        (_MASK_COLLECTION, weight_mask),
        (_MASKED_WEIGHT_COLLECTION, masked),
        (_WEIGHT_COLLECTION, x),
    )
    for collection_name, item in registrations:
        tf.add_to_collection(collection_name, item)
    return masked
Beispiel #3
0
    def _update_mask(self, weights, threshold):
        """Builds the ops that compute a new threshold and mask for `weights`.

        The absolute weights are flattened and sorted in descending order
        (a full-size `top_k`). The candidate threshold is the entry at index
        `k - 1`, where `k = round(size * (1 - sparsity))`. That candidate is
        blended with the incoming `threshold` using
        `self._spec.threshold_decay`, and the mask marks every weight whose
        magnitude is at least the blended threshold.

        Args:
          weights: The weight tensor that needs to be masked.
          threshold: The current threshold value, used as the previous term
            of the exponential moving average.

        Returns:
          A `(smoothed_threshold, new_mask)` pair of tensors.
          `smoothed_threshold` is the decayed average of the old and newly
          computed thresholds. `new_mask` is a float32 tensor shaped like
          `weights`, holding 1 where the magnitude is >= the smoothed
          threshold and 0 elsewhere.

        Raises:
          ValueError: if `self._sparsity` is None.
        """
        if self._sparsity is None:
            raise ValueError('Sparsity variable undefined')

        layer_sparsity = self._get_sparsity(weights.op.name)
        with tf.name_scope(weights.op.name + '_pruning_ops'):
            magnitudes = tf.abs(weights)

            # Number of weights that should survive pruning.
            total = tf.cast(tf.size(magnitudes), tf.float32)
            keep_fraction = 1 - layer_sparsity
            num_kept = tf.cast(tf.round(total * keep_fraction), tf.int32)

            # top_k over every element yields a full descending sort.
            flat_magnitudes = tf.reshape(magnitudes, [-1])
            sorted_desc, _ = tf.nn.top_k(flat_magnitudes,
                                         k=tf.size(magnitudes))

            # The smallest magnitude among the kept weights is the candidate.
            candidate_threshold = tf.gather(sorted_desc, num_kept - 1)

            # Exponential moving average of the old and candidate thresholds.
            fresh_term = tf.multiply(candidate_threshold,
                                     1 - self._spec.threshold_decay)
            stale_term = tf.multiply(threshold, self._spec.threshold_decay)
            smoothed_threshold = tf.add_n([fresh_term, stale_term])

            is_kept = tf.greater_equal(magnitudes, smoothed_threshold)
            new_mask = tf.cast(is_kept, tf.float32)

        return smoothed_threshold, new_mask
Beispiel #4
0
    def _get_sparsity(self, weight_name):
        """Looks up the sparsity to apply to the weight called `weight_name`.

        Every compiled pattern in `self._weight_sparsity_map` is searched
        against `weight_name`.

        Args:
          weight_name: Name of the weight (or layer) being pruned.

        Returns:
          `self._sparsity` when no pattern matches. Otherwise
          `self._sparsity` scaled by the ratio of the matched per-weight
          target to `self._spec.target_sparsity`.

        Raises:
          ValueError: if more than one pattern matches `weight_name`.
        """
        matched_targets = []
        for pattern, layer_target in self._weight_sparsity_map.items():
            if pattern.search(weight_name):
                matched_targets.append(layer_target)

        if len(matched_targets) > 1:
            raise ValueError(
                'Multiple matches in weight_sparsity_map for weight %s' %
                weight_name)

        if not matched_targets:
            # No per-weight override: use the global sparsity as is.
            return self._sparsity

        # TODO(suyoggupta): This will work when initial_sparsity = 0. Generalize
        # to handle other cases as well.
        scale = tf.div(matched_targets[0], self._spec.target_sparsity)
        return tf.multiply(self._sparsity, scale)
Beispiel #5
0
    def GetMixResult(cls, theta, concat, lstmobj):  # pylint:disable=unused-argument
        """Multiplies `concat` by the masked weight matrix of the LSTM cell.

        The weight `theta.wm` is multiplied element-wise by `theta.mask`,
        the product is passed through `lstmobj.QWeight`, and `concat` is
        matrix-multiplied by the result.

        Args:
          theta: a theta object in the LSTM cells; its `wm` and `mask`
            attributes are read here.
          concat: Tensor, concat of previous output and current state vector.
          lstmobj: a LSTM cell object providing `QWeight`.

        Returns:
          The result Tensor of the matrix multiplication.
        """
        masked_weight = tf.multiply(theta.wm, theta.mask,
                                    name='masked_weight')
        processed_weight = lstmobj.QWeight(masked_weight)
        return tf.matmul(concat, processed_weight)
Beispiel #6
0
    def _setup_sparsity(self):
        """Builds the op giving the scheduled sparsity at the global step.

        With `p` the schedule progress clamped to [0, 1],

          p = (global_step - begin_step) / (end_step - begin_step)
          sparsity = (initial - target) * (1 - p) ** exponent + target

        so the value starts at `initial_sparsity` when `p` is 0 and reaches
        `target_sparsity` when `p` is 1. All schedule parameters are read
        from `self._spec`.

        Returns:
          The sparsity tensor, created with name 'sparsity' inside the
          name scope `self._spec.name`.
        """
        spec = self._spec
        start = spec.sparsity_function_begin_step
        stop = spec.sparsity_function_end_step
        initial = spec.initial_sparsity
        target = spec.target_sparsity
        power = spec.sparsity_function_exponent

        with tf.name_scope(spec.name):
            # Fraction of the schedule completed, clamped to [0, 1].
            elapsed = tf.cast(self._global_step - start, tf.float32)
            raw_progress = tf.div(elapsed, stop - start)
            progress = tf.minimum(1.0, tf.maximum(0.0, raw_progress))

            # Polynomial decay from the initial to the target sparsity.
            remaining = tf.pow(1 - progress, power)
            offset = tf.multiply(initial - target, remaining)
            sparsity = tf.add(offset, target, name='sparsity')

        return sparsity