def _tf_sparsemax_loss(self, z, q, dtype, use_gpu):
    """Build and evaluate the sparsemax loss op for `z` against `q`.

    Both inputs are cast to `dtype` with `astype` before any op is built.

    Returns:
      A `(loss_op, loss_value)` pair: the op returned by `sparsemax_loss` and
      the result of calling `eval()` on it inside the test session.
    """
    logits = z.astype(dtype)
    labels = q.astype(dtype)

    with self.test_session(use_gpu=use_gpu):
      loss_op = sparsemax_loss(logits, sparsemax(logits), labels)
      loss_value = loss_op.eval()

    return loss_op, loss_value
    def _tf_sparsemax_loss(self, z, q, dtype, use_gpu):
        """Evaluate `sparsemax_loss` on `z` and `q` after casting to `dtype`.

        Returns:
            Tuple of the loss op and the value obtained from `eval()` while
            the test session is active.
        """
        typed_z = z.astype(dtype)
        typed_q = q.astype(dtype)

        with self.test_session(use_gpu=use_gpu):
            sparsemax_out_op = sparsemax(typed_z)
            loss_op = sparsemax_loss(typed_z, sparsemax_out_op, typed_q)
            loss_result = loss_op.eval()

        return loss_op, loss_result
Example #3
0
  def _test_gradient_against_estimate(self, dtype, random, use_gpu):
    """Assert the sparsemax gradient error stays below 1e-4.

    Inputs are sampled with `random.uniform` in [-3, 3] with shape
    `(test_obs, 10)` and cast to `dtype`. The error is the value returned by
    `gradient_checker.compute_gradient_error` for the sparsemax op.
    """
    x_values = random.uniform(
        low=-3, high=3, size=(test_obs, 10)).astype(dtype)
    x_shape = x_values.shape

    logits = array_ops.placeholder(dtype, name='z')
    sparsemax_op = sparsemax(logits)

    with self.test_session(use_gpu=use_gpu):
      # Input and output shapes are the same array shape.
      max_error = gradient_checker.compute_gradient_error(
          logits,
          x_shape,
          sparsemax_op,
          x_shape,
          x_init_value=x_values,
          delta=1e-9)

    self.assertLess(max_error, 1e-4)
    def _test_gradient_against_numpy(self, dtype, random, use_gpu):
        """Check the sparsemax gradient op against `_np_sparsemax_grad`.

        Logits are sampled with `random.uniform` in [-3, 3] with shape
        `(test_obs, 10)` and cast to `dtype`. The evaluated gradient must be
        close to the NumPy reference and have the same shape.
        """
        samples = random.uniform(
            low=-3, high=3, size=(test_obs, 10)).astype(dtype)

        logits = constant_op.constant(samples, name='z')
        sparsemax_op = sparsemax(logits)
        grad_ops = gradients_impl.gradients(sparsemax_op, [logits])
        grad_op = grad_ops[0]

        with self.test_session(use_gpu=use_gpu):
            actual = grad_op.eval()
            expected = self._np_sparsemax_grad(samples)

            self.assertAllCloseAccordingToType(expected, actual)
            self.assertShapeEqual(expected, grad_op)
Example #5
0
  def _test_gradient_against_numpy(self, dtype, random, use_gpu):
    """Compare the TensorFlow sparsemax gradient with the NumPy reference.

    The reference is `self._np_sparsemax_grad` applied to the same sampled
    logits (uniform in [-3, 3], shape `(test_obs, 10)`, cast to `dtype`).
    """
    logit_values = random.uniform(low=-3, high=3, size=(test_obs, 10))
    logit_values = logit_values.astype(dtype)

    logits = constant_op.constant(logit_values, name='z')
    tf_grad_op = gradients_impl.gradients(sparsemax(logits), [logits])[0]

    with self.test_session(use_gpu=use_gpu):
      tf_grad_value = tf_grad_op.eval()
      np_grad_value = self._np_sparsemax_grad(logit_values)

      self.assertAllCloseAccordingToType(np_grad_value, tf_grad_value)
      self.assertShapeEqual(np_grad_value, tf_grad_op)
  def _test_gradient_against_estimate(self, dtype, random, use_gpu):
    """Assert the sparsemax-loss gradient error stays below 1e-4.

    Logits are sampled with `random.uniform` in [-3, 3]; labels are one-hot
    rows with a randomly chosen column. The error is the value returned by
    `gradient_checker.compute_gradient_error` for the loss op, whose output
    shape is `(test_obs,)`.
    """
    logit_values = random.uniform(
        low=-3, high=3, size=(test_obs, 10)).astype(dtype)

    # One-hot labels: exactly one column per row is set to 1.
    # NOTE(review): the hot column is drawn from the global `np.random` state,
    # not from the `random` argument -- confirm this is intended.
    labels = np.zeros((test_obs, 10)).astype(dtype)
    row_index = np.arange(0, test_obs)
    hot_column = np.random.randint(0, 10, size=test_obs)
    labels[row_index, hot_column] = 1

    logits = array_ops.placeholder(dtype, name='z')
    sparsemax_op = sparsemax(logits)
    loss_op = sparsemax_loss(logits, sparsemax_op, labels)

    with self.test_session(use_gpu=use_gpu):
      max_error = gradient_checker.compute_gradient_error(
          logits,
          logit_values.shape,
          loss_op,
          (test_obs,),
          x_init_value=logit_values,
          delta=1e-9)

    self.assertLess(max_error, 1e-4)
    def _test_gradient_against_estimate(self, dtype, random, use_gpu):
        """Check the sparsemax gradient against an estimated gradient.

        The error reported by `gradient_checker.compute_gradient_error` for
        the sparsemax op must be below 1e-4. Inputs are uniform samples in
        [-3, 3] with shape `(test_obs, 10)`, cast to `dtype`.
        """
        init_value = random.uniform(low=-3, high=3, size=(test_obs, 10))
        init_value = init_value.astype(dtype)

        logits = array_ops.placeholder(dtype, name='z')
        sparsemax_op = sparsemax(logits)

        with self.test_session(use_gpu=use_gpu):
            gradient_error = gradient_checker.compute_gradient_error(
                logits,
                init_value.shape,
                sparsemax_op,
                init_value.shape,
                x_init_value=init_value,
                delta=1e-9)

        self.assertLess(gradient_error, 1e-4)
Example #8
0
def sml(labels, logits):
    """Build the sparsemax loss of `logits` with respect to `labels`.

    Applies `sparsemax` to `logits`, subtracts the axis-1 mean from the
    logits, and sums a support term and a label term over axis 1.

    Args:
      labels: Target values combined elementwise with the shifted logits
        (presumably the same shape as `logits` -- confirm against callers).
      logits: Input scores; reduced over axis 1, so at least rank 2.

    Returns:
      The result of `math_ops.reduce_sum(..., axis=1)` over the summed terms.
    """
    probs = sparsemax(logits)

    # Subtract the axis-1 mean; `newaxis` restores the reduced axis so the
    # subtraction broadcasts.
    row_mean = math_ops.reduce_mean(logits, axis=1)
    shifted_logits = logits - row_mean[:, array_ops.newaxis]

    # Term restricted to the support, i.e. entries where sparsemax is > 0.
    in_support = math_ops.cast(probs > 0, probs.dtype)
    support_term = in_support * probs * (shifted_logits - 0.5 * probs)

    # Label term: labels * (0.5 * labels - shifted logits).
    label_term = labels * (0.5 * labels - shifted_logits)

    return math_ops.reduce_sum(support_term + label_term, axis=1)
  def _test_gradient_against_numpy(self, dtype, random, use_gpu):
    """Compare the sparsemax-loss gradient with the NumPy reference.

    Logits and one-hot labels are generated uncast and cast to `dtype` only
    when fed to TensorFlow; the NumPy reference `_np_sparsemax_loss_grad`
    receives the uncast arrays and its result is cast afterwards.
    """
    logit_values = random.uniform(low=-3, high=3, size=(test_obs, 10))

    # One-hot labels: exactly one column per row is set to 1.
    # NOTE(review): the hot column is drawn from the global `np.random` state,
    # not from the `random` argument -- confirm this is intended.
    label_values = np.zeros((test_obs, 10))
    row_index = np.arange(0, test_obs)
    hot_column = np.random.randint(0, 10, size=test_obs)
    label_values[row_index, hot_column] = 1

    logits = constant_op.constant(logit_values.astype(dtype), name='z')
    sparsemax_op = sparsemax(logits)
    loss_op = sparsemax_loss(logits, sparsemax_op, label_values.astype(dtype))
    grad_op = gradients_impl.gradients(loss_op, [logits])[0]

    with self.test_session(use_gpu=use_gpu):
      actual = grad_op.eval()
      reference = self._np_sparsemax_loss_grad(logit_values, label_values)
      expected = reference.astype(dtype)

      self.assertAllCloseAccordingToType(
          expected, actual, half_atol=1e-2, half_rtol=5e-3)
      self.assertShapeEqual(expected, grad_op)
    def _test_gradient_against_estimate(self, dtype, random, use_gpu):
        """Check the sparsemax-loss gradient against an estimated gradient.

        The error reported by `gradient_checker.compute_gradient_error` for
        the loss op (output shape `(test_obs,)`) must be below 1e-4.
        """
        x_init = random.uniform(
            low=-3, high=3, size=(test_obs, 10)).astype(dtype)

        # One-hot labels: exactly one column per row is set to 1.
        # NOTE(review): the hot column is drawn from the global `np.random`
        # state, not from the `random` argument -- confirm this is intended.
        one_hot = np.zeros((test_obs, 10)).astype(dtype)
        rows = np.arange(0, test_obs)
        cols = np.random.randint(0, 10, size=test_obs)
        one_hot[rows, cols] = 1

        logits = array_ops.placeholder(dtype, name='z')
        sparsemax_op = sparsemax(logits)
        loss_op = sparsemax_loss(logits, sparsemax_op, one_hot)

        with self.test_session(use_gpu=use_gpu):
            gradient_error = gradient_checker.compute_gradient_error(
                logits,
                x_init.shape,
                loss_op,
                (test_obs, ),
                x_init_value=x_init,
                delta=1e-9)

        self.assertLess(gradient_error, 1e-4)
    def _test_gradient_against_numpy(self, dtype, random, use_gpu):
        """Check the sparsemax-loss gradient op against the NumPy gradient.

        TensorFlow receives logits and one-hot labels cast to `dtype`; the
        reference `_np_sparsemax_loss_grad` is computed on the uncast arrays
        and cast to `dtype` afterwards. Values and shapes must both match.
        """
        raw_logits = random.uniform(low=-3, high=3, size=(test_obs, 10))

        # One-hot labels: exactly one column per row is set to 1.
        # NOTE(review): the hot column is drawn from the global `np.random`
        # state, not from the `random` argument -- confirm this is intended.
        raw_labels = np.zeros((test_obs, 10))
        rows = np.arange(0, test_obs)
        cols = np.random.randint(0, 10, size=test_obs)
        raw_labels[rows, cols] = 1

        logits = constant_op.constant(raw_logits.astype(dtype), name='z')
        sparsemax_op = sparsemax(logits)
        loss_op = sparsemax_loss(
            logits, sparsemax_op, raw_labels.astype(dtype))
        loss_grad_ops = gradients_impl.gradients(loss_op, [logits])
        loss_grad_op = loss_grad_ops[0]

        with self.test_session(use_gpu=use_gpu):
            tf_result = loss_grad_op.eval()
            np_result = self._np_sparsemax_loss_grad(raw_logits, raw_labels)
            np_result = np_result.astype(dtype)

            self.assertAllCloseAccordingToType(
                np_result, tf_result, half_atol=1e-2, half_rtol=5e-3)
            self.assertShapeEqual(np_result, loss_grad_op)
    def _tf_sparsemax(self, z, dtype, use_gpu):
        """Build and evaluate the sparsemax op for `z` cast to `dtype`.

        Returns:
            Tuple of the sparsemax op and the value obtained from `eval()`
            while the test session is active.
        """
        typed_z = z.astype(dtype)

        with self.test_session(use_gpu=use_gpu):
            sparsemax_op = sparsemax(typed_z)
            sparsemax_value = sparsemax_op.eval()

        return sparsemax_op, sparsemax_value
  def _tf_sparsemax(self, z, dtype, use_gpu):
    """Evaluate `sparsemax` on `z` after casting it to `dtype`.

    Returns:
      A `(op, value)` pair: the sparsemax op and the result of `op.eval()`
      inside the test session.
    """
    with self.test_session(use_gpu=use_gpu):
      op = sparsemax(z.astype(dtype))
      value = op.eval()

    result = (op, value)
    return result
Example #14
0
    def encoder(self, data, is_training):
        """TabNet encoder model.

        Builds the encoder graph inside the "Encoder" variable scope (opened
        with `AUTO_REUSE`). The input features are batch-normalized, then
        `self.num_decision_steps` decision steps are unrolled. Each step runs
        four dense + batch-norm + `glu` blocks: "Transform_f1" and
        "Transform_f2" share one set of weights across steps, while
        "Transform_f3<ni>" and "Transform_f4<ni>" are per-step. Every step but
        the last then computes a sparsemax feature mask for the next step.

        Image summaries are emitted for each per-step mask and for the
        aggregated mask.

        Args:
            data: Input features, passed to
                `tf.compat.v1.feature_column.input_layer` together with
                `self.columns`.
            is_training: Python truth value. Forwarded as `training` to every
                batch-normalization layer; also selects the virtual batch
                size (`self.virtual_batch_size` when true, otherwise 1).

        Returns:
            A tuple `(output_aggregated, total_entropy)`:
            `output_aggregated` is the sum, over decision steps `ni > 0`, of
            the ReLU of the first `self.output_dim` columns of the step
            output; `total_entropy` is the accumulated mask-entropy term,
            each step's contribution divided by
            `self.num_decision_steps - 1`.
        """

        with tf.compat.v1.variable_scope("Encoder",
                                         reuse=tf.compat.v1.AUTO_REUSE):

            # Reads and normalizes input features.
            features = tf.compat.v1.feature_column.input_layer(
                data, self.columns)
            features = tf.layers.batch_normalization(
                features, training=is_training, momentum=self.batch_momentum)
            batch_size = tf.shape(features)[0]

            # Initializes decision-step dependent variables.
            output_aggregated = tf.zeros([batch_size, self.output_dim])
            masked_features = features
            mask_values = tf.zeros([batch_size, self.num_features])
            aggregated_mask_values = tf.zeros([batch_size, self.num_features])
            complementary_aggregated_mask_values = tf.ones(
                [batch_size, self.num_features])
            total_entropy = 0

            # Virtual batch size for the per-step batch-norm layers: the
            # configured value while training, 1 otherwise.
            if is_training:
                v_b = self.virtual_batch_size
            else:
                v_b = 1

            for ni in range(self.num_decision_steps):

                # Feature transformer with two shared and two decision step dependent
                # blocks is used below.

                # The shared blocks create their variables at step 0 and
                # reuse them on every later step.
                reuse_flag = (ni > 0)

                transform_f1 = tf.layers.dense(masked_features,
                                               self.feature_dim * 2,
                                               name="Transform_f1",
                                               reuse=reuse_flag,
                                               use_bias=False)
                transform_f1 = tf.layers.batch_normalization(
                    transform_f1,
                    training=is_training,
                    momentum=self.batch_momentum,
                    virtual_batch_size=v_b)
                transform_f1 = glu(transform_f1, self.feature_dim)

                transform_f2 = tf.layers.dense(transform_f1,
                                               self.feature_dim * 2,
                                               name="Transform_f2",
                                               reuse=reuse_flag,
                                               use_bias=False)
                transform_f2 = tf.layers.batch_normalization(
                    transform_f2,
                    training=is_training,
                    momentum=self.batch_momentum,
                    virtual_batch_size=v_b)
                # Residual connection to the previous block, scaled by
                # sqrt(0.5).
                transform_f2 = (glu(transform_f2, self.feature_dim) +
                                transform_f1) * np.sqrt(0.5)

                # Step-dependent blocks: the step index is part of the layer
                # name, so each step gets its own weights.
                transform_f3 = tf.layers.dense(transform_f2,
                                               self.feature_dim * 2,
                                               name="Transform_f3" + str(ni),
                                               use_bias=False)
                transform_f3 = tf.layers.batch_normalization(
                    transform_f3,
                    training=is_training,
                    momentum=self.batch_momentum,
                    virtual_batch_size=v_b)
                transform_f3 = (glu(transform_f3, self.feature_dim) +
                                transform_f2) * np.sqrt(0.5)

                transform_f4 = tf.layers.dense(transform_f3,
                                               self.feature_dim * 2,
                                               name="Transform_f4" + str(ni),
                                               use_bias=False)
                transform_f4 = tf.layers.batch_normalization(
                    transform_f4,
                    training=is_training,
                    momentum=self.batch_momentum,
                    virtual_batch_size=v_b)
                transform_f4 = (glu(transform_f4, self.feature_dim) +
                                transform_f3) * np.sqrt(0.5)

                # Step 0 contributes no decision output; it only produces the
                # coefficients for the first mask.
                if ni > 0:

                    decision_out = tf.nn.relu(
                        transform_f4[:, :self.output_dim])

                    # Decision aggregation.
                    output_aggregated += decision_out

                    # Aggregated masks are used for visualization of the
                    # feature importance attributes.
                    # `mask_values` here is still the mask computed at the
                    # end of the previous step, i.e. the one that produced
                    # this step's input.
                    scale_agg = tf.reduce_sum(
                        decision_out, axis=1,
                        keepdims=True) / (self.num_decision_steps - 1)
                    aggregated_mask_values += mask_values * scale_agg

                # The remaining columns drive the next step's mask.
                features_for_coef = (transform_f4[:, self.output_dim:])

                # The last step computes no mask because no step follows it.
                if ni < self.num_decision_steps - 1:

                    # Determines the feature masks via linear and nonlinear
                    # transformations, taking into account of aggregated feature use.
                    mask_values = tf.layers.dense(features_for_coef,
                                                  self.num_features,
                                                  name="Transform_coef" +
                                                  str(ni),
                                                  use_bias=False)
                    mask_values = tf.layers.batch_normalization(
                        mask_values,
                        training=is_training,
                        momentum=self.batch_momentum,
                        virtual_batch_size=v_b)
                    mask_values *= complementary_aggregated_mask_values
                    mask_values = contrib_sparsemax.sparsemax(mask_values)

                    # Relaxation factor controls the amount of reuse of features between
                    # different decision blocks and updated with the values of
                    # coefficients.
                    complementary_aggregated_mask_values *= (
                        self.relaxation_factor - mask_values)

                    # Entropy is used to penalize the amount of sparsity in feature
                    # selection.
                    # `self.epsilon` is added inside the log because
                    # `mask_values` is a sparsemax output, presumably
                    # containing exact zeros.
                    total_entropy += tf.reduce_mean(
                        tf.reduce_sum(-mask_values *
                                      tf.math.log(mask_values + self.epsilon),
                                      axis=1)) / (self.num_decision_steps - 1)

                    # Feature selection.
                    masked_features = tf.multiply(mask_values, features)

                    # Visualization of the feature selection mask at decision step ni
                    tf.compat.v1.summary.image(
                        "Mask for step" + str(ni),
                        tf.expand_dims(tf.expand_dims(mask_values, 0), 3),
                        max_outputs=1)

            # Visualization of the aggregated feature importances
            tf.compat.v1.summary.image(
                "Aggregated mask",
                tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3),
                max_outputs=1)

            return output_aggregated, total_entropy