Esempio n. 1
0
 def call(self, inputs):
   """Computes the layer output from `inputs`, `self.kernel` and `self.bias`.

   The last axis of `inputs` is multiplied against `self.kernel`; the bias is
   added when `self.use_bias` is set and `self.activation` is applied when it
   is not None.

   Args:
     inputs: Tensor to transform.

   Returns:
     The transformed tensor.
   """
   input_dims = inputs.get_shape().as_list()
   result_dims = input_dims[:-1] + [self.units]
   if len(result_dims) <= 2:
     result = standard_ops.matmul(inputs, self.kernel)
   else:
     # Inputs with more than two axes: contract the trailing input axis with
     # the leading kernel axis so the kernel is applied across leading axes.
     contraction_axes = [[len(input_dims) - 1], [0]]
     result = standard_ops.tensordot(inputs, self.kernel, contraction_axes)
     # Attach the expected static shape to the result.
     result.set_shape(result_dims)
   if self.use_bias:
     result = nn.bias_add(result, self.bias)
   if self.activation is None:
     return result
   return self.activation(result)  # pylint: disable=not-callable
Esempio n. 2
0
 def call(self, inputs):
   """Computes the layer output from `inputs`, `self.kernel` and `self.bias`.

   `inputs` is first converted to a tensor of dtype `self.dtype`. Its last
   axis is multiplied against `self.kernel`; the bias is added when
   `self.use_bias` is set and `self.activation` is applied when it is not
   None.

   Args:
     inputs: Value convertible to a tensor.

   Returns:
     The transformed tensor.
   """
   inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
   input_dims = inputs.get_shape().as_list()
   rank = len(input_dims)
   if rank <= 2:
     result = gen_math_ops.mat_mul(inputs, self.kernel)
   else:
     # Inputs with more than two axes: contract the trailing input axis with
     # the leading kernel axis so the kernel is applied across leading axes.
     result = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
     # Outside eager execution, attach the expected static shape.
     if not context.executing_eagerly():
       result.set_shape(input_dims[:-1] + [self.units])
   if self.use_bias:
     result = nn.bias_add(result, self.bias)
   if self.activation is None:
     return result
   return self.activation(result)  # pylint: disable=not-callable
Esempio n. 3
0
 def call(self, inputs):
   """Computes the layer output from `inputs`, `self.kernel` and `self.bias`.

   The last axis of `inputs` is multiplied against `self.kernel`; the bias is
   added when `self.use_bias` is set and `self.activation` is applied when it
   is not None.

   Args:
     inputs: Value convertible to a tensor.

   Returns:
     The transformed tensor.
   """
   inputs = ops.convert_to_tensor(inputs)
   rank = common_shapes.rank(inputs)
   if rank <= 2:
     # Cast to self.dtype (the variable dtype) unless the mixed precision
     # policy says variables are cast instead; in that case the variable is
     # automatically cast to inputs.dtype.
     if not self._mixed_precision_policy.should_cast_variables:
       inputs = math_ops.cast(inputs, self.dtype)
     result = gen_math_ops.mat_mul(inputs, self.kernel)
   else:
     # Inputs with more than two axes: contract the trailing input axis with
     # the leading kernel axis so the kernel is applied across leading axes.
     result = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
     # Outside eager execution, attach the expected static shape.
     if not context.executing_eagerly():
       leading_dims = inputs.shape.as_list()[:-1]
       result.set_shape(leading_dims + [self.units])
   if self.use_bias:
     result = nn.bias_add(result, self.bias)
   if self.activation is None:
     return result
   return self.activation(result)  # pylint: disable=not-callable
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Builds the training op for one step on the constrained problem.

    The model parameters are updated by `self.optimizer`. The internal state
    variable, from which `self._distribution` derives the weights placed on
    the objective and the proxy constraints, is updated by
    `self.constraint_optimizer` when one is set and by `self.optimizer`
    otherwise. Once the updates have run, the state is passed through
    `self._projection_op`.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize. When its `proxy_constraints` is None, its `constraints` are
        used in their place.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the base dtypes of the objective, constraints and proxy
        constraints are not all the same.

    Returns:
      `Operation`, the train_op: the projection op alone when `global_step`
      is None, otherwise a group of the projection op and the increment of
      `global_step`.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # All three tensors are combined below, so their base dtypes must agree.
    objective_dtype = objective.dtype.base_dtype
    for tensor in (constraints, proxy_constraints):
      if objective_dtype != tensor.dtype.base_dtype:
        raise ValueError("objective, constraints and proxy_constraints must "
                         "have the same dtype")

    # Both constraint tensors become 1d vectors of length num_constraints.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # The initial value is wrapped in a lambda so that, should this method be
    # called inside a tf.control_dependencies() block, those dependencies are
    # not applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    zero_prefix = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (zero_prefix, constraints), axis=0)
    objective_entry = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_entry, proxy_constraints), axis=0)

    distribution = self._distribution(state)
    # The loss weights the objective and proxy constraints by the distribution.
    loss_weights = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(
        loss_weights, objective_and_proxy_constraints, 1)
    # The state gradient is a column of (0, constraints) times a row holding
    # the distribution.
    constraints_column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    distribution_row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(constraints_column, distribution_row)

    share_optimizer = self.constraint_optimizer is None
    # The model-parameter gradients and the state's (gradient, variable) pair
    # are built the same way whichever optimizer updates the state.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    if share_optimizer:
      # No separate constraint_optimizer: self.optimizer updates both the
      # model parameters and the internal state in one apply_gradients call.
      grads_and_vars.append(state_grad_and_var)
      update_ops = [
          self.optimizer.apply_gradients(grads_and_vars, name="update")
      ]
    else:
      # Separate constraint_optimizer: self.optimizer updates the model
      # parameters and self.constraint_optimizer updates the internal state.
      matrix_grads_and_vars = [state_grad_and_var]
      pending_gradients = [
          grad for grad, _ in grads_and_vars + matrix_grads_and_vars
          if grad is not None
      ]
      # Both updates are created under a dependency on every gradient.
      with ops.control_dependencies(pending_gradients):
        model_update = self.optimizer.apply_gradients(
            grads_and_vars, name="update")
        state_update = self.constraint_optimizer.apply_gradients(
            matrix_grads_and_vars, name="optimizer_state_update")
      update_ops = [model_update, state_update]

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # Without a global step, the projection is the whole train op.
        return self._projection_op(state, name=name)
      # With a global step, it is incremented alongside the projection.
      projection_op = self._projection_op(state, name="project")
      with ops.colocate_with(global_step):
        global_step_op = state_ops.assign_add(
            global_step, 1, name="global_step_increment")
      return control_flow_ops.group(projection_op, global_step_op, name=name)
 def _matmul(self, inputs, kernel):
   """Multiplies `inputs` by `kernel`.

   Args:
     inputs: Tensor whose `shape.ndims` selects the multiplication op.
     kernel: Tensor multiplied against the last axis of `inputs`.

   Returns:
     The `matmul` result when `inputs` has at most two axes, otherwise the
     `tensordot` of the last axis of `inputs` with the first axis of `kernel`.
   """
   if inputs.shape.ndims > 2:
     # `tensordot` is needed so the kernel applies across the leading axes.
     return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
   return standard_ops.matmul(inputs, kernel)
Esempio n. 6
0
 def _matmul(self, inputs, kernel):
     """Multiplies `inputs` by `kernel`, handling inputs with extra axes.

     Args:
       inputs: Tensor whose `shape.ndims` selects the multiplication op.
       kernel: Tensor multiplied against the last axis of `inputs`.

     Returns:
       `matmul(inputs, kernel)` when `inputs` has at most two axes; otherwise
       a `tensordot` contracting the last axis of `inputs` with the first
       axis of `kernel`.
     """
     needs_broadcast = inputs.shape.ndims > 2
     if needs_broadcast:
         # Plain `matmul` is not used here; `tensordot` handles broadcasting.
         contraction_axes = [[-1], [0]]
         return standard_ops.tensordot(inputs, kernel, axes=contraction_axes)
     return standard_ops.matmul(inputs, kernel)
    def _minimize_constrained(self,
                              minimization_problem,
                              global_step=None,
                              var_list=None,
                              gate_gradients=train_optimizer.Optimizer.GATE_OP,
                              aggregation_method=None,
                              colocate_gradients_with_ops=False,
                              name=None,
                              grad_loss=None):
        """Builds the training op for one step on the constrained problem.

        The model parameters are updated by `self.optimizer`. The internal
        state variable, from which `self._distribution` derives the weights
        placed on the objective and the proxy constraints, is updated by
        `self.constraint_optimizer` when one is set and by `self.optimizer`
        otherwise. Once the updates have run, the state is passed through
        `self._projection_op`.

        Args:
          minimization_problem: ConstrainedMinimizationProblem, the problem to
            optimize. When its `proxy_constraints` is None, its `constraints`
            are used in their place.
          global_step: as in `tf.train.Optimizer`'s `minimize` method.
          var_list: as in `tf.train.Optimizer`'s `minimize` method.
          gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
          aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
          colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
            method.
          name: as in `tf.train.Optimizer`'s `minimize` method.
          grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

        Raises:
          ValueError: If the base dtypes of the objective, constraints and
            proxy constraints are not all the same.

        Returns:
          `Operation`, the train_op: the projection op alone when
          `global_step` is None, otherwise a group of the projection op and
          the increment of `global_step`.
        """
        objective = minimization_problem.objective
        constraints = minimization_problem.constraints
        proxy_constraints = minimization_problem.proxy_constraints
        if proxy_constraints is None:
            proxy_constraints = constraints

        # All three tensors are combined below, so their base dtypes must
        # agree.
        objective_dtype = objective.dtype.base_dtype
        for tensor in (constraints, proxy_constraints):
            if objective_dtype != tensor.dtype.base_dtype:
                raise ValueError(
                    "objective, constraints and proxy_constraints must "
                    "have the same dtype")

        # Both constraint tensors become 1d vectors of length
        # num_constraints.
        num_constraints = minimization_problem.num_constraints
        flat_shape = (num_constraints, )
        constraints = standard_ops.reshape(constraints, shape=flat_shape)
        proxy_constraints = standard_ops.reshape(proxy_constraints,
                                                 shape=flat_shape)

        # The initial value is wrapped in a lambda so that, should this
        # method be called inside a tf.control_dependencies() block, those
        # dependencies are not applied to the initializer.
        state = standard_ops.Variable(
            lambda: self._initial_state(num_constraints),
            trainable=False,
            name="swap_regret_optimizer_state")

        zero_prefix = standard_ops.zeros((1, ), dtype=constraints.dtype)
        zero_and_constraints = standard_ops.concat(
            (zero_prefix, constraints), axis=0)
        objective_entry = standard_ops.expand_dims(objective, 0)
        objective_and_proxy_constraints = standard_ops.concat(
            (objective_entry, proxy_constraints), axis=0)

        distribution = self._distribution(state)
        # The loss weights the objective and proxy constraints by the
        # distribution.
        loss_weights = standard_ops.cast(
            distribution, objective_and_proxy_constraints.dtype)
        loss = standard_ops.tensordot(loss_weights,
                                      objective_and_proxy_constraints, 1)
        # The state gradient is a column of (0, constraints) times a row
        # holding the distribution.
        constraints_column = standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
        distribution_row = standard_ops.expand_dims(distribution, 0)
        matrix_gradient = standard_ops.matmul(constraints_column,
                                              distribution_row)

        share_optimizer = self.constraint_optimizer is None
        # The model-parameter gradients and the state's (gradient, variable)
        # pair are built the same way whichever optimizer updates the state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        state_grad_and_var = self._constraint_grad_and_var(
            state, matrix_gradient)

        if share_optimizer:
            # No separate constraint_optimizer: self.optimizer updates both
            # the model parameters and the internal state in one
            # apply_gradients call.
            grads_and_vars.append(state_grad_and_var)
            update_ops = [
                self.optimizer.apply_gradients(grads_and_vars, name="update")
            ]
        else:
            # Separate constraint_optimizer: self.optimizer updates the model
            # parameters and self.constraint_optimizer updates the internal
            # state.
            matrix_grads_and_vars = [state_grad_and_var]
            pending_gradients = [
                grad for grad, _ in grads_and_vars + matrix_grads_and_vars
                if grad is not None
            ]
            # Both updates are created under a dependency on every gradient.
            with ops.control_dependencies(pending_gradients):
                model_update = self.optimizer.apply_gradients(
                    grads_and_vars, name="update")
                state_update = self.constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update")
            update_ops = [model_update, state_update]

        with ops.control_dependencies(update_ops):
            if global_step is None:
                # Without a global step, the projection is the whole train op.
                return self._projection_op(state, name=name)
            # With a global step, it is incremented alongside the projection.
            projection_op = self._projection_op(state, name="project")
            with ops.colocate_with(global_step):
                global_step_op = state_ops.assign_add(
                    global_step, 1, name="global_step_increment")
            return control_flow_ops.group(projection_op,
                                          global_step_op,
                                          name=name)
  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Builds the training op for one step on the constrained problem.

    The model parameters are updated by `self.optimizer`. The internal state
    variable, from which `self._lagrange_multipliers` derives the multipliers,
    is updated by `self.constraint_optimizer` when one is set and by
    `self.optimizer` otherwise. Once the updates have run, the state is passed
    through `self._projection_op`.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize. When its `proxy_constraints` is None, its `constraints` are
        used in their place.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op: the projection op alone when `global_step` is None,
      otherwise a group of the projection op and the increment of
      `global_step`.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Both constraint tensors become 1d vectors of length num_constraints.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # The initial value is wrapped in a lambda so that, should this method be
    # called inside a tf.control_dependencies() block, those dependencies are
    # not applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="external_regret_optimizer_state")

    multipliers = self._lagrange_multipliers(state)
    # Loss: the objective plus the multiplier-weighted proxy constraints.
    loss = objective + standard_ops.tensordot(
        multipliers, proxy_constraints, 1)
    # The (flattened) constraints themselves serve as the state gradient.
    multipliers_gradient = constraints

    share_optimizer = self.constraint_optimizer is None
    # The model-parameter gradients and the state's (gradient, variable) pair
    # are built the same way whichever optimizer updates the state.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(
        state, multipliers_gradient)

    if share_optimizer:
      # No separate constraint_optimizer: self.optimizer updates both the
      # model parameters and the internal state in one apply_gradients call.
      grads_and_vars.append(state_grad_and_var)
      update_ops = [
          self.optimizer.apply_gradients(grads_and_vars, name="update")
      ]
    else:
      # Separate constraint_optimizer: self.optimizer updates the model
      # parameters and self.constraint_optimizer updates the internal state.
      multiplier_grads_and_vars = [state_grad_and_var]
      pending_gradients = [
          grad for grad, _ in grads_and_vars + multiplier_grads_and_vars
          if grad is not None
      ]
      # Both updates are created under a dependency on every gradient.
      with ops.control_dependencies(pending_gradients):
        model_update = self.optimizer.apply_gradients(
            grads_and_vars, name="update")
        state_update = self.constraint_optimizer.apply_gradients(
            multiplier_grads_and_vars, name="optimizer_state_update")
      update_ops = [model_update, state_update]

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # Without a global step, the projection is the whole train op.
        return self._projection_op(state, name=name)
      # With a global step, it is incremented alongside the projection.
      projection_op = self._projection_op(state, name="project")
      with ops.colocate_with(global_step):
        global_step_op = state_ops.assign_add(
            global_step, 1, name="global_step_increment")
      return control_flow_ops.group(projection_op, global_step_op, name=name)
Esempio n. 9
0
def dense(inputs, kernel, bias=None, activation=None, dtype=None):
    """Densely connected NN layer op.

    Args:
      inputs: `tf.Tensor` or `tf.SparseTensor`. Inputs to operation.
      kernel: `tf.Variable`. Matrix kernel.
      bias: (Optional) `tf.Variable`. Bias to add to outputs.
      activation: (Optional) 1-argument callable. Activation function to
        apply to outputs.
      dtype: (Optional) `tf.DType`. Dtype to cast `inputs` to; the cast is
        skipped when the base dtype of `inputs` already matches.

    Returns:
      `tf.Tensor`. Output of dense connection.
    """
    if dtype and inputs.dtype.base_dtype != dtype.base_dtype:
        inputs = math_ops.cast(inputs, dtype=dtype)

    rank = inputs.shape.rank
    if rank is not None and rank != 2:
        # Broadcast kernel to inputs: contract the last input axis with the
        # first kernel axis.
        outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
        # Outside eager execution, attach the expected static shape.
        if not context.executing_eagerly():
            leading_dims = inputs.shape.as_list()[:-1]
            outputs.set_shape(leading_dims + [kernel.shape[-1]])
    elif isinstance(inputs, sparse_tensor.SparseTensor):
        # We use embedding_lookup_sparse as a more efficient matmul operation
        # for large sparse input tensors. The op will result in a sparse
        # gradient, as opposed to sparse_ops.sparse_tensor_dense_matmul which
        # results in dense gradients. This can lead to significant speedups,
        # see b/171762937.
        #
        # Empty rows are filled first, as the op assumes at least one id per
        # row.
        inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
        # To use the embedding lookup as a matrix multiply, the input matrix
        # is split into separate ids and weights tensors. The ids tensor
        # holds the column indices of the input matrix as its values, while
        # the weights tensor keeps the actual matrix values. The column
        # arrangement of ids and weights is summed over and does not matter.
        # See the documentation for sparse_ops.sparse_tensor_dense_matmul for
        # a more detailed explanation of the inputs to both ops.
        column_ids = sparse_tensor.SparseTensor(
            indices=inputs.indices,
            values=inputs.indices[:, 1],
            dense_shape=inputs.dense_shape)
        outputs = embedding_ops.embedding_lookup_sparse_v2(
            kernel, column_ids, inputs, combiner="sum")
    else:
        outputs = gen_math_ops.MatMul(a=inputs, b=kernel)

    if bias is not None:
        outputs = nn_ops.bias_add(outputs, bias)

    if activation is None:
        return outputs
    return activation(outputs)
Esempio n. 10
0
    def call(self, inputs):
        """Computes the layer output, optionally on quantize-emulated operands.

        When the environment variable ENABLE_QUANTOP_DENSE parses to 1, both
        the inputs and `self.kernel` are passed through
        `quantemu_ops.quantize_emu` (configured from the QUANTEMU_*
        environment variables) before being multiplied. Otherwise they are
        multiplied as they are. The bias is added when `self.use_bias` is set
        and `self.activation` is applied when it is not None.

        Args:
          inputs: Value convertible to a tensor of dtype `self.dtype`.

        Returns:
          The transformed tensor.
        """

        def env_int(variable, default):
            # Reads an integer setting from the environment.
            return int(os.getenv(variable, default))

        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)

        if env_int('ENABLE_QUANTOP_DENSE', 0) == 1:
            matmul_inputs = quantemu_ops.quantize_emu(
                inputs,
                data_format='unknown',
                allocate_copy=env_int('QUANTEMU_ALLOCATE_COPY_INPUTS', 0),
                data_type=env_int('QUANTEMU_DENSE_DATA_TYPE', 0),
                precision=env_int('QUANTEMU_PRECISION_DENSE_INPUTS', 23),
                exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
                round_mode=env_int('QUANTEMU_RMODE_INPUTS', 0))
            matmul_kernel = quantemu_ops.quantize_emu(
                self.kernel,
                data_format='unknown',
                allocate_copy=env_int('QUANTEMU_ALLOCATE_COPY_FILTERS', 0),
                data_type=env_int('QUANTEMU_DENSE_DATA_TYPE', 0),
                precision=env_int('QUANTEMU_PRECISION_DENSE_FILTERS', 23),
                exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
                round_mode=env_int('QUANTEMU_RMODE_FILTERS', 0))
        else:
            # No quantization: multiply the operands as they are.
            matmul_inputs = inputs
            matmul_kernel = self.kernel

        # Rank and static shape always come from the unquantized inputs.
        rank = common_shapes.rank(inputs)
        if rank <= 2:
            result = gen_math_ops.mat_mul(matmul_inputs, matmul_kernel)
        else:
            # Inputs with more than two axes: contract the trailing input
            # axis with the leading kernel axis.
            result = standard_ops.tensordot(matmul_inputs, matmul_kernel,
                                            [[rank - 1], [0]])
            # Outside eager execution, attach the expected static shape.
            if not context.executing_eagerly():
                leading_dims = inputs.get_shape().as_list()[:-1]
                result.set_shape(leading_dims + [self.units])
        if self.use_bias:
            result = nn.bias_add(result, self.bias)
        if self.activation is None:
            return result
        return self.activation(result)  # pylint: disable=not-callable
Esempio n. 11
0
 def _broadcasted_tensordot(_inputs, _kernel):
     """Contracts axis `rank - 1` of `_inputs` with axis 0 of `_kernel`.

     `rank` is not a parameter; it is read from the enclosing scope.
     """
     contraction_axes = [[rank - 1], [0]]
     return standard_ops.tensordot(_inputs, _kernel, contraction_axes)
Esempio n. 12
0
    def call(self, inputs):
        """Computes the layer output and optionally keeps only masked entries.

        The inputs are multiplied by `self.kernel` and passed through
        `self.activation` when it is not None. The result is stored on
        `self.full_outputs`. When `self.leaky_inputs` is set, the transposed
        result is flattened and only the positions where the flattened
        `self.mask_array` is greater than 0.0 are kept, returned with a
        trailing axis of size 1. Shapes are printed when `self.verbose > 0`.

        Args:
          inputs: Value convertible to a tensor.

        Returns:
          The full output tensor, or the masked selection when
          `self.leaky_inputs` is set.
        """
        inputs = ops.convert_to_tensor(inputs)
        rank = common_shapes.rank(inputs)
        if rank <= 2:
            result = gen_math_ops.mat_mul(inputs, self.kernel)
        else:
            # Inputs with more than two axes: contract the trailing input
            # axis with the leading kernel axis.
            result = standard_ops.tensordot(inputs, self.kernel,
                                            [[rank - 1], [0]])
            # Outside eager execution, attach the expected static shape.
            if not context.executing_eagerly():
                leading_dims = inputs.get_shape().as_list()[:-1]
                result.set_shape(leading_dims + [self.units])

        if self.activation is not None:
            result = self.activation(result)  # pylint: disable=not-callable

        if self.verbose > 0:
            print(result.get_shape(), 'outputs before masking')

        if not self.leaky_inputs:
            # No masking requested: the full output is the layer output.
            self.full_outputs = result
            if self.verbose > 0:
                print(result.get_shape(), 'shape of output without masking')
            return result

        if self.verbose > 0:
            print('performing mask op')
        # Keep the unmasked output available on the instance.
        self.full_outputs = result

        # Flatten the transposed output into a single vector.
        flat_result = tf.reshape(tf.transpose(result), [-1],
                                 name='outputs_1d_')

        if self.verbose > 0:
            print('mask_array', self.mask_array.get_shape())

        flat_mask_values = tf.reshape(self.mask_array, [-1],
                                      name='mask_ph_1d_')

        if self.verbose > 0:
            print('mask_array_1d', flat_mask_values.get_shape())

        # Positions with a strictly positive mask value are kept.
        keep = tf.math.greater(flat_mask_values,
                               tf.constant(0.0),
                               name='masking_op_')

        selected = tf.expand_dims(flat_result[keep], axis=1)

        if self.verbose > 0:
            print(selected.get_shape(), 'shape of output after masking')

        return selected