def dense(self,
              input,
              units,
              name,
              reuse,
              kernel_regularizer,
              initZero=False):
        shape = input.get_shape().as_list()
        input_dim = shape[-1]

        with tf.variable_scope(name, reuse=reuse):
            initializer = tf.constant_initializer(
                0.0) if initZero else tf.truncated_normal_initializer(
                    stddev=1.0)
            kernel = tf.get_variable('kernel',
                                     shape=[input_dim, units],
                                     initializer=initializer,
                                     regularizer=kernel_regularizer)
            bias = tf.get_variable('bias',
                                   shape=[units],
                                   initializer=tf.constant_initializer(0.0))
            scale = variance_scaling_lr([input_dim, units], 'FAN_AVG')
            kernel = kernel * scale
        output = standard_ops.matmul(input, kernel)
        output = tf.nn.bias_add(output, bias)
        return output
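A brief, hypothetical usage sketch (the enclosing model class, the layer names, and the regularizer below are illustrative, not taken from the original source):

        # Hypothetical usage inside the same class: two stacked layers built
        # with the dense() helper above.
        reg = tf.contrib.layers.l2_regularizer(1e-4)
        hidden = tf.nn.relu(
            self.dense(features, units=256, name='fc1', reuse=False,
                       kernel_regularizer=reg))
        logits = self.dense(hidden, units=10, name='logits', reuse=False,
                            kernel_regularizer=None, initZero=True)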
Example 2
    def call(self, inputs):
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()

        # weight binarization (disabled; the real-valued kernel is used as-is)
        # self.b_kernel = binarization(self.kernel, self.H)
        self.b_kernel = self.kernel

        #r_kernel = self.kernel
        #self.kernel = self.b_kernel

        print("shape: ", len(shape))
        if len(shape) > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, self.b_kernel,
                                             [[len(shape) - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if context.in_graph_mode():
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            outputs = standard_ops.matmul(inputs, self.b_kernel)

        # restore weight
        #self.kernel = r_kernel

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Example 3
    def call(self, inputs):
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()
        output_shape = shape[:-1] + [self.units]
        if len(output_shape) > 2:
            raise ValueError('len(output_shape) > 2 is not supported')
        else:
            xU = standard_ops.matmul(inputs, self.manifold_args[0])
            xUS = standard_ops.matmul(xU,
                                      standard_ops.diag(self.manifold_args[1]))
            xUSV = standard_ops.matmul(xUS, self.manifold_args[2])
            outputs = xUSV

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
 def while_loop_body(iteration, eigenvector, old_eigenvector):
   """Performs one iteration of the power method."""
   del old_eigenvector  # Needed by the condition, but not the body.
   iteration += 1
   # We need to use tf.matmul() and tf.expand_dims(), instead of
   # tf.tensordot(), since the former will infer the shape of the result, while
   # the latter will not (tf.while_loop() needs the shapes).
   new_eigenvector = standard_ops.matmul(
       matrix, standard_ops.expand_dims(eigenvector, 1))[:, 0]
   new_eigenvector /= standard_ops.norm(new_eigenvector)
   return (iteration, new_eigenvector, eigenvector)
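For context, this body is normally paired with a stopping condition and driven by a while loop; a minimal sketch under assumed names (`maximum_iterations`, `epsilon`, and the starting vector are illustrative, not from the original source):

def while_loop_condition(iteration, eigenvector, old_eigenvector):
  """Keeps iterating until convergence or the iteration budget is spent."""
  not_done = iteration < maximum_iterations
  not_converged = tf.norm(eigenvector - old_eigenvector) > epsilon
  return tf.logical_and(not_done, not_converged)

# Start from a normalized vector of the right length and run the power method.
start = tf.ones_like(matrix[:, 0])
start = start / tf.norm(start)
_, dominant_eigenvector, _ = tf.while_loop(
    while_loop_condition, while_loop_body,
    loop_vars=(tf.constant(0), start, tf.zeros_like(start)))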
Example 6
 def fprop(self, x):
     if len(self.output_shape) > 2:
         # Broadcasting is required for the inputs.
         outputs = standard_ops.tensordot(x, self.kernel, [[len(self.input_shape) - 1],
                                                                [0]])
         # Reshape the output back to the original ndim of the input.
         outputs.set_shape(self.output_shape)
     else:
         outputs = standard_ops.matmul(x, self.kernel)
     outputs = nn.bias_add(outputs, self.bias)
     # if self.activation is not None:
     #     return self.activation(outputs)  # pylint: disable=not-callable
     return outputs
Example 7
    def call(self, inputs):
        outputs = standard_ops.matmul(inputs, self.W)

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.b)

        # Apply normalizer function / layer.
        if self.normalizer_fn is not None:
            outputs = self.normalizer_fn(outputs)

        if self.activation_fn is not None:
            outputs = self.activation_fn(outputs)  # pylint: disable=not-callable

        return outputs
Example 8
 def call(self, inputs):
   shape = inputs.get_shape().as_list()
   output_shape = shape[:-1] + [self.units]
   if len(output_shape) > 2:
     # Broadcasting is required for the inputs.
     outputs = standard_ops.tensordot(inputs, self.kernel,
                                      [[len(shape) - 1], [0]])
     # Reshape the output back to the original ndim of the input.
     outputs.set_shape(output_shape)
   else:
     outputs = standard_ops.matmul(inputs, self.kernel)
   if self.use_bias:
     outputs = nn.bias_add(outputs, self.bias)
   if self.activation is not None:
     return self.activation(outputs)  # pylint: disable=not-callable
   return outputs
Example 9
    def call(self, inputs):
     inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
     shape = inputs.get_shape().as_list()
     if len(shape) > 2:
       # Broadcasting is required for the inputs.
       outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                              [0]])
       # Reshape the output back to the original ndim of the input.
       if context.in_graph_mode():
         output_shape = shape[:-1] + [self.units]
         outputs.set_shape(output_shape)
     else:
         outputs = standard_ops.matmul(inputs, self.kernel)
     
     outputs = nn.bias_add(outputs, self.bias)
     return outputs
Example 10
 def call(self, inputs):
     shape = inputs.get_shape().as_list()
     output_shape = shape[:-1] + [self.units]
     if len(output_shape) > 2:
         # Broadcasting is required for the inputs.
         outputs = standard_ops.tensordot(inputs, self.kernel,
                                          [[len(shape) - 1], [0]])
         # Reshape the output back to the original ndim of the input.
         outputs.set_shape(output_shape)
     else:
         outputs = standard_ops.matmul(inputs, self.kernel)
     if self.use_bias:
         outputs = nn.bias_add(outputs, self.bias)
     if self.activation is not None:
         return self.activation(outputs)  # pylint: disable=not-callable
     return outputs
Example 11
def dense(inputs, kernel, bias=None, activation=None):
    #inputs = ops.convert_to_tensor(inputs, dtype=dtype)
    shape = inputs.get_shape().as_list()
    output_shape = shape[:-1] + [kernel.get_shape().as_list()[-1]]
    if len(output_shape) > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, kernel,
                                         [[len(shape) - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        outputs.set_shape(output_shape)
    else:
        outputs = standard_ops.matmul(inputs, kernel)
    if bias is not None:
        outputs = nn.bias_add(outputs, bias)
    if activation is not None:
        return activation(outputs)  # pylint: disable=not-callable
    return outputs
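A small usage sketch for the standalone dense() helper above, with an assumed 2-D placeholder and explicitly created variables (all names and shapes below are illustrative):

# Hypothetical usage of the dense() helper above.
x = tf.placeholder(tf.float32, shape=[None, 128])
kernel = tf.get_variable('kernel', shape=[128, 64],
                         initializer=tf.glorot_uniform_initializer())
bias = tf.get_variable('bias', shape=[64], initializer=tf.zeros_initializer())
y = dense(x, kernel, bias=bias, activation=tf.nn.relu)  # shape [None, 64]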
Example 12
    def call(self, inputs, **kwargs):
        combined_kernel = tf.concat(axis=1,
                                    values=self.dense_kernels,
                                    name='combined_kernel')
        input_shape = tensor_shape.TensorShape(inputs.shape)
        if input_shape[-1].value is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        self.input_spec = base.InputSpec(min_ndim=2,
                                         axes={-1: input_shape[-1].value})
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()
        if len(shape) > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, combined_kernel,
                                             [[len(shape) - 1], [0]])
            # # Reshape the output back to the original ndim of the input.
            # if context.in_graph_mode():
            #   output_shape = shape[:-1] + [self.units]
            #   outputs.set_shape(output_shape)
        else:
            outputs = standard_ops.matmul(inputs, combined_kernel)
        prev = 0
        output_ops = []

        if all([d.use_bias for d in self.dense_layers]):
            combined_bias = tf.concat(
                axis=0, values=[d.bias for d in self.dense_layers])
            outputs = nn.bias_add(outputs, combined_bias)
            self.bias_combined = True
        activations = {d.activation for d in self.dense_layers}
        if None not in activations and len(activations) == 1:
            outputs = activations.pop()(outputs)
            self.activations_combined = True
        for d in self.dense_layers:
            # Slice this layer's columns out of the combined output; `prev`
            # accumulates the running offset so more than two layers work.
            layer_output = outputs[:, prev:(prev + d.units)]
            prev += d.units
            if d.use_bias and not self.bias_combined:
                layer_output = nn.bias_add(layer_output, d.bias)
            if d.activation is not None and not self.activations_combined:
                layer_output = d.activation(layer_output)
            output_ops.append(layer_output)
        return output_ops
Example 13
    def call(self, inputs):
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()
        output_shape = shape[:-1] + [self.units]

        # quantize the weights, if there is a weight quantizer
        if self.weight_quantizer is not None:
            with tf.variable_scope("quant_weights"):
                used_kernel = self.weight_quantizer.quantize(self.kernel)
            with tf.variable_scope("quant_biases"):
                used_bias = self.weight_quantizer.quantize(self.bias)
        else:
            used_kernel = self.kernel
            used_bias = self.bias
        # if intrinsic quantization, apply intr. quantization to weights, too!
        if self.quantizer is not None:
            used_kernel = self.quantizer.quantize(used_kernel)
            used_bias = self.quantizer.quantize(used_bias)

        if len(output_shape) > 2:
            ## Broadcasting is required for the inputs.
            #outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
            #                                                       [0]])
            ## Reshape the output back to the original ndim of the input.
            #outputs.set_shape(output_shape)
            raise ValueError(
                'len(output_shape) > 2 is not supported for quantized '
                'operation, got %d.' % len(output_shape))
        else:
            if self.quantizer is None:
                outputs = standard_ops.matmul(inputs, used_kernel)
            else:  # with quantization
                outputs = qmatmul(inputs, used_kernel, self.quantizer)
        #TODO: quantize after bias and activation
        if self.use_bias:
            outputs = nn.bias_add(outputs, used_bias)
            if self.quantizer is not None:
                outputs = self.quantizer.quantize(outputs)
        if self.activation is not None:
            # never called, since activation performed in upper hierarchy
            outputs = self.activation(outputs)  # pylint: disable=not-callable
            if self.quantizer is not None:
                outputs = self.quantizer.quantize(outputs)
        return outputs
Example 14
 def call(self, inputs):
     if binarize_enabled:
         self.kernel = clip_by_value(self.kernel, -1, 1)
     inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
     shape = inputs.get_shape().as_list()
     if len(shape) > 2:
         # Broadcasting is required for the inputs.
         outputs = standard_ops.tensordot(inputs, binarize(self.kernel),
                                          [[len(shape) - 1], [0]])
         # Reshape the output back to the original ndim of the input.
         if context.in_graph_mode():
             output_shape = shape[:-1] + [self.units]
             outputs.set_shape(output_shape)
     else:
         outputs = standard_ops.matmul(inputs, binarize(self.kernel))
     if self.use_bias:
         outputs = nn.bias_add(outputs, self.bias)
     if self.activation is not None:
         return self.activation(outputs)  # pylint: disable=not-callable
     return outputs
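`binarize` and `clip_by_value` are assumed to come from the surrounding module; one common (hypothetical) definition, sketched here only for context, is a sign function with a straight-through gradient:

# Hypothetical sketch, not taken from the original source: binarize weights to
# +/-1 in the forward pass while letting gradients pass through unchanged.
def binarize(w):
  g = tf.get_default_graph()
  with g.gradient_override_map({'Sign': 'Identity'}):
    return tf.sign(w)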
Example 15
    def call(self, inputs):
        shape = inputs.get_shape().as_list()
        input_dim = shape[-1]
        output_shape = shape[:-1] + [self.units]
        if len(output_shape) > 2:
            # Reshape the input to 2D.
            output_shape_tensors = array_ops.unpack(array_ops.shape(inputs))
            output_shape_tensors[-1] = self.units
            output_shape_tensor = array_ops.pack(output_shape_tensors)
            inputs = array_ops.reshape(inputs, [-1, input_dim])

        outputs = standard_ops.matmul(inputs, self.w)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)

        if len(output_shape) > 2:
            # Reshape the output back to the original ndim of the input.
            outputs = array_ops.reshape(outputs, output_shape_tensor)
            outputs.set_shape(output_shape)

        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
Example 16
    def call(self, inputs):
        shape = inputs.get_shape().as_list()
        input_dim = shape[-1]
        output_shape = shape[:-1] + [self.units]
        if len(output_shape) > 2:
            # Reshape the input to 2D.
            output_shape_tensors = array_ops.unpack(array_ops.shape(inputs))
            output_shape_tensors[-1] = self.units
            output_shape_tensor = array_ops.pack(output_shape_tensors)
            inputs = array_ops.reshape(inputs, [-1, input_dim])

        outputs = standard_ops.matmul(inputs, self.w)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)

        if len(output_shape) > 2:
            # Reshape the output back to the original ndim of the input.
            outputs = array_ops.reshape(outputs, output_shape_tensor)
            outputs.set_shape(output_shape)

        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
Example 17
    def call(self, inputs):
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()

        if len(shape) > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, self.kernel,
                                             [[len(shape) - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if context.in_graph_mode():
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            outputs = standard_ops.matmul(inputs, self.kernel)

        scaler = self.scale / tf.sqrt(
            tf.reduce_sum(tf.square(self.kernel), [0]))
        outputs = scaler * outputs

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the constraint/objective weight matrix (the analogue of
    Lagrange multipliers) will be updated using `constrained_optimizer` (if
    provided) or `optimizer` (if not). Whether the matrix updates are additive
    or multiplicative depends on the derived class.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    zero_and_constraints = standard_ops.concat((standard_ops.zeros(
        (1,), dtype=constraints.dtype), constraints),
                                               axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
      # If we don't have a separate constraint_optimizer, then we use
      # self._optimizer for both the update of the model parameters, and that of
      # the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      grads_and_vars.append(
          self._constraint_grad_and_var(state, matrix_gradient))
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
      # If we have a separate constraint_optimizer, then we use self._optimizer
      # for the update of the model parameters, and self._constraint_optimizer
      # for that of the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      matrix_grads_and_vars = [
          self._constraint_grad_and_var(state, matrix_gradient)
      ]

      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # If we don't have a global step, just project, and we're done.
        return self._projection_op(state, name=name)
      else:
        # If we have a global step, then we need to increment it in addition to
        # projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
          global_step_op = state_ops.assign_add(
              global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
Example 19
fixed_size = 8
fixed_prec = 4

inputs_vals = np.arange(
    input_width * input_height * input_channels * batch_size).reshape(
        batch_size, input_width * input_height * input_channels)
filters_vals = np.arange(
    input_channels * input_width * input_height * output_channels).reshape(
        input_channels * input_width * input_height, output_channels)

inputs = tf.constant(inputs_vals, dtype=tf.float64)
filters = tf.constant(filters_vals, dtype=tf.float64)

quantizer = Quantizers.NoQuantizer()
output = QFC.qmatmul(inputs, filters, quantizer)
gold_output = standard_ops.matmul(inputs, filters)

with tf.Session() as sess:
    gold_result = gold_output.eval().flatten()
    result = output.eval().flatten()
    #print(sess.run(gold_output))
    #print(sess.run(filters))
    pass

failed = False
for i in range(len(result)):
    if result[i] != gold_result[i]:
        failed = True
        break

print('QFullyConnect test:')
Example 20
 def _matmul(self, inputs, kernel):
     if inputs.shape.ndims <= 2:
         return standard_ops.matmul(inputs, kernel)
     # To handle broadcasting, we must use `tensordot`.
     return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
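Illustrative only: the helper falls back to `matmul` for rank-2 inputs and uses `tensordot` to contract the last input axis against the kernel for higher ranks. The shapes below are made up, and `self` is assumed to be the layer instance that defines `_matmul`:

 # Hypothetical shapes: the kernel maps 16 features to 8 units.
 kernel = tf.get_variable('kernel', shape=[16, 8])
 flat = tf.random_normal([4, 16])       # rank 2 -> matmul, output [4, 8]
 seq = tf.random_normal([4, 10, 16])    # rank 3 -> tensordot, output [4, 10, 8]
 out_flat = self._matmul(flat, kernel)
 out_seq = self._matmul(seq, kernel)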
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the constraint/objective weight matrix (the analogue of
    Lagrange multipliers) will be updated using `constrained_optimizer` (if
    provided) or `optimizer` (if not). Whether the matrix updates are additive
    or multiplicative depends on the derived class.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
        axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
      # If we don't have a separate constraint_optimizer, then we use
      # self._optimizer for both the update of the model parameters, and that of
      # the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      grads_and_vars.append(
          self._constraint_grad_and_var(state, matrix_gradient))
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
      # If we have a separate constraint_optimizer, then we use self._optimizer
      # for the update of the model parameters, and self._constraint_optimizer
      # for that of the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      matrix_grads_and_vars = [
          self._constraint_grad_and_var(state, matrix_gradient)
      ]

      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # If we don't have a global step, just project, and we're done.
        return self._projection_op(state, name=name)
      else:
        # If we have a global step, then we need to increment it in addition to
        # projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
          global_step_op = state_ops.assign_add(
              global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents whether this is training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, set up the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
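For reference, a hypothetical `params` dict matching the hyperparameters read above; the values and the feature column are placeholders, not taken from the original source:

params = {
    "hidden_units": [64, 32],
    "feature_columns": [layers.real_valued_column("x", dimension=128)],
    "n_classes": 10000,
    "n_samples": 200,          # roughly 2% of n_classes
    "n_labels": 5,
    "top_k": 10,
    "optimizer": None,         # falls back to Adagrad
    "dropout": 0.5,
    "gradient_clip_norm": 5.0,
    "num_ps_replicas": 0,
}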
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Adds the parameters for a fully connected layer and returns the output.
  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.
  If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
  with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
  multiply along the first dimensions. The result r is a tensor of shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
  where \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  This is accomplished by reshaping `x` to 2-D
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  before the matrix multiply and afterwards reshaping it to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].
  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.
  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.
  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to place
  the created variables (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.
  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.
  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.
  Returns:
    The output of the fully connected layer.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')
    dtype = x.dtype.base_dtype

    weight_collections = set(list(weight_collections or []) +
                             [ops.GraphKeys.VARIABLES])
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=weight_collections,
                                    regularizer=weight_regularizer,
                                    trainable=trainable)
    x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
                                                         [-1, num_input_units])
    y = standard_ops.matmul(x_2_dim, w)

    if bias_init is not None:
      bias_collections = set(list(bias_collections or []) +
                             [ops.GraphKeys.VARIABLES])
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=bias_collections,
                                      regularizer=bias_regularizer,
                                      trainable=trainable)

      y = nn.bias_add(y, b)

    if len(dims) > 2:
      out_shape = array_ops.unpack(array_ops.shape(x))
      out_shape[-1] = num_output_units

      y = array_ops.reshape(y, array_ops.pack(out_shape))

      static_shape = x.get_shape().as_list()
      static_shape[-1] = num_output_units
      y.set_shape(static_shape)

    return _apply_activation(y, activation_fn, output_collections)
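Illustrative only: applying the layer to a rank-3 input exercises the reshape path described in the docstring (the placeholder shape and name are made up):

# Hypothetical usage of legacy_fully_connected on a [batch, time, dim] input.
x = tf.placeholder(tf.float32, shape=[None, 20, 64])
y = legacy_fully_connected(x, 32, activation_fn=nn.relu,
                           name='proj')                 # -> [None, 20, 32]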
Example 24
    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims is None:
            raise ValueError('Inputs to `Dense` should have known rank.')
        if len(input_shape) < 2:
            raise ValueError('Inputs to `Dense` should have rank >= 2.')
        if input_shape[-1].value is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        # Note that we set `trainable=True` because this is a trainable
        # weight of the layer. If the layer is not trainable
        # (self.trainable = False), the variable will not be added to
        # tf.trainable_variables(), and self.trainable_weights will be empty.
        m = input_shape[-1].value
        k = self.rank
        n = self.units

        # Glorot uniform initialization
        limit = (6.0 / (m + n))**0.5
        kernel0 = random_ops.random_uniform([m, n],
                                            minval=-limit,
                                            maxval=limit)
        s0, u0, v0 = linalg_ops.svd(kernel0, full_matrices=False)
        v0 = array_ops.transpose(v0)
        u0 = array_ops.slice(u0, [0, 0], [m, k])  #u0[:,:k]
        s0 = array_ops.slice(s0, [0], [k])  # s0[:k]
        v0 = array_ops.slice(v0, [0, 0], [k, n])  #v0[:k,:]

        self.manifold_args = [
            vs.get_variable(
                'U',
                #shape=[m, k],
                initializer=u0,  #init_ops.orthogonal_initializer(),
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True),
            vs.get_variable(
                'S',
                #shape=[k,],
                initializer=s0,  #self.kernel_initializer,
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True),
            vs.get_variable(
                'V',
                #shape=[k, n],
                initializer=v0,  #init_ops.orthogonal_initializer(),
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True)
        ]

        U = self.manifold_args[0]
        US = standard_ops.matmul(U, standard_ops.diag(self.manifold_args[1]))
        USV = standard_ops.matmul(US, self.manifold_args[2])
        self.kernel = USV

        if 'norm' in self.summaries:
            summary.scalar('kernel-norm', linalg_ops.norm(self.kernel))
        if 'histogram' in self.summaries:
            summary.histogram('kernel-histogram', self.kernel)

        manifold = manifolds.FixedRankEmbedded(m, n, k)

        if self.use_bias:
            self.bias = vs.get_variable('bias',
                                        shape=[n],
                                        initializer=self.bias_initializer,
                                        regularizer=self.bias_regularizer,
                                        dtype=self.dtype,
                                        trainable=True)

            self.manifold_args.append(self.bias)
            manifold = manifolds.Product([manifold, manifolds.Euclidean(n)])

        else:
            self.bias = None

        self.manifold = manifold
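A hedged shape check of the fixed-rank factorization built above (the sizes are hypothetical): the matching call() in Example 3 multiplies the inputs by U, diag(S) and V in sequence, which equals multiplying by the full kernel.

# Hypothetical sizes: m input features, rank k, n output units.
m, k, n = 128, 16, 64
U = tf.random_normal([m, k])
S = tf.random_normal([k])
V = tf.random_normal([k, n])
kernel = tf.matmul(tf.matmul(U, tf.diag(S)), V)              # [m, n], rank <= k
x = tf.random_normal([32, m])
xUSV = tf.matmul(tf.matmul(tf.matmul(x, U), tf.diag(S)), V)  # same as tf.matmul(x, kernel)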
Example 25
def dense_layer_ot(x,
                   in_size,
                   out_size,
                   sequence_length,
                   scope_name,
                   activation_fn=tf.nn.elu,
                   batch_norm=fu.create_BNParams()):
    '''
    Apply a dense layer at every timestep.
    This is used for filtering the time series.
    :param x: input data
    :param in_size: input size, i.e. the number of features
    :param out_size: output size
    :param sequence_length: length of the sequence, i.e. the number of timesteps to iterate over
    :param scope_name: scope name of this transformation
    :param activation_fn: activation function
    :param batch_norm: namedtuple indicating whether to apply batch normalization and the phase (True if training, False if testing)
    :return: the per-timestep outputs, concatenated back along the time dimension
    '''
    layers_output = []
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable(
            'weight_filter',
            shape=[in_size, out_size],
            initializer=tf.contrib.layers.xavier_initializer(),
            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
            trainable=True)

        if not batch_norm.apply:
            b = tf.get_variable(
                'bias_filter',
                shape=[out_size],
                initializer=tf.constant_initializer(0.),
                collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES],
                trainable=True)

        for t in range(0, sequence_length):
            layer_output = standard_ops.matmul(x[:, t, :], W)

            if batch_norm.apply:
                layer_output = tf.contrib.layers.batch_norm(
                    layer_output,
                    center=batch_norm.center,
                    scale=batch_norm.scale,
                    is_training=batch_norm.phase,
                    scope=vs.name + '_bn')
            else:
                # no batch norm: add the bias instead
                layer_output = standard_ops.add(layer_output, b)

            if activation_fn:
                layer_output = activation_fn(layer_output)

            layers_output.append(tf.expand_dims(
                layer_output,
                1))  # add the timestep dimension back to allow concatenation

        # the same weights W are shared across all timesteps
        s.add_hidden_layer_summary(layers_output[-1], vs.name, weight=W)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias', b)

    return tf.concat(layers_output, axis=1)
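Illustrative only: filtering a [batch, time, features] series with the layer above. The placeholder sizes are made up, and the default batch_norm argument is assumed to come from the project's fu helper:

# Hypothetical usage of dense_layer_ot.
x = tf.placeholder(tf.float32, shape=[None, 50, 32])
filtered = dense_layer_ot(x, in_size=32, out_size=16, sequence_length=50,
                          scope_name='ts_filter')   # -> [None, 50, 16]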
Example 26
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    scope=None):
    """Adds a fully connected layer.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable is
  created and added to the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.

  Note that if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor with at least rank 2 and a known value for the last
      dimension, e.g. `[batch_size, depth]` or `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    scope: Optional scope for variable_op_scope.

  Returns:
     the tensor variable representing the result of the series of operations.

  Raises:
    ValueError: if `inputs` has rank less than 2 or if its last dimension is not set.
  """
    with variable_scope.variable_op_scope([inputs],
                                          scope,
                                          'fully_connected',
                                          reuse=reuse) as sc:
        dtype = inputs.dtype.base_dtype
        num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

        static_shape = inputs.get_shape().as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections)
                outputs = nn.bias_add(outputs, biases)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        if activation_fn:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
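Illustrative only: a small MLP head built with the layer above (the input placeholder is made up, and the batch_norm normalizer is assumed to be in scope, e.g. from the same contrib layers module):

# Hypothetical usage of fully_connected.
x = tf.placeholder(tf.float32, shape=[None, 256])
net = fully_connected(x, 128)                               # ReLU by default
net = fully_connected(net, 128, normalizer_fn=batch_norm)   # biases are skipped
logits = fully_connected(net, 10, activation_fn=None)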
Example 27
def fully_connected(x,
                    num_output_nodes,
                    activation_fn=None,
                    weight_init=None,
                    bias_init=standard_ops.constant_initializer(0.),
                    num_input_nodes=None,
                    name=None,
                    weight_collections=None,
                    bias_collections=None,
                    weight_regularizer=None,
                    create_summaries=True):
    """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  \\\\(y = f(w * x + b)\\\\) where **f** is given by `activation_fn`

  This op creates `w` and optionally `b` and adds various summaries that can be
  useful for visualizing learning or diagnosing training problems. Bias can be
  disabled by setting `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  In almost all cases, the number of input nodes can be inferred from the shape
  of `x`, but if it is unspecified or additional size checks are desired, then
  `num_input_nodes` can be specified.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and which collections to place
  the created variables in (`weight_collections` and `bias_collections`).

  A per layer regularization can be specified by setting `weight_regularizer`.
  This is only applied to weights and not the bias.

  Args:
    x: The input `Tensor`. Must be 2D.
    num_output_nodes: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional initialization. If not specified, uses Xavier
      initialization (see `tf.learn.xavier_initializer`).
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in order
      to disable bias.
    num_input_nodes: The number of input nodes.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections for just weights.
    bias_collections: List of graph collections for just bias.
    weight_regularizer: A regularizer like the result of
      `tf.learn.l1_regularizer` or `tf.learn.l2_regularizer`.
    create_summaries: Set to false to disable summaries.

  Returns:
    The result of applying a fully connected layer.

  Raises:
    ValueError: if `x` is not rank 2; or `x`'s second dimension is not known
    and `num_input_nodes` is not specified.
  """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Check rank and if num_input_nodes is specified, make sure it matches.
        # TODO(wicke): This does not work with scalar inputs (shape [batch_size,])
        # TODO(wicke): We'd have to encode the broadcasting rules here to be safe.
        x.get_shape().assert_is_compatible_with([None, num_input_nodes])

        if not num_input_nodes:
            if x.get_shape().dims is None or x.get_shape(
            ).dims[1].value is None:
                raise ValueError(
                    'If x has an unknown second dimension then num_input_nodes '
                    'must be specified; shape: %s num_input_nodes: %s' %
                    (x.get_shape(), num_input_nodes))
            else:
                num_input_nodes = x.get_shape().dims[1].value

        dtype = x.dtype.base_dtype
        # Regularization is only applied to the weights and not bias.
        w = _weight_variable_2d(num_input_nodes,
                                num_output_nodes,
                                dtype=dtype,
                                init=weight_init,
                                collections=weight_collections,
                                regularizer=weight_regularizer,
                                create_summaries=create_summaries)

        y = standard_ops.matmul(x, w)
        if bias_init is not None:
            b = _bias_variable(num_output_nodes,
                               dtype=dtype,
                               init=bias_init,
                               collections=bias_collections,
                               create_summaries=create_summaries)
            y = nn.bias_add(y, b)

        if create_summaries:
            return _apply_activation_with_summaries(y, activation_fn)
        if activation_fn:
            y = activation_fn(y)
        return y
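
The docstring above is the standard dense-layer equation y = f(w * x + b). A
minimal NumPy sketch of that forward pass (the helper name and shapes are
illustrative, not part of the snippet's API):

import numpy as np

def fully_connected_forward(x, w, b=None, activation_fn=None):
    """Dense layer forward pass: y = f(x @ w + b)."""
    y = x @ w                      # [batch, in] x [in, out] -> [batch, out]
    if b is not None:
        y = y + b                  # bias broadcasts over the batch dimension
    if activation_fn is not None:
        y = activation_fn(y)
    return y

x = np.random.randn(4, 3)          # batch of 4 inputs with 3 features
w = np.random.randn(3, 2)          # project to 2 output nodes
b = np.zeros(2)
print(fully_connected_forward(x, w, b, activation_fn=np.tanh).shape)  # (4, 2)
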
Example 28
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS,),
                    bias_collections=(ops.GraphKeys.BIASES,),
                    output_collections=(ops.GraphKeys.ACTIVATIONS,),
                    weight_regularizer=None,
                    bias_regularizer=None):
  """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.

  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and which collections to place
  the created variables in (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.

  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that takes a single Tensor and applies a
      non-linearity to it. If None, no activation is applied.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    num_input_units = x.get_shape().dims[1].value
    dtype = x.dtype.base_dtype

    w = _weight_variable(shape=[num_input_units, num_output_units],
                         dtype=dtype,
                         initializer=weight_init,
                         collections=weight_collections,
                         regularizer=weight_regularizer)

    y = standard_ops.matmul(x, w)

    if bias_init is not None:
      b = _bias_variable(shape=[num_output_units],
                         dtype=dtype,
                         initializer=bias_init,
                         collections=bias_collections,
                         regularizer=bias_regularizer)

      y = nn.bias_add(y, b)

    return _apply_activation(y, activation_fn, output_collections)
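
The `weight_regularizer` argument above expects a callable that maps the weight
tensor to a scalar penalty, which the layer then adds to the
`REGULARIZATION_LOSSES` collection. A hedged sketch of such a callable under a
TF 1.x-style graph API (`my_l2_regularizer` is an illustrative name, not the
library's `l2_regularizer`):

import tensorflow as tf  # assumes a TF 1.x-style graph API

def my_l2_regularizer(scale):
  """Regularizer factory with the callable signature the layer expects."""
  def regularizer(weights):
    # Scalar penalty; the layer adds it to GraphKeys.REGULARIZATION_LOSSES.
    return tf.multiply(scale, tf.nn.l2_loss(weights), name='l2_penalty')
  return regularizer

# At training time the collected penalties are typically folded into the loss:
# total_loss = data_loss + tf.add_n(
#     tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
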
Example 29
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS, ),
                    bias_collections=(ops.GraphKeys.BIASES, ),
                    output_collections=(ops.GraphKeys.ACTIVATIONS, ),
                    weight_regularizer=None,
                    bias_regularizer=None):
    """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.

  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and which collections to place
  the created variables in (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.

  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that takes a single Tensor and applies a
      non-linearity to it. If None, no activation is applied.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.
  """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        num_input_units = x.get_shape().dims[1].value
        dtype = x.dtype.base_dtype

        w = _weight_variable(shape=[num_input_units, num_output_units],
                             dtype=dtype,
                             initializer=weight_init,
                             collections=weight_collections,
                             regularizer=weight_regularizer)

        y = standard_ops.matmul(x, w)

        if bias_init is not None:
            b = _bias_variable(shape=[num_output_units],
                               dtype=dtype,
                               initializer=bias_init,
                               collections=bias_collections,
                               regularizer=bias_regularizer)

            y = nn.bias_add(y, b)

        return _apply_activation(y, activation_fn, output_collections)
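
Because the variables are created with `tf.variable_scope` semantics, calling
the layer twice under the same scope with `reuse=True` shares one
`weights`/`bias` pair, as the docstring notes. A sketch of that pattern,
assuming TF 1.x placeholders and the `fully_connected` defined above (`x1` and
`x2` are illustrative inputs):

import tensorflow as tf  # TF 1.x-style scoping, for illustration

x1 = tf.placeholder(tf.float32, [None, 16])
x2 = tf.placeholder(tf.float32, [None, 16])

with tf.variable_scope('encoder'):
  h1 = fully_connected(x1, num_output_units=8, name='fc')
with tf.variable_scope('encoder', reuse=True):
  h2 = fully_connected(x2, num_output_units=8, name='fc')  # shares 'encoder/fc' variables
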
Example 30
def fully_connected(x,
                    num_output_nodes,
                    activation_fn=None,
                    weight_init=None,
                    bias_init=standard_ops.constant_initializer(0.),
                    num_input_nodes=None,
                    name=None,
                    weight_collections=None,
                    bias_collections=None,
                    weight_regularizer=None,
                    create_summaries=True):
  """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  \\\\(y = f(w * x + b)\\\\) where **f** is given by `activation_fn`

  This op creates `w` and optionally `b` and adds various summaries that can be
  useful for visualizing learning or diagnosing training problems. Bias can be
  disabled by setting `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  In almost all cases, the number of input nodes can be inferred from the shape
  of `x`, but if it is unspecified or additional size checks are desired, then
  `num_input_nodes` can be specified.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and which collections to place
  the created variables in (`weight_collections` and `bias_collections`).

  A per layer regularization can be specified by setting `weight_regularizer`.
  This is only applied to weights and not the bias.

  Args:
    x: The input `Tensor`. Must be 2D.
    num_output_nodes: The size of the output.
    activation_fn: A function that takes a single Tensor and applies a
      non-linearity to it. If None, no activation is applied.
    weight_init: An optional initialization. If not specified, uses Xavier
      initialization (see `tf.learn.xavier_initializer`).
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    num_input_nodes: The number of input nodes.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections for just weights.
    bias_collections: List of graph collections for just bias.
    weight_regularizer: A regularizer like the result of
      `tf.learn.l1_regularizer` or `tf.learn.l2_regularizer`.
    create_summaries: Set to false to disable summaries.

  Returns:
    The result of applying a fully connected layer.

  Raises:
    ValueError: if `x` is not rank 2; or `x`'s second dimension is not known
    and `num_input_nodes` is not specified.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    # Check rank and if num_input_nodes is specified, make sure it matches.
    # TODO(wicke): This does not work with scalar inputs (shape [batch_size,])
    # TODO(wicke): We'd have to encode the broadcasting rules here to be safe.
    x.get_shape().assert_is_compatible_with([None, num_input_nodes])

    if not num_input_nodes:
      if x.get_shape().dims is None or x.get_shape().dims[1].value is None:
        raise ValueError(
            'If x has an unknown second dimension then num_input_nodes '
            'must be specified; shape: %s num_input_nodes: %s'
            % (x.get_shape(), num_input_nodes))
      else:
        num_input_nodes = x.get_shape().dims[1].value

    dtype = x.dtype.base_dtype
    # Regularization is only applied to the weights and not bias.
    w = _weight_variable_2d(
        num_input_nodes, num_output_nodes, dtype=dtype, init=weight_init,
        collections=weight_collections, regularizer=weight_regularizer,
        create_summaries=create_summaries)

    y = standard_ops.matmul(x, w)
    if bias_init is not None:
      b = _bias_variable(
          num_output_nodes, dtype=dtype, init=bias_init,
          collections=bias_collections, create_summaries=create_summaries)
      y = nn.bias_add(y, b)

    if create_summaries:
      return _apply_activation_with_summaries(y, activation_fn)
    if activation_fn:
      y = activation_fn(y)
    return y
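
The shape check above only needs `num_input_nodes` when the second dimension of
`x` is statically unknown. A short TF 1.x-style illustration of the two cases
(placeholder names are illustrative):

import tensorflow as tf  # TF 1.x-style static shapes, for illustration

x_known = tf.placeholder(tf.float32, shape=[None, 64])
print(x_known.get_shape().dims[1].value)   # 64 -> num_input_nodes is inferred

x_unknown = tf.placeholder(tf.float32, shape=[None, None])
# Here dims[1].value is None, so the function above raises ValueError
# unless num_input_nodes is passed explicitly.
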
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  """Adds a fully connected layer.
  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable is
  created and added to the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.
  Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.
  Args:
    inputs: A tensor with at least rank 2 and a known value for the last
      dimension, i.e. `[batch_size, depth]` or `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.
  Returns:
     the tensor variable representing the result of the series of operations.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, int):
    raise ValueError('num_outputs should be integer, got %s.' % num_outputs)
  with variable_scope.variable_op_scope([inputs],
                                        scope,
                                        'fully_connected',
                                        reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

    static_shape = inputs.get_shape().as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
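
When `inputs` has rank greater than 2, the code above flattens it to
`[-1, num_input_units]`, multiplies, and reshapes back to the original leading
dimensions. The same trick in plain NumPy (names and shapes are illustrative):

import numpy as np

def dense_any_rank(inputs, weights):
    """Apply a [depth, units] kernel to the last axis of `inputs`,
    mirroring the flatten -> matmul -> reshape sequence above."""
    depth, units = weights.shape
    lead = inputs.shape[:-1]             # e.g. (batch, time)
    flat = inputs.reshape(-1, depth)     # [batch*time, depth]
    out = flat @ weights                 # [batch*time, units]
    return out.reshape(*lead, units)     # [batch, time, units]

x = np.random.randn(2, 5, 8)             # rank-3 input
w = np.random.randn(8, 3)
print(dense_any_rank(x, w).shape)        # (2, 5, 3)
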
Example 32
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents whether this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
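
At evaluation and inference time the model computes full logits against the
transposed [n_classes, hidden] weight matrix, which is the quantity sampled
softmax only approximates during training. A NumPy sketch of that eval-time
computation (sizes are illustrative):

import numpy as np

hidden_dim, n_classes, batch = 32, 1000, 4
net = np.random.randn(batch, hidden_dim)        # final hidden layer
weights = np.random.randn(n_classes, hidden_dim)
biases = np.zeros(n_classes)

logits = net @ weights.T + biases               # [batch, n_classes]
probabilities = np.exp(logits - logits.max(axis=1, keepdims=True))
probabilities /= probabilities.sum(axis=1, keepdims=True)  # softmax
print(probabilities.shape)                      # (4, 1000)
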
Example 33
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    # Copied and modified from tensorflow-0.12.0 contrib.layers.fully_connected;
    # adds the weights_normalizer_* options.
    """Adds a fully connected layer.

    `fully_connected` creates a variable called `weights`, representing a fully
    connected weight matrix, which is multiplied by the `inputs` to produce a
    `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable is
    created and added to the hidden units. Finally, if `activation_fn` is not `None`,
    it is applied to the hidden units as well.

    Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
    prior to the initial matrix multiply by `weights`.

    Args:
      inputs: A tensor with at least rank 2 and a known value for the last
        dimension, i.e. `[batch_size, depth]` or `[None, None, None, channels]`.
      num_outputs: Integer or long, the number of output units in the layer.
      activation_fn: activation function, set to None to skip it and maintain
        a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor added.
        Defaults to None, i.e. no normalizer function.
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: Optional list of collections for all the variables or
        a dictionary containing a different list of collections per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
       the tensor variable representing the result of the series of operations.

    Raises:
      ValueError: if x has rank less than 2 or if its last dimension is not set.
    """
    if not isinstance(num_outputs, six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         num_outputs)
    with variable_scope.variable_scope(scope,
                                       'fully_connected', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        inputs_shape = inputs.get_shape()
        num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

        static_shape = inputs_shape.as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs),
                                     len(static_shape))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(weights,
                                            **weights_normalizer_params)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
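
The `weights_normalizer_fn` hook added in this variant is applied to the kernel
before the matmul. One plausible normalizer, sketched under the assumption of a
TF 1.x API (the function name is illustrative, not part of the snippet):

import tensorflow as tf  # assumes a TF 1.x-style API

def l2_column_normalizer(weights, epsilon=1e-12):
    """Scale each output column of the [depth, units] kernel to unit L2 norm."""
    norm = tf.sqrt(tf.reduce_sum(tf.square(weights), axis=0))  # shape [units]
    return weights / (norm + epsilon)                          # broadcasts over rows

# outputs = fully_connected(inputs, 64,
#                           weights_normalizer_fn=l2_column_normalizer)
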
def _matmul(self, inputs, kernel):
  if inputs.shape.ndims <= 2:
    return standard_ops.matmul(inputs, kernel)
  # To handle broadcasting, we must use `tensordot`.
  return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
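
For inputs with more than two dimensions, `tensordot` contracts the last axis
of `inputs` with the first axis of `kernel`, which matches applying the 2-D
matmul at every leading position. A small NumPy check of that equivalence
(shapes are illustrative):

import numpy as np

inputs = np.random.randn(2, 5, 8)   # e.g. [batch, time, depth]
kernel = np.random.randn(8, 3)      # [depth, units]

via_tensordot = np.tensordot(inputs, kernel, axes=[[-1], [0]])
via_reshape = (inputs.reshape(-1, 8) @ kernel).reshape(2, 5, 3)
print(np.allclose(via_tensordot, via_reshape))  # True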