def call(self, inputs):
    """Apply the masked convolution, then the optional bias and activation.

    Args:
        inputs: Input tensor laid out according to `self.data_format`.

    Returns:
        The result of convolving `inputs` with `self.masked_kernel`, with
        `self.bias` added when it is not None and `self.activation` applied
        when it is not None.
    """
    outputs = nn.convolution(
        input=inputs,
        filter=self.masked_kernel,
        dilation_rate=self.dilation_rate,
        strides=self.strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, self.rank + 2))

    if self.bias is not None:
        if self.data_format == 'channels_first':
            if self.rank == 1:
                # nn.bias_add does not accept a 1D input tensor.
                bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                outputs += bias
            if self.rank == 2:
                outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
            if self.rank == 3:
                # As of Mar 2017, direct addition is significantly slower than
                # bias_add when computing gradients. To use bias_add, we
                # collapse Z and Y into a single dimension to obtain a 4D
                # input tensor.
                outputs_shape = outputs.shape.as_list()
                if outputs_shape[0] is None:
                    # The static batch size is unknown; reshape rejects None
                    # but infers a dimension given as -1.
                    outputs_shape[0] = -1
                outputs_4d = array_ops.reshape(outputs, [
                    outputs_shape[0],
                    outputs_shape[1],
                    outputs_shape[2] * outputs_shape[3],
                    outputs_shape[4],
                ])
                outputs_4d = nn.bias_add(
                    outputs_4d, self.bias, data_format='NCHW')
                outputs = array_ops.reshape(outputs_4d, outputs_shape)
        else:
            outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def _conv_pool(x):
    """Run two (conv -> bias -> relu -> 2x2 max-pool) stages on `x`."""
    # View the input as a batch of 8x8 single-channel images.
    hidden = array_ops.reshape(x, [-1, 8, 8, 1])
    # (filter shape, bias shape) for each of the two stages, in order.
    stage_shapes = (
        ([3, 3, 1, 6], [6]),
        ([3, 3, 6, 4], [4]),
    )
    for filter_shape, bias_shape in stage_shapes:
        weights = _weight(filter_shape)
        biases = _bias(bias_shape)
        activated = nn.relu(nn.bias_add(_conv2d(hidden, weights), biases))
        hidden = _max_pool_2x2(activated)
    return hidden
def get_simple_graph_def(self):
    """Build a small conv/bias/relu/max-pool graph and return its GraphDef."""
    graph = ops.Graph()
    with graph.as_default():
        inp = aops.placeholder(
            dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input")
        weights = cop.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.],
               [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtypes.float32)
        conv = nn.conv2d(
            input=inp,
            filter=weights,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        bias = cop.constant(
            [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32)
        biased = nn.bias_add(conv, bias, name="biasAdd")
        activated = nn.relu(biased, "relu")
        passthrough = aops.identity(activated, "ID")
        pooled = nn_ops.max_pool(
            passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
        # The squeezed tensor is only referenced by name ("output").
        aops.squeeze(pooled, name="output")
    return graph.as_graph_def()
def GetParams(self):
    """Build the single-VGG-layer graph and parameters for the TF-TRT test."""
    dtype = dtypes.float32
    input_name = "input"
    output_name = "output"
    input_dims = [5, 8, 8, 2]

    graph = ops.Graph()
    with graph.as_default():
        inp = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        normed, _, _ = nn_impl.fused_batch_norm(
            inp, [1.0, 1.0], [0.0, 0.0],
            mean=[0.5, 0.5],
            variance=[1.0, 1.0],
            is_training=False)
        weights = constant_op.constant(
            np.random.randn(1, 1, 2, 6), name="weights", dtype=dtype)
        conv = nn.conv2d(
            input=normed,
            filter=weights,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        bias = constant_op.constant(
            np.random.randn(6), name="bias", dtype=dtype)
        biased = nn.bias_add(conv, bias, name="biasAdd")
        activated = nn.relu(biased, "relu")
        passthrough = array_ops.identity(activated, "ID")
        pooled = nn_ops.max_pool(
            passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
        # The graph output is looked up by name, so the tensor is not kept.
        array_ops.squeeze(pooled, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(5, 2, 2, 6)])
def GetSingleEngineGraphDef(dtype=dtypes.float32):
    """Return the GraphDef of a graph made of one conv/bias/relu/pool segment."""
    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension is left unknown; the rest come from INPUT_DIMS.
        placeholder_shape = [None] + INPUT_DIMS[1:]
        inp = array_ops.placeholder(
            dtype=dtype, shape=placeholder_shape, name=INPUT_NAME)
        with graph.device("/GPU:0"):
            weights = constant_op.constant(
                [[[[1., 0.5, 4., 6., 0.5, 1.],
                   [1., 0.5, 1., 1., 0.5, 1.]]]],
                name="weights",
                dtype=dtype)
            conv = nn.conv2d(
                input=inp,
                filter=weights,
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="conv")
        bias_values = constant_op.constant(
            [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
        biased = nn.bias_add(conv, bias_values, name="bias_add")
        activated = nn.relu(biased, "relu")
        passthrough = array_ops.identity(activated, "identity")
        pooled = nn_ops.max_pool(
            passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
        array_ops.squeeze(pooled, name=OUTPUT_NAME)
    return graph.as_graph_def()
def call(self, inputs):
    """Apply the transposed 2D convolution, then optional bias and activation.

    Args:
        inputs: 4D input tensor laid out according to `self.data_format`.

    Returns:
        The output of `nn.conv2d_transpose` with its static shape set, with
        `self.bias` added when it is not None and `self.activation` applied
        when it is not None.
    """
    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    if self.data_format == 'channels_first':
        c_axis, h_axis, w_axis = 1, 2, 3
    else:
        c_axis, h_axis, w_axis = 3, 1, 2

    height, width = inputs_shape[h_axis], inputs_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
        """Return the output size of one spatial dimension.

        `dim_size` may be a tensor (dynamic shape), an int (static shape) or
        None (unknown static shape, which stays None).
        """
        if isinstance(dim_size, ops.Tensor):
            dim_size = math_ops.mul(dim_size, stride_size)
        elif dim_size is not None:
            dim_size *= stride_size

        if padding == 'valid' and dim_size is not None:
            dim_size += max(kernel_size - stride_size, 0)
        return dim_size

    # Infer the dynamic output shape:
    out_height = get_deconv_dim(height, stride_h, kernel_h, self.padding)
    out_width = get_deconv_dim(width, stride_w, kernel_w, self.padding)

    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
        strides = (1, 1, stride_h, stride_w)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)
        strides = (1, stride_h, stride_w, 1)

    output_shape_tensor = array_ops.stack(output_shape)
    outputs = nn.conv2d_transpose(
        inputs,
        self.kernel,
        output_shape_tensor,
        strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, ndim=4))

    # Infer the static output shape:
    out_shape = inputs.get_shape().as_list()
    out_shape[c_axis] = self.filters
    out_shape[h_axis] = get_deconv_dim(
        out_shape[h_axis], stride_h, kernel_h, self.padding)
    out_shape[w_axis] = get_deconv_dim(
        out_shape[w_axis], stride_w, kernel_w, self.padding)
    outputs.set_shape(out_shape)

    # Compare against None explicitly: truth-testing a variable/tensor is
    # either always true or an error, never a check for "bias present".
    if self.bias is not None:
        outputs = nn.bias_add(
            outputs,
            self.bias,
            data_format=utils.convert_data_format(self.data_format, ndim=4))

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def _apply_variational_bias(self, inputs): if self.bias.posterior is None: self.bias.posterior_tensor = None return inputs self.bias.posterior_tensor = self.bias.posterior_tensor_fn( self.bias.posterior) return nn.bias_add(inputs, self.bias.posterior_tensor)
def _testConvReparameterization(self, layer_class):
    """Check a reparameterized conv layer against a hand-built convolution.

    Builds the layer through `self._testConvSetUp`, recomputes the expected
    output from the sampled kernel and bias, and asserts that outputs,
    sampled parameters and KL terms match what the layer produced.
    """
    batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
    with self.test_session() as sess:
        setup = self._testConvSetUp(
            layer_class, batch_size,
            depth=depth, height=height, width=width, channels=channels,
            filters=filters)
        (kernel_posterior, kernel_prior, kernel_divergence,
         bias_posterior, bias_prior, bias_divergence,
         layer, inputs, outputs, kl_penalty, kernel_shape) = setup

        convolution_op = nn_ops.Convolution(
            tensor_shape.TensorShape(inputs.shape),
            filter_shape=tensor_shape.TensorShape(kernel_shape),
            padding="SAME")
        convolved = convolution_op(inputs, kernel_posterior.result_sample)
        expected_outputs = nn.bias_add(
            convolved, bias_posterior.result_sample, data_format="NHWC")

        # Fetches are listed as (expected, actual) pairs, back to back.
        fetches = [
            expected_outputs, outputs,
            kernel_posterior.result_sample, layer.kernel_posterior_tensor,
            kernel_divergence.result, kl_penalty[0],
            bias_posterior.result_sample, layer.bias_posterior_tensor,
            bias_divergence.result, kl_penalty[1],
        ]
        (expected_outputs_, actual_outputs_,
         expected_kernel_, actual_kernel_,
         expected_kernel_divergence_, actual_kernel_divergence_,
         expected_bias_, actual_bias_,
         expected_bias_divergence_, actual_bias_divergence_) = sess.run(
             fetches)

        value_pairs = (
            (expected_kernel_, actual_kernel_),
            (expected_bias_, actual_bias_),
            (expected_outputs_, actual_outputs_),
            (expected_kernel_divergence_, actual_kernel_divergence_),
            (expected_bias_divergence_, actual_bias_divergence_),
        )
        for expected, actual in value_pairs:
            self.assertAllClose(expected, actual, rtol=1e-6, atol=0.)

        # The divergence functions must have been called with the
        # (posterior, prior, sample) triple of the matching parameter.
        self.assertAllEqual(
            [[kernel_posterior.distribution,
              kernel_prior.distribution,
              kernel_posterior.result_sample]],
            kernel_divergence.args)
        self.assertAllEqual(
            [[bias_posterior.distribution,
              bias_prior.distribution,
              bias_posterior.result_sample]],
            bias_divergence.args)
def call(self, inputs):
    """Apply the convolution, then the optional bias and activation.

    Args:
        inputs: Input tensor laid out according to `self.data_format`.

    Returns:
        The result of convolving `inputs` with `self.kernel`, with
        `self.bias` added when it is not None and `self.activation` applied
        when it is not None.
    """
    outputs = nn.convolution(
        input=inputs,
        filter=self.kernel,
        dilation_rate=self.dilation_rate,
        strides=self.strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, self.rank + 2))

    if self.bias is not None:
        if self.rank != 2 and self.data_format == 'channels_first':
            # bias_add does not support channels_first for non-4D inputs, so
            # reshape the bias to broadcast along the channel axis (axis 1).
            if self.rank == 1:
                bias = array_ops.reshape(self.bias, (1, self.filters, 1))
            if self.rank == 3:
                # The output is 5D here, so the bias needs one trailing
                # singleton per spatial dimension (three of them). With only
                # two it would be broadcast along the first spatial axis
                # instead of the channel axis.
                bias = array_ops.reshape(
                    self.bias, (1, self.filters, 1, 1, 1))
            outputs += bias
        else:
            # Note that we pass rank=4 because bias_add will only accept
            # NHWC and NCHW even if the rank of the inputs is 3 or 5.
            outputs = nn.bias_add(
                outputs,
                self.bias,
                data_format=utils.convert_data_format(self.data_format, 4))

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def _DenseLayer(x, num_inputs, num_outputs, quantization_range, name):
    """Build a dense layer whose intermediate and final values are quantized.

    Args:
        x: input to the dense layer
        num_inputs: number of input columns of x
        num_outputs: number of output columns
        quantization_range: the min/max range for quantization
        name: name of the variable scope

    Returns:
        The output of the layer.
    """
    with variable_scope.variable_scope(name):
        kernel = variable_scope.get_variable(
            'kernel',
            shape=[num_inputs, num_outputs],
            dtype=dtypes.float32,
            initializer=keras.initializers.glorot_uniform())
        bias = variable_scope.get_variable(
            'bias',
            shape=[num_outputs],
            dtype=dtypes.float32,
            initializer=keras.initializers.zeros())
        # Quantize once after the matmul and once more after the bias add.
        product = _Quantize(math_ops.matmul(x, kernel), quantization_range)
        biased = _Quantize(nn.bias_add(product, bias), quantization_range)
    return biased
def call(self, inputs):
    """Apply the separable 2D convolution, then optional bias and activation.

    `nn.separable_conv2d` is run on channels-last data, so channels-first
    inputs are transposed before the op and transposed back afterwards.

    Args:
        inputs: 4D input tensor laid out according to `self.data_format`.

    Returns:
        The separable convolution output in the layer's data format, with
        `self.bias` added when it is not None and `self.activation` applied
        when it is not None.
    """
    if self.data_format == 'channels_first':
        # Reshape to channels last
        inputs = array_ops.transpose(inputs, (0, 2, 3, 1))

    # Apply the actual ops.
    outputs = nn.separable_conv2d(
        inputs,
        self.depthwise_kernel,
        self.pointwise_kernel,
        strides=(1,) + self.strides + (1,),
        padding=self.padding.upper(),
        rate=self.dilation_rate)

    if self.data_format == 'channels_first':
        # Reshape to channels first
        outputs = array_ops.transpose(outputs, (0, 3, 1, 2))

    # Compare against None explicitly: truth-testing a variable/tensor is
    # either always true or an error, never a check for "bias present".
    if self.bias is not None:
        outputs = nn.bias_add(
            outputs,
            self.bias,
            data_format=utils.convert_data_format(self.data_format, ndim=4))

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def GetParams(self):
    """Build a two-conv graph with unknown input dims and its test parameters."""
    # TODO(laigd): we should test the following cases:
    # - batch size is not changed, other dims are changing
    # - batch size is decreasing, other dims are identical
    # - batch size is decreasing, other dims are changing
    # - batch size is increasing, other dims are identical
    # - batch size is increasing, other dims are changing
    input_dims = [[[1, 5, 5, 1]], [[10, 5, 5, 1]], [[3, 5, 5, 1]],
                  [[1, 5, 5, 1]], [[1, 3, 1, 1]], [[2, 9, 9, 1]],
                  [[1, 224, 224, 1]], [[1, 128, 224, 1]]]
    # Both convolutions use SAME padding and unit strides, and the last one
    # has a single output channel, so outputs are expected to match inputs.
    expected_output_dims = input_dims

    # (filter shape, filter name, bias size, apply relu) per conv stage.
    stages = (
        ([3, 3, 1, 8], "weights1", 8, True),
        ([3, 3, 8, 1], "weights2", 1, False),
    )

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            shape=(None, None, None, 1), dtype=dtypes.float32, name="input")
        for filter_shape, filter_name, bias_size, use_relu in stages:
            conv_filter = constant_op.constant(
                np.ones(filter_shape), name=filter_name, dtype=dtypes.float32)
            bias = constant_op.constant(
                np.random.randn(bias_size), dtype=dtypes.float32)
            x = nn.conv2d(
                input=x,
                filter=conv_filter,
                strides=[1, 1, 1, 1],
                padding="SAME",
                name="conv")
            x = nn.bias_add(x, bias)
            if use_relu:
                x = nn.relu(x)
        x = array_ops.identity(x, name="output")

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=["input"],
        input_dims=input_dims,
        output_names=["output"],
        expected_output_dims=expected_output_dims)
def _lstm_cell(prev_c, prev_h, x):
    """Build one LSTM step and return the new (cell state, hidden state).

    Gate naming: i = input gate, f = forget gate, o = output gate,
    c = candidate cell state, x = input, h = embedding.
    """
    bias = _bias([4])
    weights = _weight([8, 16])

    stacked_input = array_ops.concat([x, prev_h], axis=1)
    pre_activations = math_ops.matmul(stacked_input, weights)
    i_pre, f_pre, o_pre, c_pre = array_ops.split(pre_activations, 4, axis=1)

    # The same bias is added to every gate before its non-linearity.
    input_gate, forget_gate, output_gate = [
        math_ops.sigmoid(nn.bias_add(pre, bias))
        for pre in (i_pre, f_pre, o_pre)
    ]
    candidate = math_ops.tanh(nn.bias_add(c_pre, bias))

    next_c = forget_gate * prev_c + input_gate * candidate
    next_h = output_gate * math_ops.tanh(next_c)
    return next_c, next_h
def loop_fn(i):
    """Compute bias_add for iteration `i` and the loss gradient w.r.t. `bias`.

    Relies on names from the enclosing scope: `g`, `x`, `bias`,
    `stacked_value`, `stacked_bias` and `data_format`.
    """
    with g:
        value = array_ops.gather(x, i) if stacked_value else x
        bias_i = array_ops.gather(bias, i) if stacked_bias else bias
        y = nn.bias_add(value, bias_i, data_format=data_format)
        loss = math_ops.reduce_sum(y * y)
    grad = g.gradient(loss, bias)
    if not stacked_bias:
        return y, grad
    # If we gather over bias in loop_fn, the gradient will be an
    # instance of `IndexedSlices` with attrs `values` and `indices`.
    return y, grad.values, grad.indices
def _predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Tensor` before applying possible centered bias. Returns: Dict of prediction `Tensor` keyed by `PredictionKey`. """ if self._enable_centered_bias: logits = nn.bias_add(logits, _centered_bias( self.logits_dimension, self._centered_bias_weight_collection)) return self._logits_to_predictions(logits)
def bias_add(inputs,
             activation_fn=None,
             initializer=init_ops.zeros_initializer,
             regularizer=None,
             reuse=None,
             variables_collections=None,
             outputs_collections=None,
             trainable=True,
             scope=None):
    """Create a bias variable and add it to `inputs`.

    Can be used as a normalizer function for conv2d and fully_connected.

    Args:
        inputs: a tensor of at least rank 2 with a known last dimension,
            e.g. `[batch_size, depth]`, `[None, None, None, depth]`.
        activation_fn: Optional activation function.
        initializer: An initializer for the bias, defaults to 0.
        regularizer: A regularizer like the result of `l1_regularizer` or
            `l2_regularizer`.
        reuse: whether or not the layer and its variables should be reused.
            To be able to reuse the layer, scope must be given.
        variables_collections: optional collections for the variables.
        outputs_collections: collections to add the outputs.
        trainable: If `True` also add variables to the graph collection
            `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        scope: Optional scope for variable_op_scope.

    Returns:
        a tensor representing the result of adding biases to the inputs.
    """
    with variable_scope.variable_op_scope(
            [inputs], scope, 'BiasAdd', reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        # One bias value per entry of the last input dimension.
        depth = utils.last_dimension(inputs.get_shape(), min_rank=2)
        collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable(
            'biases',
            shape=[depth,],
            dtype=inputs.dtype.base_dtype,
            initializer=initializer,
            regularizer=regularizer,
            collections=collections,
            trainable=trainable)
        result = nn.bias_add(inputs, biases)
        if activation_fn:
            result = activation_fn(result)
        return utils.collect_named_outputs(
            outputs_collections, sc.name, result)
def _eval_op(self, features, target, logits=None, logits_input=None,
             name="eval_op"):
    """Return the (predictions, loss) pair used for evaluation.

    Adds the centered bias to `logits` when it is enabled, then computes the
    weighted evaluation loss and the predictions from the same logits.
    """
    target = _check_target(target, self._label_name)
    if self._enable_centered_bias:
        centered_bias = _centered_bias(
            self.logits_dimension, self._centered_bias_weight_collection)
        logits = nn.bias_add(logits, centered_bias)

    unweighted_loss = self._eval_loss_fn(logits, target)
    example_weights = _weight_tensor(features, self._weight_column_name)
    # _loss returns a pair; only its first element is needed here.
    loss, _ = _loss(unweighted_loss, example_weights, name=name)

    predictions = self._logits_to_prediction(logits)
    return predictions, loss
def _logits(self, features): if not (self._get_linear_feature_columns() or self._get_dnn_feature_columns()): raise ValueError("Either linear_feature_columns or dnn_feature_columns " "should be defined.") features = self._get_feature_dict(features) if self._get_linear_feature_columns() and self._get_dnn_feature_columns(): logits = self._linear_logits(features) + self._dnn_logits(features) elif self._get_dnn_feature_columns(): logits = self._dnn_logits(features) else: logits = self._linear_logits(features) return nn.bias_add(logits, self._centered_bias())
def call(self, inputs):
    """Compute `activation(inputs . kernel + bias)` for this dense layer."""
    shape = inputs.get_shape().as_list()
    output_shape = shape[:-1] + [self.units]

    if len(output_shape) <= 2:
        outputs = standard_ops.matmul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs: contract the last input
        # axis with the first kernel axis.
        last_axis = len(shape) - 1
        outputs = standard_ops.tensordot(
            inputs, self.kernel, [[last_axis], [0]])
        # Reshape the output back to the original ndim of the input.
        outputs.set_shape(output_shape)

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def _logits(self, features, is_training=False): linear_feature_columns = self._get_linear_feature_columns() dnn_feature_columns = self._get_dnn_feature_columns() if not (linear_feature_columns or dnn_feature_columns): raise ValueError("Either linear_feature_columns or dnn_feature_columns " "should be defined.") if linear_feature_columns and dnn_feature_columns: logits = self._linear_logits(features, is_training) + self._dnn_logits(features, is_training) elif dnn_feature_columns: logits = self._dnn_logits(features, is_training) else: logits = self._linear_logits(features, is_training) if self._enable_centered_bias: return nn.bias_add(logits, self._centered_bias()) else: return logits
def call(self, inputs):
    """Compute `activation(inputs . kernel + bias)` for this dense layer."""
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    ndim = len(shape)

    if ndim <= 2:
        outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs: contract the last input
        # axis with the first kernel axis.
        outputs = standard_ops.tensordot(
            inputs, self.kernel, [[ndim - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not context.executing_eagerly():
            outputs.set_shape(shape[:-1] + [self.units])

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def _training_loss(self, features, target, logits=None, logits_input=None,
                   name="training_loss"):
    """Build the training loss tensor for this head.

    Training loss is different from the loss reported on the tensorboard as
    we should respect the example weights when computing the gradient.

      L = sum_{i} w_{i} * l_{i} / B

    where B is the number of examples in the batch, l_{i}, w_{i} are
    individual losses, and example weight.

    Args:
        features: features dict.
        target: either a tensor for labels or in multihead case, a dict of
            string to target tensor.
        logits: logits, a float tensor.
        logits_input: Output of last hidden layer.
        name: Op name.

    Returns:
        A tuple of training Loss and additional_train_op (possibly None)
    """
    target = _check_target(target, self._label_name)

    additional_train_op = None
    if self._enable_centered_bias:
        dimension = self.logits_dimension
        collection = self._centered_bias_weight_collection
        logits = nn.bias_add(logits, _centered_bias(dimension, collection))
        # The centered bias is trained by its own step, returned to the
        # caller as a one-element list.
        additional_train_op = [
            _centered_bias_step(
                dimension, collection, target, self._train_loss_fn)
        ]

    unweighted_loss = self._train_loss_fn(logits, target)
    example_weights = _weight_tensor(features, self._weight_column_name)
    loss, weighted_average_loss = _loss(
        unweighted_loss, example_weights, name=name)
    logging_ops.scalar_summary(
        _head_prefixed(self._head_name, "loss"), weighted_average_loss)
    return loss, additional_train_op
def GetParams(self):
    """Build the single-VGG-layer NCHW graph and parameters for TF-TRT tests.

    Returns:
        A `trt_test.TfTrtIntegrationTestParams` describing the graph, its
        input, the expected number of engines, the expected output dims and
        the comparison tolerances.
    """
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [5, 2, 8, 8]
    g = ops.Graph()
    with g.as_default():
        x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
        x, _, _ = nn_impl.fused_batch_norm(
            x,
            np.random.randn(2).astype(np.float32),
            np.random.randn(2).astype(np.float32),
            mean=np.random.randn(2).astype(np.float32),
            # A variance must be non-negative. A raw normal sample is
            # negative about half the time, and batch norm would then take
            # the square root of a negative number and emit NaNs.
            variance=np.abs(np.random.randn(2)).astype(np.float32),
            data_format="NCHW",
            is_training=False)
        e = constant_op.constant(
            np.random.randn(1, 1, 2, 6), name="weights", dtype=dtype)
        conv = nn.conv2d(
            input=x,
            filter=e,
            data_format="NCHW",
            strides=[1, 1, 2, 2],
            padding="SAME",
            name="conv")
        b = constant_op.constant(np.random.randn(6), name="bias", dtype=dtype)
        t = nn.bias_add(conv, b, data_format="NCHW", name="biasAdd")
        relu = nn.relu(t, "relu")
        idty = array_ops.identity(relu, "ID")
        v = nn_ops.max_pool(
            idty, [1, 1, 2, 2], [1, 1, 2, 2],
            "VALID",
            data_format="NCHW",
            name="max_pool")
        # The graph output is looked up by name, so the tensor is not kept.
        array_ops.squeeze(v, name="output")
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        num_expected_engines=1,
        expected_output_dims=(5, 6, 2, 2),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def GetParams(self):
    """Build an int32 matmul + bias_add graph for the TF-TRT exclusion test.

    The test checks that ops unsupported in INT32 mode are excluded by
    TF-TRT.
    """
    dtype = dtypes.int32
    input_name, output_name = 'input', 'output'
    input_dims = [100, 4]

    graph = ops.Graph()
    with graph.as_default():
        inp = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        weights = self._ConstOp((4, 10), dtype)
        product = math_ops.matmul(inp, weights)
        bias = self._ConstOp((10,), dtype)
        biased = nn.bias_add(product, bias)
        array_ops.identity(biased, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[[input_dims]],
        output_names=[output_name],
        expected_output_dims=[[[100, 10]]])
def GetParams(self):
    """Build a single-segment conv graph and its TF-TRT test parameters."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [100, 24, 24, 2]

    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension is left unknown in the placeholder.
        placeholder_shape = [None] + input_dims[1:]
        inp = array_ops.placeholder(
            dtype=dtype, shape=placeholder_shape, name=input_name)
        with graph.device("/GPU:0"):
            weights = constant_op.constant(
                [[[[1., 0.5, 4., 6., 0.5, 1.],
                   [1., 0.5, 1., 1., 0.5, 1.]]]],
                name="weights",
                dtype=dtype)
            conv = nn.conv2d(
                input=inp,
                filter=weights,
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="conv")
        bias_values = constant_op.constant(
            [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
        biased = nn.bias_add(conv, bias_values, name="bias_add")
        activated = nn.relu(biased, "relu")
        passthrough = array_ops.identity(activated, "identity")
        pooled = nn_ops.max_pool(
            passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
        array_ops.squeeze(pooled, name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph
        # which breaks the connection check, fix it.
        # - my_trt_op_0 should have ["weights", "conv", "bias", "bias_add",
        #   "relu", "identity", "max_pool"]
        expected_engines=["my_trt_op_0"],
        expected_output_dims=(100, 6, 6, 6),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def GetParams(self):
    """Build a single-segment conv graph and its TF-TRT test parameters."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name, output_name = "input", "output"
    input_dims = [100, 24, 24, 2]

    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension is left unknown in the placeholder.
        placeholder_shape = [None] + input_dims[1:]
        inp = array_ops.placeholder(
            dtype=dtype, shape=placeholder_shape, name=input_name)
        with graph.device("/GPU:0"):
            weights = constant_op.constant(
                [[[[1., 0.5, 4., 6., 0.5, 1.],
                   [1., 0.5, 1., 1., 0.5, 1.]]]],
                name="weights",
                dtype=dtype)
            conv = nn.conv2d(
                input=inp,
                filter=weights,
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="conv")
        bias_values = constant_op.constant(
            [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
        biased = nn.bias_add(conv, bias_values, name="bias_add")
        activated = nn.relu(biased, "relu")
        passthrough = array_ops.identity(activated, "identity")
        pooled = nn_ops.max_pool(
            passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
        array_ops.squeeze(pooled, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[[input_dims]],
        output_names=[output_name],
        expected_output_dims=[[[100, 6, 6, 6]]])
def GraphFn(self, x):
    """Apply batch norm, conv, bias, relu and max-pool to `x`; squeeze it."""
    dtype = x.dtype
    normed, _, _ = nn_impl.fused_batch_norm(
        x, [1.0, 1.0], [0.0, 0.0],
        mean=[0.5, 0.5],
        variance=[1.0, 1.0],
        is_training=False)
    weights = constant_op.constant(
        np.random.randn(1, 1, 2, 6), name="weights", dtype=dtype)
    conv = nn.conv2d(
        input=normed,
        filter=weights,
        strides=[1, 2, 2, 1],
        padding="SAME",
        name="conv")
    bias = constant_op.constant(np.random.randn(6), name="bias", dtype=dtype)
    biased = nn.bias_add(conv, bias, name="biasAdd")
    activated = nn.relu(biased, "relu")
    passthrough = array_ops.identity(activated, "ID")
    pooled = nn_ops.max_pool(
        passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
    return array_ops.squeeze(pooled, name="output_0")
def GraphFn(self, inp):
    """Build the single-segment conv/bias/relu/pool graph on top of `inp`."""
    dtype = inp.dtype
    weights = constant_op.constant(
        [[[[1., 0.5, 4., 6., 0.5, 1.],
           [1., 0.5, 1., 1., 0.5, 1.]]]],
        name="weights",
        dtype=dtype)
    conv = nn.conv2d(
        input=inp,
        filter=weights,
        strides=[1, 2, 2, 1],
        padding="SAME",
        name="conv")
    bias_values = constant_op.constant(
        [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
    biased = nn.bias_add(conv, bias_values, name="bias_add")
    activated = nn.relu(biased, "relu")
    passthrough = array_ops.identity(activated, "identity")
    pooled = nn_ops.max_pool(
        passthrough, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
    return array_ops.squeeze(pooled, name="output_0")
def GetParams(self):
    """Build an int32 matmul + bias_add graph for the TF-TRT exclusion test.

    The test checks that ops unsupported in INT32 mode are excluded by
    TF-TRT.
    """
    dtype = dtypes.int32
    input_name, output_name = 'input', 'output'
    input_dims = [100, 4]

    graph = ops.Graph()
    with graph.as_default():
        inp = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        weights = self._ConstOp((4, 10), dtype)
        product = math_ops.matmul(inp, weights)
        bias = self._ConstOp((10,), dtype)
        biased = nn.bias_add(product, bias)
        array_ops.identity(biased, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[[input_dims]],
        output_names=[output_name],
        expected_output_dims=[[[100, 10]]])
def call(self, inputs):
    """Convolve `inputs` with the windowed kernel; add bias and activation."""
    input_shape = inputs.shape

    if self._is_causal:
        # Apply causal padding to inputs for Conv1D.
        inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs))

    windowed_kernel = self.kernel * self.window
    outputs = self._convolution_op(inputs, windowed_kernel)

    if self.use_bias:
        output_rank = outputs.shape.rank
        has_extra_batch_dims = (
            output_rank is not None and output_rank > 2 + self.rank)

        if self.rank == 1 and self._channels_first:
            # nn.bias_add does not accept a 1D input tensor.
            outputs += array_ops.reshape(self.bias, (1, self.filters, 1))
        elif has_extra_batch_dims:
            # Handle multiple batch dimensions.
            def _apply_fn(o):
                return nn.bias_add(
                    o, self.bias, data_format=self._tf_data_format)

            outputs = conv_utils.squeeze_batch_dims(
                outputs, _apply_fn, inner_rank=self.rank + 1)
        else:
            outputs = nn.bias_add(
                outputs, self.bias, data_format=self._tf_data_format)

    if not context.executing_eagerly():
        # Infer the static output shape:
        outputs.set_shape(self.compute_output_shape(input_shape))

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def _training_loss(self, features, labels, logits, name="training_loss"):
    """Build the training loss tensor for this head.

    Training loss is different from the loss reported on the tensorboard as
    we should respect the example weights when computing the gradient.

      L = sum_{i} w_{i} * l_{i} / B

    where B is the number of examples in the batch, l_{i}, w_{i} are
    individual losses, and example weight.

    Args:
        features: features dict.
        labels: either a tensor for labels or in multihead case, a dict of
            string to labels tensor.
        logits: logits, a float tensor.
        name: Op name.

    Returns:
        A loss `Tensor`.
    """
    labels = _check_labels(labels, self._label_name)

    if self._enable_centered_bias:
        centered_bias = _centered_bias(
            self.logits_dimension, self._centered_bias_weight_collection)
        logits = nn.bias_add(logits, centered_bias)

    unweighted_loss = self._train_loss_fn(logits, labels)
    example_weights = _weight_tensor(features, self._weight_column_name)
    loss, weighted_average_loss = _loss(
        unweighted_loss, example_weights, name=name)
    # Only the weighted average is reported as a summary.
    summary.scalar(
        _head_prefixed(self._head_name, "loss"), weighted_average_loss)
    return loss
def call(self, inputs):
    """Apply one graph-convolution step to the node features.

    Args:
        inputs: Mapping with at least the `NODE_FEATURES` and `EDGE_FEATURES`
            entries. The edge features may be a `tf.sparse.SparseTensor`, in
            which case they are densified first.

    Returns:
        A new dict with every entry of `inputs`, where `NODE_FEATURES` is
        replaced by the layer output.

    Raises:
        ValueError: if `self.aggregation_method` is not one of 'concat',
            'sum' or 'max'.
    """
    features = tf.convert_to_tensor(inputs[NODE_FEATURES])

    is_sparse_adjacency = isinstance(inputs[EDGE_FEATURES],
                                     tf.sparse.SparseTensor)
    if is_sparse_adjacency:
        adjacency = tf.sparse.to_dense(inputs[EDGE_FEATURES])
    else:
        adjacency = tf.convert_to_tensor(inputs[EDGE_FEATURES])

    weights = self._get_weight_matrix()

    # Insert an axis at position 1 so the features line up with the
    # adjacency tensor in the matmul below.
    features = tf.expand_dims(features, 1)
    outputs = tf.matmul(adjacency, features)
    if self.add_self_loops:
        # The untransformed features are prepended along axis 1.
        outputs = tf.concat([features, outputs], 1)
    outputs = tf.matmul(outputs, weights)

    # Combine the entries along axis 1 according to the aggregation method.
    if self.aggregation_method == 'concat':
        outputs = tf.transpose(outputs, (0, 2, 1, 3))
        outputs_shape = tf.shape(outputs)
        outputs = tf.reshape(outputs,
                             (outputs_shape[0], outputs_shape[1], -1))
    elif self.aggregation_method == 'sum':
        outputs = tf.reduce_sum(outputs, axis=1)
    elif self.aggregation_method == 'max':
        outputs = tf.reduce_max(outputs, axis=1)
    else:
        # The message previously lacked the space before the quoted name.
        raise ValueError(
            f'Undefined aggregation method `{self.aggregation_method}`.')

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        outputs = self.activation(outputs)  # pylint: disable=not-callable
    return {**inputs, NODE_FEATURES: outputs}
def call(self, inputs, training=True):
    """Dense forward pass with optional weight norm and mean-only batch norm.

    With `self.weight_norm` set, the product `inputs . V` is rescaled by
    `g / ||V||` (norm taken over axis 0 of `V`); otherwise a regular dense
    product with `self.kernel` is used. With `self.mean_only_batch_norm` set,
    the batch mean (when `training`) or the stored running average
    (otherwise) is subtracted from the result.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()

    if not self.weight_norm:
        ndim = len(shape)
        if ndim <= 2:
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)
        else:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(
                inputs, self.kernel, [[ndim - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                outputs.set_shape(shape[:-1] + [self.units])
    else:
        inputs = tf.matmul(inputs, self.V)
        scaler = self.g / tf.sqrt(tf.reduce_sum(tf.square(self.V), [0]))
        outputs = tf.reshape(scaler, [1, self.units]) * inputs

    if self.mean_only_batch_norm:
        mean = tf.reduce_mean(outputs, reduction_indices=0)
        if not training:
            outputs = outputs - self.batch_norm_running_average
        else:
            # If first iteration, the running average is still the empty
            # list; seed it with the current mean. Afterwards average the
            # stored value with the current mean.
            if self.batch_norm_running_average == []:
                self.batch_norm_running_average = mean
            else:
                self.batch_norm_running_average = (
                    self.batch_norm_running_average + mean) / 2
            outputs = outputs - mean

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def call(self, inputs):
    """Compute the dense layer output with optional quantization.

    The kernel is passed through `self.weight_quantizer` and then
    `self.quantizer` when they are set. With `self.quantizer` set, the
    product is computed by `qmatmul`, and the result is quantized again after
    the bias add and after the activation.

    Args:
        inputs: Input of rank at most 2 (anything `ops.convert_to_tensor`
            accepts).

    Returns:
        The (possibly quantized) layer output.

    Raises:
        ValueError: if the output would have more than 2 dimensions, which
            is not supported for quantized operation.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    output_shape = shape[:-1] + [self.units]

    # quantize the weights, if there is a weight quantizer
    if self.weight_quantizer is not None:
        used_kernel = self.weight_quantizer.quantize(self.kernel)
    else:
        used_kernel = self.kernel
    # if intrinsic quantization, apply intr. quantization to weights, too!
    if self.quantizer is not None:
        used_kernel = self.quantizer.quantize(used_kernel)

    if len(output_shape) > 2:
        # The tensordot-based broadcasting path of a regular dense layer is
        # not implemented for the quantized case.
        # '%d' (not '$d') is required here: with no valid conversion
        # specifier the '%' operator raises TypeError and masks this error.
        raise ValueError(
            'output_shape > 2 not supported for quantized operation, '
            'tried %d.' % (len(output_shape)))

    if self.quantizer is None:
        outputs = standard_ops.matmul(inputs, used_kernel)
    else:  # with quantization
        outputs = qmatmul(inputs, used_kernel, self.quantizer)

    # TODO: quantize after bias and activation
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
        if self.quantizer is not None:
            outputs = self.quantizer.quantize(outputs)
    if self.activation is not None:
        outputs = self.activation(outputs)  # pylint: disable=not-callable
        if self.quantizer is not None:
            outputs = self.quantizer.quantize(outputs)
    return outputs
def _build_graph(self, is_freezed=True):
    """Add a conv2d -> bias_add -> relu chain to the current default graph.

    With `is_freezed` the weights and bias are constants named "w/read" and
    "b/read"; otherwise they are `VariableV1` objects named "w" and "b".
    Nothing is returned; the ops are created for their effect on the graph.
    """
    images = array_ops.placeholder(
        dtypes.float32, shape=[None, 4, 4, 3], name="input")

    # The 1x3x3x3 kernel repeats the same 3x3 block three times.
    kernel_block = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
    kernel_values = [[kernel_block, kernel_block, kernel_block]]
    bias_values = [0.1, 0.2, 0.3]

    if is_freezed:
        w = constant_op.constant(
            kernel_values,
            dtype=dtypes.float32,
            shape=[1, 3, 3, 3],
            name="w/read")
    else:
        w = variables.VariableV1(
            kernel_values,
            dtype=dtypes.float32,
            shape=[1, 3, 3, 3],
            name="w")
    conv = nn.conv2d(
        images, filter=w, name="conv2d", strides=[1, 1, 1, 1], padding="SAME")

    if is_freezed:
        b = constant_op.constant(
            bias_values, dtype=dtypes.float32, shape=[3], name="b/read")
    else:
        b = variables.VariableV1(
            bias_values, dtype=dtypes.float32, shape=[3], name="b")
    biased = nn.bias_add(conv, b, name="bias")
    nn.relu(biased, name="relu")
    return
def call(self, inputs):
    """Apply the dense transform, optional bias and optional activation.

    Inputs with more than two dimensions are contracted with the kernel over
    their last axis via `tensordot`. Other inputs are cast to
    `self._compute_dtype` and multiplied with the kernel; sparse inputs use
    `sparse_tensor_dense_matmul`.

    Args:
      inputs: Input tensor.

    Returns:
      The output tensor.
    """
    ndims = len(inputs.shape)
    if ndims <= 2:
        inputs = math_ops.cast(inputs, self._compute_dtype)
        if K.is_sparse(inputs):
            outputs = sparse_ops.sparse_tensor_dense_matmul(
                inputs, self.kernel)
        else:
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(
            inputs, self.kernel, [[ndims - 1], [0]])
        if not context.executing_eagerly():
            # Restore the static shape with the last dim replaced by units.
            static_shape = inputs.shape.as_list()
            outputs.set_shape(static_shape[:-1] + [self.units])

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def call(self, inputs):
    """Apply the dense transform, optional bias and optional activation.

    Args:
      inputs: Value convertible to a tensor.

    Returns:
      The output tensor.
    """
    inputs = ops.convert_to_tensor(inputs)
    ndims = common_shapes.rank(inputs)
    if ndims <= 2:
        # Cast the inputs to self.dtype (the variable dtype) unless the
        # mixed precision policy casts the variables instead.
        if not self._mixed_precision_policy.should_cast_variables:
            inputs = math_ops.cast(inputs, self.dtype)
        outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(
            inputs, self.kernel, [[ndims - 1], [0]])
        if not context.executing_eagerly():
            # Restore the static shape with the last dim replaced by units.
            static_shape = inputs.get_shape().as_list()
            outputs.set_shape(static_shape[:-1] + [self.units])

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def call(self, inputs):
    """Run a 1-D convolution whose kernel is `self.W * self.U`.

    Side effect: the result of `self.calcU()` is stored on `self.U`.

    Args:
      inputs: Input tensor.

    Returns:
      The convolution output with optional bias and activation applied.
    """
    if self._is_causal:
        # Apply causal padding to inputs for Conv1D.
        causal_pad = self._compute_causal_padding(inputs)
        inputs = array_ops.pad(inputs, causal_pad)

    self.U = self.calcU()
    effective_kernel = self.W * self.U
    outputs = K.conv1d(
        inputs,
        effective_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.use_bias:
        if self.data_format != 'channels_first':
            outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')
        else:
            # Broadcast the bias along the channel axis (axis 1).
            outputs += array_ops.reshape(self.bias, (1, self.filters, 1))

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def call(self, inputs, sample):
    """Dense transform using sampled or mean parameters.

    Args:
      inputs: Value convertible to a tensor of dtype `self.dtype`.
      sample: Predicate passed to `control_flow_ops.cond`; selects
        `kernel_sample` / `bias_sample` when true and `kernel_mean` /
        `bias_mean` otherwise.

    Returns:
      The output tensor.
    """
    kernel = control_flow_ops.cond(
        sample, lambda: self.kernel_sample, lambda: self.kernel_mean)
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    static_shape = inputs.get_shape().as_list()
    ndims = len(static_shape)

    if ndims <= 2:
        outputs = gen_math_ops.mat_mul(inputs, kernel)
    else:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, kernel, [[ndims - 1], [0]])
        if not context.executing_eagerly():
            # Restore the static shape with the last dim replaced by units.
            outputs.set_shape(static_shape[:-1] + [self.units])

    if self.use_bias:
        bias = control_flow_ops.cond(
            sample, lambda: self.bias_sample, lambda: self.bias_mean)
        outputs = nn.bias_add(outputs, bias)

    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def call(self, inputs):
    """Apply the dense transform, optional bias and optional activation.

    Args:
      inputs: Value convertible to a tensor.

    Returns:
      The output tensor.
    """
    inputs = ops.convert_to_tensor(inputs)
    input_rank = common_shapes.rank(inputs)

    if input_rank > 2:
        # Broadcasting is required for the inputs.
        contraction_axes = [[input_rank - 1], [0]]
        outputs = standard_ops.tensordot(
            inputs, self.kernel, contraction_axes)
        if not context.executing_eagerly():
            # Restore the static shape with the last dim replaced by units.
            leading_dims = inputs.shape.as_list()[:-1]
            outputs.set_shape(leading_dims + [self.units])
    else:
        # Cast the inputs to self.dtype (the variable dtype) unless the
        # mixed precision policy casts the variables instead.
        policy = self._mixed_precision_policy
        if not policy.should_cast_variables:
            inputs = math_ops.cast(inputs, self.dtype)
        outputs = gen_math_ops.mat_mul(inputs, self.kernel)

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        outputs = self.activation(outputs)  # pylint: disable=not-callable
    return outputs
def call(self, inputs):
    """Dense transform that flattens higher-rank inputs to 2-D first.

    Inputs whose output would have more than two dimensions are reshaped to
    `[-1, input_dim]`, multiplied with `self.w`, and reshaped back using the
    dynamic input shape with the last dimension replaced by `self.units`.

    Args:
      inputs: Input tensor.

    Returns:
      The output tensor.
    """
    static_in = inputs.get_shape().as_list()
    input_dim = static_in[-1]
    output_shape = static_in[:-1] + [self.units]
    needs_flatten = len(output_shape) > 2

    if needs_flatten:
        # Record the dynamic output shape before collapsing the inputs.
        dynamic_dims = array_ops.unpack(array_ops.shape(inputs))
        dynamic_dims[-1] = self.units
        output_shape_tensor = array_ops.pack(dynamic_dims)
        inputs = array_ops.reshape(inputs, [-1, input_dim])

    outputs = standard_ops.matmul(inputs, self.w)
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)

    if needs_flatten:
        # Reshape the output back to the original ndim of the input.
        outputs = array_ops.reshape(outputs, output_shape_tensor)
        outputs.set_shape(output_shape)

    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def call(self, inputs, **kwargs):
    """Combine `self.x0` with `inputs` through the kernel, then bias and ReLU.

    For inputs of rank <= 2 the code forms a batched product of `self.x0` and
    `inputs` (each expanded on axis 2, second operand transposed), multiplies
    it by the kernel tiled over the batch, reshapes the result to the first
    two dynamic dimensions of `inputs`, adds `inputs` back, then applies
    `nn.bias_add` and `nn.relu`.

    NOTE(review): for inputs of rank > 2 the raw `tensordot` result is
    returned; no static shape, bias or ReLU is applied on that path. Confirm
    that this is intended.
    NOTE(review): `self.use_bias` and `self.activation` are not consulted;
    bias and ReLU are applied unconditionally on the rank <= 2 path.

    Args:
      inputs: Value convertible to a tensor of dtype `self.dtype`.
      **kwargs: Accepted but not used in this method.

    Returns:
      The output tensor.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    if len(shape) > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                               [0]])
        # Reshape the output back to the original ndim of the input.
        # (Disabled in the original code:)
        # if not context.executing_eagerly():
        #   output_shape = shape[:-1] + [self.units]
        #   outputs.set_shape(output_shape)
    else:
        # notice: batch product here
        # outputs = reduce_sum(multiply(self.x0, inputs), axis=1, keep_dims=True)
        outputs = matmul(expand_dims(self.x0, axis=2),
                         expand_dims(inputs, axis=2),
                         transpose_a=False, transpose_b=True)
        # the static shape
        # shape_kernel = convert_to_tensor([shape[0], 1, 1])
        # the dynamic shape: tile the kernel once per batch element
        shape_kernel = tf.convert_to_tensor([tf.shape(inputs)[0], 1, 1])
        outputs = matmul(
            outputs,
            tile(expand_dims(self.kernel, axis=0), multiples=shape_kernel))
        # Collapse back to the first two dynamic dims of `inputs` and add the
        # inputs to the result.
        shape_outputs = tf.convert_to_tensor(tf.shape(inputs)[0:2])
        outputs = gen_math_ops.add(
            tf.reshape(outputs, shape=shape_outputs), inputs)
        outputs = nn.bias_add(outputs, self.bias)
        outputs = nn.relu(outputs)
        # Previous standard dense path, kept for reference:
        # outputs = gen_math_ops.mat_mul(inputs, self.kernel)
        # print(outputs)
    # if self.use_bias:
    #   outputs = nn.bias_add(outputs, self.bias)
    # if self.activation is not None:
    #   return self.activation(outputs)  # pylint: disable=not-callable
    return outputs
def head_ops(self,
             features,
             labels,
             mode,
             train_op_fn,
             logits=None,
             logits_input=None,
             scope=None):
    """See `_Head`."""
    _check_mode_valid(mode)
    _check_logits_input_not_supported(logits, logits_input)

    if self._enable_centered_bias:
        centered_bias = _centered_bias(self._logits_dimension)
        logits = nn.bias_add(logits, centered_bias)
    else:
        centered_bias = None

    predictions = self._logits_to_predictions(logits)

    loss = train_op = eval_metric_ops = None
    has_labels = labels is not None
    if mode != model_fn.ModeKeys.INFER and has_labels:
        labels_tensor = _to_labels_tensor(labels, self._label_name)
        loss = _training_loss(
            features,
            labels_tensor,
            logits,
            loss_fn=self._loss_fn,
            weight_column_name=self._weight_column_name,
            head_name=self._head_name)
        # A train op is only built in TRAIN mode when a builder was supplied.
        if mode == model_fn.ModeKeys.TRAIN and train_op_fn is not None:
            train_op = _train_op(
                loss, labels_tensor, train_op_fn, centered_bias,
                self._logits_dimension, self._loss_fn)
        eval_metric_ops = _eval_metric_ops(
            self._default_metrics(), features, labels, predictions)

    return model_fn.ModelFnOps(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        signature_fn=self._signature_fn(),
        output_alternatives=self._create_output_alternatives(predictions))
def GetParams(self):
    """Create a graph containing single segment."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name, output_name = "input", "output"
    input_dims = [100, 24, 24, 2]
    pool_window = [1, 2, 2, 1]

    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension is left unknown in the placeholder.
        inp = array_ops.placeholder(
            dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
        with graph.device("/GPU:0"):
            conv_filter = constant_op.constant(
                [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
                name="weights",
                dtype=dtype)
            conv = nn.conv2d(
                input=inp,
                filter=conv_filter,
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="conv")
            bias = constant_op.constant(
                [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
            biased = nn.bias_add(conv, bias, name="bias_add")
            activated = nn.relu(biased, "relu")
            passthrough = array_ops.identity(activated, "identity")
            pool = nn_ops.max_pool(
                passthrough, pool_window, [1, 2, 2, 1], "VALID",
                name="max_pool")
        array_ops.squeeze(pool, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(100, 6, 6, 6)])
def call(self, inputs):
    """Run the mpusim separable convolution, then optional bias/activation.

    Args:
      inputs: Input tensor passed to `mpusim_separable_conv2d_op_impl`.

    Returns:
      The output tensor.

    Raises:
      ValueError: If `self.data_format` is not 'channels_last'.
    """
    # Compare by value. `is not` against a string literal tests object
    # identity, which depends on interning and is not a reliable equality
    # check (and emits a SyntaxWarning on recent Python versions).
    if self.data_format != 'channels_last':
        raise ValueError("mpusim_separable_conv2d "
                         "requires NHWC data format")

    strides = (1,) + self.strides + (1,)
    tf_data_format = conv_utils.convert_data_format(self.data_format, ndim=4)

    # Apply the actual ops.
    outputs = mpusim_separable_conv2d_op_impl(
        inputs,
        self.depthwise_kernel,
        self.pointwise_kernel,
        strides,
        self.padding.upper(),
        self.dilation_rate,
        None,
        tf_data_format,
        self.activations_datatype_size_byte,
        self.weights_datatype_size_byte,
        self.results_datatype_size_byte,
        self.systolic_array_height,
        self.systolic_array_width,
        self.activation_fifo_depth,
        self.accumulator_array_height,
        self.log_file_output_dir,
        self.model_name)

    if self.use_bias:
        outputs = nn.bias_add(
            outputs,
            self.bias,
            data_format=conv_utils.convert_data_format(
                self.data_format, ndim=4))

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Convolution-like op built from image patches and `kernel_function`.

    Patches are extracted from the preprocessed inputs with
    `tf.image.extract_patches`, the kernel is flattened to
    `(-1, self.filters)`, and both are handed to `self.kernel_function`.

    Args:
      inputs: Input tensor.

    Returns:
      The output tensor with optional bias and activation applied.
    """

    def pad_with_ones(dims):
        # Surround `dims` with leading and trailing unit entries.
        return (1,) + dims + (1,)

    data_format = conv_utils.convert_data_format(
        self.data_format, self.rank + 2)
    inputs, _ = K._preprocess_conv2d_input(inputs, self.data_format)

    patches = tf.image.extract_patches(
        inputs,
        sizes=pad_with_ones(K.int_shape(self.kernel)[:2]),
        strides=pad_with_ones(self.strides),
        rates=pad_with_ones(self.dilation_rate),
        padding=self.padding.upper(),
    )
    flat_kernel = K.reshape(self.kernel, (-1, self.filters))
    outputs = self.kernel_function([patches, flat_kernel])

    if self.data_format == 'channels_first':
        # Move the last axis to position 1.
        outputs = K.permute_dimensions(outputs, (0, 3, 1, 2))
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias, data_format=data_format)
    if self.activation is None:
        return outputs
    return self.activation(outputs)
def call(self, inputs, training=True):
    """Average `self.L` masked dense transforms of `inputs`.

    On each of the `self.L` iterations a fresh `(mask, penalty)` pair is
    obtained from `self._get_mask(training)`, the kernel is multiplied
    element-wise by the mask, and the resulting product with `inputs` is
    accumulated. The accumulated value is divided by `self.L` afterwards.

    Side effect: stores `training` on `self.training`.

    NOTE(review): the return type depends on `self.activation`: a tuple
    `(activated_outputs, penalty)` when an activation is set, but only
    `outputs` (no penalty) otherwise. Confirm callers expect this.
    NOTE(review): `penalty` comes from the last loop iteration only.
    NOTE(review): if `self.L` is 0, `outputs` is never assigned and the
    division fails; presumably `L >= 1` is guaranteed. Verify against the
    constructor.

    Args:
      inputs: Value convertible to a tensor of dtype `self.dtype`.
      training: Flag forwarded to `self._get_mask`.

    Returns:
      See the note on the return type above.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    self.training = training
    for i in range(self.L):
        mask, penalty = self._get_mask(training)
        # ipdb.set_trace()
        kernel_new = tf.multiply(self.kernel, mask)
        if len(shape) > 2:
            # Broadcasting is required for the inputs.
            if i == 0:
                outputs = standard_ops.tensordot(inputs, kernel_new, [[len(shape) - 1],
                                                                     [0]])
            else:
                outputs += standard_ops.tensordot(inputs, kernel_new, [[len(shape) - 1],
                                                                      [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            if i == 0:
                outputs = gen_math_ops.mat_mul(inputs, kernel_new)
            else:
                outputs += gen_math_ops.mat_mul(inputs, kernel_new)
        # add bias inside the sample loop: the bias is added once per
        # iteration, so after the division below it contributes exactly once.
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
    # take mean
    outputs = outputs / float(self.L)
    if self.activation is not None:
        return self.activation(outputs), penalty  # pylint: disable=not-callable
    return outputs
def execute(self, inputs, matrix, bias, soft_thresh):
    """Apply `matrix` to the flattened inputs, add bias and activate.

    The inputs are expanded to rank 4 if needed, flattened to 2-D,
    multiplied by `matrix`, and biased. The result goes through either a
    soft-shrinkage function, `sign(x) * max(|x| - soft_thresh, 0)`, or
    `self.activation`. When `self.transposed_mat` is set, the result is
    multiplied by the transpose of `matrix` and reshaped back to the
    (expanded) input shape.

    Args:
      inputs: Input tensor.
      matrix: Matrix multiplied with the flattened, transposed inputs.
      bias: Bias added with `nn.bias_add`.
      soft_thresh: Threshold used by the soft-shrinkage activation.

    Returns:
      The output tensor.
    """
    if len(inputs.shape) < 4:
        inputs = tf.expand_dims(inputs, 3)
    inp_shape = inputs.shape
    inputs = tf.reshape(inputs, [-1, tf.reduce_prod(inputs.shape[1:])])

    x = tf.linalg.matmul(matrix, tf.transpose(inputs))
    x = nn.bias_add(tf.transpose(x), bias)

    if self.soft_shrinkage_activation:
        # NOTE(review): the original guarded a no-op self-assignment of
        # `soft_thresh` with `self.learn_soft_thresh`. It had no effect and
        # was removed; the passed-in threshold is used in both cases.
        x = tf.multiply(
            tf.math.sign(x),
            tf.math.maximum(tf.math.abs(x) - soft_thresh, 0))
    else:
        x = self.activation(x)

    if not self.transposed_mat:
        return x

    outputs = tf.transpose(
        tf.linalg.matmul(tf.transpose(matrix), tf.transpose(x)))
    outputs = tf.reshape(
        outputs, [-1, inp_shape[1], inp_shape[2], inp_shape[3]])
    if outputs.shape[3] == 1:
        # Drop the trailing singleton axis.
        outputs = tf.reduce_sum(outputs, 3)
    return outputs
def call(self, inputs, training):
    """Convolve quantized inputs with a binarized kernel.

    With `self._use_alpha` the kernel is binarized to +/-1 by thresholding
    at zero and the inputs are scaled by the mean absolute kernel value;
    otherwise the kernel is binarized with `sign`. The inputs are then
    passed through `quantize_activations` and convolved.

    Args:
      inputs: Input tensor.
      training: Flag forwarded to `quantize_activations`.

    Returns:
      The output tensor: `sign(outputs)` when `self._binarize_activations`
      is set, otherwise the optionally activated outputs.
    """
    if self._use_alpha:
        alpha = tf.reduce_mean(tf.math.abs(self.kernel))
        bin_kernel = tf.cast(self.kernel > 0, tf.float32) * 2 - 1
        # bin_kernel = sign_w_alpha(self.kernel)
        inputs = inputs * alpha
    else:
        bin_kernel = sign(self.kernel)

    inputs = quantize_activations(
        inputs,
        training=training,
        quantize=self._quantize,
        quantize_bits=self._quantize_bits,
        min_deviation_multiplier=self._min_deviation_multiplier,
        max_deviation_multiplier=self._max_deviation_multiplier)

    outputs = self._convolution_op(inputs, bin_kernel)

    if self.use_bias:
        # Only the channels_last format is supported here.
        # Keep the cast result local. Assigning it back to `self.bias`
        # replaced the layer's bias attribute with a derived tensor on
        # every call, so later calls no longer read the original bias.
        bias = _fake_cast_float16(self.bias)
        outputs = nn.bias_add(outputs, bias, data_format='NHWC')
        # outputs = tf.cast(outputs, tf.float32)

    if self._binarize_activations:
        outputs = sign(outputs)
    elif self.activation is not None:
        return self.activation(outputs)
    return outputs
def template(x_shape=[2, 3, 4, 5], data_format="NHWC", description: str = ""):
    """Build a BiasAdd graph, run it, and emit a kernel test case.

    Args:
      x_shape: Shape of the input placeholder.
      data_format: Data format passed to `nn.bias_add`; the bias length is
        the last entry of `x_shape` for "NHWC" and entry 1 otherwise.
      description: Text appended to the test-case description.
    """
    from tensorflow.python.ops import nn

    num_channels = x_shape[-1] if data_format == "NHWC" else x_shape[1]

    x = tf.placeholder(np.float32, x_shape)
    b = tf.placeholder(np.float32, num_channels)
    y = nn.bias_add(x, b, data_format)

    # Random feed values; the input is drawn before the bias.
    vx = np.random.rand(*x_shape).astype(np.float32)
    vb = np.random.rand(num_channels).astype(np.float32)

    with tf.Session() as sess:
        # Reference output computed by TensorFlow itself.
        vy, = sess.run([y], {x: vx, b: vb})
        converter = TensorFlowConverter(sess, batch_size=2)
        graph = converter.convert([x, b], [y])

    feeds = {graph.inputs[0]: vx, graph.inputs[1]: vb}
    generate_kernel_test_case(
        description=f"[TensorFlow] BiasAdd {description}",
        graph=graph,
        inputs=feeds,
        expected={graph.outputs[0]: vy},
    )
def _training_loss(self, features, labels, logits=None, name="training_loss"):
    """Returns training loss tensor for this head.

    Training loss is different from the loss reported on the tensorboard as
    we should respect the example weights when computing the gradient.

      L = sum_{i} w_{i} * l_{i} / B

    where B is the number of examples in the batch, and l_{i}, w_{i} are the
    individual losses and example weights.

    Args:
      features: features dict.
      labels: either a tensor for labels or in multihead case, a dict of
        string to labels tensor.
      logits: logits, a float tensor.
      name: Op name.

    Returns:
      A loss `Tensor`.
    """
    labels = _check_labels(labels, self._label_name)

    if self._enable_centered_bias:
        centered_bias = _centered_bias(
            self.logits_dimension, self._centered_bias_weight_collection)
        logits = nn.bias_add(logits, centered_bias)

    per_example_loss = self._train_loss_fn(logits, labels)
    example_weights = _weight_tensor(features, self._weight_column_name)
    loss, weighted_average_loss = _loss(
        per_example_loss, example_weights, name=name)

    # The weighted average loss is what gets reported as the summary.
    summary_tag = _head_prefixed(self._head_name, "loss")
    summary.scalar(summary_tag, weighted_average_loss)
    return loss
def test_conv2d_biasadd_relu_fusion(self):
    """Test Conv2D+BiasAdd+Relu fusion."""
    if not test_util.is_gpu_available():
        self.skipTest('No GPU available')

    N, H, W, C = (5, 3, 3, 4)

    for precision in ('float16', 'float32'):
        ops.reset_default_graph()

        use_fp16 = precision == 'float16'
        # float16 runs use channels-last layouts, float32 channels-first.
        if use_fp16:
            x_shape, x_format, b_format = [N, H, W, C], 'NHWC', 'N..C'
        else:
            x_shape, x_format, b_format = [N, C, H, W], 'NCHW', 'NC..'

        x = _input(x_shape)
        w = _weight([2, 2, C, C])
        b = _bias([C])
        if use_fp16:
            x, w, b = [math_ops.cast(t, dtypes.float16) for t in (x, w, b)]

        conv = nn_ops.conv2d(
            x, w, strides=(1, 1), padding='SAME', data_format=x_format)
        biased = nn.bias_add(conv, b, data_format=b_format)
        out = array_ops.identity(nn.relu(biased))

        epilog_ops = [b'BiasAdd', b'Relu']
        fused_op = ['_FusedConv2D']
        self._VerifyValues(out, use_fp16, fused_op, epilog_ops)
def call(self, inputs):
    """Dense transform rescaled by `self.scale` over the kernel column norm.

    The product of the inputs and the kernel is multiplied by
    `self.scale / sqrt(sum(kernel ** 2, axis 0))` before the optional bias
    and activation are applied.

    Args:
      inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
      The output tensor.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    static_shape = inputs.get_shape().as_list()
    ndims = len(static_shape)

    if ndims <= 2:
        outputs = standard_ops.matmul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(
            inputs, self.kernel, [[ndims - 1], [0]])
        if context.in_graph_mode():
            # Restore the static shape with the last dim replaced by units.
            outputs.set_shape(static_shape[:-1] + [self.units])

    # Norm of the kernel taken over axis 0.
    kernel_norm = tf.sqrt(tf.reduce_sum(tf.square(self.kernel), [0]))
    scaler = self.scale / kernel_norm
    outputs = scaler * outputs

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    # Copied and modified from
    # tensorflow-0.12.0.contrib.layer.fully_connected to add the
    # weights_normalizer_* options.
    """Adds a fully connected layer.

    `fully_connected` creates a variable called `weights`, representing a
    fully connected weight matrix, which is multiplied by the `inputs` to
    produce a `Tensor` of hidden units. If a `normalizer_fn` is provided
    (such as `batch_norm`), it is then applied. Otherwise, if
    `normalizer_fn` is None and a `biases_initializer` is provided then a
    `biases` variable would be created and added the hidden units. Finally,
    if `activation_fn` is not `None`, it is applied to the hidden units as
    well.

    Note: that if `inputs` have a rank greater than 2, then `inputs` is
    flattened prior to the initial matrix multiply by `weights`.

    Args:
      inputs: A tensor of with at least rank 2 and value for the last
        dimension, i.e. `[batch_size, depth]`,
        `[None, None, None, channels]`.
      num_outputs: Integer or long, the number of output units in the layer.
      activation_fn: activation function, set to None to skip it and
        maintain a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor
        added. default set to None for no normalizer function
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip
        biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused.
        To be able to reuse the layer scope must be given.
      variables_collections: Optional list of collections for all the
        variables or a dictionary containing a different list of
        collections per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
      the tensor variable representing the result of the series of
      operations.

    Raises:
      ValueError: if `num_outputs` is not an integer, if x has rank less
        than 2 or if its last dimension is not set.
    """
    if not isinstance(num_outputs, six.integer_types):
        # Format the message explicitly. Passing the value as a second
        # exception argument left the '%s' placeholder unfilled.
        raise ValueError(
            'num_outputs should be int or long, got %s.' % (num_outputs,))
    with variable_scope.variable_scope(
            scope, 'fully_connected', [inputs], reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        inputs_shape = inputs.get_shape()
        num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

        # Static and dynamic output shapes: the input shape with its last
        # dimension replaced by num_outputs.
        static_shape = inputs_shape.as_list()
        static_shape[-1] = num_outputs
        out_shape = array_ops.unpack(
            array_ops.shape(inputs), len(static_shape))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable(
            'weights',
            shape=weights_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            collections=weights_collections,
            trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(
                weights, **weights_normalizer_params)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        return utils.collect_named_outputs(
            outputs_collections, sc.original_name_scope, outputs)
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_normalizer_fn=None,
                weights_normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    # Copied and modified from tensorflow-0.12.0.contrib.layer.convolution
    # to add the weights_normalizer_* options.
    """Adds an N-D convolution followed by an optional batch_norm layer.

    It is required that 1 <= N <= 3.

    A `weights` variable holding the convolution kernel is created and
    convolved (actually cross-correlated) with `inputs`. When
    `normalizer_fn` is given (for example `batch_norm`) it is applied to the
    result; otherwise, when `biases_initializer` is not None, a `biases`
    variable is created and added. Finally `activation_fn`, if not None, is
    applied.

    A `rate` value > 1 in any dimension performs an a'trous convolution
    with that input stride/dilation rate; `stride` values != 1 are not
    supported in that case.

    Args:
      inputs: a Tensor of rank N+2, shaped
        `[batch_size] + input_spatial_shape + [in_channels]` unless
        `data_format` starts with "NC", in which case it is shaped
        `[batch_size, in_channels] + input_spatial_shape`.
      num_outputs: integer, the number of output filters.
      kernel_size: a sequence of N positive integers giving the spatial
        dimensions of the filters, or a single integer used for every
        spatial dimension.
      stride: a sequence of N positive integers giving the stride at which
        to compute output, or a single integer used for every spatial
        dimension. Any `stride` value != 1 is incompatible with any `rate`
        value != 1.
      padding: one of `"VALID"` or `"SAME"`.
      data_format: A string or None. Selects whether the channel dimension
        of the input and output is the last dimension (default, or when
        `data_format` does not start with "NC") or the second dimension
        (when it starts with "NC"). Valid values are "NWC" (default) and
        "NCW" for N=1, "NHWC" (default) and "NCHW" for N=2, and currently
        only "NDHWC" for N=3.
      rate: a sequence of N positive integers giving the dilation rate for
        a'trous convolution, or a single integer used for every spatial
        dimension. Any `rate` value != 1 is incompatible with any `stride`
        value != 1.
      activation_fn: activation function, set to None to skip it and
        maintain a linear activation.
      normalizer_fn: normalization function to use instead of `biases`.
        When provided, `biases_initializer` and `biases_regularizer` are
        ignored and no `biases` are created or added. Defaults to None (no
        normalizer function).
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip
        biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused.
        To be able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the
        variables or a dictionary containing a different list of collection
        per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      a tensor representing the output of the operation.

    Raises:
      ValueError: if `data_format` is invalid.
      ValueError: both 'rate' and `stride` are not uniformly 1.
    """
    valid_formats = (None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC')
    if data_format not in valid_formats:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    with variable_scope.variable_scope(
            scope, 'Conv', [inputs], reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        input_shape = inputs.get_shape()
        input_rank = input_shape.ndims

        if input_rank is None:
            raise ValueError('Rank of inputs must be known')
        if not 3 <= input_rank <= 5:
            raise ValueError(
                'Rank of inputs is %d, which is not >= 3 and <= 5' %
                input_rank)

        # Expand scalar hyper-parameters to one value per spatial dimension.
        conv_dims = input_rank - 2
        kernel_size = utils.n_positive_integers(conv_dims, kernel_size)
        stride = utils.n_positive_integers(conv_dims, stride)
        rate = utils.n_positive_integers(conv_dims, rate)

        # Locate the channel axis from the data format.
        if data_format is None or data_format.endswith('C'):
            channel_axis = input_rank - 1
        elif data_format.startswith('NC'):
            channel_axis = 1
        else:
            raise ValueError('Invalid data_format')
        num_input_channels = input_shape[channel_axis].value
        if num_input_channels is None:
            raise ValueError('Number of in_channels must be known.')

        weights = variables.model_variable(
            'weights',
            shape=list(kernel_size) + [num_input_channels, num_outputs],
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            collections=utils.get_variable_collections(
                variables_collections, 'weights'),
            trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(
                weights, **weights_normalizer_params)

        outputs = nn.convolution(
            input=inputs,
            filter=weights,
            dilation_rate=rate,
            strides=stride,
            padding=padding,
            data_format=data_format)

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        elif biases_initializer is not None:
            # Biases are only used when no normalizer is supplied.
            biases = variables.model_variable(
                'biases',
                shape=[num_outputs],
                dtype=dtype,
                initializer=biases_initializer,
                regularizer=biases_regularizer,
                collections=utils.get_variable_collections(
                    variables_collections, 'biases'),
                trainable=trainable)
            outputs = nn.bias_add(outputs, biases, data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(
            outputs_collections, sc.original_name_scope, outputs)
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
    """Adds the parameters for a fully connected layer and returns the output.

    A fully connected layer is generally defined as a matrix multiply:
    `y = f(w * x + b)` where `f` is given by `activation_fn`. If
    `activation_fn` is `None`, the result of `y = w * x + b` is returned.

    If `x` has shape `[dim_0, dim_1, ..., dim_n]` with more than 2
    dimensions (n > 1), the matrix multiply is repeated along the leading
    dimensions. The result `r` has shape
    `[dim_0, ..., dim_{n-1}, num_output_units]`, where each entry
    `r[i_0, ..., i_{n-1}, k]` is the sum over `j` of
    `x[i_0, ..., i_{n-1}, j] * w[j, k]`. This is done by reshaping `x` to
    the 2-D shape `[dim_0 * ... * dim_{n-1}, dim_n]` before the matrix
    multiply and reshaping the product back afterwards.

    This op creates `w` and optionally `b`. Bias (`b`) can be disabled by
    setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can
    be reused with `tf.variable_scope` or `tf.make_template`.

    Most of the details of variable creation can be controlled by
    specifying the initializers (`weight_init` and `bias_init`) and in which
    collections to place the created variables (`weight_collections` and
    `bias_collections`; note that the variables are always added to the
    `VARIABLES` collection). The output of the layer can be placed in custom
    collections using `output_collections`. The collections arguments
    default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`, respectively.

    A per layer regularization can be specified by setting
    `weight_regularizer` and `bias_regularizer`, which are applied to the
    weights and biases respectively, and whose output is added to the
    `REGULARIZATION_LOSSES` collection.

    Args:
      x: The input `Tensor`.
      num_output_units: The size of the output.
      activation_fn: A function that requires a single Tensor that is
        applied as a non-linearity. If None is used, do not apply any
        activation.
      weight_init: An optional weight initialization, defaults to
        `xavier_initializer`.
      bias_init: An initializer for the bias, defaults to 0. Set to `None`
        in order to disable bias.
      name: The name for this operation is used to name operations and to
        find variables. If specified it must be unique for this scope,
        otherwise a unique name starting with "fully_connected" will be
        created. See `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are
        added.
      bias_collections: List of graph collections to which biases are
        added.
      output_collections: List of graph collections to which outputs are
        added.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      weight_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for weights.
      bias_regularizer: A regularizer like the result of `l1_regularizer`
        or `l2_regularizer`. Used for biases.

    Returns:
      The output of the fully connected layer.

    Raises:
      ValueError: if x has rank less than 2 or if its last dimension is not
        set.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        dims = x.get_shape().dims
        if dims is None:
            raise ValueError('dims of x must be known but is None')
        rank = len(dims)
        if rank < 2:
            raise ValueError('rank of x must be at least 2 not: %d' % rank)
        num_input_units = dims[-1].value
        if num_input_units is None:
            raise ValueError('last dimension of x must be known but is None')
        needs_flatten = rank > 2
        dtype = x.dtype.base_dtype

        # Variables always join the VARIABLES collection as well.
        weight_collections = (
            set(weight_collections or []) | {ops.GraphKeys.VARIABLES})
        w = variable_scope.get_variable(
            'weights',
            shape=[num_input_units, num_output_units],
            dtype=dtype,
            initializer=weight_init,
            collections=weight_collections,
            regularizer=weight_regularizer,
            trainable=trainable)

        if needs_flatten:
            x_2_dim = array_ops.reshape(x, [-1, num_input_units])
        else:
            x_2_dim = x
        y = standard_ops.matmul(x_2_dim, w)

        if bias_init is not None:
            bias_collections = (
                set(bias_collections or []) | {ops.GraphKeys.VARIABLES})
            b = variable_scope.get_variable(
                'bias',
                shape=[num_output_units],
                dtype=dtype,
                initializer=bias_init,
                collections=bias_collections,
                regularizer=bias_regularizer,
                trainable=trainable)
            y = nn.bias_add(y, b)

        if needs_flatten:
            # Restore the leading dimensions, dynamically and statically.
            dynamic_dims = array_ops.unpack(array_ops.shape(x))
            dynamic_dims[-1] = num_output_units
            y = array_ops.reshape(y, array_ops.pack(dynamic_dims))

            static_shape = x.get_shape().as_list()
            static_shape[-1] = num_output_units
            y.set_shape(static_shape)

        return _apply_activation(y, activation_fn, output_collections)
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer,
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  """Adds a 2D convolution, then either a normalizer or a bias, then activation.

  A `weights` variable holding the convolutional kernel is created and
  convolved with `inputs`. What happens next depends on the arguments:

    * if `normalizer_fn` is given (e.g. `batch_norm`), it is applied to the
      convolution output and no `biases` variable is created;
    * otherwise, if `biases_initializer` is not None, a `biases` variable is
      created and added to the convolution output.

  Finally, if `activation_fn` is given, it is applied to the result.

  Args:
    inputs: a 4-D tensor `[batch_size, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 `[kernel_height, kernel_width]` of the
      filters. Can be an int if both values are the same.
    stride: a list of length 2 `[stride_height, stride_width]`. Can be an int
      if both strides are the same. Note that presently both strides must
      have the same value.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function; skipped when falsy.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are neither created nor
      added.
    normalizer_params: dict of keyword arguments for `normalizer_fn`.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer, `scope` must be given.
    variables_collections: optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs to.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.

  Returns:
    a tensor representing the output of the operation.
  """
  with variable_scope.variable_op_scope(
      [inputs], scope, 'Conv', reuse=reuse) as conv_scope:
    base_dtype = inputs.dtype.base_dtype
    k_height, k_width = utils.two_element_tuple(kernel_size)
    s_height, s_width = utils.two_element_tuple(stride)
    in_channels = utils.last_dimension(inputs.get_shape(), min_rank=4)

    # Kernel layout: [height, width, input channels, output channels].
    kernel_shape = [k_height, k_width, in_channels, num_outputs]
    kernel_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    kernel = variables.model_variable(
        'weights',
        shape=kernel_shape,
        dtype=base_dtype,
        initializer=weights_initializer,
        regularizer=weights_regularizer,
        collections=kernel_collections,
        trainable=trainable)

    conv_strides = [1, s_height, s_width, 1]
    result = nn.conv2d(inputs, kernel, conv_strides, padding=padding)

    if normalizer_fn:
      # The normalizer replaces the bias term entirely.
      result = normalizer_fn(result, **(normalizer_params or {}))
    elif biases_initializer is not None:
      offset_collections = utils.get_variable_collections(
          variables_collections, 'biases')
      offset = variables.model_variable(
          'biases',
          shape=[num_outputs],
          dtype=base_dtype,
          initializer=biases_initializer,
          regularizer=biases_regularizer,
          collections=offset_collections,
          trainable=trainable)
      result = nn.bias_add(result, offset)

    if activation_fn:
      result = activation_fn(result)
    return utils.collect_named_outputs(
        outputs_collections, conv_scope.name, result)
def call(self, inputs):
  """Applies the layer: cosine of an affine map of `inputs`.

  `inputs` is converted to a tensor with dtype `self.dtype`, cast to
  float32, multiplied by `self.kernel`, offset by `self.bias`, and the
  element-wise cosine of the result is returned.

  Args:
    inputs: value convertible to a tensor of dtype `self.dtype`.

  Returns:
    The tensor `cos(matmul(inputs, self.kernel) + self.bias)`.
  """
  as_tensor = ops.convert_to_tensor(inputs, dtype=self.dtype)
  as_float32 = gen_math_ops.cast(as_tensor, dtypes.float32)
  projected = gen_math_ops.mat_mul(as_float32, self.kernel)
  shifted = nn.bias_add(projected, self.bias)
  return gen_math_ops.cos(shifted)
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS,),
                    bias_collections=(ops.GraphKeys.BIASES,),
                    output_collections=(ops.GraphKeys.ACTIVATIONS,),
                    weight_regularizer=None,
                    bias_regularizer=None):
  """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.

  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to
  place the created variables (`weight_collections` and `bias_collections`;
  note that the variables are always added to the `VARIABLES` collection). The
  output of the layer can be placed in custom collections using
  `output_collections`. The collections arguments default to `WEIGHTS`,
  `BIASES` and `ACTIVATIONS`, respectively.

  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`. Its static shape must be known, with rank at least
      2 and a known size along dimension 1 (the number of input units).
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.

  Raises:
    ValueError: if the static shape of `x` is unknown, its rank is less than
      2, or its dimension 1 is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    # Validate the static shape up front: the weight matrix needs a concrete
    # input size, and failing here gives a clearer error than the
    # TypeError/IndexError raised by indexing an unknown or too-short shape.
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[1].value
    if num_input_units is None:
      raise ValueError('dimension 1 of x must be known but is None')

    dtype = x.dtype.base_dtype

    w = _weight_variable(shape=[num_input_units, num_output_units],
                         dtype=dtype,
                         initializer=weight_init,
                         collections=weight_collections,
                         regularizer=weight_regularizer)

    y = standard_ops.matmul(x, w)

    # A `None` initializer is the documented switch for a bias-free layer.
    if bias_init is not None:
      b = _bias_variable(shape=[num_output_units],
                         dtype=dtype,
                         initializer=bias_init,
                         collections=bias_collections,
                         regularizer=bias_regularizer)

      y = nn.bias_add(y, b)

    return _apply_activation(y, activation_fn, output_collections)