def testSparseGlorotUniform_OutputShape(self):
  """A variable built with SparseGlorotUniform keeps the requested shape."""
  expected_shape = (512, 1024)
  sparse_init = common_init.SparseGlorotUniform(.5)
  weights = tf.get_variable(
      "x",
      shape=list(expected_shape),
      initializer=sparse_init,
      dtype=tf.float32)

  with self.test_session() as session:
    # Variables must be initialized before their values can be fetched.
    session.run(tf.global_variables_initializer())
    values = session.run(weights)

  self.assertEqual(values.shape, expected_shape)
def testSparseGlorotUniform_NoSparsity(self):
  """With sparsity 0 and equal seeds, the sparse and plain initializers agree."""
  shape = [512, 1024]

  # Both initializers are given the same seed so their outputs are comparable.
  sparse_init = common_init.SparseGlorotUniform(0, seed=5)
  reference_init = tf.glorot_uniform_initializer(seed=5)

  sparse_var = tf.get_variable(
      "x", shape=shape, initializer=sparse_init, dtype=tf.float32)
  reference_var = tf.get_variable(
      "y", shape=shape, initializer=reference_init, dtype=tf.float32)

  with self.test_session() as session:
    session.run(tf.global_variables_initializer())
    sparse_values = session.run(sparse_var)
    reference_values = session.run(reference_var)

  for values in (sparse_values, reference_values):
    self.assertEqual(values.shape, (512, 1024))
  self.assertAllEqual(sparse_values, reference_values)
def dense(x,
          units,
          activation=None,
          use_bias=True,
          kernel_initializer="glorot_uniform",
          bias_initializer="zeros",
          sparsity_technique="variational_dropout",
          auxiliary_initializer=None,
          threshold=3.0,
          clip_alpha=None,
          training=True,
          dtype=tf.float32,
          name=None,
          initial_sparsity=None):
  """Matmul & bias add that supports broadcasting for batched gemm.

  Supports a constrained set of the functionality provided by
  tf.layers.dense.

  For "magnitude_pruning" and "random_pruning" the layer is delegated to
  `pruning_layers.masked_fully_connected`; in that path `auxiliary_initializer`,
  `threshold`, `clip_alpha` and `training` are not used, and `dtype` is only
  used for the sparse kernel initializer.

  Args:
    x: input tensor. Its last dimension must be statically known for the
      variational dropout and l0 regularization paths.
    units: number of units in the dense layer.
    activation: activation function to use in the layer.
    use_bias: whether or not to add a bias to the output.
    kernel_initializer: weight initializer for the layer.
    bias_initializer: weight initializer for the bias.
    sparsity_technique: sparsification technique to apply to the weights. One
      of "variational_dropout", "l0_regularization", "magnitude_pruning" or
      "random_pruning".
    auxiliary_initializer: initializer for auxiliary variables used in
      variational dropout (`log_sigma2`) and l0 regularization (`log_alpha`).
      If it resolves to a falsy value, a per-technique default is used.
    threshold: log-alpha threshold for variational dropout. Only forwarded to
      the evaluation matmul.
    clip_alpha: alpha clipping setting for variational dropout. Only forwarded
      to the training matmul.
    training: whether this run is training or evaluating the model.
    dtype: data type for the weights and computation.
    name: name for the layer; the variable scope defaults to "dense".
    initial_sparsity: initial weight sparsity at the start of training. Only
      supported for magnitude pruning and random pruning.

  Returns:
    Tensor representing the output of the layer.

  Raises:
    ValueError: if `initial_sparsity` is given for a technique other than
      magnitude or random pruning, if the last dimension of `x` is undefined,
      or if `sparsity_technique` is not supported.
  """
  activation = activations.get(activation)
  kernel_initializer = initializers.get(kernel_initializer)
  bias_initializer = initializers.get(bias_initializer)

  if sparsity_technique in ("magnitude_pruning", "random_pruning"):
    if initial_sparsity is not None:
      # If the initial sparsity value is passed in, use the sparse glorot
      # uniform initializer to account for the zero valued weights.
      kernel_initializer = common_init.SparseGlorotUniform(
          initial_sparsity, dtype=dtype)
      tf.logging.info(
          "Using sparse initialization with sparsity {} for variable {}".format(
              initial_sparsity, tf.get_variable_scope().name))

    # masked_fully_connected doesn't take a use_bias arg; passing None for the
    # bias initializer is how we ask it not to create a bias variable.
    if not use_bias:
      bias_initializer = None
    with tf.variable_scope(name, default_name="dense"):
      return pruning_layers.masked_fully_connected(
          inputs=x,
          num_outputs=units,
          activation_fn=activation,
          weights_initializer=kernel_initializer,
          biases_initializer=bias_initializer)

  if initial_sparsity is not None:
    raise ValueError("initial_sparsity only supported for mp & rp")

  input_shape = x.get_shape().as_list()
  in_features = input_shape[-1]
  if in_features is None:
    raise ValueError("The last dimension of the inputs to `Dense` "
                     "should be defined. Found `None`.")

  # Reject unknown techniques before any variable is created, so an invalid
  # argument does not leave orphan kernel/bias variables behind in the graph.
  if sparsity_technique not in ("variational_dropout", "l0_regularization"):
    raise ValueError(
        "Unsupported sparsity technique {}".format(sparsity_technique))

  with tf.variable_scope(name, default_name="dense") as vs:
    kernel = tf.get_variable(
        "kernel",
        shape=[in_features, units],
        initializer=kernel_initializer,
        dtype=dtype,
        trainable=True)

    bias = None
    if use_bias:
      bias = tf.get_variable(
          "bias",
          shape=[units],
          initializer=bias_initializer,
          dtype=dtype,
          trainable=True)

  # Inputs with more than two dimensions go through the broadcasting matmuls.
  needs_broadcast = x.get_shape().ndims > 2

  # Compute the dense layer
  if sparsity_technique == "variational_dropout":
    log_sigma2_initializer = initializers.get(auxiliary_initializer)
    if not log_sigma2_initializer:
      log_sigma2_initializer = tf.constant_initializer(value=-10, dtype=dtype)

    # Re-enter the layer's variable scope without opening a new name scope so
    # the auxiliary variable is created alongside the kernel.
    with tf.variable_scope(vs, auxiliary_name_scope=False) as vs1:
      with tf.name_scope(vs1.original_name_scope):
        log_sigma2 = tf.get_variable(
            "log_sigma2",
            shape=[in_features, units],
            initializer=log_sigma2_initializer,
            dtype=dtype,
            trainable=True)

    variational_parameters = (kernel, log_sigma2)
    tf.add_to_collection(VARIATIONAL_DROPOUT_PARAMETERS,
                         variational_parameters)

    if needs_broadcast:
      if training:
        outputs = vd.nn.broadcast_matmul_train(
            x, variational_parameters, clip_alpha=clip_alpha)
      else:
        outputs = vd.nn.broadcast_matmul_eval(
            x, variational_parameters, threshold)
    else:
      if training:
        outputs = vd.nn.matmul_train(
            x, variational_parameters, clip_alpha=clip_alpha)
      else:
        outputs = vd.nn.matmul_eval(x, variational_parameters, threshold)
  else:
    # Only "l0_regularization" can reach this branch (validated above).
    log_alpha_initializer = initializers.get(auxiliary_initializer)
    if not log_alpha_initializer:
      # The default mean 2.197 ~= log(9) gives alpha / (alpha + 1) ~= 0.9,
      # i.e. its complement 1 / (alpha + 1) ~= 0.1.
      log_alpha_initializer = tf.random_normal_initializer(
          mean=2.197, stddev=0.01, dtype=dtype)

    # Same scope re-entry as for variational dropout.
    with tf.variable_scope(vs, auxiliary_name_scope=False) as vs1:
      with tf.name_scope(vs1.original_name_scope):
        log_alpha = tf.get_variable(
            "log_alpha",
            shape=[in_features, units],
            initializer=log_alpha_initializer,
            dtype=dtype,
            trainable=True)

    weight_parameters = (kernel, log_alpha)
    tf.add_to_collection(L0_REGULARIZATION_PARAMETERS, weight_parameters)

    if needs_broadcast:
      if training:
        outputs = l0.nn.broadcast_matmul_train(x, weight_parameters)
      else:
        outputs = l0.nn.broadcast_matmul_eval(x, weight_parameters)
    else:
      if training:
        outputs = l0.nn.matmul_train(x, weight_parameters)
      else:
        outputs = l0.nn.matmul_eval(x, weight_parameters)

  # Handle the bias and activation
  if use_bias:
    outputs = tf.nn.bias_add(outputs, bias)
  if activation is not None:
    return activation(outputs)
  return outputs