def testInitializerDifferent(self):
    """Truncated-normal initializers with seeds 1 and 2 must not be identical.

    The check is delegated to `identicaltest` and repeated for float32 and
    float64.
    """
    for dtype in (dtypes.float32, dtypes.float64):
        seeded_one = init_ops.truncated_normal_initializer(
            0.0, 1.0, seed=1, dtype=dtype)
        seeded_two = init_ops.truncated_normal_initializer(
            0.0, 1.0, seed=2, dtype=dtype)
        are_identical = identicaltest(self, seeded_one, seeded_two)
        self.assertFalse(are_identical)
def testInitFromPartitionVar(self):
    """Restores partitioned variables from a checkpoint under new names.

    A checkpoint is written by `_create_partition_checkpoints`; two fresh
    graphs then map its "scope/var1" entry onto new variables, first via
    name/scope strings and then via an explicit variable list.
    """

    def make_partitioner():
        # Same partitioner configuration for every variable in this test.
        return partitioned_variables.min_max_variable_partitioner(
            max_partitions=5, axis=0, min_slice_size=8 << 10)

    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
        v1 = _create_partition_checkpoints(session, checkpoint_dir)

    # New graph and session.
    with ops.Graph().as_default() as g, \
            self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
            my1 = variable_scope.get_variable(
                name="my1",
                shape=[100, 100],
                initializer=init_ops.truncated_normal_initializer(0.5),
                partitioner=make_partitioner())
            my1_var_list = my1._get_variable_list()

        with variable_scope.variable_scope("some_other_scope"):
            my2 = variable_scope.get_variable(
                name="var1",
                shape=[100, 100],
                initializer=init_ops.truncated_normal_initializer(0.5),
                partitioner=make_partitioner())
            my2_var_list = my2._get_variable_list()

        assignment_map = {
            "scope/var1": "some_scope/my1",
            "scope/": "some_other_scope/",
        }
        checkpoint_utils.init_from_checkpoint(checkpoint_dir, assignment_map)

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
        my2_values = session.run(my2_var_list)
        self.assertAllEqual(my2_values, v1)

    # New graph and session.
    with ops.Graph().as_default() as g, \
            self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
            my1 = variable_scope.get_variable(
                name="my1",
                shape=[100, 100],
                initializer=init_ops.truncated_normal_initializer(0.5),
                partitioner=make_partitioner())
            my1_var_list = my1._get_variable_list()

        # This time the checkpoint entry is mapped to the variable list itself.
        checkpoint_utils.init_from_checkpoint(
            checkpoint_dir, {"scope/var1": my1_var_list})

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
def _define_vars(self, params, **kwargs):
    """Creates the `tree_parameters` and `tree_thresholds` variables.

    Both variables are created under `self.device_assigner`, are named with
    `self.layer_num`, and use a truncated normal initializer configured from
    `params.weight_init_mean` / `params.weight_init_std`.

    Args:
      params: object providing `num_nodes`, `num_features_per_node`,
        `weight_init_mean` and `weight_init_std`.
      **kwargs: accepted but not used by this implementation.
    """

    def new_initializer():
        # A separate initializer object per variable, as before.
        return init_ops.truncated_normal_initializer(
            mean=params.weight_init_mean, stddev=params.weight_init_std)

    layer = self.layer_num
    with ops.device(self.device_assigner):
        self.tree_parameters = variable_scope.get_variable(
            name='tree_parameters_%d' % layer,
            shape=[params.num_nodes, params.num_features_per_node],
            initializer=new_initializer())
        self.tree_thresholds = variable_scope.get_variable(
            name='tree_thresholds_%d' % layer,
            shape=[params.num_nodes],
            initializer=new_initializer())
def testWithScopes(self):
    """`assign_from_values_fn` overwrites variables created in nested scopes."""
    init_value0 = np.asarray([1.0, 3.0, 9.0]).reshape((1, 3, 1))
    init_value1 = np.asarray([2.0, 4.0, 6.0, 8.0]).reshape((2, 1, 2))

    with self.test_session() as sess:
        initializer = init_ops.truncated_normal_initializer(stddev=.1)

        with variable_scope.variable_scope('my_model/my_layer0'):
            var0 = variables_lib2.variable(
                'my_var0', shape=[1, 3, 1], initializer=initializer)
        with variable_scope.variable_scope('my_model/my_layer1'):
            var1 = variables_lib2.variable(
                'my_var1', shape=[2, 1, 2], initializer=initializer)

        # Keys are the fully scoped variable names.
        var_names_to_values = {
            'my_model/my_layer0/my_var0': init_value0,
            'my_model/my_layer1/my_var1': init_value1,
        }
        init_fn = variables_lib2.assign_from_values_fn(var_names_to_values)

        # Initialize the variables.
        sess.run(variables_lib.global_variables_initializer())

        # Perform the assignment.
        init_fn(sess)

        # Request and test the variable values:
        fetched0, fetched1 = sess.run([var0, var1])
        self.assertAllEqual(init_value0, fetched0)
        self.assertAllEqual(init_value1, fetched1)
def load_embedding_initializer(ckpt_path,
                               embedding_tensor_name,
                               new_vocab_size,
                               embedding_dim,
                               old_vocab_file,
                               new_vocab_file,
                               num_oov_buckets=0,
                               initializer=None):
    """Returns a variable initializer for loading pre-trained embeddings.

    Thin wrapper around `load_and_remap_matrix_initializer()` specialized for
    embedding weights: rows are remapped according to the old/new vocabulary
    files, columns are not remapped. See the docs of
    `load_and_remap_matrix_initializer()` for more details.

    NOTE: Only for use with div-partitioned variables / vocabularies.

    Args:
      ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
        from which the old matrix `Tensor` will be loaded.
      embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
      new_vocab_size: Number of entries in the new vocab.
      embedding_dim: `int` specifying the dimension of the embedding vectors
        from the checkpoint. Must match the number of columns in the old
        embedding matrix.
      old_vocab_file: A scalar `Tensor` of type `string` containing the path
        to the old vocabulary file.
      new_vocab_file: A scalar `Tensor` of type `string` containing the path
        to the new vocabulary file.
      num_oov_buckets: `int` specifying the number of out-of-vocabulary
        buckets to use. Must be >= 0.
      initializer: Initializer function that accepts a 1-D tensor as the arg
        to specify the shape of the returned tensor. If `None`, a
        `truncated_normal_initializer()` with stddev `1/sqrt(embedding_dim)`
        is used.

    Returns:
      A variable initializer function.
    """
    if initializer is None:
        # TODO(b/25671353): This should be kept in sync with the stddev used by
        # feature_column.py's _EmbeddingColumn.
        dim_as_float = math_ops.cast(embedding_dim, dtypes.float32)
        default_stddev = 1.0 / math_ops.sqrt(dim_as_float)
        initializer = init_ops.truncated_normal_initializer(
            stddev=default_stddev)

    # Only the row (vocabulary) axis is remapped; columns are loaded as-is.
    remap_kwargs = dict(
        ckpt_path=ckpt_path,
        old_tensor_name=embedding_tensor_name,
        new_row_vocab_size=new_vocab_size,
        new_col_vocab_size=embedding_dim,
        old_row_vocab_file=old_vocab_file,
        new_row_vocab_file=new_vocab_file,
        old_col_vocab_file=None,
        new_col_vocab_file=None,
        num_row_oov_buckets=num_oov_buckets,
        num_col_oov_buckets=0,
        initializer=initializer)
    return load_and_remap_matrix_initializer(**remap_kwargs)
def _WeightInit(self, stddev):
    """Returns a truncated normal variable initializer.

    Exists only as a short alias so that call sites stop wrapping.

    Args:
      stddev: Standard deviation of the truncated normal distribution.

    Returns:
      An initializer that initializes with a truncated normal variable.
    """
    weight_initializer = init_ops.truncated_normal_initializer(stddev=stddev)
    return weight_initializer
def _random_weights(self, size=50, num_shards=1):
    """Creates and initializes a partitioned 1-D weight vector.

    Args:
      size: total number of weights; must be positive.
      num_shards: number of slices; must be in `[1, size]`.

    Returns:
      The list returned by `create_partitioned_variables`, with each
      variable's initializer already run.
    """
    assert size > 0
    assert num_shards > 0
    assert num_shards <= size

    normal_init = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1.0, dtype=dtypes.float32)
    shards = partitioned_variables.create_partitioned_variables(
        shape=[size], slicing=[num_shards], initializer=normal_init)
    for shard in shards:
        shard.initializer.run()
    return shards
def _random_weights(self, size=50, num_shards=1):
    """Creates and initializes a sharded 1-D "embedding_weights" variable.

    Args:
      size: total number of weights; must be positive.
      num_shards: number of shards; must be in `[1, size]`.

    Returns:
      A list of the variable's shards, each with its initializer already run.
    """
    assert size > 0
    assert num_shards > 0
    assert num_shards <= size

    partitioner = partitioned_variables.fixed_size_partitioner(num_shards)
    normal_init = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1.0, dtype=dtypes.float32)
    partitioned = variable_scope.get_variable(
        "embedding_weights",
        shape=[size],
        partitioner=partitioner,
        initializer=normal_init)

    # Iterating the partitioned variable yields its individual shards.
    shards = list(partitioned)
    for shard in shards:
        shard.initializer.run()
    return shards
def _conv(args, output_size, filter_size, stddev=0.001, bias=True,
          bias_start=0.0, scope=None):
    """Convolves `args` (concatenated on the last axis) with a learned kernel.

    Args:
      args: a 4-D tensor or a non-empty sequence of 4-D tensors
        (batch_size x height x width x arg_size). All must share the same
        height and width, and the last dimension must be statically known.
      output_size: number of output channels of the kernel.
      filter_size: pair `(filter_height, filter_width)`.
      stddev: stddev of the truncated normal kernel initializer.
      bias: whether to add a learned bias to the result.
      bias_start: constant initial value for the bias.
      scope: variable scope name; defaults to "Conv".

    Returns:
      The stride-1, 'SAME'-padded convolution result, plus the bias when
      `bias` is true.

    Raises:
      ValueError: if `args` is missing/empty, an argument is not 4-D, its
        last dimension is unknown, or heights/widths are inconsistent.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 3.
    # (batch_size x height x width x arg_size)
    shapes = [a.get_shape().as_list() for a in args]

    # Check rank first: indexing shapes[0][1] / [2] on a lower-rank argument
    # would otherwise raise IndexError instead of this descriptive error.
    for shape in shapes:
        if len(shape) != 4:
            raise ValueError("Conv is expecting 4D arguments: %s" % str(shapes))

    height = shapes[0][1]
    width = shapes[0][2]
    total_arg_size = 0
    for shape in shapes:
        if not shape[3]:
            raise ValueError(
                "Conv expects shape[3] of arguments: %s" % str(shapes))
        if shape[1] != height or shape[2] != width:
            raise ValueError(
                "Inconsistent height and width size in arguments: %s"
                % str(shapes))
        total_arg_size += shape[3]

    with vs.variable_scope(scope or "Conv"):
        kernel = vs.get_variable(
            "Kernel",
            [filter_size[0], filter_size[1], total_arg_size, output_size],
            initializer=init_ops.truncated_normal_initializer(stddev=stddev))

        if len(args) == 1:
            res = tf.nn.conv2d(args[0], kernel, [1, 1, 1, 1], padding='SAME')
        else:
            # NOTE(review): `concat(3, args)` is the (axis, values) argument
            # order; confirm it matches the TensorFlow version in use.
            res = tf.nn.conv2d(array_ops.concat(3, args), kernel,
                               [1, 1, 1, 1], padding='SAME')

        if not bias:
            return res
        bias_term = vs.get_variable(
            "Bias", [output_size],
            initializer=init_ops.constant_initializer(bias_start))
    return res + bias_term
def __new__(cls,
            sparse_id_column,
            embedding_dimension,
            max_sequence_length,
            initializer=None,
            num_units=256,
            cell_type='basic_rnn',
            bidirectional_rnn=False,
            mode=model_fn.ModeKeys.TRAIN,
            dropout_keep_probabilities=None,
            ckpt_to_load_from=None,
            tensor_name_in_ckpt=None,
            shared_embedding_name=None,
            shared_vocab_size=None,
            max_norm=None,
            trainable=True):
    """Validates the arguments and builds the `_RNNColumn` tuple.

    When `initializer` is `None`, a zero-mean truncated normal initializer
    with stddev `1/sqrt(sparse_id_column.length)` is used and a warning about
    the upcoming change of that default is logged.

    Raises:
      ValueError: if `initializer` is given but not callable, or if only one
        of `ckpt_to_load_from` / `tensor_name_in_ckpt` is given.
    """
    if initializer is not None and not callable(initializer):
        raise ValueError("initializer must be callable if specified. "
                         "Embedding of column_name: {}".format(
                             sparse_id_column.name))

    # The checkpoint path and tensor name only make sense as a pair.
    has_ckpt = ckpt_to_load_from is not None
    has_tensor_name = tensor_name_in_ckpt is not None
    if has_ckpt != has_tensor_name:
        raise ValueError("Must specify both `ckpt_to_load_from` and "
                         "`tensor_name_in_ckpt` or none of them.")

    if initializer is None:
        logging.warn("The default stddev value of initializer will change from "
                     "\"1/sqrt(vocab_size)\" to \"1/sqrt(dimension)\" after "
                     "2017/02/25.")
        default_stddev = 1 / math.sqrt(sparse_id_column.length)
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=default_stddev)

    field_values = (
        sparse_id_column, embedding_dimension, max_sequence_length,
        initializer, num_units, cell_type, bidirectional_rnn, mode,
        dropout_keep_probabilities, ckpt_to_load_from, tensor_name_in_ckpt,
        shared_embedding_name, shared_vocab_size, max_norm, trainable)
    return super(_RNNColumn, cls).__new__(cls, *field_values)
def __new__(cls, sparse_id_column, dimension, combiner="mean",
            initializer=None):
    """Validates `initializer` and builds the `_EmbeddingColumn` tuple.

    When no initializer is given, a zero-mean truncated normal initializer
    with stddev `1/sqrt(sparse_id_column.length)` is used.

    Raises:
      ValueError: if `initializer` is given but is not callable.
    """
    if initializer is None:
        # TODO(b/25671353): Better initial value?
        default_stddev = 1 / math.sqrt(sparse_id_column.length)
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=default_stddev)
    elif not callable(initializer):
        raise ValueError("initializer must be callable if specified.")

    return super(_EmbeddingColumn, cls).__new__(
        cls, sparse_id_column, dimension, combiner, initializer)
def _get_initializer(self):
    """Returns the `(weight_initializer, bias_initializer)` pair.

    The weight initializer is selected by `self.args.weight_init`:
    "trunc_norm" gives a truncated normal (mean 0.0, stddev 0.01) and
    "xavier" gives `slim.xavier_initializer()`. The bias initializer is
    always a default `constant_initializer()`.

    Raises:
      ValueError: if `self.args.weight_init` is any other value.
    """
    kind = self.args.weight_init
    if kind == "trunc_norm":
        w_init = init_ops.truncated_normal_initializer(mean=0.0, stddev=0.01)
    elif kind == "xavier":
        w_init = slim.xavier_initializer()
    else:
        # "rand_norm" (random_normal_initializer) and "variance_scale"
        # (slim.variance_scaling_initializer) are currently not enabled.
        raise ValueError("Not supported weight initializer: " + kind)

    b_init = init_ops.constant_initializer()
    return w_init, b_init
def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1):
    """Creates, initializes and evaluates partitioned embedding weights.

    The `[vocab_size, embed_dim]` table is sliced into `num_shards` pieces
    along the first axis and filled from a truncated normal with stddev
    `1/sqrt(vocab_size)`.

    Returns:
      A list with the evaluated value of each slice.
    """
    assert vocab_size > 0
    assert embed_dim > 0
    assert num_shards > 0
    assert num_shards <= vocab_size

    normal_init = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)
    shards = partitioned_variables.create_partitioned_variables(
        shape=[vocab_size, embed_dim],
        slicing=[num_shards, 1],
        initializer=normal_init)

    for shard in shards:
        shard.initializer.run()
    return [shard.eval() for shard in shards]
def fc(name, x, num_units_out):
    """Applies `x * weights + biases` inside variable scope `name`.

    Args:
      name: variable scope name for the layer's variables.
      x: input whose `shape[1]` gives the number of input units.
      num_units_out: number of output units.

    Returns:
      The result of `nn_ops.xw_plus_b(x, weights, biases)`.
    """
    num_units_in = x.shape[1]
    weights_initializer = init_ops.truncated_normal_initializer(stddev=0.01)
    biases_initializer = init_ops.constant_initializer(0.0)

    with vs.variable_scope(name):
        weights = _get_variable(
            'weights',
            shape=[num_units_in, num_units_out],
            init=weights_initializer)
        biases = _get_variable(
            'biases', shape=[num_units_out], init=biases_initializer)
        return nn_ops.xw_plus_b(x, weights, biases)
def __new__(cls, vocabulary_size, dimension, initializer=None,
            combiner='mean'):
    """Embedding table configuration.

    Args:
      vocabulary_size: Number of vocabulary (/rows) in the table.
      dimension: The embedding dimension.
      initializer: A variable initializer function to be used in embedding
        variable initialization. If not specified, defaults to
        `tf.truncated_normal_initializer` with mean `0.0` and standard
        deviation `1/sqrt(dimension)`.
      combiner: A string specifying how to reduce if there are multiple
        entries in a single row. Currently 'mean', 'sqrtn', 'sum' and None
        are supported, with 'mean' the default. 'sqrtn' often achieves good
        accuracy, in particular with bag-of-words columns. For more
        information, see `tf.nn.embedding_lookup_sparse`. None is only valid
        for dense rather than sparse tensors.

    Returns:
      `TableConfig`.

    Raises:
      ValueError: if `vocabulary_size` is not positive integer.
      ValueError: if `dimension` is not positive integer.
      ValueError: if `initializer` is specified and is not callable.
      ValueError: if `combiner` is not supported.
    """
    size_is_valid = isinstance(vocabulary_size, int) and vocabulary_size >= 1
    if not size_is_valid:
        raise ValueError(
            'Invalid vocabulary_size {}.'.format(vocabulary_size))

    dimension_is_valid = isinstance(dimension, int) and dimension >= 1
    if not dimension_is_valid:
        raise ValueError('Invalid dimension {}.'.format(dimension))

    if initializer is None:
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=1 / math.sqrt(dimension))
    elif not callable(initializer):
        raise ValueError('initializer must be callable if specified.')

    supported_combiners = ('mean', 'sum', 'sqrtn', None)
    if combiner not in supported_combiners:
        raise ValueError('Invalid combiner {}'.format(combiner))

    return super(TableConfig, cls).__new__(
        cls, vocabulary_size, dimension, initializer, combiner)
def _conv3d(_input, out_channels, kd=3, kh=3, kw=3, sd=1, sh=1, sw=1,
            stddev=0.01, padding='SAME', name="conv2d", dtype=dtypes.float32,
            bias_add=True):
    """
    A wrapped conv3d operation

    :param _input: tensor, shape = [batch_size, depth, height, width, channels]
    :param out_channels: scalar, convolution output channels
    :param kd: scalar, filter depth
    :param kh: scalar, filter height
    :param kw: scalar, filter width
    :param sd: scalar, stride depth
    :param sh: scalar, stride y
    :param sw: scalar, stride x
    :param stddev: scalar, standard deviation used for params' initialization
    :param padding: string, 'VALID' or 'SAME'
    :param name: string, variable scope holding the 'w' and 'biases' variables
    :param dtype: dtype of the created variables and their initializers
    :param bias_add: bool, whether to add bias to convolution result
    :return: tensor, convolution result which has the same shape length as _input
    """
    in_channels = _input.shape[-1].value
    kernel_shape = [kd, kh, kw, in_channels, out_channels]
    strides = [1, sd, sh, sw, 1]

    with variable_scope.variable_scope(name):
        # The seed is fixed, so the kernel initializer is seeded identically
        # on every call.
        kernel_init = init_ops.truncated_normal_initializer(
            stddev=stddev, dtype=dtype, seed=20170705)
        weights = variable_scope.get_variable(
            'w', kernel_shape, dtype=dtype, initializer=kernel_init)
        conv = nn_ops.conv3d(_input, weights, strides=strides,
                             padding=padding)

        if not bias_add:
            return conv

        biases = variable_scope.get_variable(
            'biases', [out_channels],
            initializer=init_ops.constant_initializer(0.0, dtype=dtype),
            dtype=dtype)
        return nn_ops.bias_add(conv, biases)
def _random_weights(self, size=50, num_shards=1):
    """Builds a sharded 1-D "embedding_weights" variable and initializes it.

    Args:
      size: total number of weights; must be positive.
      num_shards: number of shards; must be in `[1, size]`.

    Returns:
      A list of the variable's shards, each with its initializer already run.
    """
    assert size > 0
    assert num_shards > 0
    assert num_shards <= size

    variable_kwargs = dict(
        shape=[size],
        partitioner=partitioned_variables.fixed_size_partitioner(num_shards),
        initializer=init_ops.truncated_normal_initializer(
            mean=0.0, stddev=1.0, dtype=dtypes.float32))
    weight_shards = list(
        variable_scope.get_variable("embedding_weights", **variable_kwargs))

    for shard in weight_shards:
        shard.initializer.run()
    return weight_shards
def _conv2d(self, inputs, output_filters, bias_initializer):
    """Stride-1 'SAME' 2-D convolution with an optional bias.

    The kernel shape is `self._kernel_size` followed by the input's last
    dimension and `output_filters`. A bias is created and added only when
    `self._normalizer_fn` is falsy.

    Args:
      inputs: input tensor; its last static dimension is the channel count.
      output_filters: number of output channels.
      bias_initializer: initializer for the 'bias' variable.

    Returns:
      The convolution output, with bias added when no normalizer is set.
    """
    in_channels = inputs.get_shape().as_list()[-1]
    kernel_shape = list(self._kernel_size) + [in_channels, output_filters]
    kernel = vs.get_variable(
        "kernel",
        kernel_shape,
        dtype=dtypes.float32,
        initializer=init_ops.truncated_normal_initializer(stddev=0.02))

    outputs = nn_ops.conv2d(inputs, kernel, [1] * 4, padding='SAME')
    if self._normalizer_fn:
        return outputs

    bias = vs.get_variable(
        'bias', [output_filters],
        dtype=dtypes.float32,
        initializer=bias_initializer)
    return nn_ops.bias_add(outputs, bias)
def _create_embedding_lookup(input_tensor, vocab_size, dimension,
                             weight_collections, stddev, combiner, trainable,
                             name):
    """Creates embedding variable and does a lookup.

    Args:
      input_tensor: A tensor which should contain sparse id to look up.
      vocab_size: An integer specifying the vocabulary size.
      dimension: An integer specifying the embedding vector dimension.
      weight_collections: List of graph collections to which weights are
        added.
      stddev: the standard deviation to be used in embedding initialization.
        A value that is not greater than zero selects zero initialization.
      combiner: A string specifying how to reduce if the sparse column is
        multivalent. Currently "mean", "sqrtn" and "sum" are supported:
          * "sum": do not normalize features in the column
          * "mean": do l1 normalization on features in the column
          * "sqrtn": do l2 normalization on features in the column
        For more information: `tf.embedding_lookup_sparse`.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      name: A string specifying the name of the embedding variable.

    Returns:
      A Tensor with shape [batch_size, dimension] and embedding Variable.
    """
    partition_fn = _max_size_embedding_partitioner()
    slicing = partition_fn(vocab_size, dimension)
    logging.info("Slicing=%s for name=%s, vocab_size=%d, embed_dim=%d",
                 str(slicing), name, vocab_size, dimension)

    # Note: `zeros_initializer` is passed as-is (not called), as before.
    initializer = (
        init_ops.truncated_normal_initializer(stddev=stddev)
        if stddev > 0 else init_ops.zeros_initializer)

    embeddings = partitioned_variables.create_partitioned_variables(
        shape=[vocab_size, dimension],
        slicing=slicing,
        initializer=initializer,
        dtype=dtypes.float32,
        collections=weight_collections,
        name=name,
        reuse=False,
        trainable=trainable)

    looked_up = contrib_embedding_ops.safe_embedding_lookup_sparse(
        embeddings,
        input_tensor,
        default_id=0,
        combiner=combiner,
        name=name)
    return looked_up, embeddings
def __new__(cls, vocabulary_size, dimension, initializer=None,
            combiner='mean'):
    """Embedding table configuration.

    Args:
      vocabulary_size: Number of vocabulary (/rows) in the table.
      dimension: The embedding dimension.
      initializer: A variable initializer function to be used in embedding
        variable initialization. If not specified, defaults to
        `tf.truncated_normal_initializer` with mean `0.0` and standard
        deviation `1/sqrt(dimension)`.
      combiner: A string specifying how to reduce if there are multiple
        entries in a single row. Currently 'mean', 'sqrtn', 'sum' and None
        are supported, with 'mean' the default. 'sqrtn' often achieves good
        accuracy, in particular with bag-of-words columns. For more
        information, see `tf.nn.embedding_lookup_sparse`. None is only valid
        for dense rather than sparse tensors.

    Returns:
      `TableConfig`.

    Raises:
      ValueError: if `vocabulary_size` is not positive integer.
      ValueError: if `dimension` is not positive integer.
      ValueError: if `initializer` is specified and is not callable.
      ValueError: if `combiner` is not supported.
    """

    def is_positive_int(value):
        # Same predicate the constructor applies to both size arguments.
        return isinstance(value, int) and not value < 1

    if not is_positive_int(vocabulary_size):
        raise ValueError('Invalid vocabulary_size {}.'.format(vocabulary_size))
    if not is_positive_int(dimension):
        raise ValueError('Invalid dimension {}.'.format(dimension))

    if initializer is not None and not callable(initializer):
        raise ValueError('initializer must be callable if specified.')
    if initializer is None:
        default_stddev = 1 / math.sqrt(dimension)
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=default_stddev)

    if combiner not in ('mean', 'sum', 'sqrtn', None):
        raise ValueError('Invalid combiner {}'.format(combiner))

    return super(TableConfig, cls).__new__(cls, vocabulary_size, dimension,
                                           initializer, combiner)
def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1):
    """Creates sharded embedding weights and returns their evaluated values.

    A `[vocab_size, embed_dim]` variable named "embedding_weights" is split
    into `num_shards` shards and filled from a truncated normal with stddev
    `1/sqrt(vocab_size)`. Initialization and evaluation go through
    `self.evaluate`.

    Returns:
      A list with the evaluated value of each shard.
    """
    assert vocab_size > 0
    assert embed_dim > 0
    assert num_shards > 0
    assert num_shards <= vocab_size

    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)
    partitioned = variable_scope.get_variable(
        name="embedding_weights",
        shape=[vocab_size, embed_dim],
        partitioner=partitioned_variables.fixed_size_partitioner(num_shards),
        initializer=initializer)
    shards = list(partitioned)

    # Run every initializer before reading any value back.
    for shard in shards:
        self.evaluate(shard.initializer)
    return [self.evaluate(shard) for shard in shards]
def _create_partition_checkpoints(sess, checkpoint_dir):
    """Saves a partitioned `[100, 100]` variable "var1" and returns its values.

    Args:
      sess: session used to initialize, read and save the variable.
      checkpoint_dir: directory the checkpoint (prefix "model") is written to.

    Returns:
      The values of the variable's partitions, as fetched before saving.
    """
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=5, axis=0, min_slice_size=8 << 10)
    v1 = variable_scope.get_variable(
        name="var1",
        shape=[100, 100],
        initializer=init_ops.truncated_normal_initializer(0.5),
        partitioner=partitioner)

    sess.run(variables.global_variables_initializer())
    v1_value = sess.run(v1._get_variable_list())

    saver_lib.Saver().save(
        sess,
        os.path.join(checkpoint_dir, "model"),
        global_step=0,
        latest_filename="checkpoint")
    return v1_value
def inception_v3_arg_scope(weight_decay=0.00004,
                           stddev=0.1,
                           batch_norm_var_collection='moving_vars',
                           use_fused_batchnorm=True):
    """Defines the default InceptionV3 arg scope.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      stddev: The standard deviation of the truncated normal weight
        initializer.
      batch_norm_var_collection: The name of the collection for the batch norm
        variables.
      use_fused_batchnorm: Enable fused batchnorm.

    Returns:
      An `arg_scope` to use for the inception v3 model.
    """
    # Only the moving statistics are placed in the named collection.
    variables_collections = {
        'beta': None,
        'gamma': None,
        'moving_mean': [batch_norm_var_collection],
        'moving_variance': [batch_norm_var_collection],
    }
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # collection containing update_ops.
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
        # Use fused batch norm if possible.
        'fused': use_fused_batchnorm,
        # collection containing the moving mean and moving variance.
        'variables_collections': variables_collections,
    }

    # Set weight_decay for weights in Conv and FC layers.
    regularized_scope = arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
        weights_regularizer=regularizers.l2_regularizer(weight_decay))
    with regularized_scope:
        conv_scope = arg_scope(
            [layers.conv2d],
            weights_initializer=init_ops.truncated_normal_initializer(
                stddev=stddev),
            activation_fn=nn_ops.relu,
            normalizer_fn=layers_lib.batch_norm,
            normalizer_params=batch_norm_params)
        with conv_scope as sc:
            return sc
def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1):
    """Creates sharded embedding weights and returns their evaluated values.

    A `[vocab_size, embed_dim]` variable named "embedding_weights" is split
    into `num_shards` shards and filled from a truncated normal with stddev
    `1/sqrt(vocab_size)`.

    Returns:
      A list with the evaluated value of each shard.
    """
    assert vocab_size > 0
    assert embed_dim > 0
    assert num_shards > 0
    assert num_shards <= vocab_size

    stddev = 1.0 / math.sqrt(vocab_size)
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=stddev, dtype=dtypes.float32)
    partitioner = partitioned_variables.fixed_size_partitioner(num_shards)
    shards = list(
        variable_scope.get_variable(
            "embedding_weights",
            shape=[vocab_size, embed_dim],
            partitioner=partitioner,
            initializer=initializer))

    # Run every initializer before reading any value back.
    for shard in shards:
        shard.initializer.run()
    return [shard.eval() for shard in shards]
def _create_partition_checkpoints(sess, checkpoint_dir):
    """Writes a checkpoint holding a partitioned variable "var1".

    Args:
      sess: session used to initialize, read and save the variable.
      checkpoint_dir: directory the checkpoint (prefix "model") is written to.

    Returns:
      The values of the variable's partitions, as fetched before saving.
    """
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    checkpoint_state_name = "checkpoint"

    variable_kwargs = dict(
        name="var1",
        shape=[100, 100],
        initializer=init_ops.truncated_normal_initializer(0.5),
        partitioner=partitioned_variables.min_max_variable_partitioner(
            max_partitions=5, axis=0, min_slice_size=8 << 10))
    v1 = variable_scope.get_variable(**variable_kwargs)

    sess.run(variables.global_variables_initializer())
    partition_values = sess.run(v1._get_variable_list())

    saver = saver_lib.Saver()
    saver.save(
        sess,
        checkpoint_prefix,
        global_step=0,
        latest_filename=checkpoint_state_name)
    return partition_values
def _deep_embedding_lookup_arguments(self, input_tensor):
    """Packs this column's settings into `_DeepEmbeddingLookupArguments`.

    When `self.initializer` is `None`, a zero-mean truncated normal
    initializer with stddev `1/sqrt(self.length)` is substituted.

    Args:
      input_tensor: tensor converted with `self.to_sparse_tensor` before
        being stored in the result.

    Returns:
      A `_DeepEmbeddingLookupArguments` instance.
    """
    initializer = self.initializer
    if initializer is None:
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=1 / math.sqrt(self.length))

    sparse_input = self.to_sparse_tensor(input_tensor)
    return _DeepEmbeddingLookupArguments(
        input_tensor=sparse_input,
        weight_tensor=None,
        vocab_size=self.length,
        dimension=self.embedding_dimension,
        initializer=initializer,
        combiner=None,
        shared_embedding_name=self.shared_name,
        hash_key=None,
        max_norm=self.max_norm,
        trainable=self.trainable,
        origin_feature_tensor=None,
        bucket_size=self.length)
def test_deepcopy_with_bypass_scope_validation(self):
    """A deep copy of the TPU embedding column keeps its configuration."""
    embedding_dimension = 2
    original = tpu_fc._TPUEmbeddingColumnV2(
        categorical_column=fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=3),
        dimension=embedding_dimension,
        combiner='mean',
        initializer=init_ops.truncated_normal_initializer(
            mean=0.0, stddev=.5),
        max_sequence_length=0,
        use_safe_embedding_lookup=False,
        bypass_scope_validation=True)

    duplicate = copy.deepcopy(original)

    self.assertEqual(embedding_dimension, duplicate.dimension)
    self.assertEqual(original._max_sequence_length,
                     duplicate._max_sequence_length)
    self.assertTrue(duplicate._bypass_scope_validation)
    self.assertFalse(duplicate.use_safe_embedding_lookup)
def get(identifier, **kwargs):
    """Resolves `identifier` to an initializer.

    Resolution order:
      * `None` or an `init_ops.Initializer` instance is returned unchanged.
      * A scalar equal to zero is treated as the string 'zeros'.
      * Any callable is returned unchanged.
      * A string is lower-cased and matched against the known aliases; if no
        alias matches, `<name>` and then `<name>_initializer` are looked up
        in `init_ops` and the object found is returned as-is (not called).
      * Any other scalar becomes a constant initializer with that value.

    Args:
      identifier: `None`, an initializer, a callable, a string or a scalar.
      **kwargs: optional settings: `range` for 'random_uniform'; `mean` and
        `stddev` for 'random_norm' / 'random_normal'.

    Returns:
      The resolved initializer, or `None` when `identifier` is `None`.

    Raises:
      ValueError: if a string identifier cannot be resolved.
      TypeError: if `identifier` is of an unsupported type.
    """
    if identifier is None or isinstance(identifier, init_ops.Initializer):
        return identifier
    if np.isscalar(identifier) and identifier == 0.:
        identifier = 'zeros'

    # TODO: ...
    if callable(identifier):
        return identifier

    # Strings must be handled before the generic scalar case below because
    # a string is also a scalar for `np.isscalar`.
    if isinstance(identifier, six.string_types):
        key = identifier.lower()
        if key == 'random_uniform':
            low, high = checker.get_range(kwargs.get('range', None))
            return init_ops.RandomUniform(minval=low, maxval=high)
        if key in ('random_norm', 'random_normal'):
            # These aliases map to the truncated normal initializer.
            mean = kwargs.get('mean', 0.)
            stddev = kwargs.get('stddev', 1.)
            return init_ops.truncated_normal_initializer(
                mean=mean, stddev=stddev)
        if key in ('glorot_uniform', 'xavier_uniform'):
            return glorot_uniform()
        if key in ('glorot_normal', 'xavier_normal'):
            return init_ops.glorot_normal_initializer()
        if key in ('id', 'identity'):
            return identity()

        # Find initializer in tensorflow.python.ops.init_ops
        namespace = init_ops.__dict__
        initializer = (namespace.get(key, None) or
                       namespace.get('{}_initializer'.format(key), None))
        # If nothing is found
        if initializer is None:
            raise ValueError('Can not resolve "{}"'.format(key))
        # Return initializer with default parameters
        return initializer

    if np.isscalar(identifier):
        return tf.initializers.constant(value=identifier)
    raise TypeError('identifier must be a Initializer or a string')
def test_embedding_lookup_sparse_with_initializer(self):
    """Looked-up embeddings follow the statistics of the given initializer.

    For each (initializer, expected mean, expected stddev) case a fresh
    variable is created, a large set of unique random int64 ids is looked
    up, and the mean / stddev of the returned values are compared with the
    expected ones. The first two rows must also differ from each other.
    """
    embed_dim = 8
    elements_num = 262144
    cases = [
        (init_ops.random_normal_initializer(0.0, 0.001), 0.0, 0.001),
        (init_ops.truncated_normal_initializer(0.0, 0.001), 0.0, 0.00088),
        (keras_init_ops.RandomNormalV2(mean=0.0, stddev=0.001), 0.0, 0.001),
    ]

    for table_index, case in enumerate(cases, start=1):
        initializer, target_mean, target_stddev = case
        with self.session(config=default_config,
                          use_gpu=test_util.is_gpu_available()):
            # Each case gets its own, uniquely named variable.
            embedding_weights = de.get_variable(
                "emb-init-bugfix-" + str(table_index),
                key_dtype=dtypes.int64,
                value_dtype=dtypes.float32,
                devices=_get_devices() * 3,
                initializer=initializer,
                dim=embed_dim,
            )

            random_ids = np.random.randint(
                -0x7FFFFFFFFFFFFFFF,
                0x7FFFFFFFFFFFFFFF,
                elements_num,
                dtype=np.int64,
            )
            unique_ids = np.unique(random_ids)
            ids = constant_op.constant(unique_ids, dtypes.int64)

            vals_op = de.embedding_lookup(embedding_weights, ids, None).eval()
            mean = self.evaluate(math_ops.reduce_mean(vals_op))
            stddev = self.evaluate(math_ops.reduce_std(vals_op))

            rtol = 2e-5
            atol = rtol
            first_row, second_row = list(vals_op[0]), list(vals_op[1])
            self.assertTrue(not (first_row == second_row))
            self.assertAllClose(target_mean, mean, rtol, atol)
            self.assertAllClose(target_stddev, stddev, rtol, atol)
def _gconv(args, output_size, adj, stddev=0.001, bias=True, bias_start=0.0):
    '''
    computes graph convolution

    args - a 3-D tensor or a non-empty sequence of 3-D tensors; all must have
           the same size on dimension 1 (number of vertices) and a statically
           known size on dimension 2
    output_size - output dimension
    adj - weighted similarity matrix
    stddev - when variables are initialized they will be drawn from truncated
             normal w/ this stddev
    bias - whether or not to include bias term in output
    bias_start - initial value of bias

    raises ValueError if args is missing/empty, an argument is not 3-D, its
    last dimension is unknown, or the numbers of vertices are inconsistent
    '''
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 2.
    # (presumably batch_size x n_vertices x arg_size)
    shapes = [a.get_shape().as_list() for a in args]

    # Check rank first: indexing shapes[0][1] on a lower-rank argument would
    # otherwise raise IndexError instead of this descriptive error.
    for shape in shapes:
        if len(shape) != 3:
            raise ValueError(
                "GConv is expecting 3D arguments: %s" % str(shapes))

    n_vertices = shapes[0][1]
    total_arg_size = 0
    for shape in shapes:
        # An unknown last dimension cannot be summed into the kernel shape.
        if shape[2] is None:
            raise ValueError(
                "GConv expects shape[2] of arguments: %s" % str(shapes))
        if shape[1] != n_vertices:
            raise ValueError(
                "Inconsistent number of vertices in arguments: %s"
                % str(shapes))
        total_arg_size += shape[2]

    kernel = vs.get_variable(
        "kernel", [total_arg_size, output_size],
        initializer=init_ops.truncated_normal_initializer(stddev=stddev))

    if len(args) == 1:
        res = layers.gconv(adj, args[0], kernel)
    else:
        res = layers.gconv(adj, array_ops.concat(args, 2), kernel)

    if not bias:
        return res
    bias_term = vs.get_variable(
        "bias", [output_size],
        initializer=init_ops.constant_initializer(bias_start))
    return res + bias_term
def test_bypass_scope_validation(self):
    """`create_state` can be called under two different variable scopes."""
    embedding_dimension = 2
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    column_kwargs = dict(
        categorical_column=categorical_column,
        dimension=embedding_dimension,
        combiner='mean',
        initializer=init_ops.truncated_normal_initializer(
            mean=0.0, stddev=.5),
        max_sequence_length=0,
        learning_rate_fn=None,
        use_safe_embedding_lookup=True,
        bypass_scope_validation=True)
    embedding_column = tpu_fc._TPUEmbeddingColumnV2(**column_kwargs)

    self.assertIs(categorical_column, embedding_column.categorical_column)
    self.assertEqual(embedding_dimension, embedding_column.dimension)

    state_manager = _TestStateManager()
    with tpu_function.tpu_shard_context(1):
        # The same column creates state under two unrelated scopes in turn.
        for scope_name in ('tower1/scope1', 'tower2/scope2'):
            with variable_scope.variable_scope(scope_name):
                embedding_column.create_state(state_manager)
def testTruncatedNormalInitalizer(self):
    """Truncated-normal initialization on the IPU device.

    Checks that the initialized values are close to 1 (the initializer's
    mean) and that the compute sets found in the event trace match the
    expected list of patterns.
    """
    with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
            initializer = init_ops.truncated_normal_initializer(
                mean=1.0, stddev=0.01)
            z = variable_scope.get_variable(
                "z1", shape=[2, 4], dtype=np.float32, initializer=initializer)

    tu.configure_ipu_system()

    expected_compute_sets = [
        '__seed*',
        'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal',
        'z1/Initializer/truncated_normal/mul/multiply.*/Op/Multiply',
        'z1/Initializer/truncated_normal/add.*/AddTo'
    ]

    with tu.ipu_session() as sess:
        # Clean existing reports
        sess.run(report)

        sess.run(variables.global_variables_initializer())
        values = sess.run(z)
        self.assertAllClose(values, np.ones((2, 4)), 0.2, 0.2)

        # Find of the names of compute sets
        trace = sess.run(report)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        cs_list = tu.get_compute_sets_from_report(trace_strings)
        self.assertTrue(
            tu.check_all_compute_sets_and_list(cs_list, expected_compute_sets))
def _conv(args, output_size, filter_size, stddev=0.001, bias=True,
          bias_start=0.0, scope=None):
    """Convolves `args` (concatenated on the last axis) with a learned kernel.

    Args:
      args: a 4-D tensor or a non-empty sequence of 4-D tensors
        (batch_size x height x width x arg_size). All must share the same
        height and width, and the last dimension must be statically known.
      output_size: number of output channels of the kernel.
      filter_size: pair `(filter_height, filter_width)`.
      stddev: stddev of the truncated normal kernel initializer.
      bias: whether to add a learned bias to the result.
      bias_start: constant initial value for the bias.
      scope: variable scope name; defaults to "Conv".

    Returns:
      The stride-1, 'SAME'-padded convolution result, plus the bias when
      `bias` is true.

    Raises:
      ValueError: if `args` is missing/empty, an argument is not 4-D, its
        last dimension is unknown, or heights/widths are inconsistent.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 3.
    # (batch_size x height x width x arg_size)
    shapes = [a.get_shape().as_list() for a in args]

    # Check rank first: indexing shapes[0][1] / [2] on a lower-rank argument
    # would otherwise raise IndexError instead of this descriptive error.
    for shape in shapes:
        if len(shape) != 4:
            raise ValueError("Conv is expecting 4D arguments: %s" % str(shapes))

    height = shapes[0][1]
    width = shapes[0][2]
    total_arg_size = 0
    for shape in shapes:
        if not shape[3]:
            raise ValueError(
                "Conv expects shape[3] of arguments: %s" % str(shapes))
        if shape[1] != height or shape[2] != width:
            raise ValueError(
                "Inconsistent height and width size in arguments: %s"
                % str(shapes))
        total_arg_size += shape[3]

    with vs.variable_scope(scope or "Conv"):
        kernel = vs.get_variable(
            "Kernel",
            [filter_size[0], filter_size[1], total_arg_size, output_size],
            initializer=init_ops.truncated_normal_initializer(stddev=stddev))

        if len(args) == 1:
            res = tf.nn.conv2d(args[0], kernel, [1, 1, 1, 1], padding='SAME')
        else:
            # NOTE(review): `concat(3, args)` is the (axis, values) argument
            # order; confirm it matches the TensorFlow version in use.
            res = tf.nn.conv2d(array_ops.concat(3, args), kernel,
                               [1, 1, 1, 1], padding='SAME')

        if not bias:
            return res
        bias_term = vs.get_variable(
            "Bias", [output_size],
            initializer=init_ops.constant_initializer(bias_start))
    return res + bias_term
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from astronet.contrib import layers
from astronet.contrib.framework.python.ops import arg_scope
from astronet.contrib.layers.python.layers import layers as layers_lib
from astronet.contrib.layers.python.layers import regularizers
from astronet.contrib.layers.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope

# Shorthand for a zero-mean truncated normal initializer with the given stddev.
trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)


def overfeat_arg_scope(weight_decay=0.0005):
  """Builds the argument scope used for the OverFeat layers.

  Conv and fully connected layers get ReLU activations, L2 weight
  regularization and zero-initialized biases; conv layers additionally use
  'SAME' padding and max-pooling layers use 'VALID' padding.

  Args:
    weight_decay: scale passed to the L2 weights regularizer.

  Returns:
    The innermost `arg_scope` object.
  """
  with arg_scope(
      [layers.conv2d, layers_lib.fully_connected],
      activation_fn=nn_ops.relu,
      weights_regularizer=regularizers.l2_regularizer(weight_decay),
      biases_initializer=init_ops.zeros_initializer()):
    with arg_scope([layers.conv2d], padding='SAME'):
      with arg_scope([layers_lib.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def overfeat(inputs, num_classes=1000,
def testInitFromPartitionVar(self):
  """Checks `init_from_checkpoint` against partitioned variables.

  Covers three ways of naming the target: a variable name, a scope prefix
  (with a variable partitioned differently from the checkpoint), and an
  explicit list of partition variables.
  """
  checkpoint_dir = self.get_temp_dir()
  with self.test_session() as session:
    # NOTE(review): v1 is presumably the list of per-partition values written
    # to the checkpoint; confirm against _create_partition_checkpoints.
    v1 = _create_partition_checkpoints(session, checkpoint_dir)

  # New graph and session.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      with variable_scope.variable_scope("some_scope"):
        my1 = variable_scope.get_variable(
            name="my1",
            shape=[100, 100],
            initializer=init_ops.zeros_initializer(),
            partitioner=partitioned_variables.min_max_variable_partitioner(
                max_partitions=5, axis=0, min_slice_size=8 << 10))
        my1_var_list = my1._get_variable_list()

      # Create another variable with different partitions than the variable in
      # the checkpoint.
      with variable_scope.variable_scope("some_other_scope"):
        my2 = variable_scope.get_variable(
            name="var1",
            shape=[100, 100],
            initializer=init_ops.zeros_initializer(),
            partitioner=partitioned_variables.min_max_variable_partitioner(
                max_partitions=5, axis=0, min_slice_size=16 << 10))
        my2_var_list = my2._get_variable_list()

      checkpoint_utils.init_from_checkpoint(checkpoint_dir, {
          "scope/var1": "some_scope/my1",
          "scope/": "some_other_scope/"})

      session.run(variables.global_variables_initializer())
      my1_values = session.run(my1_var_list)
      self.assertAllEqual(my1_values, v1)

      my2_values = session.run(my2_var_list)
      # Verify we created different number of partitions.
      # assertNotEqual replaces the deprecated assertNotEquals alias, which
      # newer Python versions no longer provide.
      self.assertNotEqual(len(my2_values), len(v1))
      # Verify the values were correctly initialized in spite of different
      # partitions.
      full_my2_values = np.concatenate(my2_values, axis=0)
      full_v1_values = np.concatenate(v1, axis=0)
      self.assertAllEqual(full_my2_values, full_v1_values)

  # New graph and session.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      with variable_scope.variable_scope("some_scope"):
        my1 = variable_scope.get_variable(
            name="my1",
            shape=[100, 100],
            initializer=init_ops.truncated_normal_initializer(0.5),
            partitioner=partitioned_variables.min_max_variable_partitioner(
                max_partitions=5, axis=0, min_slice_size=8 << 10))
        my1_var_list = my1._get_variable_list()

      # Here the checkpoint tensor is mapped to the list of partition
      # variables directly instead of to a name.
      checkpoint_utils.init_from_checkpoint(checkpoint_dir,
                                            {"scope/var1": my1_var_list,})

      session.run(variables.global_variables_initializer())
      my1_values = session.run(my1_var_list)
      self.assertAllEqual(my1_values, v1)
def embedding_column(categorical_column,
                     dimension,
                     combiner='mean',
                     initializer=None,
                     max_sequence_length=0,
                     partition_strategy='div'):
  """TPU counterpart of `tf.feature_column.embedding_column`.

  The interface differs from the non-TPU version: `ckpt_to_load_from`,
  `tensor_name_in_ckpt`, `max_norm` and `trainable` are NOT supported and are
  always passed on as `None`, `None`, `None` and `True` respectively.

  Args:
    categorical_column: A categorical column returned from
      categorical_column_with_identity, weighted_categorical_column,
      categorical_column_with_vocabulary_file,
      categorical_column_with_vocabulary_list,
      sequence_categorical_column_with_identity,
      sequence_categorical_column_with_vocabulary_file or
      sequence_categorical_column_with_vocabulary_list.
    dimension: Integer dimension of the embedding; must be > 0.
    combiner: String naming how multiple entries in a single row of a
      non-sequence column are reduced. See
      `tf.feature_column.embedding_column` for details.
    initializer: Callable variable initializer for the embedding variable.
      When omitted, `tf.compat.v1.truncated_normal_initializer` with mean
      `0.0` and standard deviation `1/sqrt(dimension)` is used.
    max_sequence_length: Non-negative integer maximum sequence length.
      Shorter sequences are padded with 0 embeddings and longer ones are
      truncated. Must be positive for sequence features and 0 for
      non-sequence features.
    partition_strategy: How tensors are sharded on the TPU hosts; see
      `tf.nn.safe_embedding_lookup_sparse`. Allowed values are `"div"` and
      `"mod"`. With `"mod"`, evaluation and exporting the model to CPU will
      not work unless the embedding tensors are first shuffled into a single
      shard.

  Returns:
    A _TPUEmbeddingColumn.

  Raises:
    TypeError: if `categorical_column` is not a supported type.
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
  """
  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
    supported_names = ' or '.join(
        cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS)
    type_error_template = ('categorical_column for tpu '
                           ' embedding_column must be type %s, got %s.')
    raise TypeError(
        type_error_template % (supported_names, type(categorical_column)))

  if dimension is None or dimension < 1:
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))
  elif not callable(initializer):
    not_callable_template = ('initializer must be callable if specified. '
                             'Embedding of column_name: {}')
    raise ValueError(not_callable_template.format(categorical_column.name))

  embedding_shape = (categorical_column._num_buckets, dimension)  # pylint: disable=protected-access

  def _creator(weight_collections, scope):
    layer = fc._EmbeddingColumnLayer(
        embedding_shape=embedding_shape,
        initializer=initializer,
        weight_collections=weight_collections,
        trainable=True,
        name='embedding_column_layer')
    return layer(None, scope=scope)  # pylint: disable=not-callable

  tpu_column = _TPUEmbeddingColumn(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      layer_creator=_creator,
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      max_norm=None,
      trainable=True,
      max_sequence_length=max_sequence_length,
      partition_strategy=partition_strategy)
  # The initializer is captured inside the creator function and cannot be
  # reached afterwards, so it is also stored on a dedicated attribute. Note
  # that the non-TPU embedding column and the non-TPU shared embedding column
  # handle the initializer differently; see shared_embedding_columns.
  tpu_column._tpu_initializer = initializer
  return tpu_column
def testDuplicatedInitializer(self):
  """Expects `duplicated_initializer` to be falsy for a truncated normal."""
  # Truncated normal initializer built with positional arguments (0.0, 1.0).
  initializer = init_ops.truncated_normal_initializer(0.0, 1.0)
  is_duplicated = duplicated_initializer(self, initializer, 1)
  self.assertFalse(is_duplicated)
def embedding_column_v2(categorical_column,
                        dimension,
                        combiner='mean',
                        initializer=None,
                        max_sequence_length=0):
  """TPU version of `tf.compat.v1.feature_column.embedding_column`.

  The interface of `tf.tpu.experimental.embedding_column` differs from that
  of `tf.compat.v1.feature_column.embedding_column`: the arguments
  `ckpt_to_load_from`, `tensor_name_in_ckpt`, `max_norm` and `trainable` are
  NOT supported.

  Use this function in place of `tf.compat.v1.feature_column.embedding_column`
  when you want to use the TPU to accelerate your embedding lookups via TPU
  embeddings.

  ```
  column = tf.feature_column.categorical_column_with_identity(...)
  tpu_column = tf.tpu.experimental.embedding_column(column, 10)
  ...
  def model_fn(features):
    dense_feature = tf.keras.layers.DenseFeature(tpu_column)
    embedded_feature = dense_feature(features)
    ...

  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=model_fn,
      ...
      embedding_config_spec=tf.estimator.tpu.experimental.EmbeddingConfigSpec(
        column=[tpu_column],
        ...))
  ```

  Args:
    categorical_column: A categorical column returned from
      `categorical_column_with_identity`, `weighted_categorical_column`,
      `categorical_column_with_vocabulary_file`,
      `categorical_column_with_vocabulary_list`,
      `sequence_categorical_column_with_identity`,
      `sequence_categorical_column_with_vocabulary_file` or
      `sequence_categorical_column_with_vocabulary_list`.
    dimension: Integer dimension of the embedding; must be > 0.
    combiner: String naming how multiple entries in a single row of a
      non-sequence column are reduced. See
      `tf.feature_column.embedding_column` for details.
    initializer: Callable variable initializer for the embedding variable.
      When omitted, `tf.compat.v1.truncated_normal_initializer` with mean
      `0.0` and standard deviation `1/sqrt(dimension)` is used.
    max_sequence_length: Non-negative integer maximum sequence length.
      Shorter sequences are padded with 0 embeddings and longer ones are
      truncated. Must be positive for sequence features and 0 for
      non-sequence features.

  Returns:
    A `_TPUEmbeddingColumnV2`.

  Raises:
    TypeError: if `categorical_column` is not a supported type.
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
  """
  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2):
    supported_names = ' or '.join(
        cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2)
    type_error_template = ('categorical_column for tpu '
                           ' embedding_column must be type %s, got %s.')
    raise TypeError(
        type_error_template % (supported_names, type(categorical_column)))

  if dimension is None or dimension < 1:
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))
  elif not callable(initializer):
    not_callable_template = ('initializer must be callable if specified. '
                             'Embedding of column_name: {}')
    raise ValueError(not_callable_template.format(categorical_column.name))

  return _TPUEmbeddingColumnV2(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      initializer=initializer,
      max_sequence_length=max_sequence_length)
def shared_embedding_columns(categorical_columns,
                             dimension,
                             combiner='mean',
                             initializer=None,
                             shared_embedding_collection_name=None,
                             max_sequence_lengths=None,
                             learning_rate_fn=None,
                             use_safe_embedding_lookup=True):
  """List of dense columns that convert from sparse, categorical input.

  Note that the interface for TPU embedding_column is different from the
  non-TPU version. The following args available for the non-TPU version are
  NOT supported: ckpt_to_load_from, tensor_name_in_ckpt, max_norm and
  trainable.

  Args:
    categorical_columns: A non-empty list of categorical_columns returned from
      categorical_column_with_identity, weighted_categorical_column,
      categorical_column_with_vocabulary_file,
      categorical_column_with_vocabulary_list,
      sequence_categorical_column_with_identity,
      sequence_categorical_column_with_vocabulary_file,
      sequence_categorical_column_with_vocabulary_list
    dimension: An integer specifying dimension of the embedding, must be > 0.
    combiner: A string specifying how to reduce if there are multiple entries
      in a single row for a non-sequence column. For more information, see
      `tf.feature_column.embedding_column`.
    initializer: A variable initializer function to be used in embedding
      variable initialization. If not specified, defaults to
      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
      `1/sqrt(dimension)`.
    shared_embedding_collection_name: Optional name of the collection where
      shared embedding weights are added. If not given, a name is derived from
      the sorted names of `categorical_columns` with the suffix
      `_shared_embedding`. This is also used in `variable_scope` when creating
      shared embedding weights.
    max_sequence_lengths: A list of non-negative integers, either None or
      empty or the same length as the argument categorical_columns. Entries
      corresponding to non-sequence columns must be 0 and entries
      corresponding to sequence columns specify the max sequence length for
      the column. Any sequence shorter than this will be padded with 0
      embeddings and any sequence longer will be truncated.
    learning_rate_fn: A function that takes global step and returns learning
      rate for the embedding table.
    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
      there are no empty rows and all weights and ids are positive at the
      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
      input tensors. Defaults to true, consider turning off if the above
      checks are not needed. Note that having empty rows will not trigger any
      error though the output result might be 0 or omitted.

  Returns:
    A list of `_TPUSharedEmbeddingColumn`, one per entry of
    `categorical_columns` and in the same order.

  Raises:
    TypeError: if any categorical column is a blacklisted or unsupported type.
    ValueError: if `categorical_columns` is empty.
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
    ValueError: if `max_sequence_lengths` is specified and not the same length
      as `categorical_columns`.
    ValueError: if the categorical columns do not all have the same number of
      buckets.
  """
  for categorical_column in categorical_columns:
    if isinstance(categorical_column, _BLACKLISTED_CATEGORICAL_COLUMNS_V2):
      raise TypeError('categorical_column for tpu '
                      ' embedding_column was blacklisted type %s' %
                      type(categorical_column))
    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
      raise TypeError(
          'categorical_column for tpu '
          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join(
              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS),
                                                                  type(categorical_column)))

  if not max_sequence_lengths:
    max_sequence_lengths = [0] * len(categorical_columns)
  if len(max_sequence_lengths) != len(categorical_columns):
    raise ValueError(
        'max_sequence_lengths and categorical_columns must be of '
        'the same length. len(max_sequence_lengths)={} '
        'len(categorical_columns)={}.'.format(
            len(max_sequence_lengths), len(categorical_columns)))

  if (dimension is None) or (dimension < 1):
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if (initializer is not None) and (not callable(initializer)):
    raise ValueError('initializer must be callable if specified. ')
  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))

  # Sort the columns so the default collection name is deterministic even if the
  # user passes columns from an unsorted collection, such as dict.values().
  sorted_columns = sorted(categorical_columns, key=lambda x: x.name)
  if not sorted_columns:
    # Without this guard the bucket lookup below fails with a bare IndexError.
    raise ValueError('categorical_columns must contain at least one column.')

  num_buckets = sorted_columns[0]._num_buckets  # pylint: disable=protected-access

  for c in sorted_columns[1:]:
    if num_buckets != c._num_buckets:  # pylint: disable=protected-access
      raise ValueError(
          'To use shared_embedding_column, all categorical_columns must have '
          'the same number of buckets. Given column: {} with buckets: {} does  '
          'not match column: {} with buckets: {}'.format(
              sorted_columns[0], num_buckets, c, c._num_buckets))  # pylint: disable=protected-access

  if not shared_embedding_collection_name:
    shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns)
    shared_embedding_collection_name += '_shared_embedding'

  tpu_columns = []

  # Create the state (_SharedEmbeddingColumnLayer) here. Columns are built in
  # the caller's order (not the sorted order) so each one lines up with its
  # entry in max_sequence_lengths.
  for categorical_column, max_sequence_length in zip(categorical_columns,
                                                     max_sequence_lengths):
    column = _TPUSharedEmbeddingColumn(
        categorical_column=categorical_column,
        dimension=dimension,
        combiner=combiner,
        initializer=initializer,
        shared_embedding_collection_name=shared_embedding_collection_name,
        ckpt_to_load_from=None,
        tensor_name_in_ckpt=None,
        max_norm=None,
        trainable=True,
        max_sequence_length=max_sequence_length,
        learning_rate_fn=learning_rate_fn,
        use_safe_embedding_lookup=use_safe_embedding_lookup)
    tpu_columns.append(column)

  return tpu_columns
def embedding_column(categorical_column,
                     dimension,
                     combiner='mean',
                     initializer=None,
                     max_sequence_length=0,
                     learning_rate_fn=None,
                     use_safe_embedding_lookup=True):
  """TPU counterpart of `tf.feature_column.embedding_column`.

  The interface differs from the non-TPU version: `ckpt_to_load_from`,
  `tensor_name_in_ckpt`, `max_norm` and `trainable` are NOT supported and are
  always passed on as `None`, `None`, `None` and `True` respectively.

  Args:
    categorical_column: A categorical column returned from
      categorical_column_with_identity, weighted_categorical_column,
      categorical_column_with_vocabulary_file,
      categorical_column_with_vocabulary_list,
      sequence_categorical_column_with_identity,
      sequence_categorical_column_with_vocabulary_file or
      sequence_categorical_column_with_vocabulary_list.
    dimension: Integer dimension of the embedding; must be > 0.
    combiner: String naming how multiple entries in a single row of a
      non-sequence column are reduced. See
      `tf.feature_column.embedding_column` for details.
    initializer: Callable variable initializer for the embedding variable.
      When omitted, `tf.compat.v1.truncated_normal_initializer` with mean
      `0.0` and standard deviation `1/sqrt(dimension)` is used.
    max_sequence_length: Non-negative integer maximum sequence length.
      Shorter sequences are padded with 0 embeddings and longer ones are
      truncated. Must be positive for sequence features and 0 for
      non-sequence features.
    learning_rate_fn: A function that takes global step and returns learning
      rate for the embedding table.
    use_safe_embedding_lookup: If true, safe_embedding_lookup_sparse is used
      instead of embedding_lookup_sparse. It ensures there are no empty rows
      and all weights and ids are positive, at the expense of extra compute
      cost. This only applies to rank 2 (NxM) shaped input tensors. Consider
      turning it off if those checks are not needed; empty rows will then not
      trigger any error though the output result might be 0 or omitted.

  Returns:
    A _TPUEmbeddingColumn.

  Raises:
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
    TypeError: if categorical_column is not a supported type.
  """
  column_type = type(categorical_column)
  if isinstance(categorical_column, _BLACKLISTED_CATEGORICAL_COLUMNS_V2):
    blacklisted_template = ('categorical_column for tpu '
                            ' embedding_column was blacklisted type %s')
    raise TypeError(blacklisted_template % column_type)
  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
    supported_names = ' or '.join(
        cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS)
    unsupported_template = ('categorical_column for tpu '
                            ' embedding_column must be type %s, got %s.')
    raise TypeError(unsupported_template % (supported_names, column_type))

  if dimension is None or dimension < 1:
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))
  elif not callable(initializer):
    not_callable_template = ('initializer must be callable if specified. '
                             'Embedding of column_name: {}')
    raise ValueError(not_callable_template.format(categorical_column.name))

  embedding_shape = (categorical_column._num_buckets, dimension)  # pylint: disable=protected-access

  def _creator(weight_collections, scope):
    layer = fc._EmbeddingColumnLayer(
        embedding_shape=embedding_shape,
        initializer=initializer,
        weight_collections=weight_collections,
        trainable=True,
        name='embedding_column_layer')
    return layer(None, scope=scope)  # pylint: disable=not-callable

  tpu_column = _TPUEmbeddingColumn(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      layer_creator=_creator,
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      max_norm=None,
      trainable=True,
      max_sequence_length=max_sequence_length,
      learning_rate_fn=learning_rate_fn,
      use_safe_embedding_lookup=use_safe_embedding_lookup)
  # The initializer is captured inside the creator function and cannot be
  # reached afterwards, so it is also stored on a dedicated attribute. Note
  # that the non-TPU embedding column and the non-TPU shared embedding column
  # handle the initializer differently; see shared_embedding_columns.
  tpu_column._tpu_initializer = initializer
  return tpu_column
def embedding_column(
    categorical_column,
    dimension,
    combiner="mean",
    initializer=None,
    max_norm=None,
    trainable=True,
):
    """Build the ElasticDL-specific `EmbeddingColumn`.

    The native EmbeddingColumn creates a variable holding the entire
    embedding table, so it cannot benefit from the ElasticDL parameter
    server's ability to partition the table. This column exists to interact
    with the ElasticDL parameter server instead.

    The signature follows the native `tf.feature_column.embedding_column`
    with the unused parameters removed.

    Args:
        categorical_column: A `CategoricalColumn` created by a
            `categorical_column_with_*` function. It produces the sparse IDs
            that are inputs to the embedding lookup.
        dimension: Integer dimension of the embedding; must be > 0.
        combiner: String naming how multiple entries in a single row are
            reduced. 'mean' (the default), 'sqrtn' and 'sum' are supported.
            'sqrtn' often achieves good accuracy, in particular with
            bag-of-words columns. Each can be thought of as an example-level
            normalization on the column. See `tf.embedding_lookup_sparse`.
        initializer: Callable variable initializer for the embedding
            variable. When omitted, `truncated_normal_initializer` with mean
            `0.0` and standard deviation `1/sqrt(dimension)` is used.
        max_norm: If not `None`, embedding values are l2-normalized to this
            value.
        trainable: Whether the embedding is trainable. Default is True.

    Returns:
        `DenseColumn` that converts from sparse input.

    Raises:
        ValueError: if `dimension` not > 0.
        ValueError: if `initializer` is specified and is not callable.
    """
    if dimension is None or dimension < 1:
        raise ValueError("Invalid dimension {}.".format(dimension))

    if initializer is None:
        default_stddev = 1 / math.sqrt(dimension)
        initializer = init_ops.truncated_normal_initializer(
            mean=0.0, stddev=default_stddev
        )
    elif not callable(initializer):
        not_callable_template = (
            "initializer must be callable if specified. "
            "Embedding of column_name: {}"
        )
        raise ValueError(
            not_callable_template.format(categorical_column.name)
        )

    return EmbeddingColumn(
        categorical_column=categorical_column,
        dimension=dimension,
        combiner=combiner,
        initializer=initializer,
        max_norm=max_norm,
        trainable=trainable,
    )
from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib import layers from tensorflow.contrib.framework.python.ops import arg_scope from tensorflow.contrib.layers.python.layers import initializers from tensorflow.contrib.layers.python.layers import layers as layers_lib from tensorflow.contrib.layers.python.layers import regularizers from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import variable_scope trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev) def inception_v3_base(inputs, final_endpoint='Mixed_7c', min_depth=16, depth_multiplier=1.0, scope=None): """Inception model from http://arxiv.org/abs/1512.00567. Constructs an Inception v3 network from inputs to the given final endpoint. This method can construct the network up to the final inception block Mixed_7c. Note that the names of the layers in the paper do not correspond to the names of the endpoints registered by this function although they build the same
def _load_embedding_initializer(ckpt_path,
                                embedding_tensor_name,
                                new_vocab_size,
                                embedding_dim,
                                old_vocab_file,
                                new_vocab_file,
                                old_vocab_size=-1,
                                num_oov_buckets=0,
                                initializer=None,
                                max_rows_in_memory=-1):
  """Returns a variable initializer that loads pre-trained embeddings.

  Thin wrapper around `_load_and_remap_matrix_initializer()` specialized for
  embedding weights: rows are remapped according to the given vocab files,
  columns are not remapped (no column vocab files, no column OOV buckets).
  See that function's docs for more details.

  NOTE: Only for use with div-partitioned variables / vocabularies.

  Args:
    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
      from which the old matrix `Tensor` will be loaded.
    embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
    new_vocab_size: Number of entries in the new vocab.
    embedding_dim: `int` dimension of the embedding vectors from the
      checkpoint. Must match the number of columns in the old embedding
      matrix.
    old_vocab_file: A scalar `Tensor` of type `string` containing the path to
      the old vocabulary file.
    new_vocab_file: A scalar `Tensor` of type `string` containing the path to
      the new vocabulary file.
    old_vocab_size: Number of entries to consider in the old vocabulary. With
      the default of -1 the entire old vocabulary file is used; otherwise only
      the first `old_vocab_size` entries are considered for remapping. Must be
      smaller than the length of `old_vocab_file`.
    num_oov_buckets: `int` number of out-of-vocabulary buckets to use. Must be
      >= 0.
    initializer: Initializer function that accepts a 1-D tensor as the arg to
      specify the shape of the returned tensor. If `None`, defaults to
      `truncated_normal_initializer()` with standard deviation
      `1/sqrt(embedding_dim)`.
    max_rows_in_memory: `int` maximum number of rows to load from the
      checkpoint at once. If less than or equal to 0, the entire matrix is
      loaded into memory. Setting this arg trades increased disk reads for
      lower memory usage.

  Returns:
    A variable initializer function.
  """
  fallback_initializer = initializer
  if fallback_initializer is None:
    # TODO(b/25671353): This should be kept in sync with the stddev used by
    # feature_column.py's _EmbeddingColumn.
    default_stddev = 1.0 / math.sqrt(embedding_dim)
    fallback_initializer = init_ops.truncated_normal_initializer(
        stddev=default_stddev)

  return _load_and_remap_matrix_initializer(
      ckpt_path=ckpt_path,
      old_tensor_name=embedding_tensor_name,
      # Rows are the vocabulary axis and are remapped between vocab files.
      new_row_vocab_size=new_vocab_size,
      old_row_vocab_size=old_vocab_size,
      old_row_vocab_file=old_vocab_file,
      new_row_vocab_file=new_vocab_file,
      num_row_oov_buckets=num_oov_buckets,
      # Columns are the embedding dimension and are passed through unchanged.
      new_col_vocab_size=embedding_dim,
      old_col_vocab_file=None,
      new_col_vocab_file=None,
      num_col_oov_buckets=0,
      initializer=fallback_initializer,
      max_rows_in_memory=max_rows_in_memory)
def shared_embedding_columns_v2(categorical_columns,
                                dimension,
                                combiner='mean',
                                initializer=None,
                                shared_embedding_collection_name=None,
                                max_sequence_lengths=None):
  """TPU version of `tf.compat.v1.feature_column.shared_embedding_columns`.

  Note that the interface for `tf.tpu.experimental.shared_embedding_columns` is
  different from that of `tf.compat.v1.feature_column.shared_embedding_columns`:
  The following arguments are NOT supported: `ckpt_to_load_from`,
  `tensor_name_in_ckpt`, `max_norm` and `trainable`.

  Use this function in place of
  `tf.compat.v1.feature_column.shared_embedding_columns` when you want to use
  the TPU to accelerate your embedding lookups via TPU embeddings.

  ```
  column_a = tf.feature_column.categorical_column_with_identity(...)
  column_b = tf.feature_column.categorical_column_with_identity(...)
  tpu_columns = tf.tpu.experimental.shared_embedding_columns(
      [column_a, column_b], 10)
  ...
  def model_fn(features):
    dense_feature = tf.keras.layers.DenseFeature(tpu_columns)
    embedded_feature = dense_feature(features)
    ...

  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=model_fn,
      ...
      embedding_config_spec=tf.estimator.tpu.experimental.EmbeddingConfigSpec(
          column=tpu_columns,
          ...))
  ```

  Args:
    categorical_columns: A non-empty list of categorical columns returned from
      `categorical_column_with_identity`, `weighted_categorical_column`,
      `categorical_column_with_vocabulary_file`,
      `categorical_column_with_vocabulary_list`,
      `sequence_categorical_column_with_identity`,
      `sequence_categorical_column_with_vocabulary_file`,
      `sequence_categorical_column_with_vocabulary_list`
    dimension: An integer specifying dimension of the embedding, must be > 0.
    combiner: A string specifying how to reduce if there are multiple entries
      in a single row for a non-sequence column. For more information, see
      `tf.feature_column.embedding_column`.
    initializer: A variable initializer function to be used in embedding
      variable initialization. If not specified, defaults to
      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
      `1/sqrt(dimension)`.
    shared_embedding_collection_name: Optional name of the collection where
      shared embedding weights are added. If not given, a name is derived from
      the sorted names of `categorical_columns` with the suffix
      `_shared_embedding`. This is also used in `variable_scope` when creating
      shared embedding weights.
    max_sequence_lengths: A list of non-negative integers, either None or
      empty or the same length as the argument categorical_columns. Entries
      corresponding to non-sequence columns must be 0 and entries corresponding
      to sequence columns specify the max sequence length for the column. Any
      sequence shorter than this will be padded with 0 embeddings and any
      sequence longer will be truncated.

  Returns:
    A list of `_TPUSharedEmbeddingColumnV2`, one per entry of
    `categorical_columns` and in the same order.

  Raises:
    TypeError: if any categorical column is not a supported type.
    ValueError: if `categorical_columns` is empty.
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
    ValueError: if `max_sequence_lengths` is specified and not the same length
      as `categorical_columns`.
    ValueError: if the categorical columns do not all have the same number of
      buckets.
  """
  for categorical_column in categorical_columns:
    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2):
      raise TypeError(
          'categorical_column for tpu '
          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join(
              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2),
                                                                  type(categorical_column)))

  if not max_sequence_lengths:
    max_sequence_lengths = [0] * len(categorical_columns)
  if len(max_sequence_lengths) != len(categorical_columns):
    raise ValueError('max_sequence_lengths and categorical_columns must be of '
                     'the same length. len(max_sequence_lengths)={} '
                     'len(categorical_columns)={}.'.format(
                         len(max_sequence_lengths), len(categorical_columns)))

  if (dimension is None) or (dimension < 1):
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if (initializer is not None) and (not callable(initializer)):
    raise ValueError('initializer must be callable if specified. ')
  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))

  # Sort the columns so the default collection name is deterministic even if the
  # user passes columns from an unsorted collection, such as dict.values().
  sorted_columns = sorted(categorical_columns, key=lambda x: x.name)
  if not sorted_columns:
    # Without this guard the bucket lookup below fails with a bare IndexError.
    raise ValueError('categorical_columns must contain at least one column.')

  num_buckets = sorted_columns[0]._num_buckets  # pylint: disable=protected-access

  for c in sorted_columns[1:]:
    if num_buckets != c._num_buckets:  # pylint: disable=protected-access
      raise ValueError(
          'To use shared_embedding_column, all categorical_columns must have '
          'the same number of buckets. Given column: {} with buckets: {} does  '
          'not match column: {} with buckets: {}'.format(
              sorted_columns[0], num_buckets, c, c._num_buckets))  # pylint: disable=protected-access

  if not shared_embedding_collection_name:
    shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns)
    shared_embedding_collection_name += '_shared_embedding'

  tpu_columns = []

  # A single creator is shared by every column built below.
  column_creator = fc_lib.SharedEmbeddingColumnCreator(
      dimension=dimension,
      initializer=initializer,
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      num_buckets=num_buckets,
      trainable=None,
      name=shared_embedding_collection_name)

  # Create the state (_SharedEmbeddingColumnLayer) here. Columns are built in
  # the caller's order (not the sorted order) so each one lines up with its
  # entry in max_sequence_lengths.
  for categorical_column, max_sequence_length in zip(
      categorical_columns, max_sequence_lengths):
    column = _TPUSharedEmbeddingColumnV2(
        categorical_column=categorical_column,
        shared_embedding_column_creator=column_creator,
        combiner=combiner,
        initializer=initializer,
        shared_embedding_collection_name=shared_embedding_collection_name,
        max_sequence_length=max_sequence_length)
    tpu_columns.append(column)

  return tpu_columns
def embedding_column(categorical_column,
                     dimension,
                     combiner='mean',
                     initializer=None):
  """Builds the TPU counterpart of `tf.feature_column.embedding_column`.

  The TPU interface is narrower than the non-TPU one: `ckpt_to_load_from`,
  `tensor_name_in_ckpt`, `max_norm` and `trainable` cannot be set by the
  caller. This function always passes `None`, `None`, `None` and `True` for
  them, respectively.

  Args:
    categorical_column: A categorical column whose type is one of
      `_SUPPORTED_CATEGORICAL_COLUMNS`, e.g. as returned from
      categorical_column_with_identity, weighted_categorical_column,
      categorical_column_with_vocabulary_list or
      categorical_column_with_vocabulary_file.
    dimension: An integer specifying the embedding dimension; must be >= 1.
    combiner: A string specifying how to reduce multiple entries in a single
      row. It is forwarded unchanged to the returned column. For more
      information, see `tf.feature_column.embedding_column`.
    initializer: A callable variable initializer used for the embedding
      variable. When `None`, a truncated normal initializer with mean `0.0`
      and standard deviation `1/sqrt(dimension)` is used.

  Returns:
    A `_TPUEmbeddingColumn` with the effective initializer attached as its
    `_tpu_initializer` attribute.

  Raises:
    TypeError: if `categorical_column` is not an instance of one of
      `_SUPPORTED_CATEGORICAL_COLUMNS`.
    ValueError: if `dimension` is `None` or less than 1.
    ValueError: if `initializer` is specified but not callable.
  """
  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
    supported_names = ' or '.join(
        [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS])
    raise TypeError(
        'categorical_column for tpu '
        ' embedding_column must be type %s, got %s.' %
        (supported_names, type(categorical_column)))

  if dimension is None or dimension < 1:
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if initializer is None:
    # Default: truncated normal scaled by the embedding dimension.
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))
  elif not callable(initializer):
    raise ValueError('initializer must be callable if specified. '
                     'Embedding of column_name: {}'.format(
                         categorical_column.name))

  # pylint: disable=protected-access
  embedding_shape = (categorical_column._num_buckets, dimension)
  # pylint: enable=protected-access

  def _creator(weight_collections, scope):
    # Builds the embedding layer on demand and immediately invokes it under
    # the given scope.
    layer = fc._EmbeddingColumnLayer(
        embedding_shape=embedding_shape,
        initializer=initializer,
        weight_collections=weight_collections,
        trainable=True,
        name='embedding_column_layer')
    return layer(None, scope=scope)  # pylint: disable=not-callable

  tpu_column = _TPUEmbeddingColumn(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      layer_creator=_creator,
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      max_norm=None,
      trainable=True)
  # The initializer is captured inside the creator closure and is not
  # accessible from the column afterwards, so it is also stored on a dedicated
  # attribute of the column.
  tpu_column._tpu_initializer = initializer
  return tpu_column
def test_line(x):
  """Returns `x * w + b` for two scalar variables `w` and `b`.

  Both variables are obtained through `variable_scope.get_variable` with an
  empty shape and a default-constructed truncated normal initializer. `w` is
  requested before `b`.

  Args:
    x: The value to multiply by `w` before adding `b`.

  Returns:
    The result of `x * w + b`.
  """
  scalar_variable = lambda var_name: variable_scope.get_variable(
      var_name, shape=[], initializer=init_ops.truncated_normal_initializer())
  slope = scalar_variable("w")
  intercept = scalar_variable("b")
  return x * slope + intercept
def shared_embedding_columns(categorical_columns,
                             dimension,
                             combiner='mean',
                             initializer=None,
                             shared_embedding_collection_name=None,
                             max_sequence_lengths=None,
                             partition_strategy='div'):
  """List of dense columns that convert from sparse, categorical input.

  Note that the interface for TPU embedding_column is different from the
  non-TPU version. The following args available for the non-TPU version are NOT
  supported: ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable.

  Args:
    categorical_columns: A non-empty list of categorical_columns returned from
      categorical_column_with_identity, weighted_categorical_column,
      categorical_column_with_vocabulary_file,
      categorical_column_with_vocabulary_list,
      sequence_categorical_column_with_identity,
      sequence_categorical_column_with_vocabulary_file,
      sequence_categorical_column_with_vocabulary_list. Every entry must be an
      instance of one of `_SUPPORTED_CATEGORICAL_COLUMNS`, and all entries must
      have the same number of buckets.
    dimension: An integer specifying dimension of the embedding, must be > 0.
    combiner: A string specifying how to reduce if there are multiple entries
      in a single row for a non-sequence column. For more information, see
      `tf.feature_column.embedding_column`.
    initializer: A variable initializer function to be used in embedding
      variable initialization. If not specified, defaults to
      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
      `1/sqrt(dimension)`.
    shared_embedding_collection_name: Optional name of the collection where
      shared embedding weights are added. If not given, the name is built by
      joining the column names (sorted by name) with `_` and appending
      `_shared_embedding`. This is also used in `variable_scope` when creating
      shared embedding weights.
    max_sequence_lengths: A list of non-negative integers, either None or
      empty or the same length as the argument categorical_columns. Entries
      corresponding to non-sequence columns must be 0 and entries corresponding
      to sequence columns specify the max sequence length for the column. Any
      sequence shorter than this will be padded with 0 embeddings and any
      sequence longer will be truncated. When None or empty, 0 is used for
      every column.
    partition_strategy: Determines how tensors are sharded on the tpu hosts.
      See `tf.nn.safe_embedding_lookup_sparse` for more details. Allowed
      values are `"div"` and `"mod"`. The value is forwarded unchanged to each
      created column and is not validated by this function.

  Returns:
    A list of `_TPUSharedEmbeddingColumn`, one per entry of
    `categorical_columns` and in the same order.

  Raises:
    TypeError: if any entry of `categorical_columns` is not an instance of one
      of `_SUPPORTED_CATEGORICAL_COLUMNS`.
    ValueError: if `categorical_columns` is empty.
    ValueError: if `dimension` not > 0.
    ValueError: if `initializer` is specified but not callable.
    ValueError: if `max_sequence_lengths` is specified and not the same
      length as `categorical_columns`.
    ValueError: if the columns do not all have the same number of buckets.

  NOTE(review): the per-column constraint on `max_sequence_lengths` (0 for
  non-sequence columns, positive for sequence columns) is not checked in this
  function; presumably it is enforced by `_TPUSharedEmbeddingColumn` - confirm.
  """
  for categorical_column in categorical_columns:
    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
      raise TypeError(
          'categorical_column for tpu '
          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join([
              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
          ]), type(categorical_column)))

  if not max_sequence_lengths:
    max_sequence_lengths = [0] * len(categorical_columns)
  if len(max_sequence_lengths) != len(categorical_columns):
    raise ValueError('max_sequence_lengths and categorical_columns must be of '
                     'the same length. len(max_sequence_lengths)={} '
                     'len(categorical_columns)={}.'.format(
                         len(max_sequence_lengths), len(categorical_columns)))

  if (dimension is None) or (dimension < 1):
    raise ValueError('Invalid dimension {}.'.format(dimension))

  if (initializer is not None) and (not callable(initializer)):
    raise ValueError('initializer must be callable if specified. ')
  if initializer is None:
    initializer = init_ops.truncated_normal_initializer(
        mean=0.0, stddev=1 / math.sqrt(dimension))

  # Sort the columns so the default collection name is deterministic even if the
  # user passes columns from an unsorted collection, such as dict.values().
  sorted_columns = sorted(categorical_columns, key=lambda x: x.name)
  if not sorted_columns:
    # Without this guard the bucket lookup below fails with an opaque
    # IndexError; report the actual problem instead.
    raise ValueError('categorical_columns must contain at least one column.')

  num_buckets = sorted_columns[0]._num_buckets  # pylint: disable=protected-access
  for c in sorted_columns[1:]:
    if num_buckets != c._num_buckets:  # pylint: disable=protected-access
      raise ValueError(
          'To use shared_embedding_column, all categorical_columns must have '
          'the same number of buckets. Given column: {} with buckets: {} does  '
          'not match column: {} with buckets: {}'.format(
              sorted_columns[0], num_buckets, c, c._num_buckets))  # pylint: disable=protected-access

  if not shared_embedding_collection_name:
    shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns)
    shared_embedding_collection_name += '_shared_embedding'

  tpu_columns = []

  # Create the state (_SharedEmbeddingColumnLayer) here.
  # Columns are emitted in the caller's order (not the sorted order), each
  # paired with its own max sequence length.
  for categorical_column, max_sequence_length in zip(
      categorical_columns, max_sequence_lengths):
    column = _TPUSharedEmbeddingColumn(
        categorical_column=categorical_column,
        dimension=dimension,
        combiner=combiner,
        initializer=initializer,
        shared_embedding_collection_name=shared_embedding_collection_name,
        ckpt_to_load_from=None,
        tensor_name_in_ckpt=None,
        max_norm=None,
        trainable=True,
        max_sequence_length=max_sequence_length,
        partition_strategy=partition_strategy)
    tpu_columns.append(column)

  return tpu_columns