def testIncompatibleShapes(self):
  """Checks that mismatched sparse ids and weights raise `ValueError`.

  `sp_ids` is built with three entries and dense shape [2, 2], while
  `sp_weights` is built with two entries and dense shape [1, 2]. The lookup is
  expected to reject the pair.
  """
  # `test_session()` is deprecated; `cached_session()` is its replacement and
  # is what the other tests in this file already use.
  with self.cached_session():
    x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)
    sp_ids = sparse_tensor.SparseTensor(
        constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
        constant_op.constant([0, 1, 2], dtypes.int32),
        constant_op.constant([2, 2], dtypes.int64))
    sp_weights = sparse_tensor.SparseTensor(
        constant_op.constant([[0, 0], [0, 1]], dtypes.int64),
        constant_op.constant([12.0, 5.0], dtypes.float32),
        constant_op.constant([1, 2], dtypes.int64))

    with self.assertRaises(ValueError):
      embedding_ops.embedding_lookup_sparse(
          x, sp_ids, sp_weights, combiner="mean")
def testIncompatibleShapes(self):
  """Expects `ValueError` when ids and weights do not share a layout."""
  with self.cached_session():
    params, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)

    # Ids: three entries laid out in a [2, 2] dense shape.
    id_indices = constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64)
    id_values = constant_op.constant([0, 1, 2], dtypes.int32)
    id_dense_shape = constant_op.constant([2, 2], dtypes.int64)
    sp_ids = sparse_tensor.SparseTensor(id_indices, id_values, id_dense_shape)

    # Weights: only two entries, laid out in a [1, 2] dense shape.
    weight_indices = constant_op.constant([[0, 0], [0, 1]], dtypes.int64)
    weight_values = constant_op.constant([12.0, 5.0], dtypes.float32)
    weight_dense_shape = constant_op.constant([1, 2], dtypes.int64)
    sp_weights = sparse_tensor.SparseTensor(
        weight_indices, weight_values, weight_dense_shape)

    with self.assertRaises(ValueError):
      embedding_ops.embedding_lookup_sparse(
          params, sp_ids, sp_weights, combiner="mean")
def testGradientsEmbeddingLookupSparse(self):
  """Checks the gradient of `embedding_lookup_sparse` numerically.

  Runs over every combination of shard count (1, 3), combiner ("sum", "mean",
  "sqrtn"), dtype (float32, float64) and weighted/unweighted lookup, and
  asserts that the error reported by `gradient_checker` stays below a
  dtype-dependent bound.
  """
  vocab_size = 12
  batch_size = 4
  param_shape = [2, 3]
  sp_ids, sp_weights, _, _, _ = (self._RandomIdsAndWeights(
      batch_size, vocab_size))

  for num_shards, combiner, dtype, ignore_weights in itertools.product(
      [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32, dtypes.float64],
      [True, False]):
    # `test_session()` is deprecated; `cached_session()` is its replacement
    # and is what the other tests in this file already use.
    with self.cached_session():
      x, params, _ = _EmbeddingParams(
          num_shards, vocab_size, shape=param_shape, dtype=dtype)

      y = embedding_ops.embedding_lookup_sparse(
          x,
          sp_ids,
          None if ignore_weights else sp_weights,
          combiner=combiner)
      # `params` is keyed by tensor name, i.e. the shard name plus ":0".
      x_name = [_PName(i) for i in range(num_shards)]
      x_init_value = [params[x_n + ":0"] for x_n in x_name]
      x_shape = [i.shape for i in x_init_value]
      y_shape = [batch_size] + list(params[_PName(0) + ":0"].shape[1:])
      err = gradient_checker.compute_gradient_error(
          x, x_shape, y, y_shape, x_init_value=x_init_value)
    # float64 gets a much tighter bound than float32.
    self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
def loss_fn(emb):
  """Returns the element-wise squared prediction for embedding params `emb`.

  `sp_ids` and `x` are not parameters; they are taken from the enclosing
  scope. The ids are looked up unweighted with the 'sum' combiner and the
  result is matrix-multiplied by `x` before squaring.
  """
  looked_up = embedding_ops.embedding_lookup_sparse(
      emb, sp_ids, None, combiner='sum')
  prediction = math_ops.matmul(looked_up, x)
  squared = prediction * prediction
  return squared
def testGradientsEmbeddingLookupSparse(self):
  """Numerically validates gradients of `embedding_lookup_sparse`.

  Every combination of shard count, combiner, dtype and weighted/unweighted
  lookup is checked against a dtype-dependent error bound.
  """
  vocab_size = 12
  batch_size = 4
  param_shape = [2, 3]
  sp_ids, sp_weights, _, _, _ = self._RandomIdsAndWeights(
      batch_size, vocab_size)

  shard_counts = [1, 3]
  combiners = ["sum", "mean", "sqrtn"]
  float_types = [dtypes.float32, dtypes.float64]
  weight_switches = [True, False]

  for num_shards, combiner, dtype, ignore_weights in itertools.product(
      shard_counts, combiners, float_types, weight_switches):
    lookup_weights = None if ignore_weights else sp_weights
    # Double precision is held to a tighter bound than single precision.
    if dtype == dtypes.float64:
      max_error = 1e-5
    else:
      max_error = 2e-3

    with self.cached_session():
      x, params, _ = _EmbeddingParams(
          num_shards, vocab_size, shape=param_shape, dtype=dtype)
      y = embedding_ops.embedding_lookup_sparse(
          x, sp_ids, lookup_weights, combiner=combiner)

      # `params` is keyed by tensor name: the shard name followed by ":0".
      shard_values = [
          params[_PName(shard) + ":0"] for shard in range(num_shards)
      ]
      shard_shapes = [value.shape for value in shard_values]
      trailing_dims = list(params[_PName(0) + ":0"].shape[1:])
      y_shape = [batch_size] + trailing_dims

      err = gradient_checker.compute_gradient_error(
          x, shard_shapes, y, y_shape, x_init_value=shard_values)

    self.assertLess(err, max_error)
def testEmbeddingLookupSparse(self):
  """Compares `embedding_lookup_sparse` against a NumPy reference.

  Covers 1 and 5 shards, the "sum"/"mean"/"sqrtn" combiners, four float dtypes
  and both weighted and unweighted lookups. It checks the static shape, the
  result dtype (float16/bfloat16 inputs are expected to yield float32) and the
  numeric values within a dtype-dependent tolerance.
  """
  vocab_size = 13
  batch_size = 10
  param_shape = [2, 5]
  expected_lookup_result_shape = [None] + param_shape

  (sp_ids, sp_weights, ids, weights,
   vals_per_batch_entry) = self._RandomIdsAndWeights(batch_size, vocab_size)

  grouped_ids = self._GroupByBatchEntry(ids, vals_per_batch_entry)
  grouped_weights = self._GroupByBatchEntry(weights, vals_per_batch_entry)
  # Unit weights stand in for the unweighted case.
  unit_weights = np.ones(np.sum(vals_per_batch_entry))
  grouped_ignored_weights = self._GroupByBatchEntry(unit_weights,
                                                    vals_per_batch_entry)

  low_precision = (dtypes.float16, dtypes.bfloat16)
  per_entry_shape = (batch_size, 1, 1)

  for num_shards, combiner, dtype, ignore_weights in itertools.product(
      [1, 5], ["sum", "mean", "sqrtn"],
      [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64],
      [True, False]):
    with self.cached_session():
      p, params, feed_dict = _EmbeddingParams(
          num_shards, vocab_size, shape=param_shape, dtype=dtype)
      lookup_weights = None if ignore_weights else sp_weights
      embedding_sum = embedding_ops.embedding_lookup_sparse(
          p, sp_ids, lookup_weights, combiner=combiner)

      self.assertEqual(embedding_sum.get_shape().as_list(),
                       expected_lookup_result_shape)
      expected_dtype = dtypes.float32 if dtype in low_precision else dtype
      self.assertEqual(embedding_sum.dtype, expected_dtype)

      tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict)

      if ignore_weights:
        reference_weights = grouped_ignored_weights
      else:
        reference_weights = grouped_weights
      np_embedding_sum, np_weight_sum, np_weight_sq_sum = _EmbeddingResult(
          params,
          grouped_ids,
          num_shards,
          vocab_size,
          weight_vals=reference_weights)

      # Apply the combiner's normalisation to the reference sum, in place.
      if combiner == "mean":
        np_embedding_sum /= np.reshape(np_weight_sum, per_entry_shape)
      if combiner == "sqrtn":
        np_embedding_sum /= np.reshape(
            np.sqrt(np_weight_sq_sum), per_entry_shape)

      # Lower-precision dtypes are compared with a looser tolerance.
      if dtype == dtypes.bfloat16:
        tolerance = 1e-2
      elif dtype == dtypes.float16:
        tolerance = 1e-3
      else:
        tolerance = 1e-6
      self.assertAllClose(np_embedding_sum, tf_embedding_sum, tolerance,
                          tolerance)
def test_embedding_lookup_sparse_shape_checking(self):
  """Checks both sparse lookups report the same static result shape.

  The same sparse ids are looked up in a regular variable through
  `embedding_ops.embedding_lookup_sparse` and in dynamic-embedding weights
  through `de.embedding_lookup_sparse`; the static shapes must be equal.
  """
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    embed_dim = 4
    embedding_weights_nn = variable_scope.get_variable(
        "n", shape=[100, embed_dim], use_resource=False)
    # Reuse `embed_dim` rather than repeating the literal so both sets of
    # weights cannot drift apart.
    embedding_weights_de = _random_weights(embed_dim=embed_dim)
    sparse_ids, _ = ids_and_weights_3d(embed_dim=embed_dim)

    embedding_lookup_base = embedding_ops.embedding_lookup_sparse(
        embedding_weights_nn, sparse_ids, None)
    embedding_lookup_test = de.embedding_lookup_sparse(
        embedding_weights_de, sparse_ids, None)

    # assertEqual prints both shapes on failure; assertTrue(a == b) would
    # only report "False is not true".
    self.assertEqual(embedding_lookup_base.get_shape().as_list(),
                     embedding_lookup_test.get_shape().as_list())
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions. A single
        non-list value is wrapped into a one-element list.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are to be assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default. When `None`, a warning about the planned default change
        is logged and "mean" is used.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
        combining.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  # From here on `embedding_weights` is always a plain list, so no further
  # PartitionedVariable handling is needed.
  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
    # Use the static rank when it is known; otherwise compute it in-graph.
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None
        else original_rank_dim.value)
    # Collapse all leading dimensions into one, keeping the last dimension.
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)])
    if sparse_weights is not None:
      # Rebuild the weights on the reshaped ids' indices and dense shape.
      sparse_weights = sparse_tensor.SparseTensor(
          sparse_ids.indices,
          sparse_weights.values, sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids,
                                                                 default_id or
                                                                 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    # When no default id is given, the final op is the `where` below, so the
    # scope name is reserved for it instead of for the lookup.
    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      # Rows that were empty get the 0-vector instead of the dummy lookup.
      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    # Restore what is statically known: the rank of the leading dimensions
    # and the embedding dimensions of `result`.
    final_result.set_shape(tensor_shape.unknown_shape(
        (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result
def common_minimize_trainable(self, base_opt, test_opt, name):
  """Checks that a dynamic-embedding table trains like a dense variable.

  For every configuration, a dense `Variable` is optimized with `base_opt` in
  one session, and a dynamic-embedding variable seeded with the same values is
  optimized with `test_opt` in a second session. Both minimize the same
  squared-prediction loss over a "sum"-combined sparse lookup. After `run_step`
  steps the values read back from the table must match the dense variable.

  Args:
    base_opt: Optimizer used for the dense baseline variable.
    test_opt: Optimizer used for the dynamic-embedding trainable.
    name: String mixed into the dynamic-embedding variable name.
  """
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)

  configurations = itertools.product(
      [1, 2],
      [dtypes.int64],
      [
          dtypes.float32,
      ],
      [
          "constant",
      ],
      [1, 10],
      [10],
  )
  # `run_id` is a 1-based counter that goes into the table name, so every
  # configuration creates a distinctly named dynamic-embedding variable.
  for run_id, (
      num_shards,
      k_dtype,
      d_dtype,
      _initial_mode,
      dim,
      run_step,
  ) in enumerate(configurations, start=1):
    raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    raw_init_vals = [
        [
            x,
        ] * dim
        for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
    ]
    raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
    sp_ids = sparse_tensor.SparseTensor(
        indices=[
            [0, 0],
            [0, 1],
            [1, 0],
            [2, 1],
        ],
        values=raw_ids,
        dense_shape=[3, 2],
    )
    # `_x * dim` repeats each one-element list `dim` times; the result is
    # then flattened into a (3 * dim, 1) column.
    x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                             dtype=d_dtype)
    x = array_ops.reshape(x, shape=(3 * dim, 1))

    # base branch
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config) as sess:
      base_var = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )
      base_embedding = embedding_ops.embedding_lookup_sparse(
          base_var, sp_ids, None, combiner="sum")
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])
      # run base
      self.evaluate(variables.global_variables_initializer())
      for _ in range(run_step):
        sess.run(base_opt_op)

      base_var_val = self.evaluate(base_var)

    # test branch
    with self.session(config=default_config,
                      use_gpu=test_util.is_gpu_available()) as sess:
      # test var prepare
      embeddings = de.get_variable(
          "t1030-" + name + str(run_id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      # Seed the table with the same values the dense variable starts from.
      init_op = embeddings.upsert(init_ids, init_vals)
      self.evaluate(init_op)

      test_var, trainable = de.embedding_lookup_sparse(
          embeddings,
          sp_ids,
          sp_weights=None,
          combiner="sum",
          return_trainable=True,
      )

      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1
      test_opt_op = test_opt.minimize(loss1, var_list=[trainable])

      self.evaluate(variables.global_variables_initializer())

      # The initializer was just run, so the dense variable is expected to
      # hold its starting values here.
      self.assertAllCloseAccordingToType(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          self.evaluate(base_var),
      )

      # Run `run_step` step of sgd
      for _ in range(run_step):
        sess.run(test_opt_op)
      if test_util.is_gpu_available():
        self.assertTrue(
            _check_device(embeddings.tables[0].resource_handle, "GPU"))

      table_var_val = self.evaluate(
          array_ops.reshape(embeddings.lookup(init_ids), shape=[10, dim]))
      # Validate updated params
      self.assertAllCloseAccordingToType(
          base_var_val,
          table_var_val,
          msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype, dim,
                                           run_step),
      )
def common_minimize_trainable(self, base_opt, test_opt, name):
  """Checks that a dynamic-embedding table trains like a dense variable.

  For every configuration a fresh graph is built with a dense `Variable`
  optimized by `base_opt` and a dynamic-embedding variable, seeded with the
  same values, optimized by `test_opt`. Both minimize the same
  squared-prediction loss over a "sum"-combined sparse lookup. Inside a
  `MonitoredTrainingSession` both train ops are run `run_step` times, after
  which the global step and the trained values are compared.

  Args:
    base_opt: Optimizer used for the dense baseline variable.
    test_opt: Optimizer used for the dynamic-embedding trainable.
    name: String mixed into the dynamic-embedding variable name.
  """
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)

  configurations = itertools.product(
      [3],
      [dtypes.int64],
      [
          dtypes.float32,
      ],
      [
          "constant",
      ],
      [1, 10],
      [10],
  )
  # `run_id` is a 1-based counter that goes into the table name, so every
  # configuration creates a distinctly named dynamic-embedding variable.
  for run_id, (
      num_shards,
      k_dtype,
      d_dtype,
      _initial_mode,
      dim,
      run_step,
  ) in enumerate(configurations, start=1):
    with ops.Graph().as_default():
      raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      raw_init_vals = [
          [
              x,
          ] * dim
          for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      ]
      raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
      sp_ids = sparse_tensor.SparseTensor(
          indices=[
              [0, 0],
              [0, 1],
              [1, 0],
              [2, 1],
          ],
          values=raw_ids,
          dense_shape=[3, 2],
      )
      # `_x * dim` repeats each one-element list `dim` times; the result is
      # then flattened into a (3 * dim, 1) column.
      x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                               dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))
      # base var prepare
      base_var = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )

      # test var prepare
      embeddings = de.get_variable(
          "t1030-" + name + str(run_id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )

      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      # Seeds the table with the same values the dense variable starts from.
      init_op = embeddings.upsert(init_ids, init_vals)

      # base branch
      base_embedding = embedding_ops.embedding_lookup_sparse(
          base_var, sp_ids, None, combiner="sum")
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0

      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

      # test branch
      test_var, trainable = de.embedding_lookup_sparse(
          embeddings,
          sp_ids,
          sp_weights=None,
          combiner="sum",
          return_trainable=True,
      )

      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1

      # Only the test optimizer is given the global step, so the step count
      # checked below reflects runs of `test_opt_op`.
      gstep = training_util.create_global_step()
      test_opt_op = test_opt.minimize(loss1,
                                      var_list=[trainable],
                                      global_step=gstep)

      table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                    shape=[10, dim])

      with monitored_session.MonitoredTrainingSession(
          is_chief=True, config=default_config) as sess:
        sess.run(init_op)
        self.assertAllCloseAccordingToType(
            np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
            sess.run(base_var),
        )

        # run base
        for _ in range(run_step):
          sess.run(base_opt_op)
          sess.run(test_opt_op)

        # Validate global_step
        self.assertEqual(run_step, sess.run(gstep))

        # Validate updated params
        self.assertAllCloseAccordingToType(
            sess.run(base_var),
            sess.run(table_var),
            msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                             dim, run_step),
        )
        self.device_check(embeddings)
def safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, sparse_weights=None, combiner="mean",
    default_id=None, name=None, partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.
    sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are to be assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.

  Returns:
    Dense tensor of shape `[batch_size, embed_dim]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if embedding_weights is None or len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  # Test against None explicitly: whether a weights object was supplied must
  # not depend on how that object evaluates in a boolean context.
  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights]

  contrib_tensor_util.assert_same_float_dtype(
      embedding_weights + [sparse_weights])

  with ops.op_scope(
      embedding_weights + [sparse_ids, sparse_weights], name,
      "embedding_lookup") as scope:
    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    # When no default id is given, the final op is the select below, so the
    # scope name is reserved for it instead of for the lookup.
    result = tf_embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.pack([1, array_ops.shape(result)[1]]))

      # Rows that were empty get the 0-vector instead of the dummy lookup.
      result = math_ops.select(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    return result
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.
    sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are to be assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.

  Returns:
    Dense tensor of shape `[batch_size, embed_dim]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if embedding_weights is None or len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  # Compare with None rather than relying on truthiness: the question is
  # only whether weights were passed, not how the object behaves as a bool.
  has_weights = sparse_weights is not None
  dtype = sparse_weights.dtype if has_weights else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.op_scope(embedding_weights + [sparse_ids, sparse_weights], name,
                    "embedding_lookup") as scope:
    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(
        sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    # Re-test the pruned weights instead of reusing `has_weights`, since
    # `sparse_weights` was reassigned by the pruning step above.
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    # When no default id is given, the final op is the select below, so the
    # scope name is reserved for it instead of for the lookup.
    result = tf_embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.pack([1, array_ops.shape(result)[1]]))

      # Rows that were empty get the 0-vector instead of the dummy lookup.
      result = math_ops.select(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    return result
def common_minimize_trainable(self, base_opt, test_opt, name):
  """Trains a dense variable and a dynamic-embedding table side by side.

  For each run configuration a fresh graph is built in which `base_opt`
  minimizes a squared-prediction loss over a dense `Variable`, and `test_opt`
  minimizes the same loss over a dynamic-embedding variable seeded with
  identical values. After `run_step` steps of both, the global step and the
  trained values are compared.

  Args:
    base_opt: Optimizer used for the dense baseline variable.
    test_opt: Optimizer used for the dynamic-embedding trainable.
    name: String mixed into the dynamic-embedding variable name.
  """
  # String keys are only exercised when no GPU is available.
  keys_type_list = ([dtypes.int64] if test_util.is_gpu_available() else
                    [dtypes.int64, dtypes.string])
  deo.enable_train_mode()

  for (run_id, num_shards, k_dtype, d_dtype, initial_mode, dim,
       run_step) in _next_run_step_config(keys_type_list):
    with ops.Graph().as_default():
      uses_string_keys = k_dtype == dtypes.string

      raw_init_ids = list(range(10))
      if uses_string_keys:
        raw_init_ids = [str(key) for key in raw_init_ids]
      seed_scalars = [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      raw_init_vals = [[scalar] * dim for scalar in seed_scalars]
      num_keys = len(raw_init_ids)

      # The dense lookup always uses int64 ids; the table uses `k_dtype`.
      raw_ids_py = [1, 3, 3, 9]
      raw_ids_nn = constant_op.constant(raw_ids_py, dtype=dtypes.int64)
      if uses_string_keys:
        raw_ids_de = constant_op.constant([str(key) for key in raw_ids_py],
                                          dtype=k_dtype)
      else:
        raw_ids_de = raw_ids_nn

      sp_ids_nn = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
          values=raw_ids_nn,
          dense_shape=[3, 2])
      sp_ids_de = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
          values=raw_ids_de,
          dense_shape=[3, 2])

      # Each scalar is repeated `dim` times, then everything is flattened
      # into a (3 * dim, 1) column.
      x = constant_op.constant(
          [[[scalar] * dim] for scalar in (0.4, 0.5, 0.6)], dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))

      # Dense baseline variable.
      dense_init = np.array(raw_init_vals).reshape([num_keys, dim])
      base_var = variables.Variable(dense_init,
                                    dtype=d_dtype,
                                    shape=[num_keys, dim])

      # Dynamic-embedding variable under test.
      table_name = 't1030-' + name + str(run_id)
      embeddings = deo.get_variable(table_name,
                                    key_dtype=k_dtype,
                                    value_dtype=d_dtype,
                                    devices=_get_devices() * num_shards,
                                    initializer=1.,
                                    dim=dim)

      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)

      # Baseline loss and train op.
      base_embedding = embedding_ops.embedding_lookup_sparse(
          base_var, sp_ids_nn, None, combiner='sum')
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

      # Loss and train op for the table under test.
      test_var, trainable = deo.embedding_lookup_sparse(
          embeddings,
          sp_ids_de,
          sp_weights=None,
          combiner="sum",
          return_trainable=True)
      flat_test_var = array_ops.reshape(test_var, shape=[1, 3 * dim])
      pred1 = math_ops.matmul(flat_test_var, x)
      loss1 = pred1 * pred1

      gstep = training_util.create_global_step()
      test_opt_op = test_opt.minimize(loss1,
                                      var_list=[trainable],
                                      global_step=gstep)

      looked_up = embeddings.lookup(init_ids)
      table_var = array_ops.reshape(looked_up, shape=[10, dim])

      with monitored_session.MonitoredTrainingSession(
          is_chief=True, config=default_config) as sess:
        sess.run(init_op)
        expected_initial = np.array(raw_init_vals).reshape([num_keys, dim])
        self.assertAllCloseAccordingToType(expected_initial,
                                           sess.run(base_var))

        # Step both optimizers in lockstep.
        for _ in range(run_step):
          sess.run(base_opt_op)
          sess.run(test_opt_op)

        # Only the test optimizer advances the global step.
        self.assertEqual(run_step, sess.run(gstep))

        # The trained table must match the trained dense variable.
        condition = "Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                                 dim, run_step)
        self.assertAllCloseAccordingToType(sess.run(base_var),
                                           sess.run(table_var),
                                           msg=condition)
        self.device_check(embeddings)