def test_get_variable(self):
  with self.session(
      config=default_config,
      graph=ops.Graph(),
      use_gpu=test_util.is_gpu_available(),
  ):
    default_val = -1
    with variable_scope.variable_scope("embedding", reuse=True):
      table1 = de.get_variable("t1",
                               dtypes.int64,
                               dtypes.int32,
                               initializer=default_val,
                               dim=2)
      table2 = de.get_variable("t1",
                               dtypes.int64,
                               dtypes.int32,
                               initializer=default_val,
                               dim=2)
      table3 = de.get_variable("t2",
                               dtypes.int64,
                               dtypes.int32,
                               initializer=default_val,
                               dim=2)
    self.assertAllEqual(table1, table2)
    self.assertNotEqual(table1, table3)
def test_GraphKeys(self):
  v0 = de.Variable(key_dtype=dtypes.int64,
                   value_dtype=dtypes.float32,
                   initializer=0.0,
                   name="v0")
  v1 = de.Variable(key_dtype=dtypes.int64,
                   value_dtype=dtypes.float32,
                   initializer=0.0,
                   name="v1",
                   trainable=False)
  v2 = de.get_variable(
      "v2",
      key_dtype=dtypes.int64,
      value_dtype=dtypes.float32,
      initializer=init_ops.zeros_initializer,
      dim=10,
  )
  v3 = de.get_variable("v3",
                       key_dtype=dtypes.int64,
                       value_dtype=dtypes.float32,
                       initializer=init_ops.zeros_initializer,
                       dim=10,
                       trainable=False)
  de_vars = ops.get_collection(de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES)
  self.assertSetEqual(set([v0, v1, v2, v3]), set(de_vars))
  de_trainable_vars = ops.get_collection(
      de.GraphKeys.TRAINABLE_DYNAMIC_EMBEDDING_VARIABLES)
  self.assertSetEqual(set([v0, v2]), set(de_trainable_vars))
def test_scope_reuse_embedding_lookup(self):
  ids = constant_op.constant([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                             dtype=dtypes.int64)
  with variable_scope.variable_scope("test",
                                     reuse=variable_scope.AUTO_REUSE):
    p1 = de.get_variable(name="p1")
    with variable_scope.variable_scope("q"):
      _, t1 = de.embedding_lookup(p1, ids, name="emb", return_trainable=True)

  with variable_scope.variable_scope("test",
                                     reuse=variable_scope.AUTO_REUSE):
    p1_reuse = de.get_variable(name="p1")
    p2 = de.get_variable(name="p2")
    with variable_scope.variable_scope("q"):
      _, t2 = de.embedding_lookup(p2, ids, name="emb", return_trainable=True)

  self.assertAllEqual(p1.name, "test/p1")
  self.assertAllEqual(p2.name, "test/p2")
  self.assertAllEqual(p1, p1_reuse)
  self.assertEqual(t1.name, "test/q/emb/TrainableWrapper:0")
  self.assertEqual(t2.name, "test/q/emb/TrainableWrapper_1:0")
  self.assertAllEqual(p1._tables[0].name, "test_p1_mht_1of1")
  self.assertAllEqual(p1_reuse._tables[0].name, "test_p1_mht_1of1")
  self.assertAllEqual(p2._tables[0].name, "test_p2_mht_1of1")
def test_scope_reuse_safe_sparse_embedding_lookup(self):
  indices = [
      [0, 0, 0],
      [0, 0, 1],
      [0, 0, 2],
      [0, 1, 0],
      [1, 0, 0],
      [1, 1, 0],
      [1, 1, 1],
  ]
  ids = [0, 1, -1, -1, 2, 0, 1]
  shape = [2, 3, 4]
  sparse_ids = sparse_tensor.SparseTensor(
      constant_op.constant(indices, dtypes.int64),
      constant_op.constant(ids, dtypes.int64),
      constant_op.constant(shape, dtypes.int64),
  )

  with variable_scope.variable_scope("test",
                                     reuse=variable_scope.AUTO_REUSE):
    p1 = de.get_variable(name="p1")
    with variable_scope.variable_scope("q"):
      _, t1 = de.safe_embedding_lookup_sparse(p1,
                                              sparse_ids,
                                              None,
                                              name="safe_sp_emb",
                                              return_trainable=True)

  with variable_scope.variable_scope("test",
                                     reuse=variable_scope.AUTO_REUSE):
    p1_reuse = de.get_variable(name="p1")
    p2 = de.get_variable(name="p2")
    with variable_scope.variable_scope("q"):
      _, t2 = de.safe_embedding_lookup_sparse(p2,
                                              sparse_ids,
                                              None,
                                              name="safe_sp_emb",
                                              return_trainable=True)

  self.assertAllEqual(p1.name, "test/p1")
  self.assertAllEqual(p2.name, "test/p2")
  self.assertAllEqual(p1, p1_reuse)
  self.assertEqual(
      t1.name,
      "test/q/safe_sp_emb/embedding_lookup_sparse/embedding_lookup/TrainableWrapper:0",
  )
  self.assertEqual(
      t2.name,
      "test/q/safe_sp_emb/embedding_lookup_sparse/embedding_lookup/TrainableWrapper_1:0",
  )
  self.assertAllEqual(p1._tables[0].name, "test_p1_mht_1of1")
  self.assertAllEqual(p1_reuse._tables[0].name, "test_p1_mht_1of1")
  self.assertAllEqual(p2._tables[0].name, "test_p2_mht_1of1")
def test_get_variable_reuse_error(self):
  ops.disable_eager_execution()
  with self.session(
      config=default_config,
      graph=ops.Graph(),
      use_gpu=test_util.is_gpu_available(),
  ):
    with variable_scope.variable_scope("embedding", reuse=False):
      _ = de.get_variable("t900", initializer=-1, dim=2)
      with self.assertRaisesRegex(ValueError,
                                  "Variable embedding/t900 already exists"):
        _ = de.get_variable("t900", initializer=-1, dim=2)
def test_higher_rank(self):
  np.random.seed(8)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    for dim in [1, 10]:
      for ids_shape in [[3, 2], [4, 3], [4, 3, 10]]:
        with variable_scope.variable_scope("test_higher_rank", reuse=True):
          params = de.get_variable(
              "t350-" + str(dim),
              dtypes.int64,
              dtypes.float32,
              initializer=2.0,
              dim=dim,
          )
          ids = np.random.randint(2**31,
                                  size=np.prod(ids_shape),
                                  dtype=np.int64).reshape(ids_shape)
          ids = constant_op.constant(ids, dtype=dtypes.int64)
          simple = params.lookup(ids)
          self.evaluate(params.upsert(ids, simple))
          embedding = de.embedding_lookup(params, ids)
          self.assertAllEqual(simple.eval(), embedding.eval())
          self.assertAllEqual(ids_shape + [dim], embedding.eval().shape)
def test_simple_sharded(self):
  embeddings = de.get_variable(
      "t300",
      dtypes.int64,
      dtypes.float32,
      dim=1,
      devices=_get_devices() * 2,
      initializer=2.0,
  )
  ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
  embedding, trainable = de.embedding_lookup(embeddings,
                                             ids,
                                             max_norm=1.0,
                                             return_trainable=True)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    # With initializer 2.0, dim 1 and max_norm=1.0, every looked-up
    # embedding is clipped to [1.0].
    self.assertAllClose(embedding.eval(), [[1.0]] * 5)
    self.evaluate(trainable.update_op())
    self.assertAllEqual(embeddings.size().eval(), 5)
    self.assertAllEqual(embeddings.size(0).eval(), 3)
    self.assertAllEqual(embeddings.size(1).eval(), 2)
def test_fn():
  embeddings = de.get_variable(
      "t2020-v2-" + name + str(id),
      key_dtype=k_dtype,
      value_dtype=d_dtype,
      devices=_get_devices() * num_shards,
      initializer=1.0,
      dim=dim,
  )
  self.device_check(embeddings)
  trainables = []
  init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
  init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
  self.evaluate(embeddings.upsert(init_ids, init_vals))

  def var_fn():
    return trainables

  def loss_fn(x, trainables):
    ids = constant_op.constant(raw_ids, dtype=k_dtype)
    pred, trainable = de.embedding_lookup([x], ids, return_trainable=True)
    trainables.clear()
    trainables.append(trainable)
    return pred * pred

  test_opt_op = test_opt.minimize(lambda: loss_fn(embeddings, trainables),
                                  var_fn)
  self.evaluate(variables.global_variables_initializer())
  for _ in range(run_step):
    self.evaluate(test_opt_op)
  return embeddings.lookup(init_ids)
def test_dynamic_embedding_variable_remove_high_rank(self):
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    default_val = constant_op.constant([-1, -1, -1], dtypes.int32)
    keys = constant_op.constant([0, 1, 2], dtypes.int64)
    values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]],
                                  dtypes.int32)
    table = de.get_variable("t180",
                            dtypes.int64,
                            dtypes.int32,
                            initializer=default_val,
                            dim=3)
    self.evaluate(table.upsert(keys, values))
    self.assertAllEqual(3, self.evaluate(table.size()))

    remove_keys = constant_op.constant([[0, 3]], dtypes.int64)
    self.evaluate(table.remove(remove_keys))
    self.assertAllEqual(2, self.evaluate(table.size()))

    remove_keys = constant_op.constant([[0, 1], [2, 3]], dtypes.int64)
    output = table.lookup(remove_keys)
    self.assertAllEqual([2, 2, 3], output.get_shape())

    result = self.evaluate(output)
    self.assertAllEqual(
        [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result)
def test_check_ops_number(self):
  self.assertTrue(de.get_model_mode() == "train")
  de.enable_inference_mode()
  self.assertTrue(de.get_model_mode() == "inference")
  de.enable_train_mode()
  self.assertTrue(de.get_model_mode() == "train")
  for fn, assign_num, read_num in [(de.enable_train_mode, 1, 2),
                                   (de.enable_inference_mode, 0, 1)]:
    fn()
    embeddings = de.get_variable('ModeModeTest' + str(assign_num),
                                 key_dtype=dtypes.int64,
                                 value_dtype=dtypes.float32,
                                 devices=_get_devices(),
                                 initializer=1.,
                                 dim=8)
    ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
    test_var, trainable = de.embedding_lookup([embeddings],
                                              ids,
                                              return_trainable=True)
    _ = math_ops.add(test_var, 1)
    op_list = ops.get_default_graph().get_operations()
    op_list_assign = [
        op.name for op in op_list if "AssignBeforeReadVariable" in op.name
    ]
    op_list_read = [op.name for op in op_list if "ReadVariableOp" in op.name]
    self.assertTrue(len(op_list_assign) == assign_num)
    self.assertTrue(len(op_list_read) == read_num)
    de.enable_train_mode()
    ops.reset_default_graph()
def __init__(self, var):
  """
  Create a timestamp status sparse variable. The timestamp status has
  the same key_dtype as the target variable and a value_dtype of int32,
  which holds the timestamp value. The timestamp is a numeric record of
  time: the later the update, the larger the timestamp.

  Args:
    var: A `dynamic_embedding.Variable` object to be restricted.
  """
  super(TimestampRestrictPolicy, self).__init__(var)
  scope = variable_scope.get_variable_scope()
  if scope.name:
    tstp_scope = scope.name + '/status'
  else:
    tstp_scope = 'status'
  tstp_name = self.var.name + '/timestamp'

  with ops.name_scope(tstp_scope, 'status', []) as unique_scope:
    if unique_scope:
      full_name = unique_scope + tstp_name
    else:
      full_name = tstp_name
    self.tstp_var = de.get_variable(
        full_name,
        key_dtype=self.var.key_dtype,
        value_dtype=dtypes.int32,
        dim=1,
        devices=self.var.devices,
        partitioner=self.var.partition_fn,
        trainable=False,
        init_size=self.var.init_size,
    )
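# A minimal usage sketch (illustrative, not part of this class): the policy is
# typically attached through `restrict_policy=de.TimestampRestrictPolicy`, as
# exercised by `commonly_apply_restriction_verify` below. The variable name,
# dim and sizes here are assumptions for the example only.
#
#   var = de.get_variable('doc_var',
#                         key_dtype=dtypes.int64,
#                         value_dtype=dtypes.float32,
#                         dim=2,
#                         restrict_policy=de.TimestampRestrictPolicy)
#   var.restrict_policy.apply_update(ids)  # refresh timestamps of `ids`
#   # Shrink back to the 100 most recently seen keys once the table grows
#   # past 150 entries.
#   var.restrict_policy.apply_restriction(100, trigger=150)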
def __init__(self, var):
  """
  Create a frequency status sparse variable. The frequency status has
  the same key_dtype as the target variable and a value_dtype of `int32`,
  which holds the number of occurrences of each feature.

  Args:
    var: A `dynamic_embedding.Variable` object to be restricted.
  """
  super(FrequencyRestrictPolicy, self).__init__(var)
  self.init_count = constant_op.constant(0, dtypes.int32)
  scope = variable_scope.get_variable_scope()
  if scope.name:
    freq_scope = scope.name + '/status'
  else:
    freq_scope = 'status'
  freq_name = self.var.name + '/frequency'

  with ops.name_scope(freq_scope, 'status', []) as unique_scope:
    if unique_scope:
      full_name = unique_scope + freq_name
    else:
      full_name = freq_name
    self.freq_var = de.get_variable(
        full_name,
        key_dtype=self.var.key_dtype,
        value_dtype=dtypes.int32,
        dim=1,
        devices=self.var.devices,
        partitioner=self.var.partition_fn,
        trainable=False,
        init_size=self.var.init_size,
    )
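# A minimal usage sketch (illustrative, mirroring
# `commonly_apply_update_verify` below): the policy can also be constructed
# directly from a variable and driven by hand; `var` and `ids` are assumed
# to exist.
#
#   policy = de.FrequencyRestrictPolicy(var)
#   update_op = policy.apply_update(ids)  # increment occurrence counters
#   keys, freq = policy.status.export()   # inspect per-key frequencies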
def test_embedding_lookup(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  params = de.get_variable('pn012', dim=2, initializer=0.1)
  params.upsert(
      constant_op.constant([1, 2, 3], dtype=dtypes.int64),
      constant_op.constant([[1., 1.], [2., 2.], [3., 3.]],
                           dtype=dtypes.float32))
  shadow = de.shadow_ops.ShadowVariable(params)
  ids = constant_op.constant([2, 3, 4], dtype=dtypes.int64)
  val = de.shadow_ops.embedding_lookup(shadow, ids)
  self.assertAllEqual(
      val,
      constant_op.constant([[2., 2.], [3., 3.], [0.1, 0.1]],
                           dtype=dtypes.float32))

  params.upsert(
      constant_op.constant([1, 2, 3], dtype=dtypes.int64),
      constant_op.constant([[1.1, 1.1], [2.2, 2.2], [3.3, 3.3]],
                           dtype=dtypes.float32))
  val = de.shadow_ops.embedding_lookup(shadow, ids)
  self.assertAllEqual(
      val,
      constant_op.constant([[2.2, 2.2], [3.3, 3.3], [0.1, 0.1]],
                           dtype=dtypes.float32))
def test_create(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  key_dtypes = [dtypes.int32, dtypes.int64]
  value_dtypes = [dtypes.int32, dtypes.float32, dtypes.float64]
  dims = [1, 4]
  trainable_options = [True, False]
  devices = ['/CPU:0']
  var_list = []
  rnd = 0
  for comb in itertools.product(key_dtypes, value_dtypes, dims,
                                trainable_options):
    devar = de.get_variable('sparse_domain-' + str(rnd),
                            key_dtype=comb[0],
                            value_dtype=comb[1],
                            dim=comb[2],
                            initializer=0.1,
                            devices=devices,
                            init_size=1)
    name = 'shadow-' + str(rnd)
    var = de.shadow_ops.ShadowVariable(devar, name=name, trainable=comb[3])
    self.assertEqual(var.dtype, devar.value_dtype)
    self.assertEqual(var.ids.dtype, devar.key_dtype)
    rnd += 1
def test_sharded_custom_partitioner_int32_ids(self):

  def _partition_fn(keys, shard_num):
    return math_ops.cast(keys % 2, dtype=dtypes.int32)

  embeddings = de.get_variable(
      "t330",
      dtypes.int64,
      dtypes.float32,
      partitioner=_partition_fn,
      devices=_get_devices() * 3,
      initializer=2.0,
  )
  ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
  vals = constant_op.constant([[0.0], [1.0], [2.0], [3.0], [4.0]],
                              dtype=dtypes.float32)
  ids_test = constant_op.constant([1, 3, 2, 3, 0], dtype=dtypes.int64)
  embedding = de.embedding_lookup(embeddings, ids_test)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    self.evaluate(embeddings.upsert(ids, vals))
    self.assertAllClose(embedding.eval(), [[1.0], [3.0], [2.0], [3.0], [0.0]])
    self.assertAllEqual([5, 1], embedding.eval().shape)
    # keys % 2 routes even keys {0, 2, 4} to shard 0 and odd keys {1, 3}
    # to shard 1; shard 2 stays empty.
    self.assertAllEqual(3, embeddings.size(0).eval())
    self.assertAllEqual(2, embeddings.size(1).eval())
    self.assertAllEqual(0, embeddings.size(2).eval())
def commonly_apply_update_verify(self):
  first_inputs = np.array(range(3), dtype=np.int64)
  second_inputs = np.array(range(1, 4), dtype=np.int64)
  overdue_features = np.array([0, 3], dtype=np.int64)
  updated_features = np.array(range(1, 3), dtype=np.int64)

  with session.Session(config=default_config) as sess:
    ids = array_ops.placeholder(dtypes.int64)
    var = de.get_variable('sp_var',
                          key_dtype=ids.dtype,
                          value_dtype=dtypes.float32,
                          initializer=-0.1,
                          dim=2)
    embed_w, trainable = de.embedding_lookup(var,
                                             ids,
                                             return_trainable=True,
                                             name='pl3201')
    policy = de.FrequencyRestrictPolicy(var)
    update_op = policy.apply_update(ids)

    self.assertAllEqual(sess.run(policy.status.size()), 0)
    sess.run(update_op, feed_dict={ids: first_inputs})
    self.assertAllEqual(sess.run(policy.status.size()), 3)
    time.sleep(1)
    sess.run(update_op, feed_dict={ids: second_inputs})
    self.assertAllEqual(sess.run(policy.status.size()), 4)

    keys, freq = sess.run(policy.status.export())
    kvs = sorted(dict(zip(keys, freq)).items())
    freq = np.array([x[1] for x in kvs])
    # Features seen in both batches must have higher counts than features
    # seen in only one.
    for x in freq[overdue_features]:
      for y in freq[updated_features]:
        self.assertLess(x, y)
def test_dynamic_embedding_variable_invalid_shape(self):
  with self.session(config=default_config,
                    use_gpu=test_util.is_gpu_available()):
    default_val = constant_op.constant([-1, -1], dtypes.int64)
    keys = constant_op.constant([0, 1, 2], dtypes.int64)
    table = de.get_variable("t110",
                            dtypes.int64,
                            dtypes.int32,
                            initializer=default_val,
                            dim=2)

    # Shape [6] instead of [3, 2]
    values = constant_op.constant([0, 1, 2, 3, 4, 5], dtypes.int32)
    with self.assertRaisesOpError("Expected shape"):
      self.evaluate(table.upsert(keys, values))

    # Shape [2, 3] instead of [3, 2]
    values = constant_op.constant([[0, 1, 2], [3, 4, 5]], dtypes.int32)
    with self.assertRaisesOpError("Expected shape"):
      self.evaluate(table.upsert(keys, values))

    # Shape [2, 2] instead of [3, 2]
    values = constant_op.constant([[0, 1], [2, 3]], dtypes.int32)
    with self.assertRaisesOpError("Expected shape"):
      self.evaluate(table.upsert(keys, values))

    # Shape [3, 1] instead of [3, 2]
    values = constant_op.constant([[0], [2], [4]], dtypes.int32)
    with self.assertRaisesOpError("Expected shape"):
      self.evaluate(table.upsert(keys, values))

    # Valid insert
    values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int32)
    self.evaluate(table.upsert(keys, values))
    self.assertAllEqual(3, self.evaluate(table.size()))
def commonly_apply_update_verify_v2(self):
  if not context.executing_eagerly():
    self.skipTest('Skip graph mode test.')
  first_inputs = np.array(range(6), dtype=np.int64)
  second_inputs = np.array(range(3, 9), dtype=np.int64)
  overdue_features = np.array([0, 1, 2, 6, 7, 8], dtype=np.int64)
  updated_features = np.array(range(3, 6), dtype=np.int64)
  all_features = np.array(range(9), dtype=np.int64)

  with self.session(config=default_config):
    var = de.get_variable('sp_var',
                          key_dtype=dtypes.int64,
                          value_dtype=dtypes.float32,
                          initializer=-0.1,
                          dim=2)
    embed_w, trainable = de.embedding_lookup(var,
                                             first_inputs,
                                             return_trainable=True,
                                             name='vc3939')
    policy = de.FrequencyRestrictPolicy(var)
    self.assertAllEqual(policy.status.size(), 0)
    policy.apply_update(first_inputs)
    self.assertAllEqual(policy.status.size(), len(first_inputs))
    time.sleep(1)
    policy.apply_update(second_inputs)
    self.assertAllEqual(policy.status.size(), len(all_features))

    keys, freq = policy.status.export()
    kvs = sorted(dict(zip(keys.numpy(), freq.numpy())).items())
    freq = np.array([x[1] for x in kvs])
    for x in freq[overdue_features]:
      for y in freq[updated_features]:
        self.assertLess(x, y)
def _random_weights(
    key_dtype=dtypes.int64,
    value_dtype=dtypes.float32,
    vocab_size=4,
    embed_dim=4,
    num_shards=1,
):
  assert vocab_size > 0
  assert embed_dim > 0
  assert num_shards > 0
  assert num_shards <= vocab_size

  initializer = init_ops.truncated_normal_initializer(
      mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)
  embedding_weights = de.get_variable(
      key_dtype=key_dtype,
      value_dtype=value_dtype,
      devices=_get_devices() * num_shards,
      name="embedding_weights",
      initializer=initializer,
      dim=embed_dim,
  )
  return embedding_weights
def test_static_shape_checking(self):
  np.random.seed(8)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    for dim in [1, 10]:
      for ids_shape in [[3, 2], [4, 3], [4, 3, 10]]:
        with variable_scope.variable_scope(
            "test_static_shape_checking" + str(dim),
            reuse=variable_scope.AUTO_REUSE,
        ):
          params = de.get_variable(
              "test_static_shape_checking-" + str(dim),
              dtypes.int64,
              dtypes.float32,
              initializer=2.0,
              dim=dim,
          )
          params_nn = variable_scope.get_variable("n",
                                                  shape=[100, dim],
                                                  use_resource=False)
          ids = np.random.randint(2**31,
                                  size=np.prod(ids_shape),
                                  dtype=np.int64).reshape(ids_shape)
          ids = constant_op.constant(ids, dtype=dtypes.int64)
          embedding_test = de.embedding_lookup(params, ids)
          embedding_base = embedding_ops.embedding_lookup(params_nn, ids)
          self.assertAllEqual(embedding_test.shape, embedding_base.shape)
def test_sharded_multi_lookup_on_one_variable(self):
  embeddings = de.get_variable(
      "t340",
      dtypes.int64,
      dtypes.float32,
      devices=_get_devices() * 3,
      initializer=2.0,
  )
  ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
  vals = constant_op.constant([[0.0], [1.0], [2.0], [3.0], [4.0]],
                              dtype=dtypes.float32)
  new_vals = constant_op.constant([[10.0], [11.0], [12.0], [13.0], [14.0]],
                                  dtype=dtypes.float32)
  ids0 = constant_op.constant([1, 3, 2], dtype=dtypes.int64)
  ids1 = constant_op.constant([3, 4], dtype=dtypes.int64)
  embedding0 = de.embedding_lookup(embeddings, ids0)
  embedding1 = de.embedding_lookup(embeddings, ids1)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    self.evaluate(embeddings.upsert(ids, vals))
    self.assertAllClose(embedding0.eval(), [[1.0], [3.0], [2.0]])
    self.assertAllEqual([3, 1], embedding0.eval().shape)
    self.assertAllClose(embedding1.eval(), [[3.0], [4.0]])
    self.assertAllEqual([2, 1], embedding1.eval().shape)
    self.evaluate(embeddings.upsert(ids, new_vals))
    self.assertAllClose(embedding1.eval(), [[13.0], [14.0]])
    self.assertAllEqual([2, 1], embedding1.eval().shape)
def commonly_apply_restriction_verify(self, optimizer):
  first_inputs = np.array(range(6), dtype=np.int64)
  second_inputs = np.array(range(4, 9), dtype=np.int64)
  overdue_features = np.array(range(4), dtype=np.int64)
  updated_features = np.array(range(4, 9), dtype=np.int64)
  all_input_features = np.array(range(9), dtype=np.int64)
  embedding_dim = 2
  oversize_trigger = 100
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  with session.Session(config=default_config) as sess:
    ids = array_ops.placeholder(dtypes.int64)
    var = de.get_variable('sp_var',
                          key_dtype=ids.dtype,
                          value_dtype=dtypes.float32,
                          initializer=-0.1,
                          dim=embedding_dim,
                          restrict_policy=de.TimestampRestrictPolicy)
    embed_w, trainable = de.embedding_lookup(var,
                                             ids,
                                             return_trainable=True,
                                             name='ut8900')
    loss = _simple_loss(embed_w)
    train_op = optimizer.minimize(loss, var_list=[trainable])
    slot_params = [
        optimizer.get_slot(trainable, name).params
        for name in optimizer.get_slot_names()
    ]
    all_vars = [var] + slot_params + [var.restrict_policy.status]

    sess.run(variables.global_variables_initializer())
    sess.run([train_op], feed_dict={ids: first_inputs})
    time.sleep(1)
    sess.run([train_op], feed_dict={ids: second_inputs})
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), 9)

    keys, tstp = sess.run(var.restrict_policy.status.export())
    kvs = sorted(dict(zip(keys, tstp)).items())
    tstp = np.array([x[1] for x in kvs])
    for x in tstp[overdue_features]:
      for y in tstp[updated_features]:
        self.assertLess(x, y)

    # Trigger larger than the table size: nothing is evicted.
    sess.run(
        var.restrict_policy.apply_restriction(len(updated_features),
                                              trigger=oversize_trigger))
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), len(all_input_features))

    # Trigger equal to the reserved size: the oldest keys are evicted.
    sess.run(
        var.restrict_policy.apply_restriction(len(updated_features),
                                              trigger=len(updated_features)))
    for v in all_vars:
      self.assertAllEqual(sess.run(v.size()), len(updated_features))

    keys, _ = sess.run(var.export())
    keys_sorted = np.sort(keys)
    self.assertAllEqual(keys_sorted, updated_features)
def test_training_with_distributed_strategy(self):
  # TODO(Lifann) Servers stay alive after this case finishes and cause
  # other test cases to fail, so it is kept only for demonstration.
  self.skipTest('Only for demonstration.')
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  cluster_spec = tf.train.ClusterSpec({
      'ps': ['localhost:2220', 'localhost:2221'],
      'worker': ['localhost:2222', 'localhost:2223']
  })
  ps_list, worker_list = _create_ps_and_worker_servers(cluster_spec)

  resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
      cluster_spec)
  strategy = tf.distribute.experimental.ParameterServerStrategy(resolver)
  coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(
      strategy)

  with strategy.scope():
    var = de.get_variable('pf988',
                          dim=2,
                          initializer=0.1,
                          devices=['/job:ps/task:0', '/job:ps/task:1'])
    shadow_var = de.shadow_ops.ShadowVariable(var,
                                              name='pf988-shadow',
                                              distribute_strategy=strategy)
    optimizer = optimizer_v2.adam.Adam(1E-4)
    optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  def dist_dataset_fn():
    dataset_values = np.arange(0, 10, dtype=np.int64)
    fn = lambda x: tf.data.Dataset.from_tensor_slices(dataset_values).batch(
        4).repeat(None)
    return strategy.distribute_datasets_from_function(fn)

  dataset = coordinator.create_per_worker_dataset(dist_dataset_fn)

  @tf.function
  def step_fn(iterator):

    def replica_fn(ids):

      def loss_fn(ids):
        batch_size = tf.shape(ids)[0]
        emb = de.shadow_ops.embedding_lookup(shadow_var, ids)
        loss = tf.reduce_mean(emb)
        return loss

      optimizer.minimize(lambda: loss_fn(ids), [shadow_var])

    return strategy.run(replica_fn, args=(next(iterator),))

  iterator = iter(dataset)
  for i in range(5):
    coordinator.schedule(step_fn, args=(iterator,))
  coordinator.join()
  self.assertAllEqual(var.size(), 10)
def _test_warm_start_estimator(self, num_shards, use_regex):
  devices = ["/cpu:0" for _ in range(num_shards)]
  ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
  id_list = [x for x in range(100)]
  val_list = [[x] for x in range(100)]
  emb_name = "t300_{}_{}".format(num_shards, use_regex)

  with self.session(graph=ops.Graph()) as sess:
    embeddings = de.get_variable(emb_name,
                                 dtypes.int64,
                                 dtypes.float32,
                                 devices=devices,
                                 initializer=0.0)
    ids = constant_op.constant(id_list, dtype=dtypes.int64)
    vals = constant_op.constant(val_list, dtype=dtypes.float32)
    self.evaluate(embeddings.upsert(ids, vals))
    save = saver.Saver(var_list=[embeddings])
    save.save(sess, ckpt_prefix)

  def _input_fn():
    dataset = tf.data.Dataset.from_tensor_slices({
        'ids':
            constant_op.constant([[x] for x in id_list], dtype=dtypes.int64)
    })
    return dataset

  def _model_fn(features, labels, mode, params):
    ids = features['ids']
    embeddings = de.get_variable(emb_name,
                                 dtypes.int64,
                                 dtypes.float32,
                                 devices=devices,
                                 initializer=0.0)
    emb = de.embedding_lookup(embeddings, ids, name="lookup")
    emb.graph.add_to_collection(de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES,
                                embeddings)
    vars_to_warm_start = [embeddings]
    if use_regex:
      vars_to_warm_start = [".*t300.*"]
    warm_start_hook = de.WarmStartHook(ckpt_to_initialize_from=ckpt_prefix,
                                       vars_to_warm_start=vars_to_warm_start)
    return tf.estimator.EstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT,
                                      predictions=emb,
                                      prediction_hooks=[warm_start_hook])

  predictor = tf.estimator.Estimator(model_fn=_model_fn)
  predictions = predictor.predict(_input_fn)
  pred_vals = []
  for pred in predictions:
    pred_vals.append(pred)
  self.assertAllEqual(pred_vals, val_list)
def test_get_size(self):
  if not context.executing_eagerly():
    self.skipTest('Only test in eager mode.')
  params = de.get_variable('pn012', dim=2, initializer=0.1)
  params.upsert(
      constant_op.constant([1, 2, 3], dtype=dtypes.int64),
      constant_op.constant([[1., 1.], [2., 2.], [3., 3.]],
                           dtype=dtypes.float32))
  shadow = de.shadow_ops.ShadowVariable(params)
  self.assertEqual(shadow.size(), 3)
def test_max_norm(self):
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    embeddings = de.get_variable("t310",
                                 dtypes.int64,
                                 dtypes.float32,
                                 initializer=2.0)
    ids = constant_op.constant([0], dtype=dtypes.int64)
    embedding = de.embedding_lookup(embeddings, ids, max_norm=1.0)
    self.assertAllEqual(embedding.eval(), [[1.0]])
def _test_warm_start_rename(self, num_shards, use_regex):
  devices = ["/cpu:0" for _ in range(num_shards)]
  ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
  id_list = [x for x in range(100)]
  val_list = [[x] for x in range(100)]
  emb_name = "t200_{}_{}".format(num_shards, use_regex)

  with self.session(graph=ops.Graph()) as sess:
    embeddings = de.get_variable("save_{}".format(emb_name),
                                 dtypes.int64,
                                 dtypes.float32,
                                 devices=devices,
                                 initializer=0.0)
    ids = constant_op.constant(id_list, dtype=dtypes.int64)
    vals = constant_op.constant(val_list, dtype=dtypes.float32)
    self.evaluate(embeddings.upsert(ids, vals))
    save = saver.Saver(var_list=[embeddings])
    save.save(sess, ckpt_prefix)

  with self.session(graph=ops.Graph()) as sess:
    embeddings = de.get_variable("restore_{}".format(emb_name),
                                 dtypes.int64,
                                 dtypes.float32,
                                 devices=devices,
                                 initializer=0.0)
    ids = constant_op.constant(id_list, dtype=dtypes.int64)
    emb = de.embedding_lookup(embeddings, ids, name="lookup")
    sess.graph.add_to_collection(de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES,
                                 embeddings)
    vars_to_warm_start = [embeddings]
    if use_regex:
      vars_to_warm_start = [".*t200.*"]
    restore_op = de.warm_start(ckpt_to_initialize_from=ckpt_prefix,
                               vars_to_warm_start=vars_to_warm_start,
                               var_name_to_prev_var_name={
                                   "restore_{}".format(emb_name):
                                       "save_{}".format(emb_name)
                               })
    self.evaluate(restore_op)
    self.assertAllEqual(emb, val_list)
def test_dynamic_shape_checking(self):
  np.random.seed(8)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    for dim in [1, 10]:
      for ids_shape in [None, [-1, 1], [1, -1, 1], [-1, 1, 1]]:
        with variable_scope.variable_scope(
            "test_static_shape_checking" + str(dim),
            reuse=variable_scope.AUTO_REUSE,
        ):
          params = de.get_variable(
              "test_static_shape_checking-" + str(dim),
              dtypes.int64,
              dtypes.float32,
              initializer=2.0,
              dim=dim,
          )
          params_nn = variable_scope.get_variable("n",
                                                  shape=[100, dim],
                                                  use_resource=False)
          ids = script_ops.py_func(
              _create_dynamic_shape_tensor(min_val=0, max_val=100),
              inp=[],
              Tout=dtypes.int64,
              stateful=True,
          )
          if ids_shape is not None:
            ids = array_ops.reshape(ids, ids_shape)
          embedding_test = de.embedding_lookup(params, ids)
          embedding_base = embedding_ops.embedding_lookup(params_nn, ids)

          # Check static shape.
          if ids_shape is None:
            # ids with unknown shape.
            self.assertTrue(embedding_test.shape == embedding_base.shape)
          else:
            # ids with a partially defined shape.
            self.assertAllEqual(embedding_test.shape.as_list(),
                                embedding_base.shape.as_list())

          self.evaluate(variables.global_variables_initializer())
          # Check dynamic shape.
          for _ in range(10):
            embedding_test_val, embedding_base_val = self.evaluate(
                [embedding_test, embedding_base])
            self.assertAllEqual(embedding_test_val.shape,
                                embedding_base_val.shape)
def test_embedding_lookup_shape(self):

  def _evaluate(tensors, feed_dict):
    sess = ops.get_default_session()
    if sess is None:
      with self.test_session() as sess:
        return sess.run(tensors, feed_dict=feed_dict)
    else:
      return sess.run(tensors, feed_dict=feed_dict)

  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    default_val = -1
    keys = constant_op.constant([0, 1, 2], dtypes.int64)
    values = constant_op.constant([[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                                  dtypes.int32)
    table = de.get_variable("t140",
                            dtypes.int64,
                            dtypes.int32,
                            dim=3,
                            initializer=default_val)
    self.evaluate(table.upsert(keys, values))
    self.assertAllEqual(3, self.evaluate(table.size()))

    # shape of ids is fully defined
    ids = constant_op.constant([[0, 1], [2, 4]], dtypes.int64)
    embeddings = de.embedding_lookup(table, ids)
    self.assertAllEqual([2, 2, 3], embeddings.get_shape())
    re = self.evaluate(embeddings)
    self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                        re)

    # shape of ids is partially defined
    ids = gen_array_ops.placeholder(shape=(2, None), dtype=dtypes.int64)
    embeddings = de.embedding_lookup(table, ids)
    self.assertFalse(embeddings.get_shape().is_fully_defined())
    re = _evaluate(
        embeddings,
        feed_dict={ids: np.asarray([[0, 1], [2, 4]], dtype=np.int64)})
    self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                        re)

    # shape of ids is unknown
    ids = gen_array_ops.placeholder(dtype=dtypes.int64)
    embeddings = de.embedding_lookup(table, ids)
    self.assertEqual(embeddings.get_shape(), tensor_shape.unknown_shape())
    re = _evaluate(
        embeddings,
        feed_dict={ids: np.asarray([[0, 1], [2, 4]], dtype=np.int64)})
    self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                        re)
def test_dynamic_embedding_variable_export_insert(self):
  with self.session(config=default_config,
                    use_gpu=test_util.is_gpu_available()):
    default_val = constant_op.constant([-1, -1], dtypes.int64)
    keys = constant_op.constant([0, 1, 2], dtypes.int64)
    values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int32)
    table1 = de.get_variable("t101",
                             dtypes.int64,
                             dtypes.int32,
                             initializer=default_val,
                             dim=2)
    self.assertAllEqual(0, self.evaluate(table1.size()))
    self.evaluate(table1.upsert(keys, values))
    self.assertAllEqual(3, self.evaluate(table1.size()))

    input_keys = constant_op.constant([0, 1, 3], dtypes.int64)
    expected_output = [[0, 1], [2, 3], [-1, -1]]
    output1 = table1.lookup(input_keys)
    self.assertAllEqual(expected_output, self.evaluate(output1))

    exported_keys, exported_values = table1.export()
    self.assertAllEqual(3, self.evaluate(exported_keys).size)
    self.assertAllEqual(6, self.evaluate(exported_values).size)

    # Populate a second table from the exported data.
    table2 = de.get_variable("t102",
                             dtypes.int64,
                             dtypes.int32,
                             initializer=default_val,
                             dim=2)
    self.assertAllEqual(0, self.evaluate(table2.size()))
    self.evaluate(table2.upsert(exported_keys, exported_values))
    self.assertAllEqual(3, self.evaluate(table2.size()))

    # Verify the lookup result is still the same.
    output2 = table2.lookup(input_keys)
    self.assertAllEqual(expected_output, self.evaluate(output2))