def test_update_embedding_param(self):
    """Check that `_update_embedding_param` writes variables back to tables.

    Two embedding layers, each with a "momentum" slot table, are registered
    in a `Parameters` store. The wrapper's `_tls` state is seeded by hand
    with ids, embedding variables and slot variables; after the update the
    store must return the same values for those ids.
    """
    dim = 8
    slot = "momentum"
    layer_ids = {
        "test_1": np.array([1, 5]),
        "test_2": np.array([10]),
    }

    # Register one embedding table and one slot table per layer.
    params = Parameters()
    for layer in layer_ids:
        params.embedding_params[layer] = EmbeddingTable(layer, dim)
        slot_table = get_slot_table_name(layer, slot)
        params.embedding_params[slot_table] = EmbeddingTable(
            slot_table, dim, "0.0", True
        )

    # One row per id in each layer, filled with random float32 values.
    embed_vars = {
        layer: tf.Variable(
            np.random.rand(len(ids), dim).astype(np.float32)
        )
        for layer, ids in layer_ids.items()
    }
    slot_vars = {
        layer: {
            slot: tf.Variable(
                np.random.rand(len(ids), dim).astype(np.float32)
            )
        }
        for layer, ids in layer_ids.items()
    }

    optimizer = SGD(momentum=0.1)
    wrapper = OptimizerWrapper(
        optimizer, None, None, params.set_embedding_param
    )
    wrapper._tls._unique_ids_all_layers = layer_ids
    wrapper._tls._embed_variables = embed_vars
    wrapper._tls._slot_variables = slot_vars
    wrapper._update_embedding_param()

    for layer, ids in layer_ids.items():
        stored_embedding = params.get_embedding_param(layer, ids)
        self.assertTrue(
            np.allclose(embed_vars[layer].numpy(), stored_embedding)
        )

        stored_slot = params.get_embedding_param(
            get_slot_table_name(layer, slot), ids
        )
        self.assertTrue(
            np.allclose(slot_vars[layer][slot].numpy(), stored_slot)
        )
def test_create_embedding_table_for_slots(self):
    """Check name, dim and initial values of a slot `EmbeddingTable`.

    A slot table is built with a numeric initializer and `is_slot=True`;
    a looked-up row must be within 1e-4 of that initializer.
    """
    slot_name = "momentum"
    init_value = 3.5
    table_name = get_slot_table_name(self.name, slot_name)
    table = EmbeddingTable(
        table_name,
        dim=self.dim,
        initializer=init_value,
        is_slot=True,
    )
    self.assertIsNotNone(table)
    self.assertEqual(table.name, table_name)
    self.assertEqual(table.dim, self.dim)

    # test initialize
    embedding = table.get([2])
    # Compare the absolute difference: a one-sided `diff < tol` check would
    # also accept values arbitrarily far *below* init_value.
    self.assertTrue((abs(embedding - init_value) < 0.0001).all())
def test_delete_variables(self):
    """Check `_delete_slots_and_weights_in_optimizer` empties the optimizer.

    Two embedding layers with Adam slots "m" and "v" are registered, an
    embedding variable and its slots are created for each layer, and the
    optimizer's `_weights` / `_slots` are inspected before and after the
    delete call.
    """
    params = Parameters()
    embed_layers = ["test_1", "test_2"]
    slot_names = ["m", "v"]
    dim = 8
    for layer in embed_layers:
        params.embedding_params[layer] = EmbeddingTable(layer, dim)
        for slot in slot_names:
            slot_key = get_slot_table_name(layer, slot)
            params.embedding_params[slot_key] = EmbeddingTable(
                slot_key, dim, "0.0", True
            )

    opt = Adam()
    opt_wrapper = OptimizerWrapper(
        opt, None, params.get_embedding_param, params.set_embedding_param
    )

    opt_wrapper._init_thread_local()
    for name in embed_layers:
        # Use explicit ids/values: `np.ndarray(shape, dtype)` would hand
        # back uninitialized memory and make the fixture nondeterministic.
        opt_wrapper._tls._unique_ids_all_layers[name] = np.arange(
            2, dtype=np.int32
        )
        opt_wrapper._create_embedding_variable(
            name, np.zeros((2, dim), dtype=np.float32)
        )
        opt_wrapper._get_slot_and_set_to_optimizer(name)

    # Before deletion: 4 weights and one two-entry slot dict per layer.
    self.assertEqual(len(opt._weights), 4)
    self.assertEqual(len(opt._slots), 2)
    for slot_dict in opt._slots.values():
        self.assertEqual(len(slot_dict), 2)

    opt_wrapper._delete_slots_and_weights_in_optimizer()
    self.assertEqual(len(opt._weights), 0)
    self.assertEqual(len(opt._slots), 0)
def test_set_slot_to_optimizer(self):
    """Check `_get_slot_and_set_to_optimizer` installs slots "m" and "v".

    Slot tables are created with initializer "0.0", so the slot variables
    placed into the optimizer for the looked-up ids are expected to be
    all zeros.
    """
    embed_name = "test_emb"
    # Explicit, initialized fixtures: `np.ndarray(shape, dtype=...)` returns
    # uninitialized memory, which made the expected values below garbage.
    indices = np.array([0, 1], dtype=np.int32)
    embed_values = np.zeros((2, 2), dtype=np.float32)
    slot_values = {
        "m": np.zeros((2, 2), dtype=np.float32),
        "v": np.zeros((2, 2), dtype=np.float32),
    }
    params = Parameters()
    # NOTE(review): dim 8 here differs from the 2-wide values/slot tables;
    # this table does not appear to be looked up in this test - confirm.
    params.embedding_params[embed_name] = EmbeddingTable(embed_name, 8)
    for slot in ["m", "v"]:
        slot_table_name = get_slot_table_name(embed_name, slot)
        params.embedding_params[slot_table_name] = EmbeddingTable(
            slot_table_name, 2, "0.0", True
        )

    opt = Adam()
    opt_wrapper = OptimizerWrapper(opt, None, params.get_embedding_param)
    opt_wrapper._init_thread_local()

    opt_wrapper._tls._unique_ids_all_layers[embed_name] = indices
    opt_wrapper._create_embedding_variable(embed_name, embed_values)
    opt_wrapper._get_slot_and_set_to_optimizer(embed_name)

    # Exactly one variable has slots, and they are the two Adam slots.
    self.assertEqual(len(opt._slots), 1)
    opt_slots = list(opt._slots.values())[0]
    self.assertEqual(sorted(opt_slots.keys()), ["m", "v"])
    for name in ["m", "v"]:
        self.assertTrue(
            np.allclose(opt_slots[name].numpy(), slot_values[name])
        )
def _get_slot_and_set_to_optimizer(self, layer_name):
    """Fetch every allowed slot of `layer_name` and install it in the optimizer.

    For each name in `self._allowed_slot_names`, the slot table is queried
    through `self._lookup_embedding_func` with the ids recorded for the
    layer in `self._tls._unique_ids_all_layers`, and the result is handed
    to `self._create_slot_variable`.
    """
    for slot in self._allowed_slot_names:
        # The ids are read inside the loop so nothing is touched when
        # there are no slot names.
        slot_value = self._lookup_embedding_func(
            get_slot_table_name(layer_name, slot),
            self._tls._unique_ids_all_layers[layer_name],
        )
        # self._create_slot_variable creates a slot variable in tf
        # optimizer and set slot_value to it.
        self._create_slot_variable(layer_name, slot, slot_value)
def _update_embedding_param(self):
    """Push updated embedding rows and their slot rows back to storage.

    For every layer recorded in `self._tls._unique_ids_all_layers`, the
    current embedding variable and each allowed slot variable are
    converted to NumPy and passed to `self._update_embedding_func`
    together with the layer's ids.
    """
    ids_by_layer = self._tls._unique_ids_all_layers
    for layer_name, layer_ids in ids_by_layer.items():
        embedding = self._get_embedding_variable(layer_name).numpy()
        self._update_embedding_func(layer_name, layer_ids, embedding)

        for slot_name in self._allowed_slot_names:
            slot_value = self._get_slot_variable(
                layer_name, slot_name
            ).numpy()
            self._update_embedding_func(
                get_slot_table_name(layer_name, slot_name),
                layer_ids,
                slot_value,
            )
def create_slot_params(self, slot_names, init_values):
    """Add one slot `EmbeddingTable` per (existing table, slot name) pair.

    Args:
        slot_names: Iterable of slot names to create for every table
            currently in `self.embedding_params`.
        init_values: Mapping from slot name to the initializer passed to
            the new slot table.

    Raises:
        ValueError: If a generated slot table name is already a key of
            `self.embedding_params`.
    """
    tables = self.embedding_params
    # Iterate over a snapshot of the keys: new slot tables are inserted
    # into the same dict while looping.
    for layer_name in list(tables):
        for slot_name in slot_names:
            slot_key = get_slot_table_name(layer_name, slot_name)
            if slot_key not in tables:
                tables[slot_key] = EmbeddingTable(
                    slot_key,
                    tables[layer_name].dim,
                    init_values[slot_name],
                    True,
                )
                continue
            raise ValueError(
                "An embedding layer has unexpected name %s" % slot_key
            )
def _test_get_embedding_param(self, slot_names=None, slot_init_value=None):
    """Shared checks for `Parameters.get_embedding_param`.

    Args:
        slot_names: Optional iterable of slot names whose slot tables
            should also be checked. Defaults to no slots.
        slot_init_value: Optional mapping from slot name to the value the
            slot rows are expected to hold. Defaults to an empty mapping.
    """
    # Avoid mutable default arguments; None stands for "nothing to check".
    slot_names = [] if slot_names is None else slot_names
    slot_init_value = {} if slot_init_value is None else slot_init_value

    indices = [0, 3, 7]
    res = self.params.get_embedding_param(
        self.embedding_table_name, indices
    )
    self.assertTupleEqual(res.shape, (3, 10))

    for slot in slot_names:
        res = self.params.get_embedding_param(
            get_slot_table_name(self.embedding_table_name, slot), indices
        )
        # Compare the absolute difference: a one-sided `diff < tol` check
        # would also accept values far below the expected initializer.
        self.assertTrue((abs(res - slot_init_value[slot]) < 0.0001).all())

    # An empty id list yields None.
    res = self.params.get_embedding_param(self.embedding_table_name, [])
    self.assertIsNone(res)

    # An unknown table name is rejected.
    with self.assertRaises(ValueError):
        self.params.get_embedding_param("tom", indices)
def test_push_model(self):
    """Check that `push_model` initializes the server's parameters once.

    Two models (versions 1 and 2) are pushed. The test asserts that the
    stored version, dense parameters, embedding table metadata and the
    FTRL slot tables ("accumulator", "linear") match the first push.
    """
    opt_func_name = "ftrl_optimizer"
    opt = load_module(_module_file).__dict__[opt_func_name]()
    opt_config = opt.get_config()
    slot_names = ["accumulator", "linear"]
    slot_init_value = {
        "accumulator": opt_config["initial_accumulator_value"],
        "linear": 0.0,
    }

    self.create_default_server_and_stub(optimizer=opt_func_name)
    param0 = {
        "v0": np.random.rand(3, 2).astype(np.float32),
        "v1": np.random.rand(10, 32).astype(np.float32),
    }
    param1 = {
        "v0": np.ones([3, 2], dtype=np.float32),
        "v1": np.ones([10, 32], dtype=np.float32),
    }

    models = [param0, param1]

    for idx, model in enumerate(models):
        req = elasticdl_pb2.Model()
        req.version = idx + 1
        for name in model:
            serialize_ndarray(model[name], req.dense_parameters[name])
        req.embedding_table_infos.append(self._embedding_info)
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())
        # self._parameters is initialized with the first push_model call
        # and the second push_model has no effect
        self.assertEqual(self._parameters.version, 1)
        for name in param0:
            self.assertTrue(
                np.allclose(
                    param0[name],
                    self._parameters.non_embedding_params[name].numpy(),
                )
            )

        embedding_table = self._parameters.embedding_params[
            self._embedding_info.name
        ]
        self.assertEqual(self._embedding_info.name, embedding_table.name)
        self.assertEqual(self._embedding_info.dim, embedding_table.dim)
        self.assertEqual(
            tf.keras.initializers.get(
                self._embedding_info.initializer
            ).__class__,
            embedding_table.initializer.__class__,
        )

        for slot_name in slot_names:
            name = get_slot_table_name(
                self._embedding_info.name, slot_name
            )
            table = self._parameters.embedding_params[name]
            # assertEqual, not assertTrue: assertTrue(a, b) treats `b` as
            # the failure message and passes for any truthy `a`.
            self.assertEqual(name, table.name)
            self.assertEqual(self._embedding_info.dim, table.dim)
            embedding = table.get([2])
            # Two-sided tolerance check against the slot initializer.
            self.assertTrue(
                (abs(embedding - slot_init_value[slot_name]) < 0.0001).all()
            )