Example #1
    def _generate_lookup_keys(self, grads_and_vars):
        """Generate lookup keys from a list of (gradient, variable) pairs.

        Arguments:
            grads_and_vars: A list of (gradient, layer name) pairs.

        Returns:
            A tuple of (`embedding_keys`, `slot_keys`, `embedding_key_index`,
                `slot_key_index`).
            `embedding_keys`: A list of keys for embedding vectors in kv
                store.
            `slot_keys`: A list of keys for slots in kv store.
            `embedding_key_index`: A python dictionary that records the
                positions of the embedding keys for each layer, i.e. an item
                `{layer_name: (start, end)}` means `embedding_keys[start:end]`
                are the keys for the layer named `layer_name`.
            `slot_key_index`: A python dictionary that records the positions
                of the slot keys for each layer and slot, i.e. an item
                `{layer_name: {slot_name: (start, end)}}` means
                `slot_keys[start:end]` are the keys for the layer named
                `layer_name` and the slot named `slot_name`.

        """
        embed_keys = []
        embed_key_index = {}
        slot_keys = []
        slot_key_index = {}
        self._unique_ids_all_layers = {}

        # generate keys
        for it, (grad, layer_name) in enumerate(grads_and_vars):
            # de-duplicate gradient's indices
            unique_ids, indices = tf.unique(grad.indices)
            unique_ids = unique_ids.numpy()
            if layer_name in self._unique_ids_all_layers:
                # TODO: support grads_and_vars with duplicated layer name
                logger.warning("grads_and_vars has duplicated layer name %s." %
                               layer_name)
            self._unique_ids_all_layers[layer_name] = unique_ids
            grad_new = tf.IndexedSlices(grad.values, indices)
            grads_and_vars[it] = (grad_new, layer_name)

            # generate embedding keys
            start = len(embed_keys)
            embed_keys.extend(
                [Embedding.get_key([layer_name, i]) for i in unique_ids])
            end = len(embed_keys)
            embed_key_index[layer_name] = (start, end)

            # generate slot keys
            for slot in self._allowed_slot_names:
                start = len(slot_keys)
                slot_keys.extend([
                    Embedding.get_key([layer_name, slot, i])
                    for i in unique_ids
                ])
                end = len(slot_keys)
                slot_key_index.setdefault(layer_name,
                                          {}).setdefault(slot, (start, end))
        return embed_keys, slot_keys, embed_key_index, slot_key_index
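For intuition, here is a minimal, self-contained sketch of the key layout this method produces. It assumes `Embedding.get_key` simply joins its parts with "-", which matches the literal keys such as "test_1-momentum-1" used in Example #8; the real implementation may differ.

    # Hypothetical stand-in for Embedding.get_key.
    def get_key(parts):
        return "-".join(str(p) for p in parts)

    layers = ["test_0", "test_1"]
    ids_list = [[2, 0], [1, 2, 0]]  # unique ids per layer, first-seen order
    slots = ["m", "v", "vhat"]      # slot names of Adam(amsgrad=True)

    embed_keys, embed_key_index = [], {}
    slot_keys, slot_key_index = [], {}
    for layer, ids in zip(layers, ids_list):
        start = len(embed_keys)
        embed_keys.extend(get_key([layer, i]) for i in ids)
        embed_key_index[layer] = (start, len(embed_keys))
        for slot in slots:
            start = len(slot_keys)
            slot_keys.extend(get_key([layer, slot, i]) for i in ids)
            slot_key_index.setdefault(layer, {})[slot] = (start, len(slot_keys))

    # Matches the expectations asserted in Example #4.
    assert embed_key_index == {"test_0": (0, 2), "test_1": (2, 5)}
    assert slot_key_index["test_1"]["vhat"] == (12, 15)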
Example #2
    def test_lookup(self):
        opt = Adam()
        opt_wrapper = OptimizerWrapper(opt, None, {})
        embedding_dim = 4
        layers = ["embedding_0", "embedding_1"]
        grads = [
            tf.IndexedSlices(None, tf.constant([2, 0, 2])),
            tf.IndexedSlices(None, tf.constant([1, 2, 0, 2])),
        ]
        ids_list = [[2, 0], [1, 2, 0]]
        grads_and_vars = list(zip(grads, layers))
        mock_kv_store = MockKvStore({})
        for layer in layers:
            for id in range(3):
                mock_kv_store.update(
                    keys=[Embedding.get_key([layer, id])],
                    values=[np.random.rand(embedding_dim).astype(np.float32)],
                )
                for i, slot in enumerate(["m", "v"]):
                    mock_kv_store.update(
                        keys=[Embedding.get_key([layer, slot, id])],
                        values=[
                            np.random.rand(embedding_dim).astype(np.float32)
                        ],
                    )

        with mock.patch.object(
            EmbeddingService, "lookup_embedding", mock_kv_store.lookup
        ):
            embeddings, slot_values = opt_wrapper._lookup_embeddings_and_slots(
                grads_and_vars
            )

        grad0 = grads_and_vars[0][0]
        self.assertTrue((grad0.indices.numpy() == [0, 1, 0]).all())
        grad1 = grads_and_vars[1][0]
        self.assertTrue((grad1.indices.numpy() == [0, 1, 2, 1]).all())

        for ids, layer in zip(ids_list, layers):
            self.assertTrue(
                (opt_wrapper._unique_ids_all_layers[layer] == ids).all()
            )

            values, _ = mock_kv_store.lookup(
                keys=[Embedding.get_key([layer, id]) for id in ids]
            )
            values = np.concatenate(values).reshape(-1, embedding_dim)
            self.assertTrue(
                np.isclose(embeddings[layer], values, atol=1e-4).all()
            )

            for slot in ["m", "v"]:
                values, _ = mock_kv_store.lookup(
                    keys=[Embedding.get_key([layer, slot, id]) for id in ids]
                )
                values = np.concatenate(values).reshape(-1, embedding_dim)
                self.assertTrue(
                    np.isclose(
                        slot_values[layer][slot], values, atol=1e-4
                    ).all()
                )
Example #3
    def _test_correctness(self, optimizer_class, X, Y, seed, **kwargs):
        """Test the correctness of specific TensorFlow optimizer."""
        _model_file = get_module_file_path(
            os.path.dirname(os.path.realpath(__file__)),
            "embedding_test_module.KerasEmbeddingModel",
        )
        model_module = load_module(_model_file).__dict__

        # train model with TensorFlow optimizer
        weights = self._random_init_model_weight(
            [(4, 4), (4, 4), (72, 1), (1,)], seed
        )
        loss_fn = model_module["loss"]
        model1 = model_module["KerasEmbeddingModel"](4, 4, weights)
        opt1 = optimizer_class(**kwargs)
        _train(model1, opt1, X, Y, loss_fn, random_seed=seed)

        model2 = model_module["EdlEmbeddingModel"](4, weights[2:])
        opt2 = optimizer_class(**kwargs)

        layer_names = [layer.name for layer in find_layer(model2, Embedding)]
        embed_dims = dict([(layer_name, 4) for layer_name in layer_names])

        # initialize embedding vectors in kv store
        mock_kv_store = MockKvStore({})
        for layer, embed_table in zip(layer_names, weights[:2]):
            for i, embed_vector in enumerate(embed_table):
                mock_kv_store.update(["%s-%d" % (layer, i)], [embed_vector])

        # train model with optimizer wrapper
        with mock.patch.object(
            EmbeddingService, "lookup_embedding", mock_kv_store.lookup
        ), mock.patch.object(
            EmbeddingService, "update_embedding", mock_kv_store.update
        ):
            _train_edl_embedding_with_optimizer_wrapper(
                model2, opt2, X, Y, loss_fn, embed_dims, random_seed=seed
            )

        # compare trained parameters
        wrong_msg = (
            "The updated parameters of Optimizer Wrapper and TensorFlow "
            "optimizer %s differ." % opt1.get_config()["name"]
        )

        for layer1, layer2 in zip(model1.layers, model2.layers):
            if "embedding" in layer2.name:
                w1 = layer1.weights[0].numpy()
                keys = [Embedding.get_key([layer2.name, i]) for i in range(4)]
                w2 = np.concatenate(mock_kv_store.lookup(keys)[0]).reshape(
                    4, -1
                )
                self.assertTrue(
                    np.isclose(w1, w2, atol=1e-4).all(), msg=wrong_msg
                )
            else:
                for w1, w2 in zip(layer1.weights, layer2.weights):
                    self.assertTrue(
                        np.isclose(w1.numpy(), w2.numpy(), atol=1e-4).all(),
                        msg=wrong_msg,
                    )
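These tests patch `EmbeddingService.lookup_embedding` and `EmbeddingService.update_embedding` with the methods of a `MockKvStore`. Below is a minimal in-memory sketch compatible with how the tests call it; the interface is inferred from those calls and is not the real implementation.

    import numpy as np

    class MockKvStore(object):
        """Hypothetical in-memory stand-in for the distributed kv store."""

        def __init__(self, store=None):
            self._store = dict(store) if store else {}

        def update(self, keys, values=None, embedding_vectors=None, **kwargs):
            # The real EmbeddingService.update_embedding passes
            # `embedding_vectors`; the tests pass `values`.
            values = values if values is not None else embedding_vectors
            for k, v in zip(keys, values):
                if kwargs.get("set_if_not_exist") and k in self._store:
                    continue
                self._store[k] = np.asarray(v)

        def lookup(self, keys, **kwargs):
            # Mirrors the (values, unknown_keys_index) return convention of
            # EmbeddingService.lookup_embedding.
            values, unknown_keys_index = [], []
            for i, k in enumerate(keys):
                if k in self._store:
                    values.append(self._store[k])
                else:
                    values.append(None)
                    unknown_keys_index.append(i)
            return values, unknown_keys_index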
Example #4
    def test_generate_lookup_keys(self):
        opt = Adam(amsgrad=True)
        opt_wrapper = OptimizerWrapper(opt, None, {})
        slots = ["m", "v", "vhat"]
        layers = ["test_0", "test_1"]
        grads = [
            tf.IndexedSlices(None, tf.constant([2, 0, 2])),
            tf.IndexedSlices(None, tf.constant([1, 2, 0, 2])),
        ]
        ids_list = [[2, 0], [1, 2, 0]]
        grads_and_vars = list(zip(grads, layers))
        arr = opt_wrapper._generate_lookup_keys(grads_and_vars)
        embed_keys, slot_keys, embed_layer_index, slot_layer_index = arr

        expected_embed_keys = [
            Embedding.get_key([layer, id])
            for layer, ids in zip(layers, ids_list) for id in ids
        ]
        self.assertTrue(embed_keys == expected_embed_keys)
        expected_slot_keys = [
            Embedding.get_key([layer, slot, id])
            for layer, ids in zip(layers, ids_list) for slot in slots
            for id in ids
        ]
        self.assertTrue(slot_keys == expected_slot_keys)

        expected_embed_layer_index = {"test_0": (0, 2), "test_1": (2, 5)}
        self.assertTrue(embed_layer_index == expected_embed_layer_index)
        expected_slot_layer_index = {
            "test_0": {
                "m": (0, 2),
                "v": (2, 4),
                "vhat": (4, 6)
            },
            "test_1": {
                "m": (6, 9),
                "v": (9, 12),
                "vhat": (12, 15)
            },
        }
        self.assertTrue(slot_layer_index == expected_slot_layer_index)

        for layer, ids in zip(layers, ids_list):
            self.assertTrue(
                (opt_wrapper._unique_ids_all_layers[layer] == ids).all())
Example #5
    def _report_to_kv_store(self):
        """Report updated embedding vectors and slots to kv store."""
        keys = []
        values = []
        for layer, ids in self._unique_ids_all_layers.items():
            value = self._get_embedding_variable(layer).numpy()
            for id, v in zip(ids, value):
                keys.append(Embedding.get_key([layer, id]))
                values.append(v)

            for slot in self._allowed_slot_names:
                value = self._get_slot_variable(layer, slot).numpy()
                for id, v in zip(ids, value):
                    keys.append(Embedding.get_key([layer, slot, id]))
                    values.append(v)

        EmbeddingService.update_embedding(keys, values,
                                          self._kv_store_endpoint)
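Concretely, for the state set up in Example #8 below (with the same `get_key` assumption as in the sketch after Example #1, and relying on Python 3.7+ dicts preserving insertion order), the flattened payload is ordered per layer, embedding keys first and then one block per slot:

    import numpy as np

    t = np.array([1.0, 1.0, 1.0])
    keys = [
        "test_1-1", "test_1-5",                    # embeddings of test_1
        "test_1-momentum-1", "test_1-momentum-5",  # momentum slot of test_1
        "test_2-10",                               # embeddings of test_2
        "test_2-momentum-10",                      # momentum slot of test_2
    ]
    values = [t, t * 5.0, t / 10.0, t / 2.0, t * 10.0, t]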
Example #6
    def _update_model(self):
        assert self._lock.locked()
        grad_var = []

        # (grad, var) pairs excluding keras Embedding layer and
        # ElasticDL Embedding layer
        for k in self._gradient_sum:
            self._gradient_sum[k] = self._gradient_sum[k] / self._grad_to_wait
            grad_var.append((self._gradient_sum[k], self._model[k]))

        # (grad, var) pair of Keras Embedding layer
        for k in self._gradient_sum_indexed:
            grad_var.append((self._gradient_sum_indexed[k], self._model[k]))

        # (grad, var) pair of ElasticDL Embedding layer
        edl_embedding_offset = len(grad_var)
        unique_ids_list = []
        if self._edl_embedding_gradients:
            for layer_name, grads in self._edl_embedding_gradients.items():
                unique_ids, idx = tf.unique(grads.indices)
                unique_ids_list.append(unique_ids)
                grads_idx_transformed = tf.IndexedSlices(grads.values, idx)
                keys = [
                    Embedding.get_key([layer_name, i])
                    for i in unique_ids.numpy()
                ]
                embeddings, unknown_keys = EmbeddingService.lookup_embedding(
                    embedding_service_endpoint=(
                        self._embedding_service_endpoint),
                    keys=keys,
                )
                if unknown_keys:
                    raise RuntimeError(
                        "Master reviced %d unknown embedding keys: %s ..." %
                        (len(unknown_keys), str(unknown_keys[0])))
                if not embeddings:
                    continue
                embeddings = np.concatenate(embeddings,
                                            axis=0).reshape(len(keys), -1)
                embedding_var = tf.Variable(embeddings)
                grad_var.append((grads_idx_transformed, embedding_var))

        # TODO: support optimizer with slots such as Adam, FTRL
        self._opt.apply_gradients(grad_var)

        # report updated embedding table to EmbeddingService
        self._update_edl_embedding_table(
            zip(
                self._edl_embedding_gradients.keys(),
                unique_ids_list,
                [v for g, v in grad_var[edl_embedding_offset:]],
            ))
        self._update_model_version()
        self._gradient_sum.clear()
        self._gradient_sum_indexed.clear()
        self._edl_embedding_gradients.clear()
        self._grad_n = 0
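Note that the `tf.unique` step above only remaps indices; duplicate rows remain in `values` and are accumulated when the `IndexedSlices` gradient is applied. A small runnable illustration:

    import tensorflow as tf

    indices = tf.constant([2, 0, 2])
    values = tf.constant([[1.0], [2.0], [3.0]])

    unique_ids, idx = tf.unique(indices)
    # unique_ids == [2, 0]; idx == [0, 1, 0] maps every original row to its
    # position in unique_ids.
    grads_idx_transformed = tf.IndexedSlices(values, idx)

    # Applying the slices to a [len(unique_ids), dim] variable accumulates
    # the two rows that both referred to id 2.
    embedding_var = tf.Variable(tf.zeros([2, 1]))
    tf.optimizers.SGD(1.0).apply_gradients(
        [(grads_idx_transformed, embedding_var)]
    )
    print(embedding_var.numpy())  # [[-4.], [-2.]]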
Example #7
    def test_initialize_in_lookup(self):
        opt = Adam()
        opt_wrapper = OptimizerWrapper(opt, None, {"test_1": 4})
        grads_and_vars = [(tf.IndexedSlices(None, tf.constant([0])), "test_1")]
        mock_kv_store = MockKvStore({})
        mock_kv_store.update(
            keys=[Embedding.get_key(["test_1", 0])],
            values=[np.random.rand(4).astype(np.float32)],
        )
        with mock.patch.object(EmbeddingService, "lookup_embedding",
                               mock_kv_store.lookup):
            embeddings, slot_values = opt_wrapper._lookup_embeddings_and_slots(
                grads_and_vars)
        self.assertTrue(np.isclose(slot_values["test_1"]["m"], 0.0).all())
        self.assertTrue(np.isclose(slot_values["test_1"]["v"], 0.0).all())
Example #8
    def test_report_to_kv_store(self):
        opt = SGD(momentum=0.1)
        opt_wrapper = OptimizerWrapper(opt, None, {})

        ids_list = [[1, 5], [10]]
        opt_wrapper._unique_ids_all_layers = {
            "test_1": np.array(ids_list[0]),
            "test_2": np.array(ids_list[1]),
        }
        t = np.array([1.0, 1.0, 1.0])
        opt_wrapper._embed_variables = {
            "test_1": tf.Variable([t, t * 5]),
            "test_2": tf.Variable([t * 10]),
        }
        opt_wrapper._slot_variables = {
            "test_1": {
                "momentum": tf.Variable([t / 10.0, t / 2.0])
            },
            "test_2": {
                "momentum": tf.Variable([t])
            },
        }

        mock_kv_store = MockKvStore({})
        with mock.patch.object(EmbeddingService, "update_embedding",
                               mock_kv_store.update):
            opt_wrapper._report_to_kv_store()

        expected_mock_kv_store = MockKvStore({})
        expected_mock_kv_store.update(
            keys=["test_1-1", "test_1-5", "test_2-10"],
            values=[t, t * 5.0, t * 10.0],
        )
        expected_mock_kv_store.update(
            keys=[
                "test_1-momentum-1",
                "test_1-momentum-5",
                "test_2-momentum-10",
            ],
            values=[t / 10.0, t / 2.0, t],
        )
        for k, ids in zip(["test_1", "test_2"], ids_list):
            for id in ids:
                key = Embedding.get_key([k, id])
                v, _ = mock_kv_store.lookup([key])
                expected_v, _ = expected_mock_kv_store.lookup([key])
                self.assertTrue((v[0] == expected_v[0]).all())
Example #9
    def lookup_embedding(self,
                         ids,
                         layer_name,
                         initializer="uniform",
                         embedding_table_dim=128):
        keys = [Embedding.get_key([layer_name, id]) for id in ids]
        (
            embedding_vectors,
            unknown_keys_index,
        ) = EmbeddingService.lookup_embedding(
            keys=keys,
            embedding_service_endpoint=self._embedding_service_endpoint,
        )
        if unknown_keys_index:
            # Initialize unknown_keys' embedding vectors and write into Redis.
            unknown_keys = [keys[index] for index in unknown_keys_index]
            initializer = tf.keras.initializers.get(initializer)
            embedding_vector_init = [
                initializer(shape=[1, embedding_table_dim]).numpy()
                for _ in unknown_keys
            ]
            embedding_vector_init = np.concatenate(embedding_vector_init,
                                                   axis=0)
            EmbeddingService.update_embedding(
                keys=unknown_keys,
                embedding_vectors=embedding_vector_init,
                embedding_service_endpoint=self._embedding_service_endpoint,
                set_if_not_exist=True,
            )
            # Lookup unknown_keys' embedding vectors
            (
                embedding_vectors_new,
                unknown_keys_idx_new,
            ) = EmbeddingService.lookup_embedding(
                keys=unknown_keys,
                embedding_service_endpoint=self._embedding_service_endpoint,
            )
            if unknown_keys_idx_new:
                raise Exception("Update embedding vector: %s failed." % str(
                    [unknown_keys[index] for index in unknown_keys_idx_new]))
            for key_index, vector in zip(unknown_keys_index,
                                         embedding_vectors_new):
                embedding_vectors[key_index] = vector
        embedding_vectors = np.concatenate(embedding_vectors, axis=0)
        return embedding_vectors.reshape((len(keys), embedding_table_dim))
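The lazy-initialization branch above resolves the initializer by name and materializes one row per unknown key; in isolation, that piece looks like this:

    import numpy as np
    import tensorflow as tf

    initializer = tf.keras.initializers.get("uniform")
    rows = [initializer(shape=[1, 8]).numpy() for _ in range(3)]
    batch = np.concatenate(rows, axis=0)
    print(batch.shape)  # (3, 8), one random row per unknown key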
Example #10
    def _update_edl_embedding_table(self, name_var_list):
        """
            Put updated embedding vectors' ids and values together
            and use EmbeddingService.update_embedding() to update
            embedding table in the distributed storage
        """
        keys = []
        embeddings = []
        for layer_name, unique_ids, embedding_var in name_var_list:
            keys.extend([
                Embedding.get_key([layer_name, i]) for i in unique_ids.numpy()
            ])
            embeddings.extend([i for i in embedding_var.numpy()])

        if embeddings:
            EmbeddingService.update_embedding(
                keys=keys,
                embedding_vectors=embeddings,
                embedding_service_endpoint=self._embedding_service_endpoint,
            )
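The caller in Example #6 builds `name_var_list` by zipping layer names, the deduplicated ids, and the updated embedding variables; schematically (`layer_a` is a made-up name):

    import tensorflow as tf

    name_var_list = zip(
        ["layer_a"],                    # layer names
        [tf.constant([2, 0])],          # unique ids per layer
        [tf.Variable([[0.1], [0.2]])],  # one updated row per unique id
    )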
Example #11
    def test_train_acceleration_with_embedding(self):
        kv_store = MockKvStore()
        model_inst = CustomModel()
        master = MasterServicer(
            2,
            2,
            tf.optimizers.SGD(0.1),
            None,
            init_var=model_inst.trainable_variables,
            checkpoint_filename_for_init=None,
            checkpoint_service=None,
            evaluation_service=None,
        )
        arguments = [
            "--worker_id",
            1,
            "--job_type",
            JobType.TRAINING_ONLY,
            "--minibatch_size",
            32,
            "--model_zoo",
            _model_zoo_path,
            "--model_def",
            "embedding_test_module.EdlEmbeddingModel",
        ]
        args = parse_worker_args(arguments)
        worker = Worker(args)
        worker._stub = InProcessMaster(master)

        inputs_list = [
            {
                "f1": tf.constant([[0], [1], [2]], tf.int64),
                "f2": tf.constant([[2], [1], [0]], tf.int64),
            },
            {
                "f1": tf.constant([[3], [4], [3]], tf.int64),
                "f2": tf.constant([[2], [1], [0]], tf.int64),
            },
        ]
        labels_list = [[0, 1, 0], [1, 1, 0]]
        input_dim = 5
        embedding_dim = 16
        worker.set_model(model_inst)

        # initialize kv store
        for layer in model_inst.layers:
            if isinstance(layer, Embedding):
                name = layer.name
                keys = [Embedding.get_key([name, i]) for i in range(input_dim)]
                values = [
                    np.random.rand(embedding_dim).astype(np.float32)
                    for i in range(input_dim)
                ]
                kv_store.update(keys, values)

        with mock.patch.object(
            EmbeddingService, "lookup_embedding", kv_store.lookup
        ), mock.patch.object(
            EmbeddingService, "update_embedding", kv_store.update
        ):
            worker._init_embedding_layer()
            worker._run_model_call_before_training(inputs_list[0])

            # run training process without tf.function
            correct_grads = []
            correct_ids_list = []
            for features, labels in zip(inputs_list, labels_list):
                loss, grads = worker.training_process_eagerly(features, labels)
                correct_grads.append(grads)
                ids = {}
                for layer in worker._embedding_layers:
                    ids[layer.name] = layer.embedding_and_ids[0].batch_ids
                correct_ids_list.append(ids)
                worker._reset_embedding()

            # run training process with tf.function
            test_grads = []
            test_ids_list = []
            for features, labels in zip(inputs_list, labels_list):
                self.assertFalse(worker._train_eagerly)
                loss, grads = worker.training_process(features, labels)
                test_grads.append(grads)
                ids = {}
                for layer in worker._embedding_layers:
                    ids[layer.name] = copy.deepcopy(
                        layer.embedding_and_ids[0].batch_ids
                    )
                test_ids_list.append(ids)
                worker._reset_embedding()

        # compare the gradients
        for test_g, correct_g in zip(test_grads, correct_grads):
            for g1, g2 in zip(test_g, correct_g):
                if isinstance(g1, tf.IndexedSlices):
                    self.assertTrue(np.isclose(g1.values, g2.values).all())
                    self.assertTrue(np.isclose(g1.indices, g2.indices).all())
                else:
                    self.assertTrue(np.isclose(g1, g2).all())

        for test_ids, correct_ids in zip(test_ids_list, correct_ids_list):
            for layer_name in correct_ids.keys():
                self.assertTrue(
                    tf.equal(test_ids[layer_name], correct_ids[layer_name])
                    .numpy()
                    .all()
                )
Example #12
    def lookup_func(ids, layer_name, initializer, output_dim):
        # Test stub: `initializer` and `output_dim` match the expected
        # signature but are unused here.
        values, unknown = EmbeddingService.lookup_embedding(
            [Embedding.get_key([layer_name, i]) for i in ids]
        )
        return np.concatenate(values).reshape(len(ids), -1)
Example #13
    def _test_async_correctness(
        self,
        grads_and_vars_batches,
        embed_values,
        expected_non_embed_values,
        expected_embed_values=None,
    ):
        """Checks the correctness of async OptimizerWrapper. This function
        creates many threads and these threads call
        `OptimizerWrapper.apply_gradients` simultaneously.

        Args:
            grads_and_vars_batches: A python list of `grads_and_vars`. Every
                thread takes a `grads_and_vars` and calls `apply_gradients`.
            embed_values: A python dictionary of
                `(layer_name, embedding table)`.
            expected_non_embed_values: A python list of expected non-embedding
                values after applying gradients.
            expected_embed_values: A python dictionary of expected embedding
                values after applying gradients. None means no need to check
                embedding values.
        """
        thread_num = len(grads_and_vars_batches)
        embed_dims = {}
        embed_var_n = len(embed_values)
        mock_kv_store = MockKvStore()
        for layer, values in embed_values.items():
            embed_dims[layer] = values.shape[1]
            input_dim = values.shape[0]

            keys = [
                Embedding.get_key([layer, idx]) for idx in range(input_dim)
            ]
            mock_kv_store.update(keys, values)

        opt = SGD(0.1)
        opt_wrapper = OptimizerWrapper(opt, None, embed_dims, True)

        with mock.patch.object(EmbeddingService, "lookup_embedding",
                               mock_kv_store.lookup), mock.patch.object(
                                   EmbeddingService, "update_embedding",
                                   mock_kv_store.update):
            # call optimizer_wrapper.apply_gradients asynchronously
            def _apply_gradients(opt_wrapper, grads_and_vars):
                # sleep 1s to wait until all threads reach this method call
                time.sleep(1)
                opt_wrapper.apply_gradients(grads_and_vars)

            executor = ThreadPoolExecutor(max_workers=thread_num)
            tasks = [
                executor.submit(_apply_gradients, opt_wrapper, grads_and_vars)
                for grads_and_vars in grads_and_vars_batches
            ]
            _ = [task.result() for task in tasks]

            # check updated results of non-embedding variables
            non_embed_vars = [
                var for grad, var in grads_and_vars_batches[0][:-embed_var_n]
            ]
            for var, expected_value in zip(non_embed_vars,
                                           expected_non_embed_values):
                self.assertTrue(np.isclose(var.numpy(), expected_value).all())

            # `expected_embed_values=None` means that no need to check
            # embedding table
            if not expected_embed_values:
                return
            # check updated results of embedding table
            for layer, expected_values in expected_embed_values.items():
                keys = [
                    Embedding.get_key([layer, idx]) for idx in range(input_dim)
                ]
                raw_value, _ = mock_kv_store.lookup(keys)
                value = np.concatenate(raw_value).reshape(input_dim, -1)

                self.assertTrue(
                    any([
                        np.isclose(value, expected).all()
                        for expected in expected_values
                    ]))
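A hedged usage sketch (the fixtures are made up): each batch lists ordinary `(grad, tf.Variable)` pairs first and `(IndexedSlices, layer_name)` pairs last, which is what the `[:-embed_var_n]` slice above relies on. With `SGD(0.1)` and two threads each contributing a gradient of 0.1, the non-embedding variable should end up at `[0.98, 1.98]`, assuming both threads' updates are applied.

    import numpy as np
    import tensorflow as tf

    var = tf.Variable([1.0, 2.0])
    embed_values = {"embed_1": np.ones((4, 2), np.float32)}
    grads_and_vars_batches = [
        [
            (tf.constant([0.1, 0.1]), var),  # non-embedding pair
            (
                tf.IndexedSlices(tf.constant([[0.1, 0.1]]), tf.constant([0])),
                "embed_1",  # embedding pair, keyed by layer name
            ),
        ]
        for _ in range(2)
    ]
    expected_non_embed_values = [np.array([0.98, 1.98])]
    # Inside the test case:
    # self._test_async_correctness(
    #     grads_and_vars_batches, embed_values, expected_non_embed_values
    # )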