def _test_correctness(self, optimizer_class, X, Y, seed, **opt_kwargs):
        """Compare OptimizerWrapper training against a plain optimizer run.

        Two models are created from the same weights produced by
        ``self._random_init_model_weight``: a ``KerasEmbeddingModel`` trained
        through ``_train``, and an ``EdlEmbeddingModel`` whose embedding
        vectors are stored in a ``Parameters`` object and trained through
        ``_train_edl_embedding_with_optimizer_wrapper``. Afterwards the
        weights of corresponding layers must be element-wise close.

        Args:
            optimizer_class: Optimizer class; instantiated once per model
                with ``opt_kwargs``.
            X: Training features handed to both training helpers.
            Y: Training labels handed to both training helpers.
            seed: Seed used for weight initialisation and both training runs.
            **opt_kwargs: Keyword arguments forwarded to ``optimizer_class``.
        """
        module_path = get_module_file_path(
            os.path.dirname(os.path.realpath(__file__)),
            "embedding_test_module.KerasEmbeddingModel",
        )
        module_namespace = load_module(module_path).__dict__

        # Reference run: the Keras model is trained directly by the optimizer.
        dim = 4
        weight_shapes = [(4, dim), (4, dim), (72, 1), (1, )]
        weights = self._random_init_model_weight(weight_shapes, seed)
        loss_fn = module_namespace["loss"]
        keras_model_cls = module_namespace["KerasEmbeddingModel"]
        model1 = keras_model_cls(4, dim, weights)
        opt1 = optimizer_class(**opt_kwargs)
        _train(model1, opt1, X, Y, loss_fn, random_seed=seed)

        # Run under test: the model only receives the weights after the first
        # two arrays; those first two are stored in `params` below instead.
        edl_model_cls = module_namespace["EdlEmbeddingModel"]
        model2 = edl_model_cls(dim, weights[2:])
        opt2 = optimizer_class(**opt_kwargs)

        table_names = [
            embedding_layer.embedding_weight_name
            for embedding_layer in find_layer(model2, Embedding)
        ]

        # Seed one embedding table per embedding layer with the same initial
        # vectors the reference model started from.
        params = Parameters()
        for table_name, initial_vectors in zip(table_names, weights[:2]):
            table = EmbeddingTable(table_name, dim)
            row_ids = range(len(initial_vectors))
            table.set(row_ids, initial_vectors)
            params.embedding_params[table_name] = table

        _train_edl_embedding_with_optimizer_wrapper(
            model2, opt2, X, Y, loss_fn, params, random_seed=seed
        )

        # Both runs must have ended up with the same parameters.
        wrong_msg = (
            "The updated parameters of Optimizer Wrapper and TensorFlow "
            "optimizer %s differ." % opt1.get_config()["name"])

        for ref_layer, edl_layer in zip(model1.layers, model2.layers):
            if "embedding" not in edl_layer.name:
                layer_pairs = zip(ref_layer.weights, edl_layer.weights)
                for ref_var, edl_var in layer_pairs:
                    matches = np.isclose(ref_var.numpy(), edl_var.numpy())
                    self.assertTrue(matches.all(), msg=wrong_msg)
                continue

            # Embedding layers: compare the reference weight matrix with the
            # vectors read back from the embedding table.
            ref_vectors = ref_layer.weights[0].numpy()
            edl_vectors = params.get_embedding_param(
                edl_layer.embedding_weight_name, range(4)
            )
            matches = np.isclose(ref_vectors, edl_vectors)
            self.assertTrue(matches.all(), msg=wrong_msg)
 def _mock_model_parameters(self, model):
     """Build a ``Parameters`` object filled with ones for ``model``.

     Each trainable variable whose name contains "embedding" is turned into
     an ``EmbeddingTable`` whose ids ``0 .. shape[0] - 1`` are set from an
     all-ones array of the variable's shape. Every other variable is stored
     as an all-ones tensor of the same shape. The version is set to 100.

     Args:
         model: Object exposing ``trainable_variables``.

     Returns:
         The populated ``Parameters`` instance.
     """
     mocked = Parameters()
     for var in model.trainable_variables:
         var_name = var.name
         if "embedding" not in var_name:
             mocked.non_embedding_params[var_name] = tf.ones(var.shape)
             continue

         table = EmbeddingTable(
             name=var_name,
             dim=var.shape[1],
             initializer="RandomUniform",
         )
         row_ids = np.arange(var.shape[0])
         table.set(row_ids, np.ones(var.shape))
         mocked.embedding_params[var_name] = table
     mocked.version = 100
     return mocked
Example #3
0
    def test_restore_parameters_from_checkpoint(self):
        """Restore one checkpoint on two parameter servers and check shards.

        A checkpoint holding a 4-row embedding table and two dense variables
        is saved at version 100 and then loaded by two ``ParameterServer``
        instances configured with ``num_ps_pods=2``. The expected outcome is
        that ps 0 holds embedding ids [0, 2] and ``dense/kernel:0``, ps 1
        holds embedding ids [1, 3] and ``dense/bias:0``, and both report
        version 100.
        """
        # Imported locally so the test does not rely on the module's import
        # block providing these names.
        import os
        import tempfile

        params = Parameters()
        table = EmbeddingTable("embedding", 2, "random_uniform")
        table.set([0, 1, 2, 3], np.ones((4, 2), dtype=np.float32))
        params.embedding_params["embedding"] = table
        params.non_embedding_params["dense/kernel:0"] = tf.Variable(
            [[1.0], [1.0]]
        )
        params.non_embedding_params["dense/bias:0"] = tf.Variable([1.0])
        params.version = 100
        model_pb = params.to_model_pb()

        # (ps_id, embedding ids, dense variable name, dense variable value)
        expected_shards = (
            (0, [0, 2], "dense/kernel:0", np.ones((2, 1))),
            (1, [1, 3], "dense/bias:0", np.ones((1,))),
        )

        # Use a throwaway directory: the checkpoint previously went to a
        # cwd-relative path inside the source tree and was never removed.
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            checkpoint_saver = CheckpointSaver(checkpoint_dir, 0, 0, False)
            checkpoint_saver.save(100, model_pb, False)
            checkpoint_dir_for_init = os.path.join(
                checkpoint_dir, "version-100"
            )

            # Everything that may read the checkpoint stays inside the
            # `with` block so the files still exist.
            for ps_id, embed_ids, dense_name, dense_value in expected_shards:
                args = PserverArgs(
                    ps_id=ps_id,
                    num_ps_pods=2,
                    model_zoo=_test_model_zoo_path,
                    model_def="test_module.custom_model",
                    checkpoint_dir_for_init=checkpoint_dir_for_init,
                )
                pserver = ParameterServer(args)
                restored = pserver.parameters

                embedding_table = restored.embedding_params["embedding"]
                self.assertEqual(
                    list(embedding_table.embedding_vectors.keys()), embed_ids
                )
                self.assertEqual(
                    list(restored.non_embedding_params.keys()), [dense_name]
                )
                self.assertTrue(
                    np.array_equal(
                        restored.non_embedding_params[dense_name].numpy(),
                        dense_value,
                    )
                )
                self.assertEqual(restored.version, 100)
Example #4
0
    def _test_async_correctness(
        self,
        grads_and_vars_batches,
        embed_values,
        expected_non_embed_values,
        expected_embed_values=None,
    ):
        """Checks the correctness of async OptimizerWrapper. This function
        creates many threads and these threads call
        `OptimizerWrapper.apply_gradients` simultaneously.

        Args:
            grads_and_vars_batches: A python list of `grads_and_vars`. Every
                thread takes a `grads_and_vars` and calls `apply_gradients`.
                Within each `grads_and_vars`, the entries for embedding
                layers are expected to come last.
            embed_values: A python dictionary of
                `(layer_name, embedding table)`.
            expected_non_embed_values: A python list of expected non-embedding
                values after applying gradients.
            expected_embed_values: A python dictionary mapping a layer name
                to a collection of acceptable embedding values after
                applying gradients; the check passes when the table matches
                any one of them. None (or an empty dictionary) means no need
                to check embedding values.
        """
        thread_num = len(grads_and_vars_batches)
        embed_var_n = len(embed_values)
        input_dims = {}
        params = Parameters()
        for layer, values in embed_values.items():
            embed_dim = values.shape[1]
            input_dims[layer] = values.shape[0]
            embed_table = EmbeddingTable(layer, embed_dim)
            embed_table.set(range(input_dims[layer]), values)
            params.embedding_params[layer] = embed_table

        opt = SGD(0.1)
        # NOTE(review): the second positional argument presumably switches
        # the wrapper to async mode -- confirm against OptimizerWrapper.
        opt_wrapper = OptimizerWrapper(
            opt,
            True,
            lookup_embedding_func=params.get_embedding_param,
            update_embedding_func=params.set_embedding_param,
        )

        # call optimizer_wrapper.apply_gradients asynchronously
        def _apply_gradients(opt_wrapper, grads_and_vars):
            # sleep 1s to wait that all threads are in this method call
            time.sleep(1)
            opt_wrapper.apply_gradients(grads_and_vars)

        # The context manager shuts the pool down, so no worker threads
        # outlive the test.
        with ThreadPoolExecutor(max_workers=thread_num) as executor:
            tasks = [
                executor.submit(_apply_gradients, opt_wrapper, grads_and_vars)
                for grads_and_vars in grads_and_vars_batches
            ]
            # `result()` re-raises any exception from the worker thread.
            for task in tasks:
                task.result()

        # check updated results of non-embedding variables
        # An explicit end index is required: with no embedding variables,
        # `[:-0]` would select nothing and the check would pass vacuously.
        first_batch = grads_and_vars_batches[0]
        non_embed_end = max(len(first_batch) - embed_var_n, 0)
        non_embed_vars = [var for _, var in first_batch[:non_embed_end]]
        for var, expected_value in zip(non_embed_vars,
                                       expected_non_embed_values):
            self.assertTrue(np.isclose(var.numpy(), expected_value).all())

        # `expected_embed_values=None` means that no need to check
        # embedding table
        if not expected_embed_values:
            return
        # check updated results of embedding table
        for layer, expected_values in expected_embed_values.items():
            value = params.get_embedding_param(layer, range(input_dims[layer]))

            # Several outcomes can be acceptable; any single match suffices.
            self.assertTrue(
                any(
                    np.isclose(value, expected).all()
                    for expected in expected_values
                ))
Example #5
0
class EmbeddingTableTest(unittest.TestCase):
    """Unit tests for ``EmbeddingTable`` and its helper constructors."""

    def setUp(self):
        """Create a fresh table named "embedding_1" with dimension 10."""
        self.name = "embedding_1"
        self.dim = 10
        self.initializer = "uniform"
        self.table = EmbeddingTable(self.name, self.dim, self.initializer)

    def test_embedding_table_init(self):
        """The constructor stores name, dim and a matching initializer."""
        self.assertIsNotNone(self.table)
        self.assertEqual(self.table.name, self.name)
        self.assertEqual(self.table.dim, self.dim)
        self.assertEqual(
            tf.keras.initializers.get(self.initializer).__class__,
            self.table.initializer.__class__,
        )

    def test_embedding_table_get(self):
        """``get`` returns one row per id and creates missing vectors."""
        self.table.clear()
        indices = [0, 3, 7]
        res = self.table.get(indices)
        # Derive the expected shape from the fixture instead of hard-coding.
        self.assertTupleEqual(res.shape, (len(indices), self.dim))

        # An empty id list yields no result at all.
        res = self.table.get([])
        self.assertIsNone(res)

        # Ids 0 and 3 were already requested above; only 8 is new, so the
        # table is expected to hold four vectors: 0, 3, 7 and 8.
        self.table.get([0, 3, 8])
        self.assertEqual(len(self.table.embedding_vectors), 4)

    def test_embedding_table_set(self):
        """Vectors written with ``set`` are read back unchanged by ``get``."""
        self.table.clear()
        indices = [0, 1, 4]
        x = len(indices)
        values = np.random.uniform(size=x * self.dim).reshape((x, self.dim))
        self.table.set(indices, values)

        # Read the rows back one id at a time, in the order they were set.
        rows = np.concatenate([self.table.get([index]) for index in indices])
        np.testing.assert_array_equal(rows, values)

    def test_create_embedding_table(self):
        """``create_embedding_table`` honours the ``EmbeddingTableInfo``."""
        embedding_pb = EmbeddingTableInfo()
        embedding_pb.name = self.name
        embedding_pb.dim = self.dim
        embedding_pb.initializer = self.initializer
        table = create_embedding_table(embedding_pb)
        self.assertIsNotNone(table)
        self.assertEqual(table.name, self.name)
        self.assertEqual(
            tf.keras.initializers.get(self.initializer).__class__,
            table.initializer.__class__,
        )
        self.assertEqual(table.dim, self.dim)

    def test_create_embedding_table_for_slots(self):
        """A slot table built with a scalar initializer yields that value."""
        slot_name = "momentum"
        init_value = 3.5
        table = EmbeddingTable(
            get_slot_table_name(self.name, slot_name),
            dim=self.dim,
            initializer=init_value,
            is_slot=True,
        )
        self.assertIsNotNone(table)
        self.assertEqual(table.name, get_slot_table_name(self.name, slot_name))
        self.assertEqual(table.dim, self.dim)
        # test initialize
        embedding = table.get([2])
        # Compare the absolute difference: a signed difference would accept
        # any value far below `init_value`.
        self.assertTrue((np.abs(embedding - init_value) < 0.0001).all())