Example #1
    def test_push_gradient_async_update(self):
        self.create_default_server_and_stub()
        self.push_gradient_test_setup()

        # Test applying gradients to embedding and non-embedding parameters
        req = elasticdl_pb2.PushGradientsRequest()
        for g, name in zip(self.grad_values0, self.var_names):
            serialize_ndarray(g, req.gradients.dense_parameters[name])
        serialize_indexed_slices(
            self.embedding_grads0,
            req.gradients.embedding_tables[self._embedding_info.name],
        )
        res = self._stub.push_gradients(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.version, 1)
        expected_values = [
            v - self._lr * g
            for v, g in zip(self.var_values, self.grad_values0)
        ]
        for name, expected_value in zip(self.var_names, expected_values):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                )
            )

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(
            self.embedding_grads0.values, self.embedding_grads0.indices
        ):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table))
        )
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))

        # Test applying a gradient to only a subset of the parameters
        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)
        req = elasticdl_pb2.PushGradientsRequest()
        serialize_ndarray(
            self.grad_values1[1],
            req.gradients.dense_parameters[self.var_names[0]],
        )
        res = self._stub.push_gradients(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.version, 2)
        expected_values = [
            self.var_values[0] - self._lr * self.grad_values1[1],
            self.var_values[1],
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                )
            )
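
A standalone check of the SGD arithmetic the assertions above encode, v - lr * g per accepted push; the learning rate here is illustrative, while the test reads the real one from self._lr:

    import numpy as np

    lr = 0.1  # illustrative only; the test uses self._lr
    v = np.array([10.0, 20.0, 30.0], np.float32)  # var_values[0]
    g = np.array([1.0, 2.0, 3.0], np.float32)     # grad_values0[0]
    print(v - lr * g)  # [ 9.9 19.8 29.7]
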
Example #2
    def setUp(self):
        self.params = Parameters()

        self.model_pb = Model()
        self.infos_pb = self.model_pb.embedding_table_infos
        self.tensors_pb = self.model_pb.dense_parameters
        self.embedding_tables_pb = self.model_pb.embedding_tables

        self.embedding_table_name = "embedding_1"
        self.embedding_dim = 10
        embedding_pb = self.infos_pb.add()
        embedding_pb.name = self.embedding_table_name
        embedding_pb.dim = self.embedding_dim
        embedding_pb.initializer = "uniform"

        arr1 = np.random.uniform(size=(3, 4))
        serialize_ndarray(arr1, self.tensors_pb["x"])
        arr2 = np.random.uniform(size=(4, 5))
        serialize_ndarray(arr2, self.tensors_pb["y"])

        embedding_vectors = np.random.uniform(size=(2, 10))
        embedding_indices = np.array([0, 8])
        serialize_indexed_slices(
            Tensor(None, embedding_vectors, embedding_indices),
            self.embedding_tables_pb[self.embedding_table_name],
        )
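
A quick round-trip check of what this setUp builds, assuming pb_to_ndarray (used in Example #3 below) is the inverse of serialize_ndarray; the import paths are assumptions based on the ElasticDL layout:

    import numpy as np
    from elasticdl.proto import elasticdl_pb2  # assumed path
    from elasticdl.python.common.tensor_utils import (  # assumed path
        pb_to_ndarray,
        serialize_ndarray,
    )

    # Serialize a dense array into a Model proto, then decode it back.
    model_pb = elasticdl_pb2.Model()
    arr = np.random.uniform(size=(3, 4)).astype(np.float32)
    serialize_ndarray(arr, model_pb.dense_parameters["x"])
    assert np.allclose(arr, pb_to_ndarray(model_pb.dense_parameters["x"]))
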
Example #3
    def test_pull_dense_parameters(self):
        self.create_default_server_and_stub()
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        pull_req = elasticdl_pb2.PullDenseParametersRequest()
        pull_req.version = -1
        # try to pull variable
        res = self._stub.pull_dense_parameters(pull_req)
        # not initialized
        self.assertFalse(res.initialized)

        # init variable
        req = elasticdl_pb2.Model()
        req.version = 1
        for name, var in param0.items():
            serialize_ndarray(var, req.dense_parameters[name])
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        # pull variable back
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, req.version)
        for name, pb in res.dense_parameters.items():
            tensor = pb_to_ndarray(pb)
            self.assertTrue(np.allclose(param0[name], tensor))

        # pull again; nothing is returned since there is no newer version
        pull_req.version = res.version
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, pull_req.version)
        self.assertTrue(not res.dense_parameters)
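
The version gate above suggests a simple client-side polling pattern; a minimal sketch reusing the request and response fields exercised by this test (import paths assumed as in the round-trip sketch after Example #2):

    from elasticdl.proto import elasticdl_pb2  # assumed path
    from elasticdl.python.common.tensor_utils import pb_to_ndarray  # assumed

    def pull_if_stale(stub, local_version):
        # version=-1 forces a full pull, as at the start of the test.
        req = elasticdl_pb2.PullDenseParametersRequest()
        req.version = local_version
        res = stub.pull_dense_parameters(req)
        if not res.initialized:
            return local_version, {}  # PS has no model yet
        # dense_parameters is empty when local_version is already current.
        params = {
            name: pb_to_ndarray(pb)
            for name, pb in res.dense_parameters.items()
        }
        return res.version, params
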
Example #4
    def pull_embedding_vectors(self, request, _):
        result = tensor_pb2.TensorProto()
        if not request.ids:
            return result
        embedding_vectors = self._parameters.get_embedding_param(
            request.name, request.ids
        )
        serialize_ndarray(embedding_vectors, result)
        return result
Example #5
    def report_variable_to_ps(self, ps_id):
        model = elasticdl_pb2.Model()
        model.version = self._model_versions_from_ps[ps_id]
        if ps_id in self._ps_vars:
            ps_vars = self._ps_vars[ps_id]
            for var in ps_vars:
                serialize_ndarray(
                    var.numpy(), model.dense_parameters[var.name]
                )
        self._ps_stubs[ps_id].push_model(model)
Example #6
    def report_evaluation_metrics(self, model_outputs, labels):
        """Report evaluation metrics to the PS."""
        req = elasticdl_pb2.ReportEvaluationMetricsRequest()
        for name, output in model_outputs.items():
            output = np.concatenate(output)
            serialize_ndarray(output, req.model_outputs[name])
        labels = np.concatenate(labels)
        serialize_ndarray(labels, req.labels)
        req.worker_id = self._worker_id
        self._stub.report_evaluation_metrics(req)
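
model_outputs[name] arrives as a list of per-batch arrays, so np.concatenate merges them along the batch axis before serialization; for instance:

    import numpy as np

    # Two batches of per-example outputs, batch sizes 2 and 3.
    batches = [np.zeros((2, 4), np.float32), np.ones((3, 4), np.float32)]
    print(np.concatenate(batches).shape)  # (5, 4)
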
Example #7
    def push_dense_parameters(self, parameters, ps_id, version):
        """Push dense parameters to the PS.

        Args:
            parameters: a list of Tensors
            ps_id: PS id
            version: model version
        """
        model = elasticdl_pb2.Model()
        model.version = version
        for p in parameters:
            if self.parameter_to_ps[p.name] == ps_id:
                serialize_ndarray(p.values, model.dense_parameters[p.name])
        self.ps_stubs[ps_id].push_model(model)
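
These snippets only require parameter_to_ps to map a parameter name to a stable PS id in [0, ps_num); ElasticDL's actual placement scheme is not shown here, but a hypothetical deterministic assignment could look like:

    import hashlib

    def build_parameter_to_ps(parameter_names, ps_num):
        # Hypothetical hash-based placement; md5 keeps the mapping
        # stable across processes, unlike Python's builtin hash().
        def shard(name):
            return int(hashlib.md5(name.encode()).hexdigest(), 16) % ps_num

        return {name: shard(name) for name in parameter_names}

    print(build_parameter_to_ps(["dense/kernel", "dense/bias"], 2))
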
Example #8
    def report_evaluation_metrics(self, model_outputs, labels):
        """Report evaluation metrics to master.

        Args:
            model_outputs: dict
            the evaluation result on training.

            labels: numpy array
            the labels on training dataset.
        """
        req = elasticdl_pb2.ReportEvaluationMetricsRequest()
        for name, output in model_outputs.items():
            output = np.concatenate(output)
            serialize_ndarray(output, req.model_outputs[name])
        labels = np.concatenate(labels)
        serialize_ndarray(labels, req.labels)
        req.worker_id = self._worker_id
        self._stub.report_evaluation_metrics(req)
Example #9
    def push_gradient_test_setup(self):
        self.var_names = ["test_1", "test_2"]
        self.var_values = [
            np.array([10.0, 20.0, 30.0], np.float32),
            np.array([20.0, 40.0, 60.0], np.float32),
        ]
        self.grad_values0 = [
            np.array([1.0, 2.0, 3.0], np.float32),
            np.array([2.0, 4.0, 6.0], np.float32),
        ]
        self.grad_values1 = [
            np.array([0.0, 0.0, 7.0], np.float32),
            np.array([9.0, 9.0, 6.0], np.float32),
        ]

        dim = self._embedding_info.dim
        self.embedding_table = (
            np.random.rand(4 * dim).reshape((4, dim)).astype(np.float32)
        )
        self.embedding_grads0 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(3, 1, 3),
        )
        self.embedding_grads1 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(2, 2, 3),
        )
        push_model_req = elasticdl_pb2.Model()
        push_model_req.version = self._parameters.version
        for name, value in zip(self.var_names, self.var_values):
            serialize_ndarray(value, push_model_req.dense_parameters[name])
        push_model_req.embedding_table_infos.append(self._embedding_info)
        self._stub.push_model(push_model_req)

        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)

        self._parameters.embedding_params[self._embedding_info.name].set(
            range(len(self.embedding_table)), self.embedding_table
        )
Example #10
    def to_model_pb(self):
        """ Convert all parameters including embedding and non-embedding
        parameters to `elasticdl_pb2.Model` which can be serialized.
        """
        model_pb = elasticdl_pb2.Model()
        model_pb.version = self.version
        for name, var in self.non_embedding_params.items():
            serialize_ndarray(var.numpy(), model_pb.dense_parameters[name])

        for name, embedding_table in self.embedding_params.items():
            # Slot embedding table is not weights in the model, so we don't
            # save it to checkpoint.
            if not embedding_table.is_slot:
                serialize_indexed_slices(
                    embedding_table.to_indexed_slices(),
                    model_pb.embedding_tables[name],
                )
                embedding_info = embedding_table.to_embedding_table_info_pb()
                model_pb.embedding_table_infos.append(embedding_info)
        return model_pb
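
Because to_model_pb returns an ordinary protobuf message, persisting it only needs the standard SerializeToString call. A minimal sketch, where params is assumed to be an initialized Parameters instance and the path scheme is illustrative, not ElasticDL's actual checkpoint format:

    def save_checkpoint(params, directory):
        # `params` is assumed to be an initialized Parameters instance;
        # the file layout below is illustrative only.
        model_pb = params.to_model_pb()
        path = "%s/model_v%d.pb" % (directory, model_pb.version)
        with open(path, "wb") as f:
            f.write(model_pb.SerializeToString())  # standard protobuf API
        return path
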
Example #11
    def pull_dense_parameters(self, request, _):
        """
        Respond with all non-embedding parameters if initialized.
        """
        res = elasticdl_pb2.PullDenseParametersResponse()
        if not self._parameters.initialized:
            res.initialized = False
            return res

        # Only sync-SGD needs lock
        # TODO: use a read-write lock to support multiple concurrent reads
        if not self._use_async:
            self._lock.acquire()
        res.version = self._parameters.version
        # No need to send variables if the requester has the latest version.
        if self._parameters.version > request.version:
            for name, var in self._parameters.non_embedding_params.items():
                serialize_ndarray(var.numpy(), res.dense_parameters[name])
        if not self._use_async:
            self._lock.release()
        res.initialized = True
        return res
Example #12
    def report_gradient_to_ps(self, grads):
        self._timing.start_record_time("report_gradient")
        reqs = [
            elasticdl_pb2.PushGradientsRequest() for i in range(self._ps_num)
        ]
        ps_grads = {}
        non_embed_vars_n = len(self._non_embed_vars)
        for g, v in zip(
            grads[:non_embed_vars_n], self._non_embed_vars.values()
        ):
            ps_id = self._var_to_ps[v.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = {v.name: g}
            else:
                if v.name not in ps_grads[ps_id]:
                    ps_grads[ps_id][v.name] = g
                else:
                    if isinstance(g, tf.IndexedSlices):
                        ps_grads[ps_id][v.name] = merge_indexed_slices(
                            ps_grads[ps_id][v.name], g
                        )
                    else:
                        ps_grads[ps_id][v.name] += g

        for ps_id, pair in ps_grads.items():
            for name, g in pair.items():
                if isinstance(g, tf.IndexedSlices):
                    v, i = deduplicate_indexed_slices(g.values, g.indices)
                    ps_grads[ps_id][name] = tf.IndexedSlices(v, i)

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for name, g in ps_grads[ps_id].items():
                # Keras embedding layer has a dense parameter,
                # but an indexed slices type gradient
                if isinstance(g, tf.IndexedSlices):
                    serialize_indexed_slices(
                        Tensor(None, g.values.numpy(), g.indices.numpy()),
                        req.gradients.embedding_tables[name],
                    )
                else:
                    serialize_ndarray(
                        g.numpy(), req.gradients.dense_parameters[name]
                    )

        edl_embedding_name_values = self._collect_edl_embedding_name_values()

        if edl_embedding_name_values:
            edl_embedding_grads = grads[non_embed_vars_n:]
            bet_number = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                bet_number += len(embedding_and_ids)
            if len(edl_embedding_grads) != bet_number:
                raise ValueError(
                    "elasticdl.layers.embedding related gradient number %d "
                    "does not match the number of its output tensor %d."
                    % (len(edl_embedding_grads), bet_number)
                )

            grad_accum_iter = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                g_values = None
                g_indices = None
                for _, ids in embedding_and_ids:
                    grad = edl_embedding_grads[grad_accum_iter]
                    grad_accum_iter += 1
                    # ElasticDL embedding layer with Sparse Gradients
                    if isinstance(grad, tf.IndexedSlices):
                        grad = grad.values
                    if g_values is not None:
                        g_values = tf.concat([g_values, grad], axis=0)
                        g_indices = tf.concat([g_indices, ids], axis=0)
                    else:
                        g_values = grad
                        g_indices = ids

                # Sum up the values at duplicated indices in the
                # gradients. This reduces the gradient payload sent
                # to the PS.
                g_values, g_indices = deduplicate_indexed_slices(
                    values=g_values, indices=g_indices
                )

                results = scatter_embedding_vector(
                    g_values.numpy(), g_indices.numpy(), self._ps_num
                )

                for ps_id in results:
                    req = reqs[ps_id]
                    gv, gi = results[ps_id]
                    serialize_indexed_slices(
                        Tensor(None, gv, gi),
                        req.gradients.embedding_tables[name],
                    )

        report_futures = []
        for ps_id in range(self._ps_num):
            req = reqs[ps_id]
            req.gradients.version = self._model_versions_from_ps[ps_id]
            req.learning_rate = K.get_value(self._model.optimizer.lr)
            report_future = self._ps_stubs[ps_id].push_gradients.future(req)
            report_futures.append(report_future)

        accepted = False
        max_version = -1
        for report_future in report_futures:
            res = report_future.result()
            if res.accepted:
                accepted = True
            if res.version > max_version:
                max_version = res.version
        self._timing.end_record_time("report_gradient")
        return accepted, max_version
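
Both this method and Example #15 lean on deduplicate_indexed_slices to sum gradient rows that share an index before shipping them. A minimal sketch of the assumed semantics, modeled on TensorFlow's optimizer helper of the same name:

    import tensorflow as tf

    def deduplicate_indexed_slices(values, indices):
        # Sum the rows of `values` that share an index; return the summed
        # rows together with the unique indices (first-occurrence order).
        unique_indices, positions = tf.unique(indices)
        summed = tf.math.unsorted_segment_sum(
            values, positions, tf.shape(unique_indices)[0]
        )
        return summed, unique_indices

    v, i = deduplicate_indexed_slices(
        tf.constant([[1.0], [2.0], [3.0]]), tf.constant([3, 1, 3])
    )
    print(i.numpy(), v.numpy())  # [3 1] [[4.] [2.]]
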
Example #13
    def test_push_gradient_sync_update(self):
        self.create_server_and_stub(
            grads_to_wait=2, lr_staleness_modulation=False, use_async=False
        )
        self.push_gradient_test_setup()

        req = elasticdl_pb2.PushGradientsRequest()
        req.gradients.version = 0
        for g, name in zip(self.grad_values0, self.var_names):
            serialize_ndarray(g, req.gradients.dense_parameters[name])
        serialize_indexed_slices(
            self.embedding_grads0,
            req.gradients.embedding_tables[self._embedding_info.name],
        )

        res = self._stub.push_gradients(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.version, 0)

        req = elasticdl_pb2.PushGradientsRequest()
        req.gradients.version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            serialize_ndarray(g, req.gradients.dense_parameters[name])
        serialize_indexed_slices(
            self.embedding_grads1,
            req.gradients.embedding_tables[self._embedding_info.name],
        )
        res = self._stub.push_gradients(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.version, 1)

        req = elasticdl_pb2.PushGradientsRequest()
        req.gradients.version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            serialize_ndarray(g, req.gradients.dense_parameters[name])
        res = self._stub.push_gradients(req)
        self.assertEqual(res.accepted, False)
        self.assertEqual(res.version, 1)

        expected_values = [
            self.var_values[0]
            - self._lr * (self.grad_values0[0] + self.grad_values1[0]) / 2,
            self.var_values[1]
            - self._lr * (self.grad_values0[1] + self.grad_values1[1]) / 2,
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                )
            )

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(
            self.embedding_grads0.values, self.embedding_grads0.indices
        ):
            expected_embed_table[gi] -= self._lr * gv
        for gv, gi in zip(
            self.embedding_grads1.values, self.embedding_grads1.indices
        ):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table))
        )
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))
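
With grads_to_wait=2, the PS averages the two accepted pushes before applying them, which is exactly what expected_values asserts: v - lr * (g0 + g1) / 2. A standalone check for test_1 (illustrative lr; the test uses self._lr):

    import numpy as np

    lr = 0.1  # illustrative only
    v = np.array([10.0, 20.0, 30.0], np.float32)   # var_values[0]
    g0 = np.array([1.0, 2.0, 3.0], np.float32)     # grad_values0[0]
    g1 = np.array([0.0, 0.0, 7.0], np.float32)     # grad_values1[0]
    print(v - lr * (g0 + g1) / 2)  # [ 9.95 19.9  29.5 ]
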
Example #14
    def test_push_model(self):
        opt_func_name = "ftrl_optimizer"
        opt = load_module(_module_file).__dict__[opt_func_name]()
        opt_config = opt.get_config()
        slot_names = ["accumulator", "linear"]
        slot_init_value = {
            "accumulator": opt_config["initial_accumulator_value"],
            "linear": 0.0,
        }

        self.create_default_server_and_stub(optimizer=opt_func_name)
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        param1 = {
            "v0": np.ones([3, 2], dtype=np.float32),
            "v1": np.ones([10, 32], dtype=np.float32),
        }

        models = [param0, param1]

        for idx, model in enumerate(models):
            req = elasticdl_pb2.Model()
            req.version = idx + 1
            for name in model:
                serialize_ndarray(model[name], req.dense_parameters[name])
            req.embedding_table_infos.append(self._embedding_info)
            res = self._stub.push_model(req)
            self.assertEqual(res, empty_pb2.Empty())
            # self._parameters is initialized with the first push_model call
            # and the second push_model has no effect
            self.assertEqual(self._parameters.version, 1)
            for name in param0:
                self.assertTrue(
                    np.allclose(
                        param0[name],
                        self._parameters.non_embedding_params[name].numpy(),
                    )
                )
            self.assertEqual(
                self._embedding_info.name,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].name,
            )
            self.assertEqual(
                self._embedding_info.dim,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].dim,
            )
            self.assertEqual(
                tf.keras.initializers.get(
                    self._embedding_info.initializer
                ).__class__,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].initializer.__class__,
            )

            for slot_name in slot_names:
                name = get_slot_table_name(
                    self._embedding_info.name, slot_name
                )
                table = self._parameters.embedding_params[name]
                self.assertEqual(name, table.name)
                self.assertEqual(self._embedding_info.dim, table.dim)
                embedding = table.get([2])
                self.assertTrue(
                    (embedding - slot_init_value[slot_name] < 0.0001).all()
                )
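
The slot names and the accumulator's initial value checked above line up with Keras' Ftrl optimizer, which can be confirmed directly (config keys as of TF 2.x):

    import tensorflow as tf

    # Ftrl keeps "accumulator" and "linear" slots; the accumulator starts
    # at initial_accumulator_value, the linear slot at zero.
    opt = tf.keras.optimizers.Ftrl()
    print(opt.get_config()["initial_accumulator_value"])  # 0.1 by default
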
Example #15
    def push_gradients(
        self, grads, edl_grads, learning_rate, model_versions,
    ):
        """
        Push gradients to PS. There two kinds of gradients:
         - gradients of normal layers
         - sparse gradients of ElasticDL embedding layers
        """
        reqs = [
            elasticdl_pb2.PushGradientsRequest() for i in range(self.ps_num)
        ]
        ps_grads = {}

        # 1. handle grads
        for grad in grads:
            ps_id = self.parameter_to_ps[grad.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = {grad.name: grad}
            else:
                if grad.name not in ps_grads[ps_id]:
                    ps_grads[ps_id][grad.name] = grad
                else:
                    if grad.indices is not None:
                        ps_grads[ps_id][grad.name] = merge_indexed_slices(
                            ps_grads[ps_id][grad.name], grad
                        )
                    else:
                        ps_grads[ps_id][grad.name].values += grad.values

        for ps_id, pair in ps_grads.items():
            for name, grad in pair.items():
                if grad.indices is not None:
                    v, i = deduplicate_indexed_slices(
                        grad.values, grad.indices
                    )
                    ps_grads[ps_id][name] = Tensor(None, v, i)

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for name, grad in ps_grads[ps_id].items():
                # Keras embedding layer has a dense parameter,
                # but an indexed slices type gradient
                if grad.indices is not None:
                    serialize_indexed_slices(
                        Tensor(None, grad.values, grad.indices),
                        req.gradients.embedding_tables[name],
                    )
                else:
                    serialize_ndarray(
                        grad.values, req.gradients.dense_parameters[name]
                    )

        # 2. handle sparse grads of elasticdl embedding layers
        groups = {}
        for grad in edl_grads:
            if grad.name not in groups:
                groups[grad.name] = grad
            else:
                groups[grad.name] = merge_indexed_slices(
                    groups[grad.name], grad
                )

        # Sum up the values at duplicated indices in the gradients.
        # This reduces the gradient payload sent to the PS.
        for name, grad in groups.items():
            v, i = deduplicate_indexed_slices(grad.values, grad.indices)
            groups[name] = Tensor(None, v, i)

            results = scatter_embedding_vector(
                groups[name].values, groups[name].indices, self.ps_num
            )

            for ps_id in results:
                req = reqs[ps_id]
                gv, gi = results[ps_id]
                serialize_indexed_slices(
                    Tensor(None, gv, gi), req.gradients.embedding_tables[name],
                )

        # 3. push gradients to PS
        report_futures = []
        for ps_id in range(self.ps_num):
            req = reqs[ps_id]
            req.gradients.version = model_versions[ps_id]
            req.learning_rate = learning_rate
            report_future = self.ps_stubs[ps_id].push_gradients.future(req)
            report_futures.append(report_future)

        accepted = False
        max_version = -1
        for report_future in report_futures:
            res = report_future.result()
            if res.accepted:
                accepted = True
            if res.version > max_version:
                max_version = res.version
        return accepted, max_version
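
scatter_embedding_vector splits the deduplicated rows across PS shards; the snippets above only rely on it returning {ps_id: (values, indices)}. A hypothetical sketch assuming modulo placement of ids (the real routing may differ):

    import numpy as np

    def scatter_embedding_vector(values, indices, ps_num):
        # Hypothetical: send row j to PS `indices[j] % ps_num`, returning
        # {ps_id: (values_slice, indices_slice)} as the callers expect.
        indices = np.asarray(indices)
        results = {}
        for ps_id in range(ps_num):
            mask = indices % ps_num == ps_id
            if mask.any():
                results[ps_id] = (values[mask], indices[mask])
        return results

    out = scatter_embedding_vector(
        np.arange(8.0).reshape(4, 2), np.array([0, 3, 2, 5]), 2
    )
    print(sorted(out))  # [0, 1]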