Example #1
    def report_variable_to_master(self):
        req = elasticdl_pb2.ReportVariableRequest()
        for v in self._non_embed_vars.values():
            emplace_tensor_pb_from_ndarray(req.variable,
                                           v.numpy(),
                                           name=v.name)
        self._stub.ReportVariable(req)
Example #2
    def test_pull_variable(self):
        self.create_default_server_and_stub()
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        pull_req = empty_pb2.Empty()
        # try to pull variables before the model is initialized
        res = self._stub.pull_variable(pull_req)
        # not initialized
        self.assertFalse(res.model_init_status)

        # init variable
        req = elasticdl_pb2.Model()
        req.version = 1
        for name, var in param0.items():
            emplace_tensor_pb_from_ndarray(req.param, var, name=name)
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        # pull variable back
        res = self._stub.pull_variable(pull_req)
        self.assertTrue(res.model_init_status)
        self.assertEqual(res.model.version, req.version)
        for param in res.model.param:
            name = param.name
            tensor = tensor_pb_to_ndarray(param)
            self.assertTrue(np.allclose(param0[name], tensor))
Example #3
File: worker.py Project: zuodh/elasticdl
    def report_variable_to_ps(self, ps_id):
        model = elasticdl_pb2.Model()
        if ps_id in self._ps_vars:
            vars = self._ps_vars[ps_id]
            for var in vars:
                emplace_tensor_pb_from_ndarray(model.param,
                                               var.numpy(),
                                               name=var.name)
        self._ps_stubs[ps_id].push_model(model)
Example #4
    def makeGrad():
        """Make a ReportGradientRequest compatible with the model."""
        req = elasticdl_pb2.ReportGradientRequest()
        emplace_tensor_pb_from_ndarray(req.gradient,
                                       np.array([0.1], np.float32),
                                       name="x")
        emplace_tensor_pb_from_ndarray(req.gradient,
                                       np.array([0.03, 0.06], np.float32),
                                       name="y")
        req.model_version = 1
        return req
Example #5
    def report_evaluation_metrics(self, model_outputs, labels):
        """Report evaluation metrics to the PS."""
        req = elasticdl_pb2.ReportEvaluationMetricsRequest()
        for name, output in model_outputs.items():
            output = np.concatenate(output)
            emplace_tensor_pb_from_ndarray(req.model_outputs,
                                           output,
                                           name=name)
        labels = np.concatenate(labels)
        tensor = Tensor(values=labels)
        serialize_tensor(tensor, req.labels)
        self._stub.report_evaluation_metrics(req)
Example #6
    def test_emplace_tensor_pb_from_ndarray(self):
        values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], np.float32)
        indices = np.array([0, 2])
        name = "test"
        model = elasticdl_pb2.Model()
        emplace_tensor_pb_from_ndarray(model.param, values, indices, name)
        pb = model.param[-1]
        print("pb", pb)

        expected_pb = Tensor(values, indices, name).to_tensor_pb()
        self.assertEqual(pb.name, expected_pb.name)
        self.assertEqual(pb.dim, expected_pb.dim)
        self.assertEqual(pb.content, expected_pb.content)
        self.assertEqual(pb.indices, expected_pb.indices)
        self.assertEqual(pb.dtype, expected_pb.dtype)
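
The test above pins down the helper's contract: append one TensorPB to a repeated field and fill it from the ndarray. Below is a minimal sketch of `emplace_tensor_pb_from_ndarray` consistent with that contract and with the `param.add()` / `serialize_tensor` pattern in Example #9 (a reading of these examples, not the project's actual implementation):

    def emplace_tensor_pb_from_ndarray(repeated_field, values,
                                       indices=None, name=None):
        # Append an empty TensorPB slot to the repeated field, then fill
        # it in place from the ndarray plus the optional indices and name.
        tensor_pb = repeated_field.add()
        serialize_tensor(Tensor(values, indices, name), tensor_pb)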
Example #7
    def push_gradient_test_setup(self):
        self.var_names = ["test_1", "test_2"]
        self.var_values = [
            np.array([10.0, 20.0, 30.0], np.float32),
            np.array([20.0, 40.0, 60.0], np.float32),
        ]
        self.grad_values0 = [
            np.array([1.0, 2.0, 3.0], np.float32),
            np.array([2.0, 4.0, 6.0], np.float32),
        ]
        self.grad_values1 = [
            np.array([0.0, 0.0, 7.0], np.float32),
            np.array([9.0, 9.0, 6.0], np.float32),
        ]

        dim = self._embedding_info.dim
        self.embedding_table = (
            np.random.rand(4 * dim).reshape((4, dim)).astype(np.float32)
        )
        self.embedding_grads0 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(3, 1, 3),
        )
        self.embedding_grads1 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(2, 2, 3),
        )
        push_model_req = elasticdl_pb2.Model()
        push_model_req.version = self._parameters.version
        for name, value in zip(self.var_names, self.var_values):
            emplace_tensor_pb_from_ndarray(
                push_model_req.param, value, name=name
            )
        push_model_req.embedding_table_info.append(self._embedding_info)
        self._stub.push_model(push_model_req)

        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)

        self._parameters.embedding_params[self._embedding_info.name].set(
            range(len(self.embedding_table)), self.embedding_table
        )
Example #8
    def report_evaluation_metrics(self, model_outputs, labels):
        """Report evaluation metrics to the PS and return
        (accepted, model_version) from the RPC call.
        """
        req = elasticdl_pb2.ReportEvaluationMetricsRequest()
        for name, output in model_outputs.items():
            output = np.concatenate(output)
            emplace_tensor_pb_from_ndarray(req.model_outputs,
                                           output,
                                           name=name)
        labels = np.concatenate(labels)
        tensor = Tensor(values=labels)
        serialize_tensor(tensor, req.labels)
        req.model_version = self._model_version if self._use_multi_ps else -1
        res = self._stub.ReportEvaluationMetrics(req)
        return res.accepted, res.model_version
Example #9
    def to_model_pb(self):
        """ Convert all parameters including embedding and non-embedding
        parameters to `elasticdl_pb2.Model` which can be serialized.
        """
        model_pb = elasticdl_pb2.Model()
        model_pb.version = self.version
        for name, var in self.non_embedding_params.items():
            emplace_tensor_pb_from_ndarray(
                model_pb.param, var.numpy(), name=name
            )

        for name, embedding_table in self.embedding_params.items():
            embedding_table_tensor = embedding_table.to_tensor()
            tensor_pb = model_pb.param.add()
            serialize_tensor(embedding_table_tensor, tensor_pb)

            embedding_info = embedding_table.to_embedding_table_info_pb()
            model_pb.embedding_table_info.append(embedding_info)

        return model_pb
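
The reverse direction appears in Example #2, where `tensor_pb_to_ndarray` turns each TensorPB back into a NumPy array. A small round-trip sketch built only from the helpers these examples already use (the dict-builder itself is hypothetical):

    def model_pb_to_ndarrays(model_pb):
        # Hypothetical helper: rebuild a {name: ndarray} mapping from a
        # serialized Model pb, mirroring the read loop in Example #2.
        return {param.name: tensor_pb_to_ndarray(param)
                for param in model_pb.param}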
Example #10
    def test_push_gradient_async_update(self):
        self.create_default_server_and_stub()
        self.push_gradient_test_setup()

        # Test applying gradients to embedding and non-embedding parameters
        req = elasticdl_pb2.PushGradientRequest()
        for g, name in zip(self.grad_values0, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads0.values,
            indices=self.embedding_grads0.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 1)
        expected_values = [
            v - self._lr * g
            for v, g in zip(self.var_values, self.grad_values0)
        ]
        for name, expected_value in zip(self.var_names, expected_values):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(self.embedding_grads0.values,
                          self.embedding_grads0.indices):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table)))
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))

        # Test applying gradients with the same name
        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)
        req = elasticdl_pb2.PushGradientRequest()
        for g in self.grad_values1:
            emplace_tensor_pb_from_ndarray(req.gradients,
                                           g,
                                           name=self.var_names[0])
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 2)
        expected_values = [
            self.var_values[0] - self._lr * self.grad_values1[0] -
            self._lr * self.grad_values1[1],
            self.var_values[1],
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))
Example #11
    def pull_variable(self, request, _):
        """
        Respond with all non-embedding parameters if initialized.
        """
        res = elasticdl_pb2.PullVariableResponse()
        if not self._parameters.init_status:
            res.model_init_status = False
            return res

        # Only sync-SGD needs the lock
        # TODO: use a read-write lock to support multiple concurrent reads
        if not self._use_async:
            self._lock.acquire()
        res.model.version = self._parameters.version
        for name, var in self._parameters.non_embedding_params.items():
            emplace_tensor_pb_from_ndarray(res.model.param,
                                           var.numpy(),
                                           name=name)
        if not self._use_async:
            self._lock.release()
        res.model_init_status = True
        return res
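
One hazard in the snippet above: if serialization raises between acquire() and release(), the lock stays held. A common hardening, sketched here with a hypothetical `maybe_lock` helper rather than taken from ElasticDL:

    import contextlib

    @contextlib.contextmanager
    def maybe_lock(lock, enabled):
        # Acquire the lock only in sync-SGD mode, and always release it,
        # even if the body raises.
        if enabled:
            lock.acquire()
        try:
            yield
        finally:
            if enabled:
                lock.release()

The locked section would then read `with maybe_lock(self._lock, not self._use_async): ...` and need no explicit release.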
Example #12
    def testReportGradient(self):
        def makeGrad():
            """ Make a ReportGradientRequest compatible with model"""
            req = elasticdl_pb2.ReportGradientRequest()
            emplace_tensor_pb_from_ndarray(req.gradient,
                                           np.array([0.1], np.float32),
                                           name="x")
            emplace_tensor_pb_from_ndarray(req.gradient,
                                           np.array([0.03, 0.06], np.float32),
                                           name="y")
            req.model_version = 1
            return req

        master = MasterServicer(
            3,
            3,
            tf.optimizers.SGD(0.1),
            None,
            init_var=[],
            checkpoint_filename_for_init="",
            checkpoint_service=CheckpointService("", 0, 0, False),
            evaluation_service=None,
        )
        master._version = 1
        master.set_model_var("x", np.array([2.0], dtype=np.float32))
        master.set_model_var("y", np.array([12.0, 13.0], dtype=np.float32))

        # Report a future version, should raise an exception
        req = makeGrad()
        req.model_version = 2
        self.assertRaises(ValueError, master.ReportGradient, req, None)

        # Report an old version, should not be accepted
        req = makeGrad()
        req.model_version = 0
        res = master.ReportGradient(req, None)
        self.assertFalse(res.accepted)
        self.assertEqual(1, res.model_version)

        # Report an unknown gradient, should raise.
        req = makeGrad()
        emplace_tensor_pb_from_ndarray(req.gradient,
                                       np.array([0.1], np.float32),
                                       name="z")
        self.assertRaises(ValueError, master.ReportGradient, req, None)

        # Report an incompatible gradient, should raise.
        req = makeGrad()
        emplace_tensor_pb_from_ndarray(req.gradient,
                                       np.array([0.1], np.float32),
                                       name="y")
        self.assertRaises(ValueError, master.ReportGradient, req, None)

        # Report a current version, should be accepted.
        req = makeGrad()
        res = master.ReportGradient(req, None)
        self.assertTrue(res.accepted)
        self.assertEqual(1, res.model_version)

        # Report a current version with part of the gradients, should be
        # accepted.
        req = makeGrad()
        req.gradient.pop()
        res = master.ReportGradient(req, None)
        self.assertTrue(res.accepted)
        self.assertEqual(1, res.model_version)
        # Gradient should be accumulated.
        np.testing.assert_array_equal(np.array([0.2], dtype=np.float32),
                                      master._gradient_sum["x"])
        np.testing.assert_array_equal(np.array([0.03, 0.06], dtype=np.float32),
                                      master._gradient_sum["y"])
        self.assertEqual(2, master._grad_n)

        # Report a current version, should be accepted, and a new version
        # created
        req = makeGrad()
        res = master.ReportGradient(req, None)
        self.assertTrue(res.accepted)
        self.assertEqual(2, res.model_version)
        self.assertFalse(master._gradient_sum)
        self.assertEqual(0, master._grad_n)
        np.testing.assert_array_equal(
            # [2] - 0.1 * [0.1]
            np.array([1.99], dtype=np.float32),
            master._model["x"].numpy(),
        )
        np.testing.assert_array_equal(
            # [12, 13] - 0.1 * [0.02, 0.04]
            np.array([11.998, 12.996], dtype=np.float32),
            master._model["y"].numpy(),
        )
Example #13
    def report_gradient_to_ps(self, grads):
        self._timing.start_record_time("report_gradient")
        reqs = [
            elasticdl_pb2.PushGradientRequest() for i in range(self._ps_num)
        ]
        ps_grads = {}
        non_embed_vars_n = len(self._non_embed_vars)
        for g, v in zip(grads[:non_embed_vars_n],
                        self._non_embed_vars.values()):
            ps_id = self._var_to_ps[v.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = [(g, v.name)]
            else:
                ps_grads[ps_id].append((g, v.name))

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for g, name in ps_grads[ps_id]:
                emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)

        edl_embedding_name_values = self._collect_edl_embedding_name_values()

        if edl_embedding_name_values:
            edl_embedding_grads = grads[non_embed_vars_n:]
            bet_number = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                bet_number += len(embedding_and_ids)
            if len(edl_embedding_grads) != bet_number:
                raise ValueError(
                    "elasticdl.layers.embedding related gradient number %d "
                    "does not match the number of its output tensor %d." %
                    (len(edl_embedding_grads), bet_number))

            grad_accum_iter = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                g_values = None
                g_indices = None
                for _, ids in embedding_and_ids:
                    grad = edl_embedding_grads[grad_accum_iter]
                    grad_accum_iter += 1
                    # ElasticDL embedding layer with Sparse Gradients
                    if isinstance(grad, tf.IndexedSlices):
                        grad = grad.values
                    if g_values is not None:
                        g_values = tf.concat([g_values, grad], axis=0)
                        g_indices = tf.concat([g_indices, ids], axis=0)
                    else:
                        g_values = grad
                        g_indices = ids

                # Sum the values at duplicated indices in the gradients;
                # this reduces the gradient payload sent for the dense
                # embedding.
                g_values, g_indices = deduplicate_indexed_slices(
                    values=g_values, indices=g_indices)

                results = scatter_embedding_vector(g_values.numpy(),
                                                   g_indices.numpy(),
                                                   self._ps_num)

                for ps_id in results:
                    req = reqs[ps_id]
                    gv, gi = results[ps_id]
                    emplace_tensor_pb_from_ndarray(req.gradients,
                                                   values=gv,
                                                   indices=gi,
                                                   name=name)

        report_futures = []
        for ps_id in range(self._ps_num):
            req = reqs[ps_id]
            req.model_version = self._model_versions_from_ps[ps_id]
            report_future = self._ps_stubs[ps_id].push_gradient.future(req)
            report_futures.append(report_future)

        accepted = False
        max_version = -1
        for report_future in report_futures:
            res = report_future.result()
            if res.accepted:
                accepted = True
            if res.model_version > max_version:
                max_version = res.model_version
        self._timing.end_record_time("report_gradient")
        return accepted, max_version
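
The `deduplicate_indexed_slices` call above is what keeps the payload small: it merges gradient rows whose embedding ids repeat. A NumPy sketch of that behavior (the real helper operates on TF tensors and returns them, so details differ):

    import numpy as np

    def deduplicate_indexed_slices_np(values, indices):
        # Sum the rows of `values` that share an index so that each
        # embedding id appears at most once in the pushed gradient.
        unique_ids, positions = np.unique(indices, return_inverse=True)
        summed = np.zeros((len(unique_ids),) + values.shape[1:],
                          dtype=values.dtype)
        np.add.at(summed, positions, values)  # accumulate duplicate rows
        return summed, unique_ids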
Example #14
    def test_push_model(self):
        opt_func_name = "ftrl_optimizer"
        opt = load_module(_module_file).__dict__[opt_func_name]()
        opt_config = opt.get_config()
        slot_names = ["accumulator", "linear"]
        slot_init_value = {
            "accumulator": opt_config["initial_accumulator_value"],
            "linear": 0.0,
        }

        self.create_default_server_and_stub(optimizer=opt_func_name)
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        param1 = {
            "v0": np.ones([3, 2], dtype=np.float32),
            "v1": np.ones([10, 32], dtype=np.float32),
        }

        models = [param0, param1]

        for idx, model in enumerate(models):
            req = elasticdl_pb2.Model()
            req.version = idx + 1
            for name in model:
                emplace_tensor_pb_from_ndarray(req.param,
                                               model[name],
                                               name=name)
            req.embedding_table_info.append(self._embedding_info)
            res = self._stub.push_model(req)
            self.assertEqual(res, empty_pb2.Empty())
            # self._parameters is initialized with the first push_model call
            # and the second push_model has no effect
            self.assertEqual(self._parameters.version, 1)
            for name in param0:
                self.assertTrue(
                    np.allclose(
                        param0[name],
                        self._parameters.non_embedding_params[name].numpy(),
                    ))
            self.assertEqual(
                self._embedding_info.name,
                self._parameters.embedding_params[
                    self._embedding_info.name].name,
            )
            self.assertEqual(
                self._embedding_info.dim,
                self._parameters.embedding_params[
                    self._embedding_info.name].dim,
            )
            self.assertEqual(
                tf.keras.initializers.get(
                    self._embedding_info.initializer).__class__,
                self._parameters.embedding_params[
                    self._embedding_info.name].initializer.__class__,
            )

            for slot_name in slot_names:
                name = get_slot_table_name(self._embedding_info.name,
                                           slot_name)
                table = self._parameters.embedding_params[name]
                self.assertEqual(name, table.name)
                self.assertEqual(self._embedding_info.dim, table.dim)
                embedding = table.get([2])
                self.assertTrue(
                    (embedding - slot_init_value[slot_name] < 0.0001).all())
Example #15
    def test_push_gradient_sync_update(self):
        self.create_server_and_stub(grads_to_wait=2,
                                    lr_staleness_modulation=False,
                                    use_async=False)
        self.push_gradient_test_setup()

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values0, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads0.values,
            indices=self.embedding_grads0.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 0)

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads1.values,
            indices=self.embedding_grads1.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 1)

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, False)
        self.assertEqual(res.model_version, 1)

        expected_values = [
            self.var_values[0] - self._lr *
            (self.grad_values0[0] + self.grad_values1[0]) / 2,
            self.var_values[1] - self._lr *
            (self.grad_values0[1] + self.grad_values1[1]) / 2,
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(self.embedding_grads0.values,
                          self.embedding_grads0.indices):
            expected_embed_table[gi] -= self._lr * gv
        for gv, gi in zip(self.embedding_grads1.values,
                          self.embedding_grads1.indices):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table)))
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))
Example #16
    def _get_model_no_lock(self):
        pb_model = elasticdl_pb2.Model()
        pb_model.version = self._version
        for k, v in self._model.items():
            emplace_tensor_pb_from_ndarray(pb_model.param, v.numpy(), name=k)
        return pb_model
Example #17
    def report_gradient_to_ps(self, grads):
        reqs = [
            elasticdl_pb2.PushGradientRequest()
            for i in range(len(self._ps_stubs))
        ]
        ps_grads = {}
        non_embed_vars_n = len(self._non_embed_vars)
        for g, v in zip(grads[:non_embed_vars_n],
                        self._non_embed_vars.values()):
            ps_id = self._var_to_ps[v.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = [(g, v.name)]
            else:
                ps_grads[ps_id].append((g, v.name))

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for g, name in ps_grads[ps_id]:
                emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)

        if self._embedding_layers:
            edl_embedding_grads = grads[non_embed_vars_n:]
            bet_number = 0
            for layer in self._embedding_layers:
                bet_number += len(layer.embedding_and_ids)
            if len(edl_embedding_grads) != bet_number:
                raise ValueError(
                    "elasticdl.layers.embedding related gradient number %d "
                    "does not match the number of its output tensor %d." %
                    (len(edl_embedding_grads), bet_number))

            grad_accum_iter = 0
            for layer in self._embedding_layers:
                g_values = None
                g_indices = None
                for _, ids in layer.embedding_and_ids:
                    grad = edl_embedding_grads[grad_accum_iter]
                    grad_accum_iter += 1
                    # ElasticDL embedding layer with Sparse Gradients
                    if isinstance(grad, tf.IndexedSlices):
                        grad = grad.values
                    if g_values is not None:
                        g_values = tf.concat([g_values, grad], axis=0)
                        g_indices = tf.concat([g_indices, ids], axis=0)
                    else:
                        g_values = grad
                        g_indices = ids

                results = scatter_embedding_vector(g_values.numpy(),
                                                   g_indices.numpy(),
                                                   len(self._ps_stubs))

                for ps_id in results:
                    req = reqs[ps_id]
                    gv, gi = results[ps_id]
                    emplace_tensor_pb_from_ndarray(req.gradients,
                                                   values=gv,
                                                   indices=gi,
                                                   name=layer.name)

        report_futures = []
        for ps_id in range(len(self._ps_stubs)):
            req = reqs[ps_id]
            req.model_version = self._model_version
            report_future = self._ps_stubs[ps_id].push_gradient.future(req)
            report_futures.append(report_future)

        for report_future in report_futures:
            res = report_future.result()
        # TODO: choose the last response temporarily
        return res.accepted, res.model_version
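
Both versions of `report_gradient_to_ps` lean on `scatter_embedding_vector` to route gradient rows to their parameter servers. A sketch under the assumption of modulo placement (`id % ps_num`); the actual ElasticDL partitioning rule is not shown in these examples:

    import numpy as np

    def scatter_embedding_vector_sketch(values, indices, ps_num):
        # Group gradient rows by the PS that would own each embedding id,
        # assuming simple modulo placement.
        buckets = {}
        for row, idx in zip(values, indices):
            v, i = buckets.setdefault(int(idx) % ps_num, ([], []))
            v.append(row)
            i.append(idx)
        return {ps_id: (np.stack(v), np.asarray(i))
                for ps_id, (v, i) in buckets.items()}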