Example 1
    def test_push_gradient_async_update(self):
        self.create_default_server_and_stub()
        self.push_gradient_test_setup()

        # Test applying gradients to embedding and non-embedding parameters
        req = elasticdl_pb2.PushGradientRequest()
        for g, name in zip(self.grad_values0, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads0.values,
            indices=self.embedding_grads0.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 1)
        expected_values = [
            v - self._lr * g
            for v, g in zip(self.var_values, self.grad_values0)
        ]
        for name, expected_value in zip(self.var_names, expected_values):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(self.embedding_grads0.values,
                          self.embedding_grads0.indices):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table)))
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))

        # Test applying gradients with the same name
        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)
        req = elasticdl_pb2.PushGradientRequest()
        for g in self.grad_values1:
            emplace_tensor_pb_from_ndarray(req.gradients,
                                           g,
                                           name=self.var_names[0])
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 2)
        expected_values = [
            self.var_values[0] - self._lr * self.grad_values1[0] -
            self._lr * self.grad_values1[1],
            self.var_values[1],
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))
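
The arithmetic this test checks is a plain SGD step per pushed gradient. A minimal NumPy sketch of the two assertions (learning rate and values below are made up for illustration):

    import numpy as np

    lr = 0.1                          # stand-in learning rate
    v = np.array([1.0, 2.0])          # one non-embedding parameter
    g0 = np.array([0.1, 0.2])         # gradient from the first push
    g1a = np.array([0.3, 0.4])        # two gradients pushed under the
    g1b = np.array([0.5, 0.6])        # same parameter name

    after_first_push = v - lr * g0    # what the first block asserts

    # A request carrying several gradients with the same name applies each
    # of them, which for SGD equals one step with their sum.
    after_same_name_push = v - lr * g1a - lr * g1b
    assert np.allclose(after_same_name_push, v - lr * (g1a + g1b))

The embedding check works the same way row by row: for every (value, index) pair in the sparse gradient, the corresponding embedding row is decremented by lr * value.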
Example 2
    def report_gradient_to_ps(self, grads):
        self._timing.start_record_time("report_gradient")
        reqs = [
            elasticdl_pb2.PushGradientRequest() for i in range(self._ps_num)
        ]
        ps_grads = {}
        non_embed_vars_n = len(self._non_embed_vars)
        for g, v in zip(grads[:non_embed_vars_n],
                        self._non_embed_vars.values()):
            ps_id = self._var_to_ps[v.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = [(g, v.name)]
            else:
                ps_grads[ps_id].append((g, v.name))

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for g, name in ps_grads[ps_id]:
                emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)

        edl_embedding_name_values = self._collect_edl_embedding_name_values()

        if edl_embedding_name_values:
            edl_embedding_grads = grads[non_embed_vars_n:]
            bet_number = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                bet_number += len(embedding_and_ids)
            if len(edl_embedding_grads) != bet_number:
                raise ValueError(
                    "elasticdl.layers.embedding related gradient number %d "
                    "does not match the number of its output tensor %d." %
                    (len(edl_embedding_grads), bet_number))

            grad_accum_iter = 0
            for name, embedding_and_ids in edl_embedding_name_values:
                g_values = None
                g_indices = None
                for _, ids in embedding_and_ids:
                    grad = edl_embedding_grads[grad_accum_iter]
                    grad_accum_iter += 1
                    # ElasticDL embedding layer with Sparse Gradients
                    if isinstance(grad, tf.IndexedSlices):
                        grad = grad.values
                    if g_values is not None:
                        g_values = tf.concat([g_values, grad], axis=0)
                        g_indices = tf.concat([g_indices, ids], axis=0)
                    else:
                        g_values = grad
                        g_indices = ids

                # Sum the values of duplicated indices in the gradients so
                # that one row per unique embedding id is pushed, reducing
                # the gradient payload sent to the parameter servers.
                g_values, g_indices = deduplicate_indexed_slices(
                    values=g_values, indices=g_indices)

                results = scatter_embedding_vector(g_values.numpy(),
                                                   g_indices.numpy(),
                                                   self._ps_num)

                for ps_id in results:
                    req = reqs[ps_id]
                    gv, gi = results[ps_id]
                    emplace_tensor_pb_from_ndarray(req.gradients,
                                                   values=gv,
                                                   indices=gi,
                                                   name=name)

        report_futures = []
        for ps_id in range(self._ps_num):
            req = reqs[ps_id]
            req.model_version = self._model_versions_from_ps[ps_id]
            report_future = self._ps_stubs[ps_id].push_gradient.future(req)
            report_futures.append(report_future)

        accepted = False
        max_version = -1
        for report_future in report_futures:
            res = report_future.result()
            if res.accepted:
                accepted = True
            if res.model_version > max_version:
                max_version = res.model_version
        self._timing.end_record_time("report_gradient")
        return accepted, max_version
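
The deduplicate_indexed_slices call above sums gradient rows that share an embedding id before the request is built. A rough, hypothetical re-implementation of that behaviour with standard TensorFlow ops (a sketch, not ElasticDL's actual helper):

    import tensorflow as tf

    def deduplicate_indexed_slices_sketch(values, indices):
        # Rows whose index appears more than once are summed, so the request
        # carries one gradient row per unique embedding id.
        unique_ids, positions = tf.unique(indices)
        summed = tf.math.unsorted_segment_sum(
            values, positions, tf.shape(unique_ids)[0])
        return summed, unique_ids

    values = tf.constant([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
    indices = tf.constant([4, 7, 4])
    v, ids = deduplicate_indexed_slices_sketch(values, indices)
    # v   -> [[4., 4.], [2., 2.]]
    # ids -> [4, 7]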
Example 3
    def test_push_gradient_sync_update(self):
        self.create_server_and_stub(grads_to_wait=2,
                                    lr_staleness_modulation=False,
                                    use_async=False)
        self.push_gradient_test_setup()

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values0, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads0.values,
            indices=self.embedding_grads0.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 0)

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        emplace_tensor_pb_from_ndarray(
            req.gradients,
            values=self.embedding_grads1.values,
            indices=self.embedding_grads1.indices,
            name=self._embedding_info.name,
        )
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, True)
        self.assertEqual(res.model_version, 1)

        req = elasticdl_pb2.PushGradientRequest()
        req.model_version = 0
        for g, name in zip(self.grad_values1, self.var_names):
            emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)
        res = self._stub.push_gradient(req)
        self.assertEqual(res.accepted, False)
        self.assertEqual(res.model_version, 1)

        expected_values = [
            self.var_values[0] - self._lr *
            (self.grad_values0[0] + self.grad_values1[0]) / 2,
            self.var_values[1] - self._lr *
            (self.grad_values0[1] + self.grad_values1[1]) / 2,
        ]
        for expected_value, name in zip(expected_values, self.var_names):
            self.assertTrue(
                np.allclose(
                    expected_value,
                    self._parameters.non_embedding_params[name].numpy(),
                ))

        expected_embed_table = np.copy(self.embedding_table)
        for gv, gi in zip(self.embedding_grads0.values,
                          self.embedding_grads0.indices):
            expected_embed_table[gi] -= self._lr * gv
        for gv, gi in zip(self.embedding_grads1.values,
                          self.embedding_grads1.indices):
            expected_embed_table[gi] -= self._lr * gv

        actual_embed_table = self._parameters.get_embedding_param(
            self._embedding_info.name, range(len(expected_embed_table)))
        self.assertTrue(np.allclose(expected_embed_table, actual_embed_table))
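
With grads_to_wait=2 and use_async=False, the server accumulates two pushes, rejects any push whose model_version is already stale, and applies one averaged update. The expected_values above therefore reduce to a single averaged SGD step; a small NumPy sketch of that arithmetic (illustrative numbers only):

    import numpy as np

    lr = 0.1
    grads_to_wait = 2
    v = np.array([1.0, 2.0])          # one non-embedding parameter
    g0 = np.array([0.1, 0.2])         # gradient from the first accepted push
    g1 = np.array([0.3, 0.4])         # gradient from the second accepted push

    # The accumulated gradients are averaged before the update is applied,
    # matching the expected_values formula in the test above.
    v_after = v - lr * (g0 + g1) / grads_to_wait

The embedding table, by contrast, is updated row by row for both pushes without averaging, which is why the expected table subtracts lr * gv for every (value, index) pair of both sparse gradients.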
Example 4
    def report_gradient_to_ps(self, grads):
        reqs = [
            elasticdl_pb2.PushGradientRequest()
            for i in range(len(self._ps_stubs))
        ]
        ps_grads = {}
        non_embed_vars_n = len(self._non_embed_vars)
        for g, v in zip(grads[:non_embed_vars_n],
                        self._non_embed_vars.values()):
            ps_id = self._var_to_ps[v.name]
            if ps_id not in ps_grads:
                ps_grads[ps_id] = [(g, v.name)]
            else:
                ps_grads[ps_id].append((g, v.name))

        for ps_id in ps_grads:
            req = reqs[ps_id]
            for g, name in ps_grads[ps_id]:
                emplace_tensor_pb_from_ndarray(req.gradients, g, name=name)

        if self._embedding_layers:
            edl_embedding_grads = grads[non_embed_vars_n:]
            bet_number = 0
            for layer in self._embedding_layers:
                bet_number += len(layer.embedding_and_ids)
            if len(edl_embedding_grads) != bet_number:
                raise ValueError(
                    "elasticdl.layers.embedding related gradient number %d "
                    "does not match the number of its output tensor %d." %
                    (len(edl_embedding_grads), bet_number))

            grad_accum_iter = 0
            for layer in self._embedding_layers:
                g_values = None
                g_indices = None
                for _, ids in layer.embedding_and_ids:
                    grad = edl_embedding_grads[grad_accum_iter]
                    grad_accum_iter += 1
                    # ElasticDL embedding layer with Sparse Gradients
                    if isinstance(grad, tf.IndexedSlices):
                        grad = grad.values
                    if g_values is not None:
                        g_values = tf.concat([g_values, grad], axis=0)
                        g_indices = tf.concat([g_indices, ids], axis=0)
                    else:
                        g_values = grad
                        g_indices = ids

                results = scatter_embedding_vector(g_values.numpy(),
                                                   g_indices.numpy(),
                                                   len(self._ps_stubs))

                for ps_id in results:
                    req = reqs[ps_id]
                    gv, gi = results[ps_id]
                    emplace_tensor_pb_from_ndarray(req.gradients,
                                                   values=gv,
                                                   indices=gi,
                                                   name=layer.name)

        report_futures = []
        for ps_id in range(len(self._ps_stubs)):
            req = reqs[ps_id]
            req.model_version = self._model_version
            report_future = self._ps_stubs[ps_id].push_gradient.future(req)
            report_futures.append(report_future)

        for report_future in report_futures:
            res = report_future.result()
        # TODO: only the last response is used for now; the results from
        # all parameter servers should be aggregated instead.
        return res.accepted, res.model_version
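
This older variant differs from Example 2 mainly in the final step: as the TODO notes, only the last parameter server's response is returned, whereas Example 2 aggregates acceptance and the maximum model version across all responses. In both variants, scatter_embedding_vector splits the concatenated sparse gradient across parameter servers; a hypothetical sketch of such a partitioner, assuming rows are routed by id % ps_num (the routing rule is an assumption, not necessarily ElasticDL's):

    import numpy as np

    def scatter_embedding_vector_sketch(values, indices, ps_num):
        # Hypothetical sharding: each embedding id goes to the PS given by
        # id % ps_num; the project's real placement rule may differ.
        buckets = {}
        for row, idx in zip(values, indices):
            ps_id = int(idx) % ps_num
            rows, ids = buckets.setdefault(ps_id, ([], []))
            rows.append(row)
            ids.append(idx)
        return {
            ps_id: (np.stack(rows), np.asarray(ids))
            for ps_id, (rows, ids) in buckets.items()
        }

    values = np.arange(8.0).reshape(4, 2)
    indices = np.array([0, 3, 4, 7])
    shards = scatter_embedding_vector_sketch(values, indices, ps_num=2)
    # shards[0] holds the rows for ids 0 and 4, shards[1] those for ids 3 and 7.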