Example #1
    def report_evaluation_metrics(self, model_outputs_pb, labels):
        """Deserializes the labels and model-output tensor protobufs and
        feeds the resulting ndarrays to the evaluation metrics."""
        labels = pb_to_ndarray(labels)
        model_outputs = {}
        for name, tensor_pb in model_outputs_pb.items():
            model_outputs[name] = pb_to_ndarray(tensor_pb)
        self.evaluation_metrics.update_evaluation_metrics(
            model_outputs, labels)
Example #2
    def pull_embedding_vectors(self, layer_name, embedding_ids):
        """Pulls and returns embedding vectors ordered by the embedding ids."""
        ps_ids = {}
        ps_ids_index = {}
        for idx, embedding_id in enumerate(embedding_ids):
            ps_id = int_to_id(embedding_id, self._ps_num)
            ps_ids.setdefault(ps_id, []).append(embedding_id)
            ps_ids_index.setdefault(ps_id, []).append(idx)

        embeddings = []
        index = []
        pb_future_and_id_pairs = []
        for ps_id, ids in ps_ids.items():
            req = elasticdl_pb2.PullEmbeddingVectorRequest()
            req.name = layer_name
            req.ids.extend(ids)
            pb_future = self._ps_stubs[ps_id].pull_embedding_vectors.future(
                req
            )
            pb_future_and_id_pairs.append((pb_future, ps_id))
        for pb_future, ps_id in pb_future_and_id_pairs:
            pb = pb_future.result()
            embeddings.append(pb_to_ndarray(pb))
            index.extend(ps_ids_index[ps_id])
        embeddings = np.concatenate(embeddings)

        # Restore the original ordering of the embedding vectors
        new_embeddings = np.empty_like(embeddings)
        new_embeddings[index] = embeddings
        return new_embeddings
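The sharding helper int_to_id above maps each embedding id to the PS pod that owns it. A minimal sketch, assuming plain modulo sharding (the real elasticdl helper may differ):

def int_to_id_sketch(embedding_id, ps_num):
    # Assign each integer id to one of ps_num parameter server pods.
    return embedding_id % ps_num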
Example #3
    def get_model(self):
        self._timing.start_record_time("get_model")
        if self._distribution_strategy != DistributionStrategy.ALLREDUCE:
            variable_future_and_id_pairs = []
            if self._use_multi_ps:
                self.init_ps_var_partition()
            for ps_id, stub in enumerate(self._ps_stubs):
                if ps_id not in self._ps_vars:
                    continue
                # async grpc call
                req = elasticdl_pb2.PullDenseParametersRequest()
                req.version = self._model_versions_from_ps[ps_id]
                var_future = stub.pull_dense_parameters.future(req)
                variable_future_and_id_pairs.append((var_future, ps_id))

            for var_future, ps_id in variable_future_and_id_pairs:
                res = var_future.result()
                if not res.initialized:
                    # push variable to ps for initialization
                    self.report_variable_to_ps(ps_id)
                    req = elasticdl_pb2.PullDenseParametersRequest()
                    req.version = self._model_versions_from_ps[ps_id]
                    res = self._ps_stubs[ps_id].pull_dense_parameters(req)
                    if not res.initialized:
                        # TODO: support PS fault-tolerance
                        raise RuntimeError(
                            "PS pod %d cannot be initialized" % ps_id
                        )

                for name, pb in res.dense_parameters.items():
                    self._non_embed_vars[name].assign(pb_to_ndarray(pb))
                self._model_versions_from_ps[ps_id] = res.version

            self._model_version = max(self._model_versions_from_ps)
        self._timing.end_record_time("get_model")
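Note the two-phase structure in get_model: all pull futures are issued first, then collected, so the round trips to the PS pods overlap instead of running serially. A generic sketch of the same fan-out/fan-in idiom over grpc stubs:

def pull_from_all_sketch(ps_stubs, model_versions):
    # Fan out: start one async call per stub before blocking on any result.
    futures = []
    for ps_id, stub in enumerate(ps_stubs):
        req = elasticdl_pb2.PullDenseParametersRequest()
        req.version = model_versions[ps_id]
        futures.append((stub.pull_dense_parameters.future(req), ps_id))
    # Fan in: block on each result; total latency tracks the slowest pull.
    return {ps_id: future.result() for future, ps_id in futures}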
Example #4
    def test_pull_dense_parameters(self):
        self.create_default_server_and_stub()
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        pull_req = elasticdl_pb2.PullDenseParametersRequest()
        pull_req.version = -1
        # try to pull variable
        res = self._stub.pull_dense_parameters(pull_req)
        # not initialized
        self.assertFalse(res.initialized)

        # init variable
        req = elasticdl_pb2.Model()
        req.version = 1
        for name, var in param0.items():
            serialize_ndarray(var, req.dense_parameters[name])
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        # pull variable back
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, req.version)
        for name, pb in res.dense_parameters.items():
            tensor = pb_to_ndarray(pb)
            self.assertTrue(np.allclose(param0[name], tensor))

        # Pull again: the PS has no newer version, so no parameters return
        pull_req.version = res.version
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, pull_req.version)
        self.assertTrue(not res.dense_parameters)
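serialize_ndarray, used in the test above, is the inverse of pb_to_ndarray: it fills a tensor protobuf in place from a numpy array. A minimal sketch, with the same illustrative field names as the sketch after Example #1:

def serialize_ndarray_sketch(array, tensor_pb):
    # Record shape and dtype, then store the raw buffer in place.
    del tensor_pb.dims[:]
    tensor_pb.dims.extend(array.shape)
    tensor_pb.dtype = array.dtype.name
    tensor_pb.content = array.tobytes()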
Example #5
    def init_from_model_pb(self, model_pb):
        """Initializes `Parameters` with model protocol buffer.

        `Parameters` accepts the model pb and initializes from it only when
        it has not been initialized yet. Otherwise, it ignores the model pb.

        Args:
            model_pb: The model protocol buffer used for initialization.

        Returns:
            A bool indicating whether `Parameters` accepted this model pb.
        """
        if not self.initialized:
            infos = model_pb.embedding_table_infos
            self.init_embedding_params(infos)
            for name, pb in model_pb.dense_parameters.items():
                # Note that `tf.Variable` mangles names: a variable created
                # with the name "somename" ends up named "somename:0". So
                # `tf.Variable.name` is unreliable and must not be used as a
                # key on the PS side; the variable is created without a name.
                arr = pb_to_ndarray(pb)
                var = tf.Variable(initial_value=arr, trainable=True)
                self.non_embedding_params[name] = var

            for name, pb in model_pb.embedding_tables.items():
                s = pb_to_indexed_slices(pb)
                self.embedding_params[name].set(s.indices, s.values)
            self.version = max(0, model_pb.version)
            self.initialized = True
            return True
        return False
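pb_to_indexed_slices, used above for the embedding tables, pairs a vector of row ids with a dense matrix of embedding values. A minimal sketch with illustrative field names (the real elasticdl proto layout may differ):

import numpy as np
import tensorflow as tf

def pb_to_indexed_slices_sketch(pb):
    # The ids say which embedding-table rows the dense values belong to.
    values = pb_to_ndarray(pb.values)             # illustrative field name
    indices = np.asarray(pb.ids, dtype=np.int64)  # illustrative field name
    return tf.IndexedSlices(values=values, indices=indices)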
Example #6
    def pull_dense_parameters(self, ps_ids, model_versions):
        """
        Pull dense parameters from PS.
        """
        variable_future_and_id_pairs = []
        for ps_id in ps_ids:
            if ps_id not in self.ps_to_parameter:
                continue
            stub = self.ps_stubs[ps_id]
            # async grpc call
            req = elasticdl_pb2.PullDenseParametersRequest()
            req.version = model_versions[ps_id]
            var_future = stub.pull_dense_parameters.future(req)
            variable_future_and_id_pairs.append((var_future, ps_id))

        dense_params = {}
        uninit_ps = []

        for var_future, ps_id in variable_future_and_id_pairs:
            res = var_future.result()
            if not res.initialized:
                uninit_ps.append(ps_id)
            else:
                for name, pb in res.dense_parameters.items():
                    dense_params[name] = pb_to_ndarray(pb)
                model_versions[ps_id] = res.version

        return dense_params, uninit_ps
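A caller typically handles the returned uninit_ps by pushing initial values and pulling once more, mirroring the flow in Example #3. A hypothetical usage sketch, where report_variable_to_ps stands in for the push step:

def pull_with_init_sketch(client, ps_ids, model_versions):
    dense_params, uninit_ps = client.pull_dense_parameters(
        ps_ids, model_versions)
    for ps_id in uninit_ps:
        # Hypothetical push of initial variables, as in Example #3.
        client.report_variable_to_ps(ps_id)
    if uninit_ps:
        more_params, still_uninit = client.pull_dense_parameters(
            uninit_ps, model_versions)
        if still_uninit:
            raise RuntimeError(
                "PS pods %s cannot be initialized" % still_uninit)
        dense_params.update(more_params)
    return dense_params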
Example #7
def _get_params_shard_from_pb(model_pb, shard_index, shard_num):
    """Get parameters including variables values and embedding table
    from a model protobuf.
    Args:
        model_pb: A Model protobuf instance.
        shard_index: Model shard index.
        shard_num: The total number of model shards.
    Return:
        non_embedding_vars: A Python dict in which the key is a variable
            name and the value is a `tf.Variable` object.
        embedding_table_values: A Python dict in which the key is an embedding
            table name and the value is a tuple with 2 elements. The value[0]
            is indices and value[1] is the corresponding embedding vector.
    """
    non_embedding_vars = {}
    embedding_table_values = {}

    for name, pb in model_pb.dense_parameters.items():
        if string_to_id(name, shard_num) == shard_index:
            non_embedding_vars[name] = tf.Variable(
                initial_value=pb_to_ndarray(pb), trainable=True)
    for name, pb in model_pb.embedding_tables.items():
        embedding_table_values.setdefault(name, ([], []))
        t = pb_to_indexed_slices(pb)
        for embedding_id, vector in zip(t.indices, t.values):
            if int_to_id(embedding_id, shard_num) == shard_index:
                embedding_table_values[name][0].append(embedding_id)
                embedding_table_values[name][1].append(vector)
    return non_embedding_vars, embedding_table_values
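string_to_id plays the same role for variable names that int_to_id plays for embedding ids: a deterministic map from a name to a shard index. A minimal sketch, assuming a stable digest (Python's built-in hash() is salted per process, so it cannot be used; the real helper may hash differently):

import hashlib

def string_to_id_sketch(name, shard_num):
    # Deterministically map a variable name to a shard index.
    digest = hashlib.md5(name.encode("utf-8")).hexdigest()
    return int(digest, 16) % shard_num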
Example #8
    def get_embedding_vectors(self, name, ids):
        """Pulls the embedding vectors for `ids` from the PS. Returns None
        when the response carries no tensor content."""
        pull_req = elasticdl_pb2.PullEmbeddingVectorRequest()
        pull_req.name = name
        pull_req.ids.extend(ids)
        res = self._stub.pull_embedding_vectors(pull_req)
        if res.tensor_content:
            return pb_to_ndarray(res)
        return None
Example #9
def verify(array):
    # Round-trip the array through the protobuf encoding and check that
    # the result is identical.
    pb = ndarray_to_pb(array)
    new_array = pb_to_ndarray(pb)
    np.testing.assert_array_equal(array, new_array)
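A sketch of how the verify helper above might be exercised, round-tripping a few representative dtypes and shapes:

import numpy as np

def test_round_trip_sketch():
    verify(np.random.rand(4, 6).astype(np.float32))
    verify(np.arange(12, dtype=np.int64).reshape(3, 4))
    verify(np.array([1.5, -2.5, 0.0], dtype=np.float64))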
Example #10
    def push_gradients(self, request, _):
        res = elasticdl_pb2.PushGradientsResponse()
        if self._use_async:
            grad_vars = []

            for name, pb in request.gradients.dense_parameters.items():
                grad = pb_to_ndarray(pb)
                self._parameters.check_grad(Tensor(name, grad, None))
                grad = tf.constant(grad)
                var = self._parameters.get_non_embedding_param(name)
                grad_vars.append((grad, var))

            for name, pb in request.gradients.embedding_tables.items():
                grad = pb_to_indexed_slices(pb)
                self._parameters.check_grad(
                    Tensor(name, grad.values, grad.indices))
                if name in self._parameters.non_embedding_params:
                    var = self._parameters.get_non_embedding_param(name)
                    grad_vars.append((grad, var))
                else:
                    grad_vars.append((grad, name))

            learning_rate = request.learning_rate
            # TODO: if request.learning_rate == 0.0, modulate learning_rate
            #       in self._optimizer with staleness
            if self._lr_staleness_modulation and learning_rate > 0.0:
                staleness = max(
                    1, self._parameters.version - request.gradients.version)
                # Modulate learning rate by staleness
                learning_rate /= staleness

            self._set_optimizer_learning_rate(learning_rate)
            self._optimizer.apply_gradients(grad_vars)
            with self._version_lock:
                self._parameters.version += 1
                self._save_params_to_checkpoint_if_needed()
                version = self._parameters.version
            self._report_version_if_needed(version)

            res.accepted = True
            res.version = self._parameters.version
            return res
        else:
            if (request.gradients.version <
                    self._parameters.version - self._sync_version_tolerance):
                res.accepted = False
                res.version = self._parameters.version
                return res

            with self._lock:
                for name, pb in request.gradients.dense_parameters.items():
                    grad = pb_to_ndarray(pb)
                    self._parameters.check_grad(Tensor(name, grad, None))
                    if name in self._grads_buffer:
                        self._grads_buffer[name] = (self._grads_buffer[name] +
                                                    grad)
                    else:
                        self._grads_buffer[name] = grad

                for name, pb in request.gradients.embedding_tables.items():
                    grad = pb_to_indexed_slices(pb)
                    self._parameters.check_grad(
                        Tensor(name, grad.values, grad.indices))
                    if name in self._grads_buffer:
                        self._grads_buffer[name] = merge_indexed_slices(
                            self._grads_buffer[name], grad)
                    else:
                        self._grads_buffer[name] = grad

                self._grads_n += 1
                res.accepted = True

                updated_version = False
                version = self._parameters.version
                if self._grads_n == self._grads_to_wait:
                    grad_vars = []
                    for name, grad in self._grads_buffer.items():
                        # Dense gradients are averaged,
                        # while sparse gradients are summed
                        if not isinstance(grad, tf.IndexedSlices):
                            grad = grad / self._grads_to_wait
                            grad = tf.constant(grad)
                        var = self._parameters.get_non_embedding_param(name)
                        if var is None:
                            grad_vars.append((grad, name))
                        else:
                            grad_vars.append((grad, var))

                    self._set_optimizer_learning_rate(request.learning_rate)
                    self._optimizer.apply_gradients(grad_vars)
                    self._grads_n = 0
                    self._grads_buffer.clear()
                    self._parameters.version += 1
                    self._save_params_to_checkpoint_if_needed()
                    version = self._parameters.version
                    updated_version = True

            if updated_version:
                self._report_version_if_needed(version)
            res.version = version
            return res
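merge_indexed_slices, used above to accumulate sparse gradients, can be sketched as a concatenation of the two slices' ids and values; duplicate ids are left in place and sum when the optimizer applies the sparse update, matching the summing semantics noted in the comment above:

import tensorflow as tf

def merge_indexed_slices_sketch(a, b):
    # Concatenate values row-wise and row ids end-to-end; repeated ids
    # accumulate additively at apply time.
    return tf.IndexedSlices(
        values=tf.concat([a.values, b.values], axis=0),
        indices=tf.concat([a.indices, b.indices], axis=0),
    )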