Example 1
    def init_from_model_pb(self, model_pb):
        """Initializes `Parameters` with model protocol buffer.

        The `Parameters` accepts the model pb and initializes from it only
        when it has not been initialized yet. Otherwise, it ignores the
        model pb.

        Args:
            model_pb: The model protocol buffer used for initialization.

        Returns:
            A bool indicating whether `Parameters` accepted this model pb.
        """
        if not self.initialized:
            infos = model_pb.embedding_table_infos
            self.init_embedding_params(infos)
            for name, pb in model_pb.dense_parameters.items():
                # Note that `tf.Variable` mangles names: if you pass the name
                # "somename" to a `tf.Variable`, the final variable name will
                # be "somename:0". So `tf.Variable.name` is not a reliable key
                # and we must avoid using it on the PS side.
                arr = pb_to_ndarray(pb)
                var = tf.Variable(initial_value=arr, trainable=True)
                self.non_embedding_params[name] = var

            for name, pb in model_pb.embedding_tables.items():
                s = pb_to_indexed_slices(pb)
                self.embedding_params[name].set(s.indices, s.values)
            self.version = max(0, model_pb.version)
            self.initialized = True
            return True
        return False
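
The name mangling noted in the comment above can be checked directly in TensorFlow. This minimal sketch (not part of the original code) shows why `init_from_model_pb` keys `non_embedding_params` by the protobuf name instead of by `tf.Variable.name`:

import tensorflow as tf

# TensorFlow appends the output index ":0" to a variable's name, so the
# string passed in is not what `tf.Variable.name` returns afterwards.
var = tf.Variable(initial_value=[1.0, 2.0], trainable=True, name="somename")
print(var.name)  # -> "somename:0"

# Keying the dict by the protobuf name, as the method above does, avoids
# relying on the mangled `var.name` on the PS side.
non_embedding_params = {"somename": var}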
Example 2
def _get_params_shard_from_pb(model_pb, shard_index, shard_num):
    """Get parameters including variables values and embedding table
    from a model protobuf.
    Args:
        model_pb: A Model protobuf instance.
        shard_index: Model shard index.
        shard_num: The total number of model shards.
    Returns:
        non_embedding_vars: A Python dict in which the key is a variable
            name and the value is a `tf.Variable` object.
        embedding_table_values: A Python dict in which the key is an embedding
            table name and the value is a 2-tuple: value[0] is a list of
            embedding indices and value[1] is the list of corresponding
            embedding vectors.
    """
    non_embedding_vars = {}
    embedding_table_values = {}

    for name, pb in model_pb.dense_parameters.items():
        if string_to_id(name, shard_num) == shard_index:
            non_embedding_vars[name] = tf.Variable(
                initial_value=pb_to_ndarray(pb), trainable=True)
    for name, pb in model_pb.embedding_tables.items():
        embedding_table_values.setdefault(name, ([], []))
        t = pb_to_indexed_slices(pb)
        for embedding_id, vector in zip(t.indices, t.values):
            if int_to_id(embedding_id, shard_num) == shard_index:
                embedding_table_values[name][0].append(embedding_id)
                embedding_table_values[name][1].append(vector)
    return non_embedding_vars, embedding_table_values
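
`string_to_id` and `int_to_id` are ElasticDL helpers that assign a parameter name or embedding id to a shard. A minimal sketch of hash-modulo sharding with the same call signatures, assuming the real implementations may use a different hash:

import hashlib

def string_to_id(name, shard_num):
    # Deterministically map a variable name to a shard index.
    # Assumption: a stable hash modulo the shard count.
    return int(hashlib.md5(name.encode("utf-8")).hexdigest(), 16) % shard_num

def int_to_id(embedding_id, shard_num):
    # Embedding ids are already integers, so a plain modulo suffices.
    return int(embedding_id) % shard_num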
Example 3
def verify(slices):
    pb = indexed_slices_to_pb(slices)
    new_slices = pb_to_indexed_slices(pb)
    np.testing.assert_array_equal(slices.values, new_slices.values)
    np.testing.assert_array_equal(slices.indices, new_slices.indices)
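
A small usage sketch for `verify`; the sample indices and values are made up for illustration, and `indexed_slices_to_pb` / `pb_to_indexed_slices` are assumed to be importable from the surrounding test module:

import numpy as np
import tensorflow as tf

values = np.random.rand(4, 8).astype(np.float32)
indices = np.array([0, 3, 5, 9], dtype=np.int64)
slices = tf.IndexedSlices(values=tf.constant(values),
                          indices=tf.constant(indices))
verify(slices)  # passes if the protobuf round trip preserves both arrays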
Example 4
    def push_gradients(self, request, _):
        res = elasticdl_pb2.PushGradientsResponse()
        if self._use_async:
            grad_vars = []

            for name, pb in request.gradients.dense_parameters.items():
                grad = pb_to_ndarray(pb)
                self._parameters.check_grad(Tensor(name, grad, None))
                grad = tf.constant(grad)
                var = self._parameters.get_non_embedding_param(name)
                grad_vars.append((grad, var))

            for name, pb in request.gradients.embedding_tables.items():
                grad = pb_to_indexed_slices(pb)
                self._parameters.check_grad(
                    Tensor(name, grad.values, grad.indices))
                if name in self._parameters.non_embedding_params:
                    var = self._parameters.get_non_embedding_param(name)
                    grad_vars.append((grad, var))
                else:
                    grad_vars.append((grad, name))

            learning_rate = request.learning_rate
            # TODO: if request.learning_rate == 0.0, modulate learning_rate
            #       in self._optimizer with staleness
            if self._lr_staleness_modulation and learning_rate > 0.0:
                staleness = max(
                    1, self._parameters.version - request.gradients.version)
                # Modulate learning rate by staleness
                learning_rate /= staleness

            self._set_optimizer_learning_rate(learning_rate)
            self._optimizer.apply_gradients(grad_vars)
            with self._version_lock:
                self._parameters.version += 1
                self._save_params_to_checkpoint_if_needed()
                version = self._parameters.version
            self._report_version_if_needed(version)

            res.accepted = True
            res.version = self._parameters.version
            return res
        else:
            if (request.gradients.version <
                    self._parameters.version - self._sync_version_tolerance):
                res.accepted = False
                res.version = self._parameters.version
                return res

            with self._lock:
                for name, pb in request.gradients.dense_parameters.items():
                    grad = pb_to_ndarray(pb)
                    self._parameters.check_grad(Tensor(name, grad, None))
                    if name in self._grads_buffer:
                        self._grads_buffer[name] = (self._grads_buffer[name] +
                                                    grad)
                    else:
                        self._grads_buffer[name] = grad

                for name, pb in request.gradients.embedding_tables.items():
                    grad = pb_to_indexed_slices(pb)
                    self._parameters.check_grad(
                        Tensor(name, grad.values, grad.indices))
                    if name in self._grads_buffer:
                        self._grads_buffer[name] = merge_indexed_slices(
                            self._grads_buffer[name], grad)
                    else:
                        self._grads_buffer[name] = grad

                self._grads_n += 1
                res.accepted = True

                updated_version = False
                version = self._parameters.version
                if self._grads_n == self._grads_to_wait:
                    grad_vars = []
                    for name, grad in self._grads_buffer.items():
                        # Dense gradients are averaged,
                        # while sparse gradients are summed
                        if not isinstance(grad, tf.IndexedSlices):
                            grad = grad / self._grads_to_wait
                            grad = tf.constant(grad)
                        var = self._parameters.get_non_embedding_param(name)
                        if var is None:
                            grad_vars.append((grad, name))
                        else:
                            grad_vars.append((grad, var))

                    self._set_optimizer_learning_rate(request.learning_rate)
                    self._optimizer.apply_gradients(grad_vars)
                    self._grads_n = 0
                    self._grads_buffer.clear()
                    self._parameters.version += 1
                    self._save_params_to_checkpoint_if_needed()
                    version = self._parameters.version
                    updated_version = True

            if updated_version:
                self._report_version_if_needed(version)
            res.version = version
            return res
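
In the asynchronous branch, the learning rate is divided by the gradient's staleness, i.e. how many versions the parameters advanced since the worker pulled them. A standalone sketch of that arithmetic (the function name is ours, not ElasticDL's):

def modulated_learning_rate(base_lr, ps_version, grad_version):
    # Staleness is clipped to at least 1 so a fresh gradient keeps base_lr.
    staleness = max(1, ps_version - grad_version)
    return base_lr / staleness

# Example: a gradient computed 4 versions ago gets a quarter of the rate.
assert modulated_learning_rate(0.1, ps_version=10, grad_version=6) == 0.025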