Example #1
    def _process_gradients(self, edl_embedding_gradients, indexed_grads, grads,
                           request_version):
        if not self._use_async:
            # grads of ElasticDL Embedding layer
            for k, v in edl_embedding_gradients.items():
                if k in self._edl_embedding_gradients:
                    self._edl_embedding_gradients[k] = merge_indexed_slices(
                        self._edl_embedding_gradients[k], v)
                else:
                    self._edl_embedding_gradients[k] = v

            # grads of Keras Embedding layer
            for k, v in indexed_grads.items():
                if k not in self._gradient_sum_indexed:
                    self._gradient_sum_indexed[k] = v
                else:
                    grads_s = self._gradient_sum_indexed[k]
                    self._gradient_sum_indexed[k] = merge_indexed_slices(
                        grads_s, v)

            # other grads
            for k, v in grads.items():
                if k in self._gradient_sum:
                    self._gradient_sum[k] = self._gradient_sum[k] + v
                else:
                    self._gradient_sum[k] = v
            self._grad_n += 1

        need_to_update_model = self._use_async
        if not self._use_async and self._grad_n >= self._grad_to_wait:
            need_to_update_model = True
            # get gradient average for sync SGD
            for k in self._gradient_sum:
                self._gradient_sum[k] = (self._gradient_sum[k] /
                                         self._grad_to_wait)
            edl_embedding_gradients = self._edl_embedding_gradients
            indexed_grads = self._gradient_sum_indexed
            grads = self._gradient_sum
        if need_to_update_model:
            self._update_optimizer(request_version)
            self._update_model(grads, indexed_grads, edl_embedding_gradients)
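
All three examples accumulate sparse embedding gradients with merge_indexed_slices, which is not shown here. Below is a minimal sketch of what such a helper could look like, assuming it simply concatenates the values and indices of two tf.IndexedSlices and lets the optimizer reduce duplicate indices when the gradient is applied; the body is an illustration, not the actual implementation.

import tensorflow as tf

def merge_indexed_slices(a, b):
    # Sketch (assumption): concatenate two tf.IndexedSlices.
    # Repeated indices are tolerated; they are summed when the
    # optimizer applies the sparse gradient.
    return tf.IndexedSlices(
        values=tf.concat([a.values, b.values], axis=0),
        indices=tf.concat([a.indices, b.indices], axis=0),
    )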
Example #2
    def _process_gradients(
        self, edl_embedding_gradients, indexed_grads, grads, request_version
    ):
        if not self._use_async:
            # grads of ElasticDL Embedding layer
            for k, v in edl_embedding_gradients.items():
                if k in self._edl_embedding_gradients:
                    self._edl_embedding_gradients[k] = merge_indexed_slices(
                        self._edl_embedding_gradients[k], v
                    )
                else:
                    self._edl_embedding_gradients[k] = v

            # grads of Keras Embedding layer
            for k, v in indexed_grads.items():
                if k not in self._gradient_sum_indexed:
                    self._gradient_sum_indexed[k] = v
                else:
                    grads_s = self._gradient_sum_indexed[k]
                    self._gradient_sum_indexed[k] = merge_indexed_slices(
                        grads_s, v
                    )

            # other grads
            for k, v in grads.items():
                if k in self._gradient_sum:
                    self._gradient_sum[k] = self._gradient_sum[k] + v
                else:
                    self._gradient_sum[k] = v
            self._grad_n += 1
        else:
            # TODO: do not accumulate gradients but apply directly.
            pass

        # staleness-aware learning rate modulation
        if self._lr_modulation:
            staleness = max(1, self._version - request_version)
            self._lr_modulation.set_multiplier(1.0 / staleness)
        if self._use_async or self._grad_n >= self._grad_to_wait:
            self._update_model()
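
Example #2 replaces the explicit gradient averaging of Example #1 with a staleness-aware learning-rate modulation: the further request_version lags behind the master's current version, the smaller the multiplier. A minimal sketch of what the modulation object might look like follows, assuming set_multiplier simply stores a factor that a learning-rate callable applies; the class name and get_learning_rate method are assumptions for illustration.

class LearningRateModulation:
    # Sketch (assumption): scale a base learning rate by a multiplier
    # that the master sets per gradient report.
    def __init__(self, base_lr):
        self._base_lr = base_lr
        self._multiplier = 1.0

    def set_multiplier(self, multiplier):
        self._multiplier = multiplier

    def get_learning_rate(self):
        # With multiplier = 1 / staleness, a report that is N versions
        # behind trains with base_lr / N.
        return self._base_lr * self._multiplier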
Example #3
    def ReportGradient(self, request, _):
        model_version_valid = self._validate_model_version(
            request.model_version
        )

        res = elasticdl_pb2.ReportGradientResponse()
        if not model_version_valid:
            logger.warning(
                "Task result for outdated version %d dropped",
                request.model_version,
            )
            res.accepted = False
            res.model_version = self._version
            return res

        # TODO: Update task queue with task_id
        with self._lock:
            tmp = {}
            indexed_grads = {}
            edl_embedding_gradients = {}
            # Do sanity check before accumulating gradients.
            for k, v in request.gradient.items():
                if k not in self._model:
                    if v.indices:
                        # grads of ElasticDL Embedding layer
                        # TODO: check arr.shape[1] = embedding_dim of this
                        # EdlEmbedding layer
                        arr = tensor_to_ndarray(v)
                        edl_embedding_gradients[k] = arr
                        continue
                    else:
                        raise ValueError(
                            "Gradient key: %s is not part of model" % k
                        )

                arr = tensor_to_ndarray(v)
                if isinstance(arr, tf.IndexedSlices):
                    if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                        raise ValueError(
                            "Gradient key: %s has incompatible "
                            "indexed slice dimension %d, expected %d"
                            % (
                                k,
                                arr.values.shape[1],
                                self._model[k].numpy().shape[1],
                            )
                        )

                    max_index = tf.math.reduce_max(arr.indices).numpy()
                    if max_index >= self._model[k].numpy().shape[0]:
                        raise ValueError(
                            "Gradient key: %s has wrong indices %d, "
                            "out of range %d"
                            % (
                                k,
                                max_index,
                                self._model[k].numpy().shape[0] - 1,
                            )
                        )
                    indexed_grads[k] = arr
                else:
                    if arr.shape != self._model[k].numpy().shape:
                        raise ValueError(
                            "Gradient key: %s has incompatible dimension" % k
                        )
                    tmp[k] = arr

            # grads of ElasticDL Embedding layer
            for k, v in edl_embedding_gradients.items():
                if k in self._edl_embedding_gradients:
                    self._edl_embedding_gradients[k] = merge_indexed_slices(
                        self._edl_embedding_gradients[k], v
                    )
                else:
                    self._edl_embedding_gradients[k] = v

            # grads of Keras Embedding layer
            for k, v in indexed_grads.items():
                if k not in self._gradient_sum_indexed:
                    self._gradient_sum_indexed[k] = v
                else:
                    grads_s = self._gradient_sum_indexed[k]
                    self._gradient_sum_indexed[k] = merge_indexed_slices(
                        grads_s, v
                    )

            # other grads
            for k, v in tmp.items():
                if not self._use_async and k in self._gradient_sum:
                    self._gradient_sum[k] = self._gradient_sum[k] + v
                else:
                    self._gradient_sum[k] = v

            self._grad_n += 1
            if self._use_async or self._grad_n >= self._grad_to_wait:
                self._update_model()
                self._update_evaluation()
                self._update_checkpoint()

        res.accepted = True
        res.model_version = self._version
        return res
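
Example #3 is the master-side gRPC handler; a worker calls it after computing local gradients. The sketch below shows what the calling side might look like, assuming a ReportGradientRequest message with model_version and gradient map fields (matching the fields read by the handler above) and a hypothetical ndarray_to_tensor helper that is the inverse of tensor_to_ndarray; the stub and helper names are assumptions, not the actual worker code.

def report_gradient(stub, gradients, model_version):
    # Sketch (assumption): pack local gradients and send them to the
    # master. The handler above rejects reports whose model_version is
    # outdated and returns its current version, so the worker can pull
    # the latest model and recompute before reporting again.
    req = elasticdl_pb2.ReportGradientRequest()
    req.model_version = model_version
    for name, grad in gradients.items():
        # ndarray_to_tensor is a hypothetical serialization helper.
        req.gradient[name].CopyFrom(ndarray_to_tensor(grad))
    res = stub.ReportGradient(req)
    return res.accepted, res.model_version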