def testtensor_to_ndarray(self):
    """Verify tensor_to_ndarray handles errors, empty/zero-dim shapes,
    size mismatches, and compatible reshapes of the same content."""
    t = elasticdl_pb2.Tensor()
    # No dim defined, should raise.
    self.assertRaises(ValueError, tensor_to_ndarray, t)

    # Empty array, should be ok.
    t.dim.append(0)
    t.content = b""
    arr = tensor_to_ndarray(t)
    np.testing.assert_array_equal(np.array([], dtype=np.float32), arr)

    # Pathological case, one of the dimensions is 0.
    del t.dim[:]
    t.dim.extend([2, 0, 1, 9])
    t.content = b""
    arr = tensor_to_ndarray(t)
    np.testing.assert_array_equal(
        np.ndarray(shape=[2, 0, 1, 9], dtype=np.float32), arr
    )

    t.content = b"\0" * (4 * 12)

    # Wrong content size, should raise
    del t.dim[:]
    t.dim.extend([11])
    self.assertRaises(ValueError, tensor_to_ndarray, t)

    # Compatible dimensions, should be ok.
    for m in (1, 2, 3, 4, 6, 12):
        del t.dim[:]
        t.content = b"\0" * (4 * 12)
        t.dim.extend([m, 12 // m])
        arr = tensor_to_ndarray(t)
        # Bug fix: the converted array was never checked. All-zero
        # float32 content must deserialize to an all-zero array of
        # shape (m, 12 // m).
        np.testing.assert_array_equal(
            np.zeros([m, 12 // m], dtype=np.float32), arr
        )
 def report_evaluation_metrics(self, evaluation_version,
                               evaluation_metrics):
     """Accumulate one minibatch of evaluation metric tensors.

     Drops the report (logging an error and returning False) when the
     reported version disagrees with the current model version; otherwise
     sums each metric into ``self._evaluation_metrics``, bumps the
     completed-minibatch counter, and returns True.
     """
     version_mismatch = (
         self.model_version >= 0
         and evaluation_version != self.model_version
     )
     if version_mismatch:
         logger.error(
             "Drop a wrong version evaluation: request %d, receive %d" %
             (self.model_version, evaluation_version))
         return False
     for metric_name, metric_tensor in evaluation_metrics.items():
         value = tensor_to_ndarray(metric_tensor)
         if metric_name in self._evaluation_metrics:
             self._evaluation_metrics[metric_name] += value
         else:
             # Copy so the in-place += above never mutates the source.
             self._evaluation_metrics[metric_name] = np.copy(value)
     self._completed_minibatches += 1
     return True
 def report_evaluation_metrics(self, evaluation_version, model_outputs,
                               labels):
     """Feed model outputs and labels into the registered metrics.

     Drops the report (logging an error and returning False) when the
     reported version disagrees with the current model version; otherwise
     updates every metric instance registered for each output key and
     returns True.
     """
     if (self.model_version >= 0
             and evaluation_version != self.model_version):
         logger.error(
             "Drop a wrong version evaluation: request %d, receive %d" %
             (self.model_version, evaluation_version))
         return False
     label_array = tensor_to_ndarray(labels)
     for output_name, output_tensor in model_outputs.items():
         metrics_for_output = self._metrics_dict.get(output_name, {})
         if not metrics_for_output:
             # No metric registered for this output; skip the conversion.
             continue
         output_array = tensor_to_ndarray(output_tensor)
         for metric in metrics_for_output.values():
             metric.update_state(label_array, output_array)
     return True
# Example 4
    def get_model(self, version, method):
        """Fetch the model from the master and refresh local state.

        Sends a GetModelRequest with the given version and method,
        assigns every trainable variable from the returned parameters,
        and records the returned model version.
        """
        request = elasticdl_pb2.GetModelRequest()
        request.version = version
        request.method = method
        response = self._stub.GetModel(request)

        # Assumes all trainable variables exist in response.param.
        for variable in self._model.trainable_variables:
            variable.assign(tensor_to_ndarray(response.param[variable.name]))
        self._model_version = response.version
# Example 5
 def _init_model_from_tensor_dict(self, tensor_dict):
     """Initialize model variables from a name -> Tensor proto mapping.

     The mapping must be non-empty; each tensor is converted to an
     ndarray and registered via ``set_model_var``.
     """
     assert tensor_dict
     for var_name in tensor_dict:
         self.set_model_var(var_name, tensor_to_ndarray(tensor_dict[var_name]))
# Example 6
    def ReportGradient(self, request, _):
        """gRPC handler: validate and accumulate gradients from a worker.

        Rejects reports for outdated model versions (sync mode only),
        sanity-checks every gradient against the current model, then
        hands the grouped gradients to ``_process_gradients``.

        Returns a ReportGradientResponse carrying ``accepted`` and the
        current model version.

        Raises:
            ValueError: unknown gradient key, or incompatible shape/indices.
        """
        model_version_valid = self._use_async or self._validate_model_version(
            request.model_version
        )

        res = elasticdl_pb2.ReportGradientResponse()
        if not model_version_valid:
            logger.warning(
                "Task result for outdated version %d dropped",
                request.model_version,
            )
            res.accepted = False
            res.model_version = self._version
            return res

        tmp = {}                      # dense gradients
        indexed_grads = {}            # Keras Embedding (IndexedSlices) grads
        edl_embedding_gradients = {}  # ElasticDL Embedding layer grads
        # Do sanity check before accumulating gradients.
        for k, v in request.gradient.items():
            if k not in self._model:
                if v.indices:
                    # grads of ElasticDL Embedding layer
                    # TODO: check arr.shape[1] = embedding_dim of this
                    # EdlEmbedding layer
                    arr = tensor_to_ndarray(v)
                    edl_embedding_gradients[k] = arr
                    continue
                else:
                    # Bug fix: the key was passed as a second ValueError
                    # argument instead of being formatted into the message.
                    raise ValueError(
                        "Gradient key: %s is not part of model" % k
                    )

            arr = tensor_to_ndarray(v)
            if isinstance(arr, tf.IndexedSlices):
                if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                    raise ValueError(
                        "Gradient key: %s has incompatible "
                        "indexed slice dimension %d, expected %d"
                        % (
                            k,
                            arr.values.shape[1],
                            self._model[k].numpy().shape[1],
                        )
                    )

                max_index = tf.math.reduce_max(arr.indices).numpy()
                if max_index >= self._model[k].numpy().shape[0]:
                    raise ValueError(
                        "Gradient key: %s has wrong indices %d, "
                        "out of range %d"
                        % (k, max_index, self._model[k].numpy().shape[0] - 1)
                    )
                indexed_grads[k] = arr
            else:
                if arr.shape != self._model[k].numpy().shape:
                    # Bug fix: format the key into the message.
                    raise ValueError(
                        "Gradient key: %s has incompatible dimension" % k
                    )
                tmp[k] = arr

        if self._use_async:
            self._process_gradients(
                edl_embedding_gradients, indexed_grads, tmp,
                request.model_version,
            )
        else:
            # Bug fix: release the lock even if _process_gradients raises;
            # the original acquire/release pair without try/finally would
            # deadlock every later report on failure.
            self._lock.acquire()
            try:
                self._process_gradients(
                    edl_embedding_gradients, indexed_grads, tmp,
                    request.model_version,
                )
            finally:
                self._lock.release()

        res.accepted = True
        res.model_version = self._version
        return res
# Example 7
    def testGetModel(self):
        """Exercise MasterServicer.GetModel across the MINIMUM and FIXED
        methods, version bumps, missing checkpoints, and checkpoint-backed
        retrieval of an old model version."""
        master = MasterServicer(
            2,
            3,
            None,
            None,
            init_var=[],
            checkpoint_filename_for_init="",
            checkpoint_service=CheckpointService("", 0, 0, False),
            evaluation_service=None,
        )
        master.set_model_var("x", np.array([1.0, 1.0], dtype=np.float32))
        # Now master model is version 0
        self.assertEqual(0, master._version)

        # Get version 0 with minimum method
        req = elasticdl_pb2.GetModelRequest()
        req.version = 0
        req.method = elasticdl_pb2.MINIMUM
        model = master.GetModel(req, None)
        self.assertEqual(0, model.version)
        self.assertEqual(["x"], list(model.param.keys()))
        np.testing.assert_array_equal(
            np.array([1.0, 1.0]), tensor_to_ndarray(model.param["x"])
        )

        # Increase master model version to 1, but still request
        # version 0 with minimum method, we should get version 1
        master._version = 1
        master.set_model_var("x", np.array([2.0, 2.0], dtype=np.float32))
        master.set_model_var("y", np.array([12.0, 13.0], dtype=np.float32))
        model = master.GetModel(req, None)
        self.assertEqual(1, model.version)
        self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
        np.testing.assert_array_equal(
            np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
        )
        np.testing.assert_array_equal(
            np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
        )

        # Try to get version 2, it should raise exception.
        # (version 2 is ahead of the master's current version 1)
        req.version = 2
        self.assertRaises(ValueError, master.GetModel, req, None)

        # Get fixed version 1
        req.method = elasticdl_pb2.FIXED
        req.version = 1
        model = master.GetModel(req, None)
        self.assertEqual(1, model.version)
        self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
        np.testing.assert_array_equal(
            np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
        )
        np.testing.assert_array_equal(
            np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
        )

        # Previous model unavailable due to no checkpoint
        # (FIXED method for an old version needs a saved checkpoint)
        req.version = 0
        model = master.GetModel(req, None)
        self.assertFalse(model.param)

        # Previous model available through checkpoint
        with tempfile.TemporaryDirectory() as tempdir:
            chk_dir = os.path.join(tempdir, "testGetModel")
            os.makedirs(chk_dir)
            # Snapshot the current model, save it as a checkpoint, then
            # advance the master so version 1 is only in the checkpoint.
            req.version = master._version
            req.method = elasticdl_pb2.MINIMUM
            model = master.GetModel(req, None)
            master._checkpoint_service = CheckpointService(
                chk_dir, 2, 5, False
            )
            master._checkpoint_service.save(master._version, model, False)
            master._version = 2
            master.set_model_var("z", np.array([2.0, 2.0], dtype=np.float32))
            req.version = 1
            req.method = elasticdl_pb2.FIXED
            model = master.GetModel(req, None)
            self.assertEqual(1, model.version)
            self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
            np.testing.assert_array_equal(
                np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
            )
            np.testing.assert_array_equal(
                np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
            )
 def verify(a):
     """Round-trip ``a`` through tensor serialization and check equality."""
     round_tripped = tensor_to_ndarray(ndarray_to_tensor(a))
     np.testing.assert_array_equal(a, round_tripped)
# Example 9
    def ReportGradient(self, request, _):
        """gRPC handler: validate, accumulate, and possibly apply gradients.

        Checks the reported model version, sanity-checks each gradient
        against the current model, merges it into the running sums under
        the lock, and triggers a model update once enough reports have
        arrived (or immediately in async mode).

        Returns a ReportGradientResponse carrying ``accepted`` and the
        current model version.

        Raises:
            ValueError: unknown gradient key, or incompatible shape/indices.
        """
        model_version_valid = self._validate_model_version(
            request.model_version
        )

        res = elasticdl_pb2.ReportGradientResponse()
        if not model_version_valid:
            logger.warning(
                "Task result for outdated version %d dropped",
                request.model_version,
            )
            res.accepted = False
            res.model_version = self._version
            return res

        # TODO: Update task queue with task_id
        with self._lock:
            tmp = {}                      # dense gradients
            indexed_grads = {}            # Keras Embedding (IndexedSlices)
            edl_embedding_gradients = {}  # ElasticDL Embedding layer grads
            # Do sanity check before accumulating gradients.
            for k, v in request.gradient.items():
                if k not in self._model:
                    if v.indices:
                        # grads of ElasticDL Embedding layer
                        # TODO: check arr.shape[1] = embedding_dim of this
                        # EdlEmbedding layer
                        arr = tensor_to_ndarray(v)
                        edl_embedding_gradients[k] = arr
                        continue
                    else:
                        # Bug fix: the key was passed as a second ValueError
                        # argument instead of being formatted into the
                        # message.
                        raise ValueError(
                            "Gradient key: %s is not part of model" % k
                        )

                arr = tensor_to_ndarray(v)
                if isinstance(arr, tf.IndexedSlices):
                    if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                        raise ValueError(
                            "Gradient key: %s has incompatible "
                            "indexed slice dimension %d, expected %d"
                            % (
                                k,
                                arr.values.shape[1],
                                self._model[k].numpy().shape[1],
                            )
                        )

                    max_index = tf.math.reduce_max(arr.indices).numpy()
                    if max_index >= self._model[k].numpy().shape[0]:
                        raise ValueError(
                            "Gradient key: %s has wrong indices %d, "
                            "out of range %d"
                            % (
                                k,
                                max_index,
                                self._model[k].numpy().shape[0] - 1,
                            )
                        )
                    indexed_grads[k] = arr
                else:
                    if arr.shape != self._model[k].numpy().shape:
                        # Bug fix: format the key into the message.
                        raise ValueError(
                            "Gradient key: %s has incompatible dimension" % k
                        )
                    tmp[k] = arr

            # grads of ElasticDL Embedding layer
            for k, v in edl_embedding_gradients.items():
                if k in self._edl_embedding_gradients:
                    self._edl_embedding_gradients[k] = merge_indexed_slices(
                        self._edl_embedding_gradients[k], v
                    )
                else:
                    self._edl_embedding_gradients[k] = v

            # grads of Keras Embedding layer
            for k, v in indexed_grads.items():
                if k not in self._gradient_sum_indexed:
                    self._gradient_sum_indexed[k] = v
                else:
                    grads_s = self._gradient_sum_indexed[k]
                    self._gradient_sum_indexed[k] = merge_indexed_slices(
                        grads_s, v
                    )

            # other grads: sum in sync mode, overwrite in async mode
            for k, v in tmp.items():
                if not self._use_async and k in self._gradient_sum:
                    self._gradient_sum[k] = self._gradient_sum[k] + v
                else:
                    self._gradient_sum[k] = v

            self._grad_n += 1
            if self._use_async or self._grad_n >= self._grad_to_wait:
                # Enough gradients accumulated (or async): apply them now.
                self._update_model()
                self._update_evaluation()
                self._update_checkpoint()

        res.accepted = True
        res.model_version = self._version
        return res