Ejemplo n.º 1
0
    def testUserDefinedModel(self):
        master = MasterServicer(
            2,
            3,
            None,
            None,
            init_var=[],
            checkpoint_filename_for_init="",
            checkpoint_service=CheckpointService("", 0, 0, False),
            evaluation_service=None,
        )
        req = elasticdl_pb2.GetModelRequest()
        req.method = elasticdl_pb2.MINIMUM
        req.version = 0

        model_inst = SimpleModel()
        model_inst.build(SimpleModel.input_shapes())
        for variable in model_inst.trainable_variables:
            master.set_model_var(variable.name, variable.numpy())
        # Get version 0
        model = master.GetModel(req, None)
        self.assertEqual(0, model.version)
        self.assertEqual(
            [
                "dense_1/bias:0",
                "dense_1/kernel:0",
                "dense_2/bias:0",
                "dense_2/kernel:0",
            ],
            list(sorted(model.param.keys())),
        )
Ejemplo n.º 2
0
    def testSaveLoadCheckpoint(self):
        init_var = m["custom_model"]().trainable_variables
        with tempfile.TemporaryDirectory() as tempdir:
            chkp_dir = os.path.join(tempdir, "testSaveLoadCheckpoint")
            os.makedirs(chkp_dir)
            checkpointer = CheckpointService(chkp_dir, 3, 5, False)
            self.assertTrue(checkpointer.is_enabled())

            master = MasterServicer(
                2,
                3,
                None,
                None,
                init_var=init_var,
                checkpoint_filename_for_init="",
                checkpoint_service=checkpointer,
                evaluation_service=None,
            )

            req = elasticdl_pb2.GetModelRequest()
            req.method = elasticdl_pb2.MINIMUM
            req.version = 0
            model = master.GetModel(req, None)
            checkpointer.save(0, model, False)
            loaded_model = checkpointer.get_checkpoint_model(0)
            self.assertEqual(model.version, loaded_model.version)
            for var, loaded_var in zip(model.param, loaded_model.param):
                self.assertEqual(var, loaded_var)
Ejemplo n.º 3
0
 def _get_non_embedding_variables(self, version, method):
     """Get model from master, and update model_version
     """
     req = elasticdl_pb2.GetModelRequest()
     req.version = version
     req.method = method
     model = self._stub.GetModel(req, None)
     variables = {}
     for tensor_pb in model.param:
         tensor = Tensor.from_tensor_pb(tensor_pb)
         variables[tensor.name] = tensor.to_ndarray()
     return variables
Ejemplo n.º 4
0
    def get_model(self, version, method):
        """
        get model from master, and update model_version
        """
        req = elasticdl_pb2.GetModelRequest()
        req.version = version
        req.method = method
        model = self._stub.GetModel(req)

        for var in self._model.trainable_variables:
            # Assumes all trainable variables exist in model.param.
            var.assign(tensor_to_ndarray(model.param[var.name]))
        self._model_version = model.version
Ejemplo n.º 5
0
    def testInitFromCheckpoint(self):
        init_var = m["custom_model"]().trainable_variables
        with tempfile.TemporaryDirectory() as tempdir:
            chkp_dir = os.path.join(tempdir, "testInitFromCheckpoint")
            os.makedirs(chkp_dir)
            master = MasterServicer(
                2,
                3,
                None,
                None,
                init_var=init_var,
                checkpoint_filename_for_init="",
                checkpoint_service=CheckpointService(chkp_dir, 2, 3, False),
                evaluation_service=None,
            )
            req = elasticdl_pb2.GetModelRequest()
            req.method = elasticdl_pb2.MINIMUM
            req.version = 0
            model = master.GetModel(req, None)
            master._checkpoint_service.save(master._version, model, False)

            chkp_file = master._checkpoint_service.get_checkpoint_path(
                master._version
            )
            # Create variables from init_var, get init value from checkpoint.
            master2 = MasterServicer(
                2,
                3,
                None,
                None,
                init_var=init_var,
                checkpoint_filename_for_init=chkp_file,
                checkpoint_service=CheckpointService("", 0, 0, False),
                evaluation_service=None,
            )
            model2 = master2.GetModel(req, None)
            self.assertEqual(model, model2)
            # Create variables from checkpoint.
            master3 = MasterServicer(
                2,
                3,
                None,
                None,
                init_var=[],
                checkpoint_filename_for_init=chkp_file,
                checkpoint_service=CheckpointService("", 0, 0, False),
                evaluation_service=None,
            )
            model3 = master3.GetModel(req, None)
            self.assertEqual(model, model3)
Ejemplo n.º 6
0
    def get_model_from_master(self, version, method):
        """
        get model from master, and update model_version
        """
        req = elasticdl_pb2.GetModelRequest()
        req.version = version
        req.method = method
        model = self._stub.GetModel(req)

        # Assumes all trainable variables exist in model.param.
        for tensor_pb in model.param:
            tensor = Tensor.from_tensor_pb(tensor_pb)
            self._non_embed_vars[tensor.name].assign(tensor.to_ndarray())
        self._model_version = model.version
Ejemplo n.º 7
0
    def testGetModel(self):
        master = MasterServicer(
            2,
            3,
            None,
            None,
            init_var=[],
            checkpoint_filename_for_init="",
            checkpoint_service=CheckpointService("", 0, 0, False),
            evaluation_service=None,
        )
        master.set_model_var("x", np.array([1.0, 1.0], dtype=np.float32))
        # Now master model is version 0
        self.assertEqual(0, master._version)

        # Get version 0 with minimum method
        req = elasticdl_pb2.GetModelRequest()
        req.version = 0
        req.method = elasticdl_pb2.MINIMUM
        model = master.GetModel(req, None)
        self.assertEqual(0, model.version)
        self.assertEqual(["x"], list(model.param.keys()))
        np.testing.assert_array_equal(
            np.array([1.0, 1.0]), tensor_to_ndarray(model.param["x"])
        )

        # Increase master model version to 1, but still request
        # version 0 with minimum method, we should get version 1
        master._version = 1
        master.set_model_var("x", np.array([2.0, 2.0], dtype=np.float32))
        master.set_model_var("y", np.array([12.0, 13.0], dtype=np.float32))
        model = master.GetModel(req, None)
        self.assertEqual(1, model.version)
        self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
        np.testing.assert_array_equal(
            np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
        )
        np.testing.assert_array_equal(
            np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
        )

        # Try to get version 2, it should raise exception.
        req.version = 2
        self.assertRaises(ValueError, master.GetModel, req, None)

        # Get fixed version 1
        req.method = elasticdl_pb2.FIXED
        req.version = 1
        model = master.GetModel(req, None)
        self.assertEqual(1, model.version)
        self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
        np.testing.assert_array_equal(
            np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
        )
        np.testing.assert_array_equal(
            np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
        )

        # Previous model unavailable due to no checkpoint
        req.version = 0
        model = master.GetModel(req, None)
        self.assertFalse(model.param)

        # Previous model available through checkpoint
        with tempfile.TemporaryDirectory() as tempdir:
            chk_dir = os.path.join(tempdir, "testGetModel")
            os.makedirs(chk_dir)
            req.version = master._version
            req.method = elasticdl_pb2.MINIMUM
            model = master.GetModel(req, None)
            master._checkpoint_service = CheckpointService(
                chk_dir, 2, 5, False
            )
            master._checkpoint_service.save(master._version, model, False)
            master._version = 2
            master.set_model_var("z", np.array([2.0, 2.0], dtype=np.float32))
            req.version = 1
            req.method = elasticdl_pb2.FIXED
            model = master.GetModel(req, None)
            self.assertEqual(1, model.version)
            self.assertEqual(["x", "y"], list(sorted(model.param.keys())))
            np.testing.assert_array_equal(
                np.array([2.0, 2.0]), tensor_to_ndarray(model.param["x"])
            )
            np.testing.assert_array_equal(
                np.array([12.0, 13.0]), tensor_to_ndarray(model.param["y"])
            )