def testUserDefinedModel(self):
    """Check GetModel after seeding the master from a built SimpleModel.

    Every trainable variable of a ``SimpleModel`` built with
    ``SimpleModel.input_shapes()`` is pushed into the master through
    ``set_model_var``. The version-0 model returned by ``GetModel`` must
    then carry exactly the four dense-layer parameter names.
    """
    servicer = MasterServicer(
        2,
        3,
        None,
        None,
        init_var=[],
        checkpoint_filename_for_init="",
        checkpoint_service=CheckpointService("", 0, 0, False),
        evaluation_service=None,
    )

    request = elasticdl_pb2.GetModelRequest()
    request.method = elasticdl_pb2.MINIMUM
    request.version = 0

    # Build the user model so its variables exist, then copy each one
    # into the master under the variable's own name.
    user_model = SimpleModel()
    user_model.build(SimpleModel.input_shapes())
    for trainable in user_model.trainable_variables:
        servicer.set_model_var(trainable.name, trainable.numpy())

    # Fetch version 0 and compare the (sorted) parameter names.
    fetched = servicer.GetModel(request, None)
    expected_names = [
        "dense_1/bias:0",
        "dense_1/kernel:0",
        "dense_2/bias:0",
        "dense_2/kernel:0",
    ]
    self.assertEqual(0, fetched.version)
    self.assertEqual(expected_names, sorted(fetched.param.keys()))
def testSaveLoadCheckpoint(self):
    """Save the version-0 model to a checkpoint and verify it loads back.

    A master is created from the custom model's trainable variables with a
    ``CheckpointService`` rooted in a temporary directory. The model
    returned by ``GetModel`` is saved as version 0, loaded again with
    ``get_checkpoint_model(0)``, and compared with the original: same
    version, same number of ``param`` entries, and pairwise-equal entries.
    """
    init_var = m["custom_model"]().trainable_variables
    with tempfile.TemporaryDirectory() as tempdir:
        chkp_dir = os.path.join(tempdir, "testSaveLoadCheckpoint")
        os.makedirs(chkp_dir)
        checkpointer = CheckpointService(chkp_dir, 3, 5, False)
        self.assertTrue(checkpointer.is_enabled())

        master = MasterServicer(
            2,
            3,
            None,
            None,
            init_var=init_var,
            checkpoint_filename_for_init="",
            checkpoint_service=checkpointer,
            evaluation_service=None,
        )

        req = elasticdl_pb2.GetModelRequest()
        req.method = elasticdl_pb2.MINIMUM
        req.version = 0
        model = master.GetModel(req, None)

        checkpointer.save(0, model, False)
        loaded_model = checkpointer.get_checkpoint_model(0)
        self.assertEqual(model.version, loaded_model.version)

        # zip() stops at the shorter input, so without this length check a
        # checkpoint that loads back with missing (or zero) params would
        # pass the pairwise loop below vacuously.
        self.assertEqual(len(model.param), len(loaded_model.param))
        for var, loaded_var in zip(model.param, loaded_model.param):
            self.assertEqual(var, loaded_var)
def _get_non_embedding_variables(self, version, method):
    """Fetch the model from the master and return its parameters as arrays.

    A ``GetModelRequest`` carrying ``version`` and ``method`` is sent
    through ``self._stub``. Each entry of the response's ``param`` is
    converted with ``Tensor.from_tensor_pb``.

    Returns:
        A dict mapping each tensor's ``name`` to its ``to_ndarray()``
        result. No attribute of ``self`` is modified.
    """
    request = elasticdl_pb2.GetModelRequest()
    request.version = version
    request.method = method
    response = self._stub.GetModel(request, None)

    tensors = map(Tensor.from_tensor_pb, response.param)
    return {tensor.name: tensor.to_ndarray() for tensor in tensors}
def get_model(self, version, method):
    """Pull the model from the master and load it into the local model.

    Sends a ``GetModelRequest`` with ``version`` and ``method`` through
    ``self._stub``, assigns the received value to every trainable variable
    of ``self._model``, and records the received model version in
    ``self._model_version``.
    """
    request = elasticdl_pb2.GetModelRequest()
    request.version = version
    request.method = method
    response = self._stub.GetModel(request)

    # Each trainable variable is looked up by name in the response, so the
    # response is assumed to contain an entry for every one of them.
    for variable in self._model.trainable_variables:
        received = response.param[variable.name]
        variable.assign(tensor_to_ndarray(received))

    self._model_version = response.version
def testInitFromCheckpoint(self):
    """Masters initialised from a checkpoint file serve the saved model.

    A first master saves its model to a checkpoint. Two further masters
    are then created with that checkpoint file as
    ``checkpoint_filename_for_init``: one that is also given ``init_var``
    and one given an empty ``init_var``. Both must return a model equal to
    the one that was saved.
    """
    init_var = m["custom_model"]().trainable_variables

    def make_master(variables, init_file, service):
        # All masters in this test share everything except these three.
        return MasterServicer(
            2,
            3,
            None,
            None,
            init_var=variables,
            checkpoint_filename_for_init=init_file,
            checkpoint_service=service,
            evaluation_service=None,
        )

    with tempfile.TemporaryDirectory() as tempdir:
        chkp_dir = os.path.join(tempdir, "testInitFromCheckpoint")
        os.makedirs(chkp_dir)

        source = make_master(
            init_var, "", CheckpointService(chkp_dir, 2, 3, False)
        )

        request = elasticdl_pb2.GetModelRequest()
        request.method = elasticdl_pb2.MINIMUM
        request.version = 0

        saved_model = source.GetModel(request, None)
        source._checkpoint_service.save(source._version, saved_model, False)
        chkp_file = source._checkpoint_service.get_checkpoint_path(
            source._version
        )

        # Variables come from init_var, initial values from the checkpoint.
        with_init_var = make_master(
            init_var, chkp_file, CheckpointService("", 0, 0, False)
        )
        self.assertEqual(saved_model, with_init_var.GetModel(request, None))

        # Variables themselves are created from the checkpoint.
        without_init_var = make_master(
            [], chkp_file, CheckpointService("", 0, 0, False)
        )
        self.assertEqual(
            saved_model, without_init_var.GetModel(request, None)
        )
def get_model_from_master(self, version, method):
    """Pull the model from the master into the local non-embedding variables.

    Sends a ``GetModelRequest`` with ``version`` and ``method`` through
    ``self._stub``, assigns each received tensor to the same-named entry
    of ``self._non_embed_vars``, and stores the received model version in
    ``self._model_version``.
    """
    request = elasticdl_pb2.GetModelRequest()
    request.version = version
    request.method = method
    response = self._stub.GetModel(request)

    # Every tensor in the response is assumed to have a same-named entry
    # in self._non_embed_vars; the lookup is not guarded.
    for param_pb in response.param:
        received = Tensor.from_tensor_pb(param_pb)
        target = self._non_embed_vars[received.name]
        target.assign(received.to_ndarray())

    self._model_version = response.version
def testGetModel(self):
    """Walk GetModel through its MINIMUM and FIXED request methods.

    Scenarios, in order:
      * MINIMUM, version 0, master at version 0: returns version 0.
      * MINIMUM, version 0, master at version 1: returns version 1.
      * MINIMUM, version 2, master at version 1: raises ValueError.
      * FIXED, version 1, master at version 1: returns version 1.
      * FIXED, version 0 with no checkpoint: returned model has no params.
      * FIXED, version 1 after the master moved to version 2 and version 1
        was saved to a checkpoint: returns the saved version 1.
    """

    def assert_version_one(fetched):
        # Version 1 always holds x == [2, 2] and y == [12, 13].
        self.assertEqual(1, fetched.version)
        self.assertEqual(["x", "y"], sorted(fetched.param.keys()))
        np.testing.assert_array_equal(
            np.array([2.0, 2.0]), tensor_to_ndarray(fetched.param["x"])
        )
        np.testing.assert_array_equal(
            np.array([12.0, 13.0]), tensor_to_ndarray(fetched.param["y"])
        )

    master = MasterServicer(
        2,
        3,
        None,
        None,
        init_var=[],
        checkpoint_filename_for_init="",
        checkpoint_service=CheckpointService("", 0, 0, False),
        evaluation_service=None,
    )
    master.set_model_var("x", np.array([1.0, 1.0], dtype=np.float32))

    # The master model starts at version 0.
    self.assertEqual(0, master._version)

    # MINIMUM request for version 0 returns version 0.
    request = elasticdl_pb2.GetModelRequest()
    request.version = 0
    request.method = elasticdl_pb2.MINIMUM
    fetched = master.GetModel(request, None)
    self.assertEqual(0, fetched.version)
    self.assertEqual(["x"], list(fetched.param.keys()))
    np.testing.assert_array_equal(
        np.array([1.0, 1.0]), tensor_to_ndarray(fetched.param["x"])
    )

    # Move the master to version 1; the same MINIMUM request for
    # version 0 must now return version 1.
    master._version = 1
    master.set_model_var("x", np.array([2.0, 2.0], dtype=np.float32))
    master.set_model_var("y", np.array([12.0, 13.0], dtype=np.float32))
    assert_version_one(master.GetModel(request, None))

    # Asking for version 2 must raise.
    request.version = 2
    with self.assertRaises(ValueError):
        master.GetModel(request, None)

    # FIXED request for the current version 1.
    request.method = elasticdl_pb2.FIXED
    request.version = 1
    assert_version_one(master.GetModel(request, None))

    # An earlier version is unavailable because nothing was checkpointed.
    request.version = 0
    fetched = master.GetModel(request, None)
    self.assertFalse(fetched.param)

    # An earlier version becomes available once it has been checkpointed.
    with tempfile.TemporaryDirectory() as tempdir:
        chk_dir = os.path.join(tempdir, "testGetModel")
        os.makedirs(chk_dir)

        request.version = master._version
        request.method = elasticdl_pb2.MINIMUM
        current = master.GetModel(request, None)
        master._checkpoint_service = CheckpointService(chk_dir, 2, 5, False)
        master._checkpoint_service.save(master._version, current, False)

        master._version = 2
        master.set_model_var("z", np.array([2.0, 2.0], dtype=np.float32))

        request.version = 1
        request.method = elasticdl_pb2.FIXED
        assert_version_one(master.GetModel(request, None))