def run_reader_extractive(checkpointDict, reader_output, reranker_output):
    ext_reader_cfg = config["reader"]["extractive"]["config"]
    cache_dir = config["transformers_cache"]
    # overwrite the old loaded cache path
    checkpointDict["config"]["cache"] = cache_dir

    model = Reader(checkpointDict["config"], initPretrainedWeights=False)
    Checkpoint.loadModel(model, checkpointDict, config["device"])

    if "multi_gpu" in ext_reader_cfg and ext_reader_cfg["multi_gpu"] \
            and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        logging.info("DataParallel active!")

    extractor = AnswerExtractor(model, config["device"])
    extractor.model.eval()

    tokenizer = AutoTokenizer.from_pretrained(
        checkpointDict["config"]["tokenizer_type"],
        cache_dir=cache_dir,
        use_fast=True)
    database = PassDatabase(get_database_path())

    with ReaderDataset(reranker_output, tokenizer, database,
                       ext_reader_cfg["batch_size"],
                       checkpointDict["config"]["include_doc_title"]) as dataset:
        logging.info("Extracting top k answer scores")
        res = {}
        for i, (query, answers, scores, passageIds, charOffsets) in \
                tqdm(enumerate(extractor.extract(
                        dataset,
                        ext_reader_cfg["top_k_answers"],
                        ext_reader_cfg["max_tokens_for_answer"])),
                     total=len(dataset)):
            res[i] = {
                "raw_question": query,
                "answers": answers,
                "reader_scores": scores,
                "passages": passageIds,
                "char_offsets": charOffsets
            }

    with jsonlines.open(reader_output, "w") as wF:
        for _, record in res.items():
            wF.write(record)
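# For reference, a hedged sketch (not part of the original module) of how the
# jsonl file written by run_reader_extractive could be consumed. The key names
# come from the records built above; the helper name, the field comments and
# any ordering assumptions are illustrative only. Relies on the jsonlines
# import already used in this module.
def iter_reader_output(path):
    """Hypothetical helper: iterate records produced by run_reader_extractive."""
    with jsonlines.open(path) as rF:
        for record in rF:
            # each record pairs the raw question with the extracted top-k
            # answers and their reader scores, plus the ids of the source
            # passages and the character offsets of the answers within them
            yield (record["raw_question"], record["answers"],
                   record["reader_scores"], record["passages"],
                   record["char_offsets"])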
class TestCheckpoint(TestBase):

    def setUp(self):
        self.model = MockModel()
        self.optimizer = Adam(self.model.parameters(), lr=0.5)
        self.scheduler = LambdaLR(self.optimizer, lambda x: 1.0)
        self.scheduler.last_epoch = 100
        self.batchesPerm = [1, 2, 5, 3, 4, 5]
        self.batchesDone = 2
        self.config = {"attr": 1, "attr2": 2}
        self.steps = 100
        self.checkpoint = Checkpoint(model=self.model,
                                     optimizer=self.optimizer,
                                     scheduler=self.scheduler,
                                     batchesPerm=self.batchesPerm,
                                     batchesDone=self.batchesDone,
                                     config=self.config,
                                     steps=self.steps)
        self.checkpointDict = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "scheduler": self.scheduler.state_dict(),
            "batchesPerm": self.batchesPerm,
            "batchesDone": self.batchesDone,
            "config": self.config,
            "steps": self.steps
        }

    def test_save(self):
        self.checkpoint.save(self.saveCheckpointTo)
        self.assertTrue(os.path.exists(self.saveCheckpointTo))

        loaded = torch.load(self.saveCheckpointTo)
        self.assertTrue(isinstance(loaded, dict))

        # has all keys
        allKeys = {
            "model", "optimizer", "scheduler", "batchesPerm", "batchesDone",
            "config", "steps"
        }
        self.assertEqual(len(loaded), len(allKeys))
        for k in allKeys:
            self.assertTrue(k in loaded,
                            msg=f"The checkpoint does not have {k} key.")

        self.assertListEqual(loaded["model"]["weight"].tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(loaded["model"]["weight2"].tolist(),
                             self.model.weight2.data.tolist())
        self.assertEqual(loaded["optimizer"]["param_groups"][0]["lr"], 0.5)
        self.assertEqual(loaded["scheduler"]["base_lrs"][0], 0.5)
        self.assertEqual(loaded["scheduler"]["last_epoch"], 100)
        self.assertEqual(loaded["batchesPerm"], self.batchesPerm)
        self.assertEqual(loaded["batchesDone"], self.batchesDone)
        self.assertEqual(loaded["config"], self.config)
        self.assertEqual(loaded["steps"], self.steps)

    def test_load(self):
        model = MockModel()
        model.weight.data[0] = 100
        model.weight2.data[0] = 100
        optimizerF = AnyOptimizerFactory(Adam, {"lr": 999})
        schedulerF = AnySchedulerFactory(LambdaLR,
                                         {"lr_lambda": lambda x: 1.0})
        checkpoint = Checkpoint.load(model=model,
                                     optimizerF=optimizerF,
                                     schedulerF=schedulerF,
                                     checkpoint=self.savedFixtureCheckpointTo)

        self.assertListEqual(model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(model.weight2.tolist(),
                             self.model.weight2.data.tolist())
        self.assertListEqual(checkpoint.model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(checkpoint.model.weight2.tolist(),
                             self.model.weight2.data.tolist())
        for p in checkpoint.optimizer.param_groups:
            self.assertEqual(p["lr"], 0.5)
        for blr in checkpoint.scheduler.base_lrs:
            self.assertEqual(blr, 0.5)
        self.assertEqual(checkpoint.scheduler.last_epoch, 100)
        self.assertEqual(checkpoint.batchesPerm, self.batchesPerm)
        self.assertEqual(checkpoint.batchesDone, self.batchesDone)
        self.assertEqual(checkpoint.config, self.config)
        self.assertEqual(checkpoint.steps, self.steps)

        # test if the optimizer updates the model
        model.weight.grad = torch.ones_like(model.weight.data)
        checkpoint.optimizer.step()
        checkpoint.optimizer.zero_grad()

        # check the parameters
        self.assertTrue(
            torch.allclose(model.weight.grad,
                           torch.zeros_like(model.weight.grad)))
        self.assertFalse(
            torch.allclose(model.weight.data, self.model.weight.data))
        self.assertTrue(
            torch.allclose(model.weight2.data, self.model.weight2.data))

        # check the step of scheduler
        checkpoint.scheduler.step()
        self.assertEqual(checkpoint.scheduler.last_epoch, 101)

    def test_loadPreloaded(self):
        model = MockModel()
        model.weight.data[0] = 100
        model.weight2.data[0] = 100
        optimizerF = AnyOptimizerFactory(Adam, {"lr": 999})
        schedulerF = AnySchedulerFactory(LambdaLR,
                                         {"lr_lambda": lambda x: 1.0})
        checkpoint = Checkpoint.load(model=model,
                                     optimizerF=optimizerF,
                                     schedulerF=schedulerF,
                                     checkpoint=self.checkpointDict)

        self.assertListEqual(model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(model.weight2.tolist(),
                             self.model.weight2.data.tolist())
        self.assertListEqual(checkpoint.model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(checkpoint.model.weight2.tolist(),
                             self.model.weight2.data.tolist())
        for p in checkpoint.optimizer.param_groups:
            self.assertEqual(p["lr"], 0.5)
        for blr in checkpoint.scheduler.base_lrs:
            self.assertEqual(blr, 0.5)
        self.assertEqual(checkpoint.scheduler.last_epoch, 100)
        self.assertEqual(checkpoint.batchesPerm, self.batchesPerm)
        self.assertEqual(checkpoint.batchesDone, self.batchesDone)
        self.assertEqual(checkpoint.config, self.config)
        self.assertEqual(checkpoint.steps, self.steps)

        # test if the optimizer updates the model
        model.weight.grad = torch.ones_like(model.weight.data)
        checkpoint.optimizer.step()
        checkpoint.optimizer.zero_grad()

        # check the parameters
        self.assertTrue(
            torch.allclose(model.weight.grad,
                           torch.zeros_like(model.weight.grad)))
        self.assertFalse(
            torch.allclose(model.weight.data, self.model.weight.data))
        self.assertTrue(
            torch.allclose(model.weight2.data, self.model.weight2.data))

        # check the step of scheduler
        checkpoint.scheduler.step()
        self.assertEqual(checkpoint.scheduler.last_epoch, 101)

    def test_loadGPU(self):
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
            model = MockModel()
            model.weight.data[0] = 100
            model.weight2.data[0] = 100
            optimizerF = AnyOptimizerFactory(Adam, {"lr": 999})
            schedulerF = AnySchedulerFactory(LambdaLR,
                                             {"lr_lambda": lambda x: 1.0})
            checkpoint = Checkpoint.load(
                model=model,
                optimizerF=optimizerF,
                schedulerF=schedulerF,
                checkpoint=self.savedFixtureCheckpointTo,
                device=device)

            self.assertEqual(model.weight.data.device, device)
            self.assertEqual(model.weight2.data.device, device)

            # test if the optimizer updates the model
            model.weight.grad = torch.ones_like(model.weight)
            checkpoint.optimizer.step()
            checkpoint.optimizer.zero_grad()

            # check the parameters
            self.assertTrue(
                torch.allclose(model.weight.grad,
                               torch.zeros_like(model.weight.grad)))
            self.assertFalse(
                torch.allclose(model.weight.data,
                               self.model.weight.data.to(device)))
            self.assertTrue(
                torch.allclose(model.weight2.data,
                               self.model.weight2.data.to(device)))

            # check the step of scheduler
            checkpoint.scheduler.step()
            self.assertEqual(checkpoint.scheduler.last_epoch, 101)
        else:
            self.skipTest("Cuda device is not available.")

    def test_loadModel(self):
        model = MockModel()
        model.weight.data[0] = 100
        model.weight2.data[0] = 100
        Checkpoint.loadModel(model=model,
                             checkpoint=self.savedFixtureCheckpointTo)
        self.assertListEqual(model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(model.weight2.tolist(),
                             self.model.weight2.data.tolist())

    def test_loadModelPreloaded(self):
        model = MockModel()
        model.weight.data[0] = 100
        model.weight2.data[0] = 100
        Checkpoint.loadModel(model=model, checkpoint=self.checkpointDict)
        self.assertListEqual(model.weight.tolist(),
                             self.model.weight.data.tolist())
        self.assertListEqual(model.weight2.tolist(),
                             self.model.weight2.data.tolist())

    def test_loadModelGPU(self):
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
            model = MockModel()
            model.weight.data[0] = 100
            model.weight2.data[0] = 100
            Checkpoint.loadModel(model=model,
                                 checkpoint=self.savedFixtureCheckpointTo,
                                 device=device)
            self.assertEqual(model.weight.data.device, device)
            self.assertEqual(model.weight2.data.device, device)

            # check the parameters
            self.assertListEqual(model.weight.tolist(),
                                 self.model.weight.data.tolist())
            self.assertListEqual(model.weight2.tolist(),
                                 self.model.weight2.data.tolist())
        else:
            self.skipTest("Cuda device is not available.")
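# The tests above rely on fixtures defined elsewhere (TestBase with its
# saveCheckpointTo/savedFixtureCheckpointTo paths, MockModel, and the
# AnyOptimizerFactory/AnySchedulerFactory helpers). As a hedged sketch only,
# consistent with what the assertions touch rather than the project's actual
# definition: a minimal MockModel would be an nn.Module with two registered
# parameters, so that state_dict() exposes the keys "weight" and "weight2".
class MockModelSketch(torch.nn.Module):
    """Illustrative stand-in; the real MockModel lives outside this section."""

    def __init__(self):
        super().__init__()
        # nn.Parameter registration makes both tensors visible to
        # parameters(), to the optimizer, and to state_dict()
        self.weight = torch.nn.Parameter(torch.tensor([1.0, 2.0, 3.0]))
        self.weight2 = torch.nn.Parameter(torch.tensor([4.0, 5.0, 6.0]))

    def forward(self, x):
        # assumed forward pass; the tests never call it
        return x * self.weight + self.weight2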