import unittest

import numpy
import torch

import turbo_transformers
from transformers import RobertaConfig, RobertaModel

import test_helper  # local helper module in this repo providing run_model()


class TestRobertaModel(unittest.TestCase):
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = RobertaConfig()
        self.torch_model = RobertaModel(self.cfg)
        self.torch_model.eval()
        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.RobertaModel.from_torch(
            self.torch_model, self.test_device)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda)
        num_iter = 20
        device_name = "GPU" if use_cuda else "CPU"
        input_ids = torch.randint(low=0,
                                  high=self.cfg.vocab_size - 1,
                                  size=(1, 10),
                                  dtype=torch.long,
                                  device=self.test_device)

        # Benchmark the PyTorch model.
        torch_model = lambda: self.torch_model(input_ids)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'RobertaModel PyTorch({device_name}) QPS {torch_qps}')

        # Benchmark the TurboTransformers model under a profiling guard.
        turbo_model = lambda: self.turbo_model(input_ids)
        with turbo_transformers.pref_guard("roberta_perf") as perf:
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(f'RobertaModel TurboTransformers({device_name}) QPS {turbo_qps}')

        # Compare the two implementations' outputs within a small tolerance.
        torch_result_final = torch_result[0].cpu().numpy()
        turbo_result_final = turbo_result[0].cpu().numpy()
        # print(numpy.size(torch_result_final), numpy.size(turbo_result_final))
        # print(torch_result_final - turbo_result_final)
        self.assertTrue(
            numpy.allclose(torch_result_final,
                           turbo_result_final,
                           atol=1e-3,
                           rtol=1e-3))

    def test_Roberta_model(self):
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
        self.check_torch_and_turbo(use_cuda=False)
def test(use_cuda):
    torch.set_grad_enabled(False)
    torch.set_num_threads(4)
    turbo_transformers.set_num_threads(4)

    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')

    cfg = RobertaConfig()
    torch_model = RobertaModel(cfg)
    torch_model.eval()
    if torch.cuda.is_available():
        torch_model.to(test_device)

    turbo_model = turbo_transformers.RobertaModel.from_torch(
        torch_model, test_device)

    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(1, 10),
                              dtype=torch.long,
                              device=test_device)

    # Compare the first token's hidden state from the PyTorch model against
    # the turbo model's output.
    torch_result = torch_model(input_ids)
    torch_result_final = torch_result[0][:, 0].cpu().numpy()

    turbo_result = turbo_model(input_ids)
    turbo_result_final = turbo_result[0].cpu().numpy()

    # Uncomment to inspect the differences between the two outputs.
    # print(numpy.size(torch_result_final), numpy.size(turbo_result_final))
    # print(torch_result_final - turbo_result_final)

    assert numpy.allclose(torch_result_final,
                          turbo_result_final,
                          atol=1e-3,
                          rtol=1e-3)
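# A minimal entry point, added here as a sketch (not part of the original
# snippet): when the file is run directly, exercise the plain-function check
# on CPU and then let unittest discover and run TestRobertaModel.
if __name__ == '__main__':
    test(use_cuda=False)
    unittest.main()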