def init_data(self, use_cuda) -> None:
    """Prepare the reference model, its turbo counterpart and random inputs.

    Stores on ``self``: ``cfg``, ``intermediate_size``, ``hidden_size``,
    ``torch_bertout``, ``turbo_bertout``, ``intermediate_output`` and
    ``attention_output``.

    Args:
        use_cuda: When truthy, the model and inputs are placed on
            ``cuda:0``; otherwise on ``cpu:0`` with torch limited to a
            single thread.

    Note:
        ``batch_size`` and ``seq_length`` are read from the enclosing
        module scope; they are not defined in this block.
    """
    if use_cuda:
        test_device = torch.device('cuda:0')
    else:
        test_device = torch.device('cpu:0')
        torch.set_num_threads(1)

    # Gradient tracking is switched off globally, for both devices.
    torch.set_grad_enabled(False)

    cfg = BertConfig()
    self.cfg = cfg
    self.intermediate_size = cfg.intermediate_size  # 3072 per the original note
    self.hidden_size = cfg.hidden_size  # 768 per the original note

    # Reference PyTorch module, put in eval mode before conversion.
    self.torch_bertout = BertOutput(cfg)
    self.torch_bertout.eval()
    if use_cuda:
        self.torch_bertout.to(test_device)

    # Turbo module built from the (already placed) PyTorch module.
    self.turbo_bertout = turbo_transformers.BertOutput.from_torch(
        self.torch_bertout)

    # Both inputs share dtype and device; only the last dimension differs.
    rand_options = {'dtype': torch.float32, 'device': test_device}
    self.intermediate_output = torch.rand(
        size=(batch_size, seq_length, self.intermediate_size),
        **rand_options)
    self.attention_output = torch.rand(
        size=(batch_size, seq_length, self.hidden_size),
        **rand_options)
class TestBertOut(unittest.TestCase):
    """Check turbo_transformers.BertOutput against the PyTorch BertOutput.

    ``batch_size`` and ``seq_length`` are read from module scope; they are
    not defined inside this class.
    """

    def init_data(self, use_cuda) -> None:
        """Build both models and the random inputs on the chosen device.

        Args:
            use_cuda: When truthy, use ``cuda:0``; otherwise use ``cpu:0``
                and limit torch to a single thread.
        """
        test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')
        if not use_cuda:
            torch.set_num_threads(1)

        # Gradient tracking is disabled globally for the comparison.
        torch.set_grad_enabled(False)
        self.cfg = BertConfig()
        self.intermediate_size = self.cfg.intermediate_size  # 3072;
        self.hidden_size = self.cfg.hidden_size  # 768
        self.torch_bertout = BertOutput(self.cfg)
        self.torch_bertout.eval()
        if use_cuda:
            self.torch_bertout.to(test_device)

        # The turbo module is converted from the PyTorch module above.
        self.turbo_bertout = turbo_transformers.BertOutput.from_torch(
            self.torch_bertout)

        self.intermediate_output = torch.rand(
            size=(batch_size, seq_length, self.intermediate_size),
            dtype=torch.float32,
            device=test_device)
        self.attention_output = torch.rand(
            size=(batch_size, seq_length, self.hidden_size),
            dtype=torch.float32,
            device=test_device)

    def check_torch_and_turbo(self, use_cuda):
        """Run both implementations on the same inputs and compare outputs.

        Args:
            use_cuda: Forwarded to ``init_data`` and
                ``test_helper.run_model``; also selects the tolerance.
        """
        self.init_data(use_cuda)
        num_iter = 2
        device = "GPU" if use_cuda else "CPU"

        def torch_model():
            return self.torch_bertout(self.intermediate_output,
                                      self.attention_output)

        # run_model's result is unpacked as (result, qps, time); the time
        # value is not used here.
        torch_result, torch_qps, _ = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'Bert Output Plain PyTorch({device}) QPS {torch_qps}')

        def turbo_model():
            return self.turbo_bertout(self.intermediate_output,
                                      self.attention_output)

        turbo_result, turbo_qps, _ = \
            test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(
            f'Bert Output Plain TurboTransformer({device}) QPS {turbo_qps}'
        )

        # cuda version precision is lower due to tensor-core
        # The tolerance is chosen first. Writing
        # `diff < 1e-2 if use_cuda else 1e-4` inline parses as
        # `(diff < 1e-2) if use_cuda else 1e-4`, which on CPU asserts the
        # truthy constant 1e-4 and therefore never fails.
        tolerance = 1e-2 if use_cuda else 1e-4
        max_abs_diff = torch.max(torch.abs(torch_result - turbo_result))
        self.assertTrue(max_abs_diff < tolerance)

    def test_bertout(self):
        """Always check on CPU; also on GPU when torch and turbo support it."""
        self.check_torch_and_turbo(use_cuda=False)
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
def __init__(self, config):
    """Create the layer's sub-modules from ``config``.

    Always builds a ``NeZhaAttention`` self-attention block, a
    ``BertIntermediate`` and a ``BertOutput``. A second ``NeZhaAttention``
    is stored as ``crossattention`` only when ``config.is_decoder`` is
    truthy.

    Args:
        config: Configuration object passed unchanged to every sub-module;
            its ``is_decoder`` attribute is read here.
    """
    super().__init__()
    is_decoder = config.is_decoder

    self.attention = NeZhaAttention(config)
    self.is_decoder = is_decoder
    if is_decoder:
        # Decoder layers additionally get a cross-attention block.
        self.crossattention = NeZhaAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the layer's sub-modules from ``config``.

    Always builds a ``TTAAttention`` self-attention block, a
    ``BertIntermediate`` and a ``BertOutput``. When
    ``config.add_cross_attention`` is truthy, a second ``TTAAttention`` is
    stored as ``crossattention``.

    Args:
        config: Configuration object passed unchanged to every sub-module.
            Attributes read here: ``chunk_size_feed_forward``,
            ``is_decoder`` and ``add_cross_attention``.

    Raises:
        AssertionError: If ``config.add_cross_attention`` is truthy while
            ``config.is_decoder`` is falsy.
    """
    super().__init__()
    self.chunk_size_feed_forward = config.chunk_size_feed_forward
    self.seq_len_dim = 1
    self.attention = TTAAttention(config)
    self.is_decoder = config.is_decoder
    self.add_cross_attention = config.add_cross_attention
    if self.add_cross_attention:
        if not self.is_decoder:
            # Raised explicitly rather than via `assert`, so the check
            # still runs under `python -O`. The exception type and message
            # are the same as the assert produced.
            raise AssertionError(
                f"{self} should be used as a decoder model if cross attention is added"
            )
        self.crossattention = TTAAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the layer's three sub-modules from ``config``.

    Args:
        config: Configuration object passed unchanged to ``FastAttention``,
            ``BertIntermediate`` and ``BertOutput``.
    """
    # Zero-argument form; equivalent to naming FastformerLayer explicitly
    # when this method is defined in that class body.
    super().__init__()

    # Assignment order is kept: attention, then intermediate, then output.
    self.attention = FastAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the layer's three sub-modules from ``config``.

    Args:
        config: Configuration object passed unchanged to
            ``EntityAwareAttention``, ``BertIntermediate`` and
            ``BertOutput``.
    """
    # Zero-argument form; equivalent to naming EntityAwareLayer explicitly
    # when this method is defined in that class body.
    super().__init__()

    # Assignment order is kept: attention, then intermediate, then output.
    self.attention = EntityAwareAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Initialise the parent with ``config``, then set the sub-modules.

    Args:
        config: Configuration object passed to the parent initialiser and
            unchanged to ``CaptionBertAttention``, ``BertIntermediate``
            and ``BertOutput``.
    """
    # Zero-argument form; equivalent to naming CaptionBertLayer explicitly
    # when this method is defined in that class body. Unlike a bare
    # base-class call, the parent initialiser receives `config` here.
    super().__init__(config)

    # These assignments run after the parent initialiser, so they replace
    # any attributes of the same name it may have set (parent not visible
    # here - confirm).
    self.attention = CaptionBertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the layer's sub-modules from ``config``.

    Unconditionally builds two ``BertAttention`` instances (stored as
    ``attention`` and ``crossattention``), followed by a
    ``BertIntermediate`` and a ``BertOutput``.

    Args:
        config: Configuration object passed unchanged to every sub-module.
    """
    super().__init__()

    # Two separate attention instances, both built from the same config.
    self.attention = BertAttention(config)
    self.crossattention = BertAttention(config)

    # Feed-forward part of the layer.
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)