def __init__(self, config):
    """Create this layer's sub-modules from ``config``.

    Args:
        config: Configuration object. ``config.is_decoder`` is read here and
            the object itself is passed to every sub-module constructor.
    """
    super().__init__()
    self.attention = NeZhaAttention(config)

    # The extra attention module exists only when the config marks the
    # model as a decoder.
    decoder_mode = config.is_decoder
    self.is_decoder = decoder_mode
    if decoder_mode:
        self.crossattention = NeZhaAttention(config)

    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def init_data(self, use_cuda: bool) -> None:
    """Prepare the torch and turbo ``BertIntermediate`` modules under test.

    Args:
        use_cuda: When true, ``cuda:0`` is selected as the test device;
            otherwise ``cpu:0`` is used and both libraries are limited to
            four threads.
    """
    if use_cuda:
        self.test_device = torch.device('cuda:0')
    else:
        self.test_device = torch.device('cpu:0')
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

    # Gradient tracking is switched off globally for the comparison.
    torch.set_grad_enabled(False)

    self.cfg = BertConfig()
    reference = BertIntermediate(self.cfg)
    self.torch_intermediate = reference
    # The move is keyed on CUDA availability, not on ``use_cuda``.
    if torch.cuda.is_available():
        reference.to(self.test_device)
    reference.eval()

    # The turbo module is derived from the already-prepared torch module.
    self.turbo_intermediate = turbo_transformers.BertIntermediate.from_torch(
        reference)
def __init__(self, config):
    """Create this layer's sub-modules from ``config``.

    Args:
        config: Configuration object. ``chunk_size_feed_forward``,
            ``is_decoder`` and ``add_cross_attention`` are read here and the
            object itself is passed to every sub-module constructor.

    Raises:
        AssertionError: If ``config.add_cross_attention`` is truthy while
            ``config.is_decoder`` is not.
    """
    super().__init__()
    self.chunk_size_feed_forward = config.chunk_size_feed_forward
    self.seq_len_dim = 1
    self.attention = TTAAttention(config)
    self.is_decoder = config.is_decoder
    self.add_cross_attention = config.add_cross_attention
    if self.add_cross_attention:
        # Raised explicitly instead of via an ``assert`` statement so the
        # check is not stripped under ``python -O``. The exception type and
        # message are kept so existing callers see the same error.
        if not self.is_decoder:
            raise AssertionError(
                f"{self} should be used as a decoder model if cross attention is added"
            )
        self.crossattention = TTAAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the attention, intermediate and output sub-modules.

    Args:
        config: Configuration object passed unchanged to each sub-module
            constructor.
    """
    super().__init__()

    # Construction order is kept: attention, then intermediate, then output.
    self.attention = FastAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Create the attention, intermediate and output sub-modules.

    Args:
        config: Configuration object passed unchanged to each sub-module
            constructor.
    """
    super().__init__()

    # Construction order is kept: attention, then intermediate, then output.
    self.attention = EntityAwareAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    """Initialise the parent with ``config``, then assign the sub-modules.

    Args:
        config: Configuration object passed to the parent constructor and
            to each sub-module constructor.
    """
    # Unlike a bare module initialiser, the parent receives ``config``.
    super().__init__(config)

    # NOTE(review): presumably the parent already creates attributes with
    # these names; they are (re)assigned here regardless - confirm against
    # the parent class.
    self.attention = CaptionBertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
class TestBertIntermediate(unittest.TestCase):
    """Check turbo_transformers' ``BertIntermediate`` against the torch one.

    NOTE(review): ``batch_size`` and ``seq_length`` are not defined inside
    this class; presumably they come from an enclosing scope - confirm.
    """

    def init_data(self, use_cuda: bool) -> None:
        """Prepare the torch and turbo modules for one comparison run.

        Args:
            use_cuda: When true, ``cuda:0`` is selected as the test device;
                otherwise ``cpu:0`` is used and both libraries are limited
                to four threads.
        """
        if use_cuda:
            self.test_device = torch.device('cuda:0')
        else:
            self.test_device = torch.device('cpu:0')
            torch.set_num_threads(4)
            turbo_transformers.set_num_threads(4)

        # Gradient tracking is switched off globally for the comparison.
        torch.set_grad_enabled(False)

        self.cfg = BertConfig()
        reference = BertIntermediate(self.cfg)
        self.torch_intermediate = reference
        # The move is keyed on CUDA availability, not on ``use_cuda``.
        if torch.cuda.is_available():
            reference.to(self.test_device)
        reference.eval()

        self.turbo_intermediate = \
            turbo_transformers.BertIntermediate.from_torch(reference)

    def check_torch_and_turbo(self, use_cuda):
        """Run both implementations on one random input and compare them.

        Prints the QPS and time reported by ``test_helper.run_model`` for
        each implementation, asserts the two results are numerically close,
        and appends a line to ``bert_intermediate_res.txt``.

        Args:
            use_cuda: Forwarded to ``init_data`` and ``run_model``; also
                selects the "GPU"/"CPU" label used in the printed output.
        """
        self.init_data(use_cuda=use_cuda)
        if use_cuda:
            device = "GPU"
        else:
            device = "CPU"
        iterations = 2

        sample = torch.rand(
            size=(batch_size, seq_length, self.cfg.hidden_size),
            dtype=torch.float32,
            device=self.test_device,
        )

        def turbo_model():
            return self.turbo_intermediate(sample)

        def torch_model():
            return self.torch_intermediate(sample)

        # The turbo implementation is measured first, then the torch one.
        turbo_result, turbo_qps, turbo_time = test_helper.run_model(
            turbo_model, use_cuda, iterations)
        print(
            f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
            f"{device} TurboTransform QPS, {turbo_qps}, time, {turbo_time}"
        )

        torch_result, torch_qps, torch_time = test_helper.run_model(
            torch_model, use_cuda, iterations)
        print(f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
              f"{device} Torch QPS, {torch_qps}, time, {torch_time}")

        # Both results are brought to host memory as arrays for comparison.
        torch_result = torch_result.cpu().numpy()
        turbo_result = turbo_result.cpu().numpy()
        outputs_match = numpy.allclose(
            torch_result, turbo_result, rtol=1e-4, atol=1e-3)
        self.assertTrue(outputs_match)

        # Append mode: results from successive runs accumulate in the file.
        with open("bert_intermediate_res.txt", "a") as fh:
            fh.write(
                f"\"({batch_size},{seq_length:03})\", {torch_qps}, {turbo_qps}\n"
            )

    def test_intermediate(self):
        """Run the comparison on CPU, and on GPU when CUDA is usable."""
        self.check_torch_and_turbo(use_cuda=False)

        # ``and`` short-circuits, so the turbo build is queried only when
        # torch reports CUDA as available.
        gpu_usable = (torch.cuda.is_available()
                      and turbo_transformers.config.is_compiled_with_cuda())
        if gpu_usable:
            self.check_torch_and_turbo(use_cuda=True)
def __init__(self, config):
    """Create self-attention, cross-attention and feed-forward sub-modules.

    Args:
        config: Configuration object passed unchanged to each sub-module
            constructor.
    """
    super().__init__()

    # Two separate attention modules are built from the same config; the
    # cross-attention one is created unconditionally.
    self.attention = BertAttention(config)
    self.crossattention = BertAttention(config)

    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)