Example #1
def __init__(self, config):
    super().__init__()
    # Self-attention, followed by the position-wise feed-forward
    # sub-layer (BertIntermediate + BertOutput).
    self.attention = NeZhaAttention(config)
    self.is_decoder = config.is_decoder
    if self.is_decoder:
        # Decoder layers also attend over the encoder's output.
        self.crossattention = NeZhaAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
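All of these constructors wire BertIntermediate into the same layer skeleton; in forward the pieces are typically chained like this (a minimal sketch assuming the usual Hugging Face signatures, where attention modules return a tuple and BertOutput adds the residual connection and LayerNorm):

def forward(self, hidden_states, attention_mask=None):
    # Self-attention sub-layer (cross-attention omitted for brevity).
    attention_output = self.attention(hidden_states, attention_mask)[0]
    # Feed-forward sub-layer: dense + activation ...
    intermediate_output = self.intermediate(attention_output)
    # ... then dense + dropout + residual + LayerNorm.
    layer_output = self.output(intermediate_output, attention_output)
    return layer_output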
Example #2
def __init__(self, config):
    super().__init__()
    # Chunking splits the feed-forward pass along the sequence
    # dimension (dim 1) to bound peak memory; see the sketch below.
    self.chunk_size_feed_forward = config.chunk_size_feed_forward
    self.seq_len_dim = 1
    self.attention = TTAAttention(config)
    self.is_decoder = config.is_decoder
    self.add_cross_attention = config.add_cross_attention
    if self.add_cross_attention:
        assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
        self.crossattention = TTAAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
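The chunk_size_feed_forward / seq_len_dim pair feeds Hugging Face's apply_chunking_to_forward helper, which runs the feed-forward sub-layer over sequence chunks. A sketch of how a layer like this typically uses it (forward_ffn is an illustrative name; the helper lives in transformers.pytorch_utils in recent releases, transformers.modeling_utils in older ones):

from transformers.pytorch_utils import apply_chunking_to_forward

def feed_forward_chunk(self, attention_output):
    # One chunk of the feed-forward sub-layer.
    intermediate_output = self.intermediate(attention_output)
    return self.output(intermediate_output, attention_output)

def forward_ffn(self, attention_output):
    # Splits attention_output into chunks of chunk_size_feed_forward along
    # seq_len_dim (dim 1) and concatenates the per-chunk results.
    return apply_chunking_to_forward(self.feed_forward_chunk,
                                     self.chunk_size_feed_forward,
                                     self.seq_len_dim,
                                     attention_output)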
Example #3
def __init__(self, config):
    super(FastformerLayer, self).__init__()
    # Fastformer attention in front of the standard BERT
    # feed-forward sub-layer.
    self.attention = FastAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
Example #4
def __init__(self, config):
    super(EntityAwareLayer, self).__init__()

    self.attention = EntityAwareAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
Example #5
def __init__(self, config):
    super(CaptionBertLayer, self).__init__(config)
    self.attention = CaptionBertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
Example #6
import unittest

import numpy
import torch
import turbo_transformers
from transformers import BertConfig
# Import path for transformers >= 4.x; older releases exposed this as
# transformers.modeling_bert.BertIntermediate.
from transformers.models.bert.modeling_bert import BertIntermediate

import test_helper  # local helper module from the turbo_transformers test suite


class TestBertIntermediate(unittest.TestCase):
    def init_data(self, use_cuda: bool) -> None:
        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')
        if not use_cuda:
            torch.set_num_threads(4)
            turbo_transformers.set_num_threads(4)

        torch.set_grad_enabled(False)
        self.cfg = BertConfig()

        self.torch_intermediate = BertIntermediate(self.cfg)
        if torch.cuda.is_available():
            self.torch_intermediate.to(self.test_device)
        self.torch_intermediate.eval()

        # Build the turbo_transformers layer from the torch layer's weights.
        self.turbo_intermediate = turbo_transformers.BertIntermediate.from_torch(
            self.torch_intermediate)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda=use_cuda)
        device = "GPU" if use_cuda else "CPU"
        num_iter = 2
        # batch_size and seq_length were undefined in the original snippet;
        # fixed here with illustrative sizes so the test is self-contained.
        batch_size, seq_length = 1, 128
        hidden_size = self.cfg.hidden_size
        input_tensor = torch.rand(size=(batch_size, seq_length, hidden_size),
                                  dtype=torch.float32,
                                  device=self.test_device)

        turbo_model = lambda: self.turbo_intermediate(input_tensor)
        turbo_result, turbo_qps, turbo_time = \
            test_helper.run_model(turbo_model, use_cuda, num_iter)

        print(f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
              f"{device} TurboTransformers QPS, {turbo_qps}, time, {turbo_time}")

        torch_model = lambda: self.torch_intermediate(input_tensor)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)

        print(f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
              f"{device} Torch QPS, {torch_qps}, time, {torch_time}")

        torch_result = torch_result.cpu().numpy()
        turbo_result = turbo_result.cpu().numpy()

        # The two implementations should agree within loose float32 tolerances.
        self.assertTrue(
            numpy.allclose(torch_result, turbo_result, rtol=1e-4, atol=1e-3))

        with open("bert_intermediate_res.txt", "a") as fh:
            fh.write(
                f"\"({batch_size},{seq_length:03})\", {torch_qps}, {turbo_qps}\n")

    def test_intermediate(self):
        self.check_torch_and_turbo(use_cuda=False)
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
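Stripped of the benchmarking scaffolding, the torch/turbo round trip this test exercises reduces to a few lines (CPU path; shapes illustrative):

cfg = BertConfig()
torch_layer = BertIntermediate(cfg).eval()
turbo_layer = turbo_transformers.BertIntermediate.from_torch(torch_layer)

x = torch.rand(1, 128, cfg.hidden_size)
with torch.no_grad():
    numpy.testing.assert_allclose(torch_layer(x).numpy(),
                                  turbo_layer(x).cpu().numpy(),
                                  rtol=1e-4, atol=1e-3)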
Example #7
def __init__(self, config):
    super().__init__()
    # Unlike Example #1, the cross-attention block is created
    # unconditionally rather than only for decoder configs.
    self.attention = BertAttention(config)
    self.crossattention = BertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
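For reference, every layer above delegates the same work to BertIntermediate, which in Hugging Face transformers is essentially a single linear projection followed by the configured activation (paraphrased from the library; check your installed version for the exact source):

import torch.nn as nn
from transformers.activations import ACT2FN

class BertIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        # hidden_act may be a string key into ACT2FN or a callable.
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states):
        # hidden_size -> intermediate_size, then the nonlinearity;
        # BertOutput projects back down and adds residual + LayerNorm.
        hidden_states = self.dense(hidden_states)
        return self.intermediate_act_fn(hidden_states)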