    class TestBertOut(unittest.TestCase):
        def init_data(self, use_cuda) -> None:
            test_device = torch.device('cuda:0') if use_cuda else \
                    torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(1)

            torch.set_grad_enabled(False)
            self.cfg = BertConfig()
            self.intermediate_size = self.cfg.intermediate_size  # 3072
            self.hidden_size = self.cfg.hidden_size  # 768
            self.torch_bertout = BertOutput(self.cfg)
            self.torch_bertout.eval()
            if use_cuda:
                self.torch_bertout.to(test_device)

            self.turbo_bertout = turbo_transformers.BertOutput.from_torch(
                self.torch_bertout)

            self.intermediate_output = torch.rand(
                size=(batch_size, seq_length, self.intermediate_size),
                dtype=torch.float32,
                device=test_device)
            self.attention_output = torch.rand(size=(batch_size, seq_length,
                                                     self.hidden_size),
                                               dtype=torch.float32,
                                               device=test_device)

        def check_torch_and_turbo(self, use_cuda):
            self.init_data(use_cuda)
            num_iter = 2
            device = "GPU" if use_cuda else "CPU"

            torch_model = lambda: self.torch_bertout(self.intermediate_output,
                                                     self.attention_output)
            torch_result, torch_qps, torch_time = \
                test_helper.run_model(torch_model, use_cuda, num_iter)
            print(f'Bert Output Plain PyTorch({device}) QPS {torch_qps}')

            turbo_model = lambda: self.turbo_bertout(self.intermediate_output,
                                                     self.attention_output)
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)
            print(
                f'Bert Output Plain TurboTransformer({device}) QPS {turbo_qps}'
            )

            # The CUDA path allows a larger tolerance because tensor cores
            # trade precision for speed.
            tolerance = 1e-2 if use_cuda else 1e-4
            self.assertTrue(
                torch.max(torch.abs(torch_result - turbo_result)) < tolerance)

        def test_bertout(self):
            self.check_torch_and_turbo(use_cuda=False)
            if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
                self.check_torch_and_turbo(use_cuda=True)
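The test class above uses `batch_size`, `seq_length`, `test_helper`, and the `BertConfig`/`BertOutput` classes without showing where they are defined, and the class itself is indented as if nested in an enclosing function. Below is a minimal sketch of module scaffolding that would make it runnable, assuming a small factory closes over the shape parameters; the factory name `create_test`, the shape values, and the `transformers` import path are assumptions, not taken from the example.

    # Sketch only: scaffolding assumed by the test class above.
    import unittest

    import torch
    import turbo_transformers
    # Import path varies with the transformers version.
    from transformers.models.bert.modeling_bert import BertConfig, BertOutput

    import test_helper  # local helper assumed to provide run_model()


    def create_test(batch_size, seq_length):
        # The TestBertOut class from the example goes here; init_data picks up
        # batch_size and seq_length as closure variables.
        class TestBertOut(unittest.TestCase):
            ...  # init_data / check_torch_and_turbo / test_bertout as shown above

        # Register the generated case under a unique name so unittest finds it.
        globals()[f"TestBertOut_{batch_size}_{seq_length}"] = TestBertOut


    # Illustrative shape grid.
    for batch_size in (1, 2):
        for seq_length in (16, 128):
            create_test(batch_size, seq_length)

    if __name__ == '__main__':
        unittest.main()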
Example #3
    def __init__(self, config):
        super(BertConnectionLayer, self).__init__()
        self.biattention = BertBiAttention(config)
        self.biOutput = BertBiOutput(config)

        v_config = BertConfig.from_dict(config.v_config)
        self.v_intermediate = BertIntermediate(v_config)
        self.v_output = BertOutput(v_config)

        t_config = BertConfig.from_dict(config.t_config)
        self.t_intermediate = BertIntermediate(t_config)
        self.t_output = BertOutput(t_config)
Example #4
    def __init__(self, config):
        super().__init__()
        # The cross-attention Layer
        self.visual_attention = BertCrossattLayer(config)

        # Self-attention Layers
        self.lang_self_att = BertAttention(config)
        self.visn_self_att = BertAttention(config)

        # Intermediate and Output Layers (FFNs)
        self.lang_inter = BertIntermediate(config)
        self.lang_output = BertOutput(config)
        self.visn_inter = BertIntermediate(config)
        self.visn_output = BertOutput(config)
Example #5
    def __init__(self, config):
        super(BertGraphLayer, self).__init__()
        self.attention = BertGraphAttention(config)
        self.is_decoder = config.is_decoder
        if self.is_decoder:
            self.crossattention = BertGraphAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #6
    def __init__(self, config):
        super(SpanAttentionLayer, self).__init__()
        # create modules
        self.attention = SpanAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

        # initialize weights
        self.init_weights()
Example #7
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = BertAttention(config)
        self.is_decoder = config.is_decoder
        self.add_cross_attention = config.add_cross_attention
        if self.add_cross_attention:
            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
            self.crossattention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
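Example #7 stores `chunk_size_feed_forward` and `seq_len_dim` but does not show how they are consumed. The following is a minimal sketch of the feed-forward half, assuming the standard Hugging Face helper `apply_chunking_to_forward` (found in `transformers.pytorch_utils` in recent releases, `transformers.modeling_utils` in older ones); the chunk size and tensor shapes are illustrative.

    import torch
    from transformers.pytorch_utils import apply_chunking_to_forward
    from transformers.models.bert.modeling_bert import (BertConfig,
                                                         BertIntermediate,
                                                         BertOutput)

    # chunk_size_feed_forward > 0 enables chunking along the sequence axis.
    config = BertConfig(chunk_size_feed_forward=4)
    intermediate = BertIntermediate(config)  # dense (768 -> 3072) + activation
    output = BertOutput(config)              # dense (3072 -> 768) + dropout + residual + LayerNorm
    seq_len_dim = 1                          # chunk along the sequence dimension, as in Example #7


    def feed_forward_chunk(attention_output):
        return output(intermediate(attention_output), attention_output)


    attention_output = torch.rand(2, 16, config.hidden_size)
    with torch.no_grad():
        # Processes the 16-token sequence in 4-token chunks to bound the size
        # of the 3072-wide intermediate activations, then concatenates results.
        layer_output = apply_chunking_to_forward(feed_forward_chunk,
                                                 config.chunk_size_feed_forward,
                                                 seq_len_dim, attention_output)
    print(layer_output.shape)  # torch.Size([2, 16, 768])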
Example #8
    def __init__(self, config):
        super(BertLayerOracleSparse, self).__init__()
        logger.info(
            f"Set Oracle Sparse with key_c:{config.key_c} and query_c:{config.query_c}!"
        )

        self.attention = BertAttention(config)
        self.attention.self.output_attentions = True
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

        self.key_c = config.key_c
        self.query_c = config.query_c
        self.num_heads = config.num_attention_heads
Example #9
    def __init__(self, config):
        super(EntityAwareLayer, self).__init__()

        self.attention = EntityAwareAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #10
    def __init__(self, config):
        super(BertScanLayer, self).__init__()
        self.attention = BertAttention(config)
        self.scan_attention = BertScanAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #11
    def __init__(self, config):
        super().__init__()
        self.attention = BertAttentionJit(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #12
    def __init__(self, config):
        super(CaptionBertLayer, self).__init__(config)
        self.attention = CaptionBertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #13
    def __init__(self, config):
        super(GramBertLayer, self).__init__()
        self.attention = GramBertAttention(
            config)  # attention + linear + dropout + res-connect + norm
        self.intermediate = BertIntermediate(config)  # linear
        self.output = BertOutput(config)  # linear + dropout + res-connect + norm
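The comments in the last example summarize the pattern that recurs across these snippets: an attention block (projection + dropout + residual + LayerNorm) followed by BertIntermediate (linear + activation) and BertOutput (linear + dropout + residual + LayerNorm). Below is a minimal runnable sketch of that composition, using the stock Hugging Face classes as stand-ins for the custom attention variants; the import path assumes a recent transformers release and the tensor shapes are illustrative.

    import torch
    from transformers.models.bert.modeling_bert import (BertAttention,
                                                         BertConfig,
                                                         BertIntermediate,
                                                         BertOutput)

    config = BertConfig()                    # hidden_size=768, intermediate_size=3072
    attention = BertAttention(config)        # self-attention + dense + dropout + residual + LayerNorm
    intermediate = BertIntermediate(config)  # dense + activation
    output = BertOutput(config)              # dense + dropout + residual + LayerNorm

    hidden_states = torch.rand(2, 16, config.hidden_size)  # (batch, seq_len, hidden)
    with torch.no_grad():
        attention_output = attention(hidden_states)[0]  # forward returns a tuple
        layer_output = output(intermediate(attention_output), attention_output)
    print(layer_output.shape)  # torch.Size([2, 16, 768])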