Example #1
    def __init__(self, config):
        super(VariableNormTransformerLayer, self).__init__()
        self.config = config

        if self.config.norm_type == 'layer':
            self.attention_norm = nn.LayerNorm(config.hidden_size,
                                               eps=config.layer_norm_eps)
        elif self.config.norm_type == 'adanorm':
            self.attention_norm = AdaNorm(0.3, config.layer_norm_eps)
        elif self.config.norm_type == 'scalenorm':
            self.attention_norm = ScaleNorm(config.hidden_size**0.5)

        self.self_attention = BertSelfAttention(config)
        self.self_out = nn.Linear(config.hidden_size, config.hidden_size)
        self.self_dropout = nn.Dropout(config.hidden_dropout_prob)

        if self.config.norm_type == 'layer':
            self.ff_norm = nn.LayerNorm(config.hidden_size,
                                        eps=config.layer_norm_eps)
        elif self.config.norm_type == 'adanorm':
            self.ff_norm = AdaNorm(0.3, config.layer_norm_eps)
        elif self.config.norm_type == 'scalenorm':
            self.ff_norm = ScaleNorm(config.hidden_size**0.5)

        self.ff1 = BertIntermediate(config)
        self.ff2 = nn.Linear(config.intermediate_size, config.hidden_size)
        self.ff_dropout = nn.Dropout(config.hidden_dropout_prob)
Example #2
    def __init__(self, config):
        super(BertConnectionLayer, self).__init__()
        self.biattention = BertBiAttention(config)
        self.biOutput = BertBiOutput(config)

        v_config = BertConfig.from_dict(config.v_config)
        self.v_intermediate = BertIntermediate(v_config)
        self.v_output = BertOutput(v_config)

        t_config = BertConfig.from_dict(config.t_config)
        self.t_intermediate = BertIntermediate(t_config)
        self.t_output = BertOutput(t_config)
Example #3
File: lxmert.py Project: slbinilkumar/mmf
    def __init__(self, config):
        super().__init__()
        # The cross-attention Layer
        self.visual_attention = BertCrossattLayer(config)

        # Self-attention Layers
        self.lang_self_att = BertAttention(config)
        self.visn_self_att = BertAttention(config)

        # Intermediate and Output Layers (FFNs)
        self.lang_inter = BertIntermediate(config)
        self.lang_output = BertOutput(config)
        self.visn_inter = BertIntermediate(config)
        self.visn_output = BertOutput(config)
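The BertIntermediate / BertOutput pair used in this and the other examples is the standard transformer feed-forward sub-block. A minimal sketch of how the pair composes on its own (not taken from the mmf source; config values and shapes are illustrative):

import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertIntermediate, BertOutput

config = BertConfig()
inter = BertIntermediate(config)   # Linear(hidden -> intermediate) + activation
out = BertOutput(config)           # Linear(intermediate -> hidden) + dropout + residual + LayerNorm

attention_output = torch.randn(2, 16, config.hidden_size)
ffn_output = out(inter(attention_output), attention_output)  # residual taken on attention_output
print(ffn_output.shape)  # torch.Size([2, 16, 768])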
Example #4
        def init_data(self, use_cuda: bool) -> None:
            self.test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(1)

            torch.set_grad_enabled(False)
            self.cfg = BertConfig()

            self.torch_intermediate = BertIntermediate(self.cfg)
            if torch.cuda.is_available():
                self.torch_intermediate.to(self.test_device)
            self.torch_intermediate.eval()

            self.turbo_intermediate = turbo_transformers.BertIntermediate.from_torch(
                self.torch_intermediate)
Example #5
 def __init__(self, config):
     super().__init__()
     self.attention = MyBertAttention10(config)
     self.is_decoder = config.is_decoder
     if self.is_decoder:
         self.crossattention = BertAttention(config)
     self.intermediate = BertIntermediate(config)
     self.output = MyBertOutput10(config)
Example #6
 def __init__(self, config):
     super(BertGraphLayer, self).__init__()
     self.attention = BertGraphAttention(config)
     self.is_decoder = config.is_decoder
     if self.is_decoder:
         self.crossattention = BertGraphAttention(config)
     self.intermediate = BertIntermediate(config)
     self.output = BertOutput(config)
Example #7
File: kar.py Project: ndoll1998/KnowBert
    def __init__(self, config):
        super(SpanAttentionLayer, self).__init__()
        # create modules
        self.attention = SpanAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

        # initialize weights
        self.init_weights()
Example #8
    def __init__(self, config):
        super(GLTPairCompose, self).__init__()
        self._non_compositional_reps = config.non_compositional_reps
        if config.non_compositional_reps:
            self.lstm = nn.LSTM(config.hidden_size,
                                config.hidden_size,
                                num_layers=1,
                                batch_first=True,
                                bidirectional=True)

        self.attention = GLTPairComposeAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = GLTSelfOutput(config,
                                    dropout=True,
                                    dense=False,
                                    l_norm=True)

        self.weighted_output = GLTSelfOutput(config,
                                             dense=False,
                                             dropout=False)

        if config.grounded:
            self.meaning_query_2 = nn.Linear(config.hidden_size, 2)

            self.control_gate = None
            n_options = 0
            if config.control_gate_add_skip:
                n_options += 1
            if config.control_gate_add_intersect:
                n_options += 1
            if config.control_gate_add_union:
                n_options += 1
            if config.control_gate_add_vis:
                n_options += 1
                if config.control_gate_add_extra_vis_module_left_branching:
                    n_options += 1
            if n_options == 0:
                raise AttributeError("At least one module must be added")
            self.control_gate = nn.Linear(config.hidden_size, n_options)
            self.control_gate_add_skip = config.control_gate_add_skip
            self.control_gate_add_union = config.control_gate_add_union
            self.control_gate_add_intersect = config.control_gate_add_intersect
            self.control_gate_add_vis = config.control_gate_add_vis
            self.control_gate_add_extra_vis_module_left_branching = config.control_gate_add_extra_vis_module_left_branching
            self.control_gate_set_vis_left_branching = config.control_gate_set_vis_left_branching

            if self.control_gate_add_vis:
                self.vis_text_text_comp = GLTVisualTextComp(config)
                self.constt_rep_lin = nn.Linear(config.hidden_size,
                                                config.hidden_size)

        self.activation = nn.GELU()
        self.dropout = nn.Dropout(config.layer_dropout_prob)

        self.constt_energy = self.lin = nn.Linear(config.hidden_size, 1)
Example #9
 def __init__(self, config):
     super().__init__()
     self.chunk_size_feed_forward = config.chunk_size_feed_forward
     self.seq_len_dim = 1
     self.attention = BertAttention(config)
     self.is_decoder = config.is_decoder
     self.add_cross_attention = config.add_cross_attention
     if self.add_cross_attention:
         assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
         self.crossattention = BertAttention(config)
     self.intermediate = BertIntermediate(config)
     self.output = BertOutput(config)
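A sketch of how chunk_size_feed_forward and seq_len_dim from the __init__ above are typically consumed in forward(), assuming the apply_chunking_to_forward helper from transformers.pytorch_utils (recent versions); this is not code from the example's source:

import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertIntermediate, BertOutput
from transformers.pytorch_utils import apply_chunking_to_forward

config = BertConfig(chunk_size_feed_forward=4)
intermediate, output = BertIntermediate(config), BertOutput(config)

def feed_forward_chunk(attention_output):
    # One FFN pass over a slice of the sequence axis
    return output(intermediate(attention_output), attention_output)

attention_output = torch.randn(2, 16, config.hidden_size)
# chunk_dim=1 (the seq_len_dim above): the 16-token axis is processed 4 tokens at a time
ffn_out = apply_chunking_to_forward(
    feed_forward_chunk, config.chunk_size_feed_forward, 1, attention_output)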
Example #10
    def __init__(self, config):
        super(BertLayerOracleSparse, self).__init__()
        logger.info(
            f"Set Oracle Sparse with key_c:{config.key_c} and query_c:{config.query_c}!"
        )

        self.attention = BertAttention(config)
        self.attention.self.output_attentions = True
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

        self.key_c = config.key_c
        self.query_c = config.query_c
        self.num_heads = config.num_attention_heads
Example #11
 def __init__(self, config):
     super(BertScanLayer, self).__init__()
     self.attention = BertAttention(config)
     self.scan_attention = BertScanAttention(config)
     self.intermediate = BertIntermediate(config)
     self.output = BertOutput(config)
Example #12
 def __init__(self, config):
     super().__init__()
     self.attention = BertAttentionJit(config)
     self.intermediate = BertIntermediate(config)
     self.output = BertOutput(config)
Example #13
class VariableNormTransformerLayer(nn.Module):
    def __init__(self, config):
        super(VariableNormTransformerLayer, self).__init__()
        self.config = config

        if self.config.norm_type == 'layer':
            self.attention_norm = nn.LayerNorm(config.hidden_size,
                                               eps=config.layer_norm_eps)
        elif self.config.norm_type == 'adanorm':
            self.attention_norm = AdaNorm(0.3, config.layer_norm_eps)
        elif self.config.norm_type == 'scalenorm':
            self.attention_norm = ScaleNorm(config.hidden_size**0.5)

        self.self_attention = BertSelfAttention(config)
        self.self_out = nn.Linear(config.hidden_size, config.hidden_size)
        self.self_dropout = nn.Dropout(config.hidden_dropout_prob)

        if self.config.norm_type == 'layer':
            self.ff_norm = nn.LayerNorm(config.hidden_size,
                                        eps=config.layer_norm_eps)
        elif self.config.norm_type == 'adanorm':
            self.ff_norm = AdaNorm(0.3, config.layer_norm_eps)
        elif self.config.norm_type == 'scalenorm':
            self.ff_norm = ScaleNorm(config.hidden_size**0.5)

        self.ff1 = BertIntermediate(config)
        self.ff2 = nn.Linear(config.intermediate_size, config.hidden_size)
        self.ff_dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, attention_mask=None, *args, **kwargs):
        residual = hidden_states
        if self.config.prenorm:
            hidden_states = self.attention_norm(hidden_states)
        # Self-attention sublayers
        if attention_mask is not None:
            if attention_mask.ndim == 2:
                attention_mask = attention_mask[:, None, None, :]
        hidden_states, attentions = self.self_attention(
            hidden_states,
            attention_mask=attention_mask,
            output_attentions=self.config.output_attentions)
        hidden_states = self.self_out(hidden_states)
        hidden_states = self.self_dropout(hidden_states) + residual
        if not self.config.prenorm:
            hidden_states = self.attention_norm(hidden_states)

        residual = hidden_states
        if self.config.prenorm:
            hidden_states = self.ff_norm(hidden_states)
        # FF sublayer
        hidden_states = self.ff1(hidden_states)  # BertIntermediate applies dense + activation
        hidden_states = self.ff2(hidden_states)
        hidden_states = self.ff_dropout(hidden_states) + residual
        if not self.config.prenorm:
            hidden_states = self.ff_norm(hidden_states)

        return hidden_states, attentions

    def load_from_bert(self, bert_layer):
        self.self_attention.load_state_dict(
            bert_layer.attention.self.state_dict())
        self.self_out.load_state_dict(
            bert_layer.attention.output.dense.state_dict())
        self.ff1.load_state_dict(bert_layer.intermediate.state_dict())
        self.ff2.load_state_dict(bert_layer.output.dense.state_dict())
        if self.config.norm_type == "layer":
            self.attention_norm.load_state_dict(
                bert_layer.attention.output.LayerNorm.state_dict())
            self.ff_norm.load_state_dict(
                bert_layer.output.LayerNorm.state_dict())
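A hypothetical usage sketch for the layer above (not from its source). The norm_type / prenorm / output_attentions fields are set here only because __init__ and forward read them, and norm_type='layer' avoids needing the AdaNorm / ScaleNorm classes:

import torch
from transformers import BertConfig, BertModel

config = BertConfig()
config.norm_type = 'layer'        # 'layer' | 'adanorm' | 'scalenorm'
config.prenorm = False            # post-norm placement matches the BERT weights copied below
config.output_attentions = True   # forward() unpacks (hidden_states, attentions)

layer = VariableNormTransformerLayer(config)

# Optionally initialize from a pretrained BERT encoder layer via load_from_bert
bert = BertModel.from_pretrained('bert-base-uncased')
layer.load_from_bert(bert.encoder.layer[0])

hidden = torch.randn(2, 16, config.hidden_size)
out, attn = layer(hidden)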
Example #14
 def __init__(self, config):
     super(MyBertAdapterLayer9, self).__init__()
     self.new_attention = MyBertAttention9(config)
     self.new_intermediate = BertIntermediate(config)
     self.new_output = MyBertOutput9(config)
     self.adapter = BertAdapter9(config)
Example #15
 def __init__(self, config):
     super(CaptionBertLayer, self).__init__(config)
     self.attention = CaptionBertAttention(config)
     self.intermediate = BertIntermediate(config)
     self.output = BertOutput(config)
Example #16
    class TestBertIntermediate(unittest.TestCase):
        def init_data(self, use_cuda: bool) -> None:
            self.test_device = torch.device('cuda:0') if use_cuda else \
                torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(1)

            torch.set_grad_enabled(False)
            self.cfg = BertConfig()

            self.torch_intermediate = BertIntermediate(self.cfg)
            if torch.cuda.is_available():
                self.torch_intermediate.to(self.test_device)
            self.torch_intermediate.eval()

            self.turbo_intermediate = turbo_transformers.BertIntermediate.from_torch(
                self.torch_intermediate)

        def check_torch_and_turbo(self, use_cuda):
            self.init_data(use_cuda=use_cuda)
            device = "GPU" if use_cuda else "CPU"
            num_iter = 2
            hidden_size = self.cfg.hidden_size
            input_tensor = torch.rand(size=(batch_size, seq_length,
                                            hidden_size),
                                      dtype=torch.float32,
                                      device=self.test_device)

            turbo_model = lambda: self.turbo_intermediate(input_tensor)
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)

            print(
                f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
                f"{device} TurboTransform QPS,  {turbo_qps}, time, {turbo_time}"
            )

            torch_model = lambda: self.torch_intermediate(input_tensor)
            torch_result, torch_qps, torch_time = \
                test_helper.run_model(torch_model, use_cuda, num_iter)

            print(f"BertIntermediate \"({batch_size},{seq_length:03})\" ",
                  f"{device} Torch QPS,  {torch_qps}, time, {torch_time}")

            torch_result = torch_result.cpu().numpy()
            turbo_result = turbo_result.cpu().numpy()

            self.assertTrue(
                numpy.allclose(torch_result,
                               turbo_result,
                               rtol=1e-4,
                               atol=1e-3))

            with open("bert_intermediate_res.txt", "a") as fh:
                fh.write(
                    f"\"({batch_size},{seq_length:03})\", {torch_qps}, {torch_qps}\n"
                )

        def test_intermediate(self):
            self.check_torch_and_turbo(use_cuda=False)
            if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
                self.check_torch_and_turbo(use_cuda=True)
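The test above reads batch_size and seq_length from module scope; a minimal driver sketch, assuming fixed illustrative values rather than the project's own per-shape test generation:

import unittest

batch_size = 1    # illustrative; the original project sweeps several shapes
seq_length = 32

if __name__ == "__main__":
    unittest.main()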
Example #17
File: model.py Project: patelrajnath/luke
    def __init__(self, config):
        super(EntityAwareLayer, self).__init__()

        self.attention = EntityAwareAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
Example #18
 def __init__(self, config):
     super(GramBertLayer, self).__init__()
     self.attention = GramBertAttention(
         config)  # attention + linear + dropout + residual connection + norm
     self.intermediate = BertIntermediate(config)  # linear
     self.output = BertOutput(config)  # linear + dropout + residual connection + norm