Example No. 1
    def __init__(self, config, q_dim):
        super(TransformerPredictionLayer, self).__init__()
        self.config = config
        h_dim = config.input_dim
        # input_dim = config.input_dim

        self.hidden = h_dim

        self.position_encoder = PositionalEncoder(h_dim, config)

        # Cascade Network
        bert_config = BertConfig(config.hidden_dim, config.trans_heads,
                                 config.trans_drop)

        self.sp_transformer = BertLayer(bert_config)
        self.sp_linear = nn.Linear(h_dim * 2, 1)

        self.start_input_linear = nn.Linear(h_dim + 1, h_dim)
        self.start_transformer = BertLayer(bert_config)
        self.start_linear = nn.Linear(h_dim, 1)

        self.end_input_linear = nn.Linear(2 * h_dim + 1, h_dim)
        self.end_transformer = BertLayer(bert_config)
        self.end_linear = nn.Linear(h_dim, 1)

        self.type_input_linear = nn.Linear(2 * h_dim + 1, h_dim)
        self.type_transformer = BertLayer(bert_config)
        self.type_linear = nn.Linear(h_dim, 3)

        self.cache_S = 0
        self.cache_mask = None
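
The layer shapes above suggest a cascaded forward pass: supporting-fact logits condition the start head, whose representation conditions the end and answer-type heads. A minimal sketch of that cascade, assuming this project's BertLayer takes (hidden_states, attention_mask) and that `context` is already position-encoded; the function and argument names below are illustrative, not from the original source:

import torch

def cascade_forward(m, context, extended_mask):
    # Supporting-fact head: transformer over the context, scored from [context; sp_state]
    sp_state = m.sp_transformer(context, extended_mask)                    # (B, L, h)
    sp_logits = m.sp_linear(torch.cat([context, sp_state], dim=-1))        # (B, L, 1)

    # Start head: conditioned on the supporting-fact logits (h + 1 -> h)
    start_in = m.start_input_linear(torch.cat([context, sp_logits], dim=-1))
    start_state = m.start_transformer(start_in, extended_mask)
    start_logits = m.start_linear(start_state)                             # (B, L, 1)

    # End head: conditioned on the start representation and sp logits (2h + 1 -> h)
    end_in = m.end_input_linear(torch.cat([context, start_state, sp_logits], dim=-1))
    end_state = m.end_transformer(end_in, extended_mask)
    end_logits = m.end_linear(end_state)                                   # (B, L, 1)

    # Answer-type head: 3-way classification from the first position (2h + 1 -> h -> 3)
    type_in = m.type_input_linear(torch.cat([context, end_state, sp_logits], dim=-1))
    type_state = m.type_transformer(type_in, extended_mask)
    type_logits = m.type_linear(type_state[:, 0])                          # (B, 3)

    return sp_logits, start_logits, end_logits, type_logits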
Example No. 2
    def __init__(
        self,
        bert_model,
        output_dim,
        add_transformer_layer=False,
        layer_pulled=-1,
        aggregation="first",
    ):
        super(BertWrapper, self).__init__()
        self.layer_pulled = layer_pulled
        self.aggregation = aggregation
        self.add_transformer_layer = add_transformer_layer
        # deduce bert output dim from the size of embeddings
        bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)

        if add_transformer_layer:
            config_for_one_layer = BertConfig(
                0,
                hidden_size=bert_output_dim,
                num_attention_heads=int(bert_output_dim / 64),
                intermediate_size=3072,
                hidden_act='gelu',
            )
            self.additional_transformer_layer = BertLayer(config_for_one_layer)
        self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
        self.bert_model = bert_model
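
A minimal usage sketch (an assumption based on the fields initialized above, not the original forward) of how such a wrapper is typically driven with pytorch_pretrained_bert's BertModel, which returns (encoded_layers, pooled_output):

def wrapper_forward(wrapper, token_ids, segment_ids, attention_mask):
    encoded_layers, _ = wrapper.bert_model(
        token_ids, segment_ids, attention_mask, output_all_encoded_layers=True)
    hidden = encoded_layers[wrapper.layer_pulled]      # layer selected by layer_pulled
    if wrapper.add_transformer_layer:
        # BertLayer expects the additive mask: 0 for real tokens, -10000 for padding
        ext_mask = (1.0 - attention_mask.unsqueeze(1).unsqueeze(2).float()) * -10000.0
        hidden = wrapper.additional_transformer_layer(hidden, ext_mask)
    if wrapper.aggregation == "first":
        pooled = hidden[:, 0]                          # first ([CLS]) token
    else:
        mask = attention_mask.unsqueeze(-1).float()
        pooled = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1)  # masked mean
    return wrapper.additional_linear_layer(pooled)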
Example No. 3
    def __init__(
        self,
        config,
        feat_dim,
        phn_size,
        sep_size,
        mask_prob=0.15,
        mask_but_no_prob=0.1,
    ):
        super(BertModel, self).__init__(config)
        self.mask_prob = mask_prob
        self.mask_but_no_prob = mask_but_no_prob
        self.sep_size = sep_size

        self.feat_embeddings = nn.Linear(feat_dim, config.hidden_size)
        self.feat_mask_vec = nn.Parameter(torch.zeros(feat_dim),
                                          requires_grad=True)
        self.positional_encoding = PositionalEncoding(config.hidden_size)

        self.model = BertModel
        layer = BertLayer(config)
        self.encoder = nn.ModuleList(
            [copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])
        self.feat_out_layer = nn.Linear(config.hidden_size, feat_dim)
        self.target_out_layer = nn.Linear(config.hidden_size, phn_size)
        self.apply(self.init_bert_weights)
Example No. 4
    def __init__(self, vocab_size, original_hidden_size, num_layers, tau=1):
        super().__init__()
        self.bert_layer = BertLayer(BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=original_hidden_size * num_layers,
        ))
        self.linear_layer = nn.Linear(original_hidden_size * num_layers, 1)
        self.log_sigmoid = nn.LogSigmoid()
        self.tau = tau
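
A hedged sketch of how a scorer like this could be applied: the hidden size is original_hidden_size * num_layers, which matches a per-token concatenation of all BERT layers' hidden states; the helper below is illustrative only:

def scorer_forward(m, stacked_hidden, extended_mask):
    # stacked_hidden: (B, L, original_hidden_size * num_layers)
    h = m.bert_layer(stacked_hidden, extended_mask)
    logits = m.linear_layer(h).squeeze(-1) / m.tau   # temperature-scaled scalar scores
    return m.log_sigmoid(logits)                     # log-probabilities in (-inf, 0)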
Example No. 5
    def __init__(self, config, my_dropout_p):
        super(BertSplitPreTrainedModel, self).__init__(config)
        logger.info(f'Model {__class__.__name__} is loading...')
        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(my_dropout_p)
        self.bert = BertModel(config)
        self.query_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
        self.value_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
        self.sentence_input = layers.BertSentInput(config)
        self.sentence_encoder = BertLayer(config)
        self.attention_score = layers.AttentionScore(config.hidden_size, 256)
Example No. 6
    def __init__(self, config):
        super(BertSentencePreTrainedModel2, self).__init__(config)
        logger.info(f'Model {__class__.__name__} is loading...')
        # layers.set_seq_dropout(True)
        # layers.set_my_dropout_prob(my_dropout_p)
        self.bert = BertModel(config)
        self.bert_sent_input = layers.BertSentInput(config)
        config.intermediate_size = 1024
        config.num_attention_heads = 6
        self.bert_layer = BertLayer(config)
        self.sent_label_predictor = nn.Linear(config.hidden_size, 1)
Example No. 7
    def __init__(self, input_dim, out_dim, config):
        super(InteractionLayer, self).__init__()
        self.config = config
        self.use_trans = config.basicblock_trans

        if config.basicblock_trans:
            bert_config = BertConfig(input_dim, config.trans_heads,
                                     config.trans_drop)
            self.transformer = BertLayer(bert_config)
            self.transformer_linear = nn.Linear(input_dim, out_dim)
        else:
            self.lstm = LSTMWrapper(input_dim, out_dim // 2, 1)
Example No. 8
    def __init__(self, config):
        super(BertEncoder, self).__init__()
        layer = BertLayer(config)
        print("BertEncoder layer init...", layer)
        # Copy the encoder layer n times, where n is the number of encoder layers set in config
        # self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])
        encoders = []
        for _ in range(config.num_hidden_layers):
            encoders.append(copy.deepcopy(layer))

        # The 13th encoder (one extra copy)
        encoders.append(copy.deepcopy(layer))
        self.layer = nn.ModuleList(encoders)
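
A minimal sketch (not part of the original class) of how a ModuleList of BertLayers like this is normally driven, mirroring pytorch_pretrained_bert's BertEncoder.forward:

def encoder_forward(encoder, hidden_states, extended_attention_mask):
    all_encoder_layers = []
    for layer_module in encoder.layer:
        hidden_states = layer_module(hidden_states, extended_attention_mask)
        all_encoder_layers.append(hidden_states)
    return all_encoder_layers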
Example No. 9
    def __init__(self,
                 input_size,
                 causal=True,
                 bidirectional=False,
                 num_layers=3,
                 num_heads=4,
                 dropout=0.2,
                 max_seq_len=32):
        super().__init__()
        self.pos_embedding = nn.Embedding(max_seq_len, input_size)
        self.causal = causal
        self.bidirectional = bidirectional
        bert_config = self.BertConfig(input_size, num_heads, dropout)
        self.forward_transformer = nn.ModuleList(
            [BertLayer(bert_config) for _ in range(num_layers)])
        self.c_size = input_size
        if bidirectional and causal:
            self.backward_transformer = nn.ModuleList(
                [BertLayer(bert_config) for _ in range(num_layers)])
            self.c_size *= 2
        else:
            self.backward_transformer = None
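
When causal=True, each BertLayer stack needs an additive attention mask that blocks future positions. A small sketch of such a mask, assuming the usual BertLayer convention of 0 for allowed and -10000 for blocked positions (the helper name is illustrative):

import torch

def causal_additive_mask(seq_len, device=None):
    # position j may attend to positions i <= j; the upper triangle is masked out
    allowed = torch.tril(torch.ones(seq_len, seq_len, device=device))
    return (1.0 - allowed).view(1, 1, seq_len, seq_len) * -10000.0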
Example No. 10
def test_BertLayer():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertLayer(config)

    embedding_output = embeddings(input_ids, token_type_ids)
    # BertLayer expects an additive mask: 0.0 for real tokens, -10000.0 for padding
    input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
    input_mask = (1.0 - input_mask) * -10000.0
    print(model(embedding_output, input_mask))
Example No. 11
def get_pretrained_bert(modelname, num_hidden_layers=None):
    bert = BertModel.from_pretrained(modelname)
    if num_hidden_layers is None:
        return bert
    old_num_hidden_layers = bert.config.num_hidden_layers
    if num_hidden_layers < old_num_hidden_layers:
        # Only use the bottom n layers
        del bert.encoder.layer[num_hidden_layers:]
    elif num_hidden_layers > old_num_hidden_layers:
        # Add BertLayer(s)
        for i in range(old_num_hidden_layers, num_hidden_layers):
            bert.encoder.layer.add_module(str(i), BertLayer(bert.config))
    if num_hidden_layers != old_num_hidden_layers:
        bert.config.num_hidden_layers = num_hidden_layers
        bert.init_bert_weights(bert.pooler.dense)
    return bert
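
A hypothetical call, assuming a standard pretrained checkpoint name; it keeps only the bottom six layers of bert-base-uncased:

bert = get_pretrained_bert('bert-base-uncased', num_hidden_layers=6)
assert len(bert.encoder.layer) == 6
assert bert.config.num_hidden_layers == 6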
Example No. 12
    def __init__(self, bert_model, mode, add_transformer_layer=False,
                 layer_pulled=-1, aggregation="first"):
        super().__init__()

        self.layer_pulled = layer_pulled
        self.aggregation = aggregation
        self.add_transformer_layer = add_transformer_layer
        self.bert_model = bert_model

        bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)

        if add_transformer_layer:
            config_for_one_layer = BertConfig(
                0,
                hidden_size=bert_output_dim,
                num_attention_heads=int(bert_output_dim / 64),
                intermediate_size=3072,
                hidden_act='gelu',
            )
            self.additional_transformer_layer = BertLayer(config_for_one_layer)

        # Possibly add final linear layer
        self.output_linear = None
        assert mode in ['bi_encoder', 'cross_encoder']
        if mode == 'cross_encoder':
            print('BertWrapper (cross_encoder): adding linear output_linear')
            self.output_linear = torch.nn.Linear(bert_output_dim, 1)
Example No. 13
    def __init__(self, hidden_size, config):
        super().__init__()
        self.layer1 = BertLayer(config)
        self.layer2 = BertLayer(config)
        self.xlayer = BertLayer(config)
Example No. 14
    def __init__(self, config):
        super(Interaction_2layer, self).__init__()
        layer = BertLayer(config)
        self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(2)])