Code Example #1
File: model.py  Project: bindung/BERT-keras-minimal
    def __init__(self, n_state: int, n_head: int, d_hid: int, residual_dropout: float, attention_dropout: float,
                 use_attn_mask: bool, layer_id: int, neg_inf: float, ln_epsilon: float, accurate_gelu: bool) -> None:
        # Self-attention sublayer, followed by its dropout / residual-add / layer-norm stage
        self.attention = MultiHeadSelfAttention(n_state, n_head, attention_dropout, use_attn_mask, layer_id, neg_inf)
        self.drop1 = Dropout(residual_dropout, name='layer_{}/ln_1_drop'.format(layer_id))
        self.add1 = Add(name='layer_{}/ln_1_add'.format(layer_id))
        self.ln1 = LayerNormalization(ln_epsilon, name='layer_{}/ln_1'.format(layer_id))
        # Position-wise feed-forward sublayer with the same dropout / add / norm pattern
        self.ffn = PositionWiseFF(n_state, d_hid, layer_id, accurate_gelu)
        self.drop2 = Dropout(residual_dropout, name='layer_{}/ln_2_drop'.format(layer_id))
        self.add2 = Add(name='layer_{}/ln_2_add'.format(layer_id))
        self.ln2 = LayerNormalization(ln_epsilon, name='layer_{}/ln_2'.format(layer_id))
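MultiHeadSelfAttention, PositionWiseFF, and LayerNormalization here are classes defined elsewhere in the project. As a rough guide to how these sublayers are usually composed, the following is a minimal sketch of the standard post-layer-norm residual pattern used by BERT, written with stock tensorflow.keras layers; the attn and ffn callables stand in for the project's own classes and are assumptions, not the project's actual wiring:

    from tensorflow import keras

    def transformer_block(x, attn, ffn, drop_rate=0.1, eps=1e-5):
        # Self-attention sublayer: attention -> dropout -> residual add -> layer norm
        a = keras.layers.Dropout(drop_rate)(attn(x))
        x = keras.layers.LayerNormalization(epsilon=eps)(keras.layers.Add()([x, a]))
        # Feed-forward sublayer: same dropout / add / norm pattern around the FFN
        f = keras.layers.Dropout(drop_rate)(ffn(x))
        x = keras.layers.LayerNormalization(epsilon=eps)(keras.layers.Add()([x, f]))
        return x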
Code Example #2
File: embedding.py  Project: zzx2017/BERT-keras
    def __init__(self, output_dim: int = 768, dropout: float = 0.1, vocab_size: int = 30000 + TextEncoder.SPECIAL_COUNT,
                 max_len: int = 512, trainable_pos_embedding: bool = True, use_one_dropout: bool = False,
                 use_embedding_layer_norm: bool = False, ln_epsilon: float = 1e-5, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.use_one_dropout = use_one_dropout
        self.output_dim = output_dim
        self.dropout = dropout
        self.vocab_size = vocab_size
        self.trainable_pos_embedding = trainable_pos_embedding

        self.segment_emb = keras.layers.Embedding(TextEncoder.NUM_SEGMENTS, output_dim, input_length=max_len,
                                                  name='SegmentEmbedding')
        if not trainable_pos_embedding:
            self.pos_emb = keras.layers.Embedding(max_len, output_dim, trainable=False, input_length=max_len,
                                                  name='PositionEmbedding',
                                                  weights=[_get_pos_encoding_matrix(max_len, output_dim)])
        else:
            self.pos_emb = keras.layers.Embedding(max_len, output_dim, input_length=max_len, name='PositionEmbedding')
        self.token_emb = keras.layers.Embedding(vocab_size, output_dim, input_length=max_len, name='TokenEmbedding')
        self.embedding_dropout = keras.layers.Dropout(dropout, name='EmbeddingDropOut')
        self.add_embeddings = keras.layers.Add(name='AddEmbeddings')
        self.use_embedding_layer_norm = use_embedding_layer_norm
        if self.use_embedding_layer_norm:
            self.embedding_layer_norm = LayerNormalization(ln_epsilon)
        else:
            self.embedding_layer_norm = None
        self.ln_epsilon = ln_epsilon
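The helper _get_pos_encoding_matrix is referenced but not shown in the snippet. A common implementation (assumed here) builds the fixed sinusoidal position-encoding matrix from the Transformer paper, PE[pos, 2i] = sin(pos / 10000^(2i/d)) and PE[pos, 2i+1] = cos(pos / 10000^(2i/d)):

    import numpy as np

    def _get_pos_encoding_matrix(max_len: int, d_emb: int) -> np.ndarray:
        # Angle table: each pair of columns (2i, 2i+1) shares the frequency 1/10000^(2i/d)
        pos_enc = np.array([
            [pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)]
            for pos in range(max_len)
        ], dtype=np.float32)
        pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2])  # even dimensions: sine
        pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2])  # odd dimensions: cosine
        return pos_enc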
Code Example #3
    def __init__(self, layer: nn.Module, n_layers: int):
        """
        Constructor for the global Encoder.

        :param layer: encoder layer module to clone.
        :param n_layers: number of layers to use.
        """
        # call base constructor
        super(Encoder, self).__init__()
        # Stack of n_layers independent copies of the given layer
        self.layers = clone(layer, n_layers)
        # Final layer normalization applied after the stack
        self.norm = LayerNormalization(layer.size)
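The clone helper is not shown in the snippet. A likely definition, following the well-known "Annotated Transformer" pattern, produces n independent deep copies of a module:

    import copy
    import torch.nn as nn

    def clone(module: nn.Module, n: int) -> nn.ModuleList:
        # Deep-copy so each layer gets its own, independently trained parameters
        return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])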
Code Example #4
    def __init__(self, layer: nn.Module, N: int):
        """
        Constructor for the global ``Decoder``.

        :param layer: decoder layer module to clone.
        :param N: number of decoder layers to use.
        """
        # call base constructor
        super(Decoder, self).__init__()
        # Stack of N independent copies of the given layer
        self.layers = clone(layer, N)
        # Final layer normalization applied after the stack
        self.norm = LayerNormalization(layer.size)
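The matching forward pass is not included in the snippet. A minimal sketch, assuming the standard Annotated Transformer pattern in which each decoder layer attends over the encoder output ``memory`` and the final norm is applied once after the stack:

    import torch.nn as nn

    class Decoder(nn.Module):
        # __init__ as in the snippet above

        def forward(self, x, memory, src_mask, tgt_mask):
            # Run the target sequence through every decoder layer in turn,
            # then apply the single final layer normalization.
            for layer in self.layers:
                x = layer(x, memory, src_mask, tgt_mask)
            return self.norm(x)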
Code Example #5
    def __init__(self,
                 output_dim: int = 768,
                 dropout: float = 0.1,
                 vocab_size: int = 30000,
                 max_len: int = 512,
                 trainable_pos_embedding: bool = True,
                 use_one_dropout: bool = False,
                 use_embedding_layer_norm: bool = False,
                 layer_norm_epsilon: float = 1e-5,
                 **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.use_one_dropout = use_one_dropout
        self.output_dim = output_dim
        self.dropout = dropout
        self.vocab_size = vocab_size

        # BERT uses two segment ids (sentence A / sentence B) for the
        # next-sentence prediction task
        self.segment_emb = keras.layers.Embedding(2,
                                                  output_dim,
                                                  name='SegmentEmbedding')

        self.trainable_pos_embedding = trainable_pos_embedding
        if not trainable_pos_embedding:
            self.pos_emb = keras.layers.Embedding(
                max_len,
                output_dim,
                trainable=False,
                name='PositionEmbedding',
                weights=[_get_pos_encoding_matrix(max_len, output_dim)])
        else:
            self.pos_emb = keras.layers.Embedding(max_len,
                                                  output_dim,
                                                  name='PositionEmbedding')

        self.token_emb = keras.layers.Embedding(vocab_size,
                                                output_dim,
                                                name='TokenEmbedding')
        self.embedding_dropout = keras.layers.Dropout(dropout,
                                                      name='EmbeddingDropOut')
        self.add_embeddings = keras.layers.Add(name='AddEmbeddings')
        self.use_embedding_layer_norm = use_embedding_layer_norm
        if self.use_embedding_layer_norm:
            self.embedding_layer_norm = LayerNormalization(layer_norm_epsilon)
        else:
            self.embedding_layer_norm = None
        self.layer_norm_epsilon = layer_norm_epsilon
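The layer's call method is not part of the snippet. A plausible sketch of how the three embeddings would be combined on this class, sum, then dropout, then optional layer norm, is shown below; it ignores the per-embedding dropout variant that the use_one_dropout flag presumably toggles:

    def call(self, inputs):
        tokens, segments, positions = inputs
        # Sum token, segment, and position embeddings, then apply one dropout
        x = self.add_embeddings([self.token_emb(tokens),
                                 self.segment_emb(segments),
                                 self.pos_emb(positions)])
        x = self.embedding_dropout(x)
        if self.embedding_layer_norm is not None:
            x = self.embedding_layer_norm(x)
        return x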