Example #1
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 hidden_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 pad_token_id=0):
        super(ErnieEmbeddings, self).__init__()

        self.word_embeddings = nn.Embedding(vocab_size,
                                            hidden_size,
                                            padding_idx=pad_token_id)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(hidden_dropout_prob)
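Modules like the one above only define the sublayers; the usual forward pass sums the word, position, and token-type embeddings and then applies LayerNorm and dropout. A minimal self-contained sketch of that pattern (illustrative names and sizes, not taken from the ERNIE source):

import paddle
import paddle.nn as nn


class ToyEmbeddings(nn.Layer):
    # BERT/ERNIE-style embedding block: sum three embeddings, then LayerNorm + Dropout.
    def __init__(self, vocab_size=100, hidden_size=16, max_len=32, type_size=2):
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size, padding_idx=0)
        self.position_embeddings = nn.Embedding(max_len, hidden_size)
        self.token_type_embeddings = nn.Embedding(type_size, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_ids, token_type_ids):
        seq_len = input_ids.shape[1]
        position_ids = paddle.arange(seq_len, dtype="int64").unsqueeze(0)
        embeddings = (self.word_embeddings(input_ids) +
                      self.position_embeddings(position_ids) +
                      self.token_type_embeddings(token_type_ids))
        return self.dropout(self.layer_norm(embeddings))


input_ids = paddle.randint(0, 100, shape=[2, 8])
token_type_ids = paddle.zeros([2, 8], dtype="int64")
print(ToyEmbeddings()(input_ids, token_type_ids).shape)  # [2, 8, 16]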
Example #2
 def __init__(self,
              embedding_size,
              hidden_size,
              vocab_size,
              activation,
              embedding_weights=None):
     super(RoFormerLMPredictionHead, self).__init__()
     self.transform = nn.Linear(hidden_size, embedding_size)
     self.activation = getattr(nn.functional, activation)
     self.layer_norm = nn.LayerNorm(embedding_size)
     self.decoder_weight = (self.create_parameter(
         shape=[vocab_size, embedding_size],
         dtype=self.transform.weight.dtype,
         is_bias=False,
     ) if embedding_weights is None else embedding_weights)
     self.decoder_bias = self.create_parameter(
         shape=[vocab_size], dtype=self.decoder_weight.dtype, is_bias=True)
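The prediction-head examples (this one and Example #12 below) either create a fresh decoder weight with create_parameter or reuse the input embedding matrix. A hedged sketch of how such a head is typically applied, with made-up sizes (the real forward method is not shown in the source):

import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class ToyLMHead(nn.Layer):
    # transform -> activation -> LayerNorm -> project back to the vocabulary.
    def __init__(self, hidden_size=16, vocab_size=50, embedding_weights=None):
        super().__init__()
        self.transform = nn.Linear(hidden_size, hidden_size)
        self.activation = F.gelu
        self.layer_norm = nn.LayerNorm(hidden_size)
        # Reuse an existing [vocab_size, hidden_size] embedding matrix if given,
        # otherwise allocate an untied weight of the same shape.
        self.decoder_weight = (embedding_weights if embedding_weights is not None
                               else self.create_parameter(
                                   shape=[vocab_size, hidden_size],
                                   dtype=self.transform.weight.dtype,
                                   is_bias=False))
        self.decoder_bias = self.create_parameter(
            shape=[vocab_size], dtype=self.decoder_weight.dtype, is_bias=True)

    def forward(self, hidden_states):
        x = self.layer_norm(self.activation(self.transform(hidden_states)))
        # [batch, seq, hidden] x [hidden, vocab] -> [batch, seq, vocab]
        return paddle.matmul(x, self.decoder_weight, transpose_y=True) + self.decoder_bias


logits = ToyLMHead()(paddle.randn([2, 8, 16]))
print(logits.shape)  # [2, 8, 50]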
Example #3
    def __init__(self,
                 num_classes,
                 backbone_indices,
                 in_channels,
                 mla_channels=256,
                 mlahead_channels=128,
                 lr_multiple=10):
        super().__init__()

        if len(backbone_indices) != 4:
            raise RuntimeError(
                "backbone_indices should contain exactly 4 indices, but got {}.".format(
                    len(backbone_indices)))

        self.mla_feat_nums = len(backbone_indices)
        self.norms = nn.LayerList(
            [nn.LayerNorm(normalized_shape=in_channels, epsilon=1e-6)] *
            self.mla_feat_nums)

        self.mla = ConvMLA(in_channels, mla_channels)

        self.aux_heads = nn.LayerList([
            nn.Conv2D(in_channels=mla_channels,
                      out_channels=num_classes,
                      kernel_size=1)
        ] * self.mla_feat_nums)

        self.feat_convs = nn.LayerList([
            nn.Sequential(
                layers.ConvBNReLU(in_channels=mla_channels,
                                  out_channels=mlahead_channels,
                                  kernel_size=3,
                                  padding=1),
                layers.ConvBNReLU(in_channels=mlahead_channels,
                                  out_channels=mlahead_channels,
                                  kernel_size=3,
                                  padding=1),
                nn.Upsample(
                    scale_factor=4, mode='bilinear', align_corners=True))
        ] * self.mla_feat_nums)

        self.backbone_indices = backbone_indices
        self.in_channels = in_channels

        self.cls_head = nn.Conv2D(in_channels=4 * mlahead_channels,
                                  out_channels=num_classes,
                                  kernel_size=3,
                                  padding=1)
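Note that multiplying a Python list of layers, as in the norms/aux_heads/feat_convs above, repeats references to the same layer object, so every entry of the nn.LayerList shares one set of parameters. If independent copies are wanted, a comprehension builds separate instances; a small stand-alone sketch of the difference (illustrative only):

import paddle.nn as nn

shared = nn.LayerList([nn.LayerNorm(16)] * 4)  # four references to one LayerNorm
independent = nn.LayerList([nn.LayerNorm(16) for _ in range(4)])  # four separate LayerNorms

print(shared[0] is shared[1])            # True  - parameters are shared
print(independent[0] is independent[1])  # False - each entry has its own weights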
Example #4
    def __init__(self):
        super(ModelLinear, self).__init__()
        with supernet(expand_ratio=(1.0, 2.0, 4.0)) as ofa_super:
            models = []
            models += [nn.Embedding(num_embeddings=64, embedding_dim=64)]
            models += [nn.Linear(64, 128)]
            models += [nn.LayerNorm(128)]
            models += [nn.Linear(128, 256)]
            models = ofa_super.convert(models)

        with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
            models1 = []
            models1 += [nn.Linear(256, 256)]
            models1 = ofa_super.convert(models1)

        models += models1
        self.models = paddle.nn.Sequential(*models)
Example #5
    def __init__(self,
                 decoder_layers,
                 num_layers,
                 norm=None,
                 hidden_size=None,
                 topo=None):
        super(TransformerDecoder, self).__init__()

        self.topo = topo
        self.num_layers = num_layers
        self.layers = decoder_layers
        self.norm = norm
        if norm == "LayerNorm":
            self.norm = nn.LayerNorm(hidden_size)
        elif norm is not None:
            raise ValueError("Only support LayerNorm")
        self.checkpoints = []
Example #6
    def __init__(self,
                 vocab_size,
                 embed_tokens=None,
                 pad_token_id=0,
                 d_model=1280,
                 num_encoder_layers=2,
                 encoder_attention_heads=32,
                 encoder_ffn_dim=5120,
                 dropout=0.1,
                 activation_function='gelu',
                 attention_dropout=0.0,
                 activation_dropout=0.0,
                 max_position_embeddings=128,
                 init_std=0.02,
                 scale_embedding=True,
                 normalize_before=True):
        super().__init__()
        self.init_std = init_std
        self.pad_token_id = pad_token_id
        if embed_tokens is not None:
            self.embed_tokens = embed_tokens
        else:
            self.embed_tokens = nn.Embedding(num_embeddings=vocab_size,
                                             embedding_dim=d_model,
                                             padding_idx=pad_token_id)
        self.embed_scale = math.sqrt(d_model) if scale_embedding else 1.0
        self.encoder_embed_positions = BlenderbotLearnedPositionalEmbedding(
            num_embeddings=max_position_embeddings, embedding_dim=d_model)

        self.encoder_dropout = nn.Dropout(dropout)
        self.encoder_layernorm = nn.LayerNorm(normalized_shape=d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=encoder_attention_heads,
            dim_feedforward=encoder_ffn_dim,
            dropout=dropout,
            activation=activation_function,
            attn_dropout=attention_dropout,
            act_dropout=activation_dropout,
            normalize_before=normalize_before)
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layer,
                                             num_layers=num_encoder_layers)

        self.apply(self.init_weights)
Example #7
    def __init__(
        self,
        d_model,
        d_inner,
        layer_norm_eps,
        dropout,
        ff_activation,
    ):
        super(XLNetFeedForward, self).__init__()

        self.layer_norm = nn.LayerNorm(d_model, epsilon=layer_norm_eps)
        self.layer_1 = nn.Linear(d_model, d_inner)
        self.layer_2 = nn.Linear(d_inner, d_model)
        self.dropout = nn.Dropout(dropout)
        if isinstance(ff_activation, str):
            self.activation_function = ACT2FN[ff_activation]
        else:
            self.activation_function = ff_activation
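These sublayers are usually wired up as a residual feed-forward block. A hedged sketch of that typical wiring with arbitrary sizes (not the verbatim XLNet forward method, which is not shown in the source):

import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class ToyFeedForward(nn.Layer):
    # Standard transformer FFN block: expand -> activation -> project, with
    # dropout, a residual connection, and LayerNorm on the output.
    def __init__(self, d_model=16, d_inner=64, dropout=0.1, layer_norm_eps=1e-12):
        super().__init__()
        self.layer_norm = nn.LayerNorm(d_model, epsilon=layer_norm_eps)
        self.layer_1 = nn.Linear(d_model, d_inner)
        self.layer_2 = nn.Linear(d_inner, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        h = self.dropout(F.gelu(self.layer_1(x)))
        h = self.dropout(self.layer_2(h))
        return self.layer_norm(x + h)  # post-norm residual


print(ToyFeedForward()(paddle.randn([2, 8, 16])).shape)  # [2, 8, 16]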
Example #8
    def __init__(self, in_channels, ratio):
        super().__init__()

        self.conv_mask = nn.Conv2D(in_channels=in_channels,
                                   out_channels=1,
                                   kernel_size=1)

        self.softmax = nn.Softmax(axis=2)

        inter_channels = int(in_channels * ratio)
        self.channel_add_conv = nn.Sequential(
            nn.Conv2D(in_channels=in_channels,
                      out_channels=inter_channels,
                      kernel_size=1),
            nn.LayerNorm(normalized_shape=[inter_channels, 1, 1]), nn.ReLU(),
            nn.Conv2D(in_channels=inter_channels,
                      out_channels=in_channels,
                      kernel_size=1))
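LayerNorm here is given a list normalized_shape, so it normalizes over the trailing [inter_channels, 1, 1] dimensions of a pooled [N, C, 1, 1] context tensor. A quick stand-alone check of that behavior (sizes are arbitrary):

import paddle
import paddle.nn as nn

context = paddle.randn([4, 32, 1, 1])             # pooled global context, [N, C, 1, 1]
norm = nn.LayerNorm(normalized_shape=[32, 1, 1])  # normalize over the last three dims
print(norm(context).shape)                        # [4, 32, 1, 1]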
Example #9
    def __init__(
        self,
        vocab_size,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        normalize_before=True,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        unk_token_id=0,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        mask_token_id=30000,
    ):
        super(UnifiedTransformerModel, self).__init__()
        self.unk_token_id = unk_token_id
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.mask_token_id = mask_token_id
        self.initializer_range = initializer_range

        self.embeddings = UnifiedTransformerEmbeddings(
            vocab_size, hidden_size, hidden_dropout_prob,
            max_position_embeddings, type_vocab_size)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0,
            normalize_before=normalize_before)
        encoder_norm = nn.LayerNorm(hidden_size)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers,
                                             encoder_norm)
        self.apply(self.init_weights)
Example #10
    def __init__(self,
                 vocab_size,
                 embed_tokens=None,
                 pad_token_id=1,
                 d_model=768,
                 num_decoder_layers=6,
                 decoder_attention_heads=12,
                 decoder_ffn_dim=3072,
                 dropout=0.1,
                 activation_function='gelu',
                 attention_dropout=0.1,
                 activation_dropout=0.1,
                 max_position_embeddings=1024,
                 init_std=0.02,
                 scale_embedding=True,
                 normalize_before=False):
        super().__init__()
        self.init_std = init_std
        if embed_tokens is not None:
            self.embed_tokens = embed_tokens
        else:
            self.embed_tokens = nn.Embedding(num_embeddings=vocab_size,
                                             embedding_dim=d_model,
                                             padding_idx=pad_token_id)

        self.decoder_embed_positions = BlenderbotSmallLearnedPositionalEmbedding(
            num_embeddings=max_position_embeddings, embedding_dim=d_model)
        self.decoder_dropout = nn.Dropout(dropout)
        self.decoder_layernorm_embedding = nn.LayerNorm(
            normalized_shape=d_model)
        self.embed_scale = math.sqrt(d_model) if scale_embedding else 1.0

        decoder_layer = BlenderbotSmallDecoderLayer(
            d_model=d_model,
            nhead=decoder_attention_heads,
            dim_feedforward=decoder_ffn_dim,
            dropout=dropout,
            activation=activation_function,
            attn_dropout=attention_dropout,
            act_dropout=activation_dropout,
            normalize_before=normalize_before)
        self.decoder = TransformerDecoder(decoder_layer=decoder_layer,
                                          num_layers=num_decoder_layers)
        self.apply(self.init_weights)
Example #11
 def __init__(self,
              vocab_size,
              embedding_size=128,
              hidden_dropout_prob=0.1,
              max_position_embeddings=512,
              type_vocab_size=16,
              padding_idx=0,
              cls_num=2):
     super().__init__()
     self.word_embeddings = nn.Embedding(vocab_size,
                                         embedding_size,
                                         padding_idx=padding_idx)
     self.position_embeddings = nn.Embedding(max_position_embeddings,
                                             embedding_size)
     self.token_type_embeddings = nn.Embedding(type_vocab_size,
                                               embedding_size)
     self.layer_norm = nn.LayerNorm(embedding_size)
     self.dropout = nn.Dropout(hidden_dropout_prob)
     self.cls_num = cls_num
Example #12
 def __init__(self,
              hidden_size,
              vocab_size,
              activation,
              embedding_weights=None):
     super(BertLMPredictionHead, self).__init__()
     self.weight_attr = paddle.ParamAttr(
          initializer=nn.initializer.Constant(value=0.000001))
     # self.transform = nn.Linear(hidden_size, hidden_size)
     self.transform = nn.Linear(hidden_size, hidden_size, weight_attr=self.weight_attr, bias_attr=False)
     self.activation = getattr(nn.functional, activation)
     self.layer_norm = nn.LayerNorm(hidden_size)
     self.decoder_weight = self.create_parameter(
         shape=[hidden_size, vocab_size],
         dtype=self.transform.weight.dtype,
         is_bias=False) if embedding_weights is None else embedding_weights
     self.decoder_bias = self.create_parameter(
         shape=[vocab_size], dtype=self.decoder_weight.dtype, is_bias=True)
Example #13
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 hidden_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 use_relative_position=True):
        super(NeZhaEmbeddings, self).__init__()
        self.use_relative_position = use_relative_position

        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)

        if not use_relative_position:
            self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                    hidden_size)

        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(hidden_dropout_prob)
Example #14
    def __init__(self):
        super(ModelLinear1, self).__init__()
        with supernet(channel=((64, 128, 256), (64, 128, 256),
                               (64, 128, 256))) as ofa_super:
            models = []
            models += [nn.Embedding(num_embeddings=64, embedding_dim=64)]
            models += [nn.Linear(64, 128)]
            models += [nn.LayerNorm(128)]
            models += [nn.Linear(128, 256)]
            models = ofa_super.convert(models)

        with supernet(channel=((64, 128, 256), )) as ofa_super:
            models1 = []
            models1 += [nn.Linear(256, 256)]
            models1 = ofa_super.convert(models1)

        models += models1

        self.models = paddle.nn.Sequential(*models)
Example #15
    def __init__(self,
                 n_head,
                 d_model,
                 d_head,
                 dropout,
                 attn_dropout=0,
                 tgt_len=None,
                 ext_len=None,
                 mem_len=None,
                 normalize_before=False):
        super(RelMultiHeadAttn, self).__init__()

        self.n_head = n_head
        self.d_model = d_model
        self.d_head = d_head
        self.dropout = dropout

        self.qkv_proj = nn.Linear(
            d_model,
            3 * n_head * d_head,
            weight_attr=paddle.nn.initializer.Normal(
                mean=0.0, std=0.01),
            bias_attr=False)

        self.drop = nn.Dropout(dropout)
        self.attn_drop = nn.Dropout(attn_dropout)
        self.o_proj = nn.Linear(
            n_head * d_head,
            d_model,
            weight_attr=paddle.nn.initializer.Normal(
                mean=0.0, std=0.01),
            bias_attr=False)

        self.layer_norm = nn.LayerNorm(
            d_model,
            weight_attr=paddle.nn.initializer.Normal(
                mean=1.0, std=0.01),
            bias_attr=paddle.nn.initializer.Constant(0.0))

        self.scale = 1 / (d_head**0.5)

        self.normalize_before = normalize_before
Example #16
    def __init__(self, vocab_size, type_size, max_position_seq_len, num_layers,
                 n_head, hidden_size, attn_dropout, act_dropout):
        super(Plato2Encoder, self).__init__()

        self.n_head = n_head

        self.word_embedding_layer = nn.Embedding(vocab_size, hidden_size)
        self.sent_embedding_layer = nn.Embedding(type_size, hidden_size)
        self.pos_embedding_layer = nn.Embedding(max_position_seq_len,
                                                hidden_size)

        self.encoder_layers = []
        for i in range(num_layers):
            encoder_layer = Plato2EncoderLayer(n_head, hidden_size,
                                               attn_dropout, act_dropout)
            self.encoder_layers.append(encoder_layer)
            self.add_sublayer('layers.' + str(i), encoder_layer)
        self.post_encoder_layer_norm = nn.LayerNorm(hidden_size)

        self.dropout_layer = nn.Dropout(act_dropout)
Example #17
    def __init__(
        self,
        embedding_size,
        vocab_size,
        hidden_size,
        hidden_act,
    ):
        super(AlbertMLMHead, self).__init__()

        self.layer_norm = nn.LayerNorm(embedding_size)
        self.bias = self.create_parameter(
            [vocab_size],
            is_bias=True,
            default_initializer=nn.initializer.Constant(value=0))
        self.dense = nn.Linear(hidden_size, embedding_size)
        self.decoder = nn.Linear(embedding_size, vocab_size)
        self.activation = ACT2FN[hidden_act]

        # link bias
        self.decoder.bias = self.bias
Example #18
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        if img_size % patch_size != 0:
            raise Exception(
                f"img_size {img_size} should be divided by patch_size {patch_size}."
            )

        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        self.H, self.W = img_size[0] // patch_size[0], img_size[
            1] // patch_size[1]
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2D(in_chans,
                              embed_dim,
                              kernel_size=patch_size,
                              stride=patch_size)
        self.norm = nn.LayerNorm(embed_dim)
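After the convolutional projection, a patch-embedding layer like this typically flattens the spatial grid into a token sequence before applying the LayerNorm. A minimal sketch of that flatten/transpose step (illustrative, not copied from the source):

import paddle
import paddle.nn as nn

proj = nn.Conv2D(3, 768, kernel_size=16, stride=16)
norm = nn.LayerNorm(768)

x = paddle.randn([2, 3, 224, 224])
x = proj(x)                             # [2, 768, 14, 14]
x = x.flatten(2).transpose([0, 2, 1])   # [2, 196, 768] - one token per patch
x = norm(x)
print(x.shape)                          # [2, 196, 768]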
Example #19
    def __init__(self,
                 hidden_size=1024,
                 intermediate_size=4 * 1024,
                 initializer_range=0.02):
        super(MLPLayer, self).__init__()
        d_model = hidden_size
        dim_feedforward = intermediate_size
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(mean=0.0, std=initializer_range))
        bias_attr = None

        self.linear0 = nn.Linear(d_model,
                                 dim_feedforward,
                                 weight_attr,
                                 bias_attr=bias_attr)
        self.linear1 = nn.Linear(dim_feedforward,
                                 d_model,
                                 weight_attr,
                                 bias_attr=bias_attr)
        self.norm = nn.LayerNorm(d_model, epsilon=1e-5)
Example #20
    def __init__(self,
                 n_src_vocab=200,
                 d_word_vec=20,
                 n_layers=3,
                 n_head=2,
                 d_k=10,
                 d_v=10,
                 d_model=20,
                 d_inner=10,
                 pad_idx=0,
                 dropout=0.1,
                 n_position=200,
                 emb_weight=None):
        "args:"
        "n_src_vocab(int): the number of vocabulary of input"
        "src_pad_idx(int): the index of padding word of input"
        "d_word_vec(int) : the dimension of word2vec and d_word_vec is equal to d_model"
        "d_inner(int):     the number of hidden units of PositionwiseForward layer"
        "n_layers(int): the number of decoder layer and encoder layer"
        "n_head(int): the number of attention head"
        "d_k: dimension of d matrix"
        "d_v: dimension of v matrix"
        "src_emb_weight: weight of input w2v"
        super().__init__()

        self.src_word_emb = nn.Embedding(n_src_vocab,
                                         d_word_vec,
                                         sparse=True,
                                         padding_idx=pad_idx)

        if emb_weight is not None:
            self.src_word_emb.weight.set_value(emb_weight)
            self.src_word_emb.weight.stop_gradient = True
        self.position_enc = PositionalEncoding(d_word_vec,
                                               n_position=n_position)
        self.dropout = nn.Dropout(dropout)
        self.layer_stack = nn.LayerList([
            EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)
        ])
        self.layer_norm = nn.LayerNorm(d_model, epsilon=1e-6)
Example #21
    def __init__(self,
                 img_size=224,
                 patch_size=7,
                 stride=4,
                 in_chans=3,
                 embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        self.H, self.W = img_size[0] // patch_size[0], img_size[
            1] // patch_size[1]
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2D(in_chans,
                              embed_dim,
                              kernel_size=patch_size,
                              stride=stride,
                              padding=(patch_size[0] // 2, patch_size[1] // 2))
        self.norm = nn.LayerNorm(embed_dim)
Example #22
    def __init__(self,
                 vocab_size,
                 emb_size=128,
                 hidden_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 pad_token_id=0,
                 rel_pos_size=None,
                 num_attention_heads=None):
        super(ErnieGramEmbeddings, self).__init__()

        self.word_embeddings = nn.Embedding(
            vocab_size, emb_size, padding_idx=pad_token_id)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                emb_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, emb_size)
        if rel_pos_size and num_attention_heads:
            self.rel_pos_embeddings = nn.Embedding(rel_pos_size,
                                                   num_attention_heads)
        self.layer_norm = nn.LayerNorm(emb_size)
        self.dropout = nn.Dropout(hidden_dropout_prob)
Example #23
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        assert (
            img_size[0] % patch_size[0] == 0
            and img_size[1] % patch_size[1] == 0
        ), f"img_size {img_size} should be divided by patch_size {patch_size}."
        # Note: self.H, self.W and self.num_patches are not used
        # since the image size may change on the fly.
        self.H, self.W = img_size[0] // patch_size[0], img_size[
            1] // patch_size[1]
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2D(in_chans,
                              embed_dim,
                              kernel_size=patch_size,
                              stride=patch_size)
        self.norm = nn.LayerNorm(embed_dim)
Example #24
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 hidden_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 layer_norm_eps=1e-12):
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size,
                                            hidden_size,
                                            padding_idx=None)

        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)

        self.LayerNorm = nn.LayerNorm(hidden_size, epsilon=layer_norm_eps)
        self.dropout = nn.Dropout(hidden_dropout_prob)

        self.register_buffer(
            "position_ids",
            paddle.arange(max_position_embeddings).expand((1, -1)))
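register_buffer keeps the precomputed position_ids tensor inside the layer's state dict without making it trainable; a forward pass then just slices it to the current sequence length. A short hedged sketch of that usage (not the source's forward method):

import paddle
import paddle.nn as nn


class ToyPositionEmbeddings(nn.Layer):
    def __init__(self, max_position_embeddings=512, hidden_size=16):
        super().__init__()
        self.position_embeddings = nn.Embedding(max_position_embeddings, hidden_size)
        # Non-trainable tensor saved/loaded with the layer, shape [1, max_positions].
        self.register_buffer(
            "position_ids",
            paddle.arange(max_position_embeddings).expand((1, -1)))

    def forward(self, seq_len):
        position_ids = self.position_ids[:, :seq_len]    # [1, seq_len]
        return self.position_embeddings(position_ids)    # [1, seq_len, hidden]


print(ToyPositionEmbeddings()(8).shape)  # [1, 8, 16]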
Example #25
    def __init__(
        self,
        vocab_size,
        hidden_size,
        pad_token_id,
        type_vocab_size,
        max_position_embeddings,
        pinyin_map_len,
        glyph_embedding_dim,
        layer_norm_eps=1e-12,
        hidden_dropout_prob=0.1,
    ):
        super(FusionBertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(vocab_size,
                                            hidden_size,
                                            padding_idx=pad_token_id)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.pinyin_embeddings = PinyinEmbedding(
            pinyin_map_len=pinyin_map_len,
            embedding_size=128,
            pinyin_out_dim=hidden_size,
        )
        self.glyph_embeddings = GlyphEmbedding(vocab_size, glyph_embedding_dim)

        self.glyph_map = nn.Linear(glyph_embedding_dim, hidden_size)
        self.map_fc = nn.Linear(hidden_size * 3, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size, epsilon=layer_norm_eps)
        self.dropout = nn.Dropout(hidden_dropout_prob)

        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer(
            "position_ids",
            paddle.expand(paddle.arange(max_position_embeddings,
                                        dtype="int64"),
                          shape=[1, -1]),
        )
Example #26
 def __init__(self,
              hidden_size=64,
              intermediate_size=4 * 64,
              initializer_range=0.02):
     super(MLPLayer, self).__init__()
     d_model = hidden_size
     dim_feedforward = intermediate_size
     np.random.seed(2021)
     arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
     arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
     arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
     arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
     weight_attr0 = paddle.ParamAttr(
         initializer=NumpyArrayInitializer(arr0))
     weight_attr1 = paddle.ParamAttr(
         initializer=NumpyArrayInitializer(arr1))
     weight_attr2 = paddle.ParamAttr(
         initializer=NumpyArrayInitializer(arr2))
     weight_attr3 = paddle.ParamAttr(
         initializer=NumpyArrayInitializer(arr3))
     bias_attr = None
     self.linear0 = nn.Linear(d_model,
                              dim_feedforward,
                              weight_attr0,
                              bias_attr=bias_attr)
     self.linear1 = nn.Linear(dim_feedforward,
                              d_model,
                              weight_attr1,
                              bias_attr=bias_attr)
     self.norm = nn.LayerNorm(d_model, epsilon=1e-5)
     self.linear2 = nn.Linear(d_model,
                              dim_feedforward,
                              weight_attr2,
                              bias_attr=bias_attr)
     self.linear3 = nn.Linear(dim_feedforward,
                              d_model,
                              weight_attr3,
                              bias_attr=bias_attr)
Example #27
File: conv.py Project: WenjinW/PGL
    def __init__(self,
                 input_size,
                 output_size,
                 activation=None,
                 init_eps=0.0,
                 train_eps=False):
        super(GINConv, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.linear1 = nn.Linear(input_size, output_size, bias_attr=True)
        self.linear2 = nn.Linear(output_size, output_size, bias_attr=True)
        self.layer_norm = nn.LayerNorm(output_size)
        if train_eps:
            self.epsilon = self.create_parameter(
                shape=[1, 1],
                dtype='float32',
                default_initializer=nn.initializer.Constant(value=init_eps))
        else:
            self.epsilon = init_eps

        if isinstance(activation, str):
            activation = getattr(F, activation)
        self.activation = activation
Example #28
 def __init__(self,
              hidden_size=768,
              num_attention_heads=12,
              intermediate_size=3072,
              hidden_act="gelu",
              hidden_dropout_prob=0.1,
              attention_probs_dropout_prob=0.1,
              max_relative_position=64,
              layer_norm_eps=1e-12):
     super(NeZhaLayer, self).__init__()
     self.seq_len_dim = 1
     self.layer_norm = nn.LayerNorm(hidden_size, epsilon=layer_norm_eps)
     self.attention = NeZhaAttention(
         hidden_size=hidden_size,
         num_attention_heads=num_attention_heads,
         hidden_dropout_prob=hidden_dropout_prob,
         attention_probs_dropout_prob=attention_probs_dropout_prob,
         max_relative_position=max_relative_position,
         layer_norm_eps=layer_norm_eps)
     self.ffn = nn.Linear(hidden_size, intermediate_size)
     self.ffn_output = nn.Linear(intermediate_size, hidden_size)
     self.activation = ACT2FN[hidden_act]
     self.dropout = nn.Dropout(hidden_dropout_prob)
Example #29
    def __init__(self, k_size=3, ch=64, s_state=False, c_state=False):
        super(SANN_Attention, self).__init__()
        print(
            f'************************************use SANN_Attention s_state => {s_state} -- c_state => {c_state}'
        )
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.max_pool = nn.AdaptiveAvgPool2D(1)
        self.sigmoid = nn.Sigmoid()
        self.s_state = s_state
        self.c_state = c_state

        if c_state:
            self.c_attention = nn.Sequential(
                nn.Conv1D(1,
                          1,
                          kernel_size=k_size,
                          padding=(k_size - 1) // 2,
                          bias_attr=False), nn.LayerNorm([1, ch]),
                nn.LeakyReLU(0.3), nn.Linear(ch, ch, bias_attr=False))

        if s_state:
            self.conv_s = nn.Sequential(Conv(ch, ch // 4, k=1))
            self.s_attention = nn.Conv2D(2, 1, 7, padding=3, bias_attr=False)
Example #30
 def __init__(self):
     super(ModelLinear2, self).__init__()
     with supernet(expand_ratio=None) as ofa_super:
         models = []
         models += [
             nn.Embedding(num_embeddings=64,
                          embedding_dim=64,
                          weight_attr=paddle.ParamAttr(name='emb'))
         ]
         models += [
             nn.Linear(64,
                       128,
                       weight_attr=paddle.ParamAttr(name='fc1_w'),
                       bias_attr=paddle.ParamAttr(name='fc1_b'))
         ]
         models += [
             nn.LayerNorm(128,
                          weight_attr=paddle.ParamAttr(name='ln1_w'),
                          bias_attr=paddle.ParamAttr(name='ln1_b'))
         ]
         models += [nn.Linear(128, 256)]
         models = ofa_super.convert(models)
     self.models = paddle.nn.Sequential(*models)