Example #1
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=1,
                 stride=1,
                 padding=0):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2D(in_planes,
                              out_planes,
                              kernel_size=kernel_size,
                              stride=stride,
                              padding=padding,
                              bias_attr=False)
        self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
        self.relu = nn.ReLU()

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Normal(
                        0, math.sqrt(2. / n)))
            elif isinstance(m, nn.BatchNorm2D):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.Constant(0.0))
Example #2
    def __init__(self, input_resolution: int, patch_size: int, width: int,
                 layers: int, heads: int, output_dim: int):
        super().__init__()
        self.input_resolution = input_resolution
        self.output_dim = output_dim
        self.conv1 = nn.Conv2D(in_channels=3,
                               out_channels=width,
                               kernel_size=patch_size,
                               stride=patch_size,
                               bias_attr=False)

        # scale = width ** -0.5
        #self.class_embedding = paddle.create_parameter(scale * torch.randn(width))
        self.class_embedding = paddle.create_parameter((width, ), 'float32')

        #self.positional_embedding = paddle.create_parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width))
        self.positional_embedding = paddle.create_parameter(
            ((input_resolution // patch_size)**2 + 1, width), 'float32')

        self.ln_pre = nn.LayerNorm(width)

        self.transformer = Transformer(width, layers, heads)

        self.ln_post = nn.LayerNorm(width)
        self.proj = paddle.create_parameter((width, output_dim), 'float32')
Example #3
    def __init__(self,
                 feature_dim=128,
                 bottleneck_setting=MobileFaceNet_BottleNeck_Setting,
                 **args):
        super().__init__()
        self.conv1 = ConvBlock(3, 64, 3, 2, 1)
        self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)

        self.cur_channel = 64
        block = BottleNeck
        self.blocks = self._make_layer(block, bottleneck_setting)

        self.conv2 = ConvBlock(128, 512, 1, 1, 0)
        self.linear7 = ConvBlock(512, 512, 7, 1, 0, dw=True, linear=True)
        self.linear1 = ConvBlock(512, feature_dim, 1, 1, 0, linear=True)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                # ks * ks * out_ch (Conv2D weight shape is [out_ch, in_ch, kH, kW])
                n = m.weight.shape[2] * m.weight.shape[3] * m.weight.shape[0]
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=nn.initializer.Normal(
                        mean=0.0, std=math.sqrt(2.0 / n)))
                # nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm, nn.BatchNorm2D, nn.GroupNorm)):
                m.weight = paddle.create_parameter(
                    shape=m.weight.shape,
                    dtype='float32',
                    default_initializer=nn.initializer.Constant(value=1.0))
                m.bias = paddle.create_parameter(
                    shape=m.bias.shape,
                    dtype='float32',
                    default_initializer=nn.initializer.Constant(value=0.0))
Example #4
    def __init__(self, opt):
        super(MotLoss, self).__init__()
        self.crit = paddle.nn.MSELoss() if opt.mse_loss else FocalLoss()
        self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
            RegLoss() if opt.reg_loss == 'sl1' else None
        self.crit_wh = paddle.nn.L1Loss(reduction='sum') if opt.dense_wh else \
            NormRegL1Loss() if opt.norm_wh else \
                RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
        self.opt = opt
        self.emb_dim = opt.reid_dim
        self.nID = opt.nID

        # param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        # bound = 1 / math.sqrt(self.emb_dim)
        # bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
        # self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=param_attr, bias_attr=bias_attr)
        self.classifier = nn.Linear(self.emb_dim, self.nID, bias_attr=True)
        if opt.id_loss == 'focal':  # rarely used in practice
            # torch.nn.init.normal_(self.classifier.weight, std=0.01)
            prior_prob = 0.01
            bias_value = -math.log((1 - prior_prob) / prior_prob)
            # torch.nn.init.constant_(self.classifier.bias, bias_value)

            weight_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Normal(std=0.01))
            bias_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Constant(bias_value))
            self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=weight_attr, bias_attr=bias_attr)
        self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
        self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)
        # self.s_det = nn.Parameter(-1.85 * torch.ones(1))
        # self.s_id = nn.Parameter(-1.05 * torch.ones(1))
        self.s_det = paddle.create_parameter(
            [1], dtype='float32',
            default_initializer=nn.initializer.Constant(value=-1.85))
        self.s_id = paddle.create_parameter(
            [1], dtype='float32',
            default_initializer=nn.initializer.Constant(value=-1.05))
Example #5
    def __init__(self,
                 sparse_feature_number=1000001,
                 sparse_feature_dim=9,
                 dense_feature_dim=13,
                 sparse_num_field=26,
                 layer_sizes=None):
        super(FM, self).__init__()
        self.sparse_feature_number = sparse_feature_number  # 1000001
        self.sparse_feature_dim = sparse_feature_dim  # 9
        self.dense_feature_dim = dense_feature_dim  # 13
        self.sparse_num_field = sparse_num_field  # sparse_inputs_slots-1==>26
        self.layer_sizes = layer_sizes  # fc_sizes: [512, 256, 128, 32]

        # first-order weights for sparse features
        self.sparse_feature_oneOrderWeight = paddle.nn.Embedding(
            sparse_feature_number, 1, padding_idx=0, sparse=True)
        # first-order weights for dense features
        self.dense_feature_oneOrderWeight = paddle.create_parameter(
            [dense_feature_dim], "float32")
        # second-order latent vectors
        self.sparse_latent_vecs = paddle.nn.Embedding(sparse_feature_number,
                                                      sparse_feature_dim,
                                                      padding_idx=0,
                                                      sparse=True)
        self.dense_latent_vecs = paddle.create_parameter(
            [1, dense_feature_dim, sparse_feature_dim], "float32")
Example #6
    def __init__(self,
                 sparse_feature_dim,
                 num_fields,
                 activation=None,
                 use_bias=False):
        super(FieldWiseBiInteraction, self).__init__()
        self.sparse_feature_dim = sparse_feature_dim
        self.num_fields = num_fields
        self.use_bias = use_bias
        self.activation = activation

        self.kernel_mf = paddle.create_parameter(
            shape=[int(self.num_fields * (self.num_fields - 1) / 2), 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())

        self.kernel_fm = paddle.create_parameter(
            shape=[self.num_fields, 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())

        if self.use_bias:
            self.bias_mf = paddle.create_parameter(
                shape=[1, ],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(value=0.0))

            self.bias_fm = paddle.create_parameter(
                shape=[1, ],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(value=0.0))
Example #7
    def __init__(self,
                 n_token,
                 d_embed,
                 d_proj,
                 cutoffs,
                 div_val=1,
                 sample_softmax=False):
        super(AdaptiveEmbedding, self).__init__()

        self.n_token = n_token
        self.d_embed = d_embed

        self.cutoffs = cutoffs + [n_token]
        self.div_val = div_val
        self.d_proj = d_proj

        self.emb_scale = d_proj**0.5

        self.cutoff_ends = [0] + self.cutoffs

        self.emb_layers = nn.LayerList()
        self.emb_projs = nn.ParameterList()
        if div_val == 1:
            self.emb_layers.append(
                nn.Embedding(
                    n_token,
                    d_embed,
                    sparse=sample_softmax > 0,
                    weight_attr=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
            if d_proj != d_embed:
                self.emb_projs.append(
                    paddle.create_parameter(
                        shape=[d_embed, d_proj],
                        dtype=global_dtype,
                        default_initializer=paddle.nn.initializer.Normal(
                            mean=0.0, std=0.01)))
        else:
            for i in range(len(self.cutoffs)):
                l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
                d_emb_i = d_embed // (div_val**i)
                self.emb_layers.append(
                    nn.Embedding(
                        r_idx - l_idx,
                        d_emb_i,
                        weight_attr=paddle.nn.initializer.Normal(
                            mean=0.0, std=0.01)))
                self.emb_projs.append(
                    paddle.create_parameter(
                        shape=[d_emb_i, d_proj],
                        dtype=global_dtype,
                        default_initializer=paddle.nn.initializer.Normal(
                            mean=0.0, std=0.01)))
Example #8
    def __init__(self, sparse_feature_number, sparse_feature_dim,
                 dense_feature_dim, sparse_num_field):
        super(FM, self).__init__()
        self.sparse_feature_number = sparse_feature_number
        self.sparse_feature_dim = sparse_feature_dim
        self.dense_feature_dim = dense_feature_dim
        self.dense_emb_dim = self.sparse_feature_dim
        self.sparse_num_field = sparse_num_field
        self.init_value_ = 0.1
        use_sparse = True
        if paddle.is_compiled_with_npu():
            use_sparse = False
        # sparse coding
        self.embedding_one = paddle.nn.Embedding(
            sparse_feature_number,
            1,
            padding_idx=0,
            sparse=use_sparse,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.TruncatedNormal(
                    mean=0.0,
                    std=self.init_value_ /
                    math.sqrt(float(self.sparse_feature_dim)))))

        self.embedding = paddle.nn.Embedding(
            self.sparse_feature_number,
            self.sparse_feature_dim,
            sparse=use_sparse,
            padding_idx=0,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.TruncatedNormal(
                    mean=0.0,
                    std=self.init_value_ /
                    math.sqrt(float(self.sparse_feature_dim)))))

        # dense coding
        self.dense_w_one = paddle.create_parameter(
            shape=[self.dense_feature_dim],
            dtype='float32',
            default_initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim))))

        self.dense_w = paddle.create_parameter(
            shape=[1, self.dense_feature_dim, self.dense_emb_dim],
            dtype='float32',
            default_initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim))))
Example #9
def _simple_network():
    """
    Define a simple network composed by a single linear layer.
    """
    input = paddle.static.data(
        name="input", shape=[None, 2, 2], dtype="float32")
    weight = paddle.create_parameter(
        shape=[2, 3],
        dtype="float32",
        attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.1)))
    bias = paddle.create_parameter(shape=[3], dtype="float32")
    linear_out = paddle.nn.functional.linear(x=input, weight=weight, bias=bias)
    out = paddle.tensor.sum(linear_out)
    return input, out, weight
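
A minimal sketch of driving _simple_network under static-graph mode (the batch size, feed values, and CPU executor are illustrative assumptions, not part of the original example):

import numpy as np
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    input, out, weight = _simple_network()

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)  # initializes the created parameters
result = exe.run(main_prog,
                 feed={"input": np.ones([4, 2, 2], dtype="float32")},
                 fetch_list=[out])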
Example #10
    def __init__(self, num_feats, in_feats, num_hops, sample_size):
        super(PartialWeightedAggregator, self).__init__()
        self.weight_store = []
        self.agg_feats = nn.ParameterList()
        self.discounts = nn.ParameterList()
        self.num_hops = num_hops
        for _ in range(num_hops):
            # plain (non-parameter) buffer for the full per-hop weight
            self.weight_store.append(paddle.empty([num_feats, in_feats]))
            # per-hop aggregated features and discounts as trainable parameters
            # (port of the commented torch code below)
            # self.agg_feats.append(nn.Parameter(torch.Tensor(sample_size, in_feats)))
            # self.discounts.append(nn.Parameter(torch.Tensor(in_feats)))
            # nn.init.xavier_uniform_(self.weight_store[-1])
            self.agg_feats.append(
                paddle.create_parameter(
                    shape=[sample_size, in_feats],
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.XavierNormal()))
            self.discounts.append(
                paddle.create_parameter(
                    shape=[in_feats],
                    dtype='float32',
                    default_initializer=paddle.nn.initializer.XavierNormal()))
        self.reset_parameters()
Example #11
 def __init__(self, hidden_size, variance_epsilon=1e-12):
     """Initialization."""
     super(LayerNorm, self).__init__()
     self.beta = paddle.create_parameter(
         shape=[hidden_size],
         dtype="float32",
         default_initializer=nn.initializer.Assign(
             paddle.zeros([hidden_size], "float32")))
     self.gamma = paddle.create_parameter(
         shape=[hidden_size],
         dtype="float32",
         default_initializer=nn.initializer.Assign(
             paddle.ones([hidden_size], "float32")))
     self.variance_epsilon = variance_epsilon
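
The snippet shows only the parameters; a minimal sketch of how gamma and beta would typically be applied in the forward pass (an assumption, not taken from the original source):

 def forward(self, x):
     # assumed forward: normalize over the last axis, then scale and shift
     u = x.mean(axis=-1, keepdim=True)
     s = ((x - u) ** 2).mean(axis=-1, keepdim=True)
     x = (x - u) / paddle.sqrt(s + self.variance_epsilon)
     return self.gamma * x + self.beta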
Example #12
    def __init__(self,
                 nsp_reader,
                 num_layers,
                 n_head,
                 hidden_size,
                 vocab_size=8001,
                 type_size=2,
                 latent_type_size=20,
                 max_position_seq_len=256,
                 act_dropout=0.1,
                 attn_dropout=0.1,
                 max_dec_len=64,
                 min_dec_len=1,
                 topk=10):
        super(Plato2InferModel, self).__init__()

        self.nsp_reader = nsp_reader
        self.num_layers = num_layers
        self.latent_type_size = latent_type_size
        self.max_dec_len = max_dec_len
        self.min_dec_len = min_dec_len
        self.topk = topk
        self.unk_id = 0
        self.bos_id = 1
        self.eos_id = 2
        self.mask_id = 8000
        self.after_eos = paddle.ones([vocab_size]) * -1e9
        self.after_eos[self.eos_id] = 0
        self.is_cn = False
        self.batch_size = 1

        self.latent_weight = paddle.create_parameter(
            [hidden_size, latent_type_size], 'float32')

        self.plato2_encoder = Plato2Encoder(
            vocab_size, type_size, max_position_seq_len, num_layers, n_head,
            hidden_size, attn_dropout, act_dropout)

        self.logits_fc_layer = nn.Linear(hidden_size, hidden_size)
        self.logits_layer_norm = nn.LayerNorm(hidden_size)
        self.logits_bias = paddle.create_parameter(
            [vocab_size], 'float32', is_bias=True)

        self.nsp_predictor = NSP(vocab_size, type_size, max_position_seq_len,
                                 num_layers, n_head, hidden_size, attn_dropout,
                                 act_dropout)

        self.gelu_layer = nn.GELU()
        self.softmax = nn.Softmax()
Example #13
 def __init__(self, num_state, num_node, num_class):
     super().__init__()
     self.vis_gcn = GCN(num_state, num_node)
     self.word_gcn = GCN(num_state, num_class)
     self.transfer = GraphTransfer(num_state)
     self.gamma_vis = paddle.zeros([num_node])
     self.gamma_word = paddle.zeros([num_class])
     self.gamma_vis = paddle.create_parameter(
         shape=paddle.shape(self.gamma_vis),
         dtype=str(self.gamma_vis.numpy().dtype),
         default_initializer=paddle.nn.initializer.Assign(self.gamma_vis))
     self.gamma_word = paddle.create_parameter(
         shape=paddle.shape(self.gamma_word),
         dtype=str(self.gamma_word.numpy().dtype),
         default_initializer=paddle.nn.initializer.Assign(self.gamma_word))
Example #14
    def __init__(self, cfg, name=None):
        cfg['return_additional_info'] = True
        cfg['has_pooler'] = False
        super(ErnieModelForGeneration, self).__init__(cfg, name=name)
        initializer = nn.initializer.TruncatedNormal(
            std=cfg['initializer_range'])
        d_model = cfg['hidden_size']
        d_vocab = cfg['vocab_size']

        self.mlm = _build_linear(
            d_model,
            d_model,
            append_name(name, 'mask_lm_trans_fc'),
            initializer,
        )
        self.act = ACT_DICT[cfg['hidden_act']]()
        self.mlm_ln = _build_ln(d_model,
                                name=append_name(name, 'mask_lm_trans'))
        self.mlm_bias = P.create_parameter(
            dtype='float32',
            shape=[d_vocab],
            attr=P.ParamAttr(name=append_name(name, 'mask_lm_out_fc.b_0'),
                             initializer=nn.initializer.Constant(value=0.0)),
            is_bias=True,
        )
        self.train()
Example #15
    def __init__(self, cfg, name=None):
        super(ErnieModelForPretraining, self).__init__(cfg, name=name)
        initializer = nn.initializer.TruncatedNormal(
            std=cfg['initializer_range'])
        d_model = cfg['hidden_size']
        d_vocab = cfg['vocab_size']

        self.pooler_heads = nn.LayerList([NSPHead(cfg, name=name)])
        self.mlm = _build_linear(
            d_model,
            d_model,
            append_name(name, 'mask_lm_trans_fc'),
            initializer,
        )
        self.act = ACT_DICT[cfg['hidden_act']]()
        self.mlm_ln = _build_ln(d_model,
                                name=append_name(name, 'mask_lm_trans'))
        self.mlm_bias = P.create_parameter(
            dtype='float32',
            shape=[d_vocab],
            attr=P.ParamAttr(name=append_name(name, 'mask_lm_out_fc.b_0'),
                             initializer=nn.initializer.Constant(value=0.0)),
            is_bias=True,
        )
        self.train()
Example #16
def kaiming_normal_(param, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=KaimingNormal(
            a=a, mode=mode, nonlinearity=nonlinearity))
    paddle.assign(replaced_param, param)
Example #17
def constant_init_(param, val):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.full(param.shape, val, param.dtype)))
    paddle.assign(replaced_param, param)
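
A hedged usage sketch for the two in-place re-initialization helpers above (the Conv2D layer is illustrative and assumes the helpers and the initializers they rely on are importable in the current module):

import paddle

conv = paddle.nn.Conv2D(in_channels=3, out_channels=16, kernel_size=3)
kaiming_normal_(conv.weight)    # re-initialize the kernel in place
constant_init_(conv.bias, 0.0)  # zero the bias in place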
Example #18
    def __init__(self, in_channels, nclass):
        super().__init__()
        self.nclass = nclass
        inter_channels = in_channels // 4
        self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32')
        self.inp = paddle.create_parameter(
            shape=self.inp.shape,
            dtype=str(self.inp.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(self.inp))
        self.inp.stop_gradient = True

        self.fc1 = nn.Sequential(nn.Linear(300, 128), nn.BatchNorm1D(128),
                                 nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(128, 256), nn.BatchNorm1D(256),
                                 nn.ReLU())
        self.conv5 = layers.ConvBNReLU(in_channels,
                                       inter_channels,
                                       3,
                                       padding=1,
                                       bias_attr=False,
                                       stride=1)

        self.gloru = GlobalReasonUnit(in_channels=inter_channels,
                                      num_state=256,
                                      num_node=84,
                                      nclass=nclass)
        self.conv6 = nn.Sequential(nn.Dropout(0.1),
                                   nn.Conv2D(inter_channels, nclass, 1))
Example #19
    def __init__(self, C: int, B: int = 1):
        super(Inspiration, self).__init__()

        self.weight = paddle.create_parameter(shape=[1, C, C], dtype='float32')
        # non-parameter buffer
        self.G = paddle.to_tensor(np.random.rand(B, C, C))
        self.C = C
Example #20
    def make_attention_layer(self, name_base, size):
        row = size[0]
        col = size[1]
        vec = paddle.create_parameter(
            shape=(col, 1),
            dtype="float32",
            name=name_base + "_vec_generated",
            default_initializer=paddle.nn.initializer.Normal(std=0.1))
        self.add_parameter(name_base + "_vec_generated", vec)
        index = len(self.attention_vec)
        self.attention_vec.append(vec)
        linear = paddle.nn.Linear(
            in_features=row,
            out_features=col,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Normal(std=0.01)))
        self.attention_layer.append(linear)
        self.add_sublayer(name_base + "_linear_generated", linear)

        def func(input):
            # input [b,g, row]
            # [b,g,col]
            project = self.attention_layer[index](input)
            # [b,g,1]
            project = paddle.matmul(project, self.attention_vec[index])
            #[b,1,g]
            project = paddle.transpose(project, perm=[0, 2, 1])
            weight = paddle.nn.functional.softmax(project)
            #[b, 1, row]
            output = paddle.matmul(weight, input)
            #[b,row]
            output = paddle.reshape(output, [-1, row])
            return output

        return func
Example #21
def zeros_init_(param):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.zeros(param.shape, param.dtype)))
    paddle.assign(replaced_param, param)
Example #22
    def __init__(
            self,
            embed_dim: int,
            # vision
            image_resolution: int,
            vision_layers: Union[Tuple[int, int, int, int], int],
            vision_width: int,
            vision_patch_size: int,
            # text
            context_length: int,
            vocab_size: int,
            transformer_width: int,
            transformer_heads: int,
            transformer_layers: int):
        super().__init__()

        self.context_length = context_length
        if isinstance(vision_layers, (tuple, list)):
            vision_heads = vision_width * 32 // 64
            self.visual = ModifiedResNet(layers=vision_layers,
                                         output_dim=embed_dim,
                                         heads=vision_heads,
                                         input_resolution=image_resolution,
                                         width=vision_width)
        else:
            vision_heads = vision_width // 64
            self.visual = VisualTransformer(input_resolution=image_resolution,
                                            patch_size=vision_patch_size,
                                            width=vision_width,
                                            layers=vision_layers,
                                            heads=vision_heads,
                                            output_dim=embed_dim)

        self.transformer = Transformer(width=transformer_width,
                                       layers=transformer_layers,
                                       heads=transformer_heads,
                                       attn_mask=self.build_attention_mask())

        self.vocab_size = vocab_size
        self.token_embedding = nn.Embedding(vocab_size, transformer_width)
        self.positional_embedding = paddle.create_parameter(
            (self.context_length, transformer_width), 'float32')
        self.ln_final = nn.LayerNorm(transformer_width)

        self.text_projection = paddle.create_parameter(
            (transformer_width, embed_dim), 'float32')
        self.logit_scale = paddle.create_parameter((1, ), 'float32')
Example #23
def normal_init_(param, mean=0.0, std=1.0):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.normal(
                mean=mean, std=std, shape=param.shape)))
    paddle.assign(replaced_param, param)
Example #24
 def __init__(self, n_channels, scale=1.0):
     super(L2Norm, self).__init__()
     self.n_channels = n_channels
     self.scale = scale
     self.eps = 1e-10
     self.weight = paddle.create_parameter(shape=[self.n_channels],
                                           dtype='float32')
     self.weight.set_value(paddle.zeros([self.n_channels]) + self.scale)
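
Only the constructor is shown; a typical L2Norm forward that uses this weight might look like the following sketch (an assumption, not the original code):

 def forward(self, x):
     # assumed forward: L2-normalize along the channel axis, then rescale
     norm = paddle.sqrt((x * x).sum(axis=1, keepdim=True)) + self.eps
     x = x / norm
     return self.weight.reshape([1, self.n_channels, 1, 1]) * x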
Example #25
 def __init__(self, hidden_size, hidden_act, layer_norm_eps, vocab_size):
     super().__init__()
     self.transform = SqueezeBertPredictionHeadTransform(
         hidden_size, hidden_act, layer_norm_eps)
     self.decoder = nn.Linear(hidden_size, vocab_size, bias_attr=False)
     self.bias = paddle.create_parameter([vocab_size],
                                         dtype='float32',
                                         is_bias=True)
     self.decoder.bias = self.bias
Example #26
def column_parallel_linear(input,
                           in_size,
                           out_size,
                           use_bias=True,
                           gather_out=True,
                           mp_rank=0,
                           mp_nranks=1,
                           dtype="float32",
                           param_attr=None,
                           bias_attr=None,
                           param_name=None,
                           bias_name=None,
                           ring_id=0):
    assert out_size % mp_nranks == 0
    out_size_per_part = out_size // mp_nranks
    weight = paddle.create_parameter(shape=[in_size, out_size_per_part],
                                     dtype=dtype,
                                     name=param_name,
                                     attr=param_attr,
                                     is_bias=False)
    weight.is_distributed = True
    paddle.static.default_startup_program().global_block().vars[
        weight.name].is_distributed = True
    paddle.static.default_main_program().global_block().vars[
        weight.name].is_distributed = True
    if use_bias:
        bias = paddle.create_parameter(shape=[out_size_per_part],
                                       dtype=dtype,
                                       name=bias_name,
                                       attr=param_attr,
                                       is_bias=True)
        bias.is_distributed = True
        paddle.static.default_startup_program().global_block().vars[
            bias.name].is_distributed = True
        paddle.static.default_main_program().global_block().vars[
            bias.name].is_distributed = True
    out = paddle.matmul(input, weight)
    if use_bias:
        out = paddle.add(out, bias)
    if gather_out:
        output = []
        paddle.distributed.all_gather(output, out, group=ring_id)
        out = paddle.concat(output, axis=len(out.shape) - 1)
    return out
Example #27
    def __init__(self, sparse_feature_number, sparse_feature_dim,
                 dense_feature_dim, sparse_num_field):
        super(FFM, self).__init__()
        self.sparse_feature_number = sparse_feature_number
        self.sparse_feature_dim = sparse_feature_dim
        self.dense_feature_dim = dense_feature_dim
        self.dense_emb_dim = self.sparse_feature_dim
        self.sparse_num_field = sparse_num_field
        self.init_value_ = 0.1

        # sparse part coding
        self.embedding_one = paddle.nn.Embedding(
            sparse_feature_number,
            1,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.TruncatedNormal(
                    mean=0.0,
                    std=self.init_value_ /
                    math.sqrt(float(self.sparse_feature_dim)))))

        self.embedding = paddle.nn.Embedding(
            self.sparse_feature_number,
            self.sparse_feature_dim * self.sparse_num_field,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.TruncatedNormal(
                    mean=0.0,
                    std=self.init_value_ /
                    math.sqrt(float(self.sparse_feature_dim)))))

        # dense part coding
        self.dense_w_one = paddle.create_parameter(
            shape=[self.dense_feature_dim],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=1.0))

        self.dense_w = paddle.create_parameter(
            shape=[
                1, self.dense_feature_dim,
                self.dense_emb_dim * self.sparse_num_field
            ],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=1.0))
Example #28
    def __init__(self, in_features, layer_num=2, low_rank=32, num_experts=4):
        super(CrossNetMix, self).__init__()
        self.layer_num = layer_num
        self.num_experts = num_experts

        # U: (in_features, low_rank)
        self.U_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, in_features, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        # V: (in_features, low_rank)
        self.V_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, in_features, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        # C: (low_rank, low_rank)
        self.C_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, low_rank, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        self.gating = nn.LayerList(
            [nn.Linear(in_features, 1) for i in range(self.num_experts)])

        self.bias = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[in_features, 1],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(value=0.0))
            for i in range(self.layer_num)
        ])
Example #29
 def __init__(self, in_channels, ds=8, activation=nn.ReLU):
     super(BAM, self).__init__()
     self.key_channel = in_channels // 8
     self.activation = activation
     self.ds = ds
     self.pool = nn.AvgPool2D(self.ds)
     self.query_conv = nn.Conv2D(
         in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
     self.key_conv = nn.Conv2D(
         in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
     self.value_conv = nn.Conv2D(
         in_channels=in_channels, out_channels=in_channels, kernel_size=1)
     self.gamma = nn.ParameterList([
         paddle.create_parameter(
             shape=[1],
             dtype='float32',
             default_initializer=nn.initializer.Constant(value=0))
     ])
     self.softmax = nn.Softmax(axis=-1)

 def __init__(self, weight, output_size, global_dtype):
     super(SimpleMatmul, self).__init__()
     self.weight = paddle.create_parameter(
         shape=weight.shape,
         dtype=global_dtype,
         attr=paddle.ParamAttr(
             initializer=paddle.nn.initializer.Assign(weight)))
     self.bias = self.create_parameter(
         shape=[output_size],
         dtype=global_dtype,
         attr=paddle.ParamAttr(
             initializer=paddle.nn.initializer.Constant(0.0)))
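
The example shows only the constructors; a minimal forward sketch for the SimpleMatmul layer above (an assumption, not part of the original snippet):

 def forward(self, x):
     # assumed forward: project x with the assigned weight, then add the bias
     return paddle.matmul(x, self.weight) + self.bias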