def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0):
    super(Conv_BN_ReLU, self).__init__()
    self.conv = nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias_attr=False)
    self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
    self.relu = nn.ReLU()

    for m in self.sublayers():
        if isinstance(m, nn.Conv2D):
            n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
            m.weight = paddle.create_parameter(
                shape=m.weight.shape,
                dtype='float32',
                default_initializer=paddle.nn.initializer.Normal(
                    0, math.sqrt(2. / n)))
        elif isinstance(m, nn.BatchNorm2D):
            m.weight = paddle.create_parameter(
                shape=m.weight.shape,
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(1.0))
            m.bias = paddle.create_parameter(
                shape=m.bias.shape,
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(0.0))
def __init__(self, input_resolution: int, patch_size: int, width: int,
             layers: int, heads: int, output_dim: int):
    super().__init__()
    self.input_resolution = input_resolution
    self.output_dim = output_dim
    self.conv1 = nn.Conv2D(
        in_channels=3,
        out_channels=width,
        kernel_size=patch_size,
        stride=patch_size,
        bias_attr=False)

    # scale = width ** -0.5
    # self.class_embedding = paddle.create_parameter(scale * torch.randn(width))
    self.class_embedding = paddle.create_parameter((width, ), 'float32')
    # self.positional_embedding = paddle.create_parameter(
    #     scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width))
    self.positional_embedding = paddle.create_parameter(
        ((input_resolution // patch_size)**2 + 1, width), 'float32')
    self.ln_pre = nn.LayerNorm(width)
    self.transformer = Transformer(width, layers, heads)
    self.ln_post = nn.LayerNorm(width)
    self.proj = paddle.create_parameter((width, output_dim), 'float32')
def __init__(self,
             feature_dim=128,
             bottleneck_setting=MobileFaceNet_BottleNeck_Setting,
             **args):
    super().__init__()
    self.conv1 = ConvBlock(3, 64, 3, 2, 1)
    self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)
    self.cur_channel = 64
    block = BottleNeck
    self.blocks = self._make_layer(block, bottleneck_setting)
    self.conv2 = ConvBlock(128, 512, 1, 1, 0)
    self.linear7 = ConvBlock(512, 512, 7, 1, 0, dw=True, linear=True)
    self.linear1 = ConvBlock(512, feature_dim, 1, 1, 0, linear=True)

    for m in self.sublayers():
        if isinstance(m, nn.Conv2D):
            # fan-in of the conv weight: in_ch * kh * kw
            n = m.weight.shape[1] * m.weight.shape[2] * m.weight.shape[3]
            m.weight = paddle.create_parameter(
                shape=m.weight.shape,
                dtype='float32',
                default_initializer=nn.initializer.Normal(
                    mean=0.0, std=math.sqrt(2.0 / n)))
            # nn.init.normal_(m.weight, 0, 0.1)
        elif isinstance(m, (nn.BatchNorm, nn.BatchNorm2D, nn.GroupNorm)):
            m.weight = paddle.create_parameter(
                shape=m.weight.shape,
                dtype='float32',
                default_initializer=nn.initializer.Constant(value=1.0))
            m.bias = paddle.create_parameter(
                shape=m.bias.shape,
                dtype='float32',
                default_initializer=nn.initializer.Constant(value=0.0))
def __init__(self, opt):
    super(MotLoss, self).__init__()
    self.crit = paddle.nn.MSELoss() if opt.mse_loss else FocalLoss()
    self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
        RegLoss() if opt.reg_loss == 'sl1' else None
    self.crit_wh = paddle.nn.L1Loss(reduction='sum') if opt.dense_wh else \
        NormRegL1Loss() if opt.norm_wh else \
        RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
    self.opt = opt
    self.emb_dim = opt.reid_dim
    self.nID = opt.nID
    # param_attr = paddle.ParamAttr(initializer=KaimingUniform())
    # bound = 1 / math.sqrt(self.emb_dim)
    # bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
    # self.classifier = nn.Linear(self.emb_dim, self.nID,
    #                             weight_attr=param_attr, bias_attr=bias_attr)
    self.classifier = nn.Linear(self.emb_dim, self.nID, bias_attr=True)
    if opt.id_loss == 'focal':
        # rarely used
        # torch.nn.init.normal_(self.classifier.weight, std=0.01)
        prior_prob = 0.01
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        # torch.nn.init.constant_(self.classifier.bias, bias_value)
        weight_attr = paddle.framework.ParamAttr(
            initializer=nn.initializer.Normal(std=0.01))
        bias_attr = paddle.framework.ParamAttr(
            initializer=nn.initializer.Constant(bias_value))
        self.classifier = nn.Linear(
            self.emb_dim,
            self.nID,
            weight_attr=weight_attr,
            bias_attr=bias_attr)
    self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
    self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)
    # self.s_det = nn.Parameter(-1.85 * torch.ones(1))
    # self.s_id = nn.Parameter(-1.05 * torch.ones(1))
    self.s_det = paddle.create_parameter(
        [1],
        dtype='float32',
        default_initializer=nn.initializer.Constant(value=-1.85))
    self.s_id = paddle.create_parameter(
        [1],
        dtype='float32',
        default_initializer=nn.initializer.Constant(value=-1.05))
def __init__(self,
             sparse_feature_number=1000001,
             sparse_feature_dim=9,
             dense_feature_dim=13,
             sparse_num_field=26,
             layer_sizes=None):
    super(FM, self).__init__()
    self.sparse_feature_number = sparse_feature_number  # 1000001
    self.sparse_feature_dim = sparse_feature_dim  # 9
    self.dense_feature_dim = dense_feature_dim  # 13
    self.sparse_num_field = sparse_num_field  # sparse_inputs_slots - 1 == 26
    self.layer_sizes = layer_sizes  # fc_sizes: [512, 256, 128, 32]

    # first-order sparse features
    self.sparse_feature_oneOrderWeight = paddle.nn.Embedding(
        sparse_feature_number, 1, padding_idx=0, sparse=True)
    # first-order dense features
    self.dense_feature_oneOrderWeight = paddle.create_parameter(
        [dense_feature_dim], "float32")
    # second-order features; the latent-vector dimension is assumed to be
    # sparse_feature_dim (the original referenced an undefined embedding_dim)
    self.sparse_latent_vecs = paddle.nn.Embedding(
        sparse_feature_number,
        sparse_feature_dim,
        padding_idx=0,
        sparse=True)
    self.dense_latent_vecs = paddle.create_parameter(
        [1, dense_feature_dim, sparse_feature_dim], "float32")
def __init__(self,
             sparse_feature_dim,
             num_fields,
             activation=None,
             use_bias=False):
    super(FieldWiseBiInteraction, self).__init__()
    self.sparse_feature_dim = sparse_feature_dim
    self.num_fields = num_fields
    self.use_bias = use_bias
    self.activation = activation

    self.kernel_mf = paddle.create_parameter(
        shape=[int(self.num_fields * (self.num_fields - 1) / 2), 1],
        dtype='float32',
        default_initializer=paddle.nn.initializer.XavierUniform())
    self.kernel_fm = paddle.create_parameter(
        shape=[self.num_fields, 1],
        dtype='float32',
        default_initializer=paddle.nn.initializer.XavierUniform())
    if self.use_bias:
        self.bias_mf = paddle.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=0.0))
        self.bias_fm = paddle.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=0.0))
def __init__(self,
             n_token,
             d_embed,
             d_proj,
             cutoffs,
             div_val=1,
             sample_softmax=False):
    super(AdaptiveEmbedding, self).__init__()
    self.n_token = n_token
    self.d_embed = d_embed
    self.cutoffs = cutoffs + [n_token]
    self.div_val = div_val
    self.d_proj = d_proj
    self.emb_scale = d_proj**0.5
    self.cutoff_ends = [0] + self.cutoffs

    self.emb_layers = nn.LayerList()
    self.emb_projs = nn.ParameterList()
    if div_val == 1:
        self.emb_layers.append(
            nn.Embedding(
                n_token,
                d_embed,
                sparse=sample_softmax > 0,
                weight_attr=paddle.nn.initializer.Normal(
                    mean=0.0, std=0.01)))
        if d_proj != d_embed:
            self.emb_projs.append(
                paddle.create_parameter(
                    shape=[d_embed, d_proj],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
    else:
        for i in range(len(self.cutoffs)):
            l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
            d_emb_i = d_embed // (div_val**i)
            self.emb_layers.append(
                nn.Embedding(
                    r_idx - l_idx,
                    d_emb_i,
                    weight_attr=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
            self.emb_projs.append(
                paddle.create_parameter(
                    shape=[d_emb_i, d_proj],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
def __init__(self, sparse_feature_number, sparse_feature_dim,
             dense_feature_dim, sparse_num_field):
    super(FM, self).__init__()
    self.sparse_feature_number = sparse_feature_number
    self.sparse_feature_dim = sparse_feature_dim
    self.dense_feature_dim = dense_feature_dim
    self.dense_emb_dim = self.sparse_feature_dim
    self.sparse_num_field = sparse_num_field
    self.init_value_ = 0.1

    use_sparse = True
    if paddle.is_compiled_with_npu():
        use_sparse = False

    # sparse coding
    self.embedding_one = paddle.nn.Embedding(
        sparse_feature_number,
        1,
        padding_idx=0,
        sparse=use_sparse,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))

    self.embedding = paddle.nn.Embedding(
        self.sparse_feature_number,
        self.sparse_feature_dim,
        sparse=use_sparse,
        padding_idx=0,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))

    # dense coding
    self.dense_w_one = paddle.create_parameter(
        shape=[self.dense_feature_dim],
        dtype='float32',
        default_initializer=paddle.nn.initializer.TruncatedNormal(
            mean=0.0,
            std=self.init_value_ /
            math.sqrt(float(self.sparse_feature_dim))))
    self.dense_w = paddle.create_parameter(
        shape=[1, self.dense_feature_dim, self.dense_emb_dim],
        dtype='float32',
        default_initializer=paddle.nn.initializer.TruncatedNormal(
            mean=0.0,
            std=self.init_value_ /
            math.sqrt(float(self.sparse_feature_dim))))
def _simple_network():
    """Define a simple network composed by a single linear layer."""
    input = paddle.static.data(
        name="input", shape=[None, 2, 2], dtype="float32")
    weight = paddle.create_parameter(
        shape=[2, 3],
        dtype="float32",
        attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(0.1)))
    bias = paddle.create_parameter(shape=[3], dtype="float32")
    linear_out = paddle.nn.functional.linear(x=input, weight=weight, bias=bias)
    out = paddle.tensor.sum(linear_out)
    return input, out, weight
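# Usage sketch (an assumption, not part of the original snippet): build the
# static-graph program with _simple_network() and run it once on CPU with a
# random batch. The program/feed names here are illustrative only.
import numpy as np
import paddle

paddle.enable_static()
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    input, out, weight = _simple_network()

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)  # initialize weight (0.1) and bias
result = exe.run(main_prog,
                 feed={"input": np.random.rand(4, 2, 2).astype("float32")},
                 fetch_list=[out])
print(result[0])  # scalar sum over the linear layer output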
def __init__(self, num_feats, in_feats, num_hops, sample_size):
    super(PartialWeightedAggregator, self).__init__()
    self.weight_store = []
    self.agg_feats = nn.ParameterList()
    self.discounts = nn.ParameterList()
    self.num_hops = num_hops
    for _ in range(num_hops):
        # Original torch code:
        #   self.weight_store.append(torch.Tensor(num_feats, in_feats))
        #   self.agg_feats.append(nn.Parameter(torch.Tensor(sample_size, in_feats)))
        #   self.discounts.append(nn.Parameter(torch.Tensor(in_feats)))
        #   nn.init.xavier_uniform_(self.weight_store[-1])
        self.weight_store.append(paddle.zeros([num_feats, in_feats]))
        self.agg_feats.append(
            paddle.create_parameter(
                shape=[sample_size, in_feats],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal()))
        self.discounts.append(
            paddle.create_parameter(
                shape=[in_feats],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal()))
    self.reset_parameters()
def __init__(self, hidden_size, variance_epsilon=1e-12):
    """Initialization."""
    super(LayerNorm, self).__init__()
    self.beta = paddle.create_parameter(
        shape=[hidden_size],
        dtype="float32",
        default_initializer=nn.initializer.Assign(
            paddle.zeros([hidden_size], "float32")))
    self.gamma = paddle.create_parameter(
        shape=[hidden_size],
        dtype="float32",
        default_initializer=nn.initializer.Assign(
            paddle.ones([hidden_size], "float32")))
    self.variance_epsilon = variance_epsilon
def __init__(self,
             nsp_reader,
             num_layers,
             n_head,
             hidden_size,
             vocab_size=8001,
             type_size=2,
             latent_type_size=20,
             max_position_seq_len=256,
             act_dropout=0.1,
             attn_dropout=0.1,
             max_dec_len=64,
             min_dec_len=1,
             topk=10):
    super(Plato2InferModel, self).__init__()

    self.nsp_reader = nsp_reader
    self.num_layers = num_layers
    self.latent_type_size = latent_type_size
    self.max_dec_len = max_dec_len
    self.min_dec_len = min_dec_len
    self.topk = topk
    self.unk_id = 0
    self.bos_id = 1
    self.eos_id = 2
    self.mask_id = 8000
    self.after_eos = paddle.ones([vocab_size]) * -1e9
    self.after_eos[self.eos_id] = 0
    self.is_cn = False
    self.batch_size = 1

    self.latent_weight = paddle.create_parameter(
        [hidden_size, latent_type_size], 'float32')

    self.plato2_encoder = Plato2Encoder(
        vocab_size, type_size, max_position_seq_len, num_layers, n_head,
        hidden_size, attn_dropout, act_dropout)

    self.logits_fc_layer = nn.Linear(hidden_size, hidden_size)
    self.logits_layer_norm = nn.LayerNorm(hidden_size)
    self.logits_bias = paddle.create_parameter(
        [vocab_size], 'float32', is_bias=True)

    self.nsp_predictor = NSP(vocab_size, type_size, max_position_seq_len,
                             num_layers, n_head, hidden_size, attn_dropout,
                             act_dropout)

    self.gelu_layer = nn.GELU()
    self.softmax = nn.Softmax()
def __init__(self, num_state, num_node, num_class):
    super().__init__()
    self.vis_gcn = GCN(num_state, num_node)
    self.word_gcn = GCN(num_state, num_class)
    self.transfer = GraphTransfer(num_state)

    self.gamma_vis = paddle.zeros([num_node])
    self.gamma_word = paddle.zeros([num_class])
    self.gamma_vis = paddle.create_parameter(
        shape=paddle.shape(self.gamma_vis),
        dtype=str(self.gamma_vis.numpy().dtype),
        default_initializer=paddle.nn.initializer.Assign(self.gamma_vis))
    self.gamma_word = paddle.create_parameter(
        shape=paddle.shape(self.gamma_word),
        dtype=str(self.gamma_word.numpy().dtype),
        default_initializer=paddle.nn.initializer.Assign(self.gamma_word))
def __init__(self, cfg, name=None):
    cfg['return_additional_info'] = True
    cfg['has_pooler'] = False
    super(ErnieModelForGeneration, self).__init__(cfg, name=name)
    initializer = nn.initializer.TruncatedNormal(
        std=cfg['initializer_range'])

    d_model = cfg['hidden_size']
    d_vocab = cfg['vocab_size']

    self.mlm = _build_linear(
        d_model,
        d_model,
        append_name(name, 'mask_lm_trans_fc'),
        initializer, )
    self.act = ACT_DICT[cfg['hidden_act']]()
    self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans'))
    self.mlm_bias = P.create_parameter(
        dtype='float32',
        shape=[d_vocab],
        attr=P.ParamAttr(
            name=append_name(name, 'mask_lm_out_fc.b_0'),
            initializer=nn.initializer.Constant(value=0.0)),
        is_bias=True, )
    self.train()
def __init__(self, cfg, name=None):
    super(ErnieModelForPretraining, self).__init__(cfg, name=name)
    initializer = nn.initializer.TruncatedNormal(
        std=cfg['initializer_range'])

    d_model = cfg['hidden_size']
    d_vocab = cfg['vocab_size']

    self.pooler_heads = nn.LayerList([NSPHead(cfg, name=name)])
    self.mlm = _build_linear(
        d_model,
        d_model,
        append_name(name, 'mask_lm_trans_fc'),
        initializer, )
    self.act = ACT_DICT[cfg['hidden_act']]()
    self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans'))
    self.mlm_bias = P.create_parameter(
        dtype='float32',
        shape=[d_vocab],
        attr=P.ParamAttr(
            name=append_name(name, 'mask_lm_out_fc.b_0'),
            initializer=nn.initializer.Constant(value=0.0)),
        is_bias=True, )
    self.train()
def kaiming_normal_(param, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=KaimingNormal(
            a=a, mode=mode, nonlinearity=nonlinearity))
    paddle.assign(replaced_param, param)
def constant_init_(param, val):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.full(param.shape, val, param.dtype)))
    paddle.assign(replaced_param, param)
def __init__(self, in_channels, nclass):
    super().__init__()
    self.nclass = nclass
    inter_channels = in_channels // 4
    self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32')
    self.inp = paddle.create_parameter(
        shape=self.inp.shape,
        dtype=str(self.inp.numpy().dtype),
        default_initializer=paddle.nn.initializer.Assign(self.inp))
    self.inp.stop_gradient = True

    self.fc1 = nn.Sequential(
        nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU())
    self.fc2 = nn.Sequential(
        nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU())
    self.conv5 = layers.ConvBNReLU(
        in_channels, inter_channels, 3, padding=1, bias_attr=False, stride=1)

    self.gloru = GlobalReasonUnit(
        in_channels=inter_channels,
        num_state=256,
        num_node=84,
        nclass=nclass)
    self.conv6 = nn.Sequential(
        nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1))
def __init__(self, C: int, B: int = 1):
    super(Inspiration, self).__init__()
    self.weight = paddle.create_parameter(shape=[1, C, C], dtype='float32')
    # non-parameter buffer
    self.G = paddle.to_tensor(np.random.rand(B, C, C))
    self.C = C
def make_attention_layer(self, name_base, size):
    row = size[0]
    col = size[1]
    vec = paddle.create_parameter(
        shape=(col, 1),
        dtype="float32",
        name=name_base + "_vec_generated",
        default_initializer=paddle.nn.initializer.Normal(std=0.1))
    self.add_parameter(name_base + "_vec_generated", vec)
    index = len(self.attention_vec)
    self.attention_vec.append(vec)

    linear = paddle.nn.Linear(
        in_features=row,
        out_features=col,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Normal(std=0.01)))
    self.attention_layer.append(linear)
    self.add_sublayer(name_base + "_linear_generated", linear)

    def func(input):
        # input: [b, g, row]
        # project: [b, g, col]
        project = self.attention_layer[index](input)
        # [b, g, 1]
        project = paddle.matmul(project, self.attention_vec[index])
        # [b, 1, g]
        project = paddle.transpose(project, perm=[0, 2, 1])
        weight = paddle.nn.functional.softmax(project)
        # [b, 1, row]
        output = paddle.matmul(weight, input)
        # [b, row]
        output = paddle.reshape(output, [-1, row])
        return output

    return func
def zeros_init_(param):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.zeros(param.shape, param.dtype)))
    paddle.assign(replaced_param, param)
def __init__(
        self,
        embed_dim: int,
        # vision
        image_resolution: int,
        vision_layers: Union[Tuple[int, int, int, int], int],
        vision_width: int,
        vision_patch_size: int,
        # text
        context_length: int,
        vocab_size: int,
        transformer_width: int,
        transformer_heads: int,
        transformer_layers: int):
    super().__init__()
    self.context_length = context_length

    if isinstance(vision_layers, (tuple, list)):
        vision_heads = vision_width * 32 // 64
        self.visual = ModifiedResNet(
            layers=vision_layers,
            output_dim=embed_dim,
            heads=vision_heads,
            input_resolution=image_resolution,
            width=vision_width)
    else:
        vision_heads = vision_width // 64
        self.visual = VisualTransformer(
            input_resolution=image_resolution,
            patch_size=vision_patch_size,
            width=vision_width,
            layers=vision_layers,
            heads=vision_heads,
            output_dim=embed_dim)

    self.transformer = Transformer(
        width=transformer_width,
        layers=transformer_layers,
        heads=transformer_heads,
        attn_mask=self.build_attention_mask())

    self.vocab_size = vocab_size
    self.token_embedding = nn.Embedding(vocab_size, transformer_width)
    self.positional_embedding = paddle.create_parameter(
        (self.context_length, transformer_width), 'float32')
    self.ln_final = nn.LayerNorm(transformer_width)

    self.text_projection = paddle.create_parameter(
        (transformer_width, embed_dim), 'float32')
    self.logit_scale = paddle.create_parameter((1, ), 'float32')
def normal_init_(param, mean=0.0, std=1.0):
    replaced_param = paddle.create_parameter(
        shape=param.shape,
        dtype=param.dtype,
        default_initializer=paddle.nn.initializer.Assign(
            paddle.normal(
                mean=mean, std=std, shape=param.shape)))
    paddle.assign(replaced_param, param)
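# Usage sketch (an assumption, not from the original snippets): the in-place
# init helpers above (constant_init_, zeros_init_, normal_init_) overwrite an
# existing parameter's values via paddle.assign, so they are applied after a
# layer has been constructed. The layers below are illustrative only.
import paddle.nn as nn

conv = nn.Conv2D(3, 16, kernel_size=3, padding=1)
bn = nn.BatchNorm2D(16)

normal_init_(conv.weight, mean=0.0, std=0.01)  # re-initialize conv kernel
zeros_init_(conv.bias)                         # zero the conv bias
constant_init_(bn.weight, 1.0)                 # BatchNorm scale -> 1
constant_init_(bn.bias, 0.0)                   # BatchNorm shift -> 0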
def __init__(self, n_channels, scale=1.0):
    super(L2Norm, self).__init__()
    self.n_channels = n_channels
    self.scale = scale
    self.eps = 1e-10
    self.weight = paddle.create_parameter(
        shape=[self.n_channels], dtype='float32')
    self.weight.set_value(paddle.zeros([self.n_channels]) + self.scale)
def __init__(self, hidden_size, hidden_act, layer_norm_eps, vocab_size):
    super().__init__()
    self.transform = SqueezeBertPredictionHeadTransform(
        hidden_size, hidden_act, layer_norm_eps)
    self.decoder = nn.Linear(hidden_size, vocab_size, bias_attr=False)
    self.bias = paddle.create_parameter(
        [vocab_size], dtype='float32', is_bias=True)
    self.decoder.bias = self.bias
def column_parallel_linear(input,
                           in_size,
                           out_size,
                           use_bias=True,
                           gather_out=True,
                           mp_rank=0,
                           mp_nranks=1,
                           dtype="float32",
                           param_attr=None,
                           bias_attr=None,
                           param_name=None,
                           bias_name=None,
                           ring_id=0):
    assert out_size % mp_nranks == 0
    out_size_per_part = out_size // mp_nranks

    weight = paddle.create_parameter(
        shape=[in_size, out_size_per_part],
        dtype=dtype,
        name=param_name,
        attr=param_attr,
        is_bias=False)
    weight.is_distributed = True
    paddle.static.default_startup_program().global_block().vars[
        weight.name].is_distributed = True
    paddle.static.default_main_program().global_block().vars[
        weight.name].is_distributed = True

    if use_bias:
        bias = paddle.create_parameter(
            shape=[out_size_per_part],
            dtype=dtype,
            name=bias_name,
            attr=bias_attr,
            is_bias=True)
        bias.is_distributed = True
        paddle.static.default_startup_program().global_block().vars[
            bias.name].is_distributed = True
        paddle.static.default_main_program().global_block().vars[
            bias.name].is_distributed = True

    out = paddle.matmul(input, weight)
    if use_bias:
        out = paddle.add(out, bias)

    if gather_out:
        output = []
        paddle.distributed.all_gather(output, out, group=ring_id)
        out = paddle.concat(output, axis=len(out.shape) - 1)
    return out
def __init__(self, sparse_feature_number, sparse_feature_dim,
             dense_feature_dim, sparse_num_field):
    super(FFM, self).__init__()
    self.sparse_feature_number = sparse_feature_number
    self.sparse_feature_dim = sparse_feature_dim
    self.dense_feature_dim = dense_feature_dim
    self.dense_emb_dim = self.sparse_feature_dim
    self.sparse_num_field = sparse_num_field
    self.init_value_ = 0.1

    # sparse part coding
    self.embedding_one = paddle.nn.Embedding(
        sparse_feature_number,
        1,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))

    self.embedding = paddle.nn.Embedding(
        self.sparse_feature_number,
        self.sparse_feature_dim * self.sparse_num_field,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.TruncatedNormal(
                mean=0.0,
                std=self.init_value_ /
                math.sqrt(float(self.sparse_feature_dim)))))

    # dense part coding
    self.dense_w_one = paddle.create_parameter(
        shape=[self.dense_feature_dim],
        dtype='float32',
        default_initializer=paddle.nn.initializer.Constant(value=1.0))
    self.dense_w = paddle.create_parameter(
        shape=[
            1, self.dense_feature_dim,
            self.dense_emb_dim * self.sparse_num_field
        ],
        dtype='float32',
        default_initializer=paddle.nn.initializer.Constant(value=1.0))
def __init__(self, in_features, layer_num=2, low_rank=32, num_experts=4):
    super(CrossNetMix, self).__init__()
    self.layer_num = layer_num
    self.num_experts = num_experts

    # U: (in_features, low_rank)
    self.U_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, in_features, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    # V: (in_features, low_rank)
    self.V_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, in_features, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    # C: (low_rank, low_rank)
    self.C_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, low_rank, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    self.gating = nn.LayerList(
        [nn.Linear(in_features, 1) for i in range(self.num_experts)])
    self.bias = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[in_features, 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=0.0))
        for i in range(self.layer_num)
    ])
def __init__(self, in_channels, ds=8, activation=nn.ReLU):
    super(BAM, self).__init__()
    self.key_channel = in_channels // 8
    self.activation = activation
    self.ds = ds
    self.pool = nn.AvgPool2D(self.ds)
    self.query_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
    self.key_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
    self.value_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels, kernel_size=1)
    self.gamma = nn.ParameterList([
        paddle.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=nn.initializer.Constant(value=0))
    ])
    self.softmax = nn.Softmax(axis=-1)
def __init__(self, weight, output_size, global_dtype):
    super(SimpleMatmul, self).__init__()
    self.weight = paddle.create_parameter(
        shape=weight.shape,
        dtype=global_dtype,
        attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Assign(weight)))
    self.bias = self.create_parameter(
        shape=[output_size],
        dtype=global_dtype,
        attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(0.0)))