def get_optimizer_lr(model):
    """Split parameters into backbone vs. head groups for per-group learning rates."""
    backbone_params = nn.ParameterList()
    other_params = nn.ParameterList()
    for name, param in model.named_parameters():
        # only 'feature_extractor' weights (excluding the OCR head) form the backbone group
        if 'ocr' in name or 'feature_extractor' not in name:
            other_params.append(param)
        else:
            backbone_params.append(param)
    return backbone_params, other_params
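
# Usage sketch (the model and optimizer settings are illustrative, not from
# the original): feed the two lists to separate optimizer parameter groups so
# the backbone trains more slowly. In Paddle, a per-group 'learning_rate' acts
# as a multiplier on the base rate; the 0.1 below is an assumed value.
backbone_params, other_params = get_optimizer_lr(model)
optimizer = paddle.optimizer.Adam(
    learning_rate=1e-3,
    parameters=[
        {'params': list(backbone_params), 'learning_rate': 0.1},  # scaled lr for backbone
        {'params': list(other_params)},                           # base lr for the rest
    ])
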
def __init__(self, num_feats, in_feats, num_hops, sample_size):
    super(PartialWeightedAggregator, self).__init__()
    self.weight_store = []
    self.agg_feats = nn.ParameterList()
    self.discounts = nn.ParameterList()
    self.num_hops = num_hops
    for _ in range(num_hops):
        # plain (non-trainable) buffers kept outside the ParameterList
        self.weight_store.append(paddle.empty([num_feats, in_feats]))
        self.agg_feats.append(
            paddle.create_parameter(
                shape=[sample_size, in_feats],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal()))
        self.discounts.append(
            paddle.create_parameter(
                shape=[in_feats],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal()))
    self.reset_parameters()
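
# Hedged sketch of the reset_parameters() called above; its body is not part
# of this snippet, so re-drawing every hop's weights with Xavier-uniform is an
# assumption about its intent.
def reset_parameters(self):
    xavier = paddle.nn.initializer.XavierUniform()
    for hop in range(self.num_hops):
        xavier(self.agg_feats[hop])   # re-initialize aggregation weights in place
        xavier(self.discounts[hop])   # and the per-feature discounts
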
def __init__(self, in_channels, ds=8, activation=nn.ReLU):
    super(BAM, self).__init__()
    self.key_channel = in_channels // 8
    self.activation = activation
    self.ds = ds
    self.pool = nn.AvgPool2D(self.ds)
    # 1x1 projections: reduced channels for query/key, full channels for value
    self.query_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
    self.key_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
    self.value_conv = nn.Conv2D(
        in_channels=in_channels, out_channels=in_channels, kernel_size=1)
    # learnable residual scale, starting at zero so attention is phased in
    self.gamma = nn.ParameterList([
        paddle.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=nn.initializer.Constant(value=0))
    ])
    self.softmax = nn.Softmax(axis=-1)
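
# Hedged forward sketch for the layers above (downsample, project to Q/K/V,
# softmax attention, zero-initialized gamma residual). The actual forward()
# is not included in this snippet, so the upsample back to the input size is
# an assumption; self.activation is stored but unused here.
# Assumes: import paddle.nn.functional as F
def forward(self, x):
    b, c, h, w = x.shape
    x_ds = self.pool(x)                                         # (b, c, h/ds, w/ds)
    hd, wd = x_ds.shape[2], x_ds.shape[3]
    q = self.query_conv(x_ds).reshape([b, -1, hd * wd]).transpose([0, 2, 1])
    k = self.key_conv(x_ds).reshape([b, -1, hd * wd])
    attn = self.softmax(paddle.bmm(q, k))                       # (b, hw, hw) affinities
    v = self.value_conv(x_ds).reshape([b, -1, hd * wd])
    out = paddle.bmm(v, attn.transpose([0, 2, 1])).reshape([b, c, hd, wd])
    out = F.interpolate(out, size=[h, w])                       # back to input resolution
    return self.gamma[0] * out + x                              # learnable residual blend
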
def __init__(self,
             in_channels=2,
             edge_importance_weighting=True,
             data_bn=True,
             layout='fsd10',
             strategy='spatial',
             **kwargs):
    super(STGCN, self).__init__()
    self.data_bn = data_bn
    # load graph
    self.graph = Graph(
        layout=layout,
        strategy=strategy, )
    A = paddle.to_tensor(self.graph.A, dtype='float32')
    self.register_buffer('A', A)

    # build networks
    spatial_kernel_size = A.shape[0]
    temporal_kernel_size = 9
    kernel_size = (temporal_kernel_size, spatial_kernel_size)
    self.data_bn = nn.BatchNorm1D(in_channels *
                                  A.shape[1]) if self.data_bn else iden
    kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}

    self.st_gcn_networks = nn.LayerList((
        st_gcn_block(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
        st_gcn_block(64, 64, kernel_size, 1, **kwargs),
        st_gcn_block(64, 64, kernel_size, 1, **kwargs),
        st_gcn_block(64, 64, kernel_size, 1, **kwargs),
        st_gcn_block(64, 128, kernel_size, 2, **kwargs),
        st_gcn_block(128, 128, kernel_size, 1, **kwargs),
        st_gcn_block(128, 128, kernel_size, 1, **kwargs),
        st_gcn_block(128, 256, kernel_size, 2, **kwargs),
        st_gcn_block(256, 256, kernel_size, 1, **kwargs),
        st_gcn_block(256, 256, kernel_size, 1, **kwargs), ))

    # initialize parameters for edge importance weighting
    if edge_importance_weighting:
        self.edge_importance = nn.ParameterList([
            self.create_parameter(
                shape=self.A.shape,
                default_initializer=nn.initializer.Constant(1))
            for i in self.st_gcn_networks
        ])
    else:
        self.edge_importance = [1] * len(self.st_gcn_networks)

    self.pool = nn.AdaptiveAvgPool2D(output_size=(1, 1))
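
# Hedged forward sketch following the reference ST-GCN pipeline; it assumes
# the usual (N, C, T, V, M) skeleton input and that st_gcn_block returns
# (x, A), neither of which is shown in this snippet.
def forward(self, x):
    N, C, T, V, M = x.shape
    x = x.transpose([0, 4, 3, 1, 2]).reshape([N * M, V * C, T])
    x = self.data_bn(x)                                    # normalize per joint-channel
    x = x.reshape([N, M, V, C, T]).transpose(
        [0, 1, 3, 4, 2]).reshape([N * M, C, T, V])
    for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
        x, _ = gcn(x, self.A * importance)                 # reweight graph edges per block
    x = self.pool(x)                                       # global pooling over T and V
    return x.reshape([N, M, -1, 1, 1]).mean(axis=1)        # average over the M bodies
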
def __init__(self,
             n_token,
             d_embed,
             d_proj,
             cutoffs,
             div_val=1,
             sample_softmax=False):
    super(AdaptiveEmbedding, self).__init__()
    self.n_token = n_token
    self.d_embed = d_embed

    self.cutoffs = cutoffs + [n_token]
    self.div_val = div_val
    self.d_proj = d_proj

    self.emb_scale = d_proj**0.5

    self.cutoff_ends = [0] + self.cutoffs

    self.emb_layers = nn.LayerList()
    self.emb_projs = nn.ParameterList()
    if div_val == 1:
        self.emb_layers.append(
            nn.Embedding(
                n_token,
                d_embed,
                sparse=sample_softmax > 0,
                weight_attr=paddle.nn.initializer.Normal(
                    mean=0.0, std=0.01)))
        if d_proj != d_embed:
            self.emb_projs.append(
                paddle.create_parameter(
                    shape=[d_embed, d_proj],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
    else:
        for i in range(len(self.cutoffs)):
            l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
            d_emb_i = d_embed // (div_val**i)
            self.emb_layers.append(
                nn.Embedding(
                    r_idx - l_idx,
                    d_emb_i,
                    weight_attr=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
            self.emb_projs.append(
                paddle.create_parameter(
                    shape=[d_emb_i, d_proj],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))
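
# Illustrative construction (the vocabulary size and cutoffs are the standard
# Transformer-XL / WikiText-103 values, used only as an example; d_embed is an
# assumption): with div_val=4, cluster i embeds its tokens at
# d_embed // 4**i dimensions and projects back up to d_proj through the
# matching emb_projs entry.
emb = AdaptiveEmbedding(
    n_token=267735, d_embed=512, d_proj=512,
    cutoffs=[20000, 40000, 200000], div_val=4)
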
def __init__(self,
             n_token,
             d_embed,
             d_proj,
             cutoffs,
             div_val=1,
             keep_order=False):
    super(ProjAdaptiveSoftmax, self).__init__()
    self.n_token = n_token
    self.d_embed = d_embed
    self.d_proj = d_proj

    self.cutoffs = cutoffs + [n_token]
    self.cutoff_ends = [0] + self.cutoffs
    self.div_val = div_val

    self.shortlist_size = self.cutoffs[0]
    self.num_clusters = len(self.cutoffs) - 1
    self.head_size = self.shortlist_size + self.num_clusters

    if self.num_clusters > 0:
        self.cluster_weight = paddle.create_parameter(
            shape=[self.num_clusters, self.d_embed],
            dtype=global_dtype,
            default_initializer=paddle.nn.initializer.Normal(
                mean=0.0, std=0.01))
        self.cluster_bias = paddle.create_parameter(
            shape=[self.num_clusters],
            dtype=global_dtype,
            is_bias=True,
            default_initializer=paddle.nn.initializer.Constant(0.0))

    self.out_layers_weight = nn.ParameterList()
    self.out_layers_bias = nn.ParameterList()
    self.out_projs = nn.ParameterList()

    if div_val == 1:
        for i in range(len(self.cutoffs)):
            if d_proj != d_embed:
                self.out_projs.append(
                    paddle.create_parameter(
                        shape=[d_proj, d_embed],
                        dtype=global_dtype,
                        default_initializer=paddle.nn.initializer.Normal(
                            mean=0.0, std=0.01)))
            else:
                self.out_projs.append(None)

        self.out_layers_weight.append(
            paddle.create_parameter(
                shape=[n_token, d_embed],
                dtype=global_dtype,
                default_initializer=paddle.nn.initializer.Constant(0.0)))
        self.out_layers_bias.append(
            paddle.create_parameter(
                shape=[n_token],
                dtype=global_dtype,
                is_bias=True,
                default_initializer=paddle.nn.initializer.Constant(0.0)))
    else:
        for i in range(len(self.cutoffs)):
            l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
            d_emb_i = d_embed // (div_val**i)

            self.out_projs.append(
                paddle.create_parameter(
                    shape=[d_proj, d_emb_i],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Normal(
                        mean=0.0, std=0.01)))

            self.out_layers_weight.append(
                paddle.create_parameter(
                    shape=[r_idx - l_idx, d_emb_i],
                    dtype=global_dtype,
                    default_initializer=paddle.nn.initializer.Uniform(
                        low=-(r_idx - l_idx)**(-1.0 / 2.0),
                        high=(r_idx - l_idx)**(-1.0 / 2.0))))
            self.out_layers_bias.append(
                paddle.create_parameter(
                    shape=[r_idx - l_idx],
                    dtype=global_dtype,
                    is_bias=True,
                    default_initializer=paddle.nn.initializer.Uniform(
                        low=-(r_idx - l_idx)**(-1.0 / 2.0),
                        high=(r_idx - l_idx)**(-1.0 / 2.0))))

    self.keep_order = keep_order
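
# Companion sketch to the embedding above (same illustrative values): the head
# covers the 20000 shortlist tokens plus one logit per tail cluster, and each
# tail cluster i gets a [cluster_size, d_embed // 4**i] weight with its own
# d_proj-to-d_emb_i projection.
crit = ProjAdaptiveSoftmax(
    n_token=267735, d_embed=512, d_proj=512,
    cutoffs=[20000, 40000, 200000], div_val=4)
assert crit.head_size == 20000 + 3   # shortlist_size + num_clusters
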
def __init__(self,
             img_size=224,
             patch_size=16,
             in_chans=3,
             class_num=1000,
             embed_dims=[64, 128, 256, 512],
             num_heads=[1, 2, 4, 8],
             mlp_ratios=[4, 4, 4, 4],
             qkv_bias=False,
             qk_scale=None,
             drop_rate=0.,
             attn_drop_rate=0.,
             drop_path_rate=0.,
             norm_layer=nn.LayerNorm,
             depths=[3, 4, 6, 3],
             sr_ratios=[8, 4, 2, 1],
             block_cls=Block):
    super().__init__()
    self.class_num = class_num
    self.depths = depths

    # patch_embed
    self.patch_embeds = nn.LayerList()
    self.pos_embeds = nn.ParameterList()
    self.pos_drops = nn.LayerList()
    self.blocks = nn.LayerList()

    for i in range(len(depths)):
        if i == 0:
            self.patch_embeds.append(
                PatchEmbed(img_size, patch_size, in_chans, embed_dims[i]))
        else:
            self.patch_embeds.append(
                PatchEmbed(img_size // patch_size // 2**(i - 1), 2,
                           embed_dims[i - 1], embed_dims[i]))
        patch_num = self.patch_embeds[i].num_patches + 1 if i == len(
            embed_dims) - 1 else self.patch_embeds[i].num_patches
        self.pos_embeds.append(
            self.create_parameter(
                shape=[1, patch_num, embed_dims[i]],
                default_initializer=zeros_))
        self.pos_drops.append(nn.Dropout(p=drop_rate))

    dpr = [
        x.numpy()[0]
        for x in paddle.linspace(0, drop_path_rate, sum(depths))
    ]  # stochastic depth decay rule

    cur = 0
    for k in range(len(depths)):
        _block = nn.LayerList([
            block_cls(
                dim=embed_dims[k],
                num_heads=num_heads[k],
                mlp_ratio=mlp_ratios[k],
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[cur + i],
                norm_layer=norm_layer,
                sr_ratio=sr_ratios[k]) for i in range(depths[k])
        ])
        self.blocks.append(_block)
        cur += depths[k]

    self.norm = norm_layer(embed_dims[-1])

    # cls_token
    self.cls_token = self.create_parameter(
        shape=[1, 1, embed_dims[-1]],
        default_initializer=zeros_,
        attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))

    # classification head
    self.head = nn.Linear(embed_dims[-1],
                          class_num) if class_num > 0 else Identity()

    # init weights
    for pos_emb in self.pos_embeds:
        trunc_normal_(pos_emb)
    self.apply(self._init_weights)
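
# Hedged sketch of the _init_weights hook applied above; its body is not shown
# in this snippet, so truncated-normal Linear weights with zeroed biases (in
# line with the trunc_normal_/zeros_ helpers already in use) are an assumption.
def _init_weights(self, m):
    if isinstance(m, nn.Linear):
        trunc_normal_(m.weight)
        if m.bias is not None:
            zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        zeros_(m.bias)
        nn.initializer.Constant(1.0)(m.weight)   # identity scale for LayerNorm
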
def __init__(self, num_feats, in_feats, num_hops):
    super(WeightedAggregator, self).__init__()
    self.agg_feats = nn.ParameterList()
    for _ in range(num_hops):
        # one learnable [num_feats, in_feats] mixing weight per hop
        self.agg_feats.append(
            paddle.create_parameter(
                shape=[num_feats, in_feats], dtype='float32'))
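
# Hedged forward sketch: aggregators of this shape (e.g. in NARS-style graph
# models) typically blend the num_feats per-hop feature blocks with these
# weights; the reduction below is an assumption since forward() is not part
# of this snippet.
def forward(self, feat_list):
    # feat_list: num_hops tensors of shape [num_nodes, num_feats, in_feats]
    out = []
    for feats, weight in zip(feat_list, self.agg_feats):
        out.append((feats * weight.unsqueeze(0)).sum(axis=1))  # weighted sum of blocks
    return out
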