def __init__(self, in_features, out_features, sigma_zero=0.4, bias=True):
    """Linear layer augmented with learnable noise-scale parameters.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        sigma_zero: base noise scale; the per-weight initial sigma is
            sigma_zero / sqrt(in_features).
        bias: when True, also create a ``sigma_bias`` parameter.
    """
    super(NoisyLinear, self).__init__(in_features, out_features)
    init_sigma = sigma_zero / math.sqrt(in_features)

    # Learnable noise scale for the weight matrix, initialized to a constant.
    weight_sigma = self.create_parameter(
        shape=[in_features, out_features],
        default_initializer=Assign(
            paddle.full((in_features, out_features), init_sigma)))
    self.add_parameter("sigma_weight", weight_sigma)

    # Non-trainable noise buffers (presumably resampled each forward pass —
    # TODO confirm against the forward implementation).
    self.register_buffer("epsilon_input", paddle.zeros((1, in_features)))
    self.register_buffer("epsilon_output", paddle.zeros((out_features, 1)))

    if bias:
        # Matching learnable noise scale for the bias vector.
        bias_sigma = self.create_parameter(
            shape=[out_features],
            default_initializer=Assign(
                paddle.full([out_features], init_sigma)))
        self.add_parameter("sigma_bias", bias_sigma)
def add_parameter(layer, datas, name=None):
    """Create a parameter on ``layer`` initialized from ``datas``.

    Args:
        layer: the paddle Layer the parameter is created on.
        datas: tensor whose shape and values seed the new parameter.
        name: optional; when given, the parameter is also registered on
            the layer under this name.

    Returns:
        The newly created parameter.
    """
    new_param = layer.create_parameter(
        shape=datas.shape,
        default_initializer=Assign(datas),
    )
    if name:
        layer.add_parameter(name, new_param)
    return new_param
def __init__(self, spacial_dim, embed_dim, num_heads, output_dim=None):
    """Attention pooling head: learned positions + multi-head attention.

    Args:
        spacial_dim: side length of the square spatial feature map.
        embed_dim: embedding width of each token.
        num_heads: number of attention heads.
        output_dim: optional projection width passed to the attention module.
    """
    super().__init__()
    # One position per spatial location plus one extra slot.
    num_positions = spacial_dim ** 2 + 1
    pos_embed = self.create_parameter(
        shape=(num_positions, embed_dim),
        default_initializer=Assign(
            paddle.randn((num_positions, embed_dim)) / embed_dim ** 0.5),
    )
    self.add_parameter("positional_embedding", pos_embed)
    self.attn = MultiHeadAttention(embed_dim, num_heads, output_dim)
def __init__(self, input_resolution, patch_size, width, layers, heads, output_dim):
    """Vision transformer: patch conv, class/position embeddings, transformer, projection.

    Args:
        input_resolution: input image side length.
        patch_size: side length of each square patch.
        width: transformer embedding width.
        layers: number of transformer layers.
        heads: number of attention heads.
        output_dim: width of the final projection.
    """
    super().__init__()
    self.input_resolution = input_resolution
    self.output_dim = output_dim
    # Non-overlapping patch embedding: stride == kernel_size, no bias.
    self.conv1 = nn.Conv2D(
        in_channels=3,
        out_channels=width,
        kernel_size=patch_size,
        stride=patch_size,
        bias_attr=False,
    )
    scale = width ** -0.5
    class_embedding = self.create_parameter(
        shape=(width,),
        default_initializer=Assign(scale * paddle.randn((width,)))
    )
    self.add_parameter("class_embedding", class_embedding)
    # BUG FIX: shape was (width,) but the initializer tensor is
    # (num_patches + 1, width); declare the matching shape.
    num_positions = (input_resolution // patch_size) ** 2 + 1
    positional_embedding = self.create_parameter(
        shape=(num_positions, width),
        default_initializer=Assign(
            scale * paddle.randn((num_positions, width))
        ),
    )
    self.add_parameter("positional_embedding", positional_embedding)
    self.ln_pre = nn.LayerNorm(width)
    self.transformer = Transformer(width, layers, heads)
    self.ln_post = nn.LayerNorm(width)
    # BUG FIX: the projection matrix is (width, output_dim), not (width,),
    # matching its initializer tensor.
    proj = self.create_parameter(
        shape=(width, output_dim),
        default_initializer=Assign(scale * paddle.randn((width, output_dim))),
    )
    self.add_parameter("proj", proj)
def __init__(self, *args, **kwargs):
    """Distilled variant: two leading tokens (class + distillation) and an extra head.

    All positional/keyword arguments are forwarded to the base
    PoolingTransformer constructor.
    """
    super(DistilledPoolingTransformer, self).__init__(*args, **kwargs)
    token_dim = self.base_dims[0] * self.heads[0]
    # Two tokens instead of one: class token plus distillation token.
    self.cls_token = self.create_parameter(
        shape=(1, 2, token_dim),
        default_initializer=Assign(paddle.randn((1, 2, token_dim))))
    self.add_parameter("cls_token", self.cls_token)
    if self.class_dim > 0:
        self.head_dist = nn.Linear(self.base_dims[-1] * self.heads[-1],
                                   self.class_dim)
        # BUG FIX: initializing head_dist was previously unconditional and
        # raised AttributeError when class_dim <= 0 (head_dist never created).
        self.head_dist.apply(self._init_weights)
    trunc_normal_(self.cls_token)
import paddle
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from paddle.nn import Conv2D
from paddle.nn.initializer import Assign

# BUG FIX: the script used `paddle` and `plt` without importing them,
# which raised NameError; both imports are added above.

# Load the image and convert it to a grayscale numpy.ndarray.
img = Image.open('000000355610.jpg').convert('L')
img = np.array(img)

# 5x5 mean-filter kernel used to initialize the convolution weights.
w = np.ones([1, 1, 5, 5], dtype='float32') / 25
conv = Conv2D(in_channels=1,
              out_channels=1,
              kernel_size=[5, 5],
              weight_attr=paddle.ParamAttr(initializer=Assign(value=w)))

# Reshape to NCHW (batch of 1, single channel) and run the convolution.
x = img.astype('float32')
x = x.reshape(1, 1, img.shape[0], img.shape[1])
x = paddle.to_tensor(x)
y = conv(x)
out = y.numpy()

# Show the input image and the smoothed feature map side by side.
plt.figure(figsize=(20, 12))
f = plt.subplot(121)
f.set_title('input image')
plt.imshow(img, cmap='gray')
f = plt.subplot(122)
f.set_title('output feature map')
out = out.squeeze()
plt.imshow(out, cmap='gray')
plt.show()
def __init__(self,
             image_size,
             patch_size,
             stride,
             base_dims,
             depth,
             heads,
             mlp_ratio,
             in_chans=3,
             attn_drop_rate=.0,
             drop_rate=.0,
             drop_path_rate=.0,
             class_dim=1000):
    """Pooling transformer backbone: conv patch embedding, staged
    transformers with pooling between stages, and an optional classifier.

    Args:
        image_size: input image side length.
        patch_size: side length of each patch.
        stride: stride of the patch-embedding convolution.
        base_dims: per-stage per-head embedding dims.
        depth: per-stage transformer block counts.
        heads: per-stage attention head counts.
        mlp_ratio: MLP expansion ratio inside each block.
        in_chans: input image channels.
        attn_drop_rate: attention dropout probability.
        drop_rate: general dropout probability.
        drop_path_rate: max stochastic-depth rate (scaled linearly per block).
        class_dim: number of classes; <= 0 disables the classifier head.
    """
    super(PoolingTransformer, self).__init__()
    total_block = sum(depth)
    padding = 0
    block_idx = 0
    # Spatial side length of the patch-embedding output.
    width = math.floor((image_size + 2 * padding - patch_size) / stride + 1)
    self.base_dims = base_dims
    self.heads = heads
    self.class_dim = class_dim
    self.patch_size = patch_size
    # Learned 2-D positional embedding matching the stage-0 feature map.
    self.pos_embed = self.create_parameter(
        shape=(1, base_dims[0] * heads[0], width, width),
        default_initializer=Assign(
            paddle.randn((1, base_dims[0] * heads[0], width, width))))
    self.add_parameter("pos_embed", self.pos_embed)
    self.patch_embed = conv_embedding(in_chans, base_dims[0] * heads[0],
                                      patch_size, stride, padding)
    self.cls_token = self.create_parameter(
        shape=(1, 1, base_dims[0] * heads[0]),
        default_initializer=Assign(
            paddle.randn((1, 1, base_dims[0] * heads[0]))))
    self.add_parameter("cls_token", self.cls_token)
    self.pos_drop = nn.Dropout(p=drop_rate)
    self.transformers = nn.LayerList([])
    self.pools = nn.LayerList([])
    for stage in range(len(depth)):
        # Stochastic depth grows linearly with global block index.
        drop_path_prob = [
            drop_path_rate * i / total_block
            for i in range(block_idx, block_idx + depth[stage])
        ]
        block_idx += depth[stage]
        self.transformers.append(
            Transformer(base_dims[stage], depth[stage], heads[stage],
                        mlp_ratio, drop_rate, attn_drop_rate,
                        drop_path_prob))
        # Pooling between stages (none after the last stage).
        if stage < len(heads) - 1:
            self.pools.append(
                conv_head_pooling(base_dims[stage] * heads[stage],
                                  base_dims[stage + 1] * heads[stage + 1],
                                  stride=2))
    self.norm = nn.LayerNorm(base_dims[-1] * heads[-1], epsilon=1e-6)
    self.embed_dim = base_dims[-1] * heads[-1]
    # Classifier head
    if class_dim > 0:
        self.head = nn.Linear(base_dims[-1] * heads[-1], class_dim)
    trunc_normal_(self.pos_embed)
    trunc_normal_(self.cls_token)
    self.apply(self._init_weights)
def __init__(
        self,
        embed_dim,
        # vision
        image_resolution,
        vision_layers,
        vision_width,
        vision_patch_size,
        # text
        context_length,
        vocab_size,
        transformer_width,
        transformer_heads,
        transformer_layers,
):
    """Dual-encoder model: a visual backbone and a text transformer projected
    into a shared ``embed_dim`` space.

    Args:
        embed_dim: width of the joint embedding space.
        image_resolution: input image side length.
        vision_layers: a tuple/list selects the ResNet backbone (per-stage
            block counts); an int selects the ViT backbone (layer count).
        vision_width: visual backbone base width.
        vision_patch_size: patch size (ViT backbone only).
        context_length: max text sequence length.
        vocab_size: text vocabulary size.
        transformer_width: text transformer embedding width.
        transformer_heads: text transformer attention heads.
        transformer_layers: text transformer depth.
    """
    super().__init__()
    self.context_length = context_length
    self.embed_dim = embed_dim
    if isinstance(vision_layers, (tuple, list)):
        # ResNet path: head count derived from width (64 dims per head).
        vision_heads = vision_width * 32 // 64
        self.visual = ModifiedResNet(
            layers=vision_layers,
            output_dim=embed_dim,
            heads=vision_heads,
            input_resolution=image_resolution,
            width=vision_width,
        )
    else:
        # ViT path.
        vision_heads = vision_width // 64
        self.visual = VisualTransformer(
            input_resolution=image_resolution,
            patch_size=vision_patch_size,
            width=vision_width,
            layers=vision_layers,
            heads=vision_heads,
            output_dim=embed_dim,
        )
    # Text transformer with a causal attention mask.
    self.transformer = Transformer(
        width=transformer_width,
        layers=transformer_layers,
        heads=transformer_heads,
        attn_mask=self.build_attention_mask(),
    )
    self.vocab_size = vocab_size
    self.token_embedding = nn.Embedding(vocab_size, transformer_width)
    # Parameters below are created from paddle.empty placeholders; their
    # real values are set by initialize_parameters() at the end.
    positional_embedding = self.create_parameter(
        shape=(self.context_length, transformer_width),
        default_initializer=Assign(
            paddle.empty((self.context_length, transformer_width))
        ),
    )
    self.add_parameter("positional_embedding", positional_embedding)
    self.ln_final = nn.LayerNorm(transformer_width)
    text_projection = self.create_parameter(
        shape=(transformer_width, embed_dim),
        default_initializer=Assign(
            paddle.empty((transformer_width, embed_dim))),
    )
    self.add_parameter("text_projection", text_projection)
    # Learnable temperature for the image/text similarity logits.
    logit_scale = self.create_parameter(
        shape=(1,), default_initializer=Assign(paddle.ones([1]))
    )
    self.add_parameter("logit_scale", logit_scale)
    self.initialize_parameters()