Example #1
class NoisyLinear(nn.Linear):
    # Linear layer whose weights carry learnable, factorised Gaussian
    # noise (NoisyNet). Assumes math, paddle, paddle.nn as nn and
    # paddle.nn.initializer.Assign are in scope.
    def __init__(self, in_features, out_features, sigma_zero=0.4, bias=True):
        # Pass bias through to the parent so bias=False actually
        # disables the parent's bias parameter.
        super(NoisyLinear, self).__init__(
            in_features, out_features, bias_attr=None if bias else False)
        sigma_init = sigma_zero / math.sqrt(in_features)
        sigma_weight = self.create_parameter(
            shape=[in_features, out_features],
            default_initializer=Assign(
                paddle.full((in_features, out_features), sigma_init)))
        self.add_parameter("sigma_weight", sigma_weight)
        # Noise buffers, not parameters: resampled rather than trained.
        self.register_buffer("epsilon_input", paddle.zeros((1, in_features)))
        self.register_buffer("epsilon_output", paddle.zeros((out_features, 1)))
        if bias:
            sigma_bias = self.create_parameter(
                shape=[out_features],
                default_initializer=Assign(
                    paddle.full([out_features], sigma_init)))
            self.add_parameter("sigma_bias", sigma_bias)
Example #2
def add_parameter(layer, datas, name=None):
    # Create a parameter on `layer` initialised from the tensor `datas`,
    # optionally registering it under `name`.
    parameter = layer.create_parameter(
        shape=datas.shape, default_initializer=Assign(datas)
    )
    if name:
        layer.add_parameter(name, parameter)
    return parameter
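A usage sketch (hypothetical layer and shapes, assuming paddle and Assign are imported) showing how the helper replaces the manual create_parameter/add_parameter pairs seen in the other examples:

class MyLayer(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        # Equivalent to calling create_parameter + add_parameter by hand.
        self.pos_embed = add_parameter(
            self, paddle.randn((197, 768)), "pos_embed")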
Example #3
    def __init__(self, spacial_dim, embed_dim, num_heads, output_dim=None):
        super().__init__()
        # Learnable positional embedding for the spacial_dim**2 grid tokens
        # plus the prepended mean token, scaled by embed_dim ** -0.5.
        positional_embedding = self.create_parameter(
            shape=(spacial_dim ** 2 + 1, embed_dim),
            default_initializer=Assign(
                paddle.randn((spacial_dim ** 2 + 1, embed_dim)) / embed_dim ** 0.5
            ),
        )
        self.add_parameter("positional_embedding", positional_embedding)

        self.attn = MultiHeadAttention(embed_dim, num_heads, output_dim)
Example #4
    def __init__(self, input_resolution, patch_size, width, layers, heads, output_dim):
        super().__init__()
        self.input_resolution = input_resolution
        self.output_dim = output_dim
        self.conv1 = nn.Conv2D(
            in_channels=3,
            out_channels=width,
            kernel_size=patch_size,
            stride=patch_size,
            bias_attr=False,
        )

        scale = width ** -0.5

        class_embedding = self.create_parameter(
            shape=(width,), default_initializer=Assign(scale * paddle.randn((width,)))
        )
        self.add_parameter("class_embedding", class_embedding)

        positional_embedding = self.create_parameter(
            # Shape must match the initializer: one row per patch plus the
            # class token, each of width `width`.
            shape=((input_resolution // patch_size) ** 2 + 1, width),
            default_initializer=Assign(
                scale * paddle.randn(((input_resolution // patch_size) ** 2 + 1, width))
            ),
        )
        self.add_parameter("positional_embedding", positional_embedding)

        self.ln_pre = nn.LayerNorm(width)

        self.transformer = Transformer(width, layers, heads)

        self.ln_post = nn.LayerNorm(width)

        proj = self.create_parameter(
            # Projects width-dim features to output_dim; shape must match
            # the (width, output_dim) initializer.
            shape=(width, output_dim),
            default_initializer=Assign(scale * paddle.randn((width, output_dim))),
        )
        self.add_parameter("proj", proj)
Example #5
    def __init__(self, *args, **kwargs):
        super(DistilledPoolingTransformer, self).__init__(*args, **kwargs)
        self.cls_token = self.create_parameter(
            shape=(1, 2, self.base_dims[0] * self.heads[0]),
            default_initializer=Assign(
                paddle.randn((1, 2, self.base_dims[0] * self.heads[0]))))
        self.add_parameter("cls_token", self.cls_token)

        if self.class_dim > 0:
            self.head_dist = nn.Linear(self.base_dims[-1] * self.heads[-1],
                                       self.class_dim)
            # Guard the init call: head_dist only exists when class_dim > 0.
            self.head_dist.apply(self._init_weights)

        trunc_normal_(self.cls_token)
Example #6
import matplotlib.pyplot as plt
import numpy as np
import paddle
from paddle.nn import Conv2D
from paddle.nn.initializer import Assign
from PIL import Image

# Read the image and convert it to a grayscale numpy.ndarray
img = Image.open('000000355610.jpg').convert('L')
img = np.array(img)

# Create the initial weights: a 5x5 all-ones kernel divided by 25,
# i.e. a mean (box-blur) filter
w = np.ones([1, 1, 5, 5], dtype='float32') / 25
conv = Conv2D(in_channels=1,
              out_channels=1,
              kernel_size=[5, 5],
              weight_attr=paddle.ParamAttr(initializer=Assign(value=w)))
x = img.astype('float32')
x = x.reshape(1, 1, img.shape[0], img.shape[1])
x = paddle.to_tensor(x)
y = conv(x)
out = y.numpy()

plt.figure(figsize=(20, 12))
f = plt.subplot(121)
f.set_title('input image')
plt.imshow(img, cmap='gray')

f = plt.subplot(122)
f.set_title('output feature map')
out = out.squeeze()
plt.imshow(out, cmap='gray')
plt.show()
Example #7
    def __init__(self,
                 image_size,
                 patch_size,
                 stride,
                 base_dims,
                 depth,
                 heads,
                 mlp_ratio,
                 in_chans=3,
                 attn_drop_rate=.0,
                 drop_rate=.0,
                 drop_path_rate=.0,
                 class_dim=1000):
        super(PoolingTransformer, self).__init__()

        total_block = sum(depth)
        padding = 0
        block_idx = 0

        # Output spatial size of the patch-embedding conv:
        # floor((H + 2 * padding - kernel) / stride + 1)
        width = math.floor((image_size + 2 * padding - patch_size) / stride + 1)

        self.base_dims = base_dims
        self.heads = heads
        self.class_dim = class_dim

        self.patch_size = patch_size

        self.pos_embed = self.create_parameter(
            shape=(1, base_dims[0] * heads[0], width, width),
            default_initializer=Assign(
                paddle.randn((1, base_dims[0] * heads[0], width, width))))
        self.add_parameter("pos_embed", self.pos_embed)

        self.patch_embed = conv_embedding(in_chans, base_dims[0] * heads[0],
                                          patch_size, stride, padding)

        self.cls_token = self.create_parameter(
            shape=(1, 1, base_dims[0] * heads[0]),
            default_initializer=Assign(
                paddle.randn((1, 1, base_dims[0] * heads[0]))))
        self.add_parameter("cls_token", self.cls_token)

        self.pos_drop = nn.Dropout(p=drop_rate)

        self.transformers = nn.LayerList([])
        self.pools = nn.LayerList([])

        for stage in range(len(depth)):
            drop_path_prob = [
                drop_path_rate * i / total_block
                for i in range(block_idx, block_idx + depth[stage])
            ]
            block_idx += depth[stage]

            self.transformers.append(
                Transformer(base_dims[stage], depth[stage], heads[stage],
                            mlp_ratio, drop_rate, attn_drop_rate,
                            drop_path_prob))
            if stage < len(heads) - 1:
                self.pools.append(
                    conv_head_pooling(base_dims[stage] * heads[stage],
                                      base_dims[stage + 1] * heads[stage + 1],
                                      stride=2))

        self.norm = nn.LayerNorm(base_dims[-1] * heads[-1], epsilon=1e-6)
        self.embed_dim = base_dims[-1] * heads[-1]

        # Classifier head
        if class_dim > 0:
            self.head = nn.Linear(base_dims[-1] * heads[-1], class_dim)

        trunc_normal_(self.pos_embed)
        trunc_normal_(self.cls_token)
        self.apply(self._init_weights)
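For context, an instantiation sketch with values resembling the published PiT-S configuration (the numbers are illustrative, not taken from this snippet):

model = PoolingTransformer(
    image_size=224,
    patch_size=16,
    stride=8,
    base_dims=[48, 48, 48],
    depth=[2, 6, 4],
    heads=[3, 6, 12],
    mlp_ratio=4,
)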
Example #8
    def __init__(
        self,
        embed_dim,
        # vision
        image_resolution,
        vision_layers,
        vision_width,
        vision_patch_size,
        # text
        context_length,
        vocab_size,
        transformer_width,
        transformer_heads,
        transformer_layers,
    ):
        super().__init__()
        self.context_length = context_length
        self.embed_dim = embed_dim

        if isinstance(vision_layers, (tuple, list)):
            vision_heads = vision_width * 32 // 64
            self.visual = ModifiedResNet(
                layers=vision_layers,
                output_dim=embed_dim,
                heads=vision_heads,
                input_resolution=image_resolution,
                width=vision_width,
            )
        else:
            vision_heads = vision_width // 64
            self.visual = VisualTransformer(
                input_resolution=image_resolution,
                patch_size=vision_patch_size,
                width=vision_width,
                layers=vision_layers,
                heads=vision_heads,
                output_dim=embed_dim,
            )

        self.transformer = Transformer(
            width=transformer_width,
            layers=transformer_layers,
            heads=transformer_heads,
            attn_mask=self.build_attention_mask(),
        )

        self.vocab_size = vocab_size
        self.token_embedding = nn.Embedding(vocab_size, transformer_width)

        # Left uninitialised here; initialize_parameters(), called at the
        # end of __init__, is expected to fill these in.
        positional_embedding = self.create_parameter(
            shape=(self.context_length, transformer_width),
            default_initializer=Assign(
                paddle.empty((self.context_length, transformer_width))
            ),
        )
        self.add_parameter("positional_embedding", positional_embedding)

        self.ln_final = nn.LayerNorm(transformer_width)

        text_projection = self.create_parameter(
            shape=(transformer_width, embed_dim),
            default_initializer=Assign(paddle.empty((transformer_width, embed_dim))),
        )
        self.add_parameter("text_projection", text_projection)

        logit_scale = self.create_parameter(
            shape=(1,), default_initializer=Assign(paddle.ones([1]))
        )
        self.add_parameter("logit_scale", logit_scale)

        self.initialize_parameters()
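build_attention_mask is called above but not shown. Below is a minimal sketch of the causal mask it presumably returns, mirroring the original CLIP implementation (an assumption, not code from this example):

    def build_attention_mask(self):
        # float('-inf') above the diagonal, zeros elsewhere, so each text
        # token attends only to itself and earlier positions.
        mask = paddle.full(
            (self.context_length, self.context_length), float("-inf"))
        return paddle.triu(mask, diagonal=1)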