Example #1
 def init_hiddens(self, learn_init):
     # Initial states of shape (layer_num, 1, hidden): learned when
     # learn_init is True, otherwise fixed zeros excluded from autograd.
     if learn_init:
         h = nn.Parameter(torch.randn(self.layer_num, 1, self.hc))
         c = nn.Parameter(torch.randn(self.layer_num, 1, self.hc))
     else:
         h = nn.Parameter(torch.zeros(self.layer_num, 1, self.hc),
                          requires_grad=False)
         c = nn.Parameter(torch.zeros(self.layer_num, 1, self.hc),
                          requires_grad=False)
     return h, c
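
A minimal, self-contained sketch of how such learned initial states are typically consumed (the LSTM sizes below are assumed for illustration, not taken from the original model): the states are stored with batch size 1 and expanded to the actual batch at forward time.

 import torch
 import torch.nn as nn

 layer_num, hc, batch = 2, 16, 4
 lstm = nn.LSTM(input_size=8, hidden_size=hc,
                num_layers=layer_num, batch_first=True)
 # learned initial states, stored once with a batch dimension of 1
 h0 = nn.Parameter(torch.randn(layer_num, 1, hc))
 c0 = nn.Parameter(torch.randn(layer_num, 1, hc))

 x = torch.randn(batch, 10, 8)                     # (batch, time, features)
 # expand the stored states across the actual batch before the forward pass
 out, _ = lstm(x, (h0.expand(-1, batch, -1).contiguous(),
                   c0.expand(-1, batch, -1).contiguous()))
 print(out.shape)                                  # torch.Size([4, 10, 16])
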
Example #2
 def __init__(self, input_size, hidden_size, bias=False):
     '''
     Create a GRU cell.

     bias -- Not implemented yet.
     '''
     super(GRUCell, self).__init__()
     # raw weight matrices plus two fused Linear projections with
     # gate-sized (hidden_size * 2) outputs
     self.Wx = nn.Parameter(torch.randn(input_size, hidden_size))
     self.Wh = nn.Parameter(torch.randn(hidden_size, hidden_size))
     self.input_to_hidden = nn.Linear(input_size, hidden_size * 2)
     self.hidden_to_hidden = nn.Linear(hidden_size, hidden_size * 2)
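
The forward pass for this cell is not shown; the sketch below is one plausible reading of these attributes (reset and update gates from the fused Linear layers, candidate state from Wx and Wh) and is an assumption, not the original implementation.

 import torch
 import torch.nn as nn

 class GRUCellSketch(nn.Module):
     def __init__(self, input_size, hidden_size):
         super().__init__()
         self.Wx = nn.Parameter(torch.randn(input_size, hidden_size))
         self.Wh = nn.Parameter(torch.randn(hidden_size, hidden_size))
         self.input_to_hidden = nn.Linear(input_size, hidden_size * 2)
         self.hidden_to_hidden = nn.Linear(hidden_size, hidden_size * 2)

     def forward(self, x, h):
         # fused projections give both gates at once; split them in half
         gates = torch.sigmoid(self.input_to_hidden(x) + self.hidden_to_hidden(h))
         r, z = gates.chunk(2, dim=-1)                    # reset, update
         # candidate state built from the raw weight matrices
         n = torch.tanh(x @ self.Wx + (r * h) @ self.Wh)
         return (1 - z) * n + z * h                       # new hidden state

 cell = GRUCellSketch(8, 16)
 h = cell(torch.randn(4, 8), torch.zeros(4, 16))
 print(h.shape)                                           # torch.Size([4, 16])
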
Example #3
 def __getitem__(self, key):
     # lazily create a trainable embedding the first time a key is seen
     # and register it so it shows up in the module's parameters()
     if key not in self.mapping:
         embedding = nn.Parameter(torch.Tensor(self.dim).to(self.opts.device))
         nn.init.normal_(embedding, std=0.1)
         self.mapping[key] = embedding
         self.register_parameter(key, embedding)
     return self.mapping[key]
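
For context, a self-contained variant of the same pattern (the opts/device handling of the original is omitted here); registering each lazily created embedding makes it visible to optimizers via parameters().

 import torch
 import torch.nn as nn

 class DynamicEmbedding(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.dim = dim
         self.mapping = {}

     def __getitem__(self, key):
         if key not in self.mapping:
             embedding = nn.Parameter(torch.empty(self.dim))
             nn.init.normal_(embedding, std=0.1)
             self.mapping[key] = embedding
             self.register_parameter(key, embedding)
         return self.mapping[key]

 table = DynamicEmbedding(4)
 vec = table["user_42"]                              # created on first access
 print(vec.shape, len(list(table.parameters())))     # torch.Size([4]) 1
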
Example #4
    def __init__(self,
                 img_size=[224],
                 patch_size=16,
                 channels=3,
                 num_classes=0,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0,
                 attn_drop_rate=0,
                 drop_path_rate=0.,
                 norm_layer=nn.LayerNorm,
                 **kwargs):

        super(ViT, self).__init__()
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = PatchEmbed(img_size=img_size[0],
                                      patch_size=patch_size,
                                      channels=channels,
                                      embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        # learnable class token and positional embedding (one extra position
        # for the class token), followed by dropout on the token sequence
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule: per-block drop-path rates increase
        # linearly from 0 to drop_path_rate
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]

        self.blocks = nn.ModuleList([
            Block(dim=embed_dim,
                  num_heads=num_heads,
                  mlp_ratio=mlp_ratio,
                  qkv_bias=qkv_bias,
                  # assumes Block accepts a per-block drop_path rate
                  drop_path=dpr[i]) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
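
The constructor above only builds the modules; the following is a hedged, self-contained sketch of the token-preparation step these parameters imply (the actual forward method and PatchEmbed output are not part of the snippet, so the shapes are illustrative).

 import torch
 import torch.nn as nn

 batch, num_patches, embed_dim = 2, 196, 768
 patch_tokens = torch.randn(batch, num_patches, embed_dim)  # stand-in for PatchEmbed output
 cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
 pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
 pos_drop = nn.Dropout(p=0.1)

 # prepend the class token, add positional embeddings, then apply dropout
 tokens = torch.cat((cls_token.expand(batch, -1, -1), patch_tokens), dim=1)
 tokens = pos_drop(tokens + pos_embed)
 print(tokens.shape)                                 # torch.Size([2, 197, 768])
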
Example #5
 def __init__(self, num_features, num_dims):
     super(BatchNorm, self).__init__()
     # num_dims == 2 for fully connected layers, 4 for convolutional layers
     if num_dims == 2:
         shape = (1, num_features)
     else:
         shape = (1, num_features, 1, 1)
     # scale and shift: trainable parameters, so gradients are computed
     self.gamma = nn.Parameter(torch.ones(shape))
     self.beta = nn.Parameter(torch.zeros(shape))
     # running statistics: plain tensors, not updated by gradient descent
     self.moving_mean = torch.zeros(shape)
     self.moving_var = torch.ones(shape)
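
These attributes are consumed by a batch_norm routine that is not part of the snippet; below is a hedged, self-contained sketch of the usual logic: batch statistics plus moving-average updates in training, stored statistics at evaluation.

 import torch

 def batch_norm(x, gamma, beta, moving_mean, moving_var,
                eps=1e-5, momentum=0.9, training=True):
     if not training:
         # evaluation: normalize with the stored running statistics
         x_hat = (x - moving_mean) / torch.sqrt(moving_var + eps)
     else:
         # training: normalize with batch statistics, update the running ones
         dims = (0,) if x.dim() == 2 else (0, 2, 3)
         mean = x.mean(dim=dims, keepdim=True)
         var = ((x - mean) ** 2).mean(dim=dims, keepdim=True)
         x_hat = (x - mean) / torch.sqrt(var + eps)
         moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
         moving_var = momentum * moving_var + (1.0 - momentum) * var
     return gamma * x_hat + beta, moving_mean, moving_var

 x = torch.randn(8, 3, 4, 4)
 y, _, _ = batch_norm(x, torch.ones(1, 3, 1, 1), torch.zeros(1, 3, 1, 1),
                      torch.zeros(1, 3, 1, 1), torch.ones(1, 3, 1, 1))
 print(y.shape)                                      # torch.Size([8, 3, 4, 4])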