def init_hiddens(self, learn_init):
    if learn_init:
        # Learnable initial hidden and cell states, shared across the batch.
        h = nn.Parameter(torch.randn(self.layer_num, 1, self.hc))
        c = nn.Parameter(torch.randn(self.layer_num, 1, self.hc))
    else:
        # Fixed zero initial states; requires_grad=False keeps them out of training.
        h = nn.Parameter(torch.zeros(self.layer_num, 1, self.hc), requires_grad=False)
        c = nn.Parameter(torch.zeros(self.layer_num, 1, self.hc), requires_grad=False)
    return h, c
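# Minimal usage sketch (assumed context, not from the source): init_hiddens above is
# used by an LSTM wrapper that defines self.layer_num and self.hc. The class name,
# the nn.LSTM layout, and the expand() to the batch size are illustrative assumptions.
import torch
import torch.nn as nn

class SeqEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, layer_num, learn_init=True):
        super().__init__()
        self.layer_num, self.hc = layer_num, hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, layer_num, batch_first=True)
        self.h0, self.c0 = init_hiddens(self, learn_init)  # function defined above

    def forward(self, x):
        batch = x.size(0)
        h0 = self.h0.expand(-1, batch, -1).contiguous()  # broadcast states over the batch
        c0 = self.c0.expand(-1, batch, -1).contiguous()
        return self.lstm(x, (h0, c0))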
def __init__(self, input_size, hidden_size, bias=False):
    '''
    Create a GRU cell.

    bias -- Not implemented yet.
    '''
    super(GRUCell, self).__init__()
    # Extra weight matrices, presumably for the candidate hidden state
    # (their use is not shown in this snippet; see the sketch below).
    self.Wx = nn.Parameter(torch.randn(input_size, hidden_size))
    self.Wh = nn.Parameter(torch.randn(hidden_size, hidden_size))
    # Combined projections sized for two gates (reset and update).
    self.input_to_hidden = nn.Linear(input_size, hidden_size * 2)
    self.hidden_to_hidden = nn.Linear(hidden_size, hidden_size * 2)
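# The source shows only __init__; below is a sketched forward pass consistent with
# those parameters, assuming input_to_hidden / hidden_to_hidden produce the reset and
# update gates and Wx / Wh produce the candidate state. This is an assumption about
# the intended design, not the author's actual forward().
def forward(self, x, h):
    gates = self.input_to_hidden(x) + self.hidden_to_hidden(h)
    r, z = torch.sigmoid(gates).chunk(2, dim=-1)     # reset and update gates
    n = torch.tanh(x @ self.Wx + (r * h) @ self.Wh)  # candidate hidden state
    return (1 - z) * n + z * h                       # GRU state update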
def __getitem__(self, key):
    if key not in self.mapping:
        # Lazily create and register an embedding for a previously unseen key.
        embedding = nn.Parameter(torch.Tensor(self.dim).to(self.opts.device))
        nn.init.normal_(embedding, std=0.1)
        self.mapping[key] = embedding
        self.register_parameter(key, embedding)
    return self.mapping[key]
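# Sketch of a module that could host __getitem__ above (assumed, not from the source):
# it must provide self.mapping, self.dim, and self.opts with a .device attribute. The
# class and argument names are illustrative. Parameters created after the optimizer is
# built still need to be added to it separately.
class KeyedEmbeddings(nn.Module):
    def __init__(self, dim, opts):
        super().__init__()
        self.dim = dim        # embedding dimensionality
        self.opts = opts      # expected to expose .device
        self.mapping = {}     # key -> nn.Parameter, filled lazily

    __getitem__ = __getitem__  # the lookup method defined above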
def __init__(self, img_size=[224], patch_size=16, channels=3, num_classes=0,
             embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=False,
             qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.,
             norm_layer=nn.LayerNorm, **kwargs):
    super(ViT, self).__init__()
    self.num_features = self.embed_dim = embed_dim

    # Split the image into patches and embed each patch.
    self.patch_embed = PatchEmbed(img_size=img_size[0], patch_size=patch_size,
                                  channels=channels, embed_dim=embed_dim)
    num_patches = self.patch_embed.num_patches

    # Learnable [CLS] token and positional embeddings (one extra slot for [CLS]).
    self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
    self.pos_drop = nn.Dropout(p=drop_rate)

    # Per-block stochastic-depth rates (linearly increasing); typically passed to
    # each Block as drop_path, though they are not wired up here.
    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
    self.blocks = nn.ModuleList([
        Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio,
              qkv_bias=qkv_bias)
        for _ in range(depth)
    ])
    self.norm = norm_layer(embed_dim)

    # Classification head; identity when num_classes == 0 (feature extractor).
    self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    trunc_normal_(self.pos_embed, std=.02)
    trunc_normal_(self.cls_token, std=.02)
    self.apply(self._init_weights)
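# The source shows only the constructor; the forward pass below is a sketch of the
# standard ViT flow (patchify, prepend [CLS], add positional embeddings, run the
# transformer blocks, read out the [CLS] features through the head). It assumes
# PatchEmbed returns tokens of shape (B, num_patches, embed_dim) and that the input
# resolution matches pos_embed; it is not taken from the author's code.
def forward(self, x):
    B = x.shape[0]
    x = self.patch_embed(x)                        # (B, num_patches, embed_dim)
    cls_tokens = self.cls_token.expand(B, -1, -1)  # one [CLS] token per sample
    x = torch.cat((cls_tokens, x), dim=1)
    x = self.pos_drop(x + self.pos_embed)
    for blk in self.blocks:
        x = blk(x)
    x = self.norm(x)
    return self.head(x[:, 0])                      # classify from the [CLS] token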
def __init__(self, num_features, num_dims):
    super(BatchNorm, self).__init__()
    if num_dims == 2:
        shape = (1, num_features)        # fully connected input: (N, C)
    else:
        shape = (1, num_features, 1, 1)  # convolutional input: (N, C, H, W)
    # Learnable scale and shift; participate in gradient computation.
    self.gamma = nn.Parameter(torch.ones(shape))
    self.beta = nn.Parameter(torch.zeros(shape))
    # Running statistics; plain tensors, so no gradients are computed for them.
    # The running variance starts at 1 so normalization is well-defined before
    # the first update.
    self.moving_mean = torch.zeros(shape)
    self.moving_var = torch.ones(shape)
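# The source shows only the constructor; a forward pass in the style of the
# "Dive into Deep Learning" BatchNorm example would look like the sketch below.
# The batch_norm helper, eps, and the momentum value are assumptions, not the
# author's code.
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps=1e-5, momentum=0.9):
    if not torch.is_grad_enabled():
        # Inference: normalize with the running statistics.
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        if X.dim() == 2:
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Per-channel statistics for (N, C, H, W) inputs.
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the running statistics with an exponential moving average.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    return gamma * X_hat + beta, moving_mean, moving_var

def forward(self, X):
    # Keep the running statistics on the same device as the input.
    if self.moving_mean.device != X.device:
        self.moving_mean = self.moving_mean.to(X.device)
        self.moving_var = self.moving_var.to(X.device)
    Y, self.moving_mean, self.moving_var = batch_norm(
        X, self.gamma, self.beta, self.moving_mean, self.moving_var)
    return Y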