def __init__(self, num_features, num_dims, **kwargs):
    super(BatchNorm, self).__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_features)
    else:
        shape = (1, num_features, 1, 1)
    # Scale and shift parameters involved in gradient computation and updates,
    # initialized to 1 and 0 respectively
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Variables not involved in gradient computation or updates
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.zeros(shape)
def __init__(self, num_feature, num_dims, **kwargs):
    super(BatchNorm, self).__init__(**kwargs)
    shape = (1, num_feature) if num_dims == 2 else (1, num_feature, 1, 1)
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.ones(shape)
def __init__(self, num_features, num_dims, **kwargs):
    super(BatchNorm, self).__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_features)
    else:
        shape = (1, num_features, 1, 1)
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Variables not involved in gradient computation or updates, initialized to 0 on the CPU
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.zeros(shape)
def __init__(self, num_features, num_dims, **kwargs):
    super().__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_features)         # fully connected layer
    else:
        shape = (1, num_features, 1, 1)   # 2D convolutional layer
    # Scale and shift parameters involved in gradient computation and updates,
    # initialized to 1 and 0 respectively
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Variables not involved in gradient computation or updates; initialized to 0
    # in main memory first, then copied to GPU memory later (in forward)
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.zeros(shape)
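# The __init__ variants above only allocate gamma/beta and the moving statistics;
# a forward method is still needed to use them. Below is a minimal sketch, assuming
# the batch_norm helper from the original exercise; eps=1e-5 and momentum=0.9 are
# typical values, not taken from the snippets above.
from mxnet import autograd, init, nd
from mxnet.gluon import nn

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not autograd.is_training():
        # In prediction mode, normalize with the accumulated moving statistics
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the batch dimension
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # 2D convolutional layer: statistics over batch, height and width,
            # keeping the channel dimension so broadcasting works
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the moving statistics with an exponential moving average
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var

class BatchNorm(nn.Block):
    def __init__(self, num_features, num_dims, **kwargs):
        # Same parameter setup as the variants above
        super(BatchNorm, self).__init__(**kwargs)
        shape = (1, num_features) if num_dims == 2 else (1, num_features, 1, 1)
        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
        self.beta = self.params.get('beta', shape=shape, init=init.Zero())
        self.moving_mean = nd.zeros(shape)
        self.moving_var = nd.zeros(shape)

    def forward(self, X):
        # Copy the moving statistics to the device X lives on (e.g. GPU) if necessary
        if self.moving_mean.context != X.context:
            self.moving_mean = self.moving_mean.copyto(X.context)
            self.moving_var = self.moving_var.copyto(X.context)
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma.data(), self.beta.data(), self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y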
b = net.hidden.bias
print('hidden layer name: ', net.hidden.name, '\nweight: ', w, '\nbias: ', b)
print('weight:', w.data(), '\nweight grad:', w.grad(),
      '\nbias:', b.data(), '\nbias grad:', b.grad())

# collect_params returns a dict-like mapping from parameter names to Parameter objects
params = net.collect_params()
print(params)
print(params['mlp0_dense0_bias'].data())
print(params.get('dense0_bias').data())  # get() prepends the dict's prefix ('mlp0_')

# Reinitialize all parameters from a normal distribution
params = net.collect_params()
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print('hidden weight: ', net.hidden.weight.data(), '\nhidden bias: ', net.hidden.bias.data(),
      '\noutput weight: ', net.output.weight.data(), '\noutput bias: ', net.output.bias.data())

# Reinitialize a single parameter
net.hidden.bias.initialize(init=init.One(), force_reinit=True)

# Custom initializer: fill weights with values drawn uniformly from [10, 20)
class MyInit(init.Initializer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._verbose = True

    def _init_weight(self, name, arr):
        nd.random.uniform(low=10, high=20, out=arr)

net = MLP()
net.initialize(MyInit())
net(x)  # the forward pass triggers the (deferred) initialization
print(net.hidden.weight.data())
print(net.hidden.bias.data())
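# The snippet above reads w, x and the hidden/output sub-blocks without defining them;
# a minimal sketch of the setup it assumes (layer sizes and the input shape are
# assumptions; the attribute names match the mlp0_dense0_* keys used above):
from mxnet import init, nd
from mxnet.gluon import nn

class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.hidden = nn.Dense(4)  # parameters named mlp0_dense0_*
            self.output = nn.Dense(2)  # parameters named mlp0_dense1_*

    def forward(self, x):
        return self.output(nd.relu(self.hidden(x)))

x = nd.random.uniform(shape=(3, 5))
net = MLP()
net.initialize()
net(x)                  # shapes are only known after the first forward pass
w = net.hidden.weight   # the Parameter printed at the top of the snippet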
b.grad()

# collect_params returns all parameters of a Block as a dict from name to Parameter
params = net.collect_params()
params['sequential0_dense0_weight'].data()
params['sequential0_dense0_bias'].data()
params.get('dense1_weight').data()  # get() prepends the dict's prefix ('sequential0_')
params.get('dense1_bias').data()

'''---------------------------------------------------'''
# Custom initialization methods
from mxnet import init

# Weights from a normal distribution
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
# ... or initialize the weights to 1
params.initialize(init=init.One(), force_reinit=True)

# User-defined initializer
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True

    def _init_weight(self, _, arr):
        # Initialize the weights; with out=arr there is no need to specify the shape
        nd.random.uniform(low=5, high=10, out=arr)

    def _init_bias(self, _, arr):
        arr[:] = 2
        # Note: due to a bug in Gluon, _init_bias is not actually invoked here

params.initialize(init=MyInit(), force_reinit=True)
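# The snippet above indexes sequential0_dense0_* keys and a second dense layer that
# are not defined in it; a minimal sketch of the net it assumes (layer sizes are assumptions):
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation='relu'))  # parameters named sequential0_dense0_*
    net.add(nn.Dense(2))                     # parameters named sequential0_dense1_*
net.initialize()

x = nd.random.uniform(shape=(3, 5))
net(x)            # the forward pass gives the deferred parameters concrete shapes
b = net[0].bias   # the Parameter whose gradient is read at the top of the snippet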
# print('net name %s weight %s bias %s' % (net[0].name, w, b))
# print('weight: ', w.data())
# print('weight gradient: ', w.grad())
# print('bias: ', b.data())
# print('bias gradient: ', b.grad())

params = net.collect_params()
# for param in params:
#     print('param: ', param)

# Different initialization
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
# print(net[0].weight.data(), net[0].bias.data())
params.initialize(init=init.One(), force_reinit=True)
# print(net[0].weight.data(), net[0].bias.data())

# Deferred initialization: collect_params before initialize(), after initialize(),
# and after the first forward pass
net = build_net()
params = net.collect_params()
# print(params)
net.initialize()
params2 = net.collect_params()
# print(params2)
net(x)
params3 = net.collect_params()
# print(params3)

# Share Parameters (see the sketch below)
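# The closing comment points at parameter sharing; a minimal sketch, assuming the
# layer sizes below (the third layer reuses the second layer's parameters by passing
# params=shared.params at construction time):
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
        shared,
        nn.Dense(8, activation='relu', params=shared.params),  # shares weight and bias
        nn.Dense(10))
net.initialize()

x = nd.random.uniform(shape=(2, 20))
net(x)
# Both layers hold the very same Parameter objects, so their data is identical
print((net[1].weight.data() == net[2].weight.data()).asnumpy().all())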