Example #1
    def __init__(self, num_features, num_dims, **kwargs):
        super(BatchNorm, self).__init__(**kwargs)
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        # Scale and shift parameters involved in gradient computation and
        # iteration, initialized to 1 and 0 respectively
        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
        self.beta = self.params.get('beta', shape=shape, init=init.Zero())

        # Variables not involved in gradient computation and iteration
        self.moving_mean = nd.zeros(shape)
        self.moving_var = nd.zeros(shape)
Example #2
 def __init__(self, num_feature, num_dims, **kwargs):
     super(BatchNorm, self).__init__(**kwargs)
     shape = (1, num_feature) if num_dims == 2 else (1, num_feature, 1, 1)
     self.beta = self.params.get('beta', shape=shape, init=init.Zero())
     self.gamma = self.params.get('gamma', shape=shape, init=init.One())
     self.moving_mean = nd.zeros(shape)
     self.moving_var = nd.ones(shape)
Example #3
    def __init__(self, num_features, num_dims, **kwargs):
        super(BatchNorm, self).__init__(**kwargs)
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)

        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
        self.beta = self.params.get('beta', shape=shape, init=init.Zero())
        # Variables not involved in gradient computation and iteration,
        # initialized to 0 on the CPU
        self.moving_mean = nd.zeros(shape)
        self.moving_var = nd.zeros(shape)
Example #4
 def __init__(self, num_features, num_dims, **kwargs):
     super().__init__(**kwargs)
     if num_dims == 2:
         shape = (1, num_features)  # fully connected layer
     else:
         shape = (1, num_features, 1, 1)  # 2D convolutional layer
     # Scale and shift parameters involved in gradient computation and
     # iteration, initialized to 1 and 0 respectively
     self.gamma = self.params.get('gamma', shape=shape, init=init.One())
     self.beta = self.params.get('beta', shape=shape, init=init.Zero())
     # Variables not involved in gradient computation and iteration; they are
     # initialized to 0 in main memory first and moved to GPU memory afterwards
     self.moving_mean = nd.zeros(shape)
     self.moving_var = nd.zeros(shape)
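All four __init__ variants above only allocate the block's state. For context, the following is a minimal sketch of the forward pass they are typically paired with (modeled on the d2l.ai Gluon BatchNorm example); the eps and momentum values and the batch_norm helper name are assumptions, not part of the snippets above.

from mxnet import autograd, init, nd
from mxnet.gluon import nn

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not autograd.is_training():
        # Prediction mode: normalize with the running statistics.
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the batch dimension.
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # Convolutional layer: per-channel statistics over batch, height and width.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the running statistics with an exponential moving average.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    return gamma * X_hat + beta, moving_mean, moving_var  # scale and shift

class BatchNorm(nn.Block):
    # __init__ as in the examples above (repeated here so the sketch runs on its own)
    def __init__(self, num_features, num_dims, **kwargs):
        super(BatchNorm, self).__init__(**kwargs)
        shape = (1, num_features) if num_dims == 2 else (1, num_features, 1, 1)
        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
        self.beta = self.params.get('beta', shape=shape, init=init.Zero())
        self.moving_mean = nd.zeros(shape)
        self.moving_var = nd.zeros(shape)

    def forward(self, X):
        # Copy the running statistics to the same context (CPU/GPU) as the input.
        if self.moving_mean.context != X.context:
            self.moving_mean = self.moving_mean.copyto(X.context)
            self.moving_var = self.moving_var.copyto(X.context)
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma.data(), self.beta.data(), self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y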
Example #5
w = net.hidden.weight
b = net.hidden.bias
print('hidden layer name: ', net.hidden.name, '\nweight: ', w, '\nbias: ', b)
print('weight:', w.data(), '\nweight grad:', w.grad(), '\nbias:', b.data(),
      '\nbias grad:', b.grad())

params = net.collect_params()
print(params)
print(params['mlp0_dense0_bias'].data())
print(params.get('dense0_bias').data())

params = net.collect_params()
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print('hidden weight: ', net.hidden.weight.data(), '\nhidden bias: ',
      net.hidden.bias.data(), '\noutput weight: ', net.output.weight.data(),
      '\noutput bias: ',net.output.bias.data())

net.hidden.bias.initialize(init=init.One(), force_reinit=True)

class MyInit(init.Initializer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._verbose = True

    def _init_weight(self, name, arr):
        nd.random.uniform(low=10, high=20, out=arr)

net = MLP()
net.initialize(MyInit())
net(x)
print(net.hidden.weight.data())
print(net.hidden.bias.data())
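The snippet above refers to an MLP block with attributes named hidden and output, and to an input x, neither of which is shown. A minimal sketch of the assumed setup (layer sizes and input shape are made up for illustration):

from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(4)   # accessed above as net.hidden
        self.output = nn.Dense(2)   # accessed above as net.output

    def forward(self, x):
        return self.output(nd.relu(self.hidden(x)))

x = nd.random.uniform(shape=(3, 5))
net = MLP()
net.initialize()
net(x)  # parameter shapes only become known after this first forward pass (deferred init)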
Example #6
b.grad()

# collect_params accesses all of the Block's parameters and returns a dict
# mapping names to the corresponding Parameter objects
params = net.collect_params()
params['sequential0_dense0_weight'].data()
params['sequential0_dense0_bias'].data()
params.get('dense1_weight').data()
params.get('dense1_bias').data()

'''---------------------------------------------------'''
# Customized initialization methods
from mxnet import init
# (weights drawn from a normal distribution)
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
# or (initializes the weights to 1)
params.initialize(init=init.One(), force_reinit=True)

# Custom initializer
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit,self).__init__()
        self._verbose = True
    def _init_weight(self, _, arr):
        # Initialize the weights; with out=arr there is no need to specify a shape
        nd.random.uniform(low=5, high=10, out=arr)

    def _init_bias(self, _, arr):
        arr[:] = 2

# The bias not being initialized here is a gluon bug
params.initialize(init=MyInit(), force_reinit=True)
# print('net name %s weight %s bias %s' %(net[0].name, w, b))

# print('weight: ', w.data())
# print('weight gradient: ', w.grad())
# print('bias: ', b.data())
# print('bias gradient: ', b.grad())

params = net.collect_params()
# for param in params:
#     print('param: ', param)

# Different Initialization
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
# print(net[0].weight.data(), net[0].bias.data())

params.initialize(init=init.One(), force_reinit=True)
# print(net[0].weight.data(), net[0].bias.data())

net = build_net()
params = net.collect_params()
# print(params)

net.initialize()
params2 = net.collect_params()
# print(params2)

net(x)
params3 = net.collect_params()
# print(params3)

# Share Parameters
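The listing ends at the parameter-sharing step without code. A minimal sketch of how parameters are commonly shared in Gluon, by passing one layer's params to another (layer sizes and input shape are made up for illustration):

from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
        shared,
        nn.Dense(8, activation='relu', params=shared.params),  # reuses shared's weight and bias
        nn.Dense(2))
net.initialize()

x = nd.random.uniform(shape=(2, 20))
net(x)
# The second and third layers hold the same Parameter objects,
# so their weights are identical and stay identical during training.
print(net[1].weight.data()[0] == net[2].weight.data()[0])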