def weight_relu_initialization(link, mean=0.0, relu_a=0.0, way='forward'):
    """Build a ReLU-scaled, block-orthonormal initial weight for *link*.

    Parameters
    ----------
    link : object
        A layer whose ``link.weight.data`` is a 2-D (fully-connected) or
        4-D (convolutional) array; only its shape is read here.
    mean : float
        Mean passed through to the orthonormal generator.
    relu_a : float
        Negative slope of the (leaky) ReLU used when computing the
        variance (0.0 gives the plain-ReLU scaling).
    way : str
        Fan selection mode forwarded to ``NN.select_way``
        (e.g. ``'forward'`` for fan-in style scaling).

    Returns
    -------
    numpy.ndarray
        An array of ``link.weight.data.shape`` with dtype float32,
        built from ``channel_out`` orthonormal blocks.

    Raises
    ------
    ValueError
        If the weight is neither 2-D nor 4-D.
    """
    dim = len(link.weight.data.shape)
    if dim == 2:
        # Fully-connected layer: weight is (out, in); treat the kernel
        # as 1x1 so the fan computation below is uniform.
        channel_out, channel_in = link.weight.data.shape
        y_k, x_k = 1, 1
    elif dim == 4:
        # Convolutional layer: weight is (out, in, kh, kw).
        channel_out, channel_in, y_k, x_k = link.weight.data.shape
    else:
        # Original code fell through here and later crashed with an
        # opaque UnboundLocalError; fail fast with a clear message.
        raise ValueError(
            'weight_relu_initialization expects a 2-D or 4-D weight, '
            'got {}-D'.format(dim))
    # Fan-in / fan-out according to the requested propagation direction.
    n_i, n_i_next = NN.select_way(way,
                                  channel_in * y_k * x_k,
                                  channel_out * y_k * x_k)
    # Variance scaled for (leaky) ReLU activations.
    variance = initializer.variance_relu(n_i, n_i_next, a=relu_a)
    # One orthonormal (channel_in, kh*kw) block per output channel.
    w = []
    for _ in six.moves.range(channel_out):
        w.append(initializer.orthonorm(mean, variance,
                                       (channel_in, y_k * x_k),
                                       initializer.gauss, np.float32))
    return np.reshape(w, link.weight.data.shape)
def bias_initialization(conv, constant=0):
    """Return a constant-filled initial bias matching ``conv.b``.

    Parameters
    ----------
    conv : object
        A layer whose ``conv.b.data`` provides the bias shape.
    constant : int or float
        Fill value for every bias element (default 0).

    Returns
    -------
    numpy.ndarray
        A float32 array of ``conv.b.data.shape`` filled with *constant*.
    """
    # Bug fix: the original hardcoded constant=0, silently ignoring the
    # caller-supplied value; forward the parameter instead.
    return initializer.const(conv.b.data.shape,
                             constant=constant,
                             dtype=np.float32)