def __init__(self,
             normalized_shape,
             begin_norm_axis=-1,
             begin_params_axis=-1,
             gamma_init='ones',
             beta_init='zeros',
             use_batch_norm=False):
    super(FusedLayerNorm, self).__init__()
    if not isinstance(normalized_shape, (tuple, list)):
        raise TypeError("The type of 'normalized_shape' should be tuple[int] or list[int], "
                        "but '{}' type is {}.".format(normalized_shape, type(normalized_shape)))
    self.normalized_shape = normalized_shape
    self.begin_norm_axis = begin_norm_axis
    self.begin_params_axis = begin_params_axis
    # Learnable affine parameters applied over the normalized axes.
    self.gamma = Parameter(initializer(gamma_init, normalized_shape), name="gamma")
    self.beta = Parameter(initializer(beta_init, normalized_shape), name="beta")
    self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis,
                                  begin_params_axis=self.begin_params_axis)
    # BatchNorm alternative selected by use_batch_norm; Mul/TensorAdd apply
    # gamma and beta manually on that path.
    self.batch_norm = P.BatchNorm(is_training=True, epsilon=1e-5)
    self.use_batch_norm = use_batch_norm
    self.mul = P.Mul()
    self.add = P.TensorAdd()
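# Hedged sketch of a construct method for the FusedLayerNorm cell above,
# assuming the intent is to emulate LayerNorm with BatchNorm during training
# (each sample becomes one BatchNorm channel, so statistics are taken over
# the normalized elements) and to fall back to the fused LayerNorm primitive
# otherwise. The reshape scheme and the manual gamma/beta application are
# illustrative assumptions, not confirmed source; F and mstype are assumed
# to come from mindspore.ops.functional and mindspore.common.dtype.
def construct(self, input_x):
    if self.use_batch_norm and self.training:
        shape_x = F.shape(input_x)
        # Fold the normalized axes into one dim; leading axes become channels.
        num_norm_elems = 1
        for dim in self.normalized_shape:
            num_norm_elems = num_norm_elems * dim
        reshaped = F.reshape(input_x, (1, -1, num_norm_elems, 1))
        # Neutral scale/shift/statistics so BatchNorm only standardizes.
        channels = (F.shape(reshaped)[1],)
        ones = P.Fill()(mstype.float32, channels, 1.0)
        zeros = P.Fill()(mstype.float32, channels, 0.0)
        output, _, _, _, _ = self.batch_norm(reshaped, ones, zeros, zeros, ones)
        output = F.reshape(output, shape_x)
        # Apply the cell's own affine parameters afterwards.
        y = self.add(self.mul(output, self.gamma), self.beta)
    else:
        y, _, _ = self.layer_norm(input_x, self.gamma, self.beta)
    return y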
def __init__(self, mul_weight, strategy1=None, strategy2=None, strategy3=None):
    super().__init__()
    self.begin_norm_axis = 2
    self.begin_params_axis = 1
    self.mul = P.Mul().set_strategy(strategy1)
    self.layer_norm = P.LayerNorm(self.begin_norm_axis,
                                  self.begin_params_axis).set_strategy(strategy2)
    self.mul2 = P.Mul().set_strategy(strategy3)
    self.mul_weight = Parameter(mul_weight, "w1")
    self.normalized_shape = [64, 32, 16]
    self.gamma = Parameter(initializer('ones', self.normalized_shape), name="gamma")
    self.beta = Parameter(initializer('zeros', self.normalized_shape), name="beta")
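# Hedged sketch of the construct method this auto-parallel test net implies:
# multiply by the weight, layer-normalize, multiply again. The operand order
# is an assumption read off the operator names and strategies above.
def construct(self, x, b):
    out = self.mul(x, self.mul_weight)
    out, _, _ = self.layer_norm(out, self.gamma, self.beta)
    out = self.mul2(out, b)
    return out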
def __init__(self,
             normalized_shape,
             begin_norm_axis=-1,
             begin_params_axis=-1,
             gamma_init='ones',
             beta_init='zeros'):
    super(LayerNorm, self).__init__()
    self.normalized_shape = normalized_shape
    self.begin_norm_axis = begin_norm_axis
    self.begin_params_axis = begin_params_axis
    self.gamma = Parameter(initializer(gamma_init, normalized_shape), name="gamma")
    self.beta = Parameter(initializer(beta_init, normalized_shape), name="beta")
    self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis,
                                  begin_params_axis=self.begin_params_axis)
def __init__(self,
             normalized_shape,
             begin_norm_axis=-1,
             begin_params_axis=-1,
             gamma_init='ones',
             beta_init='zeros'):
    super(LayerNorm, self).__init__()
    if not isinstance(normalized_shape, (tuple, list)):
        raise TypeError("The type of 'normalized_shape' should be tuple[int] or list[int], "
                        "but '{}' type is {}.".format(normalized_shape, type(normalized_shape)))
    self.normalized_shape = normalized_shape
    self.begin_norm_axis = begin_norm_axis
    self.begin_params_axis = begin_params_axis
    self.gamma = Parameter(initializer(gamma_init, normalized_shape), name="gamma")
    self.beta = Parameter(initializer(beta_init, normalized_shape), name="beta")
    self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis,
                                  begin_params_axis=self.begin_params_axis)
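# The P.LayerNorm primitive returns an (output, mean, variance) tuple; a
# minimal construct for the LayerNorm cell above would forward gamma/beta
# and keep only the normalized output. This is a sketch, not confirmed source.
def construct(self, input_x):
    y, _, _ = self.layer_norm(input_x, self.gamma, self.beta)
    return y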
    'skip': ['backward_exec']}),
('Flatten', {
    'block': P.Flatten(),
    'desc_inputs': [[128, 32, 32, 64]],
    'desc_bprop': [[128 * 32 * 8 * 16]]}),
('LogSoftmax', {
    'block': P.LogSoftmax(),
    'desc_inputs': [[64, 2]],
    'desc_bprop': [[160, 30522]]}),
('LogSoftmaxGrad', {
    'block': G.LogSoftmaxGrad(),
    'desc_inputs': [[16, 1234], [16, 1234]],
    'desc_bprop': [[64, 2]],
    'skip': ['backward']}),
('LayerNorm', {
    'block': P.LayerNorm(),
    'desc_inputs': [[2, 16], [16], [16]],
    'desc_bprop': [[2, 16], [2, 16], [2, 16]]}),
('LayerNormGrad', {
    'block': G.LayerNormGrad(),
    'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]],
    'desc_bprop': [[2, 16], [16], [16]],
    'skip': ['backward']}),
('FusedBatchNorm', {
    'block': P.FusedBatchNorm(),
    'desc_inputs': [[128, 64, 32, 64], [64], [64], [64], [64]],
    'desc_bprop': [[128, 64, 32, 64], [64], [64], [64], [64]],
    'skip': []}),
('FusedBatchNormGrad', {
    'block': G.FusedBatchNormGrad(),
    'desc_inputs': [[128, 64, 32, 64], [128, 64, 32, 64], [64], [64], [64]],
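# Hedged standalone example of calling the P.LayerNorm primitive with the
# [[2, 16], [16], [16]] shapes from the 'LayerNorm' entry above; the op
# returns an (output, mean, variance) tuple. Assumes a MindSpore environment.
import numpy as np
import mindspore as ms
from mindspore.ops import operations as P

x = ms.Tensor(np.random.randn(2, 16).astype(np.float32))
gamma = ms.Tensor(np.ones(16, np.float32))
beta = ms.Tensor(np.zeros(16, np.float32))
output, mean, variance = P.LayerNorm(begin_norm_axis=1, begin_params_axis=1)(x, gamma, beta)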
def __init__(self, begin_norm_axis, begin_params_axis):
    super(LayerNormNet, self).__init__()
    self.norm = P.LayerNorm(begin_norm_axis, begin_params_axis)
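# Hedged sketch of the construct method LayerNormNet presumably pairs with
# this __init__: pass the operands straight through and return the full
# (output, mean, variance) tuple from the primitive.
def construct(self, x, gamma, beta):
    return self.norm(x, gamma, beta)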
def __init__(self):
    super(Net, self).__init__()
    self.layernorm = P.LayerNorm(1, 1)
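# Hedged usage sketch for Net, assuming its construct forwards to
# self.layernorm; with begin_norm_axis=1 and begin_params_axis=1, gamma and
# beta must match the input shape from axis 1 onward. Shapes are illustrative.
import numpy as np
import mindspore as ms

net = Net()
x = ms.Tensor(np.ones((4, 8), np.float32))
gamma = ms.Tensor(np.ones(8, np.float32))
beta = ms.Tensor(np.zeros(8, np.float32))
out, mean, var = net(x, gamma, beta)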