def __init__(self):
    super(Net, self).__init__()
    self.bn = P.BatchNorm()
    self.scale = Parameter(initializer('ones', [64]), name='scale')
    self.offset = Parameter(initializer('zeros', [64]), name='offset')
    self.mean = Parameter(initializer('ones', [64]), name='mean')
    self.variance = Parameter(initializer('zeros', [64]), name='variance')
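# A minimal sketch of the matching construct (assumed, not part of the original
# snippet): P.BatchNorm returns a five-element tuple
# (output, batch_mean, batch_variance, reserve_1, reserve_2), and only the
# normalized output is kept here.
def construct(self, x):
    return self.bn(x, self.scale, self.offset, self.mean, self.variance)[0]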
def __init__(self, normalized_shape, begin_norm_axis=-1, begin_params_axis=-1,
             gamma_init='ones', beta_init='zeros', use_batch_norm=False):
    super(FusedLayerNorm, self).__init__()
    if not isinstance(normalized_shape, (tuple, list)):
        raise TypeError("The type of 'normalized_shape' should be tuple[int] or list[int], "
                        "but '{}' type is {}.".format(normalized_shape, type(normalized_shape)))
    self.normalized_shape = normalized_shape
    self.begin_norm_axis = begin_norm_axis
    self.begin_params_axis = begin_params_axis
    self.gamma = Parameter(initializer(gamma_init, normalized_shape), name="gamma")
    self.beta = Parameter(initializer(beta_init, normalized_shape), name="beta")
    self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis,
                                  begin_params_axis=self.begin_params_axis)
    self.batch_norm = P.BatchNorm(is_training=True, epsilon=1e-5)
    self.use_batch_norm = use_batch_norm
    self.mul = P.Mul()
    self.add = P.TensorAdd()
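# Hedged sketch of how construct could dispatch between the two paths. In the
# batch-norm branch, `ones` and `zeros` stand for filled tensors of the proper
# shape (their construction, and the reshape the real model performs, are
# omitted); gamma/beta are then applied manually via mul/add.
def construct(self, input_x):
    if self.use_batch_norm and self.training:
        normalized = self.batch_norm(input_x, ones, zeros, None, None)[0]
        output = self.add(self.mul(normalized, self.gamma), self.beta)
    else:
        # P.LayerNorm applies gamma/beta itself and returns (y, mean, variance).
        output, _, _ = self.layer_norm(input_x, self.gamma, self.beta)
    return output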
def bn_pass(): """ Sub a BN to Softmax. """ pattern = Call(P.BatchNorm()) target = Call(P.Softmax()) return pattern, target
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True,
             gamma_init='ones', beta_init='zeros',
             moving_mean_init='zeros', moving_var_init='ones'):
    super(FusedBatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1")
    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
    self.num_features = num_features
    self.eps = eps
    self.momentum = Tensor(1.0 - momentum, DT.float32)
    self.gamma = Parameter(initializer(gamma_init, num_features), name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features), name="beta", requires_grad=affine)
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features), name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features), name="variance", requires_grad=False)
    self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)
    # Shard strategies must be tuples of tuples; each input is kept unsplit.
    self.sub_mean = P.Sub().shard(((1,), (1,)))
    self.sub_var = P.Sub().shard(((1,), (1,)))
    self.mul_mean = P.Mul().shard(((1,), ()))
    self.mul_var = P.Mul().shard(((1,), ()))
    self.assign_sub_mean = P.AssignSub().shard(((1,), (1,)))
    self.assign_sub_var = P.AssignSub().shard(((1,), (1,)))
    self.sub_mean2 = P.Sub().shard(((1,), (1,)))
    self.sub_var2 = P.Sub().shard(((1,), (1,)))
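# Hedged sketch of the training/inference dispatch such a cell typically
# performs (assumed, not the snippet's actual construct). The update folds
# batch statistics into the moving averages:
#   moving <- moving - self.momentum * (moving - batch)
def construct(self, x):
    if self.training:
        output, batch_mean, batch_var, _, _ = self.bn_train(
            x, self.gamma, self.beta, self.moving_mean, self.moving_variance)
        self.assign_sub_mean(self.moving_mean,
                             self.mul_mean(self.sub_mean(self.moving_mean, batch_mean),
                                           self.momentum))
        self.assign_sub_var(self.moving_variance,
                            self.mul_var(self.sub_var(self.moving_variance, batch_var),
                                         self.momentum))
        return output
    return self.bn_infer(x, self.gamma, self.beta,
                         self.moving_mean, self.moving_variance)[0]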
def __init__(self):
    super(ConvBN, self).__init__()
    self.conv = P.Conv2D(32, 3)
    self.conv_weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
    self.scale = Tensor(np.ones([32]), mindspore.float32)
    self.bias = Tensor(np.ones([32]), mindspore.float32)
    self.mean = Tensor(np.ones([32]), mindspore.float32)
    self.variance = Tensor(np.ones([32]), mindspore.float32)
    self.bn = P.BatchNorm()
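# A plausible construct for this cell (assumed): P.Conv2D takes the input and
# the weight as call arguments, and the conv output feeds straight into
# BatchNorm, keeping only the normalized output from its 5-tuple.
def construct(self, x):
    x = self.conv(x, self.conv_weight)
    x = self.bn(x, self.scale, self.bias, self.mean, self.variance)[0]
    return x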
def bn_pass(): """ Sub a BN to Softmax. """ bn = P.BatchNorm() pattern = CallWith(bn) softmax = P.Softmax() target = CallWith(softmax, should_replace=False) return pattern, target
def single_bn_pass(): """ Sub a BN which does NOT take Conv as inputs to ReLU6. """ conv2d_prim = Prim("Conv2D") conv2d = Call(conv2d_prim) pattern_0 = NoneOf(conv2d) pattern = Call(P.BatchNorm(), [pattern_0]) target = Call(P.ReLU6(), [pattern_0]) return pattern, target
def __init__(self, input_scale, input_bias, input_mean, input_variance, is_training):
    super(Net, self).__init__()
    self.fused_bn_ex = P.BatchNorm(is_training=is_training, epsilon=1e-5, momentum=0.9)
    self.scale = Parameter(input_scale, name='scale')
    self.bias = Parameter(input_bias, name='b')
    self.mean = Parameter(input_mean, name='mean')
    self.variance = Parameter(input_variance, name='variance')
def single_bn_pass(): """ Sub a BN which does NOT take Conv as inputs to ReLU6. """ conv2d_prim = IsPrimTypeOf("Conv2D") conv2d = CallWith(conv2d_prim) pattern_0 = IsNot(conv2d) pattern = CallWith(P.BatchNorm(), inputs=[pattern_0]) target = CallWith(P.ReLU6(), inputs=[pattern_0]) return pattern, target
def __init__(self, num_features, eps=1e-5, momentum=0.9, affine=True,
             gamma_init='ones', beta_init='zeros',
             moving_mean_init='zeros', moving_var_init='ones',
             use_batch_statistics=True, device_num_each_group=1):
    super(_BatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1")
    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
    self.use_batch_statistics = use_batch_statistics
    self.num_features = num_features
    self.eps = eps
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features), name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features), name="variance", requires_grad=False)
    self.gamma = Parameter(initializer(gamma_init, num_features), name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features), name="beta", requires_grad=affine)
    self.group = check_int_positive(device_num_each_group)
    self.is_global = False
    if self.group != 1:
        self.rank_id = get_rank()
        self.rank_size = get_group_size()
        self.device_list = [i for i in range(0, self.rank_size)]
        self.rank_list = self.list_group(self.device_list, self.group)
        self.rank_list_idx = len(self.rank_list)
        for i in range(self.rank_list_idx):
            if self.rank_id in self.rank_list[i] and self.group != 1:
                self.is_global = True
                management.create_group('group' + str(i), self.rank_list[i])
                self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
    self.shape = P.Shape()
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reshape = P.Reshape()
    self.is_ascend = context.get_context("device_target") == "Ascend"
    if context.get_context("enable_ge"):
        self.is_ge_backend = True
        self.momentum = Tensor(1.0 - momentum, mstype.float32)
    else:
        self.is_ge_backend = False
        self.momentum = 1.0 - momentum
    if self.is_ge_backend or self.is_ascend:
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    else:
        self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)
    data_parallel_strategy = ((1,), (1,))
    data_parallel_strategy_one = ((1,), ())
    self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
    self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
    self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
    self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
    self.assign_sub_mean = P.AssignSub().set_strategy(data_parallel_strategy)
    self.assign_sub_var = P.AssignSub().set_strategy(data_parallel_strategy)
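# On the global path (device_num_each_group > 1), batch statistics are
# synchronized across the device group through the AllReduce created above.
# Hedged sketch of the core idea (not the repo's exact method); it assumes
# every device in the group contributes one equally weighted shard.
def _group_mean(self, batch_mean):
    total = self.all_reduce(batch_mean)  # SUM across the group's devices
    return total / self.group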
from mindspore.ops import Primitive
from mindspore.ops import operations as P

batch_norm = P.BatchNorm(is_training=False)
bn_infer = Primitive('BNInfer')
make_tuple = Primitive('make_tuple')
tuple_getitem = Primitive('tuple_getitem')


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]
def __init__(self, num_features, eps=1e-5, momentum=0.9, affine=True,
             gamma_init='ones', beta_init='zeros',
             moving_mean_init='zeros', moving_var_init='ones',
             use_batch_statistics=None, device_num_each_group=1,
             input_dims='2d', data_format='NCHW'):
    super(_BatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1")
    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
    self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
    if context.get_context("device_target") != "GPU" and self.format == "NHWC":
        raise ValueError("NHWC format is only supported on the GPU target.")
    self.use_batch_statistics = use_batch_statistics
    self.num_features = num_features
    self.eps = eps
    self.input_dims = input_dims
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features), name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features), name="variance", requires_grad=False)
    self.gamma = Parameter(initializer(gamma_init, num_features), name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features), name="beta", requires_grad=affine)
    self.group = validator.check_positive_int(device_num_each_group)
    self.is_global = False
    if self.group != 1:
        self.rank_id = get_rank()
        self.rank_size = get_group_size()
        self.device_list = [i for i in range(0, self.rank_size)]
        self.rank_list = self.list_group(self.device_list, self.group)
        self.rank_list_idx = len(self.rank_list)
        for i in range(self.rank_list_idx):
            if self.rank_id in self.rank_list[i] and self.group != 1:
                self.is_global = True
                management.create_group('group' + str(i), self.rank_list[i])
                self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
    self.shape = P.Shape()
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reshape = P.Reshape()
    self.is_ascend = context.get_context("device_target") == "Ascend"
    self.is_gpu = context.get_context("device_target") == "GPU"
    self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
    self.momentum = 1.0 - momentum
    if context.get_context("enable_ge"):
        self.is_ge_backend = True
    else:
        self.is_ge_backend = False
    if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    elif self.is_gpu:
        self.bn_train = P.FusedBatchNormEx(mode=1, epsilon=self.eps,
                                           momentum=self.momentum, data_format=self.format)
    else:
        self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)
    self.enable_global_sync = self.is_global and (self.is_ge_backend or (self.is_graph_mode and self.is_ascend))
    self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                (self.is_ge_backend or self.is_ascend)
    data_parallel_strategy = ((1,), (1,))
    data_parallel_strategy_one = ((1,), ())
    self.sub_mean = P.Sub().shard(data_parallel_strategy)
    self.sub_var = P.Sub().shard(data_parallel_strategy)
    self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
    self.mul_var = P.Mul().shard(data_parallel_strategy_one)
    self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
    self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
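# For context, public wrappers build on this base cell. A minimal usage
# example through nn.BatchNorm2d (shapes chosen arbitrarily):
import numpy as np
from mindspore import Tensor, nn

bn2d = nn.BatchNorm2d(num_features=3)
bn2d.set_train(False)  # inference: use the moving statistics
x = Tensor(np.random.randn(2, 3, 4, 4).astype(np.float32))
y = bn2d(x)            # output shape matches the input: (2, 3, 4, 4)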
'block': G.LayerNormGrad(),
'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]],
'desc_bprop': [[2, 16], [16], [16]],
'skip': ['backward']}),
('FusedBatchNorm', {
    'block': P.FusedBatchNorm(),
    'desc_inputs': [[128, 64, 32, 64], [64], [64], [64], [64]],
    'desc_bprop': [[128, 64, 32, 64], [64], [64], [64], [64]],
    'skip': []}),
('FusedBatchNormGrad', {
    'block': G.FusedBatchNormGrad(),
    'desc_inputs': [[128, 64, 32, 64], [128, 64, 32, 64], [64], [64], [64]],
    'desc_bprop': [[128, 64, 32, 64], [64], [64], [64], [64]],
    'skip': ['backward']}),
('BatchNorm', {
    'block': P.BatchNorm(),
    'desc_inputs': [[128, 64, 32, 32], [64], [64], [64], [64]],
    'desc_bprop': [[128, 64, 32, 32], [64], [64], [64], [64]],
    'skip': []}),
('BatchNormGrad', {
    'block': G.BatchNormGrad(),
    'desc_inputs': [[128, 64, 32, 32], [128, 64, 32, 32], [64], [64], [64], [64]],
    'desc_bprop': [[128, 64, 32, 32], [64], [64], [64], [64]],
    'skip': ['backward']}),
('ApplyMomentum', {
    'block': P.ApplyMomentum(),
    'desc_inputs': [[128, 32, 32, 64], [128, 32, 32, 64], [32, 32, 64], [32, 32, 64], [32, 32, 64]],
    'desc_bprop': [[128, 32, 32, 64]],
    'skip': ['backward']}),
('TopK', {
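# To make the 'BatchNorm' entry's shapes concrete, a hedged standalone
# invocation in PyNative mode (passing plain tensors for the statistics is
# assumed to be acceptable outside graph mode):
import numpy as np
import mindspore
from mindspore import Tensor, context
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)
bn_op = P.BatchNorm(is_training=True)
x = Tensor(np.random.randn(128, 64, 32, 32).astype(np.float32))
stat = Tensor(np.ones([64]), mindspore.float32)
out = bn_op(x, stat, stat, stat, stat)  # 5-tuple: (y, mean, var, rsv1, rsv2)
print(out[0].shape)                     # (128, 64, 32, 32)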
from mindspore.ops import operations as P
from mindspore.ops import Primitive
import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor

AssignSub = P.AssignSub()
Mul = P.Mul()
Sub = P.Sub()
make_tuple = Primitive('make_tuple')
tuple_getitem = Primitive('tuple_getitem')
depend = Primitive('depend')
BatchNorm = P.BatchNorm()
BNTrainingReduce = Primitive('BNTrainingReduce')
BNTrainingUpdate = Primitive('BNTrainingUpdate')
constant0 = Tensor(0.1, mstype.float32)
constant1 = Tensor(0.1, mstype.float32)


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]
'skip': ['backward']}),
# input is scalar
('Tanh0', {
    'block': (P.Tanh(), {'exception': TypeError, 'error_keywords': ['Tanh']}),
    'desc_inputs': [5.0],
    'skip': ['backward']}),
# input is scalar
('BatchNorm0', {
    'block': (P.BatchNorm(is_training=False), {'exception': TypeError, 'error_keywords': ['BatchNorm']}),
    'desc_inputs': [5.0, 5.0, 5.0, 5.0, 5.0],
    'skip': ['backward']}),
# is_training=False and mean=None
('BatchNorm1', {
    'block': (P.BatchNorm(is_training=False), {'exception': TypeError, 'error_keywords': ['BatchNorm']}),
    'desc_inputs': [
        Tensor(np.ones([5, 3]).astype(np.float32)),
        Tensor(np.ones([5, 3]).astype(np.float32)),
from mindspore.ops import Primitive
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner
from mindspore.ops import _constants as Constants
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype

make_tuple = Primitive('MakeTuple')
tuple_getitem = Primitive(Constants.kTupleGetItem)
bn = P.BatchNorm(is_training=True)
sync_bn = inner.SyncBatchNorm()
fused_bn1 = Primitive('FusedBN1')
fused_bn2 = Primitive('FusedBN2')
fused_bn3 = Primitive('FusedBN3')
bn_training_reduce = Primitive('BNTrainingReduce')
bn_training_update = Primitive('BNTrainingUpdate')
allreduce = Primitive('AllReduce')
mul = Primitive('Mul')
mul_value = Tensor(0.5, mstype.float32)


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]
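# FnDict is a small name-to-function registry used by these IR test files:
# decorating a graph function registers it under its own name, and indexing
# by tag retrieves it. Hedged usage sketch (the 'before' graph and the test
# name are illustrative, not from the original file):
def test_bn_split(tag):
    fns = FnDict()

    @fns
    def before(x, scale, b, mean, variance):
        bn_output = bn(x, scale, b, mean, variance)
        return tuple_getitem(bn_output, 0)

    return fns[tag]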