def __init__(self, in_channel, x):
    super().__init__()
    # self._save_graphs(save_graph_flag=True, save_graph_path=".")
    self.biasadd = P.BiasAdd()
    self.equal = P.Equal()
    self.addn = P.AddN()
    self.conv = Conv2d(in_channels=in_channel, out_channels=in_channel, kernel_size=1, stride=1,
                       has_bias=False, weight_init='ones', pad_mode='same')
    self.bn = BatchNorm2d(num_features=in_channel)
    self.assignadd = P.AssignAdd()
    self.assign = P.Assign()
    self.relu = ReLU()
    self.mean = P.ReduceMean(keep_dims=False)
    self.bias = Parameter(Tensor(np.random.randint(2, size=(3,)).astype(np.float32)), name="bias")
    self.bias2 = Parameter(Tensor(np.ones([3]).astype(np.float32)), name="bias2")
    self.parameterupdate = ParameterUpdate(self.bias)
    self.value = Tensor(np.random.randn(*(3,)), ms.float32)
    self.x = x
def __init__(self):
    super().__init__()
    self.parameter1 = Parameter(Tensor([199.0], ms.float32), name="parameter1")
    self.assign = P.Assign()
    self.assignadd = P.AssignAdd()
    self.addn = P.AddN()
    self.depend = P.Depend()
def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0):
    super(Lamb, self).__init__(learning_rate, params, weight_decay)
    _check_param_value(beta1, beta2, eps, self.cls_name)

    # keep these as 1-element tensors until scalar/tensor mixed operations are supported
    self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
    self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
    self.eps = Tensor(np.array([eps]).astype(np.float32))
    self.params = self.parameters
    self.moments1 = self.params.clone(prefix="lamb_m", init='zeros')
    self.moments2 = self.params.clone(prefix="lamb_v", init='zeros')
    if not self.dynamic_lr:
        self.global_step = Parameter(initializer(0, [1]), name='global_step')
        self.assignadd = P.AssignAdd()
    self.hyper_map = C.HyperMap()
    self.enable_graph_kernel = context.get_context("enable_graph_kernel") and \
        context.get_context("device_target") == "Ascend"
def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    super(Momentum, self).__init__(learning_rate, params)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))

    if isinstance(learning_rate, Iterable) or \
            (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
        self.axis = 0
    else:
        self.dynamic_lr = False
        self.gather = None
        self.assignadd = None
        self.global_step = None
        self.axis = None
    self.momentum = Parameter(momentum, name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.weight_decay = weight_decay * loss_scale
    self.reciprocal_scale = 1.0 / loss_scale
    self.one = Tensor(1, mstype.int32)
def __init__(self, optimizer, epsilon=1e-05, hyperpara=0.001, weight_decay=0.0, use_clip=False,
             decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
             lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
             loss_scale=1.0):
    super(LARS, self).__init__(0.0, [Parameter(Tensor(0.0), name="trivial")])
    self.opt = optimizer
    self.parameters = optimizer.parameters
    self.learning_rate = optimizer.learning_rate
    self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip)
    self.reciprocal_scale = 1.0 / loss_scale
    self.weight_decay = weight_decay * loss_scale
    self.cast = P.Cast()
    self.decay_flag = tuple(decay_filter(x) for x in self.parameters)
    self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
    self.hyper_map = C.HyperMap()
    self.dynamic_lr = False
    self.gather = None
    self.global_step = None
    self.axis = None
    if isinstance(self.learning_rate.default_input, Iterable) or \
            (isinstance(self.learning_rate.default_input, Tensor) and
             self.learning_rate.default_input.dim() == 1):
        self.dynamic_lr = True
        self.assignadd = P.AssignAdd()
        self.gather = P.GatherV2()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="lars_global_step")
        self.axis = 0
def __init__(self):
    super().__init__()
    self.assign_sub = P.AssignAdd()
    self.mul = P.Mul()
    self.mul_weight = Parameter(Tensor(np.full([128, 32], 0.5, dtype=np.float32)), name="mul_weight")
    self.assignsub_weight = Parameter(Tensor(np.full([128, 32], 1.1, dtype=np.float32)), name="assignsub_weight")
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    super(Optimizer, self).__init__()
    if isinstance(learning_rate, float):
        self.dynamic_lr = False
        self.gather = None
        self.assignadd = None
        self.global_step = None
        validator.check_number_range("learning rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT)
    else:
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')
        if isinstance(learning_rate, Iterable):
            learning_rate = Tensor(np.array(list(learning_rate)).astype(np.float32))
        elif isinstance(learning_rate, Tensor):
            if learning_rate.dim() > 1:
                raise ValueError("Learning rate should be a 0 or 1 dim `Tensor`, "
                                 f"but got {learning_rate.dim()}.")
            if learning_rate.dim() == 1 and learning_rate.size() < 2:
                logger.warning("If you want to use the dynamic learning rate, please make sure that the number "
                               "of elements in the list, tuple or tensor passed is greater than 1.")
        else:
            raise TypeError("Learning rate should be float, Tensor or Iterable.")

    if loss_scale <= 0.0:
        raise ValueError("Loss scale should be greater than 0, but got {}".format(loss_scale))
    if weight_decay < 0.0:
        raise ValueError("Weight decay should be equal or greater than 0, but got {}".format(weight_decay))

    self.learning_rate = Parameter(learning_rate, name="learning_rate")
    self.parameters = ParameterTuple(parameters)
    self.reciprocal_scale = 1.0 / loss_scale
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)

    if not self.parameters:
        raise ValueError("optimizer got an empty parameter list.")
def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10,
             use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    super(RMSProp, self).__init__(learning_rate, params)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))

    if decay < 0.0:
        raise ValueError("decay should be at least 0.0, but got decay {}".format(decay))
    self.decay = decay
    self.epsilon = epsilon

    validator.check_type("use_locking", use_locking, [bool])
    validator.check_type("centered", centered, [bool])
    self.centered = centered
    if centered:
        self.opt = P.ApplyCenteredRMSProp(use_locking)
        self.mg = self.parameters.clone(prefix="mean_grad", init='zeros')
    else:
        self.opt = P.ApplyRMSProp(use_locking)

    self.dynamic_lr = False
    if not isinstance(learning_rate, float):
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
        self.axis = 0
        self.one = Tensor(1, mstype.int32)

    self.momentum = momentum
    self.ms = self.parameters.clone(prefix="mean_square", init='zeros')
    self.moment = self.parameters.clone(prefix="moment", init='zeros')
    self.hyper_map = C.HyperMap()
    self.decay = decay
    self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
    self.reciprocal_scale = 1.0 / loss_scale
    self.weight_decay = weight_decay * loss_scale
def __init__(self, params, learning_rate=0.1, momentum=0.0, dampening=0.0, weight_decay=0.0,
             nesterov=False, loss_scale=1.0):
    super(SGD, self).__init__(learning_rate, params)

    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))

    if dampening < 0.0:
        raise ValueError("dampening should be at least 0.0, but got dampening {}".format(dampening))
    self.dampening = dampening

    if weight_decay < 0.0:
        raise ValueError("weight_decay should be at least 0.0, but got weight_decay {}".format(weight_decay))
    self.weight_decay = weight_decay

    validator.check_type("nesterov", nesterov, [bool])
    self.nesterov = nesterov

    self.opt = P.SGD(dampening, weight_decay, nesterov)

    self.dynamic_lr = False
    self.gather = None
    self.global_step = None
    self.axis = None
    if not isinstance(learning_rate, float):
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
        self.axis = 0
    self.momentum = Parameter(momentum, name="momentum")
    self.params = self.parameters
    self.accum = self.params.clone(prefix="accum", init='zeros')
    self.stat = self.params.clone(prefix="stat", init='ones')
    self.hyper_map = C.HyperMap()

    self.weight_decay = weight_decay * loss_scale
    self.reciprocal_scale = 1.0 / loss_scale
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False,
             use_nesterov=False, weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    super(Adam, self).__init__(learning_rate, params)
    _check_param_value(beta1, beta2, eps, weight_decay)
    validator.check_type("use_locking", use_locking, [bool])
    validator.check_type("use_nesterov", use_nesterov, [bool])
    validator.check_type("loss_scale", loss_scale, [float])
    validator.check_number_range("loss_scale", loss_scale, 1.0, float("inf"), Rel.INC_LEFT)

    self.dynamic_lr = False
    if isinstance(learning_rate, Iterable) or \
            (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
        self.axis = 0

    self.beta1 = Tensor(beta1, mstype.float32)
    self.beta2 = Tensor(beta2, mstype.float32)
    self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
    self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power")
    self.eps = eps

    self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
    self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')

    self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
    self.hyper_map = C.HyperMap()
    self.opt = P.Adam(use_locking, use_nesterov)
    self.weight_decay = weight_decay * loss_scale
    self.reciprocal_scale = 1.0 / loss_scale

    self.pow = P.Pow()
    self.sqrt = P.Sqrt()
    self.one = Tensor(np.array([1.0]).astype(np.float32))
    self.realdiv = P.RealDiv()
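# Many of the optimizer constructors in this collection set up the same dynamic
# learning-rate bookkeeping (gather / assignadd / global_step / axis / one).
# The standalone sketch below is a hedged illustration of how those pieces
# interact at run time; the values are made up and this is not copied from the
# Adam source. Run in PyNative mode.
import numpy as np
import mindspore as ms
from mindspore import context, Tensor, Parameter
from mindspore.common.initializer import initializer
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)

learning_rate = Tensor(np.array([0.1, 0.05, 0.01], np.float32))  # per-step lr schedule
global_step = Parameter(initializer(0, [1], ms.int32), name="global_step")
gather = P.Gather()        # the snippets above use the older alias P.GatherV2
assignadd = P.AssignAdd()
one = Tensor(1, ms.int32)
axis = 0

for _ in range(3):
    lr = gather(learning_rate, global_step, axis)  # lr for the current step
    assignadd(global_step, one)                    # global_step += 1, in place
    print(lr)                                      # [0.1], then [0.05], then [0.01]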
def _init_group_params(self, parameters, learning_rate, weight_decay):
    """Init learning rate or weight decay in group params."""
    origin_dynamic_lr = self.dynamic_lr
    self._parse_group_params(parameters, learning_rate)
    if self.dynamic_lr and not origin_dynamic_lr:
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    params_store = []
    for group_param in parameters:
        if 'order_params' in group_param.keys():
            ordered_parameters = group_param['order_params']
            continue

        self.group_params += group_param['params']
        if 'lr' in group_param.keys():
            params_dynamic_lr = isinstance(group_param['lr'], (Iterable, Tensor))
            if self.dynamic_lr and not params_dynamic_lr:
                lr = Tensor(np.array([group_param['lr']] * self.dynamic_lr_length).astype(np.float32))
            else:
                lr = self._get_single_lr(group_param['lr'])
        else:
            if self.dynamic_lr and not origin_dynamic_lr:
                lr = Tensor(np.array([self.scalar_lr] * self.dynamic_lr_length).astype(np.float32))
            else:
                lr = learning_rate

        if 'weight_decay' in group_param.keys():
            validator.check_float_legal_value('weight_decay', group_param['weight_decay'], None)
            validator.check_number_range('weight_decay', group_param['weight_decay'], 0.0, float("inf"),
                                         Rel.INC_LEFT, self.cls_name)
            weight_decay_ = group_param['weight_decay'] * self.loss_scale
        else:
            weight_decay_ = weight_decay * self.loss_scale

        for key in group_param.keys():
            if key not in ('params', 'lr', 'weight_decay'):
                logger.warning(f"The optimizer cannot parse '{key}' when setting parameter groups.")

        for param in group_param['params']:
            validator.check_value_type("parameter", param, [Parameter], self.cls_name)
            if param.name in params_store:
                raise RuntimeError(f"The {param.name} parameter has appeared in parameter groups.")

            params_store.append(param.name)
            self.group_lr.append(Parameter(lr, name="lr_" + param.name))
            self.group_weight_decay.append(weight_decay_)

    if self.is_group_params_ordered:
        self._order_and_adjust_group_params(ordered_parameters, learning_rate, weight_decay)
def __init__(self):
    super(AssignAdd, self).__init__()
    self.add = P.AssignAdd()
def __init__(self):
    super(Assign_RAW, self).__init__()
    self.assign_add = P.AssignAdd()
    self.greater = P.Greater()
    self.add = P.Add()
    self.para = Parameter(Tensor(1, dtype=ms.int32), name='para')
def _cumulative_gard(grad_sum, grad):
    """Apply grad sum to cumulative gradient."""
    add = P.AssignAdd()
    return add(grad_sum, grad)
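# A hedged, self-contained sketch (not from the source) of how the helper above
# behaves: the first argument must be a Parameter, and AssignAdd updates it in
# place. In the accumulation cells these calls are normally mapped over tuples
# of gradient buffers with C.HyperMap; here a single buffer is used. PyNative mode.
import numpy as np
from mindspore import context, Parameter, Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)


def _cumulative_gard(grad_sum, grad):
    """Apply grad sum to cumulative gradient."""
    add = P.AssignAdd()
    return add(grad_sum, grad)


grad_sum = Parameter(Tensor(np.zeros([2], np.float32)), name="grad_sum")  # accumulation buffer
grad = Tensor(np.ones([2], np.float32))                                   # stand-in gradient
_cumulative_gard(grad_sum, grad)
_cumulative_gard(grad_sum, grad)
print(grad_sum.asnumpy())  # [2. 2.] -- two gradients accumulated in place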
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             damping=0.03, loss_scale=1, frequency=278, has_bias=True, activation=None):
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")

    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")

        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()

    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)),
                                  name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)),
                                  name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))

    self.matmul = P.MatMul(transpose_b=True)
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = Tensor(damping)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.pad = P.Pad(((0, 24), (0, 24)))
    self.pad1 = P.Pad(((0, 8), (0, 8)))
    self.slice = P.Slice()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
    self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
    self.fused_abs_max1 = P.CusFusedAbsMax1([1000, 1000])
    self.fused_abs_max2 = P.CusFusedAbsMax1()
    self.log = P.Log()
    self.exp = P.Exp()
    self.dampingA = Tensor(np.identity(2048), mstype.float32)
    self.dampingG = Tensor(np.identity(1024), mstype.float32)
    self.add = P.TensorAdd()
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             damping=0.03, loss_scale=1, frequency=100, has_bias=False, activation=None,
             batch_size=12):
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
                weight_init.shape()[1] != in_channels:
            raise ValueError("weight_init shape error")

    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
                raise ValueError("bias_init shape error")

        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()

    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    self.matrix_A_inv = Parameter(Tensor(np.zeros([in_channels, in_channels]).astype(np.float16)),
                                  name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([out_channels, out_channels]).astype(np.float16)),
                                  name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(np.zeros([out_channels, out_channels]).astype(np.float16))

    self.matmul = P.MatMul(transpose_b=True)
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = damping
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.abs = P.Abs()
    self.reduce_max = P.ReduceMax(keep_dims=False)
    self.log = P.Log()
    self.exp = P.Exp()
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.batch_size = batch_size
def __init__(self, value):
    super(AssignAdd, self).__init__()
    self.var = Parameter(value, name="var")
    self.add = P.AssignAdd()
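# The snippet above only shows the constructor. A minimal, hedged sketch of the
# construct method and a call such an AssignAdd cell typically pairs with; the
# shapes and values are illustrative and not taken from the source. PyNative mode.
import numpy as np
from mindspore import context, nn, Tensor, Parameter
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)


class AssignAdd(nn.Cell):
    def __init__(self, value):
        super(AssignAdd, self).__init__()
        self.var = Parameter(value, name="var")
        self.add = P.AssignAdd()

    def construct(self, y):
        # var += y, performed in place; the updated Parameter value is returned.
        return self.add(self.var, y)


net = AssignAdd(Tensor(np.ones([2], np.float32)))
out = net(Tensor(np.full([2], 3.0, np.float32)))
print(out)                 # [4. 4.]
print(net.var.asnumpy())   # the Parameter itself now holds [4. 4.]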
def __init__(self):
    super(Net, self).__init__()
    self.AssignAdd = P.AssignAdd()
    self.inputdata = Parameter(initializer(1, [1], ms.int64), name="global_step")
    print("inputdata: ", self.inputdata)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0,
             dilation=1, group=1, eps=1e-5, momentum=0.997, weight_init=None, beta_init=None,
             gamma_init=None, mean_init=None, var_init=None, quant_delay=0, freeze_bn=100000,
             fake=True, num_bits=8, per_channel=False, symmetric=False, narrow_range=False):
    super(Conv2dBatchNormQuant, self).__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.pad_mode = pad_mode
    self.padding = padding
    self.dilation = twice(dilation)
    self.stride = twice(stride)
    self.group = group
    self.fake = fake
    self.freeze_bn = freeze_bn
    self.momentum = momentum
    self.quant_delay = quant_delay
    if isinstance(kernel_size, int):
        self.kernel_size = (kernel_size, kernel_size)
    else:
        self.kernel_size = kernel_size

    if weight_init is None:
        weight_init = initializer('normal', [out_channels, in_channels // group, *self.kernel_size])
    self.weight = Parameter(weight_init, name='weight')

    if gamma_init is None:
        gamma_init = initializer('ones', [out_channels])
    self.gamma = Parameter(gamma_init, name='gamma')
    if beta_init is None:
        beta_init = initializer('zeros', [out_channels])
    self.beta = Parameter(beta_init, name='beta')
    if mean_init is None:
        mean_init = initializer('zeros', [out_channels])
    self.moving_mean = Parameter(mean_init, name='moving_mean', requires_grad=False)
    if var_init is None:
        var_init = initializer('ones', [out_channels])
    self.moving_variance = Parameter(var_init, name='moving_variance', requires_grad=False)

    self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False)

    self.conv = P.Conv2D(out_channel=self.out_channels,
                         kernel_size=self.kernel_size,
                         mode=1,
                         pad_mode=self.pad_mode,
                         pad=self.padding,
                         stride=self.stride,
                         dilation=self.dilation,
                         group=self.group)
    self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                 max_init=6,
                                                 ema=False,
                                                 num_bits=num_bits,
                                                 quant_delay=quant_delay,
                                                 per_channel=per_channel,
                                                 out_channels=out_channels,
                                                 symmetric=symmetric,
                                                 narrow_range=narrow_range)
    self.batchnorm_fold_train = P.BatchNormFold(epsilon=eps,
                                                momentum=momentum,
                                                is_training=True,
                                                freeze_bn=freeze_bn)
    self.batchnorm_fold_infer = P.BatchNormFold(epsilon=eps,
                                                momentum=momentum,
                                                is_training=False,
                                                freeze_bn=freeze_bn)
    self.correct_mul = P.CorrectionMul()
    self.relu = P.ReLU()
    self.batchnorm_fold2 = P.BatchNormFold2(freeze_bn=freeze_bn)
    self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0)
    self.one = Tensor(1, mstype.int32)
    self.assignadd = P.AssignAdd()
def __init__(self, para):
    super(AssignAddNet, self).__init__()
    self.para = Parameter(para, name="para")
    self.assign_add = P.AssignAdd()
def __init__(self):
    super(AssignAddNet, self).__init__()
    self.op = P.AssignAdd()
    self.inputdata = Parameter(Tensor(np.zeros([1]).astype(np.bool_), mstype.bool_), name="assign_add1")
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], None)
    validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, None)

    if isinstance(weight_decay, int):
        weight_decay = float(weight_decay)
    validator.check_value_type("weight_decay", weight_decay, [float], None)
    validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, None)

    self.is_group = False
    self.is_group_lr = False
    self.loss_scale = loss_scale
    if isinstance(learning_rate, float):
        self.dynamic_lr = False
        self.gather = None
        self.assignadd = None
        self.global_step = None
        self.scalar_lr = learning_rate
    else:
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')
        self.scalar_lr = None

    learning_rate = self._get_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        self.is_group = True
        self.params = []
        self.group_lr = []
        self.group_weight_decay = []
        self._init_group_params(parameters, learning_rate, weight_decay)

    if self.is_group_lr:
        self.learning_rate = ParameterTuple(self.group_lr)
    else:
        self.learning_rate = Parameter(learning_rate, name="learning_rate")

    if self.is_group:
        self.parameters = ParameterTuple(self.params)
        self.weight_decay = tuple(self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)

    self.reciprocal_scale = 1.0 / loss_scale
    self.exec_weight_decay = any(self.decay_flags)
    self.param_length = len(self.parameters)
    ('StridedSlice_2_Error', {
        'block': (lambda x: P.StridedSlice(end_mask="1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_3_Error', {
        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_4_Error', {
        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('AssignAdd_Error', {
        'block': (P.AssignAdd(), {'exception': IndexError}),
        'desc_inputs': [[1]]}),
]


@mindspore_test(pipeline_for_verify_exception_for_case_by_case_config)
def test_check_exception():
    return raise_set
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)
    self.grad_centralization = False

    self._unique = True
    self._target = context.get_context("device_target")
    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False
    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self.group_grad_centralization = []
        self._init_group_params(parameters, learning_rate, weight_decay, self.grad_centralization)

    # The final value of dynamic_lr can be determined after parse_single_lr and init_group_params
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if self.is_group_lr:
        self.learning_rate = CellList(self.group_lr) if self.dynamic_lr else ParameterTuple(self.group_lr)
    else:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')

    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        self.weight_decay_tensor_tuple = tuple(Tensor(x, mstype.float32) for x in self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
        self.grad_centralization_flags = tuple(self.group_grad_centralization)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        self.weight_decay_tensor = Tensor(self.weight_decay, mstype.float32)
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0

    # When a parameter has already been unique, there is no need to do another unique in the optimizer.
    for param in self.parameters:
        if param.unique:
            self._unique = False
            break

    ps_filter = lambda x: x.is_param_ps
    self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
    cache_filter = lambda x: x.cache_enable
    self.cache_enable = tuple(cache_filter(x) for x in self.parameters)
    self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
    self.need_scale = loss_scale != 1.0
    self.global_step_increase_tensor = Tensor(1, mstype.int32)
    self.param_length = len(self.parameters)
    self.map_ = C.Map()
    self._use_parallel_optimizer()
def __init__(self):
    super().__init__()
    self.op = P.AssignAdd()
    self.inputdata = Parameter(initializer(1, [1], ms.float32), name="global_step")
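# A hedged usage sketch for the constructor above: a step-counter cell that
# bumps its Parameter with AssignAdd. The class name, construct body, and call
# are illustrative assumptions, not from the source. PyNative mode.
import mindspore as ms
from mindspore import context, nn, Tensor, Parameter
from mindspore.common.initializer import initializer
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)


class GlobalStepNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.op = P.AssignAdd()
        self.inputdata = Parameter(initializer(1, [1], ms.float32), name="global_step")

    def construct(self, increment):
        # global_step += increment, updated in place on the Parameter.
        self.op(self.inputdata, increment)
        return self.inputdata


net = GlobalStepNet()
net(Tensor([2.0], ms.float32))
print(net.inputdata.asnumpy())  # [3.] -- initial value 1 plus increment 2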
@non_graph_engine
@mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config)
def test_exec():
    context.set_context(mode=context.GRAPH_MODE)
    return test_exec_case


raise_set = [
    ('StridedSlice_1_Error', {
        'block': (lambda x: P.StridedSlice(begin_mask="1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_2_Error', {
        'block': (lambda x: P.StridedSlice(end_mask="1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_3_Error', {
        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_4_Error', {
        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('AssignAdd_Error', {
        'block': (P.AssignAdd(), {'exception': ValueError}),
        'desc_inputs': [[1]]}),
]


@mindspore_test(pipeline_for_verify_exception_for_case_by_case_config)
def test_check_exception():
    return raise_set
def __init__(self):
    super(AssignAddNet, self).__init__()
    self.AssignAdd = P.AssignAdd()
    self.inputdata = Parameter(initializer(1, [1], ms.float16), name="KIND_AUTOCAST_SCALAR_TO_TENSOR")
    self.one = 1
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)

    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False
    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self._init_group_params(parameters, learning_rate, weight_decay)

    # The final value of dynamic_lr can be determined after parse_single_lr and init_group_params
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if self.is_group_lr:
        if self.dynamic_lr:
            self.learning_rate = CellList(self.group_lr)
        else:
            self.learning_rate = ParameterTuple(self.group_lr)
    else:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0
    ps_filter = lambda x: x.is_param_ps
    self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
    self.reciprocal_scale = 1.0 / loss_scale
    self.param_length = len(self.parameters)
    self.map_ = C.Map()

    use_parallel = context.get_auto_parallel_context("enable_parallel_optimizer")
    self.use_parallel = use_parallel
    if use_parallel:
        if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
            raise RuntimeError("Optimizer segmentation does not support optimizer {}".format(self.cls_name))
        if _get_parallel_mode() != ParallelMode.DATA_PARALLEL:
            raise RuntimeError("Optimizer segmentation does not support parallel mode {}".format(
                _get_parallel_mode()))
        self.dev_num = _get_device_num()
        if self.dev_num > self.param_length:
            raise RuntimeError("Optimizer segmentation can not be applied when the number of parameters {} is"
                               " less than the number of devices {}".format(self.param_length, self.dev_num))
        self.param_rank = self._get_parameter_group_id()
        self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
        self.param_names = []
        for param in self.parameters:
            self.param_names.append(param.name)
    else:
        self.optim_filter = (True,) * self.param_length
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)

    self._unique = True
    self._target = context.get_context("device_target")
    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False
    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self._init_group_params(parameters, learning_rate, weight_decay)

    # The final value of dynamic_lr can be determined after parse_single_lr and init_group_params
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if self.is_group_lr:
        if self.dynamic_lr:
            self.learning_rate = CellList(self.group_lr)
        else:
            self.learning_rate = ParameterTuple(self.group_lr)
    else:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')

    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        self.weight_decay_tensor_tuple = tuple(Tensor(x, mstype.float32) for x in self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        self.weight_decay_tensor = Tensor(self.weight_decay, mstype.float32)
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0

    # When a parameter has already been unique, there is no need to do another unique in the optimizer.
    for param in self.parameters:
        if param.unique:
            self._unique = False
            break

    ps_filter = lambda x: x.is_param_ps
    self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
    ps_cache_filter = lambda x: x.cache_enable
    self.cache_enable = tuple(ps_cache_filter(x) for x in self.parameters)
    self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
    self.need_scale = loss_scale != 1.0
    self.global_step_increase_tensor = Tensor(1, mstype.int32)
    self.param_length = len(self.parameters)
    self.map_ = C.Map()

    if context.get_auto_parallel_context("enable_parallel_optimizer"):
        if _get_parallel_mode() == ParallelMode.DATA_PARALLEL and \
                context.get_context("device_target") == "Ascend":
            self.use_parallel = True
        elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                and context.get_context("device_target") != "Ascend":
            raise RuntimeError("Parallel optimizer only supports Ascend in data parallel mode.")
        elif _get_parallel_mode() in (ParallelMode.STAND_ALONE, ParallelMode.HYBRID_PARALLEL):
            raise RuntimeError("Parallel optimizer is not supported in {}.".format(_get_parallel_mode()))
        else:
            self.use_parallel = False
    else:
        self.use_parallel = False

    if self.use_parallel:
        if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
            raise RuntimeError("Parallel optimizer does not support optimizer {}".format(self.cls_name))
        self.dev_num = _get_device_num()
        if self.dev_num > self.param_length:
            raise RuntimeError("Parallel optimizer can not be applied when the number of parameters {} is"
                               " less than the number of devices {}".format(self.param_length, self.dev_num))
        self.param_rank = self._get_parameter_group_id()
        self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
        self.param_names = []
        for param in self.parameters:
            self.param_names.append(param.name)
    else:
        self.optim_filter = (True,) * self.param_length
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.ops import Primitive
from mindspore.ops import operations as P
from mindspore.ops import _constants as Constants

depend = P.Depend()
all_reduce = P.AllReduce()
broadcast = P.Broadcast(1)
tensor_move = Primitive('TensorMove')
make_tuple = Primitive('MakeTuple')
tuple_getitem = Primitive(Constants.kTupleGetItem)
assign_add = P.AssignAdd()
apply_momentun = P.ApplyMomentum()
relu = P.ReLU()


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]