def __init__(self, group, transpose_a=False, transpose_b=False):
    """Create the operators used by the MatmulAllgather cell.

    Args:
        group: Communication group passed as ``group`` to both
            ``P.AllGather`` and ``P.AllReduce``.
        transpose_a (bool): First positional argument of ``P.MatMul``.
        transpose_b (bool): Second positional argument of ``P.MatMul``.
    """
    super(MatmulAllgather, self).__init__()

    # Gather + matmul stage.
    self.allgather = P.AllGather(group=group)
    self.matmul = P.MatMul(transpose_a, transpose_b)

    # Element-wise power followed by reduction operators.
    self.pow = P.Pow()
    self.reduce_sum = P.ReduceSum()
    self.allreduce = P.AllReduce(group=group)
def __init__(self):
    """Create the layers, operators and parameters of Rerank_Downstream.

    The numeric suffix of every attribute is kept unchanged; the
    layer sizes (4096 -> 8192 -> 2) are the literals of the original code.
    """
    super(Rerank_Downstream, self).__init__()

    in_features = 4096
    hidden_features = 8192
    out_features = 2
    vector_shape = (hidden_features,)

    self.dense_0 = nn.Dense(in_channels=in_features,
                            out_channels=hidden_features,
                            has_bias=True)
    self.relu_1 = nn.ReLU()

    # Mean / variance style operators (keep_dims=True on both reductions).
    self.reducemean_2 = P.ReduceMean(keep_dims=True)
    self.sub_3 = P.Sub()
    self.sub_4 = P.Sub()
    self.pow_5 = P.Pow()
    self.pow_5_input_weight = 2.0
    self.reducemean_6 = P.ReduceMean(keep_dims=True)
    self.add_7 = P.Add()
    self.add_7_bias = 9.999999960041972e-13
    self.sqrt_8 = P.Sqrt()
    self.div_9 = P.Div()

    # Randomly initialised scale and shift vectors; the two draws happen in
    # the same order as before (weight first, bias second).
    self.mul_10 = P.Mul()
    scale_init = np.random.uniform(0, 1, vector_shape).astype(np.float32)
    self.mul_10_w = Parameter(Tensor(scale_init), name=None)
    self.add_11 = P.Add()
    shift_init = np.random.uniform(0, 1, vector_shape).astype(np.float32)
    self.add_11_bias = Parameter(Tensor(shift_init), name=None)

    self.dense_12 = nn.Dense(in_channels=hidden_features,
                             out_channels=out_features,
                             has_bias=True)
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8,
             use_locking=False, use_nesterov=False, weight_decay=0.0, loss_scale=1.0):
    """Validate the hyper-parameters and create the Adam optimizer state.

    Args:
        params: Parameters to optimise; forwarded to the base class.
        learning_rate: Forwarded to the base class.
        beta1: Stored as a float32 ``Tensor`` in ``self.beta1``.
        beta2: Stored as a float32 ``Tensor`` in ``self.beta2``.
        eps: Stored unchanged in ``self.eps``.
        use_locking (bool): Forwarded to ``P.Adam``.
        use_nesterov (bool): Forwarded to ``P.Adam``.
        weight_decay: Forwarded to the base class.
        loss_scale (float): Forwarded to the base class; checked against the
            range starting at 1.0 with ``Rel.INC_LEFT``.
    """
    super(Adam, self).__init__(learning_rate, params, weight_decay, loss_scale)

    # ---- argument validation -------------------------------------------
    _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
    for flag_name, flag_value in (("use_locking", use_locking),
                                  ("use_nesterov", use_nesterov)):
        validator.check_value_type(flag_name, flag_value, [bool], self.cls_name)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_number_range("loss_scale", loss_scale, 1.0, float("inf"),
                                 Rel.INC_LEFT, self.cls_name)

    # ---- hyper-parameters and running powers ---------------------------
    self.beta1 = Tensor(beta1, mstype.float32)
    self.beta2 = Tensor(beta2, mstype.float32)
    self.beta1_power = Parameter(initializer(1, [1], mstype.float32),
                                 name="beta1_power")
    self.beta2_power = Parameter(initializer(1, [1], mstype.float32),
                                 name="beta2_power")
    self.eps = eps

    # ---- moment buffers, zero-initialised clones of the parameters ------
    self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
    self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')

    # ---- operators -----------------------------------------------------
    self.hyper_map = C.HyperMap()
    self.opt = P.Adam(use_locking, use_nesterov)

    self.pow = P.Pow()
    self.sqrt = P.Sqrt()
    self.one = Tensor(np.array([1.0]).astype(np.float32))
    self.realdiv = P.RealDiv()
def __init__(self, params, decay_steps, learning_rate=0.001, end_learning_rate=0.0001,
             power=10.0, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0):
    """Create the state of the AdamWeightDecayDynamicLR optimizer.

    Scalar hyper-parameters are wrapped in one-element float32 tensors,
    except ``power`` which is stored as given.

    Args:
        params: Parameters to optimise; forwarded to the base class.
        decay_steps: Stored as a tensor in ``self.decay_steps``.
        learning_rate: Forwarded to the base class; also used to compute
            ``self.diff_learning_rate``.
        end_learning_rate: Stored as a tensor in ``self.end_learning_rate``.
        power: Stored unchanged in ``self.power``.
        beta1, beta2, eps, weight_decay: Checked by ``_check_param_value``
            and stored as tensors.
    """
    super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params)
    _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)

    def _f32_vector(value):
        # One-element float32 tensor holding ``value``.
        return Tensor(np.array([value]).astype(np.float32))

    # turn them to scalar when me support scalar/tensor mix operations
    self.global_step = Parameter(initializer(0, [1]), name="global_step")
    self.decay_steps = _f32_vector(decay_steps)
    self.end_learning_rate = _f32_vector(end_learning_rate)
    self.diff_learning_rate = _f32_vector(learning_rate - end_learning_rate)
    self.power = power
    self.beta1 = _f32_vector(beta1)
    self.beta2 = _f32_vector(beta2)
    self.eps = _f32_vector(eps)
    self.weight_decay_tensor = _f32_vector(weight_decay)

    # Moment buffers are zero-initialised clones of the parameters.
    self.params = self.parameters
    self.moments1 = self.params.clone(prefix="adam_m", init='zeros')
    self.moments2 = self.params.clone(prefix="adam_v", init='zeros')

    self.hyper_map = C.HyperMap()
    self.min = P.Minimum()
    self.pow = P.Pow()
    self.one = _f32_vector(1.0)
def __init__(self, max_val=1.0, power_factors=(0.0448, 0.2856, 0.3001, 0.2363, 0.1333),
             filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
    """Validate the arguments and build the per-level convolutions of MSSSIM.

    Args:
        max_val (int, float): Checked to be greater than 0.0.
        power_factors (tuple, list): One entry per level; its length
            determines how many convolution cells are created.
        filter_size (int): Checked to be at least 1.
        filter_sigma (float): Checked to be positive.
        k1 (float): Type-checked and stored in ``self.k1``.
        k2 (float): Type-checked and stored in ``self.k2``.
    """
    super(MSSSIM, self).__init__()

    # ---- validation ----------------------------------------------------
    validator.check_value_type('max_val', max_val, [int, float], self.cls_name)
    validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name)
    self.max_val = max_val
    validator.check_value_type('power_factors', power_factors, [tuple, list],
                               self.cls_name)
    self.filter_size = validator.check_integer('filter_size', filter_size, 1,
                                               Rel.GE, self.cls_name)
    self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma,
                                                       self.cls_name)
    self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name)
    self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name)

    # ---- one frozen convolution per level ------------------------------
    window = _create_window(filter_size, filter_sigma)
    self.level = len(power_factors)
    self.conv = []
    for _ in range(self.level):
        level_conv = _conv2d(1, 1, filter_size, Tensor(window))
        # The window weights are fixed and must not receive gradients.
        level_conv.weight.requires_grad = False
        self.conv.append(level_conv)
    self.multi_convs_list = CellList(self.conv)

    # ---- remaining tensors and operators -------------------------------
    self.weight_tensor = Tensor(power_factors, mstype.float32)
    self.avg_pool = AvgPool2d(kernel_size=2, stride=2, pad_mode='valid')
    self.relu = ReLU()
    self.reduce_mean = P.ReduceMean()
    self.prod = P.ReduceProd()
    self.pow = P.Pow()
    self.pack = P.Pack(axis=-1)
    self.concat = P.Concat(axis=1)
def test_broadcast_diff_dims():
    """Compare broadcasting binary ops against NumPy for shapes (2,) and (2, 1).

    Runs in graph mode on the GPU target and checks Minimum, Maximum,
    Greater, Less and Pow.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    # Seed the generator so a failing run can be reproduced; the other
    # random-input tests in this file use the same seed.
    np.random.seed(42)
    x1_np = np.random.rand(2).astype(np.float32)
    x2_np = np.random.rand(2, 1).astype(np.float32)

    output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.minimum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Maximum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.maximum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Greater()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np > x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np < x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.power(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)
def __init__(self, model, train_dataset, task_type, num_classes=None, epochs=1,
             epi_uncer_model_path=None, ale_uncer_model_path=None, save_model=False):
    """Store the evaluation settings and validate them.

    Args:
        model: Used as the epistemic model; a deep copy becomes the
            aleatoric model. Must be a ``Cell``.
        train_dataset: Stored for both the epistemic and aleatoric models.
        task_type (str): Either 'regression' or 'classification'.
        num_classes: Validated as a positive int only for classification.
        epochs: Validated as a positive int.
        epi_uncer_model_path: Stored unchanged.
        ale_uncer_model_path: Stored unchanged.
        save_model: Validated as a bool.

    Raises:
        TypeError: If ``model`` is not a ``Cell``.
        ValueError: If ``task_type`` is unsupported, or ``save_model`` is
            true while one of the two model paths is None.
    """
    # Models and datasets: the aleatoric side works on its own model copy.
    self.epi_model = model
    self.ale_model = deepcopy(model)
    self.epi_train_dataset = train_dataset
    self.ale_train_dataset = train_dataset

    self.task_type = task_type
    self.epochs = Validator.check_positive_int(epochs)
    self.epi_uncer_model_path = epi_uncer_model_path
    self.ale_uncer_model_path = ale_uncer_model_path
    self.save_model = Validator.check_bool(save_model)

    # Uncertainty models are left unset here.
    self.epi_uncer_model = None
    self.ale_uncer_model = None

    self.concat = P.Concat(axis=0)
    self.sum = P.ReduceSum()
    self.pow = P.Pow()

    if not isinstance(model, Cell):
        raise TypeError('The model should be Cell type.')
    if task_type not in ('regression', 'classification'):
        raise ValueError('The task should be regression or classification.')

    self.num_classes = (Validator.check_positive_int(num_classes)
                        if task_type == 'classification' else num_classes)

    paths_missing = epi_uncer_model_path is None or ale_uncer_model_path is None
    if save_model and paths_missing:
        raise ValueError("If save_model is True, the epi_uncer_model_path and "
                         "ale_uncer_model_path should not be None.")
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Geometric"):
    """
    Constructor of Geometric distribution.

    Args:
        probs: Optional probability value(s). When given, it is cast to a
            float32 tensor and validated with ``check_prob``; otherwise
            ``self._probs`` stays None.
        seed: Seed forwarded to ``P.UniformReal``.
        dtype: Forwarded to the base class.
        name (str): Forwarded to the base class.
    """
    # Must stay the first statement: it snapshots the constructor arguments
    # before any other local name exists.
    param = dict(locals())
    super(Geometric, self).__init__(dtype, name, param)

    if probs is not None:
        self._probs = cast_to_tensor(probs, dtype=mstype.float32)
        check_prob(self._probs)
    else:
        self._probs = probs

    # ``np.float`` was only an alias of the builtin ``float``; it is deprecated
    # since NumPy 1.20 and removed in 1.24, so use the builtin directly.
    self.minval = np.finfo(float).tiny

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.floor = P.Floor()
    self.issubclass = P.IsSubClass()
    self.less = P.Less()
    self.log = P.Log()
    self.pow = P.Pow()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = P.UniformReal(seed=seed)
def __init__(self, model, train_dataset, task_type, num_classes=None, epochs=1,
             epi_uncer_model_path=None, ale_uncer_model_path=None, save_model=False):
    """Store the evaluation settings and validate them.

    Args:
        model: Stored unchanged in ``self.model``.
        train_dataset: Stored unchanged in ``self.train_dataset``.
        task_type (str): Either 'regression' or 'classification'.
        num_classes: Validated with ``check_int_positive`` when given;
            required for classification.
        epochs: Stored unchanged.
        epi_uncer_model_path: Stored unchanged.
        ale_uncer_model_path: Stored unchanged.
        save_model: Stored unchanged.

    Raises:
        ValueError: If ``task_type`` is unsupported, if a classification
            task has no ``num_classes``, or if ``save_model`` is true while
            one of the two model paths is None.
    """
    self.model = model
    self.train_dataset = train_dataset
    self.task_type = task_type
    # None is the documented default and is handled explicitly further down,
    # so it must not be handed to the positive-int validator (which
    # presumably rejects non-int values -- confirm against its definition).
    self.num_classes = None if num_classes is None else check_int_positive(num_classes)
    self.epochs = epochs
    self.epi_uncer_model_path = epi_uncer_model_path
    self.ale_uncer_model_path = ale_uncer_model_path
    self.save_model = save_model
    self.epi_uncer_model = None
    self.ale_uncer_model = None
    self.concat = P.Concat(axis=0)
    self.sum = P.ReduceSum()
    self.pow = P.Pow()

    if self.task_type not in ('regression', 'classification'):
        raise ValueError(
            'The task should be regression or classification.')
    if self.task_type == 'classification':
        if self.num_classes is None:
            raise ValueError("Classification task needs to input labels.")
    if self.save_model:
        if self.epi_uncer_model_path is None or self.ale_uncer_model_path is None:
            raise ValueError(
                "If save_model is True, the epi_uncer_model_path and "
                "ale_uncer_model_path should not be None.")
def test_pow():
    """Check that P.Pow cubes a 2x2 tensor when given the scalar exponent 3.0."""
    base = Tensor(np.array([[2, 2], [3, 3]]))
    pow_op = P.Pow()
    expected = np.array([[8, 8], [27, 27]])

    actual = pow_op(base, 3.0).asnumpy()

    assert np.all(actual == expected)
def __init__(self, group, transpose_a=False, transpose_b=False):
    """Create the operators used by the MatmulReduce cell.

    Args:
        group: Communication group passed as ``group`` to both
            ``P.AllReduce`` operators.
        transpose_a (bool): First positional argument of both ``P.MatMul``.
        transpose_b (bool): Second positional argument of both ``P.MatMul``.
    """
    super(MatmulReduce, self).__init__()

    # First matmul / all-reduce pair.
    self.matmul1 = P.MatMul(transpose_a, transpose_b)
    self.allreduce1 = P.AllReduce(group=group)

    # Second matmul, followed by the power and reduction operators.
    self.matmul2 = P.MatMul(transpose_a, transpose_b)
    self.pow = P.Pow()
    self.reduce_sum = P.ReduceSum()
    self.allreduce2 = P.AllReduce(group=group)
def __init__(self):
    """Create the operators, the exponent tensor and two scalar parameters."""
    super().__init__()

    # Operators.
    self.pow = P.Pow()
    self.print = P.Print()
    self.assign = P.Assign()

    # Constant exponent and the two float32 parameters (1.0 and 3.0).
    self.exponent = Tensor([2.0], ms.float32)
    self.para1 = Parameter(Tensor(1.0, dtype=ms.float32), name='para1')
    self.para2 = Parameter(Tensor(3.0, dtype=ms.float32), name='para2')
def __init__(self):
    """Create the operators used by NewGeLU: Mul, Pow, Add and Tanh."""
    super(NewGeLU, self).__init__()
    # ``self.mul`` used to be assigned twice with identical ``P.Mul()``
    # instances; a single assignment is sufficient.
    self.mul = P.Mul()
    self.pow = P.Pow()
    self.add = P.Add()
    self.tanh = nn.Tanh()
def _update_run_op_graph_kernel(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_flag):
    """
    Update parameters.

    The update is delegated to the fused operators ``G.LambNextMV`` and
    ``G.LambUpdateWithLR``; this function only prepares their inputs.

    Args:
        beta1 (Tensor): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
        beta2 (Tensor): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
        eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0.
        global_step (Tensor): Global step.
        lr (Tensor): Learning rate.
        weight_decay (Number): Weight decay. Should be equal to or greater than 0.
        param (Tensor): Parameters.
        m (Tensor): m value of parameters.
        v (Tensor): v value of parameters.
        gradient (Tensor): Gradient of parameters.
        decay_flag (bool): Specifies whether param update with weight decay.

    Returns:
        Tensor, the new value of v after updating.
    """
    op_mul = P.Mul()
    op_square = P.Square()
    op_cast = P.Cast()
    op_shape = P.Shape()
    op_pow = P.Pow()
    op_norm = layer.Norm()
    op_fill = P.Fill()
    op_dtype = P.DType()

    # Work on float32 copies of the parameter and its gradient.
    param_fp32 = op_cast(param, mstype.float32)
    gradient_fp32 = op_cast(gradient, mstype.float32)

    # NOTE(review): ``num_one`` is defined outside this function; presumably
    # the constant 1 -- confirm at module level.
    i6_ex = op_cast(global_step + num_one, mstype.float32)
    i9 = op_cast(num_one, mstype.float32) - beta1
    x1 = op_cast(num_one, mstype.float32) - beta2
    i6 = op_cast(num_one, mstype.float32) - op_pow(beta1, i6_ex)
    i3 = op_cast(num_one, mstype.float32) - op_pow(beta2, i6_ex)
    i1 = op_square(gradient_fp32)

    # The positional order of the arguments is the fused operator's contract;
    # note that the original (uncast) ``gradient`` and ``param`` are passed.
    add3, update = G.LambNextMV()(i1, v, i3, gradient, m, i6, param, beta1, i9, beta2, x1, weight_decay, eps)

    if decay_flag:
        update = update + op_mul(weight_decay, param_fp32)

    w_norm = op_norm(param_fp32)
    g_norm = op_norm(gradient_fp32)
    g_norm_hat = op_norm(add3)

    # Constant tensors (0, 1 and 10) with the dtype and shape of ``w_norm``.
    zeros = F.zeros_like(w_norm)
    ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0)
    tens = op_fill(op_dtype(w_norm), op_shape(w_norm), 10.0)

    next_param = G.LambUpdateWithLR()(g_norm, w_norm, g_norm_hat, lr, update, param, zeros, ones, tens)

    # Tie ``add3`` to ``next_param`` via control_depend; presumably so the
    # parameter update is not pruned from the graph -- TODO confirm.
    next_v = F.control_depend(add3, next_param)
    return next_v
def __init__(self, weight=1.0, compute_type=mstype.float32):
    """Create the operators and constants of LengthPenalty.

    Args:
        weight: Stored unchanged in ``self.weight``.
        compute_type: Accepted but not used by this constructor.
    """
    super(LengthPenalty, self).__init__()
    self.weight = weight

    # Arithmetic operators.
    self.add = P.TensorAdd()
    self.pow = P.Pow()
    self.div = P.RealDiv()
    self.cast = P.Cast()

    # float32 scalar constants 5 and 6.
    self.five = Tensor(5.0, mstype.float32)
    self.six = Tensor(6.0, mstype.float32)
def __init__(self):
    """Create the element-wise arithmetic operators used by Net."""
    super(Net, self).__init__()

    # Binary operators.
    self.add = P.TensorAdd()
    self.sub = P.Sub()
    self.mul = P.Mul()
    self.div = P.RealDiv()

    # Remaining operators: square root, power and negation.
    self.sqrt = P.Sqrt()
    self.pow = P.Pow()
    self.neg = P.Neg()
def __init__(self, batchsize):
    """Create the operators and the two int32 index tensors.

    Args:
        batchsize (int): ``emb1_param`` holds the even indices and
            ``emb2_param`` the odd indices of ``range(batchsize)``.
    """
    super(GatherV2, self).__init__()
    self.pow = P.Pow()

    even_indices = list(range(0, batchsize, 2))
    odd_indices = list(range(1, batchsize, 2))
    self.emb1_param = Tensor(even_indices, dtype=mstype.int32)
    self.emb2_param = Tensor(odd_indices, dtype=mstype.int32)

    self.gatherv2 = P.GatherV2()
def __init__(self, power=0, name='PowerTransform', param=None):
    """Create a PowerTransform with the given exponent.

    Args:
        power (int, float): Stored in ``self._power`` after a type check.
        name (str): Forwarded to the base class.
        param: Forwarded to the base class; when None, a snapshot of this
            constructor's arguments is used instead.
    """
    # Take the snapshot before any other local name is introduced.
    if param is None:
        param = dict(locals())
    super(PowerTransform, self).__init__(name=name, param=param)

    validator.check_value_type('power', power, [int, float], self.name)
    self._power = power

    # Primitive operators.
    self.pow = P.Pow()
    self.exp = P.Exp()
    self.log = P.Log()

    # log1p / expm1 are provided by this class's own step-wise methods.
    self.log1p = self._log1p_by_step
    self.expm1 = self._expm1_by_step
def __init__(self, gamma=2.0, alpha=0.25):
    """Create the operators and constants of the sigmoid focal loss.

    Args:
        gamma: Stored unchanged in ``self.gamma``.
        alpha: Stored unchanged in ``self.alpha``.
    """
    super(SigmoidFocalClassificationLoss, self).__init__()

    # Attribute name (including its spelling) is kept as-is, since code
    # outside this constructor may reference it.
    self.sigmiod_cross_entropy = P.SigmoidCrossEntropyWithLogits()
    self.sigmoid = P.Sigmoid()
    self.pow = P.Pow()

    # One-hot encoding operator with its float32 on / off values.
    self.onehot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    self.gamma = gamma
    self.alpha = alpha
def __init__(self, params, decay_steps, warmup_steps=0, start_learning_rate=0.1,
             end_learning_rate=0.0001, power=1.0, beta1=0.9, beta2=0.999, eps=1e-6,
             weight_decay=0.0,
             decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name):
    """Create the state of the Lamb optimizer.

    Scalar hyper-parameters are wrapped in one-element float32 tensors,
    except ``power`` which is stored as given.

    Args:
        params: Parameters to optimise; forwarded to the base class.
        decay_steps: Stored as a tensor in ``self.decay_steps``.
        warmup_steps: Stored as a tensor; ``self.warmup_flag`` is True
            exactly when it is greater than 0.
        start_learning_rate: Forwarded to the base class and stored as a tensor.
        end_learning_rate: Stored as a tensor.
        power: Stored unchanged in ``self.power``.
        beta1, beta2, eps, weight_decay: Stored as tensors.
        decay_filter: Callable applied to every parameter to build
            ``self.decay_flag``.

    Raises:
        RuntimeError: If the base class reports a grouped parameter setting.
    """
    super(Lamb, self).__init__(start_learning_rate, params)
    if self.is_group:
        raise RuntimeError(
            f"The {self.cls_name} optimizer cannot support group setting.")
    _check_param_value(decay_steps, warmup_steps, start_learning_rate,
                       end_learning_rate, power, beta1, beta2, eps, weight_decay,
                       self.cls_name)

    def _f32_vector(value):
        # One-element float32 tensor holding ``value``.
        return Tensor(np.array([value]).astype(np.float32))

    # turn them to scalar when me support scalar/tensor mix operations
    self.global_step = Parameter(initializer(0, [1]), name="global_step")

    self.warmup_steps = _f32_vector(warmup_steps)
    self.warmup_flag = bool(warmup_steps > 0)

    self.decay_steps = _f32_vector(decay_steps)
    self.start_learning_rate = _f32_vector(start_learning_rate)
    self.end_learning_rate = _f32_vector(end_learning_rate)
    self.diff_learning_rate = _f32_vector(start_learning_rate - end_learning_rate)
    self.power = power
    self.beta1 = _f32_vector(beta1)
    self.beta2 = _f32_vector(beta2)
    self.eps = _f32_vector(eps)
    self.weight_decay_tensor = _f32_vector(weight_decay)

    # Moment buffers are zero-initialised clones of the parameters.
    self.params = self.parameters
    self.moments1 = self.params.clone(prefix="lamb_m", init='zeros')
    self.moments2 = self.params.clone(prefix="lamb_v", init='zeros')
    self.decay_flag = tuple(decay_filter(x) for x in self.params)

    self.hyper_map = C.HyperMap()
    self.min = P.Minimum()
    self.pow = P.Pow()
    self.greater = P.Greater()
    self.one = _f32_vector(1.0)
    self.cast = P.Cast()
def test_nobroadcast_fp16():
    """Compare same-shape float16 binary ops against their NumPy references.

    Runs in graph mode on the GPU target with seeded (10, 20) inputs.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    np.random.seed(42)
    lhs = np.random.rand(10, 20).astype(np.float16)
    rhs = np.random.rand(10, 20).astype(np.float16)

    def run(op, right=rhs):
        # Apply ``op`` to fresh tensors of the operands and return a NumPy array.
        return op(Tensor(lhs), Tensor(right)).asnumpy()

    assert np.allclose(run(P.Minimum()), np.minimum(lhs, rhs))
    assert np.allclose(run(P.Maximum()), np.maximum(lhs, rhs))
    assert np.allclose(run(P.Greater()), lhs > rhs)
    assert np.allclose(run(P.Less()), lhs < rhs)
    assert np.allclose(run(P.Pow()), np.power(lhs, rhs))
    assert np.allclose(run(P.RealDiv()), lhs / rhs)
    assert np.allclose(run(P.Mul()), lhs * rhs)
    assert np.allclose(run(P.Sub()), lhs - rhs)
    assert np.allclose(run(P.DivNoNan()), lhs / rhs)

    # With an all-zero divisor the result is compared against all zeros.
    zero_rhs = np.zeros_like(rhs)
    assert np.allclose(run(P.DivNoNan(), zero_rhs), zero_rhs)

    assert np.allclose(run(P.Mod()), np.fmod(lhs, rhs))
    assert np.allclose(run(P.FloorMod()), np.mod(lhs, rhs))
def __init__(self):
    """Create the arithmetic, reduction and reshape operators used by Net."""
    super(Net, self).__init__()

    # Binary operators.
    self.add = P.Add()
    self.sub = P.Sub()
    self.mul = P.Mul()
    self.div = P.RealDiv()

    # Remaining element-wise operators.
    self.sqrt = P.Sqrt()
    self.pow = P.Pow()
    self.neg = P.Neg()

    # Reduction and shape operators.
    self.reducemin = P.ReduceMin()
    self.reshape = P.Reshape()
def test_pow():
    """Check P.Pow with an int64 tensor exponent, then run PowNet with a bool one.

    The first part asserts that cubing [[2, 2], [3, 3]] gives
    [[8, 8], [27, 27]]. The second part only executes ``PowNet`` with a
    boolean exponent tensor; no result is asserted for it.
    """
    input_tensor = Tensor(np.array([[2, 2], [3, 3]]))
    power = Tensor(np.array(3.0, np.int64))
    # ``np.bool`` was a deprecated alias of the builtin ``bool`` (deprecated in
    # NumPy 1.20, missing in 1.24-1.26); ``np.bool_`` gives the same dtype on
    # every NumPy version.
    power2 = Tensor(np.array(True, np.bool_))

    testpow = P.Pow()
    expect = np.array([[8, 8], [27, 27]])
    result = testpow(input_tensor, power)
    assert np.all(result.asnumpy() == expect)

    net = PowNet()
    net(input_tensor, power2)
def __init__(self, fixed_atoms=False, dim=3):
    """Create the operators and the neighbour-gathering sub-cell.

    Args:
        fixed_atoms (bool): Stored in ``self.fixed_atoms`` and forwarded to
            ``GatherNeighbors``.
        dim (int): Forwarded to ``GatherNeighbors`` as its first argument.
    """
    super().__init__()
    self.fixed_atoms = fixed_atoms

    # Primitive operators.
    self.reducesum = P.ReduceSum()
    self.pow = P.Pow()
    self.gatherd = P.GatherD()

    # Norm cell constructed with -1, and the neighbour gatherer.
    self.norm = nn.Norm(-1)
    self.gather_neighbors = GatherNeighbors(dim, fixed_atoms)
def __init__(self, power=0, name='PowerTransform', param=None):
    """Create a PowerTransform with a non-negative exponent.

    Args:
        power (int, float): Must be greater than or equal to 0; stored in
            ``self._power``.
        name (str): Forwarded to the base class.
        param: Forwarded to the base class; when None, a snapshot of this
            constructor's arguments is used instead.
    """
    # Take the snapshot before any other local name is introduced.
    if param is None:
        param = dict(locals())
    super(PowerTransform, self).__init__(name=name, param=param)

    # Type check first, then the lower-bound check.
    validator.check_value_type('power', power, [int, float], self.name)
    validator.check_number("power", power, 0, Rel.GE, self.name)
    self._power = power

    self.pow = P.Pow()

    # Generic helper functions used for exp / expm1 / log / log1p.
    self.exp = exp_generic
    self.expm1 = expm1_generic
    self.log = log_generic
    self.log1p = log1p_generic
def test_broadcast_diff_dims():
    """Compare broadcasting binary ops against NumPy for shapes (2,) and (2, 1).

    Runs in graph mode on the GPU target with seeded float32 inputs; the
    comparison operators are additionally checked with int32 inputs.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    np.random.seed(42)
    lhs = np.random.rand(2).astype(np.float32)
    rhs = np.random.rand(2, 1).astype(np.float32)
    lhs_i32 = np.random.randint(0, 100, (2)).astype(np.int32)
    rhs_i32 = np.random.randint(0, 100, (2, 1)).astype(np.int32)

    def run(op, left=lhs, right=rhs):
        # Apply ``op`` to fresh tensors of the operands and return a NumPy array.
        return op(Tensor(left), Tensor(right)).asnumpy()

    assert np.allclose(run(P.Minimum()), np.minimum(lhs, rhs))
    assert np.allclose(run(P.Maximum()), np.maximum(lhs, rhs))

    assert np.allclose(run(P.Greater(), lhs_i32, rhs_i32), lhs_i32 > rhs_i32)
    assert np.allclose(run(P.Greater()), lhs > rhs)

    assert np.allclose(run(P.Less()), lhs < rhs)
    assert np.allclose(run(P.Less(), lhs_i32, rhs_i32), lhs_i32 < rhs_i32)

    assert np.allclose(run(P.Pow()), np.power(lhs, rhs))
    assert np.allclose(run(P.RealDiv()), lhs / rhs)
    assert np.allclose(run(P.Mul()), lhs * rhs)
    assert np.allclose(run(P.Sub()), lhs - rhs)
    assert np.allclose(run(P.DivNoNan()), lhs / rhs)

    # With an all-zero divisor the result is compared against all zeros.
    zero_rhs = np.zeros_like(rhs)
    assert np.allclose(run(P.DivNoNan(), lhs, zero_rhs), zero_rhs)
def __init__(self, layer_norm_weight, layer_norm_bias):
    """Create the operators of LayerNorm and store its weight and bias.

    Args:
        layer_norm_weight: Stored unchanged in ``self.layer_norm_weight``.
        layer_norm_bias: Stored unchanged in ``self.layer_norm_bias``.
    """
    super(LayerNorm, self).__init__()

    # Reduction keeps the reduced dimensions.
    self.reducemean = P.ReduceMean(keep_dims=True)

    # Element-wise operators.
    self.sub = P.Sub()
    self.pow = P.Pow()
    self.add = P.Add()
    self.sqrt = P.Sqrt()
    self.div = P.Div()
    self.mul = P.Mul()

    self.layer_norm_weight = layer_norm_weight
    self.layer_norm_bias = layer_norm_bias
def __init__(self, task):
    """Create the operators needed for the given task.

    Args:
        task (str): 'classification' selects the sampling / cross-entropy
            operators; any other value selects the mean / exp / pow
            operators.
    """
    super(AleatoricLoss, self).__init__()
    self.task = task

    is_classification = self.task == 'classification'
    if not is_classification:
        self.mean = P.ReduceMean()
        self.exp = P.Exp()
        self.pow = P.Pow()
    else:
        self.sum = P.ReduceSum()
        self.exp = P.Exp()
        self.normal = C.normal
        self.to_tensor = P.ScalarToArray()
        self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8,
             use_locking=False, use_nesterov=False, weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    """Validate the hyper-parameters and create the Adam optimizer state.

    Args:
        params: Parameters to optimise; forwarded to the base class.
        learning_rate: Forwarded to the base class. An ``Iterable`` or a
            one-dimensional ``Tensor`` switches on the dynamic learning-rate
            attributes.
        beta1: Stored as a float32 ``Tensor`` in ``self.beta1``.
        beta2: Stored as a float32 ``Tensor`` in ``self.beta2``.
        eps: Stored unchanged in ``self.eps``.
        use_locking (bool): Forwarded to ``P.Adam``.
        use_nesterov (bool): Forwarded to ``P.Adam``.
        weight_decay: Multiplied by ``loss_scale`` and stored in
            ``self.weight_decay``.
        loss_scale (float): Its reciprocal is stored in
            ``self.reciprocal_scale``.
        decay_filter: Callable applied to every parameter to build
            ``self.decay_tf``.
    """
    super(Adam, self).__init__(learning_rate, params)

    # ---- argument validation -------------------------------------------
    _check_param_value(beta1, beta2, eps, weight_decay)
    for arg_name, arg_value, allowed in (("use_locking", use_locking, [bool]),
                                         ("use_nesterov", use_nesterov, [bool]),
                                         ("loss_scale", loss_scale, [float])):
        validator.check_type(arg_name, arg_value, allowed)
    validator.check_number_range("loss_scale", loss_scale, 1.0, float("inf"),
                                 Rel.INC_LEFT)

    # ---- dynamic learning rate -----------------------------------------
    is_lr_sequence = isinstance(learning_rate, Iterable)
    is_lr_vector = isinstance(learning_rate, Tensor) and learning_rate.dim() == 1
    self.dynamic_lr = bool(is_lr_sequence or is_lr_vector)
    if self.dynamic_lr:
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                     name="global_step")
        self.axis = 0

    # ---- hyper-parameters and running powers ---------------------------
    self.beta1 = Tensor(beta1, mstype.float32)
    self.beta2 = Tensor(beta2, mstype.float32)
    self.beta1_power = Parameter(initializer(1, [1], mstype.float32),
                                 name="beta1_power")
    self.beta2_power = Parameter(initializer(1, [1], mstype.float32),
                                 name="beta2_power")
    self.eps = eps

    # ---- moment buffers and per-parameter decay flags -------------------
    self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
    self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')
    self.decay_tf = tuple(decay_filter(x) for x in self.parameters)

    # ---- operators and derived constants -------------------------------
    self.hyper_map = C.HyperMap()
    self.opt = P.Adam(use_locking, use_nesterov)
    self.weight_decay = weight_decay * loss_scale
    self.reciprocal_scale = 1.0 / loss_scale

    self.pow = P.Pow()
    self.sqrt = P.Sqrt()
    self.one = Tensor(np.array([1.0]).astype(np.float32))
    self.realdiv = P.RealDiv()
def __init__(self, concentration1=None, concentration0=None, seed=None,
             dtype=mstype.float32, name="Beta"):
    """
    Constructor of Beta.

    Args:
        concentration1: Optional first concentration; must not be a Python
            float and is checked to be greater than zero when set.
        concentration0: Optional second concentration; same constraints.
        seed: Forwarded to the base class.
        dtype: Must be one of ``mstype.float_type``.
        name (str): Forwarded to the base class.

    Raises:
        TypeError: If either concentration is passed as a Python float.
    """
    # Snapshot of the constructor arguments; must be taken before any other
    # local name is introduced.
    param = dict(locals())
    param.update(param_dict={
        'concentration1': concentration1,
        'concentration0': concentration0
    })
    Validator.check_type_name("dtype", dtype, mstype.float_type, type(self).__name__)

    # As some operators can't accept scalar input, check the type here
    if isinstance(concentration0, float):
        raise TypeError("Input concentration0 can't be scalar")
    if isinstance(concentration1, float):
        raise TypeError("Input concentration1 can't be scalar")

    super(Beta, self).__init__(seed, dtype, name, param)

    self._concentration1 = self._add_parameter(concentration1, 'concentration1')
    self._concentration0 = self._add_parameter(concentration0, 'concentration0')
    for label, value in (("concentration1", self._concentration1),
                         ("concentration0", self._concentration0)):
        if value is not None:
            check_greater_zero(value, label)

    # ops needed for the class
    self.log = log_generic
    self.log1p = P.Log1p()
    self.neg = P.Neg()
    self.pow = P.Pow()
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.select = P.Select()
    self.logicaland = P.LogicalAnd()
    self.greater = P.Greater()
    self.digamma = nn.DiGamma()
    self.lbeta = nn.LBeta()