def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             damping=0.03, loss_scale=1, frequency=278, batch_size=32,
             has_bias=True, activation=None):
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    self.batch_size = batch_size
    if isinstance(weight_init, Tensor):
        if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None

    # Inverse covariance buffers for the THOR second-order update, stored in
    # the Ascend fractal (16x16 cube) layout.
    self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)),
                                  name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)),
                                  name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = Tensor(damping)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.pad = P.Pad(((0, 23), (0, 23)))
    self.pad1 = P.Pad(((0, 7), (0, 7)))
    self.slice = P.Slice()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
    self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
    self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
    self.fused_abs_max2 = P.CusFusedAbsMax1()
    self.log = P.Log()
    self.exp = P.Exp()
    self.dampingA = Tensor(np.identity(2048), mstype.float32)
    self.dampingG = Tensor(np.identity(1024), mstype.float32)
    self.add = P.TensorAdd()
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
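# A minimal construction sketch (assumptions: the surrounding THOR imports are
# available and the network runs on Ascend, where the Cus* custom ops exist).
# The hard-coded 2048-dim dampingA identity and the [1001, 1001] fused-abs-max
# shape suggest this layer was written for a 2048 -> 1001 classifier head, so
# those dimensions are mirrored here for illustration.
fc = Dense_Thor(in_channels=2048,
                out_channels=1001,
                damping=0.03,
                loss_scale=128,
                frequency=278,
                batch_size=32,
                has_bias=False,
                activation=None)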
def __init__(self):
    super(Triu, self).__init__()
    self.dtype = P.DType()
    self.mul = P.Mul()
    self.cast = P.Cast()
def __init__(self, args, strategy):
    super(SemiAutoOneHotNet, self).__init__()
    self.a = args.a
    self.b = args.b
    self.c = args.c
    self.d = args.d
    self.e = args.e
    self.cast = P.Cast()
    self.cast.set_strategy(strategy=strategy.twod_strategy)
    self.cast1 = P.Cast()
    self.cast1.set_strategy(strategy=strategy.twod_strategy)
    self.cast2 = P.Cast()
    self.cast2.set_strategy(strategy=strategy.twod_strategy)
    self.cast3 = P.Cast()
    self.cast3.set_strategy(strategy=strategy.scalar_strategy)
    self.cast4 = P.Cast()
    self.cast4.set_strategy(strategy=strategy.scalar_strategy)
    self.a_const = Tensor(self.a, dtype=mstype.float32)
    self.b_const = Tensor(self.b, dtype=mstype.float32)
    self.c_const = Tensor(self.c, dtype=mstype.float32)
    self.d_const = Tensor(self.d, dtype=mstype.float32)
    self.e_const = Tensor(self.e, dtype=mstype.float32)
    self.m_const_zero = Tensor(0, dtype=mstype.float32)
    self.a_const_one = Tensor(1, dtype=mstype.float32)
    self.onehot = P.OneHot()
    self.onehot.set_strategy(strategy=strategy.onehot_strategy)
    self.exp = P.Exp()
    self.exp.set_strategy(strategy=strategy.twod_strategy)
    self.exp2 = P.Exp()
    self.exp2.set_strategy(strategy=strategy.twod_strategy)
    self.exp3 = P.Exp()
    self.exp3.set_strategy(strategy=strategy.twod_strategy)
    self.mul_const = P.Mul()
    self.mul_const.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const2 = P.TensorAdd()
    self.mul_const2.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const3 = P.Sub()
    self.mul_const3.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul_const4 = P.Sub()
    self.mul_const4.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const5 = P.Mul()
    self.mul_const5.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul = P.Mul()
    self.mul.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul2 = P.Mul()
    self.mul2.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul3 = P.TensorAdd()
    self.mul3.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul4 = P.Sub()
    self.mul4.set_strategy(strategy=strategy.twod_twodbc_strategy)
    self.mul5 = P.RealDiv()
    self.mul5.set_strategy(strategy=strategy.twod_twodbc_strategy)
    self.mul6 = P.Mul()
    self.mul6.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul7 = P.Mul()
    self.mul7.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul8 = P.RealDiv()
    self.mul8.set_strategy(strategy=strategy.scalar_scalar_strategy)
    self.mul9 = P.TensorAdd()
    self.mul9.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.reduce_max = P.ReduceMax(keep_dims=True)
    self.reduce_max.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.reduce_sum.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum_2 = P.ReduceSum(keep_dims=False)
    self.reduce_sum_2.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum_3 = P.ReduceSum(keep_dims=False)
    self.reduce_sum_3.set_strategy(strategy=strategy.oned_strategy)
    self.reshape = P.Reshape()
    self.log = P.Log()
    self.log.set_strategy(strategy=strategy.twod_strategy)
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.normalize = P.L2Normalize(axis=1)
    self.normalize.set_strategy(strategy=strategy.twod_strategy_m)
    self.normalize2 = P.L2Normalize(axis=1)
    self.normalize2.set_strategy(strategy=strategy.twod_strategy_m)
    self.fc = P.MatMul(transpose_b=True)
    self.fc.set_strategy(strategy=strategy.twodbc_twod_strategy)
    weight_shape = [args.num_classes, args.emb_size]
    weight_np = np.zeros(weight_shape, np.float32)
    self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight')
def __init__(self, strategy1):
    super().__init__()
    self.matmul = P.MatMul().set_strategy(strategy1)
    self.cast = P.Cast()
def _tensors_allreduce_mean(mul, degree, grad):
    degree = F.scalar_cast(degree, F.dtype(grad))
    grad = _all_reduce_G(grad)
    cast_op = P.Cast()
    return mul(grad, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
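# Illustration of the arithmetic this helper performs, with NumPy standing in
# for the distributed collective (assumption: _all_reduce_G sums gradients
# across `degree` devices, so scaling by 1/degree yields the mean gradient).
import numpy as np

per_device_grads = [np.array([2.0, 4.0]), np.array([4.0, 8.0])]
degree = len(per_device_grads)
summed = np.sum(per_device_grads, axis=0)   # what AllReduce(SUM) produces
mean_grad = summed * (1.0 / degree)         # [3.0, 6.0]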
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(TransformerNetworkWithLoss, self).__init__(auto_prefix=False)
    self.transformer = TransformerModel(config, is_training, use_one_hot_embeddings)
    self.loss = TransformerTrainingLoss(config)
    self.cast = P.Cast()
def __init__(self, config):
    super(WideDeepModel, self).__init__()
    emb_128_size = 650000
    emb64_single_size = 17300
    emb64_multi_size = 20900
    indicator_size = 16
    deep_dim_list = [1024, 1024, 1024, 1024, 1024]
    # deep_dropout=0.0
    wide_reg_coef = [0.0, 0.0]
    deep_reg_coef = [0.0, 0.0]
    wide_lr = 0.2
    deep_lr = 1.0
    self.input_emb_dim = config.input_emb_dim
    self.batch_size = config.batch_size
    self.deep_layer_act = config.deep_layers_act
    self.init_args = config.init_args
    self.weight_init, self.bias_init = config.weight_bias_init
    self.weight_bias_init = config.weight_bias_init
    self.emb_init = config.emb_init
    self.keep_prob = config.keep_prob
    self.layer_dims = deep_dim_list + [1]
    self.all_dim_list = [self.input_emb_dim] + self.layer_dims
    self.continue_field_size = 32
    self.emb_128_size = emb_128_size
    self.emb64_single_size = emb64_single_size
    self.emb64_multi_size = emb64_multi_size
    self.indicator_size = indicator_size
    self.wide_l1_coef, self.wide_l2_coef = wide_reg_coef
    self.deep_l1_coef, self.deep_l2_coef = deep_reg_coef
    self.wide_lr = wide_lr
    self.deep_lr = deep_lr

    init_acts_embedding_matrix = [
        ('emb128_embedding', [self.emb_128_size, 128], self.emb_init),
        ('emb64_single', [self.emb64_single_size, 64], self.emb_init),
        ('emb64_multi', [self.emb64_multi_size, 64], self.emb_init),
        ('emb64_indicator', [self.indicator_size, 64], self.emb_init)]
    var_map = init_var_dict(self.init_args, init_acts_embedding_matrix)
    self.emb128_embedding = var_map["emb128_embedding"]
    self.emb64_single = var_map["emb64_single"]
    self.emb64_multi = var_map["emb64_multi"]
    self.emb64_indicator = var_map["emb64_indicator"]

    init_acts_wide_weight = [
        ('wide_continue_w', [self.continue_field_size], self.emb_init),
        ('wide_emb128_w', [self.emb_128_size], self.emb_init),
        ('wide_emb64_single_w', [self.emb64_single_size], self.emb_init),
        ('wide_emb64_multi_w', [self.emb64_multi_size], self.emb_init),
        ('wide_indicator_w', [self.indicator_size], self.emb_init),
        ('wide_bias', [1], self.emb_init)]
    var_map = init_var_dict(self.init_args, init_acts_wide_weight)
    self.wide_continue_w = var_map["wide_continue_w"]
    self.wide_emb128_w = var_map["wide_emb128_w"]
    self.wide_emb64_single_w = var_map["wide_emb64_single_w"]
    self.wide_emb64_multi_w = var_map["wide_emb64_multi_w"]
    self.wide_indicator_w = var_map["wide_indicator_w"]
    self.wide_bias = var_map["wide_bias"]

    self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1],
                                    self.weight_bias_init, self.deep_layer_act,
                                    drop_out=config.dropout_flag, convert_dtype=True)
    self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2],
                                    self.weight_bias_init, self.deep_layer_act,
                                    drop_out=config.dropout_flag, convert_dtype=True)
    self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3],
                                    self.weight_bias_init, self.deep_layer_act,
                                    drop_out=config.dropout_flag, convert_dtype=True)
    self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4],
                                    self.weight_bias_init, self.deep_layer_act,
                                    drop_out=config.dropout_flag, convert_dtype=True)
    self.dense_layer_5 = DenseLayer(self.all_dim_list[4], self.all_dim_list[5],
                                    self.weight_bias_init, self.deep_layer_act,
                                    drop_out=config.dropout_flag, convert_dtype=True)
    self.deep_predict = DenseLayer(self.all_dim_list[5], self.all_dim_list[6],
                                   self.weight_bias_init, self.deep_layer_act,
                                   drop_out=config.dropout_flag, convert_dtype=True,
                                   use_activation=False)

    self.gather_v2 = P.GatherV2()
    self.mul = P.Mul()
    self.reduce_sum_false = P.ReduceSum(keep_dims=False)
    self.reduce_sum_true = P.ReduceSum(keep_dims=True)
    self.reshape = P.Reshape()
    self.square = P.Square()
    self.shape = P.Shape()
    self.tile = P.Tile()
    self.concat = P.Concat(axis=1)
    self.cast = P.Cast()
    self.reduceMean_false = P.ReduceMean(keep_dims=False)
    self.Concat = P.Concat(axis=1)
    self.BiasAdd = P.BiasAdd()
    self.expand_dims = P.ExpandDims()
    self.flatten = Flatten()
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True,
             batch_first=False, dropout=0, bidirectional=False):
    super(LSTM, self).__init__()
    validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
    validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
    validator.check_positive_int(num_layers, "num_layers", self.cls_name)
    self.is_ascend = context.get_context("device_target") == "Ascend"

    self.batch_first = batch_first
    self.transpose = P.Transpose()
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.lstm = P.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       has_bias=has_bias,
                       bidirectional=bidirectional,
                       dropout=float(dropout))

    weight_size = 0
    gate_size = 4 * hidden_size
    stdv = 1 / math.sqrt(hidden_size)
    num_directions = 2 if bidirectional else 1
    if self.is_ascend:
        # On Ascend the LSTM is unrolled with DynamicRNN: one weight and one
        # bias parameter per layer and direction.
        self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.concat = P.Concat(axis=0)
        self.concat_2dim = P.Concat(axis=2)
        self.cast = P.Cast()
        self.shape = P.Shape()
        if dropout != 0:
            self.dropout_op = nn.Dropout(float(dropout))
        b0 = np.zeros(gate_size, dtype=np.float16)
        self.w_list = []
        self.b_list = []
        self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
        self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

        for layer in range(num_layers):
            w_shape = input_size if layer == 0 else (num_directions * hidden_size)
            w_np = np.random.uniform(-stdv, stdv,
                                     (w_shape + hidden_size, gate_size)).astype(np.float16)
            self.w_list.append(Parameter(
                initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]),
                name='weight_fw' + str(layer)))
            if has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                self.b_list.append(Parameter(initializer(Tensor(b_np), [gate_size]),
                                             name='bias_fw' + str(layer)))
            else:
                self.b_list.append(Parameter(initializer(Tensor(b0), [gate_size]),
                                             name='bias_fw' + str(layer)))
            if bidirectional:
                w_bw_np = np.random.uniform(-stdv, stdv,
                                            (w_shape + hidden_size, gate_size)).astype(np.float16)
                self.w_list.append(Parameter(
                    initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]),
                    name='weight_bw' + str(layer)))
                b_bw_np = np.random.uniform(-stdv, stdv,
                                            (4 * hidden_size)).astype(np.float16) if has_bias else b0
                self.b_list.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                             name='bias_bw' + str(layer)))
        self.w_list = ParameterTuple(self.w_list)
        self.b_list = ParameterTuple(self.b_list)
    else:
        # On GPU/CPU all layers share one flat weight parameter consumed by P.LSTM.
        for layer in range(num_layers):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * hidden_size
            if has_bias:
                increment_size += 2 * gate_size
            weight_size += increment_size * num_directions
        w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
        self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
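# A minimal usage sketch (assumption: this class follows standard MindSpore
# nn.LSTM semantics, where input is (seq_len, batch, input_size) when
# batch_first=False and the hidden state is a tuple (h0, c0) of shape
# (num_directions * num_layers, batch, hidden_size)).
import numpy as np
from mindspore import Tensor

net = LSTM(input_size=10, hidden_size=16, num_layers=1,
           has_bias=True, batch_first=False, bidirectional=False)
x = Tensor(np.ones([5, 3, 10]).astype(np.float32))    # (seq_len, batch, input_size)
h0 = Tensor(np.zeros([1, 3, 16]).astype(np.float32))
c0 = Tensor(np.zeros([1, 3, 16]).astype(np.float32))
output, (hn, cn) = net(x, (h0, c0))                   # output: (5, 3, 16)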
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(BertNetworkWithLoss, self).__init__()
    self.bert = BertPreTraining(config, is_training, use_one_hot_embeddings)
    self.loss = BertPretrainingLoss(config)
    self.cast = P.Cast()
def __init__(self, config):
    super(Faster_Rcnn_Resnet50, self).__init__()
    self.dtype = np.float32
    self.ms_type = mstype.float32
    self.train_batch_size = config.batch_size
    self.num_classes = config.num_classes
    self.anchor_scales = config.anchor_scales
    self.anchor_ratios = config.anchor_ratios
    self.anchor_strides = config.anchor_strides
    self.target_means = tuple(config.rcnn_target_means)
    self.target_stds = tuple(config.rcnn_target_stds)

    # Anchor generator
    anchor_base_sizes = None
    self.anchor_base_sizes = list(self.anchor_strides) if anchor_base_sizes is None \
        else anchor_base_sizes
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios))
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    featmap_sizes = config.feature_shapes
    assert len(featmap_sizes) == len(self.anchor_generators)
    self.anchor_list = self.get_anchors(featmap_sizes)

    # Backbone resnet50
    self.backbone = ResNetFea(ResidualBlockUsing, config.resnet_block,
                              config.resnet_in_channels, config.resnet_out_channels,
                              False)

    # Fpn
    self.fpn_ncek = FeatPyramidNeck(config.fpn_in_channels,
                                    config.fpn_out_channels,
                                    config.fpn_num_outs)

    # Rpn and rpn loss
    self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8))
    self.rpn_with_loss = RPN(config,
                             self.train_batch_size,
                             config.rpn_in_channels,
                             config.rpn_feat_channels,
                             config.num_anchors,
                             config.rpn_cls_out_channels)

    # Proposal
    self.proposal_generator = Proposal(config,
                                       self.train_batch_size,
                                       config.activate_num_classes,
                                       config.use_sigmoid_cls)
    self.proposal_generator.set_train_local(config, True)
    self.proposal_generator_test = Proposal(config,
                                            config.test_batch_size,
                                            config.activate_num_classes,
                                            config.use_sigmoid_cls)
    self.proposal_generator_test.set_train_local(config, False)

    # Assign and sampler stage two
    self.bbox_assigner_sampler_for_rcnn = BboxAssignSampleForRcnn(
        config, self.train_batch_size, config.num_bboxes_stage2, True)
    self.decode = P.BoundingBoxDecode(max_shape=(config.img_height, config.img_width),
                                      means=self.target_means,
                                      stds=self.target_stds)

    # Roi
    self.roi_init(config)

    # Rcnn
    self.rcnn = Rcnn(config,
                     config.rcnn_in_channels * config.roi_layer['out_size'] * config.roi_layer['out_size'],
                     self.train_batch_size, self.num_classes)

    # Op declare
    self.squeeze = P.Squeeze()
    self.cast = P.Cast()
    self.concat = P.Concat(axis=0)
    self.concat_1 = P.Concat(axis=1)
    self.concat_2 = P.Concat(axis=2)
    self.reshape = P.Reshape()
    self.select = P.Select()
    self.greater = P.Greater()
    self.transpose = P.Transpose()

    # Improve speed
    self.concat_start = min(self.num_classes - 2, 55)
    self.concat_end = (self.num_classes - 1)

    # Test mode
    self.test_mode_init(config)

    # Init tensor
    self.init_tensor(config)
def __init__(self):
    super(ParameterReduce, self).__init__()
    self.cast = P.Cast()
    self.reduce = P.AllReduce()
def __init__(self, vocab_size, embedding_size, field_size, param_init='normal',
             target='CPU', slice_mode='batch_slice', feature_num_list=None,
             max_norm=None, sparse=True, operator='SUM'):
    super(MultiFieldEmbeddingLookup, self).__init__(vocab_size, embedding_size, param_init,
                                                    target, slice_mode, feature_num_list,
                                                    max_norm, sparse)
    self.field_size = validator.check_value_type('field_size', field_size, [int], self.cls_name)
    self.operator = operator

    self.mul = P.Mul()
    self.inf_mask_mul = P.Mul()
    self.bias_add = P.TensorAdd()
    self.inf_add = P.TensorAdd()
    self.merge_op = None
    self.count_op = P.UnsortedSegmentSum()
    self.abs = P.Abs()
    self.equal = P.Equal()
    self.add = P.TensorAdd()
    self.cast = P.Cast()
    self.div_no_nan = P.DivNoNan()
    self.expand = P.ExpandDims()
    self.max_mask_mul = P.Mul()
    self.max_no_equal = P.NotEqual()

    if operator == MultiFieldEmbeddingLookup.OPERATOR_SUM:
        self.merge_op = P.UnsortedSegmentSum()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
        self.merge_op = P.UnsortedSegmentMax()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MEAN:
        self.merge_op = P.UnsortedSegmentSum()
    else:
        raise ValueError("The operator supports ['SUM', 'MAX', 'MEAN'], but got: " + str(operator))

    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
    if slice_mode in ["table_row_slice", "batch_slice"] and is_auto_parallel:
        self.merge_op.shard(((get_group_size(), 1, 1), (get_group_size(), 1)))
        self.expand.shard(((get_group_size(),),))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
        self.count_op.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.add.shard(((get_group_size(),), (get_group_size(),)))
        self.div_no_nan.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.max_mask_mul.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.max_no_equal.shard(((1,), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.equal.shard(((get_group_size(), 1, 1), ()))
            self.inf_mask_mul.shard(((get_group_size(), 1, 1), ()))
            self.merge_op.shard(((get_group_size(), 1), (get_group_size(),)))
            self.count_op.shard(((get_group_size(),), (get_group_size(),)))
            self.inf_add.shard(((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
    elif slice_mode == "table_column_slice" and is_auto_parallel:
        self.merge_op.shard(((1, 1, get_group_size()), (1, 1)))
        self.div_no_nan.shard(((1, get_group_size()), (1, 1)))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(((1, 1, 1), (1, 1, get_group_size())))
        self.count_op.shard(((1, 1), (1, 1)))
        self.add.shard(((1,), (1,)))
        self.max_mask_mul.shard(((1, get_group_size()), (1, 1)))
        self.expand.shard(((1,),))
        self.max_no_equal.shard(((1,), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.equal.shard(((1, 1, 1), ()))
            self.inf_mask_mul.shard(((1, 1, 1), ()))
            self.merge_op.shard(((1, get_group_size()), (1,)))
            self.count_op.shard(((1,), (1,)))
            self.inf_add.shard(((1, 1, get_group_size()), (1, 1, 1)))
    else:
        if is_auto_parallel:
            raise ValueError("slice_mode should be in ['table_row_slice', 'batch_slice', "
                             "'table_column_slice'], but got " + str(slice_mode))

    # Min value for fp32
    self.negative_inf_value = -3.402823466E+38
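# A minimal usage sketch, following the documented MultiFieldEmbeddingLookup
# interface (assumption: indices, values and field ids share the same
# (batch, seq) shape, as in the official MindSpore example).
import numpy as np
import mindspore
import mindspore.nn as nn
from mindspore import Tensor

net = nn.MultiFieldEmbeddingLookup(vocab_size=10, embedding_size=2, field_size=2,
                                   operator='SUM', target='DEVICE')
input_indices = Tensor([[2, 4, 6, 0, 0], [1, 3, 5, 0, 0]], mindspore.int32)
input_values = Tensor([[1, 1, 1, 0, 0], [1, 1, 1, 0, 0]], mindspore.float32)
field_ids = Tensor([[0, 1, 1, 0, 0], [0, 0, 1, 0, 0]], mindspore.int32)
output = net(input_indices, input_values, field_ids)   # shape (2, field_size, embedding_size)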
def __init__(self, weights_update=False):
    """
    VGG16 feature extraction

    Args:
        weights_update (bool): Whether to update the weights of the first two
            conv blocks (conv1_x and conv2_x). Default: False.
    """
    super(VGG16FeatureExtraction, self).__init__()
    self.relu = nn.ReLU()
    self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode="same")
    self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
    self.conv1_1 = _conv(in_channels=3, out_channels=64, kernel_size=3,
                         padding=1, weights_update=weights_update)
    self.conv1_2 = _conv(in_channels=64, out_channels=64, kernel_size=3,
                         padding=1, weights_update=weights_update)
    self.conv2_1 = _conv(in_channels=64, out_channels=128, kernel_size=3,
                         padding=1, weights_update=weights_update)
    self.conv2_2 = _conv(in_channels=128, out_channels=128, kernel_size=3,
                         padding=1, weights_update=weights_update)
    self.conv3_1 = _conv(in_channels=128, out_channels=256, kernel_size=3, padding=1)
    self.conv3_2 = _conv(in_channels=256, out_channels=256, kernel_size=3, padding=1)
    self.conv3_3 = _conv(in_channels=256, out_channels=256, kernel_size=3, padding=1)
    self.conv4_1 = _conv(in_channels=256, out_channels=512, kernel_size=3, padding=1)
    self.conv4_2 = _conv(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.conv4_3 = _conv(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.conv5_1 = _conv(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.conv5_2 = _conv(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.conv5_3 = _conv(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.cast = P.Cast()
def __init__(self):
    super(Net, self).__init__()
    self.add = P.TensorAdd()
    self.cast = P.Cast()
    self.relu = P.ReLU()
    self.biasadd = P.BiasAdd()
def __init__(self, config, use_one_hot_embeddings=False):
    super(TransformerInferModel, self).__init__()
    config = copy.deepcopy(config)
    config.hidden_dropout_prob = 0.0
    config.attention_dropout_prob = 0.0

    self.input_mask_from_dataset = config.input_mask_from_dataset
    self.batch_size = config.batch_size
    self.seq_length = config.seq_length
    self.hidden_size = config.hidden_size
    self.num_hidden_layers = config.num_hidden_layers
    self.embedding_size = config.hidden_size
    self.attn_embed_dim = config.hidden_size
    self.num_layers = config.num_hidden_layers
    self.last_idx = self.num_hidden_layers - 1

    self.embedding_lookup = EmbeddingLookup(vocab_size=config.vocab_size,
                                            embed_dim=self.embedding_size,
                                            use_one_hot_embeddings=use_one_hot_embeddings)
    self.positional_embedding = PositionalEmbedding(
        embedding_size=self.embedding_size,
        max_position_embeddings=config.max_position_embeddings)
    # use for infer
    self.projection = PredLogProbs(batch_size=config.batch_size * config.beam_width,
                                   seq_length=1,
                                   width=self.hidden_size,
                                   compute_type=config.compute_type)
    self.encoder = TransformerEncoder(attn_embed_dim=self.attn_embed_dim,
                                      encoder_layers=self.num_layers,
                                      num_attn_heads=config.num_attention_heads,
                                      intermediate_size=config.intermediate_size,
                                      attention_dropout_prob=config.attention_dropout_prob,
                                      initializer_range=config.initializer_range,
                                      hidden_dropout_prob=config.hidden_dropout_prob,
                                      hidden_act=config.hidden_act,
                                      compute_type=config.compute_type)
    decoder_cell = TransformerDecoderStep(
        config=config,
        num_hidden_layers=config.num_hidden_layers,
        attn_embed_dim=self.attn_embed_dim,
        seq_length=config.seq_length,
        num_attn_heads=config.num_attention_heads,
        intermediate_size=config.intermediate_size,
        hidden_dropout_prob=config.hidden_dropout_prob,
        compute_type=config.compute_type,
        initializer_range=config.initializer_range,
        hidden_act="relu",
        embedding_lookup=self.embedding_lookup,
        positional_embedding=self.positional_embedding,
        attn_dropout_prob=config.attention_dropout_prob,
        projection=self.projection)
    # link beam_search after decoder
    self.decoder = BeamSearchDecoder(batch_size=config.batch_size,
                                     seq_length=config.seq_length,
                                     vocab_size=config.vocab_size,
                                     decoder=decoder_cell,
                                     beam_width=config.beam_width,
                                     length_penalty_weight=config.length_penalty_weight,
                                     max_decode_length=config.max_decode_length)

    self.cast = P.Cast()
    self.dtype = config.dtype
    self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
    self.expand = P.ExpandDims()
    self.multiply = P.Mul()
    self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
    # use for infer
    self.tile_beam = TileBeam(beam_width=config.beam_width)
    ones = np.ones(shape=(config.batch_size, config.max_decode_length))
    self.encode_mask = Tensor(ones, dtype=mstype.float32)
    self.scale = Tensor([math.sqrt(float(self.embedding_size))], dtype=mstype.float32)
    self.reshape = P.Reshape()
def __init__(self, dtype):
    super(Cast, self).__init__()
    self.op = P.Cast()
    self.dtype = dtype
def __init__(self,
             num_features,
             eps=1e-5,
             momentum=0.9,
             affine=True,
             gamma_init='ones',
             beta_init='zeros',
             moving_mean_init='zeros',
             moving_var_init='ones',
             use_batch_statistics=None,
             device_num_each_group=1):
    super(_BatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1")
    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))

    self.use_batch_statistics = use_batch_statistics
    self.num_features = num_features
    self.eps = eps
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features),
                                 name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features),
                                     name="variance", requires_grad=False)
    self.gamma = Parameter(initializer(gamma_init, num_features),
                           name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features),
                          name="beta", requires_grad=affine)
    self.group = check_int_positive(device_num_each_group)
    self.is_global = False
    if self.group != 1:
        # Global (cross-device) batch normalization: devices are split into
        # groups and the statistics are all-reduced within each group.
        self.rank_id = get_rank()
        self.rank_size = get_group_size()
        self.device_list = [i for i in range(0, self.rank_size)]
        self.rank_list = self.list_group(self.device_list, self.group)
        self.rank_list_idx = len(self.rank_list)
        for i in range(self.rank_list_idx):
            if self.rank_id in self.rank_list[i] and self.group != 1:
                self.is_global = True
                management.create_group('group' + str(i), self.rank_list[i])
                self.all_reduce = P.AllReduce(P.ReduceOp.SUM,
                                              'group' + str(i)).add_prim_attr('fusion', 1)
    self.shape = P.Shape()
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reshape = P.Reshape()
    self.is_ascend = context.get_context("device_target") == "Ascend"
    self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
    self.momentum = 1.0 - momentum
    if context.get_context("enable_ge"):
        self.is_ge_backend = True
    else:
        self.is_ge_backend = False
    if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    else:
        self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

    data_parallel_strategy = ((1,), (1,))
    data_parallel_strategy_one = ((1,), ())
    self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
    self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
    self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
    self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
    self.assign_sub_mean = P.AssignSub().set_strategy(data_parallel_strategy)
    self.assign_sub_var = P.AssignSub().set_strategy(data_parallel_strategy)
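# A minimal usage sketch (assumption: concrete subclasses such as
# nn.BatchNorm2d delegate to this _BatchNorm base class, as in standard
# MindSpore, so the base is exercised through them rather than directly).
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

bn = nn.BatchNorm2d(num_features=3, eps=1e-5, momentum=0.9)
x = Tensor(np.ones([1, 3, 4, 4]).astype(np.float32))
y = bn(x)   # same shape as the input: (1, 3, 4, 4)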
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(BertModel, self).__init__()
    config = copy.deepcopy(config)
    if not is_training:
        config.hidden_dropout_prob = 0.0
        config.attention_probs_dropout_prob = 0.0

    self.input_mask_from_dataset = config.input_mask_from_dataset
    self.token_type_ids_from_dataset = config.token_type_ids_from_dataset
    self.batch_size = config.batch_size
    self.seq_length = config.seq_length
    self.hidden_size = config.hidden_size
    self.num_hidden_layers = config.num_hidden_layers
    self.embedding_size = config.hidden_size
    self.token_type_ids = None
    self.last_idx = self.num_hidden_layers - 1
    output_embedding_shape = [self.batch_size, self.seq_length, self.embedding_size]

    if not self.token_type_ids_from_dataset:
        self.token_type_ids = initializer(
            "zeros", [self.batch_size, self.seq_length], mstype.int32).init_data()

    self.bert_embedding_lookup = EmbeddingLookup(
        vocab_size=config.vocab_size,
        embedding_size=self.embedding_size,
        embedding_shape=output_embedding_shape,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=config.initializer_range)
    self.bert_embedding_postprocessor = EmbeddingPostprocessor(
        embedding_size=self.embedding_size,
        embedding_shape=output_embedding_shape,
        use_relative_positions=config.use_relative_positions,
        use_token_type=True,
        token_type_vocab_size=config.type_vocab_size,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=0.02,
        max_position_embeddings=config.max_position_embeddings,
        dropout_prob=config.hidden_dropout_prob)
    self.bert_encoder = BertTransformer(
        batch_size=self.batch_size,
        hidden_size=self.hidden_size,
        seq_length=self.seq_length,
        num_attention_heads=config.num_attention_heads,
        num_hidden_layers=self.num_hidden_layers,
        intermediate_size=config.intermediate_size,
        attention_probs_dropout_prob=config.attention_probs_dropout_prob,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=config.initializer_range,
        hidden_dropout_prob=config.hidden_dropout_prob,
        use_relative_positions=config.use_relative_positions,
        hidden_act=config.hidden_act,
        compute_type=config.compute_type,
        return_all_encoders=True,
        enable_fused_layernorm=config.enable_fused_layernorm)

    self.cast = P.Cast()
    self.dtype = config.dtype
    self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
    self.slice = P.StridedSlice()
    self.squeeze_1 = P.Squeeze(axis=1)
    self.dense = nn.Dense(self.hidden_size, self.hidden_size,
                          activation="tanh",
                          weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
    self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
def __init__(self):
    super(ClipGradients, self).__init__()
    self.clip_by_norm = nn.ClipByNorm()
    self.cast = P.Cast()
    self.dtype = P.DType()
def __init__(self):
    super(Net, self).__init__()
    self.softmax = P.Softmax(axis=1)
    self.cast = P.Cast()
    self.relu = P.ReLU()
    self.biasadd = P.BiasAdd()
def __init__(self,
             config,
             batch_size,
             num_classes,
             use_sigmoid_cls,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    super(Proposal, self).__init__()
    cfg = config
    self.batch_size = batch_size
    self.num_classes = num_classes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls

    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
        self.activation = P.Sigmoid()
        self.reshape_shape = (-1, 1)
    else:
        self.cls_out_channels = num_classes
        self.activation = P.Softmax(axis=1)
        self.reshape_shape = (-1, 2)

    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))

    self.num_pre = cfg.rpn_proposal_nms_pre
    self.min_box_size = cfg.rpn_proposal_min_bbox_size
    self.nms_thr = cfg.rpn_proposal_nms_thr
    self.nms_post = cfg.rpn_proposal_nms_post
    self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
    self.max_num = cfg.rpn_proposal_max_num
    self.num_levels = len(cfg.anchor_strides)

    # Op Define
    self.squeeze = P.Squeeze()
    self.reshape = P.Reshape()
    self.cast = P.Cast()
    self.feature_shapes = cfg.feature_shapes
    self.transpose_shape = (1, 2, 0)
    self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width),
                                      means=self.target_means,
                                      stds=self.target_stds)
    self.nms = P.NMSWithMask(self.nms_thr)
    self.concat_axis0 = P.Concat(axis=0)
    self.concat_axis1 = P.Concat(axis=1)
    self.split = P.Split(axis=1, output_num=5)
    self.min = P.Minimum()
    self.gatherND = P.GatherNd()
    self.slice = P.Slice()
    self.select = P.Select()
    self.greater = P.Greater()
    self.transpose = P.Transpose()
    self.tile = P.Tile()
    self.set_train_local(config, training=True)
    self.multi_10 = Tensor(10.0, mstype.float16)
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             damping=0.03, loss_scale=1, frequency=278, batch_size=32,
             has_bias=True, activation=None):
    super(Dense_SKFAC_GPU, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.skfac = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]))

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    split_dim = 128
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    self.matrix_A_inv = Parameter(Tensor(np.zeros((in_channels, in_channels)).astype(np.float32)),
                                  requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros((out_channels, out_channels)).astype(np.float32)),
                                  requires_grad=False)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.mul = P.Mul()
    self.matmul = P.MatMul(transpose_a=True)
    self.matmul_B = P.MatMul(transpose_b=True)
    self.matmul_ = P.MatMul()
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.batch_size = Tensor(batch_size, mstype.float16)
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.damping = Parameter(Tensor(damping), requires_grad=False)
    self.dampingA = Tensor(np.identity(batch_size), mstype.float32)
    self.dampingG = Tensor(np.identity(batch_size), mstype.float32)
    self.I_G = Tensor(np.identity(out_channels), mstype.float32)
    self.I_A = Tensor(np.identity(in_channels), mstype.float32)
    self.cast = P.Cast()
    self.gather = P.Gather()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.add = P.Add()
    self.sqrt = P.Sqrt()
    self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
    self.vector_matmul = P.BatchMatMul(transpose_a=True)
    self.batch_coefficient = Tensor((1 / 32) ** 0.5, mstype.float32)
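# A minimal construction sketch (assumptions: the SKFAC helper imports above
# are available and the CholeskyTrsm custom op is registered for GPU; the
# channel sizes are illustrative).
fc = Dense_SKFAC_GPU(in_channels=1024,
                     out_channels=1001,
                     damping=0.03,
                     loss_scale=128,
                     frequency=278,
                     batch_size=32,
                     has_bias=False)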
def __init__(self, strategy1, strategy2, strategy3):
    super().__init__()
    self.mul = P.Mul().set_strategy(strategy1)
    self.mul2 = P.Mul().set_strategy(strategy2)
    self.cast = P.Cast().set_strategy(strategy3)
    self.cast2 = P.Cast().set_strategy(strategy3)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same',
             padding=0, dilation=1, group=1, data_format='NCHW', has_bias=False,
             weight_init='normal', damping=0.03, loss_scale=1, frequency=278,
             batch_size=32, bias_init='zeros'):
    self.skfac = True
    self.hw = kernel_size * kernel_size
    kernel_size = twice(kernel_size)
    super(Conv2d_SKFAC_GPU, self).__init__(in_channels, out_channels, kernel_size,
                                           stride, pad_mode, padding, dilation, group,
                                           data_format, has_bias, weight_init, bias_init)
    self.conv2d = P.Conv2D(out_channel=self.out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=self.group)
    self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
    self.matrix_G_dim = self.out_channels
    split_dim = 128
    # Wrap the numpy buffers in Tensor so Parameter receives a valid default input.
    self.matrix_A_inv = Parameter(Tensor(np.zeros((self.matrix_A_dim, self.matrix_A_dim)).astype(np.float32)),
                                  requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros((self.matrix_G_dim, self.matrix_G_dim)).astype(np.float32)),
                                  requires_grad=False)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
    self.matmul = P.MatMul(transpose_a=True)
    self.matmul_ = P.MatMul()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.batch_size = Tensor(batch_size, mstype.float16)
    self.transpose = P.Transpose()
    self.cast = P.Cast()
    self.gather = P.Gather()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.sqrt = P.Sqrt()
    self.reduce_mean = P.ReduceMean(keep_dims=False)
    self.damping = Parameter(Tensor(damping), requires_grad=False)
    self.dampingA = Tensor(np.identity(batch_size), mstype.float32)
    self.dampingG = Tensor(np.identity(batch_size), mstype.float32)
    self.I_G = Tensor(np.identity(out_channels), mstype.float32)
    self.I_A = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
    self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
    self.vector_matmul = P.BatchMatMul(transpose_a=True)
    self.batch_coefficient = Tensor((1 / 32) ** 0.5, mstype.float32)
import mindspore.common.dtype as mstype

Conv = P.Conv2D(out_channel=64, kernel_size=7, mode=1, pad_mode="valid",
                pad=0, stride=1, dilation=1, group=1)
Relu = P.ReLU()
Fusion = Primitive('FusionOp')
Reduce = P.ReduceOp()
Biasadd = P.BiasAdd()
Biasaddgrad = G.BiasAddGrad()
Cast = P.Cast()
Fusion_relu_relu = Primitive('FusionOp_ReLU_ReLU')
Fusion_biasadd = Primitive('FusionOp_ReLU_ReLU_ReLU_BiasAdd_ReLU_ReLU_ReLU')
Fusion_biasaddgrad = Primitive('FusionOp_ReLU_ReLU_ReLU_BiasAddGrad_ReLU_ReLU_ReLU')
Add = P.TensorAdd()
Sub = P.Sub()
make_tuple = Primitive('make_tuple')


class FnDict:
    def __init__(self):
        self.fnDict = {}
def __init__(self,
             from_tensor_width,
             to_tensor_width,
             from_seq_length,
             to_seq_length,
             num_attention_heads=1,
             size_per_head=512,
             query_act=None,
             key_act=None,
             value_act=None,
             has_attention_mask=False,
             attention_probs_dropout_prob=0.0,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             do_return_2d_tensor=False,
             use_relative_positions=False,
             compute_type=mstype.float32):
    super(BertAttention, self).__init__()
    self.from_seq_length = from_seq_length
    self.to_seq_length = to_seq_length
    self.num_attention_heads = num_attention_heads
    self.size_per_head = size_per_head
    self.has_attention_mask = has_attention_mask
    self.use_relative_positions = use_relative_positions

    self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type)
    self.reshape = P.Reshape()
    self.shape_from_2d = (-1, from_tensor_width)
    self.shape_to_2d = (-1, to_tensor_width)
    weight = TruncatedNormal(initializer_range)
    units = num_attention_heads * size_per_head
    self.query_layer = nn.Dense(from_tensor_width,
                                units,
                                activation=query_act,
                                weight_init=weight).to_float(compute_type)
    self.key_layer = nn.Dense(to_tensor_width,
                              units,
                              activation=key_act,
                              weight_init=weight).to_float(compute_type)
    self.value_layer = nn.Dense(to_tensor_width,
                                units,
                                activation=value_act,
                                weight_init=weight).to_float(compute_type)

    self.shape_from = (-1, from_seq_length, num_attention_heads, size_per_head)
    self.shape_to = (-1, to_seq_length, num_attention_heads, size_per_head)

    self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
    self.multiply = P.Mul()
    self.transpose = P.Transpose()
    self.trans_shape = (0, 2, 1, 3)
    self.trans_shape_relative = (2, 0, 1, 3)
    self.trans_shape_position = (1, 2, 0, 3)
    self.multiply_data = Tensor([-10000.0,], dtype=compute_type)
    self.matmul = P.BatchMatMul()

    self.softmax = nn.Softmax()
    self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

    if self.has_attention_mask:
        self.expand_dims = P.ExpandDims()
        self.sub = P.Sub()
        self.add = P.Add()
        self.cast = P.Cast()
        self.get_dtype = P.DType()

    if do_return_2d_tensor:
        self.shape_return = (-1, num_attention_heads * size_per_head)
    else:
        self.shape_return = (-1, from_seq_length, num_attention_heads * size_per_head)

    self.cast_compute_type = SaturateCast(dst_type=compute_type)
    if self.use_relative_positions:
        self._generate_relative_positions_embeddings = \
            RelaPosEmbeddingsGenerator(length=to_seq_length,
                                       depth=size_per_head,
                                       max_relative_position=16,
                                       initializer_range=initializer_range,
                                       use_one_hot_embeddings=use_one_hot_embeddings)
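# A minimal shape sketch (assumptions: the surrounding BERT code defines
# construct(from_tensor, to_tensor, attention_mask), and for self-attention
# the same tensor is passed as query and key/value source; all sizes below
# are illustrative).
import numpy as np
from mindspore import Tensor

attention = BertAttention(from_tensor_width=768, to_tensor_width=768,
                          from_seq_length=128, to_seq_length=128,
                          num_attention_heads=12, size_per_head=64,
                          has_attention_mask=True)
from_tensor = Tensor(np.random.randn(2, 128, 768).astype(np.float32))
attention_mask = Tensor(np.ones((2, 128, 128)).astype(np.float32))
out = attention(from_tensor, from_tensor, attention_mask)
# with do_return_2d_tensor=False, out has shape (2, 128, 12 * 64)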
def hard_swish(x):
    x = P.Cast()(x, ms.float32)
    y = x + 3.0
    y = clip_by_value(y, 0.0, 6.0)
    y = y / 6.0
    return x * y
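# A quick numeric check of hard_swish(x) = x * clip(x + 3, 0, 6) / 6, the
# standard hard-swish definition the function above implements (assumption:
# default MindSpore context; hard_swish and its imports are in scope).
import numpy as np
import mindspore as ms
from mindspore import Tensor

x = Tensor(np.array([-4.0, -1.0, 0.0, 1.0, 4.0]), ms.float32)
print(hard_swish(x))   # approximately [-0.0, -0.333, 0.0, 0.667, 4.0]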
def __init__(self, config):
    super(CreateAttentionMaskFromInputMask, self).__init__()
    self.input_mask = None
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = (-1, 1, config.seq_length)
def __init__(self, type0, type1):
    super(Net, self).__init__()
    self.Cast = P.Cast()
    self.type0 = type0
    self.type1 = type1
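# For reference, the wrapped operator is called as cast(x, target_dtype);
# a quick standalone check of P.Cast itself:
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.ones([2, 3]).astype(np.float32))
y = P.Cast()(x, mstype.float16)   # y.dtype == mstype.float16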
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same',
             padding=0, dilation=1, group=1, data_format='NCHW', has_bias=False,
             weight_init='normal', damping=0.03, loss_scale=1, frequency=278,
             batch_size=32, bias_init='zeros'):
    self.thor = True
    ksizes = (1, kernel_size, kernel_size, 1)
    self.hw = kernel_size * kernel_size
    strides = (1, stride, stride, 1)
    kernel_size = twice(kernel_size)
    super(Conv2d_Thor, self).__init__(in_channels, out_channels, kernel_size,
                                      stride, pad_mode, padding, dilation, group,
                                      data_format, has_bias, weight_init, bias_init)
    self.conv2d = P.Conv2D(out_channel=self.out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=self.group)
    self.batch_size = batch_size
    self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.transpose02314 = P.CusTranspose02314()
    self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
    self.matrix_G_dim = self.out_channels
    self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(
        self.matrix_A_dim, self.in_channels, True)
    self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(
        self.matrix_G_dim, self.in_channels, False)
    self.matrix_A_device_temp_shape = (self.matrix_A_device_shape[0], self.matrix_A_device_shape[2],
                                       self.matrix_A_device_shape[1], self.matrix_A_device_shape[3])
    self.matrix_G_device_temp_shape = (self.matrix_G_device_shape[0], self.matrix_G_device_shape[2],
                                       self.matrix_G_device_shape[1], self.matrix_G_device_shape[3])
    self.matrix_A_inv = Parameter(
        Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16),
                          self.matrix_A_device_shape)),
        name='matrix_A_inv', requires_grad=False)
    self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
    self.matrix_G_inv = Parameter(
        Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16),
                          self.matrix_G_device_shape)),
        name="matrix_G_inv", requires_grad=False)
    self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
    self.fake_G = Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16),
                                    self.matrix_G_device_shape))

    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = Tensor(damping)
    self.vector_matmul = P.CusBatchMatMul()
    self.diag_block_dim = 128
    self.channels_slice_flag = False
    if self.in_channels % C0 != 0:
        self.channels_slice_flag = True

    self.padA_flag = False
    if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
            and self.matrix_A_dim > self.diag_block_dim:
        self.padA_flag = True
        pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
        self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
    self.device_shape_pad_flag = False
    if self.matrix_A_dim != self.matrix_A_device_dim:
        self.device_shape_pad_flag = True
        self.device_shape_pad = P.Pad(((0, 0), (0, C0 - self.in_channels),
                                       (0, 0), (0, C0 - self.in_channels)))
    self.slice = P.Slice()
    self.gather = P.GatherV2()
    self.freq = Tensor(frequency, mstype.int32)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.axis = 0

    # Pad the damping identities up to a multiple of the 128 diagonal block size.
    dampingA_dim = self.matrix_A_dim
    if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
        dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
    dampingG_dim = self.matrix_G_dim
    if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
        dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim
    self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
    self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)

    self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
    self.fused_abs_max2 = P.CusFusedAbsMax1()
    self.log = P.Log()
    self.exp = P.Exp()
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
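# A minimal construction sketch (assumptions: Ascend backend with the Cus*
# custom ops registered; channel and kernel sizes are illustrative, matching
# a typical ResNet stem convolution).
conv = Conv2d_Thor(in_channels=3,
                   out_channels=64,
                   kernel_size=7,
                   stride=2,
                   pad_mode='same',
                   damping=0.03,
                   loss_scale=128,
                   frequency=278,
                   batch_size=32)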