def __init__(self):
    super().__init__()
    self.max = P.ReduceMax()
    self.param = Parameter(Tensor(
        np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32), name="weight")
    self.zero = Tensor(np.zeros([2, 2, 2]), ms.float32)
    self.reduce = P.ReduceSum()
    self.addn = P.AddN()
def __init__(self, var, accum):
    super(MomentumFusionNet, self).__init__()
    self.op = P.ApplyMomentum()
    self.add = P.AddN()
    self.mul = P.Mul()
    self.var = Parameter(var, name="variable")
    self.accum = Parameter(accum, name="accumulate")
    self.lr = 0.1
    self.weight_decay = 0.002
    self.moment = 0.98
def __init__(self):
    super().__init__()
    self.parameter1 = Parameter(
        Tensor([1.0], ms.float32), name="parameter1")
    self.parameter2 = Parameter(
        Tensor([3.0], ms.float32), name="parameter2")
    self.assign = P.Assign()
    self.addn = P.AddN()
    self.mul = P.Mul()
    self.print = P.Print()
def __init__(self, network, max_length):
    super(AttentionOCRWithLossCell, self).__init__()
    self.network = network
    self.loss = NLLLoss()
    self.shape = P.Shape()
    self.add = P.AddN()
    self.mean = P.ReduceMean()
    self.split = P.Split(axis=0, output_num=max_length)
    self.squeeze = P.Squeeze()
    self.cast = P.Cast()
def __init__(self, network, optimizer, scale_update_cell=None):
    super(TransformerTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.network.add_flags(defer_inline=True)
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.grad = C.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.all_reduce = P.AllReduce()
    self.parallel_mode = _get_parallel_mode()
    if self.parallel_mode not in ParallelMode.MODE_LIST:
        raise ValueError("Parallel mode does not support: ", self.parallel_mode)
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = None
    if self.reducer_flag:
        mean = _get_gradients_mean()
        degree = _get_device_num()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.clip_gradients = ClipGradients()
    self.cast = P.Cast()
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = P.FloatStatus()
        self.addn = P.AddN()
        self.reshape = P.Reshape()
    else:
        self.gpu_target = False
        self.alloc_status = P.NPUAllocFloatStatus()
        self.get_status = P.NPUGetFloatStatus()
        self.clear_status = P.NPUClearFloatStatus()
        self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.depend_parameter_use = P.ControlDepend(depend_mode=1)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = P.LessEqual()
    self.hyper_map = C.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(
            Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
    self.add_flags(has_effect=True)
def softmax_addn_pass():
    x = AnyPattern()
    softmax = P.Softmax()
    pattern = CallWith(softmax, inputs=[x])

    weight_tensor = Tensor(np.zeros([42]), mindspore.float16)
    new_weight = NewTensor(weight_tensor)
    addn_ops = P.AddN()
    target = CallWith(addn_ops, inputs=[x, new_weight], should_replace=False)
    return pattern, target
def __init__(self, network, optimizer, scale_update_cell=None, micro_batches=None,
             norm_clip=1.0, mech=None):
    super(_TrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.network.add_flags(defer_inline=True)
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
    self.hyper_map = C.HyperMap()
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = P.FloatStatus()
        self.addn = P.AddN()
        self.reshape = P.Reshape()
    else:
        self.gpu_target = False
        self.alloc_status = NPUAllocFloatStatus()
        self.get_status = NPUGetFloatStatus()
        self.clear_status = NPUClearFloatStatus()
        self.reduce_sum = ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = LessEqual()
    self.depend_parameter_use = ControlDepend(depend_mode=1)
    self.allreduce = P.AllReduce()
    self.parallel_mode = _get_parallel_mode()
    self.grad_reducer = F.identity
    self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL]
    if self.reducer_flag:
        mean = _get_mirror_mean()
        degree = _get_device_num()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(),
                                           dtype=mstype.float32), name="loss_scale")
    self.add_flags(has_effect=True)

    # dp params
    self._micro_batches = micro_batches
    norm_clip = check_param_type('norm_clip', norm_clip, float)
    self._l2_norm = check_value_positive('norm_clip', norm_clip)
    self._split = P.Split(0, self._micro_batches)
    self._clip_by_global_norm = _ClipGradients()
    self._mech = mech
    self._tuple_add = _TupleAdd()
    self._hyper_map = C.HyperMap()
    self._micro_float = Tensor(micro_batches, mstype.float32)
def test_merge_addn(tag):
    """ test_merge_addn """
    fns = FnDict()
    addn = P.AddN()

    @fns
    def before(x, y, z, a):
        return addn((addn((a, x, y)), z))

    @fns
    def after(x, y, z, a):
        return addn((a, x, y, z))

    return fns[tag]
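# Sanity sketch for the merge above (assumption: run as a standalone script on
# any device). It only demonstrates that a nested AddN is numerically identical
# to one flattened AddN, which is the property the before/after rewrite relies on.
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

addn = P.AddN()
x, y, z, a = (Tensor(np.random.randn(2, 3).astype(np.float32)) for _ in range(4))
nested = addn((addn((a, x, y)), z))   # shape of the "before" graph
flat = addn((a, x, y, z))             # shape of the "after" graph
assert np.allclose(nested.asnumpy(), flat.asnumpy())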
def __init__(self, network):
    super(GRUWithLossCell, self).__init__()
    self.network = network
    self.loss = NLLLoss()
    self.logits_shape = (-1, config.src_vocab_size)
    self.reshape = P.Reshape()
    self.cast = P.Cast()
    self.mean = P.ReduceMean()
    self.text_len = config.max_length
    self.split = P.Split(axis=0, output_num=config.max_length - 1)
    self.squeeze = P.Squeeze()
    self.add = P.AddN()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
def Simulation_Caculate_Energy(self, uint_crd, uint_dr_to_dr_cof):
    '''simulation calculate energy'''
    bond_energy = self.bond_energy(uint_crd, uint_dr_to_dr_cof, self.bond_atom_a,
                                   self.bond_atom_b, self.bond_k, self.bond_r0)
    bond_energy_sum = P.ReduceSum(True)(bond_energy)

    angle_energy = self.angle_energy(uint_crd, uint_dr_to_dr_cof, self.angle_atom_a,
                                     self.angle_atom_b, self.angle_atom_c,
                                     self.angle_k, self.angle_theta0)
    angle_energy_sum = P.ReduceSum(True)(angle_energy)

    dihedral_energy = self.dihedral_energy(
        uint_crd, uint_dr_to_dr_cof, self.dihedral_atom_a, self.dihedral_atom_b,
        self.dihedral_atom_c, self.dihedral_atom_d, self.ipn, self.pk, self.gamc,
        self.gams, self.pn)
    dihedral_energy_sum = P.ReduceSum(True)(dihedral_energy)

    nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge,
                                         uint_dr_to_dr_cof, self.nb14_atom_a,
                                         self.nb14_atom_b, self.lj_scale_factor,
                                         self.LJ_A, self.LJ_B)
    nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge,
                                         uint_dr_to_dr_cof, self.nb14_atom_a,
                                         self.nb14_atom_b, self.cf_scale_factor)
    nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy)
    nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy)

    lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge,
                               uint_dr_to_dr_cof, self.nl_atom_numbers,
                               self.nl_atom_serial, self.LJ_A, self.LJ_B)
    lj_energy_sum = P.ReduceSum(True)(lj_energy)

    reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(
        uint_crd, self.charge, self.nl_atom_numbers, self.nl_atom_serial,
        uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list,
        self.excluded_numbers)
    ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy

    total_energy = P.AddN()([bond_energy_sum, angle_energy_sum, dihedral_energy_sum,
                             nb14_lj_energy_sum, nb14_cf_energy_sum, lj_energy_sum,
                             ee_ene])
    return bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, \
           nb14_cf_energy_sum, lj_energy_sum, ee_ene, total_energy
def __init__(self, network, optimizer, scale_sense):
    super(DFCNNCTCTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.optimizer = optimizer

    if isinstance(scale_sense, nn.Cell):
        self.loss_scaling_manager = scale_sense
        self.scale_sense = Parameter(Tensor(scale_sense.get_loss_scale(),
                                            dtype=mstype.float32), name="scale_sense")
    elif isinstance(scale_sense, Tensor):
        if scale_sense.shape == (1,) or scale_sense.shape == ():
            self.scale_sense = Parameter(scale_sense, name='scale_sense')
        else:
            raise ValueError("The shape of scale_sense must be (1,) or (), but got {}".format(
                scale_sense.shape))
    else:
        raise TypeError("The scale_sense must be Cell or Tensor, but got {}".format(
            type(scale_sense)))

    self.network.set_grad()
    self.weights = ParameterTuple(network.trainable_params())
    self.grad = C.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode not in ParallelMode.MODE_LIST:
        raise ValueError("Parallel mode does not support: ", self.parallel_mode)
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = None
    if self.reducer_flag:
        mean = context.get_auto_parallel_context("gradients_mean")
        degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.clip_gradients = ClipGradients()
    self.cast = P.Cast()
    self.addn = P.AddN()
    self.reshape = P.Reshape()
    self.hyper_map = C.HyperMap()
    self.less_equal = P.LessEqual()
    self.allreduce = P.AllReduce()
def __init__(self, network, optimizer, scale_update_cell=None):
    super(BertFinetuneCell, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.grad = C.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.allreduce = P.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = None
    if self.reducer_flag:
        mean = context.get_auto_parallel_context("gradients_mean")
        degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = P.Cast()
    self.gpu_target = False
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = P.FloatStatus()
        self.addn = P.AddN()
        self.reshape = P.Reshape()
    else:
        self.alloc_status = P.NPUAllocFloatStatus()
        self.get_status = P.NPUGetFloatStatus()
        self.clear_before_grad = P.NPUClearFloatStatus()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.depend_parameter_use = P.ControlDepend(depend_mode=1)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = P.LessEqual()
    self.hyper_map = C.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(
            Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
def Simulation_Caculate_Force(self, uint_crd, scaler, nl_atom_numbers, nl_atom_serial):
    '''simulation calculate force'''
    bond_force, _ = self.bond_force_with_atom_energy(
        uint_crd, scaler, self.bond_atom_a, self.bond_atom_b, self.bond_k, self.bond_r0)
    angle_force, _ = self.angle_force_with_atom_energy(
        uint_crd, scaler, self.angle_atom_a, self.angle_atom_b, self.angle_atom_c,
        self.angle_k, self.angle_theta0)
    dihedral_force, _ = self.dihedral_force_with_atom_energy(
        uint_crd, scaler, self.dihedral_atom_a, self.dihedral_atom_b,
        self.dihedral_atom_c, self.dihedral_atom_d, self.ipn, self.pk, self.gamc,
        self.gams, self.pn)
    nb14_force, _ = self.nb14_force_with_atom_energy(
        uint_crd, self.atom_LJ_type, self.charge, scaler, self.nb14_atom_a,
        self.nb14_atom_b, self.lj_scale_factor, self.cf_scale_factor,
        self.LJ_A, self.LJ_B)
    lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge,
                                              scaler, nl_atom_numbers, nl_atom_serial,
                                              self.LJ_A, self.LJ_B)
    pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge,
                                                 self.excluded_list_start,
                                                 self.excluded_list,
                                                 self.excluded_numbers)
    pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge)
    force = P.AddN()([bond_force, angle_force, dihedral_force, nb14_force, lj_force,
                      pme_excluded_force, pme_reciprocal_force])
    return force
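# Minimal sketch (illustrative shapes only, not the SPONGE kernels): both
# routines above end the same way, with a single AddN over per-term tensors of
# identical shape, here (n_atoms, 3) like a per-atom force.
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

n_atoms = 4
terms = [Tensor(np.random.randn(n_atoms, 3).astype(np.float32)) for _ in range(3)]
total_force = P.AddN()(terms)   # AddN also accepts a list of same-shape tensors
print(total_force.shape)        # (4, 3)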
def __init__(self):
    super().__init__()
    self.addN = P.AddN()
def __init__(self):
    super().__init__()
    self.para = Parameter(Tensor([1.0], ms.float32), name="para")
    self.assign = P.Assign()
    self.addn = P.AddN()
    self.relu = P.ReLU()
def __init__(self):
    super(NetWork_2, self).__init__()
    self.addN = P.AddN()
    self.step = Tensor([-1])
    self.index_0 = Tensor(-6)
def __init__(self, input_shape):
    super().__init__()
    self.addn = P.AddN()
    self.assign = P.Assign()
    self.inputdata = Parameter(initializer(1, input_shape, ms.float32), name="global_step")
def __init__(self):
    super(SecondNet, self).__init__()
    self.addN = P.AddN()
    self.max = P.Maximum()
    self.add = P.TensorAdd()
def __init__(self):
    super(NetWork_1, self).__init__()
    self.addN = P.AddN()
    self.index_0 = Tensor(3)
    self.index_1 = Tensor([5])
    self.index_3 = Tensor([True])
def __init__(self):
    super(NetWork_3, self).__init__()
    self.addN = P.AddN()
def __init__(self):
    super(NetWorkOutOfBounds, self).__init__()
    self.addN = P.AddN()
def __init__(self):
    super(NetAddN, self).__init__()
    self.net = P.AddN()
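# Hedged sketch of how a thin wrapper cell such as NetAddN is typically finished
# and called; the construct body, class name, and example inputs are assumptions,
# not the original test code.
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class AddNWrapper(nn.Cell):   # hypothetical name, kept distinct from NetAddN
    def __init__(self):
        super(AddNWrapper, self).__init__()
        self.net = P.AddN()

    def construct(self, *inputs):
        return self.net(inputs)


wrapper = AddNWrapper()
a = Tensor(np.ones((2, 2), np.float32))
b = Tensor(np.ones((2, 2), np.float32))
print(wrapper(a, b))   # every element is 2.0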
def __init__(self, network, optimizer, scale_update_cell=None, micro_batches=None,
             norm_bound=1.0, noise_mech=None, clip_mech=None):
    super(_TrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.network.add_flags(defer_inline=True)
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
    self.hyper_map = C.HyperMap()
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = P.FloatStatus()
        self.addn = P.AddN()
        self.reshape = P.Reshape()
    else:
        self.gpu_target = False
        self.alloc_status = NPUAllocFloatStatus()
        self.get_status = NPUGetFloatStatus()
        self.clear_status = NPUClearFloatStatus()
        self.reduce_sum = ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = LessEqual()
    self.depend_parameter_use = ControlDepend(depend_mode=1)
    self.allreduce = P.AllReduce()
    self.parallel_mode = _get_parallel_mode()
    self.grad_reducer = F.identity
    self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL]
    if self.reducer_flag:
        mean = _get_mirror_mean()
        degree = _get_device_num()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(),
                                           dtype=mstype.float32), name="loss_scale")
    self.add_flags(has_effect=True)

    # dp params
    self._micro_batches = micro_batches
    self._norm_bound = norm_bound
    self._split = P.Split(0, self._micro_batches)
    self._clip_by_global_norm = _ClipGradients()
    self._noise_mech = noise_mech
    self._clip_mech = clip_mech
    self._add = P.TensorAdd()
    self._norm = nn.Norm()
    self._tuple_add = _TupleAdd()
    self._hyper_map = C.HyperMap()
    self._micro_float = Tensor(micro_batches, mstype.float32)
    self._zero = Tensor(0, mstype.float32)
    self._assign = P.Assign()
    self._div = P.Div()
    self._sqrt = P.Sqrt()
    self._reduce_sum = P.ReduceSum()
    self._square_all = P.Square()
    self._less = P.Less()
    self._cast = P.Cast()

    self._noise_mech_param_updater = None
    if self._noise_mech is not None and self._noise_mech._decay_policy is not None:
        self._noise_mech_param_updater = _MechanismsParamsUpdater(
            decay_policy=self._noise_mech._decay_policy,
            decay_rate=self._noise_mech._noise_decay_rate,
            cur_noise_multiplier=self._noise_mech._noise_multiplier,
            init_noise_multiplier=self._noise_mech._initial_noise_multiplier)
        'exception': ValueError}),
    'desc_inputs': [
        Tensor(np.ones(shape=[5, 5, 8, 8]).astype(np.float32)),
        Tensor(np.ones(shape=[1, 5, 6, 6]).astype(np.float32))]}),
('NetAddN_Error', {
    'block': (NetAddN(), {
        'exception': TypeError}),
    'desc_inputs': [(np.random.randn(1, 2, 3, 4).astype(np.float32),
                     np.random.randn(1, 2, 3, 4).astype(np.float32))]}),
('AddN_Error', {
    'block': (P.AddN(), {
        'exception': TypeError}),
    'desc_inputs': [(np.random.randn(1, 2, 3, 4).astype(np.float32),
                     np.random.randn(1, 2, 3, 4).astype(np.float32))]}),
('Splite_Error', {
    'block': (NetSplit(), {
        'exception': TypeError}),
    'desc_inputs': [None]}),
('MatMul_1_Error', {
    'block': (P.MatMul(), {
        'exception': ValueError}),
def add_n(input):
    """Apply sum function."""
    # return sum(input)
    return P.AddN()(input)
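# Usage sketch for the wrapper above (assumes `P` is imported from
# mindspore.ops.operations in the surrounding module): AddN takes one tuple or
# list of same-shape, same-dtype tensors and returns their element-wise sum.
import numpy as np
from mindspore import Tensor

x = Tensor(np.full((2, 2), 1.0, dtype=np.float32))
y = Tensor(np.full((2, 2), 2.0, dtype=np.float32))
print(add_n((x, y)))   # every element is 3.0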
    'block': P.ArgMinWithValue(),
    'desc_inputs': [[128, 32, 32, 64]],
    'desc_bprop': [[1], [1]],
    'skip': ['backward']}),
('Transpose_dim3', {
    'block': P.Transpose(),
    'desc_const': [(0, 2, 1)],
    'desc_inputs': [[1, 2, 3]],
    'desc_bprop': [[1, 3, 2]]}),
('Transpose_dim4', {
    'block': P.Transpose(),
    'desc_const': [(0, 1, 2, 3)],
    'desc_inputs': [[1, 2, 3, 4]],
    'desc_bprop': [[1, 2, 4, 3]]}),
('AddN', {
    'block': NetForTupleInput(P.AddN()),
    'desc_inputs': [[2, 3, 3, 5], [2, 3, 3, 5]],
    'desc_bprop': [[2, 3, 3, 5]],
    'skip': ['backward']}),
('Shape', {
    'block': P.Shape(),
    'desc_inputs': [[3, 3, 2, 2]],
    'skip': ['backward']}),
('Reshape', {
    'block': P.Reshape(),
    'desc_const': [(64,)],
    'desc_inputs': [[64, 1]],
    'desc_bprop': [[64]]}),
('Cast', {
    'block': P.Cast(),
    'desc_const': [mstype.int32],
            new_param_group.append(next_params)
            for i in range(F.tuple_len(next_params)):
                F.assign(key_group[root][i], next_params[i])
        status = F.control_depend(optim_result, new_param_group[0][0])
        for i in range(self.dev_num - 1):
            status = F.depend(
                F.control_depend(new_param_group[i], new_param_group[i + 1][0]), status)
        return status

    def construct(self, *hyper_params):
        raise NotImplementedError


op_add = P.AddN()
op_gather = P.GatherV2()
op_mul = P.Mul()

_apply_decay = C.MultitypeFuncGraph("apply_decay")


@_apply_decay.register("Tensor", "Bool", "Tensor", "RowTensor")
def _tensor_apply_decay_with_sparse(weight_decay, if_apply, weight, gradient):
    """Get grad with weight_decay."""
    if if_apply:
        indices = gradient.indices
        values = op_add((op_gather(weight, indices, 0) *
                         F.cast(weight_decay, F.dtype(weight)), gradient.values))
        shape = gradient.dense_shape
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.ops import operations as P
from mindspore.ops import Primitive
import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor

addn = P.AddN()
mul = P.Mul()
fused_mul_addn = Primitive('FusedMulAddN')
make_tuple = Primitive('make_tuple')
tuple_getitem = Primitive('tuple_getitem')
scalar = Tensor(1.0, mstype.float32)


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
def __init__(self):
    super().__init__()
    self.addn = op.AddN()
def __init__(self):
    super().__init__()
    self.addn = P.AddN()
    self.assign = P.Assign()
    self.para1 = Parameter(Tensor(1.0, dtype=ms.float32), name='para1')
    self.para2 = Parameter(Tensor(3.0, dtype=ms.float32), name='para2')