def __init__(self, network, optimizer, sens=1):
    """Build the loss-scaled CenterNet training cell.

    Args:
        network: forward network whose gradients will be computed.
        optimizer: optimizer holding the trainable parameters.
        sens (int): static loss-scale value stored as ``loss_scale``.
    """
    super(CenterNetWithLossScaleCell, self).__init__(auto_prefix=False)
    self.image = ImagePreProcess()
    self.network = network
    self.network.set_grad()
    self.optimizer = optimizer
    self.weights = optimizer.parameters
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.allreduce = ops.AllReduce()
    # Gradient reduction is the identity unless running data/hybrid parallel.
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    self.reducer_flag = self.parallel_mode in (
        ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL)
    if self.reducer_flag:
        self.degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(
            optimizer.parameters, False, self.degree)
    else:
        self.degree = 1
        self.grad_reducer = ops.identity
    self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
    self.cast = ops.Cast()
    # NPU float-status primitives — presumably used for overflow detection
    # on Ascend in construct(); confirm against the construct body.
    self.alloc_status = ops.NPUAllocFloatStatus()
    self.get_status = ops.NPUGetFloatStatus()
    self.clear_before_grad = ops.NPUClearFloatStatus()
    self.reduce_sum = ops.ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.grad_scale = GradScale()
    self.loss_scale = sens
def __init__(self, G, generator, optimizer, sens=1.0):
    """Build the generator-side one-step training cell.

    The two discriminators held by ``G`` are frozen (no grad, eval mode)
    so that only the generator parameters are updated here.
    """
    super(TrainOneStepG, self).__init__(auto_prefix=False)
    self.optimizer = optimizer
    self.G = G
    self.G.set_grad()
    self.G.set_train()
    # Freeze both discriminators: this cell trains the generator only.
    for disc in (self.G.D_A, self.G.D_B):
        disc.set_grad(False)
        disc.set_train(False)
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.sens = sens
    self.weights = ms.ParameterTuple(generator.trainable_params())
    self.net = WithLossCell(G)
    self.grad_reducer = None
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    self.reducer_flag = self.parallel_mode in (
        ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL)
    if self.reducer_flag:
        mean = context.get_auto_parallel_context("gradients_mean")
        # Prefer the explicitly configured device count; fall back to the
        # communication group size when it was not set.
        if auto_parallel_context().get_device_num_is_set():
            degree = context.get_auto_parallel_context("device_num")
        else:
            degree = get_group_size()
        self.grad_reducer = nn.DistributedGradReducer(
            optimizer.parameters, mean, degree)
def __init__(self, net, sens=False):
    """Gradient wrapper yielding grads w.r.t. both inputs and parameters.

    Args:
        net: network to differentiate.
        sens (bool): passed through as ``sens_param`` — whether the caller
            supplies an explicit sensitivity (gradient seed).
    """
    super().__init__()
    self.net = net
    self.params = ParameterTuple(self.net.trainable_params())
    self.grad = P.GradOperation(get_all=True, get_by_list=True, sens_param=sens)
def __init__(self, network, optimizer, scale_update_cell=None):
    """BERT one-step training cell with dynamic loss scaling.

    Args:
        network: BERT network (with loss) to train.
        optimizer: optimizer applied to ``network.trainable_params()``.
        scale_update_cell: optional loss-scale update cell; when given,
            ``loss_scale`` becomes a Parameter initialized from it.
    """
    super(BertTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = ops.GradOperation(
        get_by_list=True,
        sens_param=True)
    self.reducer_flag = False
    self.allreduce = ops.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    # Gradients are reduced across devices only in data/hybrid parallel modes.
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = ops.identity
    self.degree = 1
    if self.reducer_flag:
        self.degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = ops.Cast()
    # NPU float-status ops — presumably used in construct() for overflow
    # detection on Ascend; confirm against the construct body.
    self.alloc_status = ops.NPUAllocFloatStatus()
    self.get_status = ops.NPUGetFloatStatus()
    self.clear_before_grad = ops.NPUClearFloatStatus()
    self.reduce_sum = ops.ReduceSum(keep_dims=False)
    # NOTE(review): ops.ControlDepend was deprecated and later removed in
    # newer MindSpore releases (replaced by ops.Depend) — confirm the
    # MindSpore version this file targets.
    self.depend_parameter_use = ops.ControlDepend(depend_mode=1)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.hyper_map = ops.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), name="loss_scale")
def __init__(self, network, optimizer, sens=1.0):
    """Minimal one-step training cell: grad w.r.t. parameters, then update.

    Args:
        network: forward network producing the loss.
        optimizer: optimizer whose parameters are differentiated and updated.
        sens (float): sensitivity (gradient seed) scale.
    """
    super(TrainOneStepCell, self).__init__(auto_prefix=False)
    self.network = network
    self.optimizer = optimizer
    self.weights = optimizer.parameters
    self.sens = sens
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
def __init__(self, network, optimizer):
    """CenterNet training cell without loss scaling.

    Args:
        network: forward network; put into grad mode here.
        optimizer: optimizer holding the trainable parameters.
    """
    super(CenterNetWithoutLossScaleCell, self).__init__(auto_prefix=False)
    self.image = ImagePreProcess()
    self.optimizer = optimizer
    self.weights = optimizer.parameters
    self.network = network
    self.network.set_grad()
    # No sensitivity input: gradients are seeded internally.
    self.grad = ops.GradOperation(get_by_list=True, sens_param=False)
def __init__(self, network, optimizer, grad_sum, sens=1.0):
    """Forward/backward cell that accumulates gradients into ``grad_sum``.

    Args:
        network: forward network producing the loss.
        optimizer: optimizer (parameters are taken from the network here).
        grad_sum: parameter buffers receiving accumulated gradients.
        sens (float): sensitivity (gradient seed) scale.
    """
    super(TrainForwardBackward, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    # defer_inline keeps this cell as a separate subgraph when compiled.
    self.network.add_flags(defer_inline=True)
    self.optimizer = optimizer
    self.weights = ParameterTuple(network.trainable_params())
    self.grad_sum = grad_sum
    self.sens = sens
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.hyper_map = ops.HyperMap()
def __init__(self, network, optimizer, sens=1.0):
    """BERT one-step training cell with static sensitivity.

    Args:
        network: BERT network (with loss) to train.
        optimizer: optimizer applied to ``network.trainable_params()``.
        sens (float): static sensitivity (gradient seed) scale.
    """
    super(BertTrainOneStepCell, self).__init__(auto_prefix=False)
    self.network = network
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.sens = sens
    self.grad_reducer = None
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    self.reducer_flag = self.parallel_mode in (
        ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL)
    if self.reducer_flag:
        # NOTE(review): "mirror_mean" is the legacy (pre-1.0) context key;
        # newer MindSpore uses "gradients_mean" — confirm target version.
        mean = context.get_auto_parallel_context("mirror_mean")
        degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(
            optimizer.parameters, mean, degree)
    self.cast = ops.Cast()
    self.hyper_map = ops.HyperMap()
def __init__(self, network, optimizer, scale_update_cell=None, enable_global_norm=True):
    """Training cell with optional global-norm clipping and loss scaling.

    Args:
        network: forward network producing the loss.
        optimizer: optimizer holding the trainable parameters.
        scale_update_cell: optional loss-scale manager cell.
        enable_global_norm (bool): whether global-norm clipping is enabled.
    """
    super().__init__()
    self.network = network
    self.optimizer = optimizer
    self.weights = optimizer.parameters
    self.enable_global_norm = enable_global_norm
    self.grad = P.GradOperation(get_by_list=True)
    # Reduction across devices only applies in data/hybrid parallel modes;
    # otherwise gradients pass through unchanged.
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    self.reducer_flag = self.parallel_mode in (
        ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL)
    self.loss_scale_manager = scale_update_cell
    self.grad_reducer = F.identity
def __init__(self, network, optimizer, scale_update_cell=None):
    """BERT-poetry training cell with device-specific overflow detection.

    Args:
        network: BERT network (with loss) to train.
        optimizer: optimizer applied to ``network.trainable_params()``.
        scale_update_cell: optional loss-scale update cell; when given,
            ``loss_scale`` becomes a Parameter initialized from it.
    """
    super(BertPoetryCell, self).__init__(network, optimizer, scale_update_cell)
    # NOTE(review): the attributes below likely duplicate work done by the
    # parent __init__ (same network/optimizer wiring) — confirm the base class.
    self.network = network
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = ops.GradOperation(
        get_by_list=True,
        sens_param=True)
    self.reducer_flag = False
    self.allreduce = ops.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = None
    if self.reducer_flag:
        # NOTE(review): "mirror_mean" is the legacy (pre-1.0) context key;
        # newer MindSpore uses "gradients_mean" — confirm target version.
        mean = context.get_auto_parallel_context("mirror_mean")
        degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = ops.Cast()
    self.gpu_target = False
    # Overflow detection differs per backend: GPU uses FloatStatus on each
    # gradient; other targets (Ascend) use the NPU float-status registers.
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = ops.FloatStatus()
        self.addn = ops.AddN()
        self.reshape = ops.Reshape()
    else:
        self.alloc_status = ops.NPUAllocFloatStatus()
        self.get_status = ops.NPUGetFloatStatus()
        self.clear_before_grad = ops.NPUClearFloatStatus()
    self.reduce_sum = ops.ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.hyper_map = ops.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), name="loss_scale")
def __init__(self, loss_netD, loss_netG, optimizerD, optimizerG, sens=1, auto_prefix=True):
    """One-step GAN training cell holding both discriminator and generator sides.

    Args:
        loss_netD: discriminator loss network.
        loss_netG: generator loss network.
        optimizerD: optimizer for the discriminator parameters.
        optimizerG: optimizer for the generator parameters.
        sens (int): sensitivity (gradient seed) scale.
        auto_prefix (bool): forwarded to the Cell base class.
    """
    super(TrainOneStepCell, self).__init__(auto_prefix=auto_prefix)
    # Discriminator side.
    self.loss_netD = loss_netD
    self.loss_netD.set_grad()
    self.loss_netD.add_flags(defer_inline=True)
    self.optimizerD = optimizerD
    self.weights_D = optimizerD.parameters
    # Generator side.
    self.loss_netG = loss_netG
    self.loss_netG.set_grad()
    self.loss_netG.add_flags(defer_inline=True)
    self.optimizerG = optimizerG
    self.weights_G = optimizerG.parameters
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.sens = sens
    # Parallel processing: per-side gradient reducers, identity by default.
    self.grad_reducer_G = F.identity
    self.grad_reducer_D = F.identity
    self.parallel_mode = _get_parallel_mode()
    self.reducer_flag = self.parallel_mode in (
        ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL)
    if self.reducer_flag:
        mean = _get_gradients_mean()
        degree = _get_device_num()
        self.grad_reducer_G = DistributedGradReducer(self.weights_G, mean, degree)
        self.grad_reducer_D = DistributedGradReducer(self.weights_D, mean, degree)
def __init__(self, network):
    """Wrap ``network`` to take gradients w.r.t. all of its inputs."""
    super(GradSec, self).__init__()
    self.network = network
    # No sensitivity input: the gradient seed is supplied internally.
    self.grad = ops.GradOperation(get_all=True, sens_param=False)
def construct(self, x, label):
    """Return gradients of ``self.network`` w.r.t. ``self.weights``."""
    grad_fn = ops.GradOperation(get_by_list=True)(self.network, self.weights)
    return grad_fn(x, label)
def __init__(self, net):
    """Wrap ``net`` with the default GradOperation (grad w.r.t. first input)."""
    super(GradNetWrtX, self).__init__()
    self.grad_op = ops.GradOperation()
    self.net = net
def mainf(x, y):
    """Return the gradients of ``mul`` with respect to both inputs."""
    grad_all = ops.GradOperation(get_all=True)
    return grad_all(mul)(x, y)
def __init__(self, network):
    """Wrap ``network`` with the default GradOperation (first input only)."""
    super(GradSec, self).__init__()
    self.network = network
    self.grad = ops.GradOperation()
def __init__(self, net):
    """Wrap ``net`` to take gradients w.r.t. its trainable parameters."""
    super(GradNetWrtX, self).__init__()
    self.net = net
    self.grad_op = ops.GradOperation(get_by_list=True)
    self.params = ParameterTuple(net.trainable_params())
def __init__(self, network):
    """Wrap ``network`` with a sensitivity-taking GradOperation and a bool flag."""
    super(GradNetWrtX, self).__init__(auto_prefix=False)
    self.network = network
    self.flag = MsTensor(True, dtype=mstype.bool_)
    self.grad_op = ops.GradOperation(sens_param=True)
def __init__(self, network):
    """Wrap ``network`` for parameter gradients without a sensitivity input."""
    super(Grad, self).__init__()
    self.network = network
    self.params = ParameterTuple(network.trainable_params())
    self.grad = ops.GradOperation(get_by_list=True, sens_param=False)
def __init__(self, net):
    """Wrap ``net`` with a fixed sensitivity tensor used as the gradient seed."""
    super(GradNetWrtX, self).__init__()
    self.net = net
    self.grad_op = ops.GradOperation(sens_param=True)
    # Fixed 2x3 gradient seed — presumably matches the network's output
    # shape; confirm against the construct body.
    self.grad_wrt_output = Tensor(
        [[0.1, 0.6, 0.2], [0.8, 1.3, 1.1]], dtype=mstype.float32)
def __init__(self, network):
    """Wrap ``network`` for first-input gradients without a sensitivity input."""
    super(Grad, self).__init__()
    self.network = network
    self.grad = ops.GradOperation(sens_param=False)
def __init__(self, network):
    """Wrap ``network`` for all-input gradients with two stored seed tensors."""
    super(GradSec, self).__init__()
    self.network = network
    self.grad = ops.GradOperation(get_all=True, sens_param=True)
    # Two scalar sensitivity seeds (1.0 and 0.0) for selecting gradient paths.
    self.sens1 = Tensor(np.array([1]).astype('float32'))
    self.sens2 = Tensor(np.array([0]).astype('float32'))