Example #1
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainingWrapper, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_grad()  # enable gradient computation on the network
        self.network.add_flags(defer_inline=True)
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None

        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        if self.parallel_mode in [
                ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL
        ]:
            self.reducer_flag = True
        if self.reducer_flag:
            mean = context.get_auto_parallel_context("gradients_mean")
            if auto_parallel_context().get_device_num_is_set():
                degree = context.get_auto_parallel_context("device_num")
            else:
                degree = get_group_size()
            self.grad_reducer = nn.DistributedGradReducer(
                optimizer.parameters, mean, degree)

        self.hyper_map = C.HyperMap()
        # Ascend operators that track floating-point overflow status
        self.alloc_status = NPUAllocFloatStatus()
        self.get_status = NPUGetFloatStatus()
        self.clear_status = NPUClearFloatStatus()
        self.reduce_sum = ReduceSum(keep_dims=False)
        self.base = Tensor(1, mstype.float32)
        self.less_equal = LessEqual()
        self.allreduce = P.AllReduce()
        self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
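
The attributes above are only half of the wrapper; they are consumed by a construct() method that this excerpt omits. The following is a minimal sketch of the usual pattern (loss-scaled backward pass with Ascend overflow detection). It is a reconstruction, not the original code, and it assumes `from mindspore.ops import functional as F` in addition to the imports the snippet already relies on:

    def construct(self, *inputs):
        # Sketch only: reconstructs the typical step for this wrapper.
        weights = self.weights
        loss = self.network(*inputs)
        # Seed the backward pass with the constant sensitivity self.sens.
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        init = self.alloc_status()       # buffer holding overflow flags
        init = F.depend(init, loss)      # clear only after the forward pass
        self.clear_status(init)
        grads = self.grad(self.network, weights)(*inputs, sens)
        if self.reducer_flag:
            grads = self.grad_reducer(grads)   # all-reduce across devices
        init = F.depend(init, grads)     # read only after the backward pass
        self.get_status(init)
        flag_sum = self.reduce_sum(init, (0,))
        if self.is_distributed:
            flag_sum = self.allreduce(flag_sum)
        # cond is True when any device observed a float overflow.
        cond = self.less_equal(self.base, flag_sum)
        loss = F.depend(loss, self.optimizer(grads))
        return loss, cond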
Example #2
    def __init__(self, network, optimizer, sens=1):
        super(TrainingWrapper, self).__init__(auto_prefix=False)
        self.network = network
        self.depend_network = Depend_network(network)
        # self.weights = ms.ParameterTuple(network.trainable_params())
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
        self.print = P.Print()
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        if self.parallel_mode in [
                ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL
        ]:
            self.reducer_flag = True
        if self.reducer_flag:
            mean = context.get_auto_parallel_context("gradients_mean")
            # Unlike Example #1, the device count is always taken from the
            # communication group rather than from the auto-parallel context.
            degree = get_group_size()
            self.grad_reducer = nn.DistributedGradReducer(
                optimizer.parameters, mean, degree)
Example #3
    def __init__(self, network, optimizer, sens=1.0, use_global_norm=False):
        super(TrainingWrapper, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_grad()
        self.weights = ms.ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
        self.use_global_norm = use_global_norm
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        if self.parallel_mode in [
                ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL
        ]:
            self.reducer_flag = True
        if self.reducer_flag:
            mean = context.get_auto_parallel_context("gradients_mean")
            if auto_parallel_context().get_device_num_is_set():
                degree = context.get_auto_parallel_context("device_num")
            else:
                degree = get_group_size()
            self.grad_reducer = nn.DistributedGradReducer(
                optimizer.parameters, mean, degree)
        self.hyper_map = C.HyperMap()
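
The use_global_norm flag is only meaningful in the corresponding construct(), which the excerpt omits. Below is a minimal sketch of how the flag is typically honored; C.clip_by_global_norm comes from mindspore.ops.composite (aliased as C here), while the P.Fill/F.depend calls assume operations/functional imports not shown in the snippet:

    def construct(self, *inputs):
        # Sketch only; the original construct() is not part of the excerpt.
        weights = self.weights
        loss = self.network(*inputs)
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = self.grad(self.network, weights)(*inputs, sens)
        if self.reducer_flag:
            grads = self.grad_reducer(grads)
        if self.use_global_norm:
            # Clip by the global norm taken over all parameter gradients.
            grads = C.clip_by_global_norm(grads, clip_norm=1.0)
        return F.depend(loss, self.optimizer(grads))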
Example #4
    def __init__(self, G, generator, optimizer, sens=1.0):
        super(TrainOneStepG, self).__init__(auto_prefix=False)
        self.optimizer = optimizer
        self.G = G
        self.G.set_grad()
        self.G.set_train()
        # Freeze both discriminators: only the generators are updated here.
        self.G.D_A.set_grad(False)
        self.G.D_A.set_train(False)
        self.G.D_B.set_grad(False)
        self.G.D_B.set_train(False)
        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.weights = ms.ParameterTuple(generator.trainable_params())
        self.net = WithLossCell(G)
        self.reducer_flag = False
        self.grad_reducer = None
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        if self.parallel_mode in [
                ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL
        ]:
            self.reducer_flag = True
        if self.reducer_flag:
            mean = context.get_auto_parallel_context("gradients_mean")
            if auto_parallel_context().get_device_num_is_set():
                degree = context.get_auto_parallel_context("device_num")
            else:
                degree = get_group_size()
            self.grad_reducer = nn.DistributedGradReducer(
                optimizer.parameters, mean, degree)
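
Example #4 freezes the two discriminators D_A and D_B so that a training step updates only the generator parameters, the standard alternating scheme for GAN training. Hypothetical wiring (the construct() signature and all names below are illustrative, not taken from the source):

optimizer_G = nn.Adam(generator.trainable_params(), learning_rate=2e-4, beta1=0.5)
train_G = TrainOneStepG(G, generator, optimizer_G)
loss_G = train_G(img_A, img_B)  # one optimizer step on the generators only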
Example #5
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainingWrapper, self).__init__(auto_prefix=False)
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        if self.parallel_mode in [
                ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL
        ]:
            self.reducer_flag = True
        if self.reducer_flag:
            # "mirror_mean" is the key used by older MindSpore releases;
            # newer versions call it "gradients_mean" (see Examples #1-#4).
            mean = context.get_auto_parallel_context("mirror_mean")
            if auto_parallel_context().get_device_num_is_set():
                degree = context.get_auto_parallel_context("device_num")
            else:
                degree = get_group_size()
            self.grad_reducer = nn.DistributedGradReducer(
                optimizer.parameters, mean, degree)