def test_check_output(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        outs = self.calc_output(place)
        outs = [np.array(out) for out in outs]
        outs.sort(key=len)
        self.verify_output(outs)

def test_check_grad_normal(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X', 'Y'],
            'Out',
            max_relative_error=self.max_relative_error)

def test_checkout_grad(self):
    if paddle.is_compiled_with_xpu():
        paddle.enable_static()
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=1.e1)

def test_check_grad_ingore_y(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=0.006, no_grad_set=set('Y'))

def test_check_grad_normal(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X', 'Y'],
            'Out',
            check_dygraph=(self.use_mkldnn == False))

def is_xpu_available():
    """Check whether morl can access an XPU.

    Returns:
        True if Paddle was compiled with XPU support and XPU devices are visible.
    """
    xpu_count = int(os.getenv("FLAGS_selected_xpus", "-1"))
    if xpu_count < 0:
        return False

    if _HAS_FLUID:
        from paddle import fluid
        if not fluid.is_compiled_with_xpu():
            logger.warning("Found non-empty XPU_VISIBLE_DEVICES, "
                           "but morl found that Paddle was not compiled with XPU, "
                           "which may cause issues. Thus morl will not use XPU.")
            return False
    if _HAS_PADDLE:
        import paddle
        if not paddle.is_compiled_with_xpu():
            logger.warning("Found non-empty XPU_VISIBLE_DEVICES, "
                           "but morl found that Paddle was not compiled with XPU, "
                           "which may cause issues. Thus morl will not use XPU.")
            return False
    return True

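# Illustrative usage only (a minimal sketch, not part of the original morl
# sources): pick a Paddle device string based on is_xpu_available() above.
# The helper name _pick_device is hypothetical.
import paddle

def _pick_device():
    # Prefer XPU when the runtime check succeeds, otherwise fall back to CPU.
    return "xpu" if is_xpu_available() else "cpu"

paddle.set_device(_pick_device())
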
def test_check_grad(self): if paddle.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) self.check_grad_with_place(place, ["Logits"], "Loss", max_relative_error=0.2)
def test_check_output(self):
    if (paddle.is_compiled_with_xpu()
            and len(self.inputs['X'].shape) == len(self.inputs['Y'].shape)
            and self.inputs['X'].shape[0] == self.inputs['Y'].shape[0]):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, atol=1e-3)

def test_check_grad_ingore_x(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['Y'],
            'Out',
            no_grad_set=set("X"),
            max_relative_error=self.max_relative_error)

def test_check_grad(self):
    if self.dtype == 'int64' or self.dtype == 'int32':
        pass
    else:
        if paddle.is_compiled_with_xpu():
            paddle.enable_static()
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(place, self.get_x_names(), 'Y')

def test_check_grad_ingore_y(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'],
            'Out',
            no_grad_set=set('Y'),
            check_dygraph=(self.use_mkldnn == False))

def test_check_grad_normal(self):
    if (paddle.is_compiled_with_xpu()
            and len(self.inputs['X'].shape) == len(self.inputs['Y'].shape)
            and self.inputs['X'].shape[0] == self.inputs['Y'].shape[0]):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X', 'Y'], 'Out', max_relative_error=5e-2)

def test_grad(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        if not self.is_test:
            var_name_list = self.get_weight_names()
            grad_check_list = ['Input', 'init_h', 'init_c']
            grad_check_list.extend(var_name_list)
            self.check_grad_with_place(
                place,
                set(grad_check_list), ['Out', 'last_hidden', 'last_cell'],
                max_relative_error=0.1)

def get_env_device():
    """Return the device name of the running environment."""
    if paddle.is_compiled_with_cuda():
        return 'gpu'
    elif paddle.is_compiled_with_npu():
        return 'npu'
    elif paddle.is_compiled_with_rocm():
        return 'rocm'
    elif paddle.is_compiled_with_xpu():
        return 'xpu'
    return 'cpu'

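# Illustrative usage only (a sketch, not part of the original source): map the
# string returned by get_env_device() to paddle.set_device(). The mapping below
# is an assumption; ROCm builds still use GPU places in Paddle, and the NPU
# branch requires an NPU-enabled build.
import paddle

device = get_env_device()
if device == 'xpu':
    paddle.set_device('xpu')
elif device in ('gpu', 'rocm'):
    paddle.set_device('gpu')
elif device == 'npu':
    paddle.set_device('npu:0')
else:
    paddle.set_device('cpu')
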
def __init__(self,
             layers,
             strategy=None,
             comm_buffer_size=25,
             last_comm_buffer_size=1,
             find_unused_parameters=False):
    super(DataParallel,
          self).__init__(layers.full_name() + "_data_parallel")

    self._layers = layers
    self.find_unused_parameters = find_unused_parameters
    self.grad_need_sync = True

    # NOTE(chenweihang): The ParallelStrategy here is not strictly a strategy.
    # It just stores some environment variables, which can be constructed by
    # ParallelEnv. Here it is set as an optional argument.
    # This parameter is kept for compatibility with 1.x code.
    if strategy is not None:
        self._strategy = strategy
    else:
        self._strategy = _build_default_parallel_strategy()

    if self._strategy.nranks > 1:
        # check the environment
        assert parallel_helper.__parallel_ctx__clz__ is not None, \
            "ParallelContext must be initialized before. You should use init_parallel_env() before " \
            "constructing the DataParallel."

        # sync buffers and params
        # TODO(liuyuhui): XPU is currently not supported here; XPU still
        # broadcasts parameters when calling the layer.
        if not paddle.is_compiled_with_xpu():
            sync_params_buffers(self._layers)

        self.comm_buffer_size = int(comm_buffer_size * 1024 * 1024)
        # NOTE(shenliang03): We can set environment variables to control the
        # size of the last group. Default: 1MB. The role of this small group is:
        # overlap cannot work for the last group's allreduce, so keeping the
        # last group small helps improve performance.
        self.last_comm_buffer_size = int(last_comm_buffer_size * 1024 * 1024)
        self.init_reducer()
    else:
        warnings.warn(
            "The program will return to single-card operation. "
            "Please check: 1. whether you used spawn or fleetrun "
            "to start the program; 2. whether it is a multi-card "
            "program; 3. whether the current environment is multi-card.")

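# Illustrative usage only (a minimal sketch, not from the original source):
# the constructor above is normally reached by wrapping a layer with
# paddle.DataParallel after init_parallel_env(), e.g. when launched via
# paddle.distributed.spawn or fleetrun.
import paddle
import paddle.nn as nn
import paddle.distributed as dist

def train():
    dist.init_parallel_env()                       # initializes the parallel context
    model = paddle.DataParallel(nn.Linear(10, 1))  # invokes the __init__ shown above
    opt = paddle.optimizer.SGD(learning_rate=0.01,
                               parameters=model.parameters())
    loss = model(paddle.randn([4, 10])).mean()
    loss.backward()
    opt.step()
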
def backward(self, retain_graph=False):
    """
    Run backward of current Graph which starts from current Tensor.
    The new gradient will accumulate on the previous gradient.

    You can clear gradient by ``Tensor.clear_grad()`` .

    Args:
        retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
            like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
            :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
            Defaults to False.

    Returns:
        NoneType: None

    Examples:
        .. code-block:: python

            import paddle
            x = paddle.to_tensor(5., stop_gradient=False)
            for i in range(5):
                y = paddle.pow(x, 4.0)
                y.backward()
                print("{}: {}".format(i, x.grad))
            # 0: [500.]
            # 1: [1000.]
            # 2: [1500.]
            # 3: [2000.]
            # 4: [2500.]

            x.clear_grad()
            print("{}".format(x.grad))
            # 0.

    """
    if framework.in_dygraph_mode():
        if paddle.is_compiled_with_xpu():
            # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
            scaled_loss = scale_loss(self)
            scaled_loss._run_backward(framework._dygraph_tracer(),
                                      retain_graph)
        else:
            self._run_backward(framework._dygraph_tracer(), retain_graph)
    else:
        raise ValueError(
            "Variable.backward() is only available in DyGraph mode")

def check_xpu(use_xpu):
    """
    Log an error and exit when use_xpu is set to true in a
    paddlepaddle cpu/gpu/npu build.
    """
    err = "Config use_xpu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu/npu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
          "\t2. Set use_xpu as false in config file to run " \
          "model on CPU/GPU/NPU"

    try:
        if use_xpu and not paddle.is_compiled_with_xpu():
            logger.error(err)
            sys.exit(1)
    except Exception:
        pass

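# Illustrative usage only (hypothetical config dict, not from the original
# source): validate the use_xpu flag before selecting a device.
cfg = {'use_xpu': True}
check_xpu(cfg['use_xpu'])   # logs and exits if XPU is requested but unavailable
device = 'xpu' if cfg['use_xpu'] else 'cpu'
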
def test_check_output(self):
    if paddle.is_compiled_with_xpu():
        paddle.enable_static()
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place=place, no_check_set=['XShape'])

def test_check_grad(self):
    if paddle.is_compiled_with_xpu():
        paddle.enable_static()
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['X'], 'Out')

def test_check_grad(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) self.check_grad_with_place(place, ["X"], "Out")
    else:
        dim = [i for i in range(len(Y.shape))]
        dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1]
        Y = np.transpose(Y, tuple(dim))

    Out = np.matmul(X, Y)
    if not Out.shape:
        # We do not support 0-dimensional Tensors (scalars). So where
        # np.matmul outputs a scalar, we must convert to a Tensor of
        # shape (1, ) instead.
        # Everywhere else, we are compatible with np.matmul.
        Out = np.array([Out], dtype="float64")
    return Out


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestMatMulV2Op(OpTest):
    """
    case 1
    """

    def config(self):
        self.x_shape = (100, )
        self.y_shape = (100, )
        self.trans_x = False
        self.trans_y = False

    def init_kernel_type(self):
        self.dtype = "float32"

    def setUp(self):

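# Illustrative only (a small sketch, not from the original test file): np.matmul
# of two 1-D arrays yields a 0-d scalar, which is why the reference helper above
# wraps such results into a shape-(1,) array.
import numpy as np

out = np.matmul(np.ones(3), np.ones(3))    # 0-d result: array(3.)
out = np.array([out], dtype="float64")     # shape (1,), matching the op output
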
def test_check_grad(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad(set(['X']), 'Out')

def test_check_output(self):
    if paddle.is_compiled_with_xpu():
        paddle.enable_static()
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, atol=1e-2)

def test_check_grad(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['X'], 'Out', in_place=True)

def test_check_output(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place)

def test_check_output(self):
    if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place)

def test_check_grad(self):
    if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['X'], 'Out')

def backward(self, grad_tensor=None, retain_graph=False):
    """
    Run backward of current Graph which starts from current Tensor.
    The new gradient will accumulate on the previous gradient.

    You can clear gradient by ``Tensor.clear_grad()`` .

    Args:
        grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
            the initial gradient values of the current Tensor would be Tensor filled with 1.0;
            if `grad_tensor` is not None, it must have the same length as the current Tensor.
            The default value is None.
        retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
            like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
            :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
            Defaults to False.

    Returns:
        NoneType: None

    Examples:
        .. code-block:: python

            import paddle
            x = paddle.to_tensor(5., stop_gradient=False)
            for i in range(5):
                y = paddle.pow(x, 4.0)
                y.backward()
                print("{}: {}".format(i, x.grad))
            # 0: [500.]
            # 1: [1000.]
            # 2: [1500.]
            # 3: [2000.]
            # 4: [2500.]

            x.clear_grad()
            print("{}".format(x.grad))
            # 0.

            grad_tensor=paddle.to_tensor(2.)
            for i in range(5):
                y = paddle.pow(x, 4.0)
                y.backward(grad_tensor)
                print("{}: {}".format(i, x.grad))
            # 0: [1000.]
            # 1: [2000.]
            # 2: [3000.]
            # 3: [4000.]
            # 4: [5000.]

    """
    if framework.in_dygraph_mode():
        if grad_tensor is not None:
            if core._in_eager_mode():
                assert isinstance(
                    grad_tensor, core.eager.EagerTensor
                ), "The type of grad_tensor must be paddle.Tensor"
            else:
                assert isinstance(
                    grad_tensor, paddle.Tensor
                ), "The type of grad_tensor must be paddle.Tensor"
            assert grad_tensor.shape == self.shape, \
                "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                    grad_tensor.name, grad_tensor.shape, self.name, self.shape)

        if core._in_eager_mode():
            if grad_tensor is None:
                grad_tensor = []
            else:
                grad_tensor = [grad_tensor]
        if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
            # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
            scaled_loss = scale_loss(self)
            if core._in_eager_mode():
                core.eager.run_backward([scaled_loss], grad_tensor,
                                        retain_graph)
            else:
                core.dygraph_run_backward([scaled_loss], [grad_tensor],
                                          retain_graph,
                                          framework._dygraph_tracer())
        else:
            if core._in_eager_mode():
                core.eager.run_backward([self], grad_tensor, retain_graph)
            else:
                core.dygraph_run_backward([self], [grad_tensor], retain_graph,
                                          framework._dygraph_tracer())
    else:
        raise ValueError(
            "Variable.backward() is only available in DyGraph mode")

def test_xpu(self):
    if paddle.is_compiled_with_xpu():
        self.gaussian_random_test(place=fluid.XPUPlace(0))

def test_check_grad(self):
    if paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['x0'], 'Out')
        self.check_grad_with_place(place, ['x1'], 'Out')
        self.check_grad_with_place(place, ['x2'], 'Out')