def _add_grad_as_view(self, param, align):
    """Hand the next free slice of the buffer to ``param`` as its gradient.

    The slice ``[self._fill, self._fill + numel(param))`` of ``self.buffer``
    is wrapped in a temporary ``core.VarBase`` and passed to
    ``param._copy_gradient_from``; the temporary's tensor is then cleared.
    Finally ``self._fill`` is advanced past the slice plus ``align``.

    Args:
        param: Parameter whose dtype must equal the buffer dtype.
        align: Number of extra elements reserved after the gradient slice.
    """
    capacity = np.prod(self.buffer.shape)
    assert capacity > 0, "Cannot add a gradient to a released InternalStorage, please rebuild"
    assert param.dtype == self.buffer.dtype

    grad_end = self._fill + np.prod(param.shape)
    offset = grad_end + align
    assert offset <= capacity

    # Device index of the current Paddle device; 0 when running on CPU.
    current = paddle.get_device()
    dev_id = 0 if current == "cpu" else int(current.split(":")[1])

    def _bind_slice():
        # Wrap the buffer slice, give it to the param, then drop the wrapper.
        view = core.VarBase(self.buffer._slice(self._fill, grad_end))
        param._copy_gradient_from(view)
        view.value().get_tensor()._clear()

    if self._device == "cpu":
        with device_guard(dev_id, self._device):
            _bind_slice()
    elif self._device == "gpu":
        _bind_slice()

    self._fill = offset
def _add_param_as_view(self, param, align, convert_gpu=True):
    """Write ``param``'s values into the next free slice of the buffer.

    ``param`` is flattened in place (with ``stop_gradient`` temporarily set
    to True), and its value is written into
    ``self.buffer[self._fill:self._fill + numel(param)]`` under a CPU
    ``device_guard``. ``self._fill`` then advances past the slice plus
    ``align``.

    Args:
        param: Parameter whose dtype must equal the buffer dtype.
        align: Number of extra elements reserved after the parameter slice.
        convert_gpu: When True, the value is taken from ``param.cpu()`` and
            the parameter's own tensor is cleared before the write.

    Returns:
        The shape ``param`` had before it was flattened.
    """
    assert param.dtype == self.buffer.dtype, (
        "Different types for the InternalStorage and the param, cannot proceed: {} - {}"
        .format(param.dtype, self.buffer.dtype))

    var_end = self._fill + np.prod(param.shape)
    offset = var_end + align
    assert offset <= np.prod(self.buffer.shape)

    p_shape = param.shape

    # Flatten without recording the op, then restore the original flag.
    saved_stop_gradient = param.stop_gradient
    param.stop_gradient = True
    param.flatten_()
    param.stop_gradient = saved_stop_gradient

    # Device index of the current Paddle device; 0 when running on CPU.
    current = paddle.get_device()
    dev_id = 0 if current == "cpu" else int(current.split(":")[1])

    with device_guard(dev_id, "cpu"):
        view = core.VarBase(tensor=self.buffer._slice(self._fill, var_end))
        source = param
        if convert_gpu:
            source = param.cpu()
            param.value().get_tensor()._clear()
        view.set_value(source)

    self._fill = offset
    return p_shape
def add_rank_params(self, trainable_params, param2align, convert_gpu=True):
    """Register ``trainable_params`` with this storage.

    Each parameter is first written into the buffer via
    ``_add_param_as_view``. When ``convert_gpu`` is True the buffer is then
    moved to the current CUDA device. Finally ``_fill`` is reset and every
    parameter goes through ``_convert_buffer`` before being recorded in
    ``_params`` / ``_param_ids``.

    Args:
        trainable_params: Parameters to add; none may already be registered.
        param2align: Mapping from parameter name to its alignment padding.
        convert_gpu: Whether to move the buffer to the GPU after filling.
    """
    assert not any(
        id(p) in self._param_ids for p in trainable_params
    ), "The same param cannot be checked in twice"
    assert self.buffer is not None

    self.param2align = param2align

    # Fill the buffer and remember each parameter's pre-flatten shape.
    cpu_param_shape = [
        self._add_param_as_view(p, param2align[p.name], convert_gpu)
        for p in trainable_params
    ]

    if convert_gpu:
        # buffer convert from cpu to cuda
        dev_id = int(paddle.get_device().split(":")[1])
        self.buffer = self.buffer.cuda(dev_id)

    self._fill = 0

    for param, shape in zip(trainable_params, cpu_param_shape):
        self._convert_buffer(param, shape, param2align[param.name])
        self._params.append(param)
        self._param_ids.append(id(param))
def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8):
    """Assert that ``actual`` is numerically close to ``expect``.

    Closeness is decided by ``np.allclose`` with the given tolerances; the
    failure message reports the current Paddle device, both values and the
    name of the test class.
    """
    is_close = np.allclose(actual, expect, rtol=rtol, atol=atol)
    error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}'
    failure_text = error_msg.format(paddle.get_device(), expect, actual,
                                    self.__class__.__name__)
    self.assertTrue(is_close, failure_text)
def __init__(self, args, config, device=None):
    """Store the configuration and precompute the beta schedule tensors.

    Args:
        args: Stored unchanged on ``self.args``.
        config: Configuration object; ``config.model.var_type`` and the
            ``config.diffusion`` beta-schedule fields are read.
        device: Device identifier; defaults to ``paddle.get_device()``.

    ``self.logvar`` is only assigned when ``config.model.var_type`` is
    ``"fixedlarge"`` or ``"fixedsmall"``.
    """
    self.args = args
    self.config = config
    self.device = paddle.get_device() if device is None else device
    self.model_var_type = config.model.var_type

    diffusion_cfg = config.diffusion
    schedule = get_beta_schedule(
        beta_schedule=diffusion_cfg.beta_schedule,
        beta_start=diffusion_cfg.beta_start,
        beta_end=diffusion_cfg.beta_end,
        num_diffusion_timesteps=diffusion_cfg.num_diffusion_timesteps,
    )
    self.betas = paddle.to_tensor(schedule).astype('float32')
    betas = self.betas
    self.num_timesteps = betas.shape[0]

    alphas = 1.0 - betas
    alphas_cumprod = alphas.cumprod(0)
    # Cumulative product shifted right by one step, with 1 prepended.
    alphas_cumprod_prev = paddle.concat(
        [paddle.ones([1]), alphas_cumprod[:-1]], 0)
    posterior_variance = (
        betas * (1.0 - alphas_cumprod_prev) / (1.0 - alphas_cumprod))

    var_type = self.model_var_type
    if var_type == "fixedlarge":
        # Uses log(betas) directly; a variant built from
        # posterior_variance[1:2] and betas[1:] was left disabled upstream.
        self.logvar = betas.log()
    elif var_type == "fixedsmall":
        self.logvar = posterior_variance.clip(min=1e-20).log()
def SyncBatchNorm(*args, **kwargs):
    """Build a batch-norm layer appropriate for the current environment.

    ``nn.BatchNorm2D`` is returned when the current device is the CPU or the
    ``PADDLESEG_EXPORT_STAGE`` environment variable is set to a non-empty
    value; otherwise ``nn.SyncBatchNorm`` is returned. All arguments are
    forwarded to the chosen layer.
    """
    use_plain_bn = (paddle.get_device() == 'cpu' or
                    os.environ.get('PADDLESEG_EXPORT_STAGE'))
    norm_cls = nn.BatchNorm2D if use_plain_bn else nn.SyncBatchNorm
    return norm_cls(*args, **kwargs)
def setting_init(self):
    """Initialise the settings widgets from the configuration file.

    Reads the configuration, pushes the stored values into the sliders,
    labels and checkbox, selects the GPU/CPU radio button from the current
    Paddle device, and writes the settings back via ``create_config``.
    """
    self.read_config()

    # Sliders: the threshold slider stores the value scaled by 10.
    self.threshold_hs.setValue(self.setting['threshold'] * 10)
    slider_keys = (
        (self.grade_width_hs, 'grade_line_width'),
        (self.pre_width_hs, 'pre_line_width'),
        (self.label_width_hs, 'label_line_width'),
        (self.font_size_hs, 'font_size'),
    )
    for slider, key in slider_keys:
        slider.setValue(self.setting[key])

    # Text labels mirroring the slider values.
    label_keys = (
        (self.threshold_value, 'threshold'),
        (self.grade_line_width, 'grade_line_width'),
        (self.pre_line_width, 'pre_line_width'),
        (self.label_line_width, 'label_line_width'),
        (self.font_size_lable, 'font_size'),
    )
    for label, key in label_keys:
        label.setText(str(self.setting[key]))

    self.clipboard_cb.setChecked(self.setting['clipboard_cb'])

    # Detect whether Paddle is currently using a GPU.
    device = str(paddle.get_device())
    if 'gpu' in device:
        self.gpu_rb.setChecked(True)
        self.nogpu_rb.setChecked(False)
        self.setting['use_gpu'] = True
    elif 'cpu' in device:
        self.setting['use_gpu'] = False
        self.nogpu_rb.setChecked(True)
        self.gpu_rb.setEnabled(False)

    self.create_config(self.setting)
def train(model, train_loader):
    """Run one pass over ``train_loader``, updating ``model`` with Adam.

    Args:
        model: Callable returning ``(pred, loss)`` for a batch of
            ``(sentences, labels)`` tensors.
        train_loader: Iterable of ``(sentences, labels)`` batches.
    """
    model.train()

    # Prefer the GPU when Paddle reports one as the current device.
    if paddle.get_device().startswith("gpu"):
        paddle.set_device('gpu:0')

    # Optimizer that updates the network's parameters.
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.01,
        beta1=0.9,
        beta2=0.999,
        parameters=model.parameters())

    for step, batch in enumerate(train_loader):
        sentences, labels = batch
        sentences_var = paddle.to_tensor(sentences)
        labels_var = paddle.to_tensor(labels)
        _pred, loss = model(sentences_var, labels_var)

        loss.backward()          # back-propagate
        optimizer.step()         # apply the update that minimises the loss
        optimizer.clear_grad()   # reset gradients for the next batch

        if step % 100 == 0:
            print("step %d, loss %.3f" % (step, loss.numpy()[0]))
def main(args):
    """Load the config and optional checkpoint, then run prediction.

    Args:
        args: Parsed command-line namespace providing ``config``,
            ``report_speed``, ``checkpoint``, ``input`` and ``output``.

    Raises:
        RuntimeError: If ``args.checkpoint`` is given but is not a file.
    """
    cfg = Config.fromfile(args.config)
    for d in [cfg, cfg.data.test]:
        d.update(dict(report_speed=args.report_speed))
    print(json.dumps(cfg._cfg_dict, indent=4))
    sys.stdout.flush()

    device = paddle.get_device()
    paddle.set_device(device)

    # model
    model = build_model(cfg.model)

    if args.checkpoint is not None:
        if not os.path.isfile(args.checkpoint):
            # Report the path that was actually checked and raise a real
            # exception; a bare `raise` here has no active exception.
            message = "No checkpoint found at '{}'".format(args.checkpoint)
            print(message)
            raise RuntimeError(message)

        print("Loading model and optimizer from checkpoint '{}'".format(
            args.checkpoint))
        sys.stdout.flush()
        checkpoint = paddle.load(args.checkpoint)
        model.set_state_dict(checkpoint)

    # fuse conv and bn
    model = fuse_module(model)

    # test
    predict(args.input, model, cfg, args.output)
def step(self):
    """Run the wrapped optimizer's step and broadcast the updated params.

    With offload enabled, the optimizer steps under
    ``device_guard(device=self.offload_device)`` and each local parameter
    that has a master copy is refreshed from that copy (moved to the
    current CUDA device and cast to the parameter's dtype).
    """
    offload = self.offload

    if offload:
        params_list = [self.offload_params.buffer]

        # TODO(Baibaifan): Offload will support param_groups later
        if not isinstance(self._optim._param_groups[0], dict):
            self._optim._parameter_list = params_list
            self._optim._param_groups = params_list

    # Run the optimizer of the current rank step
    if not offload:
        self._optim.step()
    else:
        with device_guard(device=self.offload_device):
            self._optim.step()

        dev_id = int(paddle.get_device().split(":")[1])
        master_params = self._master_params
        for param in self._local_params:
            if param.name in master_params:
                refreshed = master_params[param.name].cuda(dev_id).cast(
                    dtype=param.dtype)
                param.set_value(refreshed)

    # Synchronize all the updated shards in between the ranks
    self._broadcast_params()
def unscale_method(self, optimizer):
    """Unscale all gradients of ``optimizer`` and record whether any is inf/nan.

    Gradients are split into fp16 and non-fp16 lists, each list is passed to
    ``_C_ops.check_finite_and_unscale`` with ``self._scale``, and the
    resulting flag is max-reduced over ``optimizer._group`` into
    ``self._found_inf``. Does nothing when ``self._enable`` is false.

    Args:
        optimizer: Sharding optimizer wrapper exposing ``_optim``,
            ``offload`` and ``_group``.
    """
    if not self._enable:
        return

    if hasattr(optimizer, "update_slice"):
        optimizer.update_slice()
        optimizer.update_scaler = True

    # Collect parameters either from dict-style param groups or from the
    # flat parameter list.
    param_groups = getattr(optimizer._optim, '_param_groups', None)
    if param_groups and isinstance(param_groups[0], dict):
        params = [p for group in param_groups for p in group['params']]
    else:
        params = optimizer._optim._parameter_list

    fp16_dtypes = [core.VarDesc.VarType.FP16, paddle.float16]
    param_grads_fp16 = []
    param_grads_fp32 = []
    for param in params:
        grad = param.grad
        if grad is None:
            continue
        if grad.dtype in fp16_dtypes:
            param_grads_fp16.append(grad)
        else:
            param_grads_fp32.append(grad)

    # np.bool_ is used because the `np.bool` alias was removed from NumPy.
    temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool_))
    temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool_))

    device = "cpu" if optimizer.offload else "gpu"
    dev_id = 0 if device == "cpu" else int(
        paddle.get_device().split(":")[1])

    with device_guard(dev_id, device):
        if param_grads_fp16:
            _C_ops.check_finite_and_unscale(param_grads_fp16, self._scale,
                                            param_grads_fp16,
                                            temp_found_inf_fp16)
        if param_grads_fp32:
            _C_ops.check_finite_and_unscale(param_grads_fp32, self._scale,
                                            param_grads_fp32,
                                            temp_found_inf_fp32)

    self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0
    is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32")

    # Any rank that saw inf/nan makes every rank see it.
    paddle.distributed.all_reduce(
        is_found_inf,
        op=paddle.distributed.ReduceOp.MAX,
        group=optimizer._group)
    self._found_inf = is_found_inf.numpy()[0]
def test_ipu_set_device(self):
    """Setting the device to 'ipu' must report the full IPU range."""
    num_devices = fluid.core.get_ipu_device_count()
    self.assertGreater(num_devices, 0)

    # Expected form is "ipus:{0-N}" with N the last device index.
    expected = "ipus:{{0-{}}}".format(num_devices - 1)
    for _ in range(num_devices):
        paddle.set_device('ipu')
        device = paddle.get_device()
        self.assertTrue(device == expected)
def device_count():
    """Return the number of GPUs visible to this process.

    Returns:
        0 when the current Paddle device is not a GPU. Otherwise the number
        of comma-separated entries in ``CUDA_VISIBLE_DEVICES``, or, when
        that variable is unset, the count reported by
        ``paddle.device.cuda.device_count()``.
    """
    if not paddle.get_device().startswith("gpu"):
        return 0

    visible = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible is None:
        # An unset variable means no restriction, so ask Paddle directly
        # instead of failing with KeyError.
        return paddle.device.cuda.device_count()
    return len(visible.split(","))
def test_xpu(self):
    """On an XPU build, dygraph tensors and the reported device are XPU."""
    if not core.is_compiled_with_xpu():
        return

    with fluid.dygraph.guard():
        out = paddle.to_tensor([1, 2])
        device = paddle.get_device()
        on_xpu_place = isinstance(framework._current_expected_place(),
                                  core.XPUPlace)
        self.assertEqual(on_xpu_place, True)
        self.assertTrue(out.place.is_xpu_place())
        self.assertEqual(device, "xpu:0")
def test_cpu(self):
    """After set_device('cpu') in dygraph, the expected place is the CPU."""
    with fluid.dygraph.guard():
        paddle.set_device('cpu')
        zeros = paddle.zeros(shape=[1, 3], dtype='float32')
        ones = paddle.ones(shape=[1, 3], dtype='float32')
        # Run an op so the device is actually exercised.
        out3 = paddle.concat(x=[zeros, ones], axis=0)
        device = paddle.get_device()
        on_cpu_place = isinstance(framework._current_expected_place(),
                                  core.CPUPlace)
        self.assertEqual(on_cpu_place, True)
        self.assertEqual(device, "cpu")
def test_cpu_device(self):
    """After set_device('cpu'), a default Executor runs on the CPU."""
    paddle.set_device('cpu')
    zeros = paddle.zeros(shape=[1, 3], dtype='float32')
    ones = paddle.ones(shape=[1, 3], dtype='float32')
    out3 = paddle.concat(x=[zeros, ones], axis=0)

    exe = paddle.fluid.Executor()
    exe.run(paddle.fluid.default_startup_program())
    res = exe.run(fetch_list=[out3])

    device = paddle.get_device()
    runs_on_cpu = isinstance(exe.place, core.CPUPlace)
    self.assertEqual(runs_on_cpu, True)
    self.assertEqual(device, "cpu")
def __init__(self, size, dtype, device, convert_cpu=False):
    """Allocate a zero-filled flat buffer and initialise bookkeeping.

    Args:
        size: Buffer size; an int is wrapped into a one-element list.
        dtype: Element dtype of the buffer.
        device: Stored on ``self._device``.
        convert_cpu: When True, the buffer is built from a NumPy array
            (float16 if ``dtype`` equals ``Type.fp16.value``, else float32)
            and placed on ``core.CPUPlace()``; otherwise ``paddle.zeros``
            creates it.
    """
    self._params = []
    self._param_ids = []
    self._fill = 0
    self._device = device
    self._dtype = dtype

    # The flatten tensor
    if isinstance(size, int):
        size = [size]

    if convert_cpu:
        np_dtype = np.float16 if Type.fp16.value == dtype else np.float32
        value = np.zeros(size, dtype=np_dtype)
        self.buffer = core.eager.Tensor(value=value, place=core.CPUPlace())
    else:
        self.buffer = paddle.zeros(size, dtype=dtype)

    # Device index of the current Paddle device; 0 when running on CPU.
    current = paddle.get_device()
    self.dev_id = 0 if current == "cpu" else int(current.split(":")[1])
def _check_output_impl(self, result, expected_result, rtol, atol, equal=True):
    """Assert that ``result`` matches (or differs from) ``expected_result``.

    Exact types (str, int, bool, set and NumPy bool/int/str scalars) are
    compared with ``assertEqual``; floats and arrays with ``np.allclose``,
    plus a shape comparison for arrays. For a list or tuple, the type
    returned by ``get_container_type`` selects the comparison.

    Args:
        result: Value produced by the code under test.
        expected_result: Reference value.
        rtol: Relative tolerance for ``np.allclose``.
        atol: Absolute tolerance for ``np.allclose``.
        equal: When False, the negated assertions are used instead.

    Raises:
        ValueError: If the (element) type of ``result`` is not supported.
    """
    if equal:
        assertForNormalType = self.assertEqual
        assertForFloat = self.assertTrue
    else:
        assertForNormalType = self.assertNotEqual
        assertForFloat = self.assertFalse

    result_t = type(result)
    error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}'
    if result_t in [list, tuple]:
        result_t = get_container_type(result)

    # np.bool_ / np.str_ replace the `np.bool` / `np.str` aliases, which
    # were removed from NumPy and raise AttributeError on access.
    exact_types = [
        str, int, bool, set, np.bool_, np.int32, np.int64, np.str_
    ]
    float_types = [float, np.ndarray, np.float32, np.float64]

    if result_t in exact_types:
        assertForNormalType(
            result,
            expected_result,
            msg=error_msg.format(paddle.get_device(), expected_result,
                                 result, self.__class__.__name__))
    elif result_t in float_types:
        assertForFloat(
            np.allclose(
                result, expected_result, rtol=rtol, atol=atol),
            msg=error_msg.format(paddle.get_device(), expected_result,
                                 result, self.__class__.__name__))
        if result_t == np.ndarray:
            # NOTE(review): with equal=False this asserts the shapes
            # differ as well -- confirm that is intended.
            assertForNormalType(
                result.shape,
                expected_result.shape,
                msg=error_msg.format(paddle.get_device(),
                                     expected_result.shape, result.shape,
                                     self.__class__.__name__))
    else:
        raise ValueError(
            'result type must be str, int, bool, set, np.bool, np.int32, '
            'np.int64, np.str, float, np.ndarray, np.float32, np.float64')
def test_gpu(self):
    """On a CUDA build, set_device('gpu:0') selects a CUDAPlace in dygraph."""
    if not core.is_compiled_with_cuda():
        return

    with fluid.dygraph.guard():
        paddle.set_device('gpu:0')
        zeros = paddle.zeros(shape=[1, 3], dtype='float32')
        ones = paddle.ones(shape=[1, 3], dtype='float32')
        # Run an op so the device is actually exercised.
        out3 = paddle.concat(x=[zeros, ones], axis=0)
        device = paddle.get_device()
        on_cuda_place = isinstance(framework._current_expected_place(),
                                   core.CUDAPlace)
        self.assertEqual(on_cuda_place, True)
        self.assertEqual(device, "gpu:0")
def test_gpu_device(self):
    """On a CUDA build, set_device('gpu:0') makes the Executor use CUDA."""
    if not core.is_compiled_with_cuda():
        return

    # The ops are created before the device is switched, as in the
    # original test.
    zeros = paddle.zeros(shape=[1, 3], dtype='float32')
    ones = paddle.ones(shape=[1, 3], dtype='float32')
    out3 = paddle.concat(x=[zeros, ones], axis=0)

    paddle.set_device('gpu:0')
    exe = paddle.fluid.Executor()
    exe.run(paddle.fluid.default_startup_program())
    res = exe.run(fetch_list=[out3])

    device = paddle.get_device()
    runs_on_cuda = isinstance(exe.place, core.CUDAPlace)
    self.assertEqual(runs_on_cuda, True)
    self.assertEqual(device, "gpu:0")
def train(model, data_loader):
    """Train ``model`` with Adam over every batch in ``data_loader``.

    Args:
        model: Callable returning ``(pred, loss)`` for
            ``(center_words, target_words, label)`` tensors; its
            ``embedding.weight`` is used for the similarity printouts.
        data_loader: Iterable of ``(center_words, target_words, label)``.
    """
    # Hyper-parameters; only learning_rate is used inside this function.
    batch_size = 128
    epoch_num = 3
    embedding_size = 200
    step = 0
    learning_rate = 0.001

    # Prefer the GPU when Paddle reports one as the current device.
    if paddle.get_device().startswith("gpu"):
        paddle.set_device('gpu:0')

    # Switch the model to training mode.
    model.train()

    # Optimizer used to train the network.
    adam = paddle.optimizer.Adam(
        learning_rate=learning_rate, parameters=model.parameters())

    # Iterate over the training data one mini-batch at a time.
    for batch in data_loader:
        center_words, target_words, label = batch

        # Convert the NumPy batches into Paddle tensors.
        center_words_var = paddle.to_tensor(center_words)
        target_words_var = paddle.to_tensor(target_words)
        label_var = paddle.to_tensor(label)

        # Forward pass.
        _pred, loss = model(center_words_var, target_words_var, label_var)

        loss.backward()     # backward pass
        adam.step()         # parameter update
        adam.clear_grad()   # reset gradients for the next mini-batch

        step += 1

        # Print the loss every 1000 mini-batches.
        if step % 1000 == 0:
            print("step %d, loss %.3f" % (step, loss.numpy()[0]))

        # Every 10000 steps, print the 5 tokens most similar to each query
        # word (similarity is computed by utils.get_similar_tokens).
        if step % 10000 == 0:
            for query in ('movie', 'one', 'chip'):
                utils.get_similar_tokens(query, 5, model.embedding.weight,
                                         word2id_dict, id2word_dict)
def train():
    """Select this process's GPU, load the saved state and print its size.

    The device id comes from ``paddle.distributed.ParallelEnv().device_id``;
    the state is loaded from ``./fc.example.model`` and the number of keys
    it contains is printed.
    """
    device_id = paddle.distributed.ParallelEnv().device_id
    print("paddle.distributed.ParallelEnv().dev_id:", device_id)
    paddle.set_device('gpu:%d' % device_id)
    print("paddle.get_device()", paddle.get_device())

    state = paddle.load("./fc.example.model")
    print(len(state.keys()))
def forward(ctx, run_function, preserve_rng_state, *args):
    """Run ``run_function(*args)`` under ``paddle.no_grad`` and stash state.

    Everything needed to re-run the function later is stored on ``ctx``:
    the function, the non-tensor arguments, the tensor arguments (via
    ``save_for_backward``), optionally the CUDA RNG state, and the AMP
    settings of the current tracer.

    Args:
        ctx: Context object that receives the saved state.
        run_function: Callable to execute.
        preserve_rng_state: Whether to record the CUDA RNG state.
        *args: Positional arguments for ``run_function``.

    Returns:
        Whatever ``run_function(*args)`` returns.

    Raises:
        RuntimeError: If RNG preservation is requested off-GPU.
        ValueError: If the tracer's AMP level is not O0, O1 or O2.
    """
    if framework._dygraph_tracer()._has_grad:
        check_recompute_necessary(args)

    # store for recomputing
    ctx.run_function = run_function
    ctx.preserve_rng_state = preserve_rng_state

    # NOTE the number of outputs of backward() should be equal to the
    # number of tensors in forward()'s input, in the same order. None
    # tensor inputs will be filtered in backward inputs.

    # Save inputs for backward: tensors go through save_for_backward and
    # leave a None placeholder at their position in ctx.inputs.
    ctx.inputs = []
    ctx.tensor_indices = []
    tensor_inputs = []
    for position, value in enumerate(args):
        if not paddle.is_tensor(value):
            ctx.inputs.append(value)
            continue
        tensor_inputs.append(value)
        ctx.tensor_indices.append(position)
        ctx.inputs.append(None)
    ctx.save_for_backward(*tensor_inputs)

    # NOTE recompute with restore RNG only supports one process per CUDA
    # GPU; multi-GPU processes and mixed GPU/CPU setups are unsupported.
    if ctx.preserve_rng_state:
        cur_device = paddle.get_device()
        if 'gpu:' not in cur_device:
            raise RuntimeError(
                "Recompute with RNG perserve is not support current device: {}.".
                format(cur_device))
        ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state()

    # TODO support AMP
    tracer = framework._dygraph_tracer()
    amp_level = tracer._amp_level
    ctx.is_fw_autocast = not (amp_level == core.AmpLevel.O0)
    if amp_level == core.AmpLevel.O2:
        ctx.amp_level = 'O2'
    elif amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
        ctx.amp_level = 'O1'
    else:
        raise ValueError("unsupported amp level: {}".format(
            tracer._amp_level))
    ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

    with paddle.no_grad():
        outputs = run_function(*args)
    return outputs
def to(self, device, dtype=None, keep_alignment=True):
    """
    Move the underlying buffer

    Args:
        device: Target device, ``"gpu"`` or anything else for CPU.
        dtype: Optional target dtype; must be ``Type.fp32.value`` or
            ``Type.fp16.value`` when given. ``None`` leaves it unchanged.
        keep_alignment: Not used by this method.
    """
    assert self.buffer is not None, "Cannot move a collapsed bucket, please rebuild it"
    # The previous check `dtype == fp32 or fp16` was always true because
    # the second operand is a bare truthy value; compare dtype against both.
    assert dtype is None or dtype in (
        Type.fp32.value, Type.fp16.value
    ), "Conversion type is not supported now"

    # Device index of the current Paddle device; 0 when running on CPU.
    current = paddle.get_device()
    dev_id = 0 if current == "cpu" else int(current.split(":")[1])

    if self._device != device:
        tmp_buffer = self.buffer.cuda(
            dev_id) if device == "gpu" else self.buffer.cpu()
        # Drop the gradients of the registered params before the old
        # buffer is cleared.
        for param in self._params:
            param.clear_gradient(False)
            param._gradient_set_empty(False)
        self.buffer.value().get_tensor()._clear()
        self.buffer = tmp_buffer
        self._device = device

    if dtype is not None:
        self.buffer = self.buffer.cast(dtype=dtype)
        self._dtype = dtype
def testcase5(self):
    """scatter_nd_add must give identical results on GPU and CPU.

    Checked in dygraph mode and again in static-graph mode. Skipped when
    Paddle is not compiled with CUDA.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    shape = [2, 3, 4]
    x = np.arange(int(np.prod(shape))).reshape(shape)
    index = np.array([[0, 0, 2], [0, 1, 2]])
    val = np.array([-1, -3])

    with fluid.dygraph.guard():
        device = paddle.get_device()
        try:
            paddle.set_device('gpu')
            gpu_value = paddle.scatter_nd_add(
                paddle.to_tensor(x),
                paddle.to_tensor(index), paddle.to_tensor(val))
            paddle.set_device('cpu')
            cpu_value = paddle.scatter_nd_add(
                paddle.to_tensor(x),
                paddle.to_tensor(index), paddle.to_tensor(val))
            self.assertTrue(
                np.array_equal(gpu_value.numpy(), cpu_value.numpy()))
        finally:
            # Restore the global device even when the assertion fails so
            # later tests are not left running on the CPU.
            paddle.set_device(device)

    @switch_to_static_graph
    def test_static_graph():
        with paddle.static.program_guard(paddle.static.Program(),
                                         paddle.static.Program()):
            x_t = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape)
            index_t = paddle.static.data(
                name="index", dtype=index.dtype, shape=index.shape)
            val_t = paddle.static.data(
                name="val", dtype=val.dtype, shape=val.shape)
            out_t = paddle.scatter_nd_add(x_t, index_t, val_t)
            feed = {x_t.name: x, index_t.name: index, val_t.name: val}
            fetch = [out_t]

            gpu_exe = paddle.static.Executor(paddle.CUDAPlace(0))
            gpu_value = gpu_exe.run(feed=feed, fetch_list=fetch)[0]
            cpu_exe = paddle.static.Executor(paddle.CPUPlace())
            cpu_value = cpu_exe.run(feed=feed, fetch_list=fetch)[0]
            self.assertTrue(np.array_equal(gpu_value, cpu_value))

    test_static_graph()
def _check_predictor_type(self):
    """Select the onnxruntime predictor when fp16 inference is usable.

    Only acts when ``self._infer_precision`` is ``'fp16'``. On CPU it just
    logs a message. Otherwise it tries to import the ONNX dependencies and
    sets ``self._predictor_type = 'onnxruntime'`` on success, or logs an
    installation hint on failure.
    """
    if self._infer_precision != 'fp16':
        return

    if paddle.get_device() == 'cpu':
        logger.info(
            "The inference precision is change to 'fp32', 'fp16' inference only takes effect on gpu."
        )
        return

    try:
        # Imported only to verify that the fp16 dependencies are present.
        import onnx
        import onnxruntime as ort
        import paddle2onnx
        from onnxconverter_common import float16
        self._predictor_type = 'onnxruntime'
    except Exception:
        # Stay best-effort for any import-time failure (missing package or
        # broken native library), but no longer swallow KeyboardInterrupt
        # or SystemExit as the bare `except:` did.
        logger.info(
            "The inference precision is change to 'fp32', please install the dependencies that required for 'fp16' inference, pip install onnxruntime-gpu onnx onnxconverter-common paddle2onnx"
        )
def step(self):
    """
    A wrapper for Optimizer's step function to finish the update operation of the optimizer.

    The wrapped optimizer is temporarily pointed at this rank's parameters
    (or at the offload buffer), stepped, the updated shards are broadcast,
    and the optimizer's original parameter list and groups are restored.
    """
    if self.offload:
        params_list = [self.offload_params.buffer]
    else:
        # Synchronize optimizer parameters for the current rank
        params_list = []
        for dtype in self.dtype_rank_params.keys():
            params_list.extend(self.dtype_rank_params[dtype][self.rank])

    if not isinstance(self._optim._param_groups[0], dict):
        self._optim._parameter_list = params_list
        self._optim._param_groups = params_list
    else:
        # Name -> parameter lookup replaces the per-parameter list scan
        # plus list.index(); setdefault keeps the first occurrence of a
        # name, which is what list.index() returned.
        param_by_name = {}
        for candidate in params_list:
            param_by_name.setdefault(candidate.name, candidate)

        for param_group in self._optim._param_groups:
            param_group['params'] = [
                param_by_name[param.name]
                for param in param_group['params']
                if param.name in param_by_name
            ]

    # Run the optimizer of the current rank step
    if self.offload:
        with device_guard(device=self.offload_device):
            self._optim.step()

        dev_id = int(paddle.get_device().split(":")[1])
        for param in self._local_params:
            if param.name in self._master_params:
                # Refresh the local param from its master copy.
                param.set_value(self._master_params[param.name].cuda(dev_id)
                                .cast(dtype=param.dtype))
    else:
        self._optim.step()

    # Synchronize all the updated shards in between the ranks
    self._broadcast_params()

    # Return full parameters to optimizer parameters
    self._optim._parameter_list = self._ori_parameter_list
    self._optim._param_groups = self._ori_param_groups
def build_inference_model(self):
    """Build the model used for inference.

    In dynamic mode the model is built from ``self.cfg``. In static mode an
    executor is created on the current device and the inference program is
    loaded from ``self.weight_path``, using the file whose name contains
    ``'model'`` as the program and the one containing ``'param'`` as the
    parameters.

    Raises:
        FileNotFoundError: In static mode, if no model file or no params
            file is found in ``self.weight_path``.
    """
    if paddle.in_dynamic_mode():
        # todo
        self.model = build_model(self.cfg)
        return

    place = paddle.get_device()
    self.exe = paddle.static.Executor(place)

    model_file = None
    param_file = None
    for file_name in os.listdir(self.weight_path):
        # A name containing 'model' is never considered as a params file.
        if 'model' in file_name:
            model_file = file_name
        elif 'param' in file_name:
            param_file = file_name

    if model_file is None or param_file is None:
        # Fail with a clear message instead of an UnboundLocalError.
        raise FileNotFoundError(
            "Expected a '*model*' file and a '*param*' file in '{}', "
            "found model={!r}, params={!r}".format(self.weight_path,
                                                   model_file, param_file))

    self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
        self.weight_path,
        executor=self.exe,
        model_filename=model_file,
        params_filename=param_file)
def reverse_transform(pred, trans_info, mode='nearest'):
    """Undo the recorded transforms on ``pred``, last one first.

    Args:
        pred: Prediction tensor; indexed as 4-D by the ``'padding'`` step.
        trans_info: Sequence of ``(kind, (h, w))`` entries where ``kind`` is
            ``'resize'`` or ``'padding'``.
        mode: Interpolation mode passed to ``F.interpolate``.

    Returns:
        The prediction after all transforms have been reverted.

    Raises:
        Exception: If an entry has an unknown kind.
    """
    intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
    dtype = pred.dtype
    for item in trans_info[::-1]:
        kind = item[0]
        if kind == 'resize':
            h, w = item[1][0], item[1][1]
            # On CPU, integer tensors are cast to float32 for interpolation
            # and cast back afterwards.
            via_float = paddle.get_device() == 'cpu' and dtype in intTypeList
            if via_float:
                pred = paddle.cast(pred, 'float32')
            pred = F.interpolate(pred, (h, w), mode=mode)
            if via_float:
                pred = paddle.cast(pred, dtype)
        elif kind == 'padding':
            h, w = item[1][0], item[1][1]
            pred = pred[:, :, 0:h, 0:w]
        else:
            raise Exception("Unexpected info '{}' in im_info".format(item[0]))
    return pred
def get_sys_env():
    """Collect environment information.

    Returns:
        dict: Platform, Python, CUDA/cuDNN/GPU details (only when Paddle is
        compiled with CUDA), GCC version when available, and the PaddleSeg,
        PaddlePaddle and OpenCV versions.

    Side effect: on a CUDA build where no GPU is in use, the
    ``CUDA_VISIBLE_DEVICES`` environment variable is set to ``''``.
    """
    env_info = {}
    env_info['platform'] = platform.platform()
    env_info['Python'] = sys.version.replace('\n', '')

    # TODO is_compiled_with_cuda() has not been moved
    compiled_with_cuda = paddle.is_compiled_with_cuda()
    env_info['Paddle compiled with cuda'] = compiled_with_cuda

    if compiled_with_cuda:
        cuda_home = _find_cuda_home()
        env_info['NVCC'] = _get_nvcc_info(cuda_home)
        # refer to https://github.com/PaddlePaddle/Paddle/blob/release/2.0-rc/paddle/fluid/platform/device_context.cc#L327
        v = paddle.get_cudnn_version()
        v = str(v // 1000) + '.' + str(v % 1000 // 100)
        env_info['cudnn'] = v
        if 'gpu' in paddle.get_device():
            gpu_nums = paddle.distributed.ParallelEnv().nranks
        else:
            gpu_nums = 0
        env_info['GPUs used'] = gpu_nums

        env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
            'CUDA_VISIBLE_DEVICES')
        if gpu_nums == 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
        env_info['GPU'] = _get_gpu_info()

    try:
        gcc = subprocess.check_output(['gcc', '--version']).decode()
        env_info['GCC'] = gcc.strip().split('\n')[0]
    except Exception:
        # GCC information is optional (gcc may be missing or fail); skip
        # it, but let KeyboardInterrupt / SystemExit propagate, which the
        # bare `except:` did not.
        pass

    env_info['PaddleSeg'] = paddleseg.__version__
    env_info['PaddlePaddle'] = paddle.__version__
    env_info['OpenCV'] = cv2.__version__
    return env_info