Example #1
    def _add_grad_as_view(self, param, align):
        assert np.prod(
            self.buffer.shape
        ) > 0, "Cannot add a gradient to a released InternalStorage, please rebuild"
        assert param.dtype == self.buffer.dtype

        grad_end = self._fill + np.prod(param.shape)
        offset = grad_end + align
        assert offset <= np.prod(self.buffer.shape)

        # Copy the current grad value to InternalStorage
        dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])
        if self._device == "cpu":
            with device_guard(dev_id, self._device):
                tmp_var = core.VarBase(self.buffer._slice(
                    self._fill, grad_end))
                param._copy_gradient_from(tmp_var)
                tmp_var.value().get_tensor()._clear()

        elif self._device == "gpu":
            tmp_var = core.VarBase(self.buffer._slice(self._fill, grad_end))
            param._copy_gradient_from(tmp_var)
            tmp_var.value().get_tensor()._clear()

        self._fill = offset
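The dev_id parsing above (also used in several later examples) is the common idiom for turning paddle.get_device() into a numeric device index. A minimal standalone sketch of that idiom, with a helper name of our own choosing:

import paddle

def current_dev_id():
    # paddle.get_device() returns e.g. "cpu", "gpu:0" or "xpu:1";
    # return 0 on CPU, otherwise the numeric index after the colon.
    device = paddle.get_device()
    return 0 if device == "cpu" else int(device.split(":")[1])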
Example #2
    def _add_param_as_view(self, param, align, convert_gpu=True):

        assert (
            param.dtype == self.buffer.dtype
        ), "Different types for the InternalStorage and the param, cannot proceed: {} - {}".format(
            param.dtype, self.buffer.dtype)

        var_end = self._fill + np.prod(param.shape)
        offset = var_end + align
        assert offset <= np.prod(self.buffer.shape)

        p_shape = param.shape

        origin_state = param.stop_gradient
        param.stop_gradient = True
        param.flatten_()
        param.stop_gradient = origin_state

        # Copy the current param value
        dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])
        with device_guard(dev_id, "cpu"):
            tmp_var = core.VarBase(
                tensor=self.buffer._slice(self._fill, var_end))
            if convert_gpu:
                param_cpu = param.cpu()
                param.value().get_tensor()._clear()
                tmp_var.set_value(param_cpu)
            else:
                tmp_var.set_value(param)

        self._fill = offset
        return p_shape
Example #3
    def add_rank_params(self, trainable_params, param2align, convert_gpu=True):
        """
        Add new parameters to the InternalStorage. Params become views of this InternalStorage buffer.
        """

        assert all([
            id(param) not in self._param_ids for param in trainable_params
        ]), "The same param cannot be checked in twice"
        assert self.buffer is not None

        self.param2align = param2align

        cpu_param_shape = list()
        for param in trainable_params:
            p_shape = self._add_param_as_view(param, param2align[param.name],
                                              convert_gpu)
            cpu_param_shape.append(p_shape)

        if convert_gpu:
            # buffer convert from cpu to cuda
            dev_id = int(paddle.get_device().split(":")[1])
            self.buffer = self.buffer.cuda(dev_id)

        self._fill = 0

        for idx, param in enumerate(trainable_params):
            self._convert_buffer(param, cpu_param_shape[idx],
                                 param2align[param.name])
            self._params.append(param)
            self._param_ids.append(id(param))
Example #4
 def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8):
     error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}'
     self.assertTrue(
         np.allclose(
             actual, expect, rtol=rtol, atol=atol),
         error_msg.format(paddle.get_device(), expect, actual,
                          self.__class__.__name__))
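A hedged usage sketch of check_output_equal inside a test case; OutputTestCase is a hypothetical base class assumed to carry the helper above:

import numpy as np
import paddle

class DemoTest(OutputTestCase):  # hypothetical base class providing check_output_equal
    def test_add(self):
        x = paddle.to_tensor([1.0, 2.0, 3.0])
        y = paddle.to_tensor([4.0, 5.0, 6.0])
        self.check_output_equal((x + y).numpy(), np.array([5.0, 7.0, 9.0]))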
Example #5
    def __init__(self, args, config, device=None):
        self.args = args
        self.config = config
        if device is None:
            device = paddle.get_device()
        self.device = device

        self.model_var_type = config.model.var_type
        betas = get_beta_schedule(
            beta_schedule=config.diffusion.beta_schedule,
            beta_start=config.diffusion.beta_start,
            beta_end=config.diffusion.beta_end,
            num_diffusion_timesteps=config.diffusion.num_diffusion_timesteps,
        )
        betas = self.betas = paddle.to_tensor(betas).astype('float32')
        self.num_timesteps = betas.shape[0]

        alphas = 1.0 - betas
        alphas_cumprod = alphas.cumprod(0)
        alphas_cumprod_prev = paddle.concat(
            [paddle.ones([1]), alphas_cumprod[:-1]], 0)
        posterior_variance = (betas * (1.0 - alphas_cumprod_prev) /
                              (1.0 - alphas_cumprod))
        if self.model_var_type == "fixedlarge":
            self.logvar = betas.log()
            # paddle.concat(
            # [posterior_variance[1:2], betas[1:]], 0).log()
        elif self.model_var_type == "fixedsmall":
            self.logvar = posterior_variance.clip(min=1e-20).log()
Example #6
def SyncBatchNorm(*args, **kwargs):
    """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead"""
    if paddle.get_device() == 'cpu' or os.environ.get(
            'PADDLESEG_EXPORT_STAGE'):
        return nn.BatchNorm2D(*args, **kwargs)
    else:
        return nn.SyncBatchNorm(*args, **kwargs)
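A hedged sketch of how such a factory is typically used when building a layer; ConvBNReLU is our own example name, and SyncBatchNorm refers to the wrapper defined above rather than nn.SyncBatchNorm directly:

import paddle.nn as nn

class ConvBNReLU(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=3, padding=1)
        # falls back to BatchNorm2D on CPU or during export, per the wrapper above
        self.bn = SyncBatchNorm(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))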
Example #7
    def setting_init(self):
        # Initialize the settings UI from the config file
        self.read_config()

        self.threshold_hs.setValue(self.setting['threshold'] * 10)
        self.grade_width_hs.setValue(self.setting['grade_line_width'])
        self.pre_width_hs.setValue(self.setting['pre_line_width'])
        self.label_width_hs.setValue(self.setting['label_line_width'])
        self.font_size_hs.setValue(self.setting['font_size'])

        self.threshold_value.setText(str(self.setting['threshold']))
        self.grade_line_width.setText(str(self.setting['grade_line_width']))
        self.pre_line_width.setText(str(self.setting['pre_line_width']))
        self.label_line_width.setText(str(self.setting['label_line_width']))
        self.font_size_lable.setText(str(self.setting['font_size']))

        self.clipboard_cb.setChecked(self.setting['clipboard_cb'])

        # Check whether the machine has a GPU
        device = str(paddle.get_device())
        if 'gpu' in device:
            self.gpu_rb.setChecked(True)
            self.nogpu_rb.setChecked(False)
            self.setting['use_gpu'] = True
        elif 'cpu' in device:
            self.setting['use_gpu'] = False
            self.nogpu_rb.setChecked(True)
            self.gpu_rb.setEnabled(False)
        self.create_config(self.setting)
Example #8
def train(model, train_loader):
    model.train()

    # Detect the available training device, preferring GPU
    use_gpu = paddle.get_device().startswith("gpu")
    if use_gpu:
        paddle.set_device('gpu:0')

    # Create the Adam optimizer used to update the network parameters
    optimizer = paddle.optimizer.Adam(learning_rate=0.01, beta1=0.9, beta2=0.999, parameters= model.parameters())

    # Start training
    for step, (sentences, labels) in enumerate(train_loader):
        sentences_var = paddle.to_tensor(sentences)
        labels_var = paddle.to_tensor(labels)
        pred, loss = model(sentences_var, labels_var)

        # Backward propagation
        loss.backward()
        # Minimize the loss with one optimizer step
        optimizer.step()
        # Clear the gradients
        optimizer.clear_grad()

        if step % 100 == 0:
            print("step %d, loss %.3f" % (step, loss.numpy()[0]))
Example #9
def main(args):
    cfg = Config.fromfile(args.config)
    for d in [cfg, cfg.data.test]:
        d.update(dict(report_speed=args.report_speed))
    print(json.dumps(cfg._cfg_dict, indent=4))
    sys.stdout.flush()

    device = paddle.get_device()
    paddle.set_device(device)

    # model
    model = build_model(cfg.model)

    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.checkpoint))
            sys.stdout.flush()

            checkpoint = paddle.load(args.checkpoint)
            model.set_state_dict(checkpoint)
        else:
            raise FileNotFoundError(
                "No checkpoint found at '{}'".format(args.checkpoint))

    # fuse conv and bn
    model = fuse_module(model)

    # test
    predict(args.input, model, cfg, args.output)
Example #10
    def step(self):
        """
        A wrapper for Optimizer's step function to finish the update operation of the optimizer.
        """

        if self.offload:
            params_list = [self.offload_params.buffer]

            #TODO(Baibaifan): Offload will support param_groups later
            if not isinstance(self._optim._param_groups[0], dict):
                self._optim._parameter_list = params_list
                self._optim._param_groups = params_list

        # Run the optimizer of the current rank step
        if self.offload:
            with device_guard(device=self.offload_device):
                self._optim.step()

            dev_id = int(paddle.get_device().split(":")[1])
            for param in self._local_params:
                if param.name in self._master_params.keys():
                    param.set_value(
                        self._master_params[param.name].cuda(dev_id).cast(
                            dtype=param.dtype))
        else:
            self._optim.step()

        # Synchronize all the updated shards in between the ranks
        self._broadcast_params()
Example #11
    def unscale_method(self, optimizer):
        if not self._enable:
            return
        param_grads = []
        param_grads_fp16 = []
        param_grads_fp32 = []
        if hasattr(optimizer, "update_slice"):
            optimizer.update_slice()
            optimizer.update_scaler = True

        if getattr(optimizer._optim, '_param_groups', None) and isinstance(
                optimizer._optim._param_groups[0], dict):

            for group in optimizer._optim._param_groups:
                for param in group['params']:
                    if param.grad is not None:
                        param_grads.append(param.grad)
                        if param.grad.dtype in [
                                core.VarDesc.VarType.FP16, paddle.float16
                        ]:
                            param_grads_fp16.append(param.grad)
                        else:
                            param_grads_fp32.append(param.grad)
        else:
            for param in optimizer._optim._parameter_list:
                if param.grad is not None:
                    param_grads.append(param.grad)
                    if param.grad.dtype in [
                            core.VarDesc.VarType.FP16, paddle.float16
                    ]:
                        param_grads_fp16.append(param.grad)
                    else:
                        param_grads_fp32.append(param.grad)

        temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool_))
        temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool_))

        device = "cpu" if optimizer.offload else "gpu"
        dev_id = 0 if device == "cpu" else int(paddle.get_device().split(":")[
            1])

        with device_guard(dev_id, device):
            if len(param_grads_fp16):
                _C_ops.check_finite_and_unscale(param_grads_fp16, self._scale,
                                                param_grads_fp16,
                                                temp_found_inf_fp16)
            if len(param_grads_fp32):
                _C_ops.check_finite_and_unscale(param_grads_fp32, self._scale,
                                                param_grads_fp32,
                                                temp_found_inf_fp32)

        self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0
        is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32")

        paddle.distributed.all_reduce(
            is_found_inf,
            op=paddle.distributed.ReduceOp.MAX,
            group=optimizer._group)
        self._found_inf = is_found_inf.numpy()[0]
Example #12
    def test_ipu_set_device(self):
        num_devices = fluid.core.get_ipu_device_count()
        self.assertGreater(num_devices, 0)

        for i in range(num_devices):
            paddle.set_device('ipu')
            device = paddle.get_device()
            self.assertTrue(device == "ipus:{{0-{}}}".format(num_devices - 1))
Example #13
def device_count():
    gpu_useful = paddle.get_device().startswith("gpu")
    if gpu_useful:
        # CUDA_VISIBLE_DEVICES may be unset; fall back to asking Paddle directly
        device_str = os.environ.get("CUDA_VISIBLE_DEVICES")
        if device_str:
            return len(device_str.split(","))
        return paddle.device.cuda.device_count()
    else:
        return 0
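A hedged usage sketch; on a GPU machine this reports how many devices are visible, and on CPU it returns 0:

import paddle

if paddle.get_device().startswith("gpu"):
    print("visible GPUs:", device_count())
else:
    print("running on CPU, device_count() ->", device_count())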
Example #14
 def test_xpu(self):
     if core.is_compiled_with_xpu():
         with fluid.dygraph.guard():
             out = paddle.to_tensor([1, 2])
             device = paddle.get_device()
             self.assertEqual(
                 isinstance(framework._current_expected_place(),
                            core.XPUPlace), True)
             self.assertTrue(out.place.is_xpu_place())
             self.assertEqual(device, "xpu:0")
Example #15
 def test_cpu(self):
     with fluid.dygraph.guard():
         paddle.set_device('cpu')
         out1 = paddle.zeros(shape=[1, 3], dtype='float32')
         out2 = paddle.ones(shape=[1, 3], dtype='float32')
         out3 = paddle.concat(x=[out1, out2], axis=0)
         device = paddle.get_device()
         self.assertEqual(
             isinstance(framework._current_expected_place(), core.CPUPlace),
             True)
         self.assertEqual(device, "cpu")
Example #16
 def test_cpu_device(self):
     paddle.set_device('cpu')
     out1 = paddle.zeros(shape=[1, 3], dtype='float32')
     out2 = paddle.ones(shape=[1, 3], dtype='float32')
     out3 = paddle.concat(x=[out1, out2], axis=0)
     exe = paddle.fluid.Executor()
     exe.run(paddle.fluid.default_startup_program())
     res = exe.run(fetch_list=[out3])
     device = paddle.get_device()
     self.assertEqual(isinstance(exe.place, core.CPUPlace), True)
     self.assertEqual(device, "cpu")
Example #17
    def __init__(self, size, dtype, device, convert_cpu=False):
        self._params = []
        self._param_ids = []
        self._fill = 0
        self._device = device
        self._dtype = dtype

        # The flatten tensor
        size = [size] if isinstance(size, int) else size
        if convert_cpu:
            value = np.zeros(
                size,
                dtype=np.float16) if Type.fp16.value == dtype else np.zeros(
                    size, dtype=np.float32)
            self.buffer = core.eager.Tensor(value=value, place=core.CPUPlace())
        else:
            self.buffer = paddle.zeros(size, dtype=dtype)

        self.dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])
Example #18
    def _check_output_impl(self,
                           result,
                           expected_result,
                           rtol,
                           atol,
                           equal=True):
        assertForNormalType = self.assertNotEqual
        assertForFloat = self.assertFalse
        if equal:
            assertForNormalType = self.assertEqual
            assertForFloat = self.assertTrue

        result_t = type(result)
        error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}'
        if result_t in [list, tuple]:
            result_t = get_container_type(result)
        if result_t in [
                str, int, bool, set, np.bool_, np.int32, np.int64, np.str_
        ]:
            assertForNormalType(
                result,
                expected_result,
                msg=error_msg.format(paddle.get_device(), expected_result,
                                     result, self.__class__.__name__))
        elif result_t in [float, np.ndarray, np.float32, np.float64]:
            assertForFloat(
                np.allclose(
                    result, expected_result, rtol=rtol, atol=atol),
                msg=error_msg.format(paddle.get_device(), expected_result,
                                     result, self.__class__.__name__))
            if result_t == np.ndarray:
                assertForNormalType(
                    result.shape,
                    expected_result.shape,
                    msg=error_msg.format(paddle.get_device(),
                                         expected_result.shape, result.shape,
                                         self.__class__.__name__))
        else:
            raise ValueError(
                'result type must be str, int, bool, set, np.bool_, np.int32, '
                'np.int64, np.str_, float, np.ndarray, np.float32, np.float64')
Example #19
 def test_gpu(self):
     if core.is_compiled_with_cuda():
         with fluid.dygraph.guard():
             paddle.set_device('gpu:0')
             out1 = paddle.zeros(shape=[1, 3], dtype='float32')
             out2 = paddle.ones(shape=[1, 3], dtype='float32')
             out3 = paddle.concat(x=[out1, out2], axis=0)
             device = paddle.get_device()
             self.assertEqual(
                 isinstance(framework._current_expected_place(),
                            core.CUDAPlace), True)
             self.assertEqual(device, "gpu:0")
Example #20
 def test_gpu_device(self):
     if core.is_compiled_with_cuda():
         out1 = paddle.zeros(shape=[1, 3], dtype='float32')
         out2 = paddle.ones(shape=[1, 3], dtype='float32')
         out3 = paddle.concat(x=[out1, out2], axis=0)
         paddle.set_device('gpu:0')
         exe = paddle.fluid.Executor()
         exe.run(paddle.fluid.default_startup_program())
         res = exe.run(fetch_list=[out3])
         device = paddle.get_device()
         self.assertEqual(isinstance(exe.place, core.CUDAPlace), True)
         self.assertEqual(device, "gpu:0")
Example #21
def train(model, data_loader):
    # Start training: define the hyperparameters used during training
    batch_size = 128
    epoch_num = 3
    embedding_size = 200
    step = 0
    learning_rate = 0.001

    # Detect the available training device, preferring GPU
    use_gpu = paddle.get_device().startswith("gpu")
    if use_gpu:
        paddle.set_device('gpu:0')

    # Switch the model to training mode
    model.train()

    # Build the optimizer used to train this network
    adam = paddle.optimizer.Adam(learning_rate=learning_rate,
                                 parameters=model.parameters())

    # Iterate over the training data mini-batch by mini-batch (built by build_batch) and train the network
    for center_words, target_words, label in data_loader:
        # Use paddle.to_tensor to convert numpy arrays into Paddle tensors
        center_words_var = paddle.to_tensor(center_words)
        target_words_var = paddle.to_tensor(target_words)
        label_var = paddle.to_tensor(label)

        # One forward pass through the model to get the prediction and the loss
        pred, loss = model(center_words_var, target_words_var, label_var)

        # The backward pass is computed automatically
        loss.backward()
        # One optimizer step to update the parameters based on the loss
        adam.step()
        # Clear the gradients so the next mini-batch starts from scratch
        adam.clear_grad()

        # Print the current loss every 1000 mini-batches to check that it keeps decreasing
        step += 1
        if step % 1000 == 0:
            print("step %d, loss %.3f" % (step, loss.numpy()[0]))

        # Every 10000 steps, print the 5 words most similar to each query word, using the dot product between word vectors as the similarity measure
        if step % 10000 == 0:
            utils.get_similar_tokens('movie', 5, model.embedding.weight,
                                     word2id_dict, id2word_dict)
            utils.get_similar_tokens('one', 5, model.embedding.weight,
                                     word2id_dict, id2word_dict)
            utils.get_similar_tokens('chip', 5, model.embedding.weight,
                                     word2id_dict, id2word_dict)
Example #22
def train():
  #print("paddle.distributed.ParallelEnv().dev_id:", paddle.distributed.ParallelEnv().device_id)
  #paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id)
  #paddle.set_device('gpu:0')
  #with paddle.fluid.dygraph.guard(paddle.fluid.CUDAPlace(paddle.distributed.ParallelEnv().dev_id)):
  #dist.init_parallel_env()
  print("paddle.distributed.ParallelEnv().dev_id:", paddle.distributed.ParallelEnv().device_id)
  paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().device_id)
  #place = paddle.CUDAPlace(paddle.distributed.ParallelEnv().dev_id)
  #paddle.disable_static(place)
  print("paddle.get_device()", paddle.get_device())
  #print("paddle.distributed.ParallelEnv().dev_id:", paddle.distributed.ParallelEnv().dev_id)
  state = paddle.load("./fc.example.model")
  print(state.keys().__len__())
Example #23
    def forward(ctx, run_function, preserve_rng_state, *args):
        if framework._dygraph_tracer()._has_grad:
            check_recompute_necessary(args)

        # store for recomputing 
        ctx.run_function = run_function
        ctx.preserve_rng_state = preserve_rng_state

        # NOTE the number of outputs of backward() should be equal to the number of tensors in forward()'s input
        # the order of tensors in backward()'s output should be the same as tensors in forward()'s input
        # None tensor inputs will be filtered in backward inputs.

        # save input for backward
        ctx.inputs = []
        ctx.tensor_indices = []
        tensor_inputs = []
        for i, arg in enumerate(args):
            if paddle.is_tensor(arg):
                tensor_inputs.append(arg)
                ctx.tensor_indices.append(i)
                ctx.inputs.append(None)
            else:
                ctx.inputs.append(arg)
        ctx.save_for_backward(*tensor_inputs)

        # NOTE recompute with RNG state restore only supports the scenario of one process per CUDA GPU;
        # one process with multiple GPUs and mixed GPU/CPU scenarios are not supported.
        if ctx.preserve_rng_state:
            cur_device = paddle.get_device()
            if 'gpu:' not in cur_device:
                raise RuntimeError(
                    "Recompute with RNG preserve does not support the current device: {}.".
                    format(cur_device))
            ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state()

        # TODO support AMP
        tracer = framework._dygraph_tracer()
        ctx.is_fw_autocast = False if tracer._amp_level == core.AmpLevel.O0 else True
        if tracer._amp_level == core.AmpLevel.O2:
            ctx.amp_level = 'O2'
        elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
            ctx.amp_level = 'O1'
        else:
            raise ValueError("unsupported amp level: {}".format(
                tracer._amp_level))
        ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

        with paddle.no_grad():
            outputs = run_function(*args)
        return outputs
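The forward above is the kind of context that sits behind Paddle's activation recomputation. A hedged sketch of reaching it through the public recompute utility, assuming a GPU device since the RNG-preserve path above rejects CPU:

import paddle
from paddle.distributed.fleet.utils import recompute

class Block(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc1 = paddle.nn.Linear(64, 64)
        self.fc2 = paddle.nn.Linear(64, 64)

    def forward(self, x):
        # activations of _inner are recomputed during backward instead of being stored
        return recompute(self._inner, x)

    def _inner(self, x):
        return self.fc2(paddle.nn.functional.relu(self.fc1(x)))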
Example #24
    def to(self, device, dtype=None, keep_alignment=True):
        """
        Move the underlying buffer
        """
        assert self.buffer is not None, "Cannot move a collapsed bucket, please rebuild it"
        assert dtype is None or dtype in (
            Type.fp32.value,
            Type.fp16.value), "Conversion type is not supported now"

        dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])

        if self._device != device:
            tmp_buffer = self.buffer.cuda(
                dev_id) if device == "gpu" else self.buffer.cpu()
            for param in self._params:
                param.clear_gradient(False)
                param._gradient_set_empty(False)
            self.buffer.value().get_tensor()._clear()
            self.buffer = tmp_buffer
            self._device = device

        if dtype is not None:
            self.buffer = self.buffer.cast(dtype=dtype)
            self._dtype = dtype
Example #25
    def testcase5(self):
        if not fluid.core.is_compiled_with_cuda():
            return

        shape = [2, 3, 4]
        x = np.arange(int(np.prod(shape))).reshape(shape)
        index = np.array([[0, 0, 2], [0, 1, 2]])
        val = np.array([-1, -3])

        with fluid.dygraph.guard():
            device = paddle.get_device()
            paddle.set_device('gpu')
            gpu_value = paddle.scatter_nd_add(paddle.to_tensor(x),
                                              paddle.to_tensor(index),
                                              paddle.to_tensor(val))
            paddle.set_device('cpu')
            cpu_value = paddle.scatter_nd_add(paddle.to_tensor(x),
                                              paddle.to_tensor(index),
                                              paddle.to_tensor(val))
            self.assertTrue(
                np.array_equal(gpu_value.numpy(), cpu_value.numpy()))
            paddle.set_device(device)

        @switch_to_static_graph
        def test_static_graph():
            with paddle.static.program_guard(paddle.static.Program(),
                                             paddle.static.Program()):
                x_t = paddle.static.data(name="x",
                                         dtype=x.dtype,
                                         shape=x.shape)
                index_t = paddle.static.data(name="index",
                                             dtype=index.dtype,
                                             shape=index.shape)
                val_t = paddle.static.data(name="val",
                                           dtype=val.dtype,
                                           shape=val.shape)
                out_t = paddle.scatter_nd_add(x_t, index_t, val_t)
                feed = {x_t.name: x, index_t.name: index, val_t.name: val}
                fetch = [out_t]

                gpu_exe = paddle.static.Executor(paddle.CUDAPlace(0))
                gpu_value = gpu_exe.run(feed=feed, fetch_list=fetch)[0]
                cpu_exe = paddle.static.Executor(paddle.CPUPlace())
                cpu_value = cpu_exe.run(feed=feed, fetch_list=fetch)[0]
                self.assertTrue(np.array_equal(gpu_value, cpu_value))

        test_static_graph()
Example #26
 def _check_predictor_type(self):
     if paddle.get_device() == 'cpu' and self._infer_precision == 'fp16':
         logger.info(
             "The inference precision is changed to 'fp32'; 'fp16' inference only takes effect on GPU."
         )
     else:
         if self._infer_precision == 'fp16':
             try:
                 import onnx
                 import onnxruntime as ort
                 import paddle2onnx
                 from onnxconverter_common import float16
                 self._predictor_type = 'onnxruntime'
             except:
                  logger.info(
                      "The inference precision is changed to 'fp32'. Please install the dependencies required for 'fp16' inference: pip install onnxruntime-gpu onnx onnxconverter-common paddle2onnx"
                  )
Example #27
    def step(self):
        """
        A wrapper for Optimizer's step function to finish the update operation of the optimizer.
        """

        if self.offload:
            params_list = [self.offload_params.buffer]
        else:
            # Synchronize optimizer parameters for the current rank
            params_list = []
            for dtype in self.dtype_rank_params.keys():
                params_list.extend(self.dtype_rank_params[dtype][self.rank])

        params_name_list = list(map(lambda p: p.name, params_list))
        if not isinstance(self._optim._param_groups[0], dict):
            self._optim._parameter_list = params_list
            self._optim._param_groups = params_list
        else:
            for param_group in self._optim._param_groups:
                p_group = []
                for param in param_group['params']:
                    if param.name in params_name_list:
                        p_group.append(params_list[params_name_list.index(
                            param.name)])
                param_group['params'] = p_group

        # Run the optimizer of the current rank step
        if self.offload:
            with device_guard(device=self.offload_device):
                self._optim.step()

            dev_id = int(paddle.get_device().split(":")[1])
            for param in self._local_params:
                if param.name in self._master_params.keys():
                    param.set_value(self._master_params[param.name].cuda(dev_id)
                                    .cast(dtype=param.dtype))
        else:
            self._optim.step()

        # Synchronize all the updated shards in between the ranks
        self._broadcast_params()

        # Return full parameters to optimizer parameters
        self._optim._parameter_list = self._ori_parameter_list
        self._optim._param_groups = self._ori_param_groups
Example #28
    def build_inference_model(self):
        if paddle.in_dynamic_mode():
            # todo self.model = build_model(self.cfg)
            pass
        else:
            place = paddle.get_device()
            self.exe = paddle.static.Executor(place)
            file_names = os.listdir(self.weight_path)
            for file_name in file_names:
                if file_name.find('model') > -1:
                    model_file = file_name
                elif file_name.find('param') > -1:
                    param_file = file_name

            self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
                self.weight_path,
                executor=self.exe,
                model_filename=model_file,
                params_filename=param_file)
Example #29
def reverse_transform(pred, trans_info, mode='nearest'):
    """recover pred to origin shape"""
    intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
    dtype = pred.dtype
    for item in trans_info[::-1]:
        if item[0] == 'resize':
            h, w = item[1][0], item[1][1]
            if paddle.get_device() == 'cpu' and dtype in intTypeList:
                pred = paddle.cast(pred, 'float32')
                pred = F.interpolate(pred, (h, w), mode=mode)
                pred = paddle.cast(pred, dtype)
            else:
                pred = F.interpolate(pred, (h, w), mode=mode)
        elif item[0] == 'padding':
            h, w = item[1][0], item[1][1]
            pred = pred[:, :, 0:h, 0:w]
        else:
            raise Exception("Unexpected info '{}' in im_info".format(item[0]))
    return pred
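A hedged usage sketch of reverse_transform with toy values (the snippet assumes paddle.nn.functional is imported as F); the trans_info below only illustrates the expected format:

import paddle

pred = paddle.rand([1, 1, 256, 256])           # [N, C, H, W] prediction
trans_info = [('resize', (300, 400))]          # preprocessing steps to undo
restored = reverse_transform(pred, trans_info, mode='nearest')
print(restored.shape)                          # expected: [1, 1, 300, 400]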
Example #30
def get_sys_env():
    """collect environment information"""
    env_info = {}
    env_info['platform'] = platform.platform()

    env_info['Python'] = sys.version.replace('\n', '')

    # TODO is_compiled_with_cuda() has not been moved
    compiled_with_cuda = paddle.is_compiled_with_cuda()
    env_info['Paddle compiled with cuda'] = compiled_with_cuda

    if compiled_with_cuda:
        cuda_home = _find_cuda_home()
        env_info['NVCC'] = _get_nvcc_info(cuda_home)
        # refer to https://github.com/PaddlePaddle/Paddle/blob/release/2.0-rc/paddle/fluid/platform/device_context.cc#L327
        v = paddle.get_cudnn_version()
        v = str(v // 1000) + '.' + str(v % 1000 // 100)
        env_info['cudnn'] = v
        if 'gpu' in paddle.get_device():
            gpu_nums = paddle.distributed.ParallelEnv().nranks
        else:
            gpu_nums = 0
        env_info['GPUs used'] = gpu_nums

        env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
            'CUDA_VISIBLE_DEVICES')
        if gpu_nums == 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
        env_info['GPU'] = _get_gpu_info()

    try:
        gcc = subprocess.check_output(['gcc', '--version']).decode()
        gcc = gcc.strip().split('\n')[0]
        env_info['GCC'] = gcc
    except:
        pass

    env_info['PaddleSeg'] = paddleseg.__version__
    env_info['PaddlePaddle'] = paddle.__version__
    env_info['OpenCV'] = cv2.__version__

    return env_info
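A hedged usage sketch; printing the returned dictionary is how PaddleSeg-style tools usually report the environment:

env = get_sys_env()
for key, value in env.items():
    print('{}: {}'.format(key, value))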