Example 1
def inference_mnist():
    if not args.use_gpu:
        place = paddle.CPUPlace()
    elif not args.use_data_parallel:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CUDAPlace(paddle.fluid.dygraph.parallel.Env().dev_id)

    paddle.disable_static(place)
    mnist_infer = MNIST()
    # load checkpoint
    model_dict, _ = paddle.fluid.load_dygraph("save_temp")
    mnist_infer.set_dict(model_dict)
    print("checkpoint loaded")

    # switch to evaluation mode
    mnist_infer.eval()

    def load_image(file):
        im = Image.open(file).convert('L')
        im = im.resize((28, 28), Image.ANTIALIAS)
        im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
        im = im / 255.0 * 2.0 - 1.0
        return im

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    tensor_img = load_image(cur_dir + '/image/infer_3.png')

    results = mnist_infer(paddle.to_tensor(data=tensor_img, dtype=None, place=None, stop_gradient=True))
    lab = np.argsort(results.numpy())
    print("Inference result of image/infer_3.png is: %d" % lab[0][-1])
    paddle.enable_static()
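
The three-way place selection above can be written more compactly in Paddle 2.x; a minimal sketch below assumes a single-process run (so the data-parallel branch is dropped) and uses the string-based paddle.set_device API.

import paddle

# Pick a device string instead of building Place objects by hand;
# falls back to CPU when the build has no CUDA support (single process assumed).
device = "gpu:0" if paddle.device.is_compiled_with_cuda() else "cpu"
paddle.set_device(device)

# Tensors created afterwards live on the selected device.
x = paddle.ones([1, 1, 28, 28], dtype="float32")
print(x.place)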
Example 2
    def __init__(self,
                 dataset,
                 batch_size,
                 is_train,
                 num_workers=4,
                 distributed=True):

        self.dataset = DictDataset(dataset)

        place = paddle.CUDAPlace(ParallelEnv().dev_id) \
                    if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)

        if distributed:
            sampler = DistributedBatchSampler(
                self.dataset,
                batch_size=batch_size,
                shuffle=is_train,
                drop_last=is_train)

            self.dataloader = paddle.io.DataLoader(self.dataset,
                                                   batch_sampler=sampler,
                                                   places=place,
                                                   num_workers=num_workers)
        else:
            self.dataloader = paddle.io.DataLoader(
                self.dataset,
                batch_size=batch_size,
                shuffle=is_train,
                drop_last=is_train,
                places=place,
                num_workers=num_workers)

        self.batch_size = batch_size
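
For reference, a self-contained sketch of the non-distributed branch with a toy dataset; ToyDataset and its shapes are made up for illustration, and a Paddle version whose paddle.io.DataLoader still accepts the places argument is assumed.

import numpy as np
import paddle
from paddle.io import Dataset, DataLoader

class ToyDataset(Dataset):
    # Ten (feature, label) pairs, purely illustrative.
    def __getitem__(self, idx):
        return (np.full([4], idx, dtype="float32"),
                np.array([idx], dtype="int64"))

    def __len__(self):
        return 10

place = paddle.CUDAPlace(0) if paddle.device.is_compiled_with_cuda() \
    else paddle.CPUPlace()
loader = DataLoader(ToyDataset(),
                    batch_size=4,
                    shuffle=True,
                    drop_last=True,
                    places=place,
                    num_workers=0)
for features, labels in loader:
    print(features.shape, labels.shape)  # [4, 4] and [4, 1]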
Example 3
def setup(args, cfg):
    if args.evaluate_only:
        cfg.isTrain = False

    cfg.timestamp = time.strftime('-%Y-%m-%d-%H-%M', time.localtime())
    cfg.output_dir = os.path.join(cfg.output_dir,
                                  str(cfg.model.name) + cfg.timestamp)

    logger = setup_logger(cfg.output_dir)

    logger.info('Configs: {}'.format(cfg))

    place = paddle.CUDAPlace(ParallelEnv().dev_id) \
                    if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)
    paddle.disable_static(place)
Example 4
 def setUp(self):
     paddle.enable_static()
     self.init_dtype()
     self.x = (np.random.rand(2, 3, 10, 10) + 0.5).astype(self.dtype)
     self.place = [paddle.CPUPlace()]
     if core.is_compiled_with_cuda():
         self.place.append(paddle.CUDAPlace(0))
Example 5
 def setUp(self):
     self.x_shape = [2, 3, 4, 5]
     self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32)
     self.count_expected = 24
     self.place = paddle.CUDAPlace(0) \
         if paddle.fluid.core.is_compiled_with_cuda() \
         else paddle.CPUPlace()
Example 6
def quantize(args):
    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()

    assert os.path.exists(args.model_path), "args.model_path doesn't exist"
    assert os.path.isdir(args.model_path), "args.model_path must be a dir"

    def reader_generator(imagenet_reader):
        def gen():
            for i, data in enumerate(imagenet_reader()):
                image, label = data
                image = np.expand_dims(image, axis=0)
                yield image
        return gen

    exe = paddle.static.Executor(place)
    quant_post_hpo(
        exe,
        place,
        args.model_path,
        args.save_path,
        train_sample_generator=reader_generator(reader.train()),
        eval_sample_generator=reader_generator(reader.val()),
        model_filename=args.model_filename,
        params_filename=args.params_filename,
        save_model_filename='__model__',
        save_params_filename='__params__',
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
        weight_quantize_type='channel_wise_abs_max',
        runcount_limit=args.max_model_quant_count)
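
quant_post_hpo comes from PaddleSlim, so it is not reproduced here; independent of it, a minimal sketch of the static-graph Executor/place pattern that the call relies on:

import numpy as np
import paddle

paddle.enable_static()
place = paddle.CUDAPlace(0) if paddle.device.is_compiled_with_cuda() \
    else paddle.CPUPlace()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name="x", shape=[None, 3], dtype="float32")
    y = paddle.mean(x)

exe = paddle.static.Executor(place)
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={"x": np.ones([2, 3], dtype="float32")},
               fetch_list=[y])
print(out)  # mean of the all-ones input, i.e. 1.0
paddle.disable_static()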
Example 7
 def test_check_output_gpu(self):
     if paddle.is_compiled_with_cuda():
         paddle.disable_static(place=paddle.CUDAPlace(0))
         input_real_data = paddle.to_tensor(self.x_np)
         actual_w, actual_v = paddle.linalg.eigh(input_real_data, self.UPLO)
         valid_eigh_result(self.x_np,
                           actual_w.numpy(), actual_v.numpy(), self.UPLO)
Example 8
    def _prune_opt(self, param_name, dims, bool_mask, opt):
        if opt is None:
            return
        for k, v in opt._accumulators.items():
            var_tmp = v.get(param_name)
            # NOTE: var_tmp.shape == [1] is used to skip variables like
            # beta1_pow_acc in Adam optimizer. Its shape is [1] and there's
            # no need to prune this one-value variable.
            if var_tmp is None or var_tmp.shape == [1]:
                if var_tmp is not None: print(var_tmp.name, var_tmp.shape)
                continue
            t_value = var_tmp.value().get_tensor()
            value = np.array(t_value).astype("float32")

            pruned_value = np.apply_along_axis(lambda data: data[bool_mask],
                                               dims, value)

            p = t_value._place()
            if p.is_cpu_place():
                place = paddle.CPUPlace()
            elif p.is_cuda_pinned_place():
                place = paddle.CUDAPinnedPlace()
            else:
                p = core.Place()
                p.set_place(t_value._place())
                place = paddle.CUDAPlace(p.gpu_device_id())

            t_value.set(pruned_value, place)
Example 9
    def lazy_apply(self, model):
        for name, sub_layer in model.named_sublayers():
            for param in sub_layer.parameters(include_sublayers=False):
                if param.name in self._masks:
                    for _mask in self._masks[param.name]:
                        dims = _mask.dims
                        mask = _mask.mask
                        t_value = param.value().get_tensor()
                        value = np.array(t_value).astype("float32")
                        # Buffer names must not contain "."
                        backup_name = param.name.replace(".", "_") + "_backup"
                        if backup_name not in sub_layer._buffers:
                            sub_layer.register_buffer(backup_name,
                                                      paddle.to_tensor(value))
                            _logger.debug(
                                "Backup values of {} into buffers.".format(
                                    param.name))
                        expand_mask_shape = [1] * len(value.shape)
                        expand_mask_shape[dims] = value.shape[dims]
                        _logger.debug("Expanded mask shape: {}".format(
                            expand_mask_shape))
                        expand_mask = mask.reshape(expand_mask_shape).astype(
                            "float32")

                        p = t_value._place()
                        if p.is_cpu_place():
                            place = paddle.CPUPlace()
                        elif p.is_cuda_pinned_place():
                            place = paddle.CUDAPinnedPlace()
                        else:
                            p = core.Place()
                            p.set_place(t_value._place())
                            place = paddle.CUDAPlace(p.gpu_device_id())

                        t_value.set(value * expand_mask, place)
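
The place-recovery branching above repeats in several snippets in this list; a hedged helper that factors it out, assuming core refers to paddle.fluid.core and t_value is the object returned by param.value().get_tensor():

import paddle
from paddle.fluid import core

def recover_place(t_value):
    # Rebuild a Paddle place object from a tensor's internal place,
    # mirroring the CPU / CUDA-pinned / CUDA branches used above.
    p = t_value._place()
    if p.is_cpu_place():
        return paddle.CPUPlace()
    if p.is_cuda_pinned_place():
        return paddle.CUDAPinnedPlace()
    generic = core.Place()
    generic.set_place(p)
    return paddle.CUDAPlace(generic.gpu_device_id())

# Usage inside the loops above: t_value.set(new_value, recover_place(t_value))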
Example 10
    def run(self):
        self.network()
        self.init_reader()
        use_cuda = int(config.get("runner.use_gpu"))
        place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
        self.exe = paddle.static.Executor(place)

        init_model_path = config.get("runner.model_save_path")
        init_model_path = os.path.join(config["config_abs_dir"],
                                       init_model_path)
        logger.info("init_model_path: {}".format(init_model_path))
        for file in os.listdir(init_model_path):
            file_path = os.path.join(init_model_path, file)
            # hard-coded convention: per-epoch models live in numeric folders
            if os.path.isdir(file_path) and is_number(file):
                self.epoch_model_path_list.append(file_path)
        if len(self.epoch_model_path_list) == 0:
            self.epoch_model_path_list.append(init_model_path)

        self.epoch_model_path_list.sort()
        logger.info("self.epoch_model_path_list: {}".format(
            self.epoch_model_path_list))
        for idx, model_path in enumerate(self.epoch_model_path_list):
            logger.info("Begin Infer Model {}".format(
                self.epoch_model_path_list[idx]))
            model_name = model_path.split("/")[-1]
            infer_res = self.run_infer(model_path, model_name)
            self.infer_result_dict["result"][model_name] = infer_res

        self.record_result()
        logger.info("Run Success, Exit.")
Example 11
 def __init__(self, time_major=True, direction="forward", place="cpu"):
     super(TestSimpleRNN, self).__init__("runTest")
     self.time_major = time_major
     self.direction = direction
     self.num_directions = 2 if direction == "bidirectional" else 1
     self.place = paddle.CPUPlace() if place == "cpu" \
         else paddle.CUDAPlace(0)
Example 12
 def setUp(self):
     self.init_config()
     self.generate_input()
     self.generate_output()
     self.places = [paddle.CPUPlace()]
     if core.is_compiled_with_cuda():
         self.places.append(paddle.CUDAPlace(0))
Example 13
 def setUp(self):
     self.init_dtype()
     self.x = np.random.rand(5).astype(self.dtype)
     self.res_ref = erfinv(self.x)
     self.place = [paddle.CPUPlace()]
     if core.is_compiled_with_cuda():
         self.place.append(paddle.CUDAPlace(0))
Example 14
    def run(self):
        self.network()
        self.init_reader()
        use_cuda = int(config.get("runner.use_gpu"))
        place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
        self.exe = paddle.static.Executor(place)

        init_model_path = config.get("runner.model_save_path")
        for file in os.listdir(init_model_path):
            file_path = os.path.join(init_model_path, file)
            # hard-coded convention: per-epoch models live in numeric folders
            if os.path.isdir(file_path) and is_number(file):
                self.epoch_model_path_list.append(file_path)
                self.epoch_model_name_list.append(file)

        if len(self.epoch_model_path_list) == 0:
            self.epoch_model_path_list.append(init_model_path)
            self.epoch_model_name_list.append(init_model_path)

        self.epoch_model_path_list.sort()
        self.epoch_model_name_list.sort()

        for idx, model_path in enumerate(self.epoch_model_path_list):
            logger.info("Begin Infer Model {}".format(
                self.epoch_model_name_list[idx]))
            self.run_infer(model_path, self.epoch_model_name_list[idx])
        logger.info("Run Success, Exit.")
Example 15
    def test_NNFunctionalMseLoss_none(self):
        for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]:
            input_np = np.random.uniform(0.1, 0.5, dim).astype("float32")
            target_np = np.random.uniform(0.1, 0.5, dim).astype("float32")
            paddle.enable_static()
            prog = paddle.static.Program()
            startup_prog = paddle.static.Program()
            place = paddle.CUDAPlace(0) \
                if core.is_compiled_with_cuda() else paddle.CPUPlace()
            with paddle.static.program_guard(prog, startup_prog):
                input = paddle.data(name='input', shape=dim, dtype='float32')
                target = paddle.data(name='target', shape=dim, dtype='float32')
                mse_loss = paddle.nn.functional.mse_loss(input, target, 'none')

                exe = paddle.static.Executor(place)
                exe.run(startup_prog)
                static_result = exe.run(
                    prog,
                    feed={"input": input_np,
                          "target": target_np},
                    fetch_list=[mse_loss])

            paddle.disable_static()
            dy_ret = paddle.nn.functional.mse_loss(
                paddle.to_variable(input_np),
                paddle.to_variable(target_np), 'none')
            dy_result = dy_ret.numpy()

            sub = input_np - target_np
            expected = sub * sub
            self.assertTrue(np.allclose(static_result, expected))
            self.assertTrue(np.allclose(static_result, dy_result))
            self.assertTrue(np.allclose(dy_result, expected))
            self.assertEqual(list(dy_result.shape), dim)
Example 16
    def test_to_api(self):
        self.linear.to(dtype='double')
        self.assertEqual(self.linear.weight.dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
        self.assertEqual(self.linear.buf_name.dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
        self.assertTrue(
            np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
        self.assertEqual(self.linear.weight._grad_ivar().dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)

        self.linear.to()
        self.assertEqual(self.linear.weight.dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
        self.assertEqual(self.linear.buf_name.dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
        self.assertTrue(
            np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
        self.assertEqual(self.linear.weight._grad_ivar().dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
        for p in self.linear.parameters():
            self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))

        if paddle.fluid.is_compiled_with_cuda():
            self.linear.to(device=paddle.CUDAPlace(0))
            self.assertTrue(self.linear.weight.place.is_gpu_place())
            self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
            self.assertTrue(self.linear.buf_name.place.is_gpu_place())
            self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
            self.assertTrue(
                self.linear.weight._grad_ivar().place.is_gpu_place())
            self.assertEqual(
                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)

            self.linear.to(device='gpu:0')
            self.assertTrue(self.linear.weight.place.is_gpu_place())
            self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
            self.assertTrue(self.linear.buf_name.place.is_gpu_place())
            self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
            self.assertTrue(
                self.linear.weight._grad_ivar().place.is_gpu_place())
            self.assertEqual(
                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
            for p in self.linear.parameters():
                self.assertTrue(isinstance(p,
                                           paddle.fluid.framework.ParamBase))

        self.linear.to(device=paddle.CPUPlace())
        self.assertTrue(self.linear.weight.place.is_cpu_place())
        self.assertTrue(self.linear.buf_name.place.is_cpu_place())
        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())

        self.linear.to(device='cpu')
        self.assertTrue(self.linear.weight.place.is_cpu_place())
        self.assertTrue(self.linear.buf_name.place.is_cpu_place())
        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())

        self.assertRaises(ValueError, self.linear.to, device=1)

        self.assertRaises(AssertionError, self.linear.to, blocking=1)
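
Outside the unit test, the same Layer.to calls can be exercised directly; a brief sketch, assuming Paddle 2.1+ in dygraph mode (the GPU branch only runs when CUDA support is compiled in):

import paddle

linear = paddle.nn.Linear(4, 2)

linear.to(dtype="float64")                 # cast parameters and buffers
print(linear.weight.dtype)

if paddle.device.is_compiled_with_cuda():
    linear.to(device=paddle.CUDAPlace(0))  # move onto GPU 0
    print(linear.weight.place)

linear.to(device="cpu")                    # move back to the CPU
print(linear.weight.place)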
Example 17
    def test_attr_tensor_API(self):
        startup_program = Program()
        train_program = Program()
        with program_guard(train_program, startup_program):
            fill_value = 2.0
            input = paddle.fluid.data(name='input',
                                      dtype='float32',
                                      shape=[2, 3])
            output = paddle.full_like(input, fill_value)
            output_dtype = paddle.full_like(input, fill_value, dtype='float32')

            place = paddle.CPUPlace()
            if core.is_compiled_with_cuda():
                place = paddle.CUDAPlace(0)
            exe = paddle.static.Executor(place)
            exe.run(startup_program)

            img = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)

            res = exe.run(train_program,
                          feed={'input': img},
                          fetch_list=[output])

            out_np = np.array(res[0])
            self.assertTrue(not (out_np - np.full_like(img, fill_value)).any(),
                            msg="full_like output is wrong, out = " +
                            str(out_np))
Example 18
    def restore(self, model):
        for name, sub_layer in model.named_sublayers():
            for param in sub_layer.parameters(include_sublayers=False):
                backup_name = "_".join(
                    [param.name.replace(".", "_"), "backup"])
                if backup_name in sub_layer._buffers:
                    _logger.debug("Restore values of variable: {}".format(
                        param.name))
                    t_value = param.value().get_tensor()
                    t_backup = sub_layer._buffers[backup_name].value(
                    ).get_tensor()

                    p = t_value._place()
                    if p.is_cpu_place():
                        place = paddle.CPUPlace()
                    elif p.is_cuda_pinned_place():
                        place = paddle.CUDAPinnedPlace()
                    else:
                        p = core.Place()
                        p.set_place(t_value._place())
                        place = paddle.CUDAPlace(p.gpu_device_id())

                    t_value.set(np.array(t_backup).astype("float32"), place)

                    if isinstance(sub_layer, paddle.nn.layer.conv.Conv2D):
                        if sub_layer._groups > 1:
                            _logger.debug(
                                "Update groups of conv from {} to {}".format(
                                    sub_layer._groups,
                                    t_value.shape()[0]))
                            sub_layer._groups = t_value.shape()[0]
                    del sub_layer._buffers[backup_name]
Example 19
 def setUp(self):
     self.place = paddle.CUDAPlace(
         0) if core.is_compiled_with_cuda() else paddle.CPUPlace()
     self.x_np = np.random.uniform(-1., 1., [1, 2, 3, 4]).astype('float32')
     self.weight_np_0 = np.random.randn(1).astype('float32')
     self.weight_np_1 = np.random.randn(
         self.x_np.shape[1]).astype('float32')
Example 20
    def test_asp_training_with_amp(self):
        if core.is_compiled_with_cuda():
            place = paddle.CUDAPlace(0)
            with fluid.program_guard(self.main_program, self.startup_program):
                self.optimizer = fluid.contrib.mixed_precision.decorator.decorate(
                    self.optimizer)
                self.optimizer = paddle.incubate.asp.decorate(self.optimizer)
                self.optimizer.minimize(self.loss, self.startup_program)

            exe = fluid.Executor(place)
            feeder = fluid.DataFeeder(
                feed_list=[self.img, self.label], place=place)

            exe.run(self.startup_program)
            paddle.incubate.asp.prune_model(self.main_program)

            data = (np.random.randn(32, 3, 24, 24), np.random.randint(
                10, size=(32, 1)))
            exe.run(self.main_program, feed=feeder.feed([data]))

            for param in self.main_program.global_block().all_parameters():
                if ASPHelper._is_supported_layer(self.main_program, param.name):
                    mat = np.array(fluid.global_scope().find_var(param.name)
                                   .get_tensor())
                    self.assertTrue(
                        paddle.fluid.contrib.sparsity.check_sparsity(
                            mat.T, n=2, m=4))
Example 21
    def test_dynamic_graph(self):
        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):
            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
            paddle.disable_static(place=place)
            x = paddle.to_variable(self.x_np)

            out_1 = paddle.nn.functional.adaptive_avg_pool3d(
                x=x, output_size=[3, 3, 3])

            out_2 = paddle.nn.functional.adaptive_avg_pool3d(x=x, output_size=5)

            out_3 = paddle.nn.functional.adaptive_avg_pool3d(
                x=x, output_size=[2, 3, 5])

            out_4 = paddle.nn.functional.adaptive_avg_pool3d(
                x=x, output_size=[3, 3, 3], data_format="NDHWC")

            out_5 = paddle.nn.functional.adaptive_avg_pool3d(
                x=x, output_size=[None, 3, None])

            out_6 = paddle.nn.functional.interpolate(
                x=x, mode="area", size=[2, 3, 5])

            assert np.allclose(out_1.numpy(), self.res_1_np)

            assert np.allclose(out_2.numpy(), self.res_2_np)

            assert np.allclose(out_3.numpy(), self.res_3_np)

            assert np.allclose(out_4.numpy(), self.res_4_np)

            assert np.allclose(out_5.numpy(), self.res_5_np)

            assert np.allclose(out_6.numpy(), self.res_3_np)
Example 22
def test_io_devices():
    n = 32
    x = ti.field(dtype=ti.i32, shape=n)

    @ti.kernel
    def load(y: ti.types.ndarray()):
        for i in x:
            x[i] = y[i] + 10

    @ti.kernel
    def inc():
        for i in x:
            x[i] += i

    @ti.kernel
    def store(y: ti.types.ndarray()):
        for i in x:
            y[i] = x[i] * 2

    devices = [paddle.CPUPlace()]
    if paddle.device.is_compiled_with_cuda():
        devices.append(paddle.CUDAPlace(0))
    for device in devices:
        y = paddle.to_tensor(np.ones(shape=n, dtype=np.int32), place=device)

        load(y)
        inc()
        store(y)

        y = y.cpu().numpy()

        for i in range(n):
            assert y[i] == (11 + i) * 2
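
Separately from Taichi, a short hedged sketch of creating a tensor directly on a chosen device with paddle.to_tensor, the pattern the test above depends on:

import numpy as np
import paddle

place = paddle.CUDAPlace(0) if paddle.device.is_compiled_with_cuda() \
    else paddle.CPUPlace()

# place= controls where the tensor's memory is allocated.
y = paddle.to_tensor(np.ones(8, dtype=np.int32), place=place)
print(y.place)

# Bring the data back to host memory for inspection.
print(y.cpu().numpy())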
Example 23
    def _restore_opt(self, param_name, sub_layer, opt):
        if opt is None:
            return
        for k, v in opt._accumulators.items():
            var_tmp = v.get(param_name)
            if var_tmp is None: continue
            backup_name = var_tmp.name.replace(".", "_") + "_backup"
            if backup_name in sub_layer._buffers:
                _logger.debug("Restore values of variable: {}".format(
                    var_tmp.name))
                t_value = var_tmp.value().get_tensor()
                t_backup = sub_layer._buffers[backup_name].value().get_tensor()

                p = t_value._place()
                if p.is_cpu_place():
                    place = paddle.CPUPlace()
                elif p.is_cuda_pinned_place():
                    place = paddle.CUDAPinnedPlace()
                else:
                    p = core.Place()
                    p.set_place(t_value._place())
                    place = paddle.CUDAPlace(p.gpu_device_id())

                t_value.set(np.array(t_backup).astype("float32"), place)
                del sub_layer._buffers[backup_name]
Example 24
def test_devices():
    n = 12
    X = ti.Matrix.field(3, 2, ti.f32, shape=(n, n, n))
    assert X.to_paddle(place=paddle.CPUPlace()).place.is_cpu_place()

    if paddle.device.is_compiled_with_cuda():
        assert X.to_paddle(place=paddle.CUDAPlace(0)).place.is_gpu_place()
Example 25
    def restore(self, model, opt=None):
        for name, sub_layer in model.named_sublayers(include_self=True):
            for param in sub_layer.parameters(include_sublayers=False):
                # restore optimizer accumulators from layer buffer
                self._restore_opt(param.name, sub_layer, opt)
                backup_name = "_".join(
                    [param.name.replace(".", "_"), "backup"])
                if backup_name in sub_layer._buffers:
                    _logger.debug("Restore values of variable: {}".format(
                        param.name))
                    t_value = param.value().get_tensor()
                    t_backup = sub_layer._buffers[backup_name].value(
                    ).get_tensor()

                    p = t_value._place()
                    if p.is_cpu_place():
                        place = paddle.CPUPlace()
                    elif p.is_cuda_pinned_place():
                        place = paddle.CUDAPinnedPlace()
                    else:
                        p = core.Place()
                        p.set_place(t_value._place())
                        place = paddle.CUDAPlace(p.gpu_device_id())

                    t_value.set(np.array(t_backup).astype("float32"), place)
                    if "_origin_groups" in sub_layer.__dict__:
                        sub_layer._groups = sub_layer._origin_groups
                    del sub_layer._buffers[backup_name]
Example 26
    def _run_gpu_main(self, model, apply_pass, dump_file, **kwargs):
        gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
        place = paddle.CUDAPlace(gpu_id)
        scope = paddle.static.Scope()
        if apply_pass:
            self.apply_passes()
        else:
            self.apply_no_passes()
        with paddle.static.program_guard(paddle.static.Program(),
                                         paddle.static.Program()):
            with paddle.static.scope_guard(scope):
                with paddle.fluid.unique_name.guard():
                    main_prog, startup_prog, inputs, outputs, reader = self.get_model(
                        place, **kwargs)
                    inputs = self._to_var_names(inputs)
                    outputs = self._to_var_names(outputs)

        all_fetch_values = []
        exe = paddle.static.Executor(place)
        with paddle.static.scope_guard(scope):
            exe.run(startup_prog)
            for batch_id, input_data in enumerate(reader()):
                assert len(input_data) == len(inputs), "{} vs {}".format(
                    len(input_data), len(inputs))
                feed = dict(zip(inputs, input_data))
                fetch_values = exe.run(main_prog,
                                       feed=feed,
                                       fetch_list=outputs)
                if paddle.distributed.get_rank() == 0:
                    output_dict = OrderedDict(zip(outputs, fetch_values))
                    print('batch {}, outputs {}'.format(batch_id, output_dict))
                all_fetch_values.append(fetch_values)
        with open(dump_file, "wb") as f:
            pickle.dump(all_fetch_values, f)
Example 27
    def test_synchronize(self):
        if paddle.is_compiled_with_cuda():
            self.assertIsNone(cuda.synchronize())
            self.assertIsNone(cuda.synchronize(0))
            self.assertIsNone(cuda.synchronize(paddle.CUDAPlace(0)))

            self.assertRaises(ValueError, cuda.synchronize, "gpu:0")
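
A standalone sketch of the synchronize calls the test exercises, assuming Paddle 2.2+ where paddle.device.cuda.synchronize is available (guarded so it is a no-op on CPU-only builds):

import paddle

if paddle.is_compiled_with_cuda():
    # Block until all queued GPU work has finished; the device may be
    # given as an index or as a CUDAPlace, as in the test above.
    paddle.device.cuda.synchronize()
    paddle.device.cuda.synchronize(0)
    paddle.device.cuda.synchronize(paddle.CUDAPlace(0))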
Example 28
def export(args):
    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    quant_config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'not_quant_pattern': ['skip_quant'],
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul']
    }
    train_config = {
        "num_epoch": args.num_epoch,  # training epoch num
        "max_iter": -1,
        "save_iter_step": args.save_iter_step,
        "learning_rate": args.learning_rate,
        "weight_decay": args.weight_decay,
        "use_pact": args.use_pact,
        "quant_model_ckpt_path": args.checkpoint_path,
        "teacher_model_path_prefix": args.teacher_model_path_prefix,
        "model_path_prefix": args.model_path_prefix,
        "distill_node_pair": args.distill_node_name_list
    }

    export_quant_infermodel(
        exe,
        place,
        scope=None,
        quant_config=quant_config,
        train_config=train_config,
        checkpoint_path=os.path.join(args.checkpoint_path,
                                     args.checkpoint_filename),
        export_inference_model_path_prefix=args.export_inference_model_path_prefix)
Example 29
    def test_dynamic_graph(self):
        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):
            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
            paddle.disable_static(place=place)
            x = paddle.to_variable(self.x_np)

            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3])
            out_1 = adaptive_avg_pool(x=x)

            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=5)
            out_2 = adaptive_avg_pool(x=x)

            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[2, 5])
            out_3 = adaptive_avg_pool(x=x)

            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3],
                                                            data_format="NHWC")
            out_4 = adaptive_avg_pool(x=x)

            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(
                output_size=[None, 3])
            out_5 = adaptive_avg_pool(x=x)

            assert np.allclose(out_1.numpy(), self.res_1_np)

            assert np.allclose(out_2.numpy(), self.res_2_np)

            assert np.allclose(out_3.numpy(), self.res_3_np)

            assert np.allclose(out_4.numpy(), self.res_4_np)

            assert np.allclose(out_5.numpy(), self.res_5_np)
Example 30
 def setUp(self):
     self.dtype = 'float32'
     self.input = np.ones((3, 1, 2)).astype(self.dtype)
     self.weight = np.ones((2, 2)).astype(self.dtype)
     self.bias = np.ones((2)).astype(self.dtype)
     self.place = paddle.CUDAPlace(
         0) if core.is_compiled_with_cuda() else paddle.CPUPlace()
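
Several of the setUp snippets above build a list of candidate places and run the same check on each; a compact hedged sketch of that pattern (relu stands in for whatever op the test exercises):

import numpy as np
import paddle

places = [paddle.CPUPlace()]
if paddle.device.is_compiled_with_cuda():
    places.append(paddle.CUDAPlace(0))

x_np = np.random.rand(3, 1, 2).astype("float32")
for place in places:
    paddle.disable_static(place)           # dygraph on the chosen place
    y = paddle.nn.functional.relu(paddle.to_tensor(x_np))
    print(place, y.numpy().shape)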