def prune_params(model, param_config, super_model_sd=None):
    """ Prune parameters according to the config.

    Parameters:
        model(paddle.nn.Layer): instance of model.
        param_config(dict): prune config of each weight.
        super_model_sd(dict, optional): parameters that come from the supernet.
            If super_model_sd is not None, parameters are transferred from this
            dict to the model; otherwise, the model is pruned in place.
    """
    for l_name, sublayer in model.named_sublayers():
        if isinstance(sublayer, BaseBlock):
            continue
        for p_name, param in sublayer.named_parameters(
                include_sublayers=False):
            t_value = param.value().get_tensor()
            value = np.array(t_value).astype("float32")

            if super_model_sd is not None:
                name = l_name + '.' + p_name
                super_t_value = super_model_sd[name].value().get_tensor()
                super_value = np.array(super_t_value).astype("float32")
                super_model_sd.pop(name)

            if param.name in param_config:
                if len(param_config[param.name]) > 1:
                    in_exp = param_config[param.name][0]
                    out_exp = param_config[param.name][1]
                    if sublayer.__class__.__name__.lower() in CONV_TYPES:
                        in_chn = get_actual_shape(in_exp, value.shape[1])
                        out_chn = get_actual_shape(out_exp, value.shape[0])
                        prune_value = super_value[:out_chn, :in_chn, ...] \
                            if super_model_sd is not None else value[:out_chn, :in_chn, ...]
                    else:
                        in_chn = get_actual_shape(in_exp, value.shape[0])
                        out_chn = get_actual_shape(out_exp, value.shape[1])
                        prune_value = super_value[:in_chn, :out_chn, ...] \
                            if super_model_sd is not None else value[:in_chn, :out_chn, ...]
                else:
                    out_chn = get_actual_shape(param_config[param.name][0],
                                               value.shape[0])
                    prune_value = super_value[:out_chn, ...] \
                        if super_model_sd is not None else value[:out_chn, ...]
            else:
                prune_value = super_value if super_model_sd is not None else value

            p = t_value._place()
            if p.is_cpu_place():
                place = core.CPUPlace()
            elif p.is_cuda_pinned_place():
                place = core.CUDAPinnedPlace()
            else:
                place = core.CUDAPlace(p.gpu_device_id())
            t_value.set(prune_value, place)
            if param.trainable:
                param.clear_gradient()

    ### initialize params that are not in sublayers, such as persistable
    ### inputs created by create_parameters
    if super_model_sd is not None and len(super_model_sd) != 0:
        for k, v in super_model_sd.items():
            setattr(model, k, v)
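# Hedged illustration (not part of the library): prune_params above keeps the
# leading channels of each weight by slicing. The sketch below mimics that with
# plain numpy; `_actual_shape_sketch` is a hypothetical stand-in for
# get_actual_shape, assumed to map a keep ratio or explicit size to a concrete
# channel count, which matches how it is used above.
import numpy as np

def _actual_shape_sketch(exp, origin):
    # Assumption: None keeps the original size, a float is a keep ratio,
    # and an int is taken as the exact channel count.
    if exp is None:
        return origin
    return int(origin * exp) if isinstance(exp, float) else int(exp)

conv_w = np.random.rand(8, 4, 3, 3).astype("float32")  # [out, in, kh, kw]
out_chn = _actual_shape_sketch(0.5, conv_w.shape[0])   # -> 4
in_chn = _actual_shape_sketch(None, conv_w.shape[1])   # -> 4
pruned = conv_w[:out_chn, :in_chn, ...]                # same slicing as above
assert pruned.shape == (4, 4, 3, 3)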
def _set_var(var, ndarray):
    # Copy ndarray into the variable's underlying tensor in the global scope,
    # preserving the tensor's original device placement.
    t = global_scope().find_var(var.name).get_tensor()
    p = t._place()
    if p.is_cpu_place():
        place = core.CPUPlace()
    elif p.is_cuda_pinned_place():
        place = core.CUDAPinnedPlace()
    else:
        p = core.Place()
        p.set_place(t._place())
        place = core.CUDAPlace(p.gpu_device_id())
    t.set(ndarray, place)
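# Hedged usage sketch for _set_var above: overwrite a static-graph parameter
# with a numpy array after initialization. Assumes `_set_var`, `core`, and
# `global_scope` (e.g. from paddle.static) are in scope; the programs and the
# fc layer below are illustrative, not from the original source.
import numpy as np
import paddle
import paddle.static as static

paddle.enable_static()
main_prog, startup_prog = static.Program(), static.Program()
with static.program_guard(main_prog, startup_prog):
    x = static.data(name='x', shape=[None, 4], dtype='float32')
    y = static.nn.fc(x, size=2)

exe = static.Executor(paddle.CPUPlace())
exe.run(startup_prog)

# Overwrite the fc weight in place; _set_var keeps the tensor on its device.
param = main_prog.global_block().all_parameters()[0]
_set_var(param, np.ones(param.shape, dtype='float32'))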
def _feeder():
    # Pack each python batch into a pair of LoDTensors, optionally placing
    # them in pinned memory for faster host-to-device copies.
    for batch_data in batch_reader():
        sample_batch = []
        label_batch = []
        for sample, label in batch_data:
            sample_batch.append(sample)
            label_batch.append([label])
        tensor = core.LoDTensor()
        label = core.LoDTensor()
        place = core.CUDAPinnedPlace() if pin_memory else core.CPUPlace()
        tensor.set(np.array(sample_batch, dtype=img_dtype), place)
        label.set(np.array(label_batch, dtype="int64"), place)
        yield [tensor, label]
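# Hedged usage sketch: _feeder expects `batch_reader`, `pin_memory`, and
# `img_dtype` from its enclosing scope. The toy reader and values below are
# illustrative only, defined at module level so the closure can see them.
import numpy as np

def batch_reader():
    for _ in range(2):  # two batches of four (image, label) pairs
        yield [(np.zeros((3, 32, 32), dtype='float32'), 1) for _ in range(4)]

pin_memory = False     # assumed flag from the enclosing scope
img_dtype = 'float32'  # assumed dtype from the enclosing scope

for image_tensor, label_tensor in _feeder():
    print(image_tensor.shape(), label_tensor.shape())  # [4, 3, 32, 32] [4, 1]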
def test_tensor_set_int16(self):
    array = numpy.random.randint(100, size=(300, 500)).astype("int16")
    tensor = fluid.Tensor()
    place = core.CPUPlace()
    tensor.set(array, place)
    self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
    self.assertTrue(numpy.array_equal(numpy.array(tensor), array))

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
        self.assertTrue(numpy.array_equal(numpy.array(tensor), array))

        place = core.CUDAPinnedPlace()
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
        self.assertTrue(numpy.array_equal(numpy.array(tensor), array))
def test_tensor_set_from_array_list(self):
    array = numpy.random.randint(1000, size=(200, 300))
    list_array = [array, array]
    tensor = fluid.Tensor()
    place = core.CPUPlace()
    tensor.set(list_array, place)
    self.assertEqual([2, 200, 300], tensor.shape())
    self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        tensor.set(list_array, place)
        self.assertEqual([2, 200, 300], tensor.shape())
        self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))

        place = core.CUDAPinnedPlace()
        tensor.set(list_array, place)
        self.assertEqual([2, 200, 300], tensor.shape())
        self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))
def test_tensor_pointer(self):
    scope = core.Scope()
    var = scope.var("test_tensor")
    place = core.CPUPlace()
    tensor = var.get_tensor()
    dtype = core.VarDesc.VarType.FP32
    self.assertTrue(
        isinstance(tensor._mutable_data(place, dtype), numbers.Integral))

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        self.assertTrue(
            isinstance(tensor._mutable_data(place, dtype), numbers.Integral))
        place = core.CUDAPinnedPlace()
        self.assertTrue(
            isinstance(tensor._mutable_data(place, dtype), numbers.Integral))
def prune_params(model, param_config, super_model_sd=None):
    for name, param in model.named_parameters():
        t_value = param.value().get_tensor()
        value = np.array(t_value).astype("float32")

        if super_model_sd is not None:
            super_t_value = super_model_sd[name].value().get_tensor()
            super_value = np.array(super_t_value).astype("float32")

        if param.name in param_config:
            if len(param_config[param.name]) > 1:
                in_exp = param_config[param.name][0]
                out_exp = param_config[param.name][1]
                in_chn = int(value.shape[0]) if in_exp is None else int(
                    value.shape[0] * in_exp)
                out_chn = int(value.shape[1]) if out_exp is None else int(
                    value.shape[1] * out_exp)
                prune_value = super_value[:in_chn, :out_chn, ...] \
                    if super_model_sd is not None else value[:in_chn, :out_chn, ...]
            else:
                out_chn = int(value.shape[0]) if param_config[
                    param.name][0] is None else int(
                        value.shape[0] * param_config[param.name][0])
                prune_value = super_value[:out_chn, ...] \
                    if super_model_sd is not None else value[:out_chn, ...]
        else:
            prune_value = super_value if super_model_sd is not None else value

        p = t_value._place()
        if p.is_cpu_place():
            place = core.CPUPlace()
        elif p.is_cuda_pinned_place():
            place = core.CUDAPinnedPlace()
        else:
            place = core.CUDAPlace(p.gpu_device_id())
        t_value.set(prune_value, place)
        if param.trainable:
            param.clear_gradient()
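# Hedged worked example of the ratio arithmetic in the variant above: for a
# weight laid out as [in_features, out_features] and keep ratios (0.5, 0.25),
# the kept slice is value[:in_chn, :out_chn]. Shapes are illustrative.
import numpy as np

value = np.zeros((512, 256), dtype="float32")
in_exp, out_exp = 0.5, 0.25
in_chn = int(value.shape[0]) if in_exp is None else int(value.shape[0] * in_exp)
out_chn = int(value.shape[1]) if out_exp is None else int(value.shape[1] * out_exp)
assert (in_chn, out_chn) == (256, 64)
assert value[:in_chn, :out_chn].shape == (256, 64)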
def test_to_tensor(self):
    def _test_place(place):
        with fluid.dygraph.guard():
            paddle.set_default_dtype('float32')
            # set_default_dtype should not take effect on int
            x = paddle.to_tensor(1, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1]))
            self.assertNotEqual(x.dtype, core.VarDesc.VarType.FP32)

            # set_default_dtype should not take effect on numpy
            x = paddle.to_tensor(
                np.array([1.2]).astype('float16'),
                place=place,
                stop_gradient=False)
            self.assertTrue(
                np.array_equal(x.numpy(), np.array([1.2], 'float16')))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP16)

            # set_default_dtype take effect on float
            x = paddle.to_tensor(1.2, place=place, stop_gradient=False)
            self.assertTrue(
                np.array_equal(x.numpy(), np.array([1.2]).astype('float32')))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)

            # set_default_dtype take effect on complex
            x = paddle.to_tensor(1 + 2j, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j]))
            self.assertEqual(x.dtype, 'complex64')

            paddle.set_default_dtype('float64')
            x = paddle.to_tensor(1.2, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1.2]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP64)

            x = paddle.to_tensor(1 + 2j, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j]))
            self.assertEqual(x.dtype, 'complex128')

            x = paddle.to_tensor(
                1, dtype='float32', place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1.]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.shape, [1])
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            x = paddle.to_tensor(
                (1, 2), dtype='float32', place=place, stop_gradient=False)
            x = paddle.to_tensor(
                [1, 2], dtype='float32', place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1., 2.]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.grad, None)
            self.assertEqual(x.shape, [2])
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            x = paddle.to_tensor(
                self.array,
                dtype='float32',
                place=place,
                stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), self.array))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.shape, self.shape)
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            y = paddle.to_tensor(x)
            y = paddle.to_tensor(y, dtype='float64', place=place)
            self.assertTrue(np.array_equal(y.numpy(), self.array))
            self.assertEqual(y.dtype, core.VarDesc.VarType.FP64)
            self.assertEqual(y.shape, self.shape)
            self.assertEqual(y.stop_gradient, True)
            self.assertEqual(y.type, core.VarDesc.VarType.LOD_TENSOR)
            z = x + y
            self.assertTrue(np.array_equal(z.numpy(), 2 * self.array))

            x = paddle.to_tensor(
                [1 + 2j, 1 - 2j], dtype='complex64', place=place)
            y = paddle.to_tensor(x)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j, 1 - 2j]))
            self.assertEqual(y.dtype, 'complex64')
            self.assertEqual(y.shape, [2])
            self.assertEqual(y.real.stop_gradient, True)
            self.assertEqual(y.real.type, core.VarDesc.VarType.LOD_TENSOR)

            with self.assertRaises(TypeError):
                paddle.to_tensor('test')
            with self.assertRaises(TypeError):
                paddle.to_tensor(1, dtype='test')
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]])
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]], place='test')
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]], place=1)

    _test_place(core.CPUPlace())
    if core.is_compiled_with_cuda():
        _test_place(core.CUDAPinnedPlace())
        _test_place(core.CUDAPlace(0))
def export(self,
           config,
           input_shapes,
           input_dtypes,
           origin_model=None,
           load_weights_from_supernet=True):
    """ Export the weights according to the origin model and the sub model config.

    Parameters:
        config(dict): the config of the sub model; can be obtained from
            OFA.get_current_config() or a special config, such as
            paddleslim.nas.ofa.utils.dynabert_config(width_mult).
        input_shapes(list|list(list)): the shape of all inputs.
        input_dtypes(list): the dtype of all inputs.
        origin_model(paddle.nn.Layer, optional): the instance of the original
            model. Default: None, which means self.model is used.
        load_weights_from_supernet(bool, optional): whether to load weights
            from the supernet. Default: True.
    Examples:
        .. code-block:: python
          from paddle.vision.models import mobilenet_v1
          origin_model = mobilenet_v1()
          config = {'conv2d_0': {'expand_ratio': 2}, 'conv2d_1': {'expand_ratio': 2}}
          origin_model = ofa_model.export(
              config,
              input_shapes=[1, 3, 28, 28],
              input_dtypes=['float32'],
              origin_model=origin_model)
    """
    self.set_net_config(config)
    self.model.eval()

    def build_input(input_size, dtypes):
        if isinstance(input_size, list) and all(
                isinstance(i, numbers.Number) for i in input_size):
            if isinstance(dtypes, list):
                dtype = dtypes[0]
            else:
                dtype = dtypes
            return paddle.cast(paddle.rand(list(input_size)), dtype)
        if isinstance(input_size, dict):
            inputs = {}
            if isinstance(dtypes, list):
                dtype = dtypes[0]
            else:
                dtype = dtypes
            for key, value in input_size.items():
                inputs[key] = paddle.cast(paddle.rand(list(value)), dtype)
            return inputs
        if isinstance(input_size, list):
            return [
                build_input(i, dtype)
                for i, dtype in zip(input_size, dtypes)
            ]

    data = build_input(input_shapes, input_dtypes)

    if isinstance(data, list):
        self.forward(*data)
    else:
        self.forward(data)

    super_model_state_dict = None
    if load_weights_from_supernet and origin_model is not None:
        super_model_state_dict = remove_model_fn(origin_model,
                                                 self.model.state_dict())

    if origin_model is None:
        origin_model = self.model

    origin_model = origin_model._layers if isinstance(
        origin_model, DataParallel) else origin_model

    _logger.info("Start to get pruned params, please wait...")
    pruned_param = self._get_model_pruned_weight()
    pruned_state_dict = remove_model_fn(origin_model, pruned_param)

    _logger.info("Start to get pruned model, please wait...")
    for l_name, sublayer in origin_model.named_sublayers():
        for p_name, param in sublayer.named_parameters(
                include_sublayers=False):
            name = l_name + '.' + p_name
            t_value = param.value().get_tensor()
            if name in pruned_state_dict:
                p = t_value._place()
                if p.is_cpu_place():
                    place = core.CPUPlace()
                elif p.is_cuda_pinned_place():
                    place = core.CUDAPinnedPlace()
                else:
                    place = core.CUDAPlace(p.gpu_device_id())
                t_value.set(pruned_state_dict[name], place)

    if super_model_state_dict is not None and len(super_model_state_dict) != 0:
        origin_model.set_state_dict(super_model_state_dict)

    return origin_model
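# Hedged summary of the three input layouts that build_input (inside export
# above) accepts; the shapes and the 'image' key are illustrative, and dtypes
# pair positionally with shapes.
#   input_shapes=[1, 3, 28, 28]              -> a single random tensor
#   input_shapes={'image': [1, 3, 28, 28]}   -> a dict of random tensors
#   input_shapes=[[1, 3, 28, 28], [1, 1]]    -> a list of random tensors,
#       e.g. paired with input_dtypes=['float32', 'int64']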
def test_to_tensor(self):
    def _test_place(place):
        with fluid.dygraph.guard():
            paddle.set_default_dtype('float32')
            # set_default_dtype should not take effect on int
            x = paddle.to_tensor(1, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1]))
            self.assertNotEqual(x.dtype, core.VarDesc.VarType.FP32)
            y = paddle.to_tensor(2, place=x.place)
            self.assertEqual(str(x.place), str(y.place))

            # set_default_dtype should not take effect on numpy
            x = paddle.to_tensor(
                np.array([1.2]).astype('float16'),
                place=place,
                stop_gradient=False)
            self.assertTrue(
                np.array_equal(x.numpy(), np.array([1.2], 'float16')))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP16)

            # set_default_dtype take effect on float
            x = paddle.to_tensor(1.2, place=place, stop_gradient=False)
            self.assertTrue(
                np.array_equal(x.numpy(), np.array([1.2]).astype('float32')))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            clone_x = x.clone()
            self.assertTrue(
                np.array_equal(clone_x.numpy(),
                               np.array([1.2]).astype('float32')))
            self.assertEqual(clone_x.dtype, core.VarDesc.VarType.FP32)
            y = clone_x**2
            y.backward()
            self.assertTrue(
                np.array_equal(x.grad, np.array([2.4]).astype('float32')))
            y = x.cpu()
            self.assertEqual(y.place.__repr__(), "CPUPlace")
            if core.is_compiled_with_cuda():
                y = x.pin_memory()
                self.assertEqual(y.place.__repr__(), "CUDAPinnedPlace")
                y = x.cuda(blocking=False)
                self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")
                y = x.cuda(blocking=True)
                self.assertEqual(y.place.__repr__(), "CUDAPlace(0)")

            # set_default_dtype take effect on complex
            x = paddle.to_tensor(1 + 2j, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.COMPLEX64)

            paddle.set_default_dtype('float64')
            x = paddle.to_tensor(1.2, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1.2]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP64)

            x = paddle.to_tensor(1 + 2j, place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.COMPLEX128)

            x = paddle.to_tensor(
                1, dtype='float32', place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1.]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.shape, [1])
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            x = paddle.to_tensor(
                (1, 2), dtype='float32', place=place, stop_gradient=False)
            x = paddle.to_tensor(
                [1, 2], dtype='float32', place=place, stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), [1., 2.]))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.grad, None)
            self.assertEqual(x.shape, [2])
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            x = paddle.to_tensor(
                self.array,
                dtype='float32',
                place=place,
                stop_gradient=False)
            self.assertTrue(np.array_equal(x.numpy(), self.array))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP32)
            self.assertEqual(x.shape, self.shape)
            self.assertEqual(x.stop_gradient, False)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

            y = paddle.to_tensor(x)
            y = paddle.to_tensor(y, dtype='float64', place=place)
            self.assertTrue(np.array_equal(y.numpy(), self.array))
            self.assertEqual(y.dtype, core.VarDesc.VarType.FP64)
            self.assertEqual(y.shape, self.shape)
            self.assertEqual(y.stop_gradient, True)
            self.assertEqual(y.type, core.VarDesc.VarType.LOD_TENSOR)
            z = x + y
            self.assertTrue(np.array_equal(z.numpy(), 2 * self.array))

            x = paddle.to_tensor(
                [1 + 2j, 1 - 2j], dtype='complex64', place=place)
            y = paddle.to_tensor(x)
            self.assertTrue(np.array_equal(x.numpy(), [1 + 2j, 1 - 2j]))
            self.assertEqual(y.dtype, core.VarDesc.VarType.COMPLEX64)
            self.assertEqual(y.shape, [2])

            with self.assertRaises(TypeError):
                paddle.to_tensor('test')
            with self.assertRaises(TypeError):
                paddle.to_tensor(1, dtype='test')
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]])
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]], place='test')
            with self.assertRaises(ValueError):
                paddle.to_tensor([[1], [2, 3]], place=1)

    _test_place(core.CPUPlace())
    if core.is_compiled_with_cuda():
        _test_place(core.CUDAPinnedPlace())
        _test_place(core.CUDAPlace(0))