Example 1
    def __init__(self, logits, name=None):
        """
        Args:
            logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64.
            name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
        """
        if not _non_static_mode():
            check_type(logits, 'logits',
                       (np.ndarray, tensor.Variable, list, tuple),
                       'Categorical')

        self.name = name if name is not None else 'Categorical'
        self.dtype = 'float32'

        if self._validate_args(logits):
            self.logits = logits
            self.dtype = convert_dtype(logits.dtype)
        else:
            if isinstance(logits, np.ndarray) and str(
                    logits.dtype) in ['float32', 'float64']:
                self.dtype = logits.dtype
            self.logits = self._to_tensor(logits)[0]
            if self.dtype != convert_dtype(self.logits.dtype):
                self.logits = tensor.cast(self.logits, dtype=self.dtype)
        dist_sum = paddle.sum(self.logits, axis=-1, keepdim=True)
        self._prob = self.logits / dist_sum
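
A minimal usage sketch for the constructor above, assuming the public paddle.distribution.Categorical API; note that the last line of the constructor normalizes the logits by their sum rather than by a softmax:

import paddle

# Sketch only: build a categorical distribution from a 1-D logits tensor.
logits = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
cat = paddle.distribution.Categorical(logits)
samples = cat.sample([5])                 # five category indices
probs = cat.probs(paddle.to_tensor([3]))  # expected ~0.4 given the sum normalization above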
Example 2
    def update_model_kwargs_for_generation(outputs, model_kwargs):
        """
        Update the model inputs during generation.
        Note that if `token_type_ids` and `attention_mask` are in `model_kwargs`
        and they contain pad values, the result vectors updated by this method
        may be different from what is expected. In that case, you need to
        override this method.
        """
        # update cache
        if isinstance(outputs, tuple):
            model_kwargs["cache"] = outputs[1]

        # update token_type_ids with last value
        if "token_type_ids" in model_kwargs:
            token_type_ids = model_kwargs["token_type_ids"]
            model_kwargs["token_type_ids"] = paddle.concat(
                [token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], axis=-1)

        # update position_ids
        if "position_ids" in model_kwargs:
            position_ids = model_kwargs["position_ids"]
            model_kwargs["position_ids"] = paddle.concat([
                position_ids,
                paddle.max(position_ids, axis=-1, keepdim=True) + 1
            ],
                                                         axis=-1)

        # update attention_mask
        if "attention_mask" in model_kwargs:
            attention_mask = model_kwargs["attention_mask"]
            # nn.Pad2D does not support the `bool` data type
            if convert_dtype(attention_mask.dtype) == 'bool':
                attention_mask = paddle.cast(attention_mask, 'int64')
            attention_mask = nn.Pad2D([0, 0, 0, 1],
                                      mode='replicate')(attention_mask)
            attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e9)(attention_mask)
            dtype = convert_dtype(attention_mask.dtype)
            if 'int' in dtype:
                attention_mask[:, :, -1, -1] = 1
            elif 'float' in dtype:
                attention_mask[:, :, -1, -1] = 0.0
            else:
                raise ValueError(
                    'The data type of input `attention_mask` must '
                    'be bool, int or float')
            model_kwargs["attention_mask"] = attention_mask

        return model_kwargs
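
For context, a minimal greedy-decoding skeleton showing where a hook like this is typically invoked; the loop, the `model` interface, and the variable names are illustrative assumptions, not the actual PaddleNLP generation code:

import paddle

def greedy_decode_sketch(model, input_ids, model_kwargs, max_length):
    # Hypothetical skeleton: `model` is assumed to return logits (or a
    # (logits, cache) tuple) and to expose the staticmethod shown above.
    while input_ids.shape[-1] < max_length:
        outputs = model(input_ids, **model_kwargs)
        logits = outputs[0] if isinstance(outputs, tuple) else outputs
        next_tokens = paddle.argmax(logits[:, -1, :], axis=-1, keepdim=True)
        input_ids = paddle.concat([input_ids, next_tokens], axis=-1)
        # grow cache / token_type_ids / position_ids / attention_mask by one step
        model_kwargs = model.update_model_kwargs_for_generation(outputs, model_kwargs)
    return input_ids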
Example 3
    def __check_out__(self, out):
        data_type = convert_dtype(out.dtype)
        self.assertEqual(
            data_type, self.dst_dtype,
            'dtype should be %s, but get %s' % (self.dst_dtype, data_type))

        shape = out.shape
        self.assertTupleEqual(
            shape, self.dst_shape,
            'shape should be %s, but get %s' % (self.dst_shape, shape))

        if data_type in ['float32', 'float64', 'int32', 'int64']:
            max_value = np.nanmax(out)
            min_value = np.nanmin(out)
            always_non_full_zero = max_value > min_value
            always_full_zero = max_value == 0.0 and min_value == 0.0
            self.assertTrue(always_full_zero or always_non_full_zero,
                            'always_full_zero or always_non_full_zero.')
        elif data_type in ['bool']:
            total_num = out.size
            true_num = np.sum(out == True)
            false_num = np.sum(out == False)
            self.assertTrue(total_num == true_num + false_num,
                            'The value should always be True or False.')
        else:
            self.assertTrue(False, 'invalid data type')
Example 4
    def update_model_kwargs_for_generation(outputs, model_kwargs):
        # update cache
        if isinstance(outputs, tuple):
            model_kwargs["cache"] = outputs[1]

        # update token_type_ids with last value
        if "token_type_ids" in model_kwargs:
            token_type_ids = model_kwargs["token_type_ids"]
            model_kwargs["token_type_ids"] = paddle.concat(
                [token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], axis=-1)

        # update position_ids
        if "position_ids" in model_kwargs:
            position_ids = model_kwargs["position_ids"]
            model_kwargs["position_ids"] = paddle.concat(
                [position_ids, position_ids[:, -1].unsqueeze(-1) + 1], axis=-1)

        # update attention_mask
        if "attention_mask" in model_kwargs:
            attention_mask = model_kwargs["attention_mask"]
            # TODO
            attention_mask = nn.Pad2D([0, 0, 0, 1],
                                      mode='replicate')(attention_mask)
            attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e9)(attention_mask)
            dtype = convert_dtype(attention_mask.dtype)
            if dtype == 'bool':
                attention_mask[:, :, -1, -1] = True
            elif 'int' in dtype:
                attention_mask[:, :, -1, -1] = 1
            else:
                attention_mask[:, :, -1, -1] = 0.0
            model_kwargs["attention_mask"] = attention_mask

        return model_kwargs
Example 5
def to_string(var, prefix='Tensor'):
    indent = len(prefix) + 1

    _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient},\n{indent}{data})"

    tensor = var.value().get_tensor()
    if not tensor._is_initialized():
        return "Tensor(Not initialized)"

    np_var = var.numpy()

    if len(var.shape) == 0:
        size = 0
    else:
        size = 1
        for dim in var.shape:
            size *= dim

    summary = False
    if size > DEFAULT_PRINT_OPTIONS.threshold:
        summary = True

    max_width, signed = _get_max_width(_to_summary(np_var))

    data = _format_tensor(
        np_var, summary, indent=indent, max_width=max_width, signed=signed)

    return _template.format(
        prefix=prefix,
        shape=var.shape,
        dtype=convert_dtype(var.dtype),
        place=var._place_str,
        stop_gradient=var.stop_gradient,
        indent=' ' * indent,
        data=data)
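
For reference, the template above renders roughly as follows; the exact place string and spacing depend on the build, so treat this as an approximate expectation:

import paddle

x = paddle.to_tensor([[1., 2.], [3., 4.]])
print(x)
# Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
#        [[1., 2.],
#         [3., 4.]])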
Example 6
    def _check_values_dtype_in_probs(self, param, value):
        """
        The ``log_prob`` and ``probs`` methods take an input ``value``; if ``value``'s dtype
        differs from ``param``'s, convert ``value``'s dtype to be consistent with ``param``'s dtype.

        Args:
            param (Tensor): low and high in Uniform class, loc and scale in Normal class.
            value (Tensor): The input tensor.

        Returns:
            value (Tensor): Change value's dtype if value's dtype is different from param.
        """
        if _non_static_mode():
            if value.dtype != param.dtype and convert_dtype(
                    value.dtype) in ['float32', 'float64']:
                warnings.warn(
                    "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
                )
                return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                                   param.dtype)
            return value

        check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                                 'log_prob')
        if value.dtype != param.dtype:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return tensor.cast(value, dtype=param.dtype)
        return value
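
A small sketch of the cast path above, assuming paddle.distribution.Normal with its default float32 parameters; passing a float64 value is expected to emit the warning and return a float32 result:

import numpy as np
import paddle

normal = paddle.distribution.Normal(loc=0., scale=1.)        # float32 parameters
value = paddle.to_tensor(np.array([0.5], dtype='float64'))   # mismatched dtype
lp = normal.log_prob(value)   # warns, then casts `value` to float32 internally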
Example 7
def _convert_attention_mask(attn_mask, dtype):
    if attn_mask is not None and attn_mask.dtype != dtype:
        attn_mask_dtype = convert_dtype(attn_mask.dtype)
        if attn_mask_dtype == 'bool' or 'int' in attn_mask_dtype:
            attn_mask = (paddle.cast(attn_mask, dtype) - 1.0) * 1e9
        else:
            attn_mask = paddle.cast(attn_mask, dtype)
    return attn_mask
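
To make the two branches concrete: a bool (or integer) mask becomes an additive bias in which masked positions get a large negative value, while a float mask is only cast. A small sketch using the helper above:

import paddle

bool_mask = paddle.to_tensor([[True, True, False]])
additive = _convert_attention_mask(bool_mask, paddle.float32)
# True  -> (1.0 - 1.0) * 1e9 =  0.0   (position kept)
# False -> (0.0 - 1.0) * 1e9 = -1e9   (position suppressed before softmax)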
Example 8
    def __init__(self, loc, scale, name=None):
        if not _non_static_mode():
            check_type(loc, 'loc',
                       (int, float, np.ndarray, tensor.Variable, list, tuple),
                       'Normal')
            check_type(scale, 'scale',
                       (int, float, np.ndarray, tensor.Variable, list, tuple),
                       'Normal')

        self.batch_size_unknown = False
        self.all_arg_is_float = False
        self.name = name if name is not None else 'Normal'
        self.dtype = 'float32'

        if isinstance(loc, int):
            loc = float(loc)
        if isinstance(scale, int):
            scale = float(scale)

        if self._validate_args(loc, scale):
            self.batch_size_unknown = True
            self.loc = loc
            self.scale = scale
            self.dtype = convert_dtype(loc.dtype)
        else:
            if isinstance(loc, float) and isinstance(scale, float):
                self.all_arg_is_float = True
            if isinstance(loc, np.ndarray) and str(
                    loc.dtype) in ['float32', 'float64']:
                self.dtype = loc.dtype
            elif isinstance(scale, np.ndarray) and str(
                    scale.dtype) in ['float32', 'float64']:
                self.dtype = scale.dtype
            # pylint: disable=unbalanced-tuple-unpacking
            self.loc, self.scale = self._to_tensor(loc, scale)
            if self.dtype != convert_dtype(self.loc.dtype):
                self.loc = tensor.cast(self.loc, dtype=self.dtype)
                self.scale = tensor.cast(self.scale, dtype=self.dtype)
        super(Normal, self).__init__(self.loc.shape)
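
A short sketch of the dtype-promotion branch above, assuming the public paddle.distribution.Normal API: float64 NumPy parameters make the distribution float64, while plain Python floats keep the float32 default:

import numpy as np
import paddle

n32 = paddle.distribution.Normal(loc=0., scale=1.)
n64 = paddle.distribution.Normal(loc=np.zeros(3, dtype='float64'),
                                 scale=np.ones(3, dtype='float64'))
print(n32.loc.dtype)   # expected: paddle.float32
print(n64.loc.dtype)   # expected: paddle.float64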
Example 9
    def __init__(self, low, high, name=None):
        if not _non_static_mode():
            check_type(low, 'low',
                       (int, float, np.ndarray, tensor.Variable, list, tuple),
                       'Uniform')
            check_type(high, 'high',
                       (int, float, np.ndarray, tensor.Variable, list, tuple),
                       'Uniform')

        self.all_arg_is_float = False
        self.batch_size_unknown = False
        self.name = name if name is not None else 'Uniform'
        self.dtype = 'float32'

        if isinstance(low, int):
            low = float(low)
        if isinstance(high, int):
            high = float(high)

        if self._validate_args(low, high):
            self.batch_size_unknown = True
            self.low = low
            self.high = high
            self.dtype = convert_dtype(low.dtype)
        else:
            if isinstance(low, float) and isinstance(high, float):
                self.all_arg_is_float = True
            if isinstance(low, np.ndarray) and str(
                    low.dtype) in ['float32', 'float64']:
                self.dtype = low.dtype
            elif isinstance(high, np.ndarray) and str(
                    high.dtype) in ['float32', 'float64']:
                self.dtype = high.dtype
            # pylint: disable=unbalanced-tuple-unpacking
            self.low, self.high = self._to_tensor(low, high)
            if self.dtype != convert_dtype(self.low.dtype):
                self.low = tensor.cast(self.low, dtype=self.dtype)
                self.high = tensor.cast(self.high, dtype=self.dtype)
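
The constructor mirrors Normal above; a minimal usage sketch, assuming the public paddle.distribution.Uniform API:

import paddle

uniform = paddle.distribution.Uniform(low=0., high=2.)
draws = uniform.sample([4])                     # four samples in [low, high)
lp = uniform.log_prob(paddle.to_tensor([1.0]))  # expected log(1 / (high - low)) = log(0.5)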
Example 10
def convert_var_dtype(var, dtype):
    if isinstance(var, Variable):
        src_dtype = convert_dtype(var.dtype)
        assert src_dtype in [
            'bool', 'float16', 'float32', 'float64', 'int32', 'int64', 'uint8'
        ], "The dtype of var {} is {}, which is not supported in the cast op.".format(
            var.name, src_dtype)
        assert dtype in [
            'bool', 'int', 'float'
        ], "The casted target dtype is {}, which is not supported in type casting.".format(
            dtype)
        cast_map = {
            'bool': 'bool',
            'int': 'int32',
            'float': 'float32',
        }
        return cast(var, dtype=cast_map[dtype])
    else:
        return eval('{}(var)'.format(dtype))
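
A sketch of the two branches above: a static-graph Variable is cast through cast_map ('int' -> 'int32', 'float' -> 'float32'), while any other Python value simply goes through the built-in of the same name:

# Non-Variable branch: falls back to eval('int(var)') / eval('bool(var)') / ...
assert convert_var_dtype(3.7, 'int') == 3
assert convert_var_dtype(0, 'bool') is False

# Variable branch (static graph): an int32 Variable passed with dtype 'int'
# stays int32, while dtype 'float' produces a float32 Variable via the cast op.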
Example 11
def tensor_to_string(tensor, prefix='Tensor'):
    indent = len(prefix) + 1

    dtype = convert_dtype(tensor.dtype)
    if tensor.dtype == core.VarDesc.VarType.BF16:
        dtype = 'bfloat16'

    _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient},\n{indent}{data})"

    if tensor.is_sparse():
        return sparse_tensor_to_string(tensor, prefix)

    if not tensor._is_dense_tensor_hold_allocation():
        return "Tensor(Not initialized)"
    else:
        data = _format_dense_tensor(tensor, indent)
        return _template.format(
            prefix=prefix,
            shape=tensor.shape,
            dtype=dtype,
            place=tensor._place_str,
            stop_gradient=tensor.stop_gradient,
            indent=' ' * indent,
            data=data)
Example 12
    def test_convert_dtype(self):
        self.assertEqual(convert_dtype(core.VarDesc.VarType.COMPLEX64),
                         "complex64")
        self.assertEqual(convert_dtype(core.VarDesc.VarType.COMPLEX128),
                         "complex128")
Example 13
def cast_bool_if_necessary(var):
    assert isinstance(var, Variable)
    if convert_dtype(var.dtype) not in ['bool']:
        var = cast(var, dtype="bool")
    return var
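
A static-graph sketch using the helper above: a non-bool condition Variable gets an explicit cast so downstream control-flow or logical ops always receive a bool tensor:

import paddle
paddle.enable_static()

with paddle.static.program_guard(paddle.static.Program()):
    flag = paddle.full([1], 1, dtype='int32')    # a non-bool Variable
    cond = cast_bool_if_necessary(flag)          # cast to bool before use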
Example 14
def _handle_dtype(data, dtype):
    if dtype:
        if convert_dtype(dtype) != convert_dtype(data.dtype):
            return data.astype(convert_dtype(dtype))
    return data
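
A quick sketch of the helper above: a falsy dtype leaves the data untouched, while a differing dtype triggers an astype:

import paddle

x = paddle.to_tensor([1.0, 2.0])          # float32
y = _handle_dtype(x, 'float64')           # expected: a float64 copy
z = _handle_dtype(x, None)                # returned unchanged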
Example 15
    def load(self,
             backbone: paddle.nn.Layer,
             classifier: paddle.nn.Layer = None,
             optimizer=None,
             for_train=True,
             dtype=None):

        assert os.path.exists(self.checkpoint_dir)
        checkpoint_dir = os.path.abspath(self.checkpoint_dir)

        type_dict = {}
        for name, param in backbone.state_dict().items():
            type_dict[param.name] = convert_dtype(param.dtype)

        if classifier is not None:
            # for dist params, we need to save them at all ranks.
            for name, param in classifier.state_dict().items():
                type_dict[param.name] = convert_dtype(param.dtype)

        if for_train:
            assert optimizer is not None
            opt_state_dict = optimizer.state_dict()
            lr_state_dict = opt_state_dict['LR_Scheduler']
            for name, opt in opt_state_dict.items():
                if name == 'LR_Scheduler' or '@GRAD' in name:
                    continue
                type_dict[name] = convert_dtype(opt.dtype)

        param_state_dict = {}
        opt_state_dict = {}
        dist_param_state_dict = {}

        dist_weight_state_dict = {}
        dist_weight_velocity_state_dict = {}
        dist_bias_state_dict = {}
        dist_bias_velocity_state_dict = {}
        for path in os.listdir(checkpoint_dir):
            path = os.path.join(checkpoint_dir, path)
            if not os.path.isfile(path):
                continue

            basename = os.path.basename(path)
            name, ext = os.path.splitext(basename)

            if ext not in ['.pdopt', '.pdparam']:
                continue

            if not for_train and ext == '.pdopt':
                continue

            if classifier is None and 'dist@' in name and '@rank@' in name:
                continue

            tensor = paddle.load(path, return_numpy=True)
            if dtype:
                assert dtype in ['float32', 'float16']
                tensor = tensor.astype(dtype)
            else:
                tensor = tensor.astype(type_dict[name])

            if 'dist@' in name and '@rank@' in name:
                if '.w' in name and 'velocity' not in name:
                    dist_weight_state_dict[name] = tensor
                elif '.w' in name and 'velocity' in name:
                    dist_weight_velocity_state_dict[name] = tensor
                elif '.b' in name and 'velocity' not in name:
                    dist_bias_state_dict[name] = tensor
                elif '.b' in name and 'velocity' in name:
                    dist_bias_velocity_state_dict[name] = tensor

            else:
                if ext == '.pdparam':
                    param_state_dict[name] = tensor
                else:
                    opt_state_dict[name] = tensor

        if classifier is not None and for_train:
            meta_file = os.path.join(checkpoint_dir, 'meta.json')
            if not os.path.exists(meta_file):
                logging.error(
                    "Please make sure the checkpoint dir {} exists and the "
                    "parameters in that dir are valid.".format(checkpoint_dir))
                exit()

            with open(meta_file, 'r') as handle:
                extra_info = json.load(handle)

            # Preprocess distributed parameters.
            pretrain_world_size = extra_info['pretrain_world_size']
            assert pretrain_world_size > 0
            embedding_size = extra_info['embedding_size']
            assert embedding_size == self.embedding_size
            num_classes = extra_info['num_classes']
            assert num_classes == self.num_classes

            logging.info(
                "Parameters for pre-training: pretrain_world_size ({}), "
                "embedding_size ({}), and num_classes ({}).".format(
                    pretrain_world_size, embedding_size, num_classes))
            logging.info("Parameters for inference or fine-tuning: "
                         "world_size ({}).".format(self.world_size))

            rank_str = '%05d' % self.rank

            dist_weight_state_dict = rearrange_weight(dist_weight_state_dict,
                                                      pretrain_world_size,
                                                      self.world_size)
            dist_bias_state_dict = rearrange_weight(dist_bias_state_dict,
                                                    pretrain_world_size,
                                                    self.world_size)
            for name, value in dist_weight_state_dict.items():
                if rank_str in name:
                    dist_param_state_dict[name] = value
            for name, value in dist_bias_state_dict.items():
                if rank_str in name:
                    dist_param_state_dict[name] = value

            if for_train:
                dist_weight_velocity_state_dict = rearrange_weight(
                    dist_weight_velocity_state_dict, pretrain_world_size,
                    self.world_size)
                dist_bias_velocity_state_dict = rearrange_weight(
                    dist_bias_velocity_state_dict, pretrain_world_size,
                    self.world_size)
                for name, value in dist_weight_velocity_state_dict.items():
                    if rank_str in name:
                        opt_state_dict[name] = value
                for name, value in dist_bias_velocity_state_dict.items():
                    if rank_str in name:
                        opt_state_dict[name] = value

        def map_actual_param_name(state_dict, load_state_dict):
            for name, param in state_dict.items():
                state_dict[name] = load_state_dict[param.name]
            return state_dict

        logging.info("Load checkpoint from '{}'. ".format(checkpoint_dir))
        param_state_dict = map_actual_param_name(backbone.state_dict(),
                                                 param_state_dict)
        backbone.set_state_dict(param_state_dict)
        if classifier is not None:
            dist_param_state_dict = map_actual_param_name(
                classifier.state_dict(), dist_param_state_dict)
            classifier.set_state_dict(dist_param_state_dict)
        if for_train:
            assert optimizer is not None
            optimizer.set_state_dict(opt_state_dict)

        if classifier is not None and for_train:
            return extra_info
        else:
            return {}
Example 16
    def load(self, program, for_train=True, dtype=None):
        assert os.path.exists(self.checkpoint_dir)
        checkpoint_dir = os.path.abspath(self.checkpoint_dir)

        param_state_dict = program.state_dict(mode='param')
        opt_state_dict = program.state_dict(mode='opt')
        type_dict = {}
        shape_dict = {}
        for name, param in param_state_dict.items():
            type_dict[name] = convert_dtype(param._dtype())
            shape_dict[name] = param.shape()
        for name, opt in opt_state_dict.items():
            type_dict[name] = convert_dtype(opt._dtype())
            shape_dict[name] = opt.shape()

        state_dict = {}
        dist_weight_state_dict = {}
        dist_weight_velocity_state_dict = {}
        dist_bias_state_dict = {}
        dist_bias_velocity_state_dict = {}
        for path in os.listdir(checkpoint_dir):
            path = os.path.join(checkpoint_dir, path)
            if not os.path.isfile(path):
                continue

            basename = os.path.basename(path)
            name, ext = os.path.splitext(basename)

            if ext not in ['.pdopt', '.pdparam']:
                continue

            if not for_train and ext == '.pdopt':
                continue

            if name not in type_dict:
                continue

            tensor = paddle.load(path, return_numpy=True)
            if dtype:
                assert dtype in ['float32', 'float16']
                tensor = tensor.astype(dtype)
            elif name in type_dict:
                tensor = tensor.astype(type_dict[name])
            else:
                pass

            if list(shape_dict[name]) != list(tensor.shape):
                # for prelu NHWC[1, 1, 1, C] and NCHW [1, C, 1, 1]
                expect_shape = list(shape_dict[name])
                actual_shape = list(tensor.shape)
                if len(expect_shape) == len(actual_shape) and \
                    expect_shape[0] == actual_shape[0] and expect_shape[0] == 1 and \
                    expect_shape[2] == actual_shape[2] and expect_shape[2] == 1 and \
                    expect_shape[1] == actual_shape[3]:
                    if actual_shape[3] != 1:
                        tensor = tensor.transpose([0, 3, 1, 2])
                    elif actual_shape[1] != 1:
                        tensor = tensor.transpose([0, 2, 3, 1])

            if 'dist@' in name and '@rank@' in name:
                if '.w' in name and 'velocity' not in name:
                    dist_weight_state_dict[name] = tensor
                elif '.w' in name and 'velocity' in name:
                    dist_weight_velocity_state_dict[name] = tensor
                elif '.b' in name and 'velocity' not in name:
                    dist_bias_state_dict[name] = tensor
                elif '.b' in name and 'velocity' in name:
                    dist_bias_velocity_state_dict[name] = tensor

            else:
                state_dict[name] = tensor

        if for_train:
            meta_file = os.path.join(checkpoint_dir, 'meta.json')
            if not os.path.exists(meta_file):
                logging.error(
                    "Please make sure the checkpoint dir {} exists and the "
                    "parameters in that dir are valid.".format(checkpoint_dir))
                exit()

            with open(meta_file, 'r') as handle:
                extra_info = json.load(handle)

            # Preprocess distributed parameters.
            pretrain_world_size = extra_info['pretrain_world_size']
            assert pretrain_world_size > 0
            embedding_size = extra_info['embedding_size']
            assert embedding_size == self.embedding_size
            num_classes = extra_info['num_classes']
            assert num_classes == self.num_classes

            logging.info(
                "Parameters for pre-training: pretrain_world_size ({}), "
                "embedding_size ({}), and num_classes ({}).".format(
                    pretrain_world_size, embedding_size, num_classes))
            logging.info("Parameters for inference or fine-tuning: "
                         "world_size ({}).".format(self.world_size))

            rank_str = '%05d' % self.rank

            dist_weight_state_dict = rearrange_weight(dist_weight_state_dict,
                                                      pretrain_world_size,
                                                      self.world_size)
            dist_bias_state_dict = rearrange_weight(dist_bias_state_dict,
                                                    pretrain_world_size,
                                                    self.world_size)
            for name, value in dist_weight_state_dict.items():
                if rank_str in name:
                    state_dict[name] = value
            for name, value in dist_bias_state_dict.items():
                if rank_str in name:
                    state_dict[name] = value

            if for_train:
                dist_weight_velocity_state_dict = rearrange_weight(
                    dist_weight_velocity_state_dict, pretrain_world_size,
                    self.world_size)
                dist_bias_velocity_state_dict = rearrange_weight(
                    dist_bias_velocity_state_dict, pretrain_world_size,
                    self.world_size)
                for name, value in dist_weight_velocity_state_dict.items():
                    if rank_str in name:
                        state_dict[name] = value
                for name, value in dist_bias_velocity_state_dict.items():
                    if rank_str in name:
                        state_dict[name] = value

        program.set_state_dict(state_dict)
        logging.info("Load checkpoint from '{}'. ".format(checkpoint_dir))
        if for_train:
            return extra_info
        else:
            return {}