Example #1
def to_grayscale(img, num_output_channels=1):
    """Converts image to grayscale version of image.

    Args:
        img (np.array): Image to be converted to grayscale.

    Returns:
        np.array: Grayscale version of the image.
            if num_output_channels = 1 : returned image is single channel

            if num_output_channels = 3 : returned image is 3 channel with r = g = b

    """
    cv2 = try_import('cv2')

    if num_output_channels == 1:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
    elif num_output_channels == 3:
        # broadcasting a view is much faster than a second cvtColor back to 3 channels
        img = np.broadcast_to(
            cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis], img.shape)
    else:
        raise ValueError('num_output_channels should be either 1 or 3')

    return img
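A minimal usage sketch (assuming numpy and OpenCV are installed, and that try_import resolves cv2):

import numpy as np

rgb = (np.random.rand(32, 32, 3) * 255).astype('uint8')
gray = to_grayscale(rgb)                          # shape (32, 32, 1)
gray3 = to_grayscale(rgb, num_output_channels=3)  # shape (32, 32, 3), r == g == b
# note: the 3-channel result is a read-only broadcast view; copy it before writing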
Example #2
def crop_by_image_size(image: Image, face):
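    """Crops `image` to a square whose side equals its shorter edge, centered
    on `face` as far as the image borders allow, and returns the cropped image
    together with the face rectangle shifted into the new coordinates."""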
    dlib = try_import('dlib')
    center = face.center()
    width, height = image.size
    if width > height:
        left = int(center.x - height / 2)
        right = int(center.x + height / 2)
        if left < 0:
            left, right = 0, height
        elif right > width:
            left, right = width - height, width
        image = image.crop((left, 0, right, height))
        face = dlib.rectangle(face.left() - left, face.top(),
                              face.right() - left, face.bottom())
    elif width < height:
        top = int(center.y - width / 2)
        bottom = int(center.y + width / 2)
        if top < 0:
            top, bottom = 0, width
        elif bottom > height:
            top, bottom = height - width, height
        image = image.crop((0, top, width, bottom))
        face = dlib.rectangle(face.left(),
                              face.top() - top, face.right(),
                              face.bottom() - top)
    return image, face
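A usage sketch with hypothetical coordinates (requires Pillow and dlib):

from PIL import Image
import dlib

img = Image.new('RGB', (640, 480))
face = dlib.rectangle(200, 150, 330, 280)  # left, top, right, bottom
square, shifted = crop_by_image_size(img, face)
# square.size == (480, 480); `shifted` is the same face in the cropped coordinates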
Example #3
    def __init__(self,
                 sentencepiece_model_file,
                 do_lower_case=True,
                 encoding="utf8",
                 unk_token="<unk>",
                 sep_token="[SEP]",
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 mask_token="[MASK]"):

        if not os.path.isfile(sentencepiece_model_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the "
                "vocabulary from a pretrained model please use "
                "`tokenizer = BigBirdTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(sentencepiece_model_file))
        self.encoding = encoding
        mod = try_import('sentencepiece')
        self.sp_model = mod.SentencePieceProcessor()
        if os.path.isfile(sentencepiece_model_file):
            self.sp_model.Load(sentencepiece_model_file)
        vocab_dict = {}
        for id in range(self.sp_model.get_piece_size()):
            vocab_dict[self.sp_model.id_to_piece(id)] = id
        self.vocab = Vocab.from_dict(vocab_dict, unk_token=unk_token)
        self.start_word_tokens = np.array([
            self.vocab._idx_to_token[i][0] == "▁"
            for i in range(0, len(self.vocab))
        ])
        self.unk_token = unk_token
        self.mask_id = vocab_dict[mask_token]
        self.unk_id = vocab_dict[unk_token]
        self.cls_id = vocab_dict[cls_token]
        self.sep_id = vocab_dict[sep_token]
Example #4
    def __init__(self,
                 sentencepiece_model_file,
                 do_lower_case=True,
                 encoding="utf8",
                 unk_token="<unk>",
                 sep_token="[SEP]",
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 mask_token="[MASK]",
                 **kwargs):

        if not os.path.isfile(sentencepiece_model_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the "
                "vocabulary from a pretrained model please use "
                "`tokenizer = BigBirdTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(sentencepiece_model_file))
        self.encoding = encoding
        mod = try_import('sentencepiece')
        self.sp_model = mod.SentencePieceProcessor()
        if os.path.isfile(sentencepiece_model_file):
            self.sp_model.Load(sentencepiece_model_file)
        vocab_dict = {}
        for id in range(self.sp_model.get_piece_size()):
            vocab_dict[self.sp_model.id_to_piece(id)] = id
        self.vocab = Vocab.from_dict(vocab_dict, unk_token=unk_token)
        self.start_word_tokens = np.array([
            self.vocab._idx_to_token[i][0] == "▁"
            for i in range(0, len(self.vocab))
        ])
        self.unk_token = unk_token
        self.mask_id = vocab_dict[mask_token]
        self.unk_id = vocab_dict[unk_token]
        self.cls_id = vocab_dict[cls_token]
        self.sep_id = vocab_dict[sep_token]
        self.pad_id = vocab_dict[pad_token] if pad_token in vocab_dict else 0

        unk_token = AddedToken(unk_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   unk_token, str) else unk_token
        pad_token = AddedToken(pad_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   pad_token, str) else pad_token
        cls_token = AddedToken(cls_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   cls_token, str) else cls_token
        sep_token = AddedToken(sep_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   sep_token, str) else sep_token

        # The mask token behaves like a normal word, i.e. it includes the space before it
        mask_token = AddedToken(mask_token,
                                lstrip=True, rstrip=False) if isinstance(
                                    mask_token, str) else mask_token

        self._build_special_tokens_map_extended(sep_token=sep_token,
                                                cls_token=cls_token,
                                                unk_token=unk_token,
                                                pad_token=pad_token,
                                                mask_token=mask_token)
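A construction sketch; 'spiece.model' is a placeholder for a real SentencePiece model file:

tokenizer = BigBirdTokenizer('spiece.model')  # hypothetical local path
print(tokenizer.cls_id, tokenizer.sep_id, tokenizer.mask_id, tokenizer.pad_id)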
Example #5
    def _updates(self, logs, mode):
        if not self._is_write():
            return
        if not hasattr(self, 'writer'):
            visualdl = try_import('visualdl')
            self.writer = visualdl.LogWriter(self.log_dir)

        metrics = getattr(self, '%s_metrics' % (mode))
        current_step = getattr(self, '%s_step' % (mode))

        if mode == 'train':
            total_step = current_step
        else:
            total_step = self.epoch

        for k in metrics:
            if k in logs:
                temp_tag = mode + '/' + k

                if isinstance(logs[k], (list, tuple)):
                    temp_value = logs[k][0]
                elif isinstance(logs[k], numbers.Number):
                    temp_value = logs[k]
                else:
                    continue

                self.writer.add_scalar(tag=temp_tag,
                                       step=total_step,
                                       value=temp_value)
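A standalone sketch of the same VisualDL calls with illustrative values (assuming visualdl is installed; the log directory and tag are arbitrary):

from visualdl import LogWriter

writer = LogWriter('./vdl_logs')
for step, loss in enumerate([0.9, 0.6, 0.4]):
    writer.add_scalar(tag='train/loss', step=step, value=loss)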
Example #6
    def __call__(self, model):
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        input_spec = [{
            "image": paddle.ones(shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full([1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(shape=[1, 2], dtype='float32')
        }]
        if self.print_params:
            print_prune_params(model)

        ori_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
        else:
            raise ValueError(
                "criterion should be 'fpgm' or 'l1_norm', but got {}".format(
                    self.criterion))

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        return model
Example #7
    def __init__(self,
                 vocab_file,
                 bos_token="<s>",
                 eos_token="</s>",
                 sep_token="</s>",
                 cls_token="<s>",
                 unk_token="<unk>",
                 pad_token="<pad>",
                 mask_token="<mask>",
                 **kwargs):
        mask_token = AddedToken(
            mask_token, lstrip=True,
            rstrip=False) if isinstance(mask_token, str) else mask_token
        self._bos_token = bos_token
        self._eos_token = eos_token
        self._sep_token = sep_token
        self._cls_token = cls_token
        self._unk_token = unk_token
        self._pad_token = pad_token
        self._mask_token = mask_token
        spm = try_import("sentencepiece")
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(vocab_file)
        self.vocab_file = vocab_file

        self.tokens_to_ids = {"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3}

        # The first "real" token "," has position 4 in the original fairseq vocab and position 3 in the spm vocab
        self.offset = 1

        self.tokens_to_ids["<mask>"] = len(self.sp_model) + self.offset
        self.ids_to_tokens = {v: k for k, v in self.tokens_to_ids.items()}
Example #8
    def __init__(self,
                 vocab_file,
                 sentencepiece_model_file,
                 word_dict,
                 do_lower_case=True,
                 encoding="utf8",
                 unk_token="[UNK]",
                 sep_token="[SEP]",
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 mask_token="[MASK]"):
        mod = try_import('sentencepiece')
        self.sp_model = mod.SentencePieceProcessor()
        self.word_dict = word_dict

        self.do_lower_case = do_lower_case
        self.encoding = encoding
        if not os.path.isfile(vocab_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the "
                "vocabulary from a pretrained model please use "
                "`tokenizer = ErnieTinyTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(vocab_file))
        if not os.path.isfile(word_dict):
            raise ValueError(
                "Can't find a file at path '{}'. To load the "
                "word dict from a pretrained model please use "
                "`tokenizer = ErnieTinyTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(word_dict))
        with open(word_dict, 'rb') as f:
            self.dict = pickle.load(f)
        self.vocab = self.load_vocabulary(vocab_file, unk_token=unk_token)

        # if the sentencepiece_model_file does not exist, use the default sentence-piece model
        if os.path.isfile(sentencepiece_model_file):
            self.sp_model.Load(sentencepiece_model_file)
Example #9
def resize(img, size, interpolation='bilinear'):
    """
    Resizes the image to given size

    Args:
        input (np.ndarray): Image to be resized.
        size (int|list|tuple): Target size of input data, with (height, width) shape.
        interpolation (int|str, optional): Interpolation method. when use cv2 backend, 
            support method are as following: 
            - "nearest": cv2.INTER_NEAREST, 
            - "bilinear": cv2.INTER_LINEAR, 
            - "area": cv2.INTER_AREA, 
            - "bicubic": cv2.INTER_CUBIC, 
            - "lanczos": cv2.INTER_LANCZOS4

    Returns:
        np.array: Resized image.

    """
    cv2 = try_import('cv2')
    _cv2_interp_from_str = {
        'nearest': cv2.INTER_NEAREST,
        'bilinear': cv2.INTER_LINEAR,
        'area': cv2.INTER_AREA,
        'bicubic': cv2.INTER_CUBIC,
        'lanczos': cv2.INTER_LANCZOS4
    }

    if not (isinstance(size, int) or
            (isinstance(size, Iterable) and len(size) == 2)):
        raise TypeError('Got inappropriate size arg: {}'.format(size))

    h, w = img.shape[:2]

    if isinstance(size, int):
        if (w <= h and w == size) or (h <= w and h == size):
            return img
        # match the shorter edge to `size`, keeping the aspect ratio
        if w < h:
            ow, oh = size, int(size * h / w)
        else:
            oh, ow = size, int(size * w / h)
        output = cv2.resize(img,
                            dsize=(ow, oh),
                            interpolation=_cv2_interp_from_str[interpolation])
    else:
        output = cv2.resize(img,
                            dsize=(size[1], size[0]),
                            interpolation=_cv2_interp_from_str[interpolation])
    if len(img.shape) == 3 and img.shape[2] == 1:
        return output[:, :, np.newaxis]
    else:
        return output
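A usage sketch (illustrative array; requires numpy and OpenCV):

import numpy as np

img = (np.random.rand(240, 320, 3) * 255).astype('uint8')  # (h, w) = (240, 320)
out = resize(img, 120)          # shorter edge -> 120; shape (120, 160, 3)
out2 = resize(img, (100, 200))  # explicit (height, width); shape (100, 200, 3)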
Example #10
def _build_mel_basis():
    assert audio_config.fmax <= audio_config.sample_rate // 2
    librosa = try_import('librosa')
    # keyword arguments keep this call compatible with librosa >= 0.10,
    # where the positional sr/n_fft form was removed
    return librosa.filters.mel(sr=audio_config.sample_rate,
                               n_fft=audio_config.n_fft,
                               n_mels=audio_config.num_mels,
                               fmin=audio_config.fmin,
                               fmax=audio_config.fmax)
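For reference, the same filter-bank call with concrete, illustrative parameters (assuming librosa is installed):

import librosa

# shape: (n_mels, 1 + n_fft // 2) == (80, 513)
mel_basis = librosa.filters.mel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=8000)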
Example #11
    def _tokenize(self, text):
        """Tokenize a string."""
        split_tokens = []
        re = try_import("regex")
        words = re.findall(r"\S+\n?", text)
        for token in words:
            split_tokens.extend(self.bpe(token).split(" "))
        return split_tokens
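For illustration, what the whitespace pre-split produces (this particular pattern also works with the standard-library re module):

import regex as re

print(re.findall(r"\S+\n?", "Hello world\nsecond line"))
# ['Hello', 'world\n', 'second', 'line']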
Example #12
def _stft(y):
    if audio_config.use_lws:
        return _lws_processor(audio_config).stft(y).T
    else:
        librosa = try_import('librosa')
        return librosa.stft(y=y,
                            n_fft=audio_config.n_fft,
                            hop_length=get_hop_size(),
                            win_length=audio_config.win_size)
Example #13
def read_config(fp=None):
    if fp is None:
        dir_name = os.path.dirname(os.path.abspath(__file__))
        fp = os.path.join(dir_name, "visual_backbone.yaml")
    with open(fp, "r") as fin:
        yacs_config = try_import("yacs.config")
        cfg = yacs_config.CfgNode().load_cfg(fin)
    cfg.freeze()
    return cfg
Example #14
    def _tokenize(self, text):
        """Tokenize a string."""
        bpe_tokens = []
        re = try_import("regex")
        for token in re.findall(self.pat, text):
            token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
            bpe_tokens.extend(
                bpe_token for bpe_token in self.bpe(token).split(' '))
        return bpe_tokens
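The byte_encoder used above is typically built with GPT-2's standard bytes_to_unicode mapping, sketched here for reference; it assigns every byte a printable unicode character so BPE can operate on plain strings:

def bytes_to_unicode():
    # printable latin-1 bytes map to themselves; the rest are shifted above 255
    bs = (list(range(ord("!"), ord("~") + 1)) +
          list(range(ord("¡"), ord("¬") + 1)) +
          list(range(ord("®"), ord("ÿ") + 1)))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8 + n)
            n += 1
    return dict(zip(bs, [chr(c) for c in cs]))

byte_encoder = bytes_to_unicode()
print(''.join(byte_encoder[b] for b in 'café'.encode('utf-8')))  # cafÃ©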
Example #15
    def __call__(self, model):
        paddleslim = try_import('paddleslim')
        self.ptq = paddleslim.PTQ(**self.ptq_config)
        model.eval()
        quant_model = self.ptq.quantize(model,
                                        fuse=self.fuse,
                                        fuse_list=self.fuse_list)

        return quant_model
Example #16
    def _load_anno(self):
        self.name2mem = {}
        self.data_tar = tarfile.open(self.data_file)
        for ele in self.data_tar.getmembers():
            self.name2mem[ele.name] = ele

        scio = try_import('scipy.io')

        self.labels = scio.loadmat(self.label_file)['labels'][0]
        self.indexes = scio.loadmat(self.setid_file)[self.flag][0]
Example #17
def image_load(path, backend=None):
    """Load an image.

    Args:
        path (str): Path of the image.
        backend (str, optional): The image decoding backend type. Options are
            `cv2`, `pil`, `None`. If backend is None, the global _imread_backend 
            specified by ``paddle.vision.set_image_backend`` will be used. Default: None.

    Returns:
        PIL.Image or np.array: Loaded image.

    Examples:
    
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision import image_load, set_image_backend

            fake_img = Image.fromarray((np.random.random((32, 32, 3)) * 255).astype('uint8'))

            path = 'temp.png'
            fake_img.save(path)

            set_image_backend('pil')
            
            pil_img = image_load(path).convert('RGB')

            # should be PIL.Image.Image
            print(type(pil_img))

            # use opencv as backend
            # set_image_backend('cv2')

            # np_img = image_load(path)
            # # should get numpy.ndarray
            # print(type(np_img))
    
    """

    if backend is None:
        backend = _image_backend
    if backend not in ['pil', 'cv2', 'tensor']:
        raise ValueError(
            "Expected backend to be one of ['pil', 'cv2', 'tensor'], but got {}".
            format(backend))

    if backend == 'pil':
        return Image.open(path)
    elif backend == 'cv2':
        cv2 = try_import('cv2')
        return cv2.imread(path)
Example #18
def crop(image: Image, face, up_ratio, down_ratio, width_ratio):
    dlib = try_import('dlib')
    width, height = image.size
    face_height = face.height()
    face_width = face.width()
    delta_up = up_ratio * face_height
    delta_down = down_ratio * face_height
    delta_width = width_ratio * width

    img_left = int(max(0, face.left() - delta_width))
    img_top = int(max(0, face.top() - delta_up))
    img_right = int(min(width, face.right() + delta_width))
    img_bottom = int(min(height, face.bottom() + delta_down))
    image = image.crop((img_left, img_top, img_right, img_bottom))
    face = dlib.rectangle(face.left() - img_left,
                          face.top() - img_top,
                          face.right() - img_left,
                          face.bottom() - img_top)
    face_expand = dlib.rectangle(img_left, img_top, img_right, img_bottom)
    center = face_expand.center()
    width, height = image.size
    crop_left = img_left
    crop_top = img_top
    crop_right = img_right
    crop_bottom = img_bottom
    if width > height:
        left = int(center.x - height / 2)
        right = int(center.x + height / 2)
        if left < 0:
            left, right = 0, height
        elif right > width:
            left, right = width - height, width
        image = image.crop((left, 0, right, height))
        face = dlib.rectangle(face.left() - left, face.top(),
                              face.right() - left, face.bottom())
        crop_left += left
        crop_right = crop_left + height
    elif width < height:
        top = int(center.y - width / 2)
        bottom = int(center.y + width / 2)
        if top < 0:
            top, bottom = 0, width
        elif bottom > height:
            top, bottom = height - width, height
        image = image.crop((0, top, width, bottom))
        face = dlib.rectangle(face.left(),
                              face.top() - top, face.right(),
                              face.bottom() - top)
        crop_top += top
        crop_bottom = crop_top + width
    crop_face = dlib.rectangle(crop_left, crop_top, crop_right, crop_bottom)
    return image, face, crop_face
Example #19
def hflip(img):
    """Horizontally flips the given image.

    Args:
        img (np.array): Image to be flipped.

    Returns:
        np.array: Horizontally flipped image.

    """
    cv2 = try_import('cv2')

    return cv2.flip(img, 1)
Example #20
    def __call__(self, model):
        paddleslim = try_import('paddleslim')
        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
        if self.print_model:
            logger.info("Model before quant:")
            logger.info(model)

        self.quanter.quantize(model)

        if self.print_model:
            logger.info("Quantized model:")
            logger.info(model)

        return model
Example #21
    def __init__(self,
                 vocab_file,
                 sentencepiece_model_file,
                 do_lower_case=False,
                 unk_token="[UNK]",
                 pad_token="[PAD]",
                 cls_token="[CLS]",
                 sep_token="[SEP]",
                 mask_token="[MASK]",
                 chitchat_token="[CHAT]",
                 knowledge_token="[KNOW]",
                 recommend_token="[RECO]",
                 special_tokens_file=""):
        mod = try_import('sentencepiece')
        self.spm_model = mod.SentencePieceProcessor()

        self.do_lower_case = do_lower_case
        if not os.path.isfile(vocab_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the "
                "vocabulary from a pretrained model please use "
                "`tokenizer = ErnieTinyTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
                .format(vocab_file))
        self.vocab = self.load_vocabulary(vocab_file,
                                          unk_token,
                                          pad_token,
                                          cls_token,
                                          sep_token,
                                          mask_token=mask_token,
                                          chitchat_token=chitchat_token,
                                          knowledge_token=knowledge_token,
                                          recommend_token=recommend_token)

        # if the sentencepiece_model_file does not exist, use the default sentence-piece model
        if os.path.isfile(sentencepiece_model_file):
            self.spm_model.Load(sentencepiece_model_file)

        pat_str = ""
        if os.path.isfile(special_tokens_file):
            self.specials = self.read_file(special_tokens_file)
            for special in self.specials:
                pat_str += "(" + re.escape(special) + ")|"
        else:
            self.specials = {}

        pat_str += r"([a-zA-Z0-9\S]+)"
        self.pat = re.compile(pat_str)

        self.vocab_file = vocab_file
        self.sentencepiece_model_file = sentencepiece_model_file
Example #22
    def pinyin_locs_map(self, text):
        """
        Get the map of pinyin locations and pinyin tensor.

        Args:
            text (str):
                The sequence to be processed.
 
        Returns:
            dict: the map of pinyin locations and pinyin tensor.
        """
        pinyin = try_import("pypinyin.pinyin")
        Style = try_import("pypinyin.Style")
        pinyin_list = pinyin(
            text,
            style=Style.TONE3,
            heteronym=True,
            errors=lambda x: [["not chinese"] for _ in x], )
        pinyin_locs = {}
        # get pinyin of each location
        for index, item in enumerate(pinyin_list):
            pinyin_string = item[0]
            # not a Chinese character, pass
            if pinyin_string == "not chinese":
                continue
            if pinyin_string in self.pinyin2tensor:
                pinyin_locs[index] = self.pinyin2tensor[pinyin_string]
            else:
                ids = [0] * 8
                for i, p in enumerate(pinyin_string):
                    if p not in self.pinyin_dict["char2idx"]:
                        ids = [0] * 8
                        break
                    ids[i] = self.pinyin_dict["char2idx"][p]
                pinyin_locs[index] = ids
        return pinyin_locs
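For illustration, the underlying pypinyin call (assuming pypinyin is installed; the original passes heteronym=True to collect all candidate readings per character):

from pypinyin import pinyin, Style

print(pinyin("中国", style=Style.TONE3, heteronym=False))
# [['zhong1'], ['guo2']]  -- one inner list per character, tone number appended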
Example #23
    def __init__(
            self,
            vocab_file,
            merges_file,
            errors='replace',
            max_len=None,
            pad_token='<|endoftext|>',
            eos_token='<|endoftext|>',
            unk_token='<|endoftext|>',
            eol_token='\u010a',  # The token that represents a newline.
            **kwargs
    ):
        pad_token = AddedToken(pad_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   pad_token, str) else pad_token
        eos_token = AddedToken(eos_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   eos_token, str) else eos_token
        unk_token = AddedToken(unk_token,
                               lstrip=False, rstrip=False) if isinstance(
                                   unk_token, str) else unk_token

        self._build_special_tokens_map_extended(bos_token=pad_token,
                                                eos_token=eos_token,
                                                unk_token=unk_token)

        self._vocab_file = vocab_file
        self._merges_file = merges_file
        self.max_len = max_len if max_len is not None else int(1e12)
        self.num_command_tokens = 2
        self.num_type_tokens = 2

        with open(vocab_file, 'r', encoding='utf-8') as f:
            self.encoder = json.load(f)
        self.decoder = {v: k for k, v in self.encoder.items()}

        self.num_tokens = len(self.encoder)
        self.num_text_tokens = self.num_tokens - 1
        self.errors = errors  # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        with open(merges_file, encoding='utf-8') as f:
            bpe_data = f.read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}
        re = try_import("regex")
        self.pat = re.compile(
            r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
        )
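For illustration, how the compiled pattern pre-tokenizes text (the \p{...} classes require the regex package, hence the try_import above):

import regex

pat = regex.compile(
    r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")
print(regex.findall(pat, "I'm testing GPT-2's splitter."))
# ['I', "'m", ' testing', ' GPT', '-', '2', "'s", ' splitter', '.']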
Example #24
def vflip(img):
    """Vertically flips the given np.array.

    Args:
        img (np.array): Image to be flipped.

    Returns:
        np.array: Vertically flipped image.

    """
    cv2 = try_import('cv2')

    if len(img.shape) == 3 and img.shape[2] == 1:
        return cv2.flip(img, 0)[:, :, np.newaxis]
    else:
        return cv2.flip(img, 0)
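As a sanity check, these flips match numpy's flipud/fliplr on a plain 2-D array (sketch; assumes numpy and OpenCV):

import numpy as np

img = np.arange(12, dtype='uint8').reshape(3, 4)
assert np.array_equal(vflip(img), np.flipud(img))  # flip rows (around the x-axis)
assert np.array_equal(hflip(img), np.fliplr(img))  # flip columns (around the y-axis)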
Example #25
def detect(image: Image):
    dlib = try_import('dlib')
    image = np.asarray(image)
    h, w = image.shape[:2]
    image = resize_by_max(image, 361)
    actual_h, actual_w = image.shape[:2]
    detector = dlib.get_frontal_face_detector()
    faces_on_small = detector(image, 1)
    faces = dlib.rectangles()
    for face in faces_on_small:
        faces.append(
            dlib.rectangle(int(face.left() / actual_w * w + 0.5),
                           int(face.top() / actual_h * h + 0.5),
                           int(face.right() / actual_w * w + 0.5),
                           int(face.bottom() / actual_h * h + 0.5)))
    return faces
Example #26
    def __init__(self,
                 data_file=None,
                 label_file=None,
                 setid_file=None,
                 mode='train',
                 transform=None,
                 download=True,
                 backend=None):
        assert mode.lower() in ['train', 'valid', 'test'], \
                "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

        if backend is None:
            backend = paddle.vision.get_image_backend()
        if backend not in ['pil', 'cv2']:
            raise ValueError(
                "Expected backend are one of ['pil', 'cv2'], but got {}"
                .format(backend))
        self.backend = backend

        flag = MODE_FLAG_MAP[mode.lower()]

        if not data_file:
            assert download, "data_file is not set and downloading automatically is disabled"
            data_file = _check_exists_and_download(
                data_file, DATA_URL, DATA_MD5, 'flowers', download)

        if not label_file:
            assert download, "label_file is not set and downloading automatically is disabled"
            label_file = _check_exists_and_download(
                label_file, LABEL_URL, LABEL_MD5, 'flowers', download)

        if not setid_file:
            assert download, "setid_file is not set and downloading automatically is disabled"
            setid_file = _check_exists_and_download(
                setid_file, SETID_URL, SETID_MD5, 'flowers', download)

        self.transform = transform

        data_tar = tarfile.open(data_file)
        self.data_path = data_file.replace(".tgz", "/")
        if not os.path.exists(self.data_path):
            os.mkdir(self.data_path)
        data_tar.extractall(self.data_path)

        scio = try_import('scipy.io')
        self.labels = scio.loadmat(label_file)['labels'][0]
        self.indexes = scio.loadmat(setid_file)[flag][0]
Example #27
    def __init__(self,
                 vocab_file,
                 merges_file,
                 errors='replace',
                 special_tokens=None,
                 max_len=None,
                 do_lower_case=True):
        self._vocab_file = vocab_file
        self._merges_file = merges_file
        self.max_len = max_len if max_len is not None else int(1e12)
        self.num_command_tokens = 2
        self.num_type_tokens = 2

        with open(vocab_file, 'r', encoding='utf-8') as f:
            self.encoder = json.load(f)
        self.decoder = {v: k for k, v in self.encoder.items()}

        # construct the command tokens
        self._command_tokens = [
            CommandToken('pad', '<|endoftext|>',
                         self.encoder['<|endoftext|>']),
            CommandToken('eod', '<|endoftext|>',
                         self.encoder['<|endoftext|>']),
        ]
        self.command_name_map = {tok.name: tok for tok in self._command_tokens}
        self.command_token_map = {
            tok.token: tok
            for tok in self._command_tokens
        }
        self.command_id_map = {tok.Id: tok for tok in self._command_tokens}

        self.num_tokens = len(self.encoder)
        self.num_text_tokens = self.num_tokens - 1
        self.errors = errors  # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        with open(merges_file, encoding='utf-8') as f:
            bpe_data = f.read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}
        re = try_import("regex")
        self.pat = re.compile(
            r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
        )

        self.special_tokens = {}
        self.special_tokens_decoder = {}
        self.set_special_tokens(special_tokens)
Example #28
    def __init__(
        self,
        vocab_file: str,
        spm_path: str,
        word_dict_path: str,
        do_lower_case: bool = True,
        unk_token: str = '[UNK]',
        sep_token: str = '[SEP]',
        pad_token: str = '[PAD]',
        cls_token: str = '[CLS]',
        mask_token: str = '[MASK]',
    ):
        mod = try_import('sentencepiece')
        self.unk_token = unk_token
        self.sep_token = sep_token
        self.pad_token = pad_token
        self.cls_token = cls_token
        self.mask_token = mask_token
        self.do_lower_case = do_lower_case
        self.all_special_tokens = [
            unk_token, sep_token, pad_token, cls_token, mask_token
        ]

        if not os.path.isfile(vocab_file):
            raise ValueError(
                'Can\'t find a vocabulary file at path \'{}\'.'.format(
                    vocab_file))
        self.vocab = load_vocab(vocab_file)
        self.ids_to_tokens = collections.OrderedDict([
            (ids, tok) for tok, ids in self.vocab.items()
        ])

        # Here is the difference with BertTokenizer.
        with open(word_dict_path, 'rb') as f:
            self.dict = pickle.load(f)
        self.sp_model = mod.SentencePieceProcessor()
        self.window_size = 5
        self.sp_model.Load(spm_path)

        self.unk_token_id = self.convert_tokens_to_ids(self.unk_token)
        self.sep_token_id = self.convert_tokens_to_ids(self.sep_token)
        self.pad_token_id = self.convert_tokens_to_ids(self.pad_token)
        self.pad_token_type_id = 0
        self.cls_token_id = self.convert_tokens_to_ids(self.cls_token)
        self.mask_token_id = self.convert_tokens_to_ids(self.mask_token)
        self.all_special_ids = self.convert_tokens_to_ids(
            self.all_special_tokens)
Example #29
    def _tokenize(self, text):
        """
        End-to-end tokenization for Blenderbot models.
        Args:
            text (str): The text to be tokenized.

        Returns:
            list: A list of string representing converted tokens.
        """
        bpe_tokens = []
        re = try_import("regex")
        for token in re.findall(self.pat, text):
            token = ''.join(self.byte_encoder[b]
                            for b in token.encode('utf-8'))
            bpe_tokens.extend(bpe_token
                              for bpe_token in self.bpe(token).split(' '))
        return bpe_tokens
Example #30
def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
    """Perspective the image.

    Args:
        img (np.array): Image to be perspectived.
        startpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the original image,
        endpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the transformed image.
        interpolation (int|str, optional): Interpolation method. If omitted, or if the 
            image has only one channel, it is set to cv2.INTER_NEAREST.
            when use cv2 backend, support method are as following: 
            - "nearest": cv2.INTER_NEAREST, 
            - "bilinear": cv2.INTER_LINEAR, 
            - "bicubic": cv2.INTER_CUBIC
        fill (3-tuple or int): RGB pixel fill value for area outside the rotated image.
            If int, it is used for all channels respectively.

    Returns:
        np.array: Perspectived image.

    """
    cv2 = try_import('cv2')
    _cv2_interp_from_str = {
        'nearest': cv2.INTER_NEAREST,
        'bilinear': cv2.INTER_LINEAR,
        'area': cv2.INTER_AREA,
        'bicubic': cv2.INTER_CUBIC,
        'lanczos': cv2.INTER_LANCZOS4
    }
    h, w = img.shape[0:2]

    startpoints = np.array(startpoints, dtype="float32")
    endpoints = np.array(endpoints, dtype="float32")
    matrix = cv2.getPerspectiveTransform(startpoints, endpoints)

    if len(img.shape) == 3 and img.shape[2] == 1:
        return cv2.warpPerspective(img,
                                   matrix,
                                   dsize=(w, h),
                                   flags=_cv2_interp_from_str[interpolation],
                                   borderValue=fill)[:, :, np.newaxis]
    else:
        return cv2.warpPerspective(img,
                                   matrix,
                                   dsize=(w, h),
                                   flags=_cv2_interp_from_str[interpolation],
                                   borderValue=fill)
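A usage sketch with illustrative corner points (requires numpy and OpenCV):

import numpy as np

img = np.zeros((100, 100, 3), dtype='uint8')
src = [[0, 0], [99, 0], [99, 99], [0, 99]]   # top-left, top-right, bottom-right, bottom-left
dst = [[10, 0], [89, 0], [99, 99], [0, 99]]  # pinch the top edge inward
warped = perspective(img, src, dst, interpolation='bilinear', fill=0)
# warped.shape == (100, 100, 3)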