Example #1
    def __init__(self,
                 path_to_detector=None,
                 path_to_anchor=None,
                 verbose=False,
                 min_score_thresh=0.5,
                 min_suppression_threshold=0.3):
        super(BlazeFaceDetector, self).__init__(verbose)

        # Initialise the face detector
        if path_to_detector is None:
            model_weights_path = get_weights_path_from_url(blazeface_weights)
            model_weights = paddle.load(model_weights_path)
            model_anchors = np.load(
                get_weights_path_from_url(blazeface_anchors))
        else:
            model_weights = paddle.load(path_to_detector)
            model_anchors = np.load(path_to_anchor)

        self.face_detector = BlazeFace()
        self.face_detector.load_dict(model_weights)
        self.face_detector.load_anchors_from_npy(model_anchors)

        self.face_detector.min_score_thresh = min_score_thresh
        self.face_detector.min_suppression_threshold = min_suppression_threshold

        self.face_detector.eval()
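A minimal usage sketch (hypothetical; assumes the module defining BlazeFaceDetector above is importable). With both paths left as None, the default weights and anchors are downloaded and cached on first use:

# Hypothetical usage of the constructor above; no local weight files needed.
detector = BlazeFaceDetector(min_score_thresh=0.6, verbose=True)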
Example #2
def load_sample_audio_text():
    """Load sample audio and text"""
    text_path = download.get_weights_path_from_url(TEXT_URL)
    with open(text_path) as f:
        text = f.read()

    audio_path = download.get_weights_path_from_url(AUDIO_URL)
    x = load_audio(audio_path)
    return x, text
Example #3
    def from_pretrained(cls, config_name):

        assert config_name in CONFIG_NAMES, f'input config {config_name} incorrect, available configs: {CONFIG_NAMES}'
        weight_url = URL_BASE + f'weights/{config_name}.pdparam'
        weight_path = download.get_weights_path_from_url(weight_url)

        config_url = URL_BASE + f'config/{config_name}.json'
        config_path = download.get_weights_path_from_url(config_url)

        config = PretrainedConfig.from_pretrained(config_path)
        model = cls(config)
        state_dict = paddle.load(weight_path)
        model.load_dict(state_dict)
        model.eval()
        return model
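A minimal usage sketch (hypothetical). from_pretrained is an alternate constructor meant to be called on the model class itself; 'base' below is illustrative and must be replaced with a real entry from CONFIG_NAMES:

# Hypothetical call; SomeModel stands for the class defining from_pretrained.
model = SomeModel.from_pretrained('base')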
Example #4
def calculate_fid_given_paths(paths,
                              batch_size,
                              use_gpu,
                              dims,
                              model=None,
                              premodel_path=get_weights_path_from_url(FID_WEIGHTS_URL),
                              style=None):
    assert os.path.exists(
        premodel_path
    ), 'pretrained model path {} does not exist! Please download it first'.format(
        premodel_path)
    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    if model is None and style != 'stargan':
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
        model = InceptionV3([block_idx], class_dim=1008)

    m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims,
                                         use_gpu, premodel_path, style)
    m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims,
                                         use_gpu, premodel_path, style)

    fid_value = _calculate_frechet_distance(m1, s1, m2, s2)
    return fid_value
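A minimal usage sketch (hypothetical; the two directory paths are illustrative). Note that the premodel_path default is evaluated at import time, so the FID weights are downloaded as soon as the module is loaded:

# Hypothetical call: FID between a folder of real and a folder of generated images.
fid = calculate_fid_given_paths(['data/real', 'data/fake'],
                                batch_size=32,
                                use_gpu=True,
                                dims=2048)
print('FID:', fid)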
Example #5
    def set_paddle_model(self):
        paddle_model = ViT_base_patch16_224()
        model_path = get_weights_path_from_url(
            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_224_pretrained.pdparams'
        )
        paddle_model.set_dict(paddle.load(model_path))
        return paddle_model
Example #6
    def __init__(self, args, cfg, output_path='output'):
        self.args = args
        self.cfg = cfg
        self.weight_path = self.args.model_path
        if self.weight_path is None:
            self.weight_path = get_weights_path_from_url(PS_WEIGHT_URL)
        self.output_path = output_path
Example #7
def _yolov3_darknet(num_layers=53,
                    num_classes=80,
                    num_max_boxes=50,
                    model_mode='train',
                    pretrained=True):
    inputs = [
        InputSpec([None, 1], 'int64', name='img_id'),
        InputSpec([None, 2], 'int32', name='img_shape'),
        InputSpec([None, 3, None, None], 'float32', name='image')
    ]
    labels = [
        InputSpec([None, num_max_boxes, 4], 'float32', name='gt_bbox'),
        InputSpec([None, num_max_boxes], 'int32', name='gt_label'),
        InputSpec([None, num_max_boxes], 'float32', name='gt_score')
    ]
    net = YOLOv3(num_classes, model_mode)
    model = paddle.Model(net, inputs, labels)
    if pretrained:
        assert num_layers in pretrain_infos.keys(), \
                "YOLOv3-DarkNet{} do not have pretrained weights now, " \
                "pretrained should be set as False".format(num_layers)
        weight_path = get_weights_path_from_url(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
        model.load(weight_path)
    return model
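A minimal usage sketch (hypothetical), relying on the defaults above:

# Hypothetical call: DarkNet-53 backbone with the published pretrained weights.
model = _yolov3_darknet(num_layers=53, pretrained=True)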
Example #8
    def __init__(self,
                 generator,
                 discriminator=None,
                 syncnet_wt=1.0,
                 max_eval_steps=700,
                 is_train=True):
        """Initialize the Wav2lip class.

        Parameters:
            opt (config dict)-- stores all the experiment flags; needs to be a subclass of Dict
        """
        super(Wav2LipModel, self).__init__()
        self.syncnet_wt = syncnet_wt
        self.is_train = is_train
        self.eval_step = 0
        self.max_eval_steps = max_eval_steps
        self.eval_sync_losses, self.eval_recon_losses = [], []
        # define networks (both generator and discriminator)
        self.nets['netG'] = build_generator(generator)
        init_weights(self.nets['netG'], distribution='uniform')
        if self.is_train:
            self.nets['netD'] = build_discriminator(discriminator)
            weights_path = get_weights_path_from_url(SYNCNET_WEIGHT_URL)
            params = paddle.load(weights_path)
            self.nets['netD'].load_dict(params)
            self.recon_loss = paddle.nn.L1Loss()
Example #9
def _vgg(arch: str, cfg: str, batch_norm: bool, pretrained: bool,
         **kwargs: Any) -> VGG:
    if pretrained:
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        # get_weights_path_from_url returns a local file path; load the weights first
        weight_path = get_weights_path_from_url(model_urls[arch])
        model.load_dict(paddle.load(weight_path))
    return model
Example #10
def _resnet(arch: str,
            block: Type[Union[BasicBlock, Bottleneck]],
            layers: List[int],
            pretrained: bool,
            **kwargs: Any) -> ResNet:
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        # get_weights_path_from_url returns a local file path; load the weights first
        weight_path = get_weights_path_from_url(model_urls[arch])
        model.load_dict(paddle.load(weight_path))
    return model
Example #11
def vgg16(pretrained=False):
    features = make_layers(cfg)
    model = VGG(features)

    if pretrained:
        weight_path = get_weights_path_from_url(model_urls['vgg16'][0],
                                                model_urls['vgg16'][1])
        param = paddle.load(weight_path)
        model.load_dict(param)

    return model
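A minimal usage sketch (hypothetical; assumes paddle is imported in the calling module):

import paddle

# Hypothetical forward pass through the pretrained VGG16.
model = vgg16(pretrained=True)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)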
Example #12
def _squeezenet(arch, version, pretrained, **kwargs):
    model = SqueezeNet(version, **kwargs)
    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])
        param = paddle.load(weight_path)
        model.set_dict(param)

    return model
Example #13
    def __init__(self, config_name, pretrained=True):
        super().__init__()

        assert config_name in CONFIG_NAMES, f'input config {config_name} incorrect, available configs: {CONFIG_NAMES}'

        config_url = URL_BASE + f'config/{config_name}.json'
        config_path = download.get_weights_path_from_url(config_url)
        config = PretrainedConfig.from_pretrained(config_path)

        self.wav2vec2 = Wav2Vec2Model(config)
        self.dropout = nn.Dropout(config.final_dropout)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)

        self.config = config
        if pretrained:
            weight_url = URL_BASE + f'weights/{config_name}.pdparam'
            weight_path = download.get_weights_path_from_url(weight_url)
            state_dict = paddle.load(weight_path)
            self.load_dict(state_dict)
            self.eval()
Example #14
def _resnet(arch, Block, depth, pretrained, **kwargs):
    model = ResNet(Block, depth, **kwargs)
    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.set_dict(param)

    return model
Example #15
def _darknet(num_layers=53, input_channels=3, pretrained=True):
    model = DarkNet(num_layers, input_channels)
    if pretrained:
        assert num_layers in pretrain_infos.keys(), \
                "DarkNet{} do not have pretrained weights now, " \
                "pretrained should be set as False".format(num_layers)
        weight_path = get_weights_path_from_url(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
        weight_dict, _ = fluid.load_dygraph(weight_path[:-9])
        model.set_dict(weight_dict)
    return model
Example #16
def _mobilenet(arch, pretrained=False, **kwargs):
    model = MobileNetV2(**kwargs)
    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.load_dict(param)

    return model
Example #17
def _darknet(depth=53, pretrained=True):
    model = DarkNet(depth)
    if pretrained:
        assert depth in pretrain_infos.keys(), \
                "DarkNet{} does not have pretrained weights now, " \
                "pretrained should be set as False".format(depth)
        weight_path = get_weights_path_from_url(*(pretrain_infos[depth]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
        params = paddle.load(weight_path)
        model.load_dict(params)
    return model
Example #18
def _resnet(arch, Block, depth, pretrained, **kwargs):
    model = ResNet(Block, depth, **kwargs)
    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])
        assert weight_path.endswith(
            '.pdparams'), "suffix of weight must be .pdparams"
        param, _ = fluid.load_dygraph(weight_path)
        model.set_dict(param)

    return model
Example #19
    def __init__(self, path_to_detector=None, verbose=False):
        super(SFDDetector, self).__init__(verbose)

        # Initialise the face detector
        if path_to_detector is None:
            model_weights_path = get_weights_path_from_url(models_urls['s3fd'])
            model_weights = paddle.load(model_weights_path)
        else:
            model_weights = paddle.load(path_to_detector)

        self.face_detector = s3fd()
        self.face_detector.load_dict(model_weights)
        self.face_detector.eval()
Example #20
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)

    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.load_dict(param)

    return model
Example #21
def build_model(name="RN50"):
    assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}"
    name2model = {
        "RN101": build_rn101_model,
        "VIT": build_vit_model,
        "RN50": build_rn50_model,
    }
    model = name2model[name]()
    weight = download.get_weights_path_from_url(URL[name])
    sd = paddle.load(weight)
    model.load_dict(sd)
    model.eval()
    return model
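A minimal usage sketch (hypothetical). The name must be one of MODEL_NAMES, and the returned network is already in eval mode:

# Hypothetical call: build the ResNet-50 variant with downloaded weights.
model = build_model("RN50")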
Example #22
def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
    inputs = [InputSpec([None, 8, 3, 224, 224], 'float32', name='image')]
    labels = [InputSpec([None, 1], 'int64', name='label')]
    net = TSM_ResNet(num_layers, seg_num, num_classes)
    model = paddle.Model(net, inputs, labels)
    if pretrained:
        assert num_layers in pretrain_infos.keys(), \
                "TSM-ResNet{} do not have pretrained weights now, " \
                "pretrained should be set as False".format(num_layers)
        weight_path = get_weights_path_from_url(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
        # weight_dict, _ = fluid.load_dygraph(weight_path)
        # model.set_dict(weight_dict)
        model.load(weight_path)
    return model
Example #23
def _mobilenet_v3(arch, pretrained=False, scale=1.0, **kwargs):
    if arch == "mobilenet_v3_large":
        model = MobileNetV3Large(scale=scale, **kwargs)
    else:
        model = MobileNetV3Small(scale=scale, **kwargs)
    if pretrained:
        arch = "{}_x{}".format(arch, scale)
        assert (
            arch in model_urls
        ), "{} model do not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.set_dict(param)
    return model
Example #24
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    model = VGG(make_layers(
        cfgs[cfg], batch_norm=batch_norm),
                num_classes=1000,
                **kwargs)

    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])
        assert weight_path.endswith(
            '.pdparams'), "suffix of weight must be .pdparams"
        param, _ = fluid.load_dygraph(weight_path)
        model.load_dict(param)

    return model
Example #25
def bmn(tscale,
        dscale,
        feat_dim,
        prop_boundary_ratio,
        num_sample,
        num_sample_perbin,
        mode,
        pretrained=True):
    """BMN model
    
    Args:
        tscale (int): sequence length, default 100.
        dscale (int): max duration length, default 100.
        prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5. 
        num_sample (int): number of samples betweent starting boundary and ending boundary of each propoasl, default 32.
        num_sample_perbin (int):  number of selected points in each sample, default 3.
        pretrained (bool): If True, returns a model with pre-trained model, default True.
    """
    inputs = [
        InputSpec(
            [None, feat_dim, tscale], 'float32', name='feat_input')
    ]
    gt_iou_map = InputSpec(
        [None, dscale, tscale], 'float32', name='gt_iou_map')
    gt_start = InputSpec([None, tscale], 'float32', name='gt_start')
    gt_end = InputSpec([None, tscale], 'float32', name='gt_end')
    video_idx = InputSpec([None, 1], 'int64', name='video_idx')
    label_dict = {
        'train': [gt_iou_map, gt_start, gt_end],
        'test': [gt_iou_map, gt_start, gt_end, video_idx],
        'infer': [video_idx]
    }
    labels = label_dict[mode]

    net = BMN(tscale, dscale, prop_boundary_ratio, num_sample,
              num_sample_perbin)
    model = paddle.Model(net, inputs, labels)
    if pretrained:
        weight_path = get_weights_path_from_url(*(pretrain_infos['bmn']))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
        model.load(weight_path)
    return model
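A minimal usage sketch (hypothetical; the feat_dim value below is an assumption, not taken from this snippet):

# Hypothetical call using the defaults documented in the docstring.
model = bmn(tscale=100, dscale=100, feat_dim=400, prop_boundary_ratio=0.5,
            num_sample=32, num_sample_perbin=3, mode='infer')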
Example #26
def googlenet(pretrained=False, **kwargs):
    """GoogLeNet (Inception v1) model architecture from
    `"Going Deeper with Convolutions" <https://arxiv.org/pdf/1409.4842.pdf>`_
    
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.models import googlenet

            # build model
            model = googlenet()

            # build model and load imagenet pretrained weight
            # model = googlenet(pretrained=True)

            x = paddle.rand([1, 3, 224, 224])
            out, out1, out2 = model(x)

            print(out.shape)
    """
    model = GoogLeNet(**kwargs)
    arch = "googlenet"
    if pretrained:
        assert (
            arch in model_urls
        ), "{} model do not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.set_dict(param)
    return model
Example #27
    def __init__(self,
                 vocab_file=None,
                 bos_token="<s>",
                 eos_token="</s>",
                 unk_token="<unk>",
                 pad_token="<pad>",
                 word_delimiter_token="|",
                 do_lower_case=True,
                 verbose=False):
        if vocab_file is None:
            vocab_url = URL_BASE + 'config/vocab.json'
            vocab_file = download.get_weights_path_from_url(vocab_url)

        self._bos_token = bos_token
        self._eos_token = eos_token
        self._unk_token = unk_token
        self._sep_token = None
        self._pad_token = pad_token
        self._cls_token = None
        self._mask_token = None
        self._pad_token_type_id = 0
        self._word_delimiter_token = word_delimiter_token

        self.verbose = verbose

        self._additional_special_tokens = []

        self.added_tokens_encoder: Dict[str, int] = {}
        self.added_tokens_decoder: Dict[int, str] = {}

        self.do_lower_case = do_lower_case

        with open(vocab_file, encoding="utf-8") as vocab_handle:
            self.encoder = json.load(vocab_handle)

        self.decoder = {v: k for k, v in self.encoder.items()}
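A minimal usage sketch (hypothetical; the class name is not shown in this snippet, so Tokenizer below is a stand-in). With vocab_file left as None, the default vocabulary is downloaded automatically:

# Hypothetical usage; Tokenizer stands for the class defined above.
tokenizer = Tokenizer()
print(len(tokenizer.encoder))  # vocabulary size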
Example #28
def inception_v3(pretrained=False, **kwargs):
    """
    InceptionV3 model from
    `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/pdf/1512.00567.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    
    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.models import inception_v3

            # build model
            model = inception_v3()

            # build model and load imagenet pretrained weight
            # model = inception_v3(pretrained=True)

            x = paddle.rand([1, 3, 299, 299])
            out = model(x)

            print(out.shape)
    """
    model = InceptionV3(**kwargs)
    arch = "inception_v3"
    if pretrained:
        assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path_from_url(model_urls[arch][0],
                                                model_urls[arch][1])

        param = paddle.load(weight_path)
        model.set_dict(param)
    return model
Example #29
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Audioset inference')
    parser.add_argument('--device',
                        help='set the gpu device number',
                        type=int,
                        required=False,
                        default=0)
    parser.add_argument('--weight', type=str, required=False, default='')
    args = parser.parse_args()

    paddle.set_device('gpu:{}'.format(args.device))
    ModelClass = eval(c['model_type'])
    model = ModelClass(pretrained=False,
                       num_classes=c['num_classes'],
                       dropout=c['dropout'])

    if args.weight.strip() == '':
        args.weight = download.get_weights_path_from_url(checkpoint_url)
    model.load_dict(paddle.load(args.weight))
    model.eval()

    _, val_loader = get_loader()
    logger.info(f'evaluating...')

    val_acc, val_preci, val_recall, mAP_scores = evaluate(
        0, val_loader, model, F.binary_cross_entropy_with_logits)
    avg_map = np.mean(mAP_scores)
    logger.info(f'average mAP: {avg_map}')
Example #30
    def run(self):
        if not os.path.isfile(self.args.face):
            raise ValueError(
                '--face argument must be a valid path to video/image file')

        elif path.basename(
                self.args.face).split('.')[1] in ['jpg', 'png', 'jpeg']:
            full_frames = [cv2.imread(self.args.face)]
            fps = self.args.fps

        else:
            video_stream = cv2.VideoCapture(self.args.face)
            fps = video_stream.get(cv2.CAP_PROP_FPS)

            print('Reading video frames...')

            full_frames = []
            while True:
                still_reading, frame = video_stream.read()
                if not still_reading:
                    video_stream.release()
                    break
                if self.args.resize_factor > 1:
                    frame = cv2.resize(
                        frame, (frame.shape[1] // self.args.resize_factor,
                                frame.shape[0] // self.args.resize_factor))

                if self.args.rotate:
                    frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

                y1, y2, x1, x2 = self.args.crop
                if x2 == -1: x2 = frame.shape[1]
                if y2 == -1: y2 = frame.shape[0]

                frame = frame[y1:y2, x1:x2]

                full_frames.append(frame)

        print("Number of frames available for inference: " +
              str(len(full_frames)))

        if not self.args.audio.endswith('.wav'):
            print('Extracting raw audio...')
            command = 'ffmpeg -y -i {} -strict -2 {}'.format(
                self.args.audio, 'temp/temp.wav')

            subprocess.call(command, shell=True)
            self.args.audio = 'temp/temp.wav'

        wav = audio.load_wav(self.args.audio, 16000)
        mel = audio.melspectrogram(wav)
        print(mel.shape)

        if np.isnan(mel.reshape(-1)).sum() > 0:
            raise ValueError(
                'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again'
            )

        mel_chunks = []
        mel_idx_multiplier = 80. / fps
        i = 0
        while True:
            start_idx = int(i * mel_idx_multiplier)
            if start_idx + mel_step_size > len(mel[0]):
                mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
                break
            mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
            i += 1

        print("Length of mel chunks: {}".format(len(mel_chunks)))

        full_frames = full_frames[:len(mel_chunks)]

        batch_size = self.args.wav2lip_batch_size
        gen = self.datagen(full_frames.copy(), mel_chunks)

        model = Wav2Lip()
        if self.args.checkpoint_path is None:
            model_weights_path = get_weights_path_from_url(WAV2LIP_WEIGHT_URL)
            weights = paddle.load(model_weights_path)
        else:
            weights = paddle.load(self.args.checkpoint_path)
        model.load_dict(weights)
        model.eval()
        print("Model loaded")
        for i, (img_batch, mel_batch, frames, coords) in enumerate(
                tqdm(gen,
                     total=int(np.ceil(float(len(mel_chunks)) / batch_size)))):
            if i == 0:

                frame_h, frame_w = full_frames[0].shape[:-1]
                out = cv2.VideoWriter('temp/result.avi',
                                      cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                      (frame_w, frame_h))

            img_batch = paddle.to_tensor(np.transpose(
                img_batch, (0, 3, 1, 2))).astype('float32')
            mel_batch = paddle.to_tensor(np.transpose(
                mel_batch, (0, 3, 1, 2))).astype('float32')

            with paddle.no_grad():
                pred = model(mel_batch, img_batch)

            pred = pred.numpy().transpose(0, 2, 3, 1) * 255.

            for p, f, c in zip(pred, frames, coords):
                y1, y2, x1, x2 = c
                p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))

                f[y1:y2, x1:x2] = p
                out.write(f)

        out.release()

        command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format(
            self.args.audio, 'temp/result.avi', self.args.outfile)
        subprocess.call(command, shell=platform.system() != 'Windows')