def deep_model(model: str = 'speakernet', quantized: bool = False, **kwargs):
    """
    Load Speaker2Vec model.

    Parameters
    ----------
    model : str, optional (default='speakernet')
        Model architecture supported. Allowed values:

        * ``'vggvox-v1'`` - VGGVox V1, embedding size 1024, exported from https://github.com/linhdvu14/vggvox-speaker-identification
        * ``'vggvox-v2'`` - VGGVox V2, embedding size 512, exported from https://github.com/WeidiXie/VGG-Speaker-Recognition
        * ``'deep-speaker'`` - Deep Speaker, embedding size 512, exported from https://github.com/philipperemy/deep-speaker
        * ``'speakernet'`` - SpeakerNet, embedding size 7205, exported from https://github.com/NVIDIA/NeMo/tree/main/examples/speaker_recognition

    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result : malaya_speech.supervised.classification.load function
    """

    model = model.lower()
    if model not in _availability:
        raise Exception(
            'model not supported, please check supported models from `malaya_speech.speaker_vector.available_model()`.'
        )

    return classification.load(model=model,
                               module='speaker-vector',
                               extra={},
                               label={},
                               quantized=quantized,
                               **kwargs)
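A minimal usage sketch for the loader above, assuming the documented malaya_speech entry points (`malaya_speech.load` and the returned model's `vectorize` method); the audio path is hypothetical.

import malaya_speech

# Load the default SpeakerNet speaker-embedding model.
model = malaya_speech.speaker_vector.deep_model(model='speakernet')

# `malaya_speech.load` returns the waveform and its sample rate.
y, sr = malaya_speech.load('speech.wav')  # hypothetical path

# `vectorize` takes a list of waveforms and returns one embedding per input,
# e.g. a (1, 7205) array for speakernet.
embedding = model.vectorize([y])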
Example #2
def deep_model(model: str = 'vggvox-v2', quantized: bool = False, **kwargs):
    """
    Load VAD model.

    Parameters
    ----------
    model : str, optional (default='vggvox-v2')
        Model architecture supported. Allowed values:

        * ``'vggvox-v1'`` - finetuned VGGVox V1.
        * ``'vggvox-v2'`` - finetuned VGGVox V2.
        * ``'speakernet'`` - finetuned SpeakerNet.
    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result : malaya_speech.supervised.classification.load function
    """
    model = model.lower()
    if model not in _availability:
        raise Exception(
            'model not supported, please check supported models from `malaya_speech.vad.available_model()`.'
        )

    settings = {
        'vggvox-v1': {
            'frame_len': 0.005,
            'frame_step': 0.0005
        },
        'vggvox-v2': {
            'hop_length': 24,
            'concat': False,
            'mode': 'eval'
        },
        'speakernet': {
            'frame_ms': 20,
            'stride_ms': 1.0
        },
    }

    return classification.load(model=model,
                               module='vad',
                               extra=settings[model],
                               label=[False, True],
                               quantized=quantized,
                               **kwargs)
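A sketch of applying the loaded VAD frame-by-frame, assuming the `malaya_speech.generator.frames` helper and the classification model's `predict` method from the library docs; the path and the 30 ms frame length are illustrative.

import malaya_speech

vad = malaya_speech.vad.deep_model(model='vggvox-v2')
y, sr = malaya_speech.load('speech.wav')  # hypothetical path

# Split the waveform into 30 ms frames and classify each one;
# labels follow the loader above: False = silence, True = voice activity.
frames = malaya_speech.generator.frames(y, 30, sr)
result = list(zip(frames, vad.predict(frames)))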
Example #3
def deep_model(model: str = 'speakernet', quantized: bool = False, **kwargs):
    """
    Load speaker change deep model.

    Parameters
    ----------
    model : str, optional (default='speakernet')
        Model architecture supported. Allowed values:

        * ``'vggvox-v2'`` - finetuned VGGVox V2.
        * ``'speakernet'`` - finetuned SpeakerNet.
    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result : malaya_speech.supervised.classification.load function
    """
    model = model.lower()
    if model not in _availability:
        raise Exception(
            'model not supported, please check supported models from `malaya_speech.speaker_change.available_model()`.'
        )

    settings = {
        'vggvox-v2': {
            'hop_length': 50,
            'concat': False,
            'mode': 'eval'
        },
        'speakernet': {
            'frame_ms': 20,
            'stride_ms': 2
        },
    }

    return classification.load(path=PATH_SPEAKER_CHANGE,
                               s3_path=S3_PATH_SPEAKER_CHANGE,
                               model=model,
                               name='speaker-change',
                               extra=settings[model],
                               label=[False, True],
                               quantized=quantized,
                               **kwargs)
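The speaker-change loader can be exercised the same way. This sketch assumes the same `frames` helper and `predict` method; the 500 ms frame length is a hypothetical choice for detecting boundaries over longer windows.

import malaya_speech

model = malaya_speech.speaker_change.deep_model(model='speakernet')
y, sr = malaya_speech.load('speech.wav')  # hypothetical path

# True marks frames where a speaker change is predicted.
frames = malaya_speech.generator.frames(y, 500, sr)
changes = list(zip(frames, model.predict(frames)))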
Example #4
def deep_model(model: str = 'vggvox-v2', quantized: bool = False, **kwargs):
    """
    Load age detection deep model.

    Parameters
    ----------
    model : str, optional (default='vggvox-v2')
        Model architecture supported. Allowed values:

        * ``'vggvox-v2'`` - finetuned VGGVox V2.
        * ``'deep-speaker'`` - finetuned Deep Speaker.
    quantized : bool, optional (default=False)
        If True, load the 8-bit quantized model.
        A quantized model is not necessarily faster; it depends entirely on the machine.

    Returns
    -------
    result : malaya_speech.supervised.classification.load function
    """

    model = model.lower()
    if model not in _availability:
        raise Exception(
            'model not supported, please check supported models from `malaya_speech.age_detection.available_model()`.'
        )

    settings = {
        'vggvox-v2': {
            'concat': False
        },
        'deep-speaker': {
            'voice_only': False
        },
    }

    return classification.load(path=PATH_AGE_DETECTION,
                               s3_path=S3_PATH_AGE_DETECTION,
                               model=model,
                               name='age-detection',
                               extra=settings[model],
                               label=labels,
                               quantized=quantized,
                               **kwargs)
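Finally, a sketch for the age-detection loader, again assuming `predict` and `predict_proba` on the returned classification model; the path is hypothetical and the label set comes from the module-level `labels` referenced above.

import malaya_speech

model = malaya_speech.age_detection.deep_model(model='vggvox-v2')
y, sr = malaya_speech.load('speech.wav')  # hypothetical path

# One predicted age bucket per input waveform, plus per-class probabilities.
print(model.predict([y]))
print(model.predict_proba([y]))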