class EncDecClassificationConfig(model_cfg.ModelConfig):
    """Typed default values for an encoder-decoder classification model config.

    Extends ``model_cfg.ModelConfig`` with global model arguments, three
    dataset sections, an optimizer/scheduler section and the model component
    sections. ``labels`` and ``timesteps`` default to ``MISSING`` (presumably
    the OmegaConf mandatory-value marker -- confirm against the file's
    imports), so no concrete value is provided for them here.

    NOTE(review): the section defaults below are config instances created
    once, when this class body executes. If the class is decorated with
    ``@dataclass`` (the decorator is not visible in this chunk), Python 3.11+
    rejects unhashable instances as field defaults and
    ``field(default_factory=...)`` would be required -- confirm.
    """

    # Model global arguments
    sample_rate: int = 16000
    repeat: int = 1
    dropout: float = 0.0
    separable: bool = True
    kernel_size_factor: float = 1.0
    labels: List[str] = MISSING
    timesteps: int = MISSING

    # Dataset configs
    train_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=True,
        trim_silence=False,
    )
    validation_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )
    test_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )

    # Optimizer / Scheduler config
    optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(
        sched=model_cfg.SchedConfig(),
    )

    # Model component configs
    preprocessor: AudioToMFCCPreprocessorConfig = AudioToMFCCPreprocessorConfig()
    spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig()
    # `timesteps` is read here while the class body runs, so `audio_length`
    # receives the class-level default declared above (MISSING), not a value
    # set later on an instance.
    crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = (
        CropOrPadSpectrogramAugmentationConfig(audio_length=timesteps)
    )

    encoder: ConvASREncoderConfig = ConvASREncoderConfig()
    decoder: ConvASRDecoderClassificationConfig = ConvASRDecoderClassificationConfig()
# Example no. 2
# 0
class JasperModelConfig(ctc_cfg.EncDecCTCConfig):
    """Typed default values for a Jasper model config.

    Extends ``ctc_cfg.EncDecCTCConfig`` and declares global model arguments,
    three ASR dataset sections, an optimizer/scheduler section and the
    preprocessor / augmentation / encoder / decoder sections. ``labels``
    defaults to ``MISSING`` (presumably the OmegaConf mandatory-value marker
    -- confirm against the file's imports).

    NOTE(review): the section defaults below are config instances created
    once, when this class body executes. If the class is decorated with
    ``@dataclass`` (the decorator is not visible in this chunk), Python 3.11+
    rejects unhashable instances as field defaults and
    ``field(default_factory=...)`` would be required -- confirm.
    """

    # Model global arguments
    sample_rate: int = 16000
    repeat: int = 1
    dropout: float = 0.0
    separable: bool = False
    labels: List[str] = MISSING

    # Dataset configs
    train_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig(
        manifest_filepath=None,
        shuffle=True,
        trim_silence=True,
    )
    validation_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )
    test_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )

    # Optimizer / Scheduler config
    optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(
        sched=model_cfg.SchedConfig(),
    )

    # Model general component configs
    preprocessor: AudioToMelSpectrogramPreprocessorConfig = (
        AudioToMelSpectrogramPreprocessorConfig()
    )
    spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig()
    encoder: ConvASREncoderConfig = ConvASREncoderConfig(activation="relu")
    decoder: ConvASRDecoderConfig = ConvASRDecoderConfig()
# Example no. 3
# 0
class MatchboxNetModelConfig(clf_cfg.EncDecClassificationConfig):
    """Typed default values for a MatchboxNet model config.

    Extends ``clf_cfg.EncDecClassificationConfig`` and re-declares the global
    model arguments, dataset sections, optimizer/scheduler section and model
    component sections with concrete defaults (``timesteps`` is 128, the MFCC
    preprocessor uses ``window_size=0.025``, the encoder uses ``"relu"``).
    ``labels`` defaults to ``MISSING`` (presumably the OmegaConf
    mandatory-value marker -- confirm against the file's imports).

    NOTE(review): the section defaults below are config instances created
    once, when this class body executes. If the class is decorated with
    ``@dataclass`` (the decorator is not visible in this chunk), Python 3.11+
    rejects unhashable instances as field defaults and
    ``field(default_factory=...)`` would be required -- confirm.
    """

    # Model global arguments
    sample_rate: int = 16000
    repeat: int = 1
    dropout: float = 0.0
    separable: bool = True
    kernel_size_factor: float = 1.0
    timesteps: int = 128
    labels: List[str] = MISSING

    # Dataset configs
    train_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=True,
        trim_silence=False,
    )
    validation_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )
    test_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig(
        manifest_filepath=None,
        shuffle=False,
    )

    # Optimizer / Scheduler config
    optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(
        sched=model_cfg.SchedConfig(),
    )

    # Model general component configs
    preprocessor: AudioToMFCCPreprocessorConfig = AudioToMFCCPreprocessorConfig(
        window_size=0.025,
    )
    spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig(
        freq_masks=2,
        time_masks=2,
        freq_width=15,
        time_width=25,
        rect_masks=5,
        rect_time=25,
        rect_freq=15,
    )
    # Reuse the class-level `timesteps` default declared above (128) instead
    # of repeating the literal, so the two values cannot drift apart. The name
    # is read while the class body runs, so this is still the constant 128 and
    # does not follow a `timesteps` value set later on an instance.
    crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = (
        CropOrPadSpectrogramAugmentationConfig(audio_length=timesteps)
    )

    encoder: ConvASREncoderConfig = ConvASREncoderConfig(activation="relu")
    decoder: ConvASRDecoderClassificationConfig = ConvASRDecoderClassificationConfig()