from espnet2.enh.separator.rnn_separator import RNNSeparator from espnet2.enh.separator.tcn_separator import TCNSeparator from espnet2.enh.separator.transformer_separator import TransformerSeparator from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none encoder_choices = ClassChoices( name="encoder", classes=dict(stft=STFTEncoder, conv=ConvEncoder, same=NullEncoder), type_check=AbsEncoder, default="stft", ) separator_choices = ClassChoices( name="separator", classes=dict( rnn=RNNSeparator, tcn=TCNSeparator, dprnn=DPRNNSeparator, transformer=TransformerSeparator, conformer=ConformerSeparator, wpe_beamformer=NeuralBeamformer, asteroid=AsteroidModel_Converter, ), type_check=AbsSeparator,
from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict( default=DefaultFrontend, sliding_window=SlidingWindow, s3prl=S3prlFrontend, ), type_check=AbsFrontend, default="default", ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, ), type_check=AbsNormalize, default="utterance_mvn", optional=True, ) label_aggregator_choices = ClassChoices(
from espnet2.layers.utterance_mvn import UtteranceMVN from espnet2.tasks.abs_task import AbsTask from espnet2.text.phoneme_tokenizer import g2p_choices from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import float_or_none, int_or_none, str2bool, str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict( default=DefaultFrontend, sliding_window=SlidingWindow, ), type_check=AbsFrontend, default="default", ) specaug_choices = ClassChoices( "specaug", classes=dict(specaug=SpecAug, ), type_check=AbsSpecAug, default=None, optional=True, ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN,
from espnet2.tts.feats_extract.log_spectrogram import LogSpectrogram from espnet2.tts.tacotron2 import Tacotron2 from espnet2.tts.transformer import Transformer from espnet2.tts.utils import ParallelWaveGANPretrainedVocoder from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.griffin_lim import Spectrogram2Waveform from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none feats_extractor_choices = ClassChoices( "feats_extract", classes=dict( fbank=LogMelFbank, spectrogram=LogSpectrogram, linear_spectrogram=LinearSpectrogram, ), type_check=AbsFeatsExtract, default="fbank", ) pitch_extractor_choices = ClassChoices( "pitch_extract", classes=dict(dio=Dio), type_check=AbsFeatsExtract, default=None, optional=True, ) energy_extractor_choices = ClassChoices( "energy_extract", classes=dict(energy=Energy), type_check=AbsFeatsExtract,
from espnet2.enh.separator.skim_separator import SkiMSeparator from espnet2.enh.separator.tcn_separator import TCNSeparator from espnet2.enh.separator.transformer_separator import TransformerSeparator from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none encoder_choices = ClassChoices( name="encoder", classes=dict(stft=STFTEncoder, conv=ConvEncoder, same=NullEncoder), type_check=AbsEncoder, default="stft", ) separator_choices = ClassChoices( name="separator", classes=dict( rnn=RNNSeparator, skim=SkiMSeparator, tcn=TCNSeparator, dprnn=DPRNNSeparator, dccrn=DCCRNSeparator, transformer=TransformerSeparator, conformer=ConformerSeparator, wpe_beamformer=NeuralBeamformer, asteroid=AsteroidModel_Converter,
from espnet2.enh.separator.svoice_separator import SVoiceSeparator from espnet2.enh.separator.tcn_separator import TCNSeparator from espnet2.enh.separator.transformer_separator import TransformerSeparator from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none encoder_choices = ClassChoices( name="encoder", classes=dict(stft=STFTEncoder, conv=ConvEncoder, same=NullEncoder), type_check=AbsEncoder, default="stft", ) separator_choices = ClassChoices( name="separator", classes=dict( asteroid=AsteroidModel_Converter, conformer=ConformerSeparator, dan=DANSeparator, dc_crn=DC_CRNSeparator, dccrn=DCCRNSeparator, dpcl=DPCLSeparator, dpcl_e2e=DPCLE2ESeparator, dprnn=DPRNNSeparator, fasnet=FaSNetSeparator,
from espnet2.train.preprocessor import CommonPreprocessor from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract from espnet2.tts.feats_extract.dio import Dio from espnet2.tts.feats_extract.energy import Energy from espnet2.tts.feats_extract.linear_spectrogram import LinearSpectrogram from espnet2.tts.feats_extract.log_mel_fbank import LogMelFbank from espnet2.tts.feats_extract.log_spectrogram import LogSpectrogram from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none, str2bool, str_or_none feats_extractor_choices = ClassChoices( "feats_extract", classes=dict( fbank=LogMelFbank, log_spectrogram=LogSpectrogram, linear_spectrogram=LinearSpectrogram, ), type_check=AbsFeatsExtract, default="linear_spectrogram", ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, ), type_check=AbsNormalize, default=None, optional=True, ) tts_choices = ClassChoices(
from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract from espnet2.tts.feats_extract.dio import Dio from espnet2.tts.feats_extract.energy import Energy from espnet2.tts.feats_extract.log_mel_fbank import LogMelFbank from espnet2.tts.feats_extract.log_spectrogram import LogSpectrogram from espnet2.tts.tacotron2 import Tacotron2 from espnet2.tts.transformer import Transformer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none feats_extractor_choices = ClassChoices( "feats_extract", classes=dict(fbank=LogMelFbank, spectrogram=LogSpectrogram), type_check=AbsFeatsExtract, default="fbank", ) pitch_extractor_choices = ClassChoices( "pitch_extract", classes=dict(dio=Dio), type_check=AbsFeatsExtract, default=None, optional=True, ) energy_extractor_choices = ClassChoices( "energy_extract", classes=dict(energy=Energy), type_check=AbsFeatsExtract, default=None, optional=True,
from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessorPairedSpeech from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import float_or_none from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict(robust=RobustFrontend, ), type_check=AbsFrontend, default="robust", ) encoder_choices = ClassChoices( "encoder", classes=dict( conformer=ConformerEncoder, transformer=TransformerEncoder, contextual_block_transformer=ContextualBlockTransformerEncoder, vgg_rnn=VGGRNNEncoder, rnn=RNNEncoder, wav2vec2=FairSeqWav2Vec2Encoder, hubert=FairseqHubertEncoder, hubert_pretrain=FairseqHubertPretrainEncoder, ),
from espnet2.mt.espnet_model import ESPnetMTModel from espnet2.mt.frontend.embedding import Embedding from espnet2.tasks.abs_task import AbsTask from espnet2.text.phoneme_tokenizer import g2p_choices from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import MutliTokenizerCommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none, str2bool, str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict(embed=Embedding, ), type_check=AbsFrontend, default="embed", ) preencoder_choices = ClassChoices( name="preencoder", classes=dict( sinc=LightweightSincConvs, linear=LinearProjection, ), type_check=AbsPreEncoder, default=None, optional=True, ) encoder_choices = ClassChoices( "encoder", classes=dict(
from espnet2.text.phoneme_tokenizer import g2p_choices from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import float_or_none from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict(default=DefaultFrontend, sliding_window=SlidingWindow), type_check=AbsFrontend, default="default", ) specaug_choices = ClassChoices( name="specaug", classes=dict(specaug=SpecAug), type_check=AbsSpecAug, default=None, optional=True, ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, ),
from espnet2.lm.transformer_lm import TransformerLM from hynet.main_funcs.collect_stats import collect_stats from hynet.train.trainer import Trainer from hynet.layers.fair_like_norm import FairNormalize from hynet.asr.espnet_model import ESPnetASRModel from hynet.asr.encoder.wav2vec2_encoder import FairSeqWav2VecCtc from hynet.asr.ctc import CTC from hynet.schedulers.tri_stage_lr import TriStageLR frontend_choices = ClassChoices( name="frontend", classes=dict(default=DefaultFrontend, sliding_window=SlidingWindow), type_check=AbsFrontend, default="default", ) specaug_choices = ClassChoices( name="specaug", classes=dict(specaug=SpecAug), type_check=AbsSpecAug, default=None, optional=True, ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, fair_like_norm=FairNormalize,
from espnet2.enh.nets.tasnet import TasNet
from espnet2.enh.nets.tf_mask_net import TFMaskingNet
from espnet2.tasks.abs_task import AbsTask
from espnet2.torch_utils.initialize import initialize
from espnet2.train.class_choices import ClassChoices
from espnet2.train.collate_fn import CommonCollateFn
from espnet2.train.trainer import Trainer
from espnet2.utils.get_default_kwargs import get_default_kwargs
from espnet2.utils.nested_dict_action import NestedDictAction
from espnet2.utils.types import str2bool, str_or_none

# Registry of selectable enhancement networks; "tf_masking" is picked when
# nothing else is requested.
enh_choices = ClassChoices(
    name="enh",
    classes={
        "tf_masking": TFMaskingNet,
        "tasnet": TasNet,
        "wpe_beamformer": BeamformerNet,
    },
    type_check=AbsEnhancement,
    default="tf_masking",
)

# NOTE(review): presumably an upper bound on the number of reference signals;
# confirm against the code that reads this constant.
MAX_REFERENCE_NUM = 100


class EnhancementTask(AbsTask):
    # Raise this value if the task needs more than one optimizer.
    num_optimizers: int = 1

    # Each entry contributes a pair of options; here: --enh and --enh_conf.
    class_choices_list = [
        enh_choices,
    ]
from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import MutliTokenizerCommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import float_or_none from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict( default=DefaultFrontend, sliding_window=SlidingWindow, s3prl=S3prlFrontend, ), type_check=AbsFrontend, default="default", ) specaug_choices = ClassChoices( name="specaug", classes=dict(specaug=SpecAug), type_check=AbsSpecAug, default=None, optional=True, ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN,
from espnet2.asr.frontend.windowing import SlidingWindow from espnet2.asr.maskctc_model import MaskCTCModel from espnet2.asr.postencoder.abs_postencoder import AbsPostEncoder from espnet2.layers.global_mvn import GlobalMVN from espnet2.layers.utterance_mvn import UtteranceMVN from espnet2.layers.abs_normalize import AbsNormalize encoder_choices = ClassChoices( "encoder", classes=dict( conformer=ConformerEncoder, transformer=TransformerEncoder, contextual_block_transformer=ContextualBlockTransformerEncoder, vgg_rnn=VGGRNNEncoder, rnn=RNNEncoder, wav2vec2=FairSeqWav2Vec2Encoder, hubert=FairseqHubertEncoder, hubert_pretrain=FairseqHubertPretrainEncoder, ), type_check=AbsEncoder, default="rnn", ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, ), type_check=AbsNormalize,
from espnet2.layers.utterance_mvn import UtteranceMVN from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import int_or_none from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none frontend_choices = ClassChoices( name="frontend", classes=dict(default=DefaultFrontend), type_check=AbsFrontend, default="default", ) specaug_choices = ClassChoices( name="specaug", classes=dict(specaug=SpecAug), type_check=AbsSpecAug, default=None, optional=True, ) normalize_choices = ClassChoices( "normalize", classes=dict( global_mvn=GlobalMVN, utterance_mvn=UtteranceMVN, ),
from espnet2.torch_utils.initialize import initialize
from espnet2.train.class_choices import ClassChoices
from espnet2.train.collate_fn import CommonCollateFn
from espnet2.train.preprocessor import CommonPreprocessor
from espnet2.train.trainer import Trainer
from espnet2.utils.get_default_kwargs import get_default_kwargs
from espnet2.utils.nested_dict_action import NestedDictAction
from espnet2.utils.types import str2bool, str_or_none

# Registry of selectable language-model architectures; "seq_rnn" is the
# default entry.
lm_choices = ClassChoices(
    name="lm",
    classes={
        "seq_rnn": SequentialRNNLM,
        "transformer": TransformerLM,
    },
    type_check=AbsLM,
    default="seq_rnn",
)


class LMTask(AbsTask):
    # Raise this value if the task needs more than one optimizer.
    num_optimizers: int = 1

    # Configurable class registries exposed by this task.
    class_choices_list = [lm_choices]

    # Swap in a different Trainer class here to customise the train() or
    # eval() procedures.
    trainer = Trainer
from espnet2.layers.utterance_mvn import UtteranceMVN
from espnet2.tasks.abs_task import AbsTask
from espnet2.torch_utils.initialize import initialize
from espnet2.train.class_choices import ClassChoices
from espnet2.train.collate_fn import CommonCollateFn
from espnet2.train.preprocessor import CommonPreprocessor_multi
from espnet2.train.trainer import Trainer
from espnet2.utils.get_default_kwargs import get_default_kwargs
from espnet2.utils.nested_dict_action import NestedDictAction
from espnet2.utils.types import int_or_none, str2bool, str_or_none

# Registry of selectable enhancement networks; defaults to "tf_masking".
enh_choices = ClassChoices(
    name="enh",
    classes={
        "tf_masking": TFMaskingNet,
        "tasnet": TasNet,
        "wpe_beamformer": BeamformerNet,
    },
    type_check=AbsEnhancement,
    default="tf_masking",
)

# Registry of selectable frontends; only the "default" entry is registered.
frontend_choices = ClassChoices(
    name="frontend",
    classes={"default": DefaultFrontend},
    type_check=AbsFrontend,
    default="default",
)

# Registry for SpecAug; declared optional with no default selection.
specaug_choices = ClassChoices(
    name="specaug",
    classes={"specaug": SpecAug},
    type_check=AbsSpecAug,
    default=None,
    optional=True,
)
from espnet2.tts.abs_tts import AbsTTS
from espnet2.tts.espnet_model import ESPnetTTSModel
from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract
from espnet2.tts.feats_extract.log_mel_fbank import LogMelFbank
from espnet2.tts.feats_extract.log_spectrogram import LogSpectrogram
from espnet2.tts.tacotron2 import Tacotron2
from espnet2.tts.transformer import Transformer
from espnet2.utils.get_default_kwargs import get_default_kwargs
from espnet2.utils.nested_dict_action import NestedDictAction
from espnet2.utils.types import int_or_none, str2bool, str_or_none

# Registry of selectable feature extractors; defaults to "fbank".
feats_extractor_choices = ClassChoices(
    name="feats_extract",
    classes={
        "fbank": LogMelFbank,
        "spectrogram": LogSpectrogram,
    },
    type_check=AbsFeatsExtract,
    default="fbank",
)

# Registry of normalization layers; declared optional, defaulting to
# "global_mvn".
normalize_choices = ClassChoices(
    name="normalize",
    classes={"global_mvn": GlobalMVN},
    type_check=AbsNormalize,
    default="global_mvn",
    optional=True,
)

# Registry of selectable TTS models; defaults to "tacotron2".
tts_choices = ClassChoices(
    name="tts",
    classes={
        "tacotron2": Tacotron2,
        "transformer": Transformer,
    },
    type_check=AbsTTS,
    default="tacotron2",
)
from espnet2.lm.espnet_model import ESPnetLanguageModel from espnet2.lm.seq_rnn import SequentialRNNLM from espnet2.tasks.abs_task import AbsTask from espnet2.torch_utils.initialize import initialize from espnet2.train.class_choices import ClassChoices from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer from espnet2.utils.get_default_kwargs import get_default_kwargs from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str2bool from espnet2.utils.types import str_or_none lm_choices = ClassChoices( "lm", classes=dict(seq_rnn=SequentialRNNLM), type_check=AbsLM, default="seq_rnn" ) class LMTask(AbsTask): # If you need more than one optimizers, change this value num_optimizers: int = 1 # Add variable objects configurations class_choices_list = [lm_choices] # If you need to modify train() or eval() procedures, change Trainer class here trainer = Trainer @classmethod def add_task_arguments(cls, parser: argparse.ArgumentParser):
from espnet2.train.class_choices import ClassChoices
from espnet2.train.collate_fn import CommonCollateFn
from espnet2.train.preprocessor import CommonPreprocessor
from espnet2.train.trainer import Trainer
from espnet2.utils.get_default_kwargs import get_default_kwargs
from espnet2.utils.nested_dict_action import NestedDictAction
from espnet2.utils.types import int_or_none, str_or_none
from moneynet2.imgr.imgr_model import HynetImgrModel

# Registry of normalization layers; declared optional, defaulting to
# "utterance_mvn".
normalize_choices = ClassChoices(
    name="normalize",
    classes={
        "global_mvn": GlobalMVN,
        "utterance_mvn": UtteranceMVN,
    },
    type_check=AbsNormalize,
    default="utterance_mvn",
    optional=True,
)


class ImgrTask(AbsTask):
    # Raise this value if the task needs more than one optimizer.
    num_optimizers: int = 1

    # Configurable class registries exposed by this task.
    # Each entry contributes a pair of options; here: --normalize and
    # --normalize_conf.
    class_choices_list = [
        normalize_choices,
    ]