def __init__(
    self,
    config,
    training=True,
    enable_tflite_convertible=False,
    speech_config=dict,
):
    """Assemble a LAS model whose encoder is a ``ConformerEncoder``.

    Args:
        config: Mapping that supplies the encoder hyper-parameters
            (``dmodel``, ``reduction_factor``, ``num_blocks``,
            ``head_size``, ``num_heads``, ``kernel_size``, ``fc_factor``,
            ``dropout``, ``name``) and a ``LAS_decoder`` sub-mapping.
            The ``LAS_decoder`` entry is updated in place with an
            ``encoder_dim`` key set to ``config['dmodel']``.
        training: Forwarded unchanged to the parent constructor.
        enable_tflite_convertible: Forwarded unchanged to the parent
            constructor.
        speech_config: Forwarded unchanged to the parent constructor.
    """
    # NOTE(review): the default for ``speech_config`` is the ``dict`` type
    # itself, not an empty dict -- confirm the parent class copes with it.

    # The decoder needs the encoder output width; it is written back into
    # the caller's config so LASConfig picks it up.
    las_settings = config['LAS_decoder']
    las_settings.update({'encoder_dim': config['dmodel']})
    decoder_config = LASConfig(**las_settings)

    # Every encoder argument is read from the config entry of the same name.
    encoder_keys = (
        'dmodel',
        'reduction_factor',
        'num_blocks',
        'head_size',
        'num_heads',
        'kernel_size',
        'fc_factor',
        'dropout',
        'name',
    )
    encoder = ConformerEncoder(**{key: config[key] for key in encoder_keys})

    super(ConformerLAS, self).__init__(
        encoder=encoder,
        config=decoder_config,
        training=training,
        enable_tflite_convertible=enable_tflite_convertible,
        speech_config=speech_config,
    )
    self.time_reduction_factor = config['reduction_factor']
def __init__(self, config, training, name: str = "LAS",
             enable_tflite_convertible=False):
    """Assemble a LAS model whose encoder is an ``ESPNet`` instance.

    Args:
        config: Mapping with a ``model_config`` sub-mapping (expanded as
            keyword arguments to ``ESPNet``; its ``filter_size`` entry is
            used as the decoder's ``encoder_dim``) and a ``LAS_decoder``
            sub-mapping, which is updated in place with ``encoder_dim``.
        training: Forwarded unchanged to the parent constructor.
        name: Forwarded unchanged to the parent constructor.
        enable_tflite_convertible: Forwarded unchanged to the parent
            constructor.
    """
    # Record the encoder output width in the caller's decoder settings
    # before turning them into a LASConfig.
    las_settings = config['LAS_decoder']
    las_settings.update({'encoder_dim': config['model_config']['filter_size']})
    decoder_config = LASConfig(**las_settings)

    encoder = ESPNet(**config['model_config'])

    super(ESPNetLAS, self).__init__(
        encoder=encoder,
        config=decoder_config,
        training=training,
        enable_tflite_convertible=enable_tflite_convertible,
        name=name,
    )
    # Fixed value for this model; it is not derived from the config.
    self.time_reduction_factor = 4
def __init__(self, config, input_shape: list, training, name: str = "LAS",
             enable_tflite_convertible=False):
    """Assemble a LAS model whose encoder comes from ``create_ds2``.

    Args:
        config: Mapping passed to ``create_ds2`` as ``arch_config``. Its
            ``fc_conf['fc_units'][-1]`` value becomes the decoder's
            ``encoder_dim``, its ``LAS_decoder`` sub-mapping is updated in
            place with that key, and ``conv_conf['conv_strides']`` is
            read to derive ``time_reduction_factor``.
        input_shape: Passed to ``create_ds2`` as ``input_shape``.
        training: Forwarded unchanged to the parent constructor.
        name: Passed to ``create_ds2`` only; it is not forwarded to the
            parent constructor.
        enable_tflite_convertible: Forwarded unchanged to the parent
            constructor.
    """
    # The last fully-connected width is what the decoder sees as input.
    las_settings = config['LAS_decoder']
    las_settings.update({'encoder_dim': config['fc_conf']['fc_units'][-1]})
    decoder_config = LASConfig(**las_settings)

    encoder = create_ds2(input_shape=input_shape, arch_config=config, name=name)

    super(DeepSpeech2LAS, self).__init__(
        encoder=encoder,
        config=decoder_config,
        training=training,
        enable_tflite_convertible=enable_tflite_convertible,
    )

    # Product of the first element of every conv stride entry.
    reduction = 1
    for stride in config["conv_conf"]["conv_strides"]:
        reduction *= stride[0]
    self.time_reduction_factor = reduction
def __init__(self, config, input_shape: list, training, name: str = "LAS",
             enable_tflite_convertible=False, speech_config=dict):
    """Assemble a LAS model whose encoder is a ``DeepSpeech2`` instance.

    Args:
        config: Mapping passed to ``DeepSpeech2`` as ``arch_config``. Its
            ``fc_conf['fc_units'][-1]`` value becomes the decoder's
            ``encoder_dim``, its ``LAS_decoder`` sub-mapping is updated in
            place with that key, and ``conv_conf['conv_strides']`` is
            read to derive ``time_reduction_factor``.
        input_shape: Accepted but not used by this constructor.
        training: Forwarded unchanged to the parent constructor.
        name: Passed to ``DeepSpeech2`` only; it is not forwarded to the
            parent constructor.
        enable_tflite_convertible: Forwarded unchanged to the parent
            constructor.
        speech_config: Mapping that must provide ``add_wav_info``,
            ``stride_ms`` and ``sample_rate``. It is also forwarded to the
            parent constructor. The signature default is the ``dict``
            type, which cannot satisfy these lookups, so callers have to
            pass a real mapping.

    Raises:
        TypeError: If ``speech_config`` is left at its default (the
            ``dict`` type) instead of a populated mapping.
    """
    # The default is the `dict` class, not an instance. Subscripting it
    # either fails outright or (Python 3.9+) yields a GenericAlias that
    # blows up later in the hop-size arithmetic. Fail early, with a clear
    # message, and before the caller's config is mutated.
    if speech_config is dict:
        raise TypeError(
            "speech_config must be a mapping providing 'add_wav_info', "
            "'stride_ms' and 'sample_rate'; got the `dict` type (the default)"
        )

    # The last fully-connected width is what the decoder sees as input.
    las_settings = config['LAS_decoder']
    las_settings.update({'encoder_dim': config['fc_conf']['fc_units'][-1]})
    decoder_config = LASConfig(**las_settings)

    add_wav_info = speech_config['add_wav_info']
    # stride_ms * sample_rate // 1000, truncated to int.
    hop_size = int(speech_config['stride_ms'] * speech_config['sample_rate'] // 1000)
    encoder = DeepSpeech2(
        arch_config=config,
        name=name,
        add_wav_info=add_wav_info,
        hop_size=hop_size,
    )

    super(DeepSpeech2LAS, self).__init__(
        encoder=encoder,
        config=decoder_config,
        training=training,
        enable_tflite_convertible=enable_tflite_convertible,
        speech_config=speech_config,
    )

    # Product of the first element of every conv stride entry.
    reduction = 1
    for stride in config["conv_conf"]["conv_strides"]:
        reduction *= stride[0]
    self.time_reduction_factor = reduction