예제 #1
0
    def __init__(
        self,
        config,
        training=True,
        enable_tflite_convertible=False,
        speech_config=dict,
    ):
        """Build the Conformer encoder and LAS decoder config for the parent.

        Args:
            config: Mapping read for the keys ``'LAS_decoder'``, ``'dmodel'``,
                ``'reduction_factor'``, ``'num_blocks'``, ``'head_size'``,
                ``'num_heads'``, ``'kernel_size'``, ``'fc_factor'``,
                ``'dropout'`` and ``'name'``. ``config['LAS_decoder']`` is
                updated in place with an ``'encoder_dim'`` entry.
            training: Forwarded unchanged to the parent initialiser.
            enable_tflite_convertible: Forwarded unchanged to the parent
                initialiser.
            speech_config: Forwarded unchanged to the parent initialiser.
                NOTE(review): the default is the builtin ``dict`` type itself,
                not a dict instance -- confirm this is intended.
        """
        # Store the model dimension as the decoder's 'encoder_dim'; this
        # writes into the caller's config before LASConfig is built from it.
        las_section = config['LAS_decoder']
        las_section.update({'encoder_dim': config['dmodel']})
        decoder_config = LASConfig(**las_section)

        # Every encoder keyword argument is copied from the config entry of
        # the same name, in this order.
        encoder_keys = (
            'dmodel',
            'reduction_factor',
            'num_blocks',
            'head_size',
            'num_heads',
            'kernel_size',
            'fc_factor',
            'dropout',
            'name',
        )
        encoder = ConformerEncoder(**{key: config[key] for key in encoder_keys})

        super(ConformerLAS, self).__init__(
            encoder=encoder,
            config=decoder_config,
            training=training,
            enable_tflite_convertible=enable_tflite_convertible,
            speech_config=speech_config,
        )
        self.time_reduction_factor = config['reduction_factor']
예제 #2
0
    def __init__(self,
                 config,
                 training,
                 name: str = "LAS",
                 enable_tflite_convertible=False):
        """Build the ESPNet encoder and LAS decoder config for the parent.

        Args:
            config: Mapping read for the keys ``'LAS_decoder'`` and
                ``'model_config'``. ``config['LAS_decoder']`` is updated in
                place with an ``'encoder_dim'`` entry taken from
                ``config['model_config']['filter_size']``.
            training: Forwarded unchanged to the parent initialiser.
            name: Forwarded to the parent initialiser as ``name``.
            enable_tflite_convertible: Forwarded unchanged to the parent
                initialiser.
        """
        # Written into the caller's config before LASConfig is built from it.
        las_section = config['LAS_decoder']
        las_section.update(
            {'encoder_dim': config['model_config']['filter_size']})
        decoder_config = LASConfig(**las_section)

        # The whole 'model_config' section is expanded into encoder kwargs.
        encoder = ESPNet(**config['model_config'])

        super(ESPNetLAS, self).__init__(
            encoder=encoder,
            config=decoder_config,
            training=training,
            enable_tflite_convertible=enable_tflite_convertible,
            name=name,
        )
        # Constant here; this constructor does not derive it from config.
        self.time_reduction_factor = 4
예제 #3
0
    def __init__(self,
                 config,
                 input_shape: list,
                 training,
                 name: str = "LAS",
                 enable_tflite_convertible=False):
        """Build the DeepSpeech2 encoder and LAS decoder config for the parent.

        Args:
            config: Mapping read for the keys ``'LAS_decoder'``,
                ``'fc_conf'`` and ``'conv_conf'``; it is also passed whole to
                ``create_ds2`` as ``arch_config``. ``config['LAS_decoder']``
                is updated in place with an ``'encoder_dim'`` entry.
            input_shape: Passed to ``create_ds2`` as ``input_shape``.
            training: Forwarded unchanged to the parent initialiser.
            name: Passed to ``create_ds2`` as ``name``.
            enable_tflite_convertible: Forwarded unchanged to the parent
                initialiser.
        """
        # 'encoder_dim' is the last entry of the configured fc_units; it is
        # written into the caller's config before LASConfig is built from it.
        las_section = config['LAS_decoder']
        las_section.update({'encoder_dim': config['fc_conf']['fc_units'][-1]})
        decoder_config = LASConfig(**las_section)

        encoder = create_ds2(
            input_shape=input_shape,
            arch_config=config,
            name=name,
        )

        super(DeepSpeech2LAS, self).__init__(
            encoder=encoder,
            config=decoder_config,
            training=training,
            enable_tflite_convertible=enable_tflite_convertible,
        )

        # Product of the first element of every configured conv stride.
        reduction = 1
        for stride in config["conv_conf"]["conv_strides"]:
            reduction *= stride[0]
        self.time_reduction_factor = reduction
예제 #4
0
    def __init__(self,
                 config,
                 input_shape: list,
                 training,
                 name: str = "LAS",
                 enable_tflite_convertible=False,
                 speech_config=dict):
        """Build the DeepSpeech2 encoder and LAS decoder config for the parent.

        Args:
            config: Mapping read for the keys ``'LAS_decoder'``,
                ``'fc_conf'`` and ``'conv_conf'``; it is also passed whole to
                ``DeepSpeech2`` as ``arch_config``. ``config['LAS_decoder']``
                is updated in place with an ``'encoder_dim'`` entry.
            input_shape: Accepted but not used inside this constructor.
            training: Forwarded unchanged to the parent initialiser.
            name: Passed to ``DeepSpeech2`` as ``name``.
            enable_tflite_convertible: Forwarded unchanged to the parent
                initialiser.
            speech_config: Read for the keys ``'add_wav_info'``,
                ``'stride_ms'`` and ``'sample_rate'``, then forwarded to the
                parent initialiser.
                NOTE(review): the default is the builtin ``dict`` type itself,
                which cannot supply these keys -- callers presumably always
                pass a real mapping; confirm.
        """
        # 'encoder_dim' is the last entry of the configured fc_units; it is
        # written into the caller's config before LASConfig is built from it.
        las_section = config['LAS_decoder']
        las_section.update({'encoder_dim': config['fc_conf']['fc_units'][-1]})
        decoder_config = LASConfig(**las_section)

        add_wav_info = speech_config['add_wav_info']
        # hop_size = int(stride_ms * sample_rate // 1000): floor division
        # happens first, then int() is applied to the result.
        stride_product = speech_config['stride_ms'] * speech_config['sample_rate']
        hop_size = int(stride_product // 1000)

        encoder = DeepSpeech2(
            arch_config=config,
            name=name,
            add_wav_info=add_wav_info,
            hop_size=hop_size,
        )

        super(DeepSpeech2LAS, self).__init__(
            encoder=encoder,
            config=decoder_config,
            training=training,
            enable_tflite_convertible=enable_tflite_convertible,
            speech_config=speech_config,
        )

        # Product of the first element of every configured conv stride.
        reduction = 1
        for stride in config["conv_conf"]["conv_strides"]:
            reduction *= stride[0]
        self.time_reduction_factor = reduction