def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1):
    """Build the encoder stack selected by ``etype``.

    ``etype`` is read as ``[prefix] + <rnn type> + [suffix]``:

    * a leading "vgg" selects a two-module encoder (front-end + RNN);
      without it a single RNN/RNNP module is built;
    * inside the "vgg" family a trailing "p" selects ``VGG2L`` + ``RNNP``,
      a trailing "s" selects ``SincNet`` + ``RNN`` (with a fixed input
      size of 256), anything else selects ``VGG2L`` + ``RNN``;
    * outside the "vgg" family a trailing "p" selects ``RNNP``, otherwise
      ``RNN``.

    An unrecognised RNN type is only reported through ``logging.error``;
    construction continues with whatever ``typ`` was parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp" or "vgggru".
        idim: Input dimension handed to the RNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.
    """
    super(Encoder, self).__init__()

    # Remove the front-end markers as whole prefixes.  str.lstrip("vgg")
    # strips any run of leading 'v'/'g' characters, which turns "gru" and
    # "vgggru" into "ru" and makes valid names fail the check below.
    typ = etype
    for prefix in ("vgg", "sinc"):
        if typ.startswith(prefix):
            typ = typ[len(prefix):]
    typ = typ.rstrip("p")

    if typ not in ('lstm', 'gru', 'blstm', 'bgru', 'bligru'):
        logging.error("Error: need to specify an appropriate encoder architecture")

    if etype.startswith("vgg"):
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNNP(get_vgg2l_odim(idim, in_channel=in_channel), elayers, eunits,
                     eprojs, subsample, dropout, typ=typ),
            ])
            logging.info('Use CNN-VGG + ' + typ.upper() + 'P for encoder')
        elif etype.endswith("s"):
            # NOTE(review): a trailing "s" is never removed from ``typ``, so
            # this branch always also logs the architecture error above and
            # hands a type ending in "s" to RNN -- confirm the intended
            # naming scheme for the SincNet encoder.
            self.enc = torch.nn.ModuleList([
                SincNet(sincnet=True, nb_filters=512),
                RNN(256, elayers, eunits, eprojs, dropout, typ=typ),
            ])
            logging.info('Use Raw SincNet + ' + typ.upper() + ' for encoder')
        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNN(get_vgg2l_odim(idim, in_channel=in_channel), elayers, eunits,
                    eprojs, dropout, typ=typ),
            ])
            logging.info('Use CNN-VGG + ' + typ.upper() + ' for encoder')
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList(
                [RNNP(idim, elayers, eunits, eprojs, subsample, dropout, typ=typ)])
            logging.info(typ.upper() + ' with every-layer projection for encoder')
        else:
            self.enc = torch.nn.ModuleList(
                [RNN(idim, elayers, eunits, eprojs, dropout, typ=typ)])
            logging.info(typ.upper() + ' without projection for encoder')
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1):
    """Build the encoder stack selected by ``etype``.

    ``etype`` is read as ``["vgg"] + <rnn type> + ["p"]``: a leading "vgg"
    puts a ``VGG2L`` front-end before the recurrent network, and a trailing
    "p" selects ``RNNP`` (which additionally receives ``subsample``)
    instead of ``RNN``.

    An unrecognised RNN type is only reported through ``logging.error``;
    construction continues with whatever ``typ`` was parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp" or "vgggru".
        idim: Input dimension handed to the RNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.
    """
    super(Encoder, self).__init__()

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "gru" and "vgggru" into "ru"
    # and makes valid names fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.rstrip("p")

    if typ not in ["lstm", "gru", "blstm", "bgru"]:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    if use_vgg:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNNP(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout,
                    typ=typ,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + "P for encoder")
        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNN(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    elayers,
                    eunits,
                    eprojs,
                    dropout,
                    typ=typ,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + " for encoder")
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                RNNP(idim, elayers, eunits, eprojs, subsample, dropout, typ=typ)
            ])
            logging.info(typ.upper() + " with every-layer projection for encoder")
        else:
            self.enc = torch.nn.ModuleList(
                [RNN(idim, elayers, eunits, eprojs, dropout, typ=typ)])
            logging.info(typ.upper() + " without projection for encoder")
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1):
    """Build the encoder links for one of the four supported ``etype`` names.

    Supported values and the links they register inside ``init_scope``:

    * "blstm"     -> ``enc1 = BLSTM``
    * "blstmp"    -> ``enc1 = BLSTMP``
    * "vggblstmp" -> ``enc1 = VGG2L``, ``enc2 = BLSTMP``
    * "vggblstm"  -> ``enc1 = VGG2L``, ``enc2 = BLSTM``

    Any other value logs an error and terminates the process via
    ``sys.exit`` with a non-zero status.

    Args:
        etype: Encoder type name (see the list above); also stored on
            ``self.etype``.
        idim: Input dimension handed to the BLSTM/BLSTMP, or to
            ``get_vgg2l_odim`` when VGG2L is used.
        elayers: Forwarded to BLSTM/BLSTMP (presumably number of layers).
        eunits: Forwarded to BLSTM/BLSTMP (presumably hidden units).
        eprojs: Forwarded to BLSTM/BLSTMP (presumably projection units).
        subsample: Forwarded to BLSTMP only.
        dropout: Forwarded to BLSTM/BLSTMP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        SystemExit: If ``etype`` is not one of the supported names.
    """
    super(Encoder, self).__init__()
    with self.init_scope():
        if etype == 'blstm':
            self.enc1 = BLSTM(idim, elayers, eunits, eprojs, dropout)
            logging.info('BLSTM without projection for encoder')
        elif etype == 'blstmp':
            self.enc1 = BLSTMP(idim, elayers, eunits, eprojs, subsample, dropout)
            logging.info('BLSTM with every-layer projection for encoder')
        elif etype == 'vggblstmp':
            self.enc1 = VGG2L(in_channel)
            self.enc2 = BLSTMP(get_vgg2l_odim(idim, in_channel=in_channel),
                               elayers, eunits, eprojs, subsample, dropout)
            logging.info('Use CNN-VGG + BLSTMP for encoder')
        elif etype == 'vggblstm':
            self.enc1 = VGG2L(in_channel)
            self.enc2 = BLSTM(get_vgg2l_odim(idim, in_channel=in_channel),
                              elayers, eunits, eprojs, dropout)
            logging.info('Use CNN-VGG + BLSTM for encoder')
        else:
            logging.error(
                "Error: need to specify an appropriate encoder archtecture"
            )
            # A bare sys.exit() reports success (status 0) to the calling
            # shell; use a non-zero status so the failure is visible.
            sys.exit(1)

    self.etype = etype
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1):
    """Build the ``chainer.Sequential`` encoder selected by ``etype``.

    ``etype`` is read as ``["vgg"] + <rnn type> + ["p"]``: a leading "vgg"
    puts a ``VGG2L`` front-end before the recurrent network, and a trailing
    "p" selects ``RNNP`` (which additionally receives ``subsample``)
    instead of ``RNN``.  The result is registered as ``self.enc`` inside
    ``init_scope``.

    An unrecognised RNN type is only reported through ``logging.error``;
    construction continues with whatever ``typ`` was parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp" or "vgggru".
        idim: Input dimension handed to the RNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.
    """
    super(Encoder, self).__init__()

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "gru" and "vgggru" into "ru"
    # and makes valid names fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.rstrip("p")

    if typ not in ['lstm', 'gru', 'blstm', 'bgru']:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    with self.init_scope():
        if use_vgg:
            if etype.endswith("p"):
                self.enc = chainer.Sequential(
                    VGG2L(in_channel),
                    RNNP(get_vgg2l_odim(idim, in_channel=in_channel), elayers,
                         eunits, eprojs, subsample, dropout, typ=typ))
                logging.info('Use CNN-VGG + ' + typ.upper() + 'P for encoder')
            else:
                self.enc = chainer.Sequential(
                    VGG2L(in_channel),
                    RNN(get_vgg2l_odim(idim, in_channel=in_channel), elayers,
                        eunits, eprojs, dropout, typ=typ))
                logging.info('Use CNN-VGG + ' + typ.upper() + ' for encoder')
        else:
            if etype.endswith("p"):
                self.enc = chainer.Sequential(
                    RNNP(idim, elayers, eunits, eprojs, subsample, dropout, typ=typ))
                logging.info(typ.upper() + ' with every-layer projection for encoder')
            else:
                self.enc = chainer.Sequential(
                    RNN(idim, elayers, eunits, eprojs, dropout, typ=typ))
                logging.info(typ.upper() + ' without projection for encoder')
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1):
    """Build the BRNN/BRNNP encoder stack selected by ``etype``.

    ``etype`` is read as ``["vgg"] + ["b"] + <rnn type> + ["p"]``: a leading
    "vgg" puts a ``VGG2L`` front-end before the recurrent network, and a
    trailing "p" selects ``BRNNP`` (which additionally receives
    ``subsample``) instead of ``BRNN``.  The leading "b" is removed before
    the type ("lstm" or "gru") is passed on as ``typ``.

    An unrecognised RNN type is only reported through ``logging.error``;
    construction continues with whatever ``typ`` was parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp" or "vggbgru".
        idim: Input dimension handed to the BRNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        elayers: Forwarded to BRNN/BRNNP (presumably number of layers).
        eunits: Forwarded to BRNN/BRNNP (presumably hidden units).
        eprojs: Forwarded to BRNN/BRNNP (presumably projection units).
        subsample: Forwarded to BRNNP only.
        dropout: Forwarded to BRNN/BRNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.
    """
    super(Encoder, self).__init__()

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "gru" and "vgggru" into "ru"
    # and makes them fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.lstrip("b").rstrip("p")

    if typ != "lstm" and typ != "gru":
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    if use_vgg:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                BRNNP(get_vgg2l_odim(idim, in_channel=in_channel), elayers,
                      eunits, eprojs, subsample, dropout, typ=typ)
            ])
            logging.info('Use CNN-VGG + B' + typ.upper() + 'P for encoder')
        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                BRNN(get_vgg2l_odim(idim, in_channel=in_channel), elayers,
                     eunits, eprojs, dropout, typ=typ)
            ])
            logging.info('Use CNN-VGG + B' + typ.upper() + ' for encoder')
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                BRNNP(idim, elayers, eunits, eprojs, subsample, dropout, typ=typ)
            ])
            logging.info('B' + typ.upper() + ' with every-layer projection for encoder')
        else:
            self.enc = torch.nn.ModuleList(
                [BRNN(idim, elayers, eunits, eprojs, dropout, typ=typ)])
            logging.info('B' + typ.upper() + ' without projection for encoder')
def __init__(
    self,
    input_size: int,
    rnn_type: str = "lstm",
    bidirectional: bool = True,
    use_projection: bool = True,
    num_layers: int = 4,
    hidden_size: int = 320,
    output_size: int = 320,
    dropout: float = 0.0,
    in_channel: int = 1,
):
    """Set up a VGG2L front-end followed by an RNN or RNNP stack.

    Args:
        input_size: Input dimension handed to ``get_vgg2l_odim``.
        rnn_type: Either "lstm" or "gru".
        bidirectional: When true, "b" is prepended to the type name that is
            passed to the RNN as ``typ``.
        use_projection: Selects ``RNNP`` when true, ``RNN`` otherwise.
        num_layers: Forwarded to RNN/RNNP; also sizes the all-ones
            subsample array given to RNNP.
        hidden_size: Forwarded to RNN/RNNP.
        output_size: Forwarded to RNN/RNNP and stored as ``_output_size``.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        ValueError: If ``rnn_type`` is neither "lstm" nor "gru".
    """
    assert check_argument_types()
    super().__init__()
    self._output_size = output_size
    self.rnn_type = rnn_type
    self.bidirectional = bidirectional
    self.use_projection = use_projection

    supported_types = {"lstm", "gru"}
    if rnn_type not in supported_types:
        raise ValueError(f"Not supported rnn_type={rnn_type}")

    # Subsample is not used for VGGRNN, so every entry is 1.
    no_subsample = np.ones(num_layers + 1, dtype=np.int64)
    typ = "b" + rnn_type if bidirectional else rnn_type

    # Build the front-end before the recurrent part, as a list literal would.
    frontend = VGG2L(in_channel)
    rnn_input_size = get_vgg2l_odim(input_size, in_channel=in_channel)
    if use_projection:
        recurrent = RNNP(
            rnn_input_size,
            num_layers,
            hidden_size,
            output_size,
            no_subsample,
            dropout,
            typ=typ,
        )
    else:
        recurrent = RNN(
            rnn_input_size,
            num_layers,
            hidden_size,
            output_size,
            dropout,
            typ=typ,
        )
    self.enc = torch.nn.ModuleList([frontend, recurrent])
def __init__(
    self,
    etype,
    idim,
    elayers_sd,
    elayers_rec,
    eunits,
    eprojs,
    subsample,
    dropout,
    num_spkrs=2,
    in_channel=1,
):
    """Initialize the encoder of single-channel multi-speaker ASR.

    Only ``etype`` values that start with "vgg" and end with "p" are
    supported.  For those, three module groups are created:

    * ``enc_mix``: a single ``VGG2L`` front-end;
    * ``enc_sd``: one ``RNNP`` per speaker (``num_spkrs`` of them), each
      given ``subsample[:elayers_sd + 1]``;
    * ``enc_rec``: one ``RNNP`` given ``subsample[elayers_sd:]``.

    Any other ``etype`` logs an error and terminates the process via
    ``sys.exit`` with a non-zero status.  An unrecognised RNN type alone is
    only reported through ``logging.error``.

    Args:
        etype: Encoder type string, e.g. "vggblstmp".
        idim: Input dimension handed to ``get_vgg2l_odim``.
        elayers_sd: Layer count forwarded to each ``enc_sd`` RNNP; also the
            split point of ``subsample``.
        elayers_rec: Layer count forwarded to the ``enc_rec`` RNNP.
        eunits: Forwarded to every RNNP (presumably hidden units).
        eprojs: Forwarded to every RNNP; also the input size of the
            ``enc_rec`` RNNP.
        subsample: Sliceable sequence split between ``enc_sd`` and
            ``enc_rec``.
        dropout: Forwarded to every RNNP.
        num_spkrs: Number of ``enc_sd`` branches; stored on the instance.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        SystemExit: If ``etype`` does not have the "vgg...p" shape.
    """
    super(EncoderMix, self).__init__()

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "vgggrup" into "ru" and makes
    # a valid name fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.rstrip("p")

    if typ not in ["lstm", "gru", "blstm", "bgru"]:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    if not (use_vgg and etype.endswith("p")):
        logging.error(
            f"Error: need to specify an appropriate encoder architecture. "
            f"Illegal name {etype}")
        # A bare sys.exit() reports success (status 0) to the calling
        # shell; use a non-zero status so the failure is visible.
        sys.exit(1)

    self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
    self.enc_sd = torch.nn.ModuleList([
        torch.nn.ModuleList([
            RNNP(
                get_vgg2l_odim(idim, in_channel=in_channel),
                elayers_sd,
                eunits,
                eprojs,
                subsample[:elayers_sd + 1],
                dropout,
                typ=typ,
            )
        ]) for _ in range(num_spkrs)
    ])
    self.enc_rec = torch.nn.ModuleList([
        RNNP(
            eprojs,
            elayers_rec,
            eunits,
            eprojs,
            subsample[elayers_sd:],
            dropout,
            typ=typ,
        )
    ])
    # ``typ`` already carries the direction letter ("blstm"/"bgru"), so no
    # extra "B" is prepended to the reported name.
    logging.info("Use CNN-VGG + " + typ.upper() + "P for encoder")

    self.num_spkrs = num_spkrs
def __init__(self, etype, idim, elayers_sd, elayers_rec, eunits, eprojs,
             subsample, dropout, num_spkrs=2, in_channel=1):
    """Build the multi-speaker encoder (VGG2L front-end + per-speaker RNNPs).

    Only ``etype`` values that start with "vgg" and end with "p" are
    supported.  For those, three module groups are created:

    * ``enc_mix``: a single ``VGG2L`` front-end;
    * ``enc_sd``: one ``RNNP`` per speaker (``num_spkrs`` of them), each
      given ``subsample[:elayers_sd + 1]``;
    * ``enc_rec``: one ``RNNP`` given ``subsample[elayers_sd:]``.

    Any other ``etype`` logs an error and terminates the process via
    ``sys.exit`` with a non-zero status, so that no instance is left without
    its ``enc_mix``/``enc_sd``/``enc_rec`` modules.  An unrecognised RNN
    type alone is only reported through ``logging.error``.

    Args:
        etype: Encoder type string, e.g. "vggblstmp".
        idim: Input dimension handed to ``get_vgg2l_odim``.
        elayers_sd: Layer count forwarded to each ``enc_sd`` RNNP; also the
            split point of ``subsample``.
        elayers_rec: Layer count forwarded to the ``enc_rec`` RNNP.
        eunits: Forwarded to every RNNP (presumably hidden units).
        eprojs: Forwarded to every RNNP; also the input size of the
            ``enc_rec`` RNNP.
        subsample: Sliceable sequence split between ``enc_sd`` and
            ``enc_rec``.
        dropout: Forwarded to every RNNP.
        num_spkrs: Number of ``enc_sd`` branches; stored on the instance.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        SystemExit: If ``etype`` does not have the "vgg...p" shape.
    """
    super(Encoder, self).__init__()

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "vgggrup" into "ru" and makes
    # it fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.lstrip("b").rstrip("p")

    if typ != "lstm" and typ != "gru":
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    # Reject every unsupported shape up front: both a missing "vgg" prefix
    # and a missing trailing "p" would otherwise leave the encoder without
    # any sub-modules.
    if not (use_vgg and etype.endswith("p")):
        logging.error(
            "Error: need to specify an appropriate encoder architecture")
        sys.exit(1)

    self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
    self.enc_sd = torch.nn.ModuleList([
        torch.nn.ModuleList([
            RNNP(get_vgg2l_odim(idim, in_channel=in_channel), elayers_sd,
                 eunits, eprojs, subsample[:elayers_sd + 1], dropout, typ=typ)
        ]) for _ in range(num_spkrs)
    ])
    self.enc_rec = torch.nn.ModuleList([
        RNNP(eprojs, elayers_rec, eunits, eprojs, subsample[elayers_sd:],
             dropout, typ=typ)
    ])
    logging.info('Use CNN-VGG + B' + typ.upper() + 'P for encoder')

    self.num_spkrs = num_spkrs
def __init__(
    self,
    idim: int,
    etype: str,
    elayers: int,
    eunits: int,
    eprojs: int,
    subsample: np.ndarray,
    dropout_rate: float = 0.0,
    aux_enc_output_layers: List = None,
):
    """Initialize Encoder module.

    ``etype`` is read as ``["vgg"] + <rnn type> + ["p"]``: a leading "vgg"
    puts a ``VGG2L`` front-end (with a fixed input channel count of 1)
    before the recurrent network and sets ``conv_subsampling_factor`` to 4
    (1 otherwise); a trailing "p" selects ``RNNP`` (which additionally
    receives ``subsample``) instead of ``RNN``.

    Args:
        idim: Input dimension handed to the RNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        etype: Encoder type string, e.g. "blstmp" or "vgggru".
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout_rate: Forwarded to RNN/RNNP as ``dropout_rate``.
        aux_enc_output_layers: Forwarded to RNN/RNNP as
            ``aux_output_layers``.  ``None`` (the default) means an empty
            list.
    """
    super(Encoder, self).__init__()

    # A fresh list per call: a ``[]`` default in the signature is created
    # once and shared by every instance that relies on the default.
    if aux_enc_output_layers is None:
        aux_enc_output_layers = []

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "gru" and "vgggru" into "ru".
    rnn_type = etype[len("vgg"):] if use_vgg else etype
    rnn_type = rnn_type.rstrip("p")

    in_channel = 1

    if use_vgg:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNNP(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    rnn_type,
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout_rate=dropout_rate,
                    aux_output_layers=aux_enc_output_layers,
                ),
            ])
        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNN(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    rnn_type,
                    elayers,
                    eunits,
                    eprojs,
                    dropout_rate=dropout_rate,
                    aux_output_layers=aux_enc_output_layers,
                ),
            ])

        self.conv_subsampling_factor = 4
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                RNNP(
                    idim,
                    rnn_type,
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout_rate=dropout_rate,
                    aux_output_layers=aux_enc_output_layers,
                )
            ])
        else:
            self.enc = torch.nn.ModuleList([
                RNN(
                    idim,
                    rnn_type,
                    elayers,
                    eunits,
                    eprojs,
                    dropout_rate=dropout_rate,
                    aux_output_layers=aux_enc_output_layers,
                )
            ])

        self.conv_subsampling_factor = 1
def __init__(
    self,
    etype: str,
    idim: int,
    elayers: int,
    eunits: int,
    eprojs: int,
    subsample: np.ndarray,
    dropout: float,
    in_channel: int = 1,
    aux_task_layer_list: List = None,
):
    """Initialize Encoder module.

    ``etype`` is read as ``["vgg"] + <rnn type> + ["p"]``: a leading "vgg"
    puts a ``VGG2L`` front-end before the recurrent network and sets
    ``conv_subsampling_factor`` to 4 (1 otherwise); a trailing "p" selects
    ``RNNP`` (which additionally receives ``subsample``) instead of ``RNN``.

    An unrecognised RNN type is only reported through ``logging.error``;
    construction continues with whatever ``typ`` was parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp" or "vgggru".
        idim: Input dimension handed to the RNN, or to
            ``get_vgg2l_odim`` when the VGG2L front-end is used.
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to ``VGG2L`` and ``get_vgg2l_odim``.
        aux_task_layer_list: Forwarded to RNN/RNNP under the same keyword.
            ``None`` (the default) means an empty list.
    """
    super(Encoder, self).__init__()

    # A fresh list per call: a ``[]`` default in the signature is created
    # once and shared by every instance that relies on the default.
    if aux_task_layer_list is None:
        aux_task_layer_list = []

    use_vgg = etype.startswith("vgg")
    # Drop "vgg" as a whole prefix.  str.lstrip("vgg") removes any run of
    # leading 'v'/'g' characters, which turns "gru" and "vgggru" into "ru"
    # and makes valid names fail the check below.
    typ = etype[len("vgg"):] if use_vgg else etype
    typ = typ.rstrip("p")

    if typ not in ["lstm", "gru", "blstm", "bgru"]:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    if use_vgg:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNNP(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout,
                    typ=typ,
                    aux_task_layer_list=aux_task_layer_list,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + "P for encoder")
        else:
            self.enc = torch.nn.ModuleList([
                VGG2L(in_channel),
                RNN(
                    get_vgg2l_odim(idim, in_channel=in_channel),
                    elayers,
                    eunits,
                    eprojs,
                    dropout,
                    typ=typ,
                    aux_task_layer_list=aux_task_layer_list,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + " for encoder")

        self.conv_subsampling_factor = 4
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                RNNP(
                    idim,
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout,
                    typ=typ,
                    aux_task_layer_list=aux_task_layer_list,
                )
            ])
            logging.info(typ.upper() + " with every-layer projection for encoder")
        else:
            self.enc = torch.nn.ModuleList([
                RNN(
                    idim,
                    elayers,
                    eunits,
                    eprojs,
                    dropout,
                    typ=typ,
                    aux_task_layer_list=aux_task_layer_list,
                )
            ])
            logging.info(typ.upper() + " without projection for encoder")

        self.conv_subsampling_factor = 1
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout,
             in_channel=1, apply_bn=False):
    """Build the encoder stack selected by ``etype``.

    ``etype`` is read as ``[front-end] + <rnn type> + ["p"]``:

    * a leading "vgg" selects a ``VGG2L`` front-end (128 output channels
      are reported to ``get_vgg2l_odim``);
    * a leading "cnn" selects a ``Conv2L`` front-end (32 output channels);
    * a trailing "p" selects ``RNNP`` (which additionally receives
      ``subsample`` and ``apply_bn``) instead of ``RNN``.

    ``conv_subsampling_factor`` is set to 4 when a front-end is used and to
    1 otherwise.  An unrecognised RNN type is only reported through
    ``logging.error``; construction continues with whatever ``typ`` was
    parsed.

    Args:
        etype: Encoder type string, e.g. "blstmp", "vgggru" or "cnnblstmp".
        idim: Input dimension handed to the RNN, to ``Conv2L``, or to
            ``get_vgg2l_odim`` when a front-end is used.
        elayers: Forwarded to RNN/RNNP (presumably the number of layers).
        eunits: Forwarded to RNN/RNNP (presumably hidden units per layer).
        eprojs: Forwarded to RNN/RNNP (presumably projection units).
        subsample: Forwarded to RNNP only.
        dropout: Forwarded to RNN/RNNP.
        in_channel: Passed to the front-end and to ``get_vgg2l_odim``.
        apply_bn: Passed to ``Conv2L`` and ``RNNP`` (not to ``RNN``).
    """
    super(Encoder, self).__init__()

    # Remove the front-end markers as whole prefixes.  str.lstrip("vgg")
    # strips any run of leading 'v'/'g' characters, which turns "gru" and
    # "vgggru" into "ru" and makes valid names fail the check below.
    typ = etype
    for prefix in ("vgg", "cnn"):
        if typ.startswith(prefix):
            typ = typ[len(prefix):]
    typ = typ.rstrip("p")

    if typ not in ["lstm", "gru", "blstm", "bgru"]:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    if etype.startswith("vgg"):
        conv_layers = VGG2L(in_channel)
        out_channel = 128
    elif etype.startswith("cnn"):
        conv_layers = Conv2L(in_channel, idim, apply_bn)
        out_channel = 32
    else:
        conv_layers = None

    # Compare against None explicitly: the truth value of a module object
    # depends on whether its class defines __len__/__bool__.
    if conv_layers is not None:
        # NOTE(review): the "CNN-VGG" log messages below are also emitted
        # for the Conv2L ("cnn") front-end.
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                conv_layers,
                RNNP(
                    get_vgg2l_odim(idim, in_channel=in_channel,
                                   out_channel=out_channel),
                    elayers,
                    eunits,
                    eprojs,
                    subsample,
                    dropout,
                    typ=typ,
                    apply_bn=apply_bn,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + "P for encoder")
        else:
            self.enc = torch.nn.ModuleList([
                conv_layers,
                RNN(
                    get_vgg2l_odim(idim, in_channel=in_channel,
                                   out_channel=out_channel),
                    elayers,
                    eunits,
                    eprojs,
                    dropout,
                    typ=typ,
                ),
            ])
            logging.info("Use CNN-VGG + " + typ.upper() + " for encoder")

        self.conv_subsampling_factor = 4
    else:
        if etype.endswith("p"):
            self.enc = torch.nn.ModuleList([
                RNNP(idim, elayers, eunits, eprojs, subsample, dropout,
                     typ=typ, apply_bn=apply_bn)
            ])
            logging.info(typ.upper() + " with every-layer projection for encoder")
        else:
            self.enc = torch.nn.ModuleList(
                [RNN(idim, elayers, eunits, eprojs, dropout, typ=typ)])
            logging.info(typ.upper() + " without projection for encoder")

        self.conv_subsampling_factor = 1