def __init__(
    self,
    etype,
    idim,
    elayers_sd,
    elayers_rec,
    eunits,
    eprojs,
    subsample,
    dropout,
    num_spkrs=2,
    in_channel=1,
):
    """Initialize the encoder of single-channel multi-speaker ASR.

    Builds three groups of sub-modules:

    * ``enc_mix``: a single ``VGG2L(in_channel)`` front-end,
    * ``enc_sd``: ``num_spkrs`` independent one-element ``ModuleList``s,
      each holding an ``RNNP`` with ``elayers_sd`` layers,
    * ``enc_rec``: a single ``RNNP`` with ``elayers_rec`` layers.

    Only ``etype`` values that start with ``"vgg"`` and end with ``"p"``
    are accepted. Any other value is logged as an error and the process
    is terminated through ``sys.exit()``.

    Args:
        etype: Encoder type name, e.g. ``"vggblstmp"``. The RNN type
            handed to ``RNNP`` is ``etype`` without the ``"vgg"`` prefix
            and without trailing ``"p"``.
        idim: Passed to ``get_vgg2l_odim`` to derive the input size of
            the ``enc_sd`` RNNPs (presumably the input feature
            dimension -- confirm against the caller).
        elayers_sd: Layer count of each ``enc_sd`` RNNP; also the split
            point used to slice ``subsample``.
        elayers_rec: Layer count of the ``enc_rec`` RNNP.
        eunits: Forwarded to ``RNNP`` as its third positional argument.
        eprojs: Forwarded to ``RNNP`` as its fourth positional argument;
            also used as the input size of ``enc_rec``.
        subsample: Sliceable sequence. ``subsample[:elayers_sd + 1]`` is
            given to ``enc_sd`` and ``subsample[elayers_sd:]`` to
            ``enc_rec``.
        dropout: Forwarded unchanged to every ``RNNP``.
        num_spkrs: Number of per-speaker ``enc_sd`` branches to create.
        in_channel: Forwarded to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        SystemExit: If ``etype`` does not start with ``"vgg"`` or does
            not end with ``"p"``.
    """
    super(EncoderMix, self).__init__()

    # str.lstrip("vgg") removes any leading 'v'/'g' *characters*, which
    # turns e.g. "vgggrup" into "ru". Drop the literal prefix instead.
    typ = etype[len("vgg"):] if etype.startswith("vgg") else etype
    typ = typ.rstrip("p")
    if typ not in ["lstm", "gru", "blstm", "bgru"]:
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    # Both "not vgg" and "vgg without projection" are rejected with the
    # same message, so they are handled by a single guard.
    if not (etype.startswith("vgg") and etype.endswith("p")):
        logging.error(
            "Error: need to specify an appropriate encoder architecture. "
            f"Illegal name {etype}")
        sys.exit()

    self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
    self.enc_sd = torch.nn.ModuleList([
        torch.nn.ModuleList([
            RNNP(
                get_vgg2l_odim(idim, in_channel=in_channel),
                elayers_sd,
                eunits,
                eprojs,
                subsample[:elayers_sd + 1],
                dropout,
                typ=typ,
            )
        ])
        for _ in range(num_spkrs)
    ])
    self.enc_rec = torch.nn.ModuleList([
        RNNP(
            eprojs,
            elayers_rec,
            eunits,
            eprojs,
            subsample[elayers_sd:],
            dropout,
            typ=typ,
        )
    ])
    logging.info("Use CNN-VGG + B" + typ.upper() + "P for encoder")

    self.num_spkrs = num_spkrs
def __init__(self, type, idim, layers, units, projs, dropout, nmask=1,
             nonlinear="sigmoid"):
    """Build the recurrent network and the per-mask output layers.

    Creates ``self.brnn`` (an ``RNNP`` when ``type`` ends with ``"p"``,
    otherwise an ``RNN``) and ``self.linears``, a ``ModuleList`` of
    ``nmask`` ``torch.nn.Linear(projs, idim)`` layers.

    Args:
        type: Network type name, e.g. ``"blstmp"``. The RNN type handed
            to ``RNNP``/``RNN`` is ``type`` without a ``"vgg"`` prefix
            and without trailing ``"p"``.
        idim: Input size of the RNN and output size of every linear
            layer.
        layers: Number of RNN layers.
        units: Forwarded to ``RNNP``/``RNN`` as the third positional
            argument.
        projs: Forwarded to ``RNNP``/``RNN`` as the fourth positional
            argument; also the input size of every linear layer.
        dropout: Forwarded unchanged to ``RNNP``/``RNN``.
        nmask: Number of linear output layers to create.
        nonlinear: Name of the non-linearity; one of ``"sigmoid"``,
            ``"relu"``, ``"tanh"`` or ``"crelu"``. Only stored here.

    Raises:
        ValueError: If ``nonlinear`` is not one of the supported names.
    """
    super().__init__()
    # No subsampling: a factor of 1 for every layer (plus the input).
    # ``np.int`` was removed in NumPy 1.24, so use an explicit dtype.
    subsample = np.ones(layers + 1, dtype=np.int64)

    # str.lstrip("vgg") strips leading 'v'/'g' *characters*, so it would
    # turn "gru"/"grup" into "ru". Remove the literal prefix instead.
    typ = type[len("vgg"):] if type.startswith("vgg") else type
    typ = typ.rstrip("p")

    if type[-1] == "p":
        self.brnn = RNNP(idim, layers, units, projs, subsample, dropout,
                         typ=typ)
    else:
        self.brnn = RNN(idim, layers, units, projs, dropout, typ=typ)

    self.type = type
    self.nmask = nmask
    self.linears = torch.nn.ModuleList(
        [torch.nn.Linear(projs, idim) for _ in range(nmask)])

    if nonlinear not in ("sigmoid", "relu", "tanh", "crelu"):
        raise ValueError("Not supporting nonlinear={}".format(nonlinear))
    self.nonlinear = nonlinear
def __init__(
    self,
    input_size: int,
    rnn_type: str = "lstm",
    bidirectional: bool = True,
    use_projection: bool = True,
    num_layers: int = 4,
    hidden_size: int = 320,
    output_size: int = 320,
    dropout: float = 0.0,
    subsample: Optional[Sequence[int]] = (2, 2, 1, 1),
):
    """Construct a (projected) RNN encoder.

    Stores the configuration on the instance and creates ``self.enc``, a
    one-element ``ModuleList`` holding either an ``RNNP`` (when
    ``use_projection`` is true) or a plain ``RNN``.

    Args:
        input_size: Input size handed to ``RNNP``/``RNN``.
        rnn_type: ``"lstm"`` or ``"gru"``.
        bidirectional: If true, ``"b"`` is prepended to ``rnn_type``
            before it is passed on as ``typ``.
        use_projection: Select ``RNNP`` (true) or ``RNN`` (false).
        num_layers: Number of RNN layers.
        hidden_size: Forwarded as the third positional argument.
        output_size: Forwarded as the fourth positional argument and
            stored as ``self._output_size``.
        dropout: Forwarded unchanged.
        subsample: Per-layer subsampling factors. ``None`` means a
            factor of 1 everywhere. Otherwise the sequence is truncated
            to ``num_layers`` entries, right-padded with 1s up to
            ``num_layers`` and prefixed with a single 1. Only used by
            the ``RNNP`` variant.

    Raises:
        ValueError: If ``rnn_type`` is neither ``"lstm"`` nor ``"gru"``.
    """
    assert check_argument_types()
    super().__init__()

    self._output_size = output_size
    self.rnn_type = rnn_type
    self.bidirectional = bidirectional
    self.use_projection = use_projection

    if rnn_type not in {"lstm", "gru"}:
        raise ValueError(f"Not supported rnn_type={rnn_type}")

    if subsample is not None:
        truncated = subsample[:num_layers]
        # A leading 1 is prepended because only the second and later
        # entries are used (per the original author's note).
        factors = np.pad(
            np.array(truncated, dtype=np.int64),
            [1, num_layers - len(truncated)],
            mode="constant",
            constant_values=1,
        )
    else:
        factors = np.ones(num_layers + 1, dtype=np.int64)

    typ = "b" + rnn_type if bidirectional else "" + rnn_type

    if not use_projection:
        core = RNN(
            input_size,
            num_layers,
            hidden_size,
            output_size,
            dropout,
            typ=typ,
        )
    else:
        core = RNNP(
            input_size,
            num_layers,
            hidden_size,
            output_size,
            factors,
            dropout,
            typ=typ,
        )
    self.enc = torch.nn.ModuleList([core])
def __init__(self, etype, idim, elayers_sd, elayers_rec, eunits, eprojs,
             subsample, dropout, num_spkrs=2, in_channel=1):
    """Initialize the multi-speaker encoder.

    For an ``etype`` that starts with ``"vgg"`` and ends with ``"p"``
    this builds:

    * ``enc_mix``: a single ``VGG2L(in_channel)`` front-end,
    * ``enc_sd``: ``num_spkrs`` independent one-element ``ModuleList``s,
      each holding an ``RNNP`` with ``elayers_sd`` layers,
    * ``enc_rec``: a single ``RNNP`` with ``elayers_rec`` layers.

    Args:
        etype: Encoder type name, e.g. ``"vggblstmp"``. The RNN type
            handed to ``RNNP`` is ``etype`` without the ``"vgg"``
            prefix, without leading ``"b"`` and without trailing
            ``"p"``.
        idim: Passed to ``get_vgg2l_odim`` to derive the input size of
            the ``enc_sd`` RNNPs.
        elayers_sd: Layer count of each ``enc_sd`` RNNP; also the split
            point used to slice ``subsample``.
        elayers_rec: Layer count of the ``enc_rec`` RNNP.
        eunits: Forwarded to ``RNNP`` as its third positional argument.
        eprojs: Forwarded to ``RNNP`` as its fourth positional argument;
            also used as the input size of ``enc_rec``.
        subsample: Sliceable sequence. ``subsample[:elayers_sd + 1]`` is
            given to ``enc_sd`` and ``subsample[elayers_sd:]`` to
            ``enc_rec``.
        dropout: Forwarded unchanged to every ``RNNP``.
        num_spkrs: Number of per-speaker ``enc_sd`` branches to create.
        in_channel: Forwarded to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        SystemExit: If ``etype`` starts with ``"vgg"`` but does not end
            with ``"p"``.
    """
    super(Encoder, self).__init__()

    # str.lstrip("vgg") removes any leading 'v'/'g' *characters*, which
    # turns e.g. "vgggrup" into "ru". Drop the literal prefix instead.
    typ = etype[len("vgg"):] if etype.startswith("vgg") else etype
    typ = typ.lstrip("b").rstrip("p")
    if typ not in ("lstm", "gru"):
        logging.error(
            "Error: need to specify an appropriate encoder architecture")

    # NOTE(review): an etype that does not start with "vgg" falls
    # through silently and leaves enc_mix/enc_sd/enc_rec undefined.
    # Confirm whether that should be rejected as well.
    if etype.startswith("vgg"):
        if not etype.endswith("p"):
            logging.error(
                "Error: need to specify an appropriate encoder architecture")
            sys.exit()

        self.enc_mix = torch.nn.ModuleList([VGG2L(in_channel)])
        self.enc_sd = torch.nn.ModuleList([
            torch.nn.ModuleList([
                RNNP(get_vgg2l_odim(idim, in_channel=in_channel),
                     elayers_sd, eunits, eprojs,
                     subsample[:elayers_sd + 1], dropout, typ=typ)
            ])
            for _ in range(num_spkrs)
        ])
        self.enc_rec = torch.nn.ModuleList([
            RNNP(eprojs, elayers_rec, eunits, eprojs,
                 subsample[elayers_sd:], dropout, typ=typ)
        ])
        logging.info("Use CNN-VGG + B" + typ.upper() + "P for encoder")

    self.num_spkrs = num_spkrs
def __init__(
    self,
    input_size: int,
    rnn_type: str = "lstm",
    bidirectional: bool = True,
    use_projection: bool = True,
    num_layers: int = 4,
    hidden_size: int = 320,
    output_size: int = 320,
    dropout: float = 0.0,
    in_channel: int = 1,
):
    """Construct a VGG front-end followed by a (projected) RNN.

    Stores the configuration on the instance and creates ``self.enc``, a
    two-element ``ModuleList``: ``VGG2L(in_channel)`` followed by either
    an ``RNNP`` (when ``use_projection`` is true) or a plain ``RNN``.
    The RNN input size is ``get_vgg2l_odim(input_size, in_channel)``.

    Args:
        input_size: Size passed to ``get_vgg2l_odim``.
        rnn_type: ``"lstm"`` or ``"gru"``.
        bidirectional: If true, ``"b"`` is prepended to ``rnn_type``
            before it is passed on as ``typ``.
        use_projection: Select ``RNNP`` (true) or ``RNN`` (false).
        num_layers: Number of RNN layers.
        hidden_size: Forwarded as the third positional argument.
        output_size: Forwarded as the fourth positional argument and
            stored as ``self._output_size``.
        dropout: Forwarded unchanged.
        in_channel: Forwarded to ``VGG2L`` and ``get_vgg2l_odim``.

    Raises:
        ValueError: If ``rnn_type`` is neither ``"lstm"`` nor ``"gru"``.
    """
    assert check_argument_types()
    super().__init__()

    self._output_size = output_size
    self.rnn_type = rnn_type
    self.bidirectional = bidirectional
    self.use_projection = use_projection

    if rnn_type not in {"lstm", "gru"}:
        raise ValueError(f"Not supported rnn_type={rnn_type}")

    typ = "b" + rnn_type if bidirectional else "" + rnn_type

    # Front-end is created first, then the recurrent part, matching the
    # original construction order.
    frontend = VGG2L(in_channel)
    rnn_idim = get_vgg2l_odim(input_size, in_channel=in_channel)

    if not use_projection:
        recurrent = RNN(
            rnn_idim,
            num_layers,
            hidden_size,
            output_size,
            dropout,
            typ=typ,
        )
    else:
        # Subsampling is not used for the VGG variant: every factor is 1.
        no_subsample = np.ones(num_layers + 1, dtype=np.int64)
        recurrent = RNNP(
            rnn_idim,
            num_layers,
            hidden_size,
            output_size,
            no_subsample,
            dropout,
            typ=typ,
        )

    self.enc = torch.nn.ModuleList([frontend, recurrent])
def __init__(self, type, idim, layers, units, projs, dropout, nmask=1):
    """Build the recurrent network and the per-mask output layers.

    Creates ``self.brnn`` (an ``RNNP`` when ``type`` ends with ``"p"``,
    otherwise an ``RNN``) and ``self.linears``, a ``ModuleList`` of
    ``nmask`` ``torch.nn.Linear(projs, idim)`` layers.

    Args:
        type: Network type name, e.g. ``"blstmp"``. The RNN type handed
            to ``RNNP``/``RNN`` is ``type`` without a ``"vgg"`` prefix
            and without trailing ``"p"``.
        idim: Input size of the RNN and output size of every linear
            layer.
        layers: Number of RNN layers.
        units: Forwarded to ``RNNP``/``RNN`` as the third positional
            argument.
        projs: Forwarded to ``RNNP``/``RNN`` as the fourth positional
            argument; also the input size of every linear layer.
        dropout: Forwarded unchanged to ``RNNP``/``RNN``.
        nmask: Number of linear output layers to create.
    """
    super().__init__()
    # No subsampling: a factor of 1 for every layer (plus the input).
    # ``np.int`` was removed in NumPy 1.24, so use an explicit dtype.
    subsample = np.ones(layers + 1, dtype=np.int64)

    # str.lstrip("vgg") strips leading 'v'/'g' *characters*, so it would
    # turn "gru"/"grup" into "ru". Remove the literal prefix instead.
    typ = type[len("vgg"):] if type.startswith("vgg") else type
    typ = typ.rstrip("p")

    if type[-1] == "p":
        self.brnn = RNNP(idim, layers, units, projs, subsample, dropout,
                         typ=typ)
    else:
        self.brnn = RNN(idim, layers, units, projs, dropout, typ=typ)

    self.type = type
    self.nmask = nmask
    self.linears = torch.nn.ModuleList(
        [torch.nn.Linear(projs, idim) for _ in range(nmask)])