def __init__(self, params):
    """Build a Transformer language model from a ``params`` config dict.

    Args:
        params (dict): must contain 'smoothing', 'vocab_size',
            'num_blocks', 'd_model', 'n_heads', 'd_ff',
            'residual_dropout' and 'share_embedding'. The optional key
            'normalize_before' (default False) switches the encoder
            layers to pre-norm and adds a final LayerNorm.
    """
    super(TransformerLanguageModel, self).__init__(params)
    self.model_type = 'transformer_lm'
    # FIX: was hard-coded to False, which made the `if self.normalize_before`
    # branch below dead code. Now configurable; the default preserves the
    # old post-norm behavior for existing configs.
    self.normalize_before = params.get('normalize_before', False)
    self.smoothing = params['smoothing']
    self.vocab_size = params['vocab_size']
    self.num_blocks = params['num_blocks']

    self.embedding = nn.Embedding(self.vocab_size, params['d_model'])
    self.pos_embedding = PositionalEncoding(params['d_model'], 0.0)

    # Attention/FFN dropout stay at 0.0 as in the original configuration;
    # only residual dropout is configurable.
    self.blocks = nn.ModuleList([
        TransformerEncoderLayer(
            params['n_heads'], params['d_model'], params['d_ff'],
            slf_attn_dropout=0.0, ffn_dropout=0.0,
            residual_dropout=params['residual_dropout'],
            normalize_before=self.normalize_before,
            concat_after=False, activation='glu')
        for _ in range(self.num_blocks)
    ])

    if self.normalize_before:
        # Pre-norm blocks need one final normalization over the output.
        self.after_norm = nn.LayerNorm(params['d_model'])

    self.output_project = nn.Linear(params['d_model'], self.vocab_size)
    if params['share_embedding']:
        # Weight tying: the output projection reuses the embedding matrix.
        self.output_project.weight = self.embedding.weight
        print('Share the weight of embedding to the output project layer!')

    self.crit = LabelSmoothingLoss(
        size=self.vocab_size, smoothing=self.smoothing, padding_idx=PAD)
def __init__(self, params):
    """Build a unidirectional LSTM language model from a ``params`` dict.

    Args:
        params (dict): must contain 'vocab_size', 'share_embedding',
            'smoothing', 'num_layers', 'hidden_size' and 'dropout'.

    Raises:
        ValueError: if 'share_embedding' is set but the embedding and
            output-projection weight shapes do not match.
    """
    super(RecurrentLanguageModel, self).__init__(params)
    self.model_type = 'recurrent_lm'
    self.vocab_size = params['vocab_size']
    self.share_embedding = params['share_embedding']
    self.smoothing = params['smoothing']
    self.num_layers = params['num_layers']
    self.hidden_size = params['hidden_size']

    self.embedding = nn.Embedding(params['vocab_size'], params['hidden_size'])
    self.rnn = nn.LSTM(
        input_size=params['hidden_size'],
        hidden_size=params['hidden_size'],
        num_layers=params['num_layers'],
        batch_first=True,
        dropout=params['dropout'],
        bidirectional=False)
    self.output_project = nn.Linear(
        params['hidden_size'], params['vocab_size'])

    if self.share_embedding:
        # FIX: was a bare `assert`, which is silently stripped under
        # `python -O`; raise explicitly so a config mismatch always fails.
        if self.embedding.weight.size() != self.output_project.weight.size():
            raise ValueError(
                'Cannot share embedding weights: embedding %s vs output '
                'projection %s' % (tuple(self.embedding.weight.size()),
                                   tuple(self.output_project.weight.size())))
        # Weight tying: output projection reuses the embedding matrix.
        self.output_project.weight = self.embedding.weight

    self.crit = LabelSmoothingLoss(
        size=self.vocab_size, smoothing=self.smoothing, padding_idx=PAD)
def __init__(self, params):
    """Assemble the speech-to-text pipeline described by ``params``.

    Builds the frontend, encoder and decoder via their respective
    registries, a label-smoothing criterion, and — when
    ``params['ctc_weight'] > 0`` — an auxiliary CTC assistor.
    """
    super(SpeechToText, self).__init__()

    self.frontend = BuildFrontEnd[params['frontend_type']](
        **params['frontend'])
    logger.info('Build a %s frontend!' % params['frontend_type'])

    self.encoder = BuildEncoder[params['encoder_type']](
        **params['encoder'])
    logger.info('Build a %s encoder!' % params['encoder_type'])

    self.decoder = BuildDecoder[params['decoder_type']](
        **params['decoder'])
    logger.info('Build a %s decoder!' % params['decoder_type'])

    vocab_size = params['decoder']['vocab_size']
    self.crit = LabelSmoothingLoss(
        size=vocab_size, smoothing=params['smoothing'])

    self.ctc_weight = params['ctc_weight']
    if self.ctc_weight > 0.0:
        # 'lookahead_steps' is an optional config key.
        self.assistor = CTCAssistor(
            hidden_size=params['encoder_output_size'],
            vocab_size=vocab_size,
            lookahead_steps=params.get('lookahead_steps', 0))
        logger.info('Build a CTC Assistor with weight %.2f' % self.ctc_weight)
def __init__(self, params):
    """Construct the frontend/encoder/decoder stack from ``params``.

    Also sets up the label-smoothing loss and, when
    ``params["ctc_weight"]`` is positive, a CTC assistor head.
    """
    super(SpeechToText, self).__init__()

    frontend_type = params["frontend_type"]
    self.frontend = BuildFrontEnd[frontend_type](**params["frontend"])
    logger.info("Build a %s frontend!" % frontend_type)

    encoder_type = params["encoder_type"]
    self.encoder = BuildEncoder[encoder_type](**params["encoder"])
    logger.info("Build a %s encoder!" % encoder_type)

    decoder_type = params["decoder_type"]
    self.decoder = BuildDecoder[decoder_type](**params["decoder"])
    logger.info("Build a %s decoder!" % decoder_type)

    self.crit = LabelSmoothingLoss(
        size=params["decoder"]["vocab_size"], smoothing=params["smoothing"]
    )

    self.ctc_weight = params["ctc_weight"]
    if self.ctc_weight > 0.0:
        # Optional config key; default to no lookahead.
        lookahead = params["lookahead_steps"] if "lookahead_steps" in params else 0
        self.assistor = CTCAssistor(
            hidden_size=params["encoder_output_size"],
            vocab_size=params["decoder"]["vocab_size"],
            lookahead_steps=lookahead,
        )
        logger.info("Build a CTC Assistor with weight %.2f" % self.ctc_weight)