def __init__(
        self,
        num_classes: int,              # number of classes
        d_model: int = 512,            # dimension of model
        d_ff: int = 512,               # dimension of feed forward network
        num_layers: int = 6,           # number of decoder layers
        num_heads: int = 8,            # number of attention heads
        ffnet_style: str = 'ff',       # style of feed forward network
        dropout_p: float = 0.3,        # probability of dropout
        pad_id: int = 0,               # identification of pad token
        eos_id: int = 2,               # identification of end of sentence token
) -> None:
    super(SpeechTransformerDecoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.embedding = Embedding(num_classes, pad_id, d_model)
    self.positional_encoding = PositionalEncoding(d_model)
    self.input_dropout = nn.Dropout(p=dropout_p)
    self.layers = nn.ModuleList([
        SpeechTransformerDecoderLayer(d_model, num_heads, d_ff, dropout_p, ffnet_style)
        for _ in range(num_layers)
    ])
    self.pad_id = pad_id
    self.eos_id = eos_id
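# The Embedding helper used above is assumed rather than shown. Below is a
# minimal sketch of what such a wrapper typically looks like in
# Speech-Transformer implementations: an nn.Embedding with a padding index,
# scaled by sqrt(d_model) as in "Attention Is All You Need". The class name
# matches the call site, but the sqrt scaling and the exact signature are
# assumptions, not the project's confirmed code.
import math

import torch
import torch.nn as nn


class Embedding(nn.Module):
    """Token embedding scaled by sqrt(d_model), with a dedicated pad index."""

    def __init__(self, num_classes: int, pad_id: int, d_model: int = 512) -> None:
        super().__init__()
        # assumption: scale factor from the original Transformer paper
        self.sqrt_dim = math.sqrt(d_model)
        self.embedding = nn.Embedding(num_classes, d_model, padding_idx=pad_id)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        return self.embedding(inputs) * self.sqrt_dim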
def __init__(
        self,
        num_classes: int,
        d_model: int = 512,
        d_ff: int = 512,
        num_layers: int = 6,
        num_heads: int = 8,
        ffnet_style: str = 'ff',
        dropout_p: float = 0.3,
        pad_id: int = 0,
) -> None:
    super(TransformerDecoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.embedding = Embedding(num_classes, pad_id, d_model)
    self.pos_encoding = PositionalEncoding(d_model)
    self.input_dropout = nn.Dropout(p=dropout_p)
    self.layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, num_heads, d_ff, dropout_p, ffnet_style)
        for _ in range(num_layers)
    ])
    self.pad_id = pad_id
    self.logit_scale = d_model ** -0.5
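# This variant keeps a logit_scale of d_model ** -0.5 but no output
# projection, which suggests the (not shown) forward pass ties output logits
# to the embedding weight and rescales them; that reading is an assumption.
# The PositionalEncoding helper is likewise assumed. A minimal sketch of the
# standard fixed sinusoidal encoding it presumably implements follows; the
# max_len default and the forward(length) signature are assumptions.
import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """Fixed (non-learned) sinusoidal positional encoding."""

    def __init__(self, d_model: int = 512, max_len: int = 5000) -> None:
        super().__init__()
        pe = torch.zeros(max_len, d_model, requires_grad=False)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
        self.register_buffer('pe', pe.unsqueeze(0))   # (1, max_len, d_model)

    def forward(self, length: int) -> torch.Tensor:
        # return encodings for the first `length` positions
        return self.pe[:, :length]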
def __init__(
        self,
        num_classes: int,          # number of classes
        d_model: int = 512,        # dimension of model
        d_ff: int = 512,           # dimension of feed forward network
        num_layers: int = 6,       # number of decoder layers
        num_heads: int = 8,        # number of attention heads
        dropout_p: float = 0.3,    # probability of dropout
        pad_id: int = 0,           # identification of pad token
        sos_id: int = 1,           # identification of start of sentence token
        eos_id: int = 2,           # identification of end of sentence token
        max_length: int = 400,     # max length of decoding
) -> None:
    super(TransformerDecoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.max_length = max_length
    self.pad_id = pad_id
    self.sos_id = sos_id
    self.eos_id = eos_id
    self.embedding = Embedding(num_classes, pad_id, d_model)
    self.positional_encoding = PositionalEncoding(d_model)
    self.input_dropout = nn.Dropout(p=dropout_p)
    self.layers = nn.ModuleList([
        TransformerDecoderLayer(
            d_model=d_model,
            num_heads=num_heads,
            d_ff=d_ff,
            dropout_p=dropout_p,
        ) for _ in range(num_layers)
    ])
    self.fc = nn.Sequential(
        nn.LayerNorm(d_model),
        Linear(d_model, num_classes, bias=False),
    )
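# A hedged usage sketch for the variant above. Unlike the earlier variant
# that keeps a logit_scale for tied-embedding logits, this one projects to
# the vocabulary through an explicit LayerNorm + bias-free Linear head
# (self.fc). The constructor arguments below mirror the signature; the
# vocabulary size is hypothetical, and the decoding behaviour past
# construction is not shown in the source.
decoder = TransformerDecoder(
    num_classes=2000,   # hypothetical vocabulary size
    d_model=512,
    d_ff=512,
    num_layers=6,
    num_heads=8,
    dropout_p=0.3,
    pad_id=0,
    sos_id=1,
    eos_id=2,
    max_length=400,     # cap on autoregressive decoding length
)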