import torch
import torch.nn as nn

from kospeech.models import SpeechTransformer


def build_transformer(
        num_classes: int,
        d_model: int,
        d_ff: int,
        num_heads: int,
        input_dim: int,
        num_encoder_layers: int,
        num_decoder_layers: int,
        extractor: str,
        dropout_p: float,
        device: torch.device,
        pad_id: int = 0,
        sos_id: int = 1,
        eos_id: int = 2,
        joint_ctc_attention: bool = False,
        max_length: int = 400,
) -> nn.DataParallel:
    # ParameterError is assumed to be defined or imported elsewhere in the package.
    if dropout_p < 0.0:
        raise ParameterError("dropout probability should be non-negative")
    if input_dim <= 0:
        raise ParameterError("input_dim should be greater than 0")
    if num_encoder_layers <= 0:
        raise ParameterError("num_encoder_layers should be greater than 0")
    if num_decoder_layers <= 0:
        raise ParameterError("num_decoder_layers should be greater than 0")

    return nn.DataParallel(SpeechTransformer(
        input_dim=input_dim,
        num_classes=num_classes,
        extractor=extractor,
        d_model=d_model,
        d_ff=d_ff,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        num_heads=num_heads,
        encoder_dropout_p=dropout_p,
        decoder_dropout_p=dropout_p,
        pad_id=pad_id,
        sos_id=sos_id,
        eos_id=eos_id,
        max_length=max_length,
        joint_ctc_attention=joint_ctc_attention,
    )).to(device)
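A minimal usage sketch for the builder above. The hyperparameter values are illustrative assumptions, not recommended settings, and the extractor name 'vgg' is an assumption to be checked against the package:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = build_transformer(
    num_classes=10,
    d_model=16,
    d_ff=32,
    num_heads=2,
    input_dim=80,
    num_encoder_layers=3,
    num_decoder_layers=2,
    extractor='vgg',  # assumed extractor name; check the package for valid options
    dropout_p=0.1,
    device=device,
)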
def build_transformer(
        num_classes: int,
        pad_id: int,
        d_model: int,
        num_heads: int,
        input_size: int,
        num_encoder_layers: int,
        num_decoder_layers: int,
        dropout_p: float,
        ffnet_style: str,
        device: str,
        eos_id: int,
) -> nn.DataParallel:
    if ffnet_style not in {'ff', 'conv'}:
        raise ParameterError("Unsupported ffnet_style: {0}".format(ffnet_style))

    return nn.DataParallel(SpeechTransformer(
        num_classes=num_classes,
        pad_id=pad_id,
        d_model=d_model,
        num_heads=num_heads,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        dropout_p=dropout_p,
        ffnet_style=ffnet_style,
        input_dim=input_size,
        eos_id=eos_id,
    )).to(device)
import torch

from kospeech.models import SpeechTransformer

batch_size = 4
seq_length = 200
target_length = 20
input_size = 80

transformer = SpeechTransformer(num_classes=10, d_model=16, d_ff=32,
                                num_encoder_layers=3, num_decoder_layers=2)

# Use random features; a bare torch.FloatTensor(...) would be uninitialized memory.
inputs = torch.rand(batch_size, seq_length, input_size)
input_lengths = torch.LongTensor([seq_length, seq_length - 10, seq_length - 20, seq_length - 30])
targets = torch.randint(0, 10, size=(batch_size, target_length), dtype=torch.long)

output = transformer(inputs, input_lengths, targets)
print(output)
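If the forward pass yields per-step log-probabilities of shape (batch, target_length, num_classes), which is an assumption about this snippet rather than a documented contract (the model may also return extra values such as output lengths), a training loss could be attached along these lines:

import torch.nn.functional as F

# Assumes `output` is (batch, target_length, num_classes) log-probs; nll_loss
# expects the class dimension second, hence the transpose. pad_id=0 is assumed.
loss = F.nll_loss(output.transpose(1, 2), targets, ignore_index=0)
loss.backward()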
import torch

from kospeech.models import SpeechTransformer

batch_size = 4
seq_length = 200
input_size = 80

transformer = SpeechTransformer(num_classes=10, d_model=16, d_ff=32,
                                num_encoder_layers=3, num_decoder_layers=2)

inputs = torch.rand(batch_size, seq_length, input_size)
input_lengths = torch.LongTensor([seq_length, seq_length - 10, seq_length - 20, seq_length - 30])

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when no GPU is present
output = transformer.greedy_search(inputs, input_lengths, device=device)
print(output)
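For intuition, here is a minimal sketch of the idea behind greedy search, not kospeech's actual implementation: start from the start-of-sequence token, repeatedly run the decoder on the partial hypothesis, and append the argmax token until every sequence emits end-of-sequence or a length cap is reached. decoder_step below is a hypothetical stand-in for a real decoder forward pass.

import torch

def greedy_decode(decoder_step, encoder_outputs, sos_id=1, eos_id=2, max_length=400):
    # Greedily extend one hypothesis per batch element, one token at a time.
    batch_size = encoder_outputs.size(0)
    hyps = torch.full((batch_size, 1), sos_id, dtype=torch.long)
    for _ in range(max_length):
        log_probs = decoder_step(hyps, encoder_outputs)  # (batch, len, num_classes)
        next_token = log_probs[:, -1, :].argmax(dim=-1, keepdim=True)
        hyps = torch.cat([hyps, next_token], dim=1)
        if (next_token.squeeze(1) == eos_id).all():  # every hypothesis finished
            break
    return hyps

# Toy demo: a random linear projection stands in for a real decoder.
torch.manual_seed(0)
proj = torch.nn.Linear(16, 10)

def decoder_step(hyps, encoder_outputs):
    # Ignores the hypothesis; pools encoder states and projects them to log-probs.
    pooled = encoder_outputs.mean(dim=1, keepdim=True).expand(-1, hyps.size(1), -1)
    return torch.log_softmax(proj(pooled), dim=-1)

print(greedy_decode(decoder_step, torch.rand(4, 50, 16), max_length=5))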