Ejemplo n.º 1
0
def build_deepspeech2(input_size: int, num_classes: int, rnn_type: str,
                      num_rnn_layers: int, rnn_hidden_dim: int,
                      dropout_p: float, bidirectional: bool, activation: str,
                      device: torch.device) -> nn.DataParallel:
    """Validate the hyper-parameters and build a DeepSpeech2 model.

    Args:
        input_size: Size of the input features; must be greater than 0.
        num_classes: Forwarded unchanged to ``DeepSpeech2``.
        rnn_type: Name of the RNN cell; its lower-cased form must be a key of
            ``BaseRNN.supported_rnns``.
        num_rnn_layers: Number of RNN layers; must be greater than 0.
        rnn_hidden_dim: RNN hidden dimension; must be greater than 0.
        dropout_p: Dropout probability; negative values are rejected.
        bidirectional: Forwarded unchanged to ``DeepSpeech2``.
        activation: Forwarded unchanged to ``DeepSpeech2``.
        device: Device the wrapped model is moved to (also forwarded to
            ``DeepSpeech2``).

    Returns:
        The ``DeepSpeech2`` instance wrapped in ``nn.DataParallel`` and moved
        to ``device``.

    Raises:
        ParameterError: If any of the checks above fails.
    """
    # 0.0 (no dropout) is accepted; only negative values are rejected.
    if dropout_p < 0.0:
        raise ParameterError("dropout probability should be positive")
    # Sizes and layer counts must be strictly positive, exactly as the error
    # messages state; a value of 0 used to slip through a `< 0` comparison.
    if input_size <= 0:
        raise ParameterError("input_size should be greater than 0")
    if rnn_hidden_dim <= 0:
        raise ParameterError("hidden_dim should be greater than 0")
    if num_rnn_layers <= 0:
        raise ParameterError("num_layers should be greater than 0")
    # NOTE(review): the check is case-insensitive, but rnn_type is forwarded
    # to DeepSpeech2 without lower-casing -- confirm DeepSpeech2 normalises it.
    if rnn_type.lower() not in BaseRNN.supported_rnns:
        raise ParameterError("Unsupported RNN Cell: {0}".format(rnn_type))

    model = DeepSpeech2(
        input_size=input_size,
        num_classes=num_classes,
        rnn_type=rnn_type,
        num_rnn_layers=num_rnn_layers,
        rnn_hidden_dim=rnn_hidden_dim,
        dropout_p=dropout_p,
        bidirectional=bidirectional,
        activation=activation,
        device=device,
    )
    return nn.DataParallel(model).to(device)
Ejemplo n.º 2
0
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn

from kospeech.models import DeepSpeech2

# Smoke test: feed random batches through DeepSpeech2 and compute a CTC loss.

# Shape of the random dummy batch generated on every iteration.
batch_size = 3
sequence_length = 14321
dimension = 80

# Run on the GPU when one is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device('cuda' if cuda else 'cpu')

model = DeepSpeech2(num_classes=10, input_dim=dimension).to(device)

# NOTE(review): blank=3 is also used as an ordinary label inside `targets`
# below; the PyTorch CTCLoss documentation says target indices must not be the
# blank index -- confirm this is intentional for a smoke test.
criterion = nn.CTCLoss(blank=3, zero_infinity=True)
# NOTE(review): no backward()/optimizer.step() call is visible in this
# excerpt; the loop may continue beyond the lines shown here.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-04)

for i in range(10):
    # Fresh random inputs each iteration; lengths and targets are constant.
    inputs = torch.rand(batch_size, sequence_length, dimension).to(device)
    # Created without .to(device); every value is <= sequence_length.
    input_lengths = torch.IntTensor([12345, 12300, 12000])
    # Trailing zeros are presumably padding -- TODO confirm against the vocab.
    targets = torch.LongTensor([[1, 3, 3, 3, 3, 3, 4, 5, 6, 2],
                                [1, 3, 3, 3, 3, 3, 4, 5, 2, 0],
                                [1, 3, 3, 3, 3, 3, 4, 2, 0, 0]]).to(device)
    # Per-row lengths; the largest (9) equals the column count of targets[:, 1:].
    target_lengths = torch.LongTensor([9, 8, 7])
    outputs, output_lengths = model(inputs, input_lengths)

    # transpose(0, 1) swaps the first two axes of `outputs` -- presumably
    # (batch, time, classes) -> (time, batch, classes) for CTCLoss; TODO confirm.
    # targets[:, 1:] drops the first column of every target row.
    loss = criterion(outputs.transpose(0, 1), targets[:, 1:], output_lengths,
                     target_lengths)