Example #1
def test_attention_location():
    encoder_out = Variable(torch.zeros(152, 2, 256))  # seq, batch, dim
    query_vector = Variable(torch.zeros(1, 2, 256))  # seq, batch, dim
    mask = Variable(torch.zeros(2, 152, 1))  # batch, seq, 1
    attention = LocationAttention(dim=256)
    context, mask = attention(query_vector, encoder_out, mask)
    assert context.size() == (1, 2, 256)
    assert mask.size() == (2, 152, 1)  # batch, input_seq_len, 1
def test_attention_location_softmax():
    encoder_out = Variable(torch.randn(152, 2, 256))  # seq, batch, dim
    query_vector = Variable(torch.randn(1, 2, 1024))  # seq, batch, dim
    mask = Variable(torch.randn(2, 152, 1))  # batch, seq1, seq2
    attention = LocationAttention(encoded_dim=256,
                                  query_dim=1024,
                                  attention_dim=128)
    context, mask = attention(query_vector, encoder_out, mask)
    assert float(mask[:, 0, :].sum().data) == 1.0  # weights sum to 1 over the input sequence
def test_attention_location_sizes():
    encoder_out = Variable(torch.randn(152, 2, 256))  # seq, batch, dim
    query_vector = Variable(torch.randn(1, 2, 1024))  # seq, batch, dim
    mask = Variable(torch.randn(2, 152, 1))  # batch, seq1, seq2
    attention = LocationAttention(encoded_dim=256,
                                  query_dim=1024,
                                  attention_dim=128)
    context, mask = attention(query_vector, encoder_out, mask)
    assert context.size() == (1, 2, 256)
    assert mask.size() == (1, 2, 152)  # seq2, batch, seq1
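The tests above pin down the call signature and the tensor shapes, but not the implementation. Below is a minimal sketch of an additive (Bahdanau-style) attention module that satisfies the keyword-argument variant used in test_attention_location_sizes. The class name AdditiveAttentionSketch and all internals (linear projections, tanh scoring) are assumptions; only the argument names, shapes, and the softmax-normalisation property checked in test_attention_location_softmax come from the tests.

import torch
import torch.nn as nn
import torch.nn.functional as F


class AdditiveAttentionSketch(nn.Module):
    """Hypothetical stand-in for LocationAttention, not the project's real class."""

    def __init__(self, encoded_dim=256, query_dim=1024, attention_dim=128):
        super().__init__()
        self.encoder_proj = nn.Linear(encoded_dim, attention_dim)
        self.query_proj = nn.Linear(query_dim, attention_dim)
        self.score = nn.Linear(attention_dim, 1)

    def forward(self, query, encoder_out, mask=None):
        # query: (1, batch, query_dim); encoder_out: (seq, batch, encoded_dim)
        # `mask` is accepted for signature compatibility only; its semantics are
        # not visible in these snippets, so the sketch ignores it.
        energy = self.score(torch.tanh(
            self.encoder_proj(encoder_out) + self.query_proj(query)))   # (seq, batch, 1)
        weights = F.softmax(energy, dim=0)                               # normalise over the input sequence
        context = (weights * encoder_out).sum(dim=0, keepdim=True)       # (1, batch, encoded_dim)
        return context, weights.permute(2, 1, 0)                         # (1, batch, seq)


# Quick shape check mirroring test_attention_location_sizes:
attention = AdditiveAttentionSketch(encoded_dim=256, query_dim=1024, attention_dim=128)
context, weights = attention(torch.randn(1, 2, 1024), torch.randn(152, 2, 256))
assert context.size() == (1, 2, 256)
assert weights.size() == (1, 2, 152)
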
Example #4
    def __init__(self, hidden_size=1024, num_layers=2):
        super(Decoder, self).__init__()
        self.prenet = PreNet(in_features=80, out_features=256)
        self.attention = LocationAttention(dim=256)
        self.rnn = nn.GRU(input_size=512,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          dropout=0.1)
        self.spec_out = nn.Linear(in_features=1024 + 256, out_features=80)
        self.stop_out = nn.Linear(in_features=1024 + 256, out_features=1)
        self.postnet = PostNet()
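The GRU's input_size of 512 and the 1024 + 256 input to the output heads line up if the 256-dim PreNet output is concatenated with a 256-dim attention context before the GRU, and the 1024-dim GRU state is concatenated with that same context before spec_out and stop_out. That pairing is inferred from the numbers rather than shown here; a quick check of the arithmetic:

import torch

# Assumed pairing: prenet output (256) + attention context (256) -> GRU input (512);
# GRU hidden (1024) + context (256) -> input to spec_out / stop_out (1280).
prenet_out = torch.randn(1, 2, 256)
context = torch.randn(1, 2, 256)
hidden = torch.randn(1, 2, 1024)
assert torch.cat([prenet_out, context], dim=-1).size(-1) == 512          # GRU input_size
assert torch.cat([hidden, context], dim=-1).size(-1) == 1024 + 256       # Linear in_features
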
Example #5
def test_attention():
    """
    Attention should output a fixed length context vector (seq len = 1)
    and a weight for each item in the input sequence
    """
    encoder_out = Variable(torch.zeros(152, 2, 256))  # seq, batch, dim
    query_vector = Variable(torch.zeros(1, 2, 256))  # seq, batch, dim

    attention = LocationAttention(dim=256)
    context, mask = attention(query_vector, encoder_out)
    assert context.size() == (1, 2, 256)  # seq, batch, dim
    assert mask.size() == (2, 152, 1)  # batch, input_seq_len, 1
def test_attention_sizes():
    """
    Attention should output a fixed length context vector (seq len = 1)
    and a weight for each item in the input sequence
    """
    encoder_out = Variable(torch.randn(152, 2, 256))  # seq, batch, dim
    query_vector = Variable(torch.randn(1, 2, 1024))  # seq, batch, dim

    attention = LocationAttention(encoded_dim=256,
                                  query_dim=1024,
                                  attention_dim=128)
    context, mask = attention(query_vector, encoder_out)
    assert context.size() == (1, 2, 256)  # seq, batch, dim
    assert mask.size() == (1, 2, 152)  # seq2, batch, seq1
Example #7
    def __init__(self,
                 hidden_size=1024,
                 num_layers=2,
                 num_mels=80,
                 num_prenet_features=256):
        super(Decoder, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.num_mels = num_mels

        self.prenet = PreNet(in_features=num_mels,
                             out_features=num_prenet_features)
        self.attention = LocationAttention(encoded_dim=256,
                                           query_dim=hidden_size,
                                           attention_dim=128)
        self.rnn = nn.LSTM(input_size=num_prenet_features + 256,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           dropout=0.1)
        self.spec_out = nn.Linear(in_features=hidden_size + 256,
                                  out_features=num_mels)
        self.stop_out = nn.Linear(in_features=hidden_size + 256,
                                  out_features=1)
        self.postnet = PostNet()
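The forward pass is not shown in this snippet, but the layer sizes suggest how one decoding step could be wired: the 256-dim PreNet output concatenated with a 256-dim attention context gives the LSTM input of num_prenet_features + 256, the hidden_size-dim LSTM output serves as the attention query (query_dim=hidden_size), and the output heads read the hidden state concatenated with the context (hidden_size + 256). The sketch below follows that reading; decode_step_sketch is a hypothetical helper, and the step ordering and the sigmoid on the stop token are assumptions, not the author's actual forward().

import torch

def decode_step_sketch(decoder, prev_frame, encoder_out, query, state=None):
    """One hypothetical decoding step for the Decoder above.

    prev_frame:  (1, batch, num_mels)      previously generated mel frame
    encoder_out: (seq, batch, 256)         encoder memory
    query:       (1, batch, hidden_size)   previous top-layer LSTM output
    """
    prenet_out = decoder.prenet(prev_frame)                    # (1, batch, 256)
    context, weights = decoder.attention(query, encoder_out)   # (1, batch, 256)
    rnn_in = torch.cat([prenet_out, context], dim=-1)          # (1, batch, 512)
    rnn_out, state = decoder.rnn(rnn_in, state)                # (1, batch, hidden_size)
    features = torch.cat([rnn_out, context], dim=-1)           # (1, batch, hidden_size + 256)
    frame = decoder.spec_out(features)                         # (1, batch, num_mels)
    stop = torch.sigmoid(decoder.stop_out(features))           # sigmoid assumed for a stop probability
    return frame, stop, rnn_out, state, weights
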