# Presumed imports for the snippets below; the module paths are assumptions
# inferred from how the identifiers are used (G = nnmnkwii.paramgen,
# AF = nnmnkwii.autograd). multi_stream_mlpg, get_static_features and
# get_static_stream_sizes are defined or referenced further down.
import sys
import time

import numpy as np
import torch
from torch import nn
from torch.autograd import Variable

from nnmnkwii import autograd as AF
from nnmnkwii import paramgen as G
from nnmnkwii.autograd import unit_variance_mlpg
from nnmnkwii.paramgen import unit_variance_mlpg_matrix

# Terminal color escapes used by benchmark_mlpg (assumed standard ANSI codes)
OKGREEN = "\033[92m"
FAIL = "\033[91m"
ENDC = "\033[0m"


def test_functional_mlpg():
    static_dim = 2
    T = 5

    for windows in _get_windows_set():
        torch.manual_seed(1234)
        means = torch.rand(T, static_dim * len(windows))
        variances = torch.ones(static_dim * len(windows))

        y = G.mlpg(means.numpy(), variances.numpy(), windows)
        y = Variable(torch.from_numpy(y), requires_grad=False)

        means = Variable(means, requires_grad=True)

        # mlpg
        y_hat = AF.mlpg(means, variances, windows)
        assert np.allclose(y.data.numpy(), y_hat.data.numpy())

        # Test backward pass
        nn.MSELoss()(y_hat, y).backward()

        # unit_variance_mlpg
        R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
        y_hat = AF.unit_variance_mlpg(R, means)
        assert np.allclose(y.data.numpy(), y_hat.data.numpy())

        nn.MSELoss()(y_hat, y).backward()

        # Test 3D tensor inputs
        y_hat = AF.unit_variance_mlpg(R, means.view(1, -1, means.size(-1)))
        assert np.allclose(
            y.data.numpy(), y_hat.data.view(-1, static_dim).numpy())

        nn.MSELoss()(y_hat.view(-1, static_dim), y).backward()
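
# The tests in this file call _get_windows_set(), which is not shown in this
# snippet. Below is a minimal sketch of what it presumably returns: window
# sets of increasing order, as (left, right, coefficients) tuples. The exact
# window sets are an assumption, chosen to match the windows used elsewhere
# in this file.
def _get_windows_set():
    windows_set = [
        # Static only
        [
            (0, 0, np.array([1.0])),
        ],
        # Static + delta
        [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
        ],
        # Static + delta + delta-delta
        [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ],
    ]
    return windows_set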
def test_multi_stream_mlpg():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    in_dim = 187
    T = 100
    R = unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)

    batch_size = 32
    x = Variable(torch.rand(batch_size, T, in_dim))

    stream_sizes = [180, 3, 1, 3]
    has_dynamic_features = [True, True, False, True]

    y = multi_stream_mlpg(x, R, stream_sizes, has_dynamic_features)
    assert y.size() == (batch_size, T, 60 + 1 + 1 + 1)

    mgc = y[:, :, :60]
    lf0 = y[:, :, 60]
    vuv = y[:, :, 61]
    bap = y[:, :, 62]

    assert (unit_variance_mlpg(R, x[:, :, :180]) == mgc).data.all()
    assert (unit_variance_mlpg(
        R, x[:, :, 180:180 + 3]).squeeze(-1) == lf0).data.all()
    assert (x[:, :, 183] == vuv).data.all()
    assert (unit_variance_mlpg(
        R, x[:, :, 184:184 + 3]).squeeze(-1) == bap).data.all()

    static_features = get_static_features(
        x, len(windows), stream_sizes, has_dynamic_features)
    assert static_features.size() == y.size()
def forward(self, x, R, lengths=None):
    # Add batch axis
    x = x.unsqueeze(0) if x.dim() == 2 else x
    x_static = x[:, :, :self.static_dim]

    # T(x)
    Tx = self.sigmoid(self.T(x_static))

    # Pack padded sequence for CuDNN
    if isinstance(lengths, Variable):
        lengths = lengths.data.cpu().long().numpy()
    if lengths is not None:
        inputs = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True)
    else:
        inputs = x

    # G(x)
    output, _ = self.lstm(inputs)
    if lengths is not None:
        output, _ = nn.utils.rnn.pad_packed_sequence(
            output, batch_first=True)
    output = self.hidden2out(output)
    Gx = unit_variance_mlpg(R, output)

    # y^ = x + T(x) * G(x)
    return x, x_static + Tx * Gx
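
# The recurrent forward() above is excerpted from a module that is not shown
# here. Below is a minimal constructor sketch consistent with the attributes
# it uses (self.static_dim, self.T, self.sigmoid, self.lstm, self.hidden2out);
# the class name, layer sizes, and bidirectional choice are assumptions.
class In2OutRNNHighwayNet(nn.Module):
    def __init__(self, in_dim=177, out_dim=177, static_dim=59,
                 num_hidden=2, hidden_dim=256, bidirectional=True):
        super(In2OutRNNHighwayNet, self).__init__()
        self.static_dim = static_dim
        num_direction = 2 if bidirectional else 1
        # Transform gate T(x), applied to static features only
        self.T = nn.Linear(static_dim, static_dim)
        self.sigmoid = nn.Sigmoid()
        # G(x): recurrent trunk plus a projection. out_dim should equal
        # static_dim * num_windows so that unit_variance_mlpg maps the
        # projected features back down to static_dim.
        self.lstm = nn.LSTM(in_dim, hidden_dim, num_hidden,
                            batch_first=True, bidirectional=bidirectional)
        self.hidden2out = nn.Linear(num_direction * hidden_dim, out_dim)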
def forward(self, x, R, lengths=None):
    # Add batch axis
    x = x.unsqueeze(0) if x.dim() == 2 else x
    x_static = x[:, :, :self.static_dim]

    # T(x)
    Tx = self.sigmoid(self.T(x_static))

    # G(x)
    for layer in self.H:
        x = self.dropout(self.relu(layer(x)))
    x = self.last_linear(x)
    Gx = unit_variance_mlpg(R, x)

    # y^ = x + T(x) * G(x)
    return x, x_static + Tx * Gx
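
# Likewise, the feed-forward forward() above assumes a sigmoid gate T(x)
# over static features and a stack of hidden layers H. A minimal constructor
# sketch consistent with the attributes it uses; the class name, default
# sizes, and dropout rate are assumptions, not the original definition.
class In2OutHighwayNet(nn.Module):
    def __init__(self, in_dim=177, out_dim=177, static_dim=59,
                 num_hidden=3, hidden_dim=512, dropout=0.5):
        super(In2OutHighwayNet, self).__init__()
        self.static_dim = static_dim
        # Transform gate T(x), applied to static features only
        self.T = nn.Linear(static_dim, static_dim)
        self.sigmoid = nn.Sigmoid()
        # Hidden layers for G(x); out_dim should equal
        # static_dim * num_windows for unit_variance_mlpg to apply.
        in_sizes = [in_dim] + [hidden_dim] * (num_hidden - 1)
        out_sizes = [hidden_dim] * num_hidden
        self.H = nn.ModuleList(
            [nn.Linear(n_in, n_out)
             for n_in, n_out in zip(in_sizes, out_sizes)])
        self.last_linear = nn.Linear(hidden_dim, out_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)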
def multi_stream_mlpg(inputs, R,
                      stream_sizes=[180, 3, 1, 3],
                      has_dynamic_features=[True, True, False, True],
                      streams=[True, True, True, True]):
    """Split inputs into streams and apply MLPG to the streams that have
    dynamic features.
    """
    if R is None:
        num_windows = 1
    else:
        num_windows = R.size(1) // R.size(0)
    B, T, D = inputs.size()
    if D != sum(stream_sizes):
        raise RuntimeError(
            "You probably have specified wrong dimension params.")

    # Stream indices for static+delta features
    # [0, 180, 183, 184]
    start_indices = np.hstack(([0], np.cumsum(stream_sizes)[:-1]))
    # [180, 183, 184, 187]
    end_indices = np.cumsum(stream_sizes)

    # Stream sizes for static features
    # [60, 1, 1, 1]
    static_stream_sizes = get_static_stream_sizes(
        stream_sizes, has_dynamic_features, num_windows)

    # [0, 60, 61, 62]
    static_stream_start_indices = np.hstack(
        ([0], np.cumsum(static_stream_sizes)[:-1]))
    # [60, 61, 62, 63]
    static_stream_end_indices = np.cumsum(static_stream_sizes)

    ret = []
    for in_start_idx, in_end_idx, out_start_idx, out_end_idx, v, enabled in zip(
            start_indices, end_indices,
            static_stream_start_indices, static_stream_end_indices,
            has_dynamic_features, streams):
        if not enabled:
            continue
        x = inputs[:, :, in_start_idx:in_end_idx]
        y = unit_variance_mlpg(R, x) if v else x
        ret.append(y)

    return torch.cat(ret, dim=-1)
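
# A short usage sketch of multi_stream_mlpg with its default stream layout
# (mgc 180, lf0 3, vuv 1, bap 3; three windows). Sizes mirror the test above;
# this function is illustrative only, not part of the library.
def example_multi_stream_mlpg():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    T, batch_size = 100, 8
    R = torch.from_numpy(unit_variance_mlpg_matrix(windows, T))
    x = Variable(torch.rand(batch_size, T, 187))
    # Streams with dynamic features collapse to their static dims:
    # 180 -> 60, 3 -> 1, 1 -> 1 (passed through as-is), 3 -> 1; 63 in total.
    y = multi_stream_mlpg(x, R)
    assert y.size() == (batch_size, T, 63)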
def test_minibatch_unit_variance_mlpg_gradcheck():
    static_dim = 2
    T = 5

    for windows in _get_windows_set():
        batch_size = 5
        torch.manual_seed(1234)

        # Prepare inputs
        means = torch.rand(T, static_dim * len(windows))
        means_expanded = means.expand(
            batch_size, means.shape[0], means.shape[1])
        reshaped_means = torch.from_numpy(
            G.reshape_means(means.numpy(), static_dim))
        reshaped_means_expanded = reshaped_means.expand(
            batch_size, reshaped_means.shape[0], reshaped_means.shape[1])

        # Target
        y = G.mlpg(means.numpy(), np.ones(static_dim * len(windows)), windows)
        y = Variable(torch.from_numpy(y), requires_grad=False)
        y_expanded = y.expand(batch_size, y.size(0), y.size(1))

        # Pack into variables
        means = Variable(means, requires_grad=True)
        means_expanded = Variable(means_expanded, requires_grad=True)
        reshaped_means = Variable(reshaped_means, requires_grad=True)
        reshaped_means_expanded = Variable(
            reshaped_means_expanded, requires_grad=True)

        # Case 1: 2d with reshaped means
        R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
        y_hat1 = AF.unit_variance_mlpg(R, reshaped_means)

        # Case 2: 3d with reshaped means
        y_hat2 = AF.unit_variance_mlpg(R, reshaped_means_expanded)
        for i in range(batch_size):
            assert np.allclose(y_hat1.data.numpy(), y_hat2[i].data.numpy())

        nn.MSELoss()(y_hat1, y).backward()
        nn.MSELoss()(y_hat2, y_expanded).backward()

        # Check grad consistency
        for i in range(batch_size):
            grad1 = reshaped_means.grad.data.numpy()
            grad2 = reshaped_means_expanded.grad[i].data.numpy()
            assert np.allclose(grad1, grad2)

        # Case 3: 2d with non-reshaped input
        y_hat3 = AF.unit_variance_mlpg(R, means)

        # Case 4: 3d with non-reshaped input
        y_hat4 = AF.unit_variance_mlpg(R, means_expanded)
        for i in range(batch_size):
            assert np.allclose(y_hat1.data.numpy(), y_hat3.data.numpy())
            assert np.allclose(y_hat3.data.numpy(), y_hat4[i].data.numpy())

        nn.MSELoss()(y_hat3, y).backward()
        nn.MSELoss()(y_hat4, y_expanded).backward()

        # Check grad consistency
        for i in range(batch_size):
            grad1 = means.grad.data.numpy()
            grad2 = means_expanded.grad[i].data.numpy()
            assert np.allclose(grad1, grad2)
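
# Beyond the batch/non-batch gradient consistency checks above,
# torch.autograd.gradcheck can verify gradients numerically. A sketch only:
# it assumes AF.unit_variance_mlpg accepts float64 inputs (double precision
# is what gradcheck expects); tolerances may need loosening otherwise.
def test_unit_variance_mlpg_numerical_gradcheck():
    from torch.autograd import gradcheck
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]
    static_dim, T = 2, 5
    R = torch.from_numpy(
        G.unit_variance_mlpg_matrix(windows, T)).double()
    means = Variable(
        torch.rand(T, static_dim * len(windows)).double(),
        requires_grad=True)
    assert gradcheck(lambda m: AF.unit_variance_mlpg(R, m),
                     (means,), eps=1e-6, atol=1e-4)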
def benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=True):
    if use_cuda and not torch.cuda.is_available():
        return

    windows = _get_windows_set()[-1]
    np.random.seed(1234)
    torch.manual_seed(1234)
    means = np.random.rand(T, static_dim * len(windows)).astype(np.float32)
    variances = np.ones(static_dim * len(windows))
    reshaped_means = G.reshape_means(means, static_dim)

    # Pseudo target
    y = G.mlpg(means, variances, windows).astype(np.float32)

    # Pack into variables
    means = Variable(torch.from_numpy(means), requires_grad=True)
    reshaped_means = Variable(
        torch.from_numpy(reshaped_means), requires_grad=True)
    y = Variable(torch.from_numpy(y), requires_grad=False)
    criterion = nn.MSELoss()

    # Case 1: MLPG
    since = time.time()
    for _ in range(batch_size):
        y_hat = AF.mlpg(means, torch.from_numpy(variances), windows)
        L = criterion(y_hat, y)
        assert np.allclose(y_hat.data.numpy(), y.data.numpy())
        L.backward()  # slow!
    elapsed_mlpg = time.time() - since

    # Case 2: UnitVarianceMLPG
    since = time.time()
    if use_cuda:
        y = y.cuda()
    R = G.unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)
    # Assuming minibatches are zero-padded, we only need to create the MLPG
    # matrix per-minibatch, not per-utterance.
    if use_cuda:
        R = R.cuda()
    for _ in range(batch_size):
        if use_cuda:
            # Round-trip host <-> device so transfer time is included
            means = means.cpu()
            means = means.cuda()

        y_hat = AF.unit_variance_mlpg(R, means)
        L = criterion(y_hat, y)
        assert np.allclose(y_hat.cpu().data.numpy(), y.cpu().data.numpy(),
                           atol=1e-5)
        L.backward()
    elapsed_unit_variance_mlpg = time.time() - since

    ratio = elapsed_mlpg / elapsed_unit_variance_mlpg

    print("MLPG vs UnitVarianceMLPG "
          "(static_dim, T, batch_size, use_cuda) = ({}):".format(
              (static_dim, T, batch_size, use_cuda)))
    if ratio > 1:
        s = "faster"
        sys.stdout.write(OKGREEN)
    else:
        s = "slower"
        sys.stdout.write(FAIL)
    print("UnitVarianceMLPG, {:4f} times {}. "
          "Elapsed times {:4f} / {:4f}".format(
              ratio, s, elapsed_mlpg, elapsed_unit_variance_mlpg))
    print(ENDC)
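
# A minimal driver for the benchmark above; the parameter values are
# illustrative assumptions, not tuned settings.
if __name__ == "__main__":
    benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=False)
    benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=True)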