def pre_compute_wuw(self, frame_number, var_base):
    """Pre-compute the solved ``wuw`` system and the stacked ``wu`` matrix.

    The three variances in ``var_base`` are repeated for every frame, the
    variances of the two dynamic streams (columns 1 and 2) are replaced by
    a very large value at the first and last frame, and the reciprocal
    variances are passed to ``self.build_wuw`` and ``self.build_wu``.

    Args:
        frame_number: Number of frames the matrices are built for.
        var_base: Three variance values; anything ``np.array`` accepts and
            that can be reshaped to ``(1, 3)``.

    Returns:
        A tuple ``(inv_prec_full, wu_mat)``:

        * ``inv_prec_full`` -- result of ``bla.solveh`` applied to the
          ``build_wuw`` matrix and the ``frame_number`` identity matrix.
        * ``wu_mat`` -- ``(frame_number, frame_number * 3)`` array holding
          the first three ``build_wu`` results side by side.
    """
    # (left extent, right extent, coefficients) for static, delta and
    # delta-delta windows.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)
    huge_variance = 100000000000
    last_frame = frame_number - 1

    win_mats = self.build_win_mats(windows, frame_number)

    base_row = np.reshape(np.array(var_base), (1, num_windows))
    per_frame_var = np.tile(base_row, (frame_number, 1))
    # A huge variance means a near-zero precision, so the dynamic streams
    # carry practically no weight at the two boundary frames.
    for row in (0, last_frame):
        per_frame_var[row, 1] = huge_variance
        per_frame_var[row, 2] = huge_variance
    tau_frames = 1.0 / per_frame_var

    prec = self.build_wuw(frame_number, tau_frames, win_mats)
    inv_prec_full = bla.solveh(prec, np.eye(frame_number))

    wu_list = self.build_wu(frame_number, tau_frames, win_mats)
    wu_mat = np.zeros((frame_number, frame_number * num_windows))
    for k in range(num_windows):
        wu_mat[:, k * frame_number:(k + 1) * frame_number] = wu_list[k]

    return inv_prec_full, wu_mat
def mlpg(mean_frames, variance_frames, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    Generates, one static dimension at a time, a static feature trajectory
    from per-frame means and variances of static + dynamic features.

    Args:
        mean_frames: 2-D array of shape ``(T, D)``. The ``D`` columns are
            read as ``len(windows)`` consecutive groups of ``static_dim``
            columns; the group of window ``k`` starts at column
            ``k * static_dim``.
        variance_frames: Array with the same shape as ``mean_frames``, or a
            1-D array of length ``D`` that is repeated for every frame.
        windows: Sequence of windows, passed unchanged to
            ``build_win_mats``.

    Returns:
        Array of shape ``(T, static_dim)`` with the dtype of
        ``mean_frames``, where ``static_dim = D // len(windows)``.

    Raises:
        AssertionError: If the (expanded) variances do not have the same
            shape as ``mean_frames``.
    """
    dtype = mean_frames.dtype
    T, D = mean_frames.shape
    # expand variances over frames
    if variance_frames.ndim == 1 and variance_frames.shape[0] == D:
        variance_frames = np.tile(variance_frames, (T, 1))
    assert mean_frames.shape == variance_frames.shape

    num_windows = len(windows)
    static_dim = D // num_windows
    win_mats = build_win_mats(windows, T)

    # workspaces; those will be updated in the following generation loop
    means = np.zeros((T, num_windows))
    precisions = np.zeros((T, num_windows))

    # Perform dimension-wise generation
    y = np.zeros((T, static_dim), dtype=dtype)
    for d in range(static_dim):
        for win_idx in range(num_windows):
            col = win_idx * static_dim + d
            means[:, win_idx] = mean_frames[:, col]
            precisions[:, win_idx] = 1 / variance_frames[:, col]

        bs = precisions * means
        b, P = build_poe(bs, precisions, win_mats)
        y[:, d] = bla.solveh(P, b)

    return y
def simple_example_with_random_parameters():
    """Generate and print a trajectory from random means and variances.

    Draws random per-frame means and (absolute-valued) variances for a
    static, a delta and a delta-delta window over 10 frames, builds the
    linear system with ``build_poe``, solves it with ``bla.solveh`` and
    prints the inputs and the resulting mean trajectory.
    """
    # (left extent, right extent, coefficients) for each window.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)
    frames = 10

    mean_frames = np.random.randn(frames, num_windows)
    var_frames = np.abs(np.random.randn(frames, num_windows))

    b_frames = mean_frames / var_frames
    tau_frames = 1.0 / var_frames
    win_mats = build_win_mats(windows, frames)
    b, prec = build_poe(b_frames, tau_frames, win_mats)
    mean_traj = bla.solveh(prec, b)

    # Single-argument parenthesised prints produce the same output as a
    # Python 2 print statement and as a Python 3 function call; the bare
    # ``print`` statements of the original were a SyntaxError on Python 3.
    print('INPUT')
    print('-----')
    print('mean parameters over time:')
    print(mean_frames)
    print('variance parameters over time:')
    print(var_frames)
    # ``print('')`` rather than ``print()``: the latter prints "()" on
    # Python 2 without the print_function future import.
    print('')
    print('OUTPUT')
    print('------')
    print('mean trajectory (= maximum probability trajectory):')
    print(mean_traj)
def pre_compute_wuw(self, frame_number, var_base):
    """Pre-compute the solved ``wuw`` system and the stacked ``wu`` matrix.

    Args:
        frame_number: Number of frames the matrices are built for.
        var_base: Three variance values (static, then the two dynamic
            streams); anything ``np.array`` accepts and that can be
            reshaped to ``(1, 3)``. The same values are used for every
            frame.

    Returns:
        A tuple ``(inv_prec_full, wu_mat)``: the result of solving the
        matrix returned by ``self.build_wuw`` against the identity of size
        ``frame_number``, and a ``(frame_number, frame_number * 3)`` array
        that holds the three matrices returned by ``self.build_wu`` next
        to each other.
    """
    # (left extent, right extent, coefficients) for static, delta and
    # delta-delta windows.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    win_mats = self.build_win_mats(windows, frame_number)

    var_frames = np.tile(np.reshape(np.array(var_base), (1, 3)),
                         (frame_number, 1))

    # Give the dynamic streams a huge variance (near-zero precision) at the
    # first and last frame so they barely constrain the boundaries.
    boundary_variance = 100000000000
    var_frames[0, 1] = boundary_variance
    var_frames[0, 2] = boundary_variance
    var_frames[frame_number - 1, 1] = boundary_variance
    var_frames[frame_number - 1, 2] = boundary_variance

    tau_frames = 1.0 / var_frames

    prec = self.build_wuw(frame_number, tau_frames, win_mats)
    # Solving against the identity yields the full inverse of ``prec``.
    inv_prec_full = bla.solveh(prec, np.eye(frame_number))

    wu_list = self.build_wu(frame_number, tau_frames, win_mats)
    wu_mat = np.zeros((frame_number, frame_number * 3))
    wu_mat[:, 0:frame_number] = wu_list[0]
    wu_mat[:, frame_number:frame_number * 2] = wu_list[1]
    wu_mat[:, frame_number * 2:frame_number * 3] = wu_list[2]

    return inv_prec_full, wu_mat
def generation(self, features, covariance, feature_dim):
    """Smooth each feature dimension from its static and dynamic streams.

    For dimension ``d`` the static, delta and delta-delta means are read
    from columns ``d``, ``feature_dim + d`` and ``2 * feature_dim + d`` of
    ``features``; the matching variances are the diagonal entries of
    ``covariance`` at the same indices and are used for every frame.

    Args:
        features: 2-D array whose first axis is time.
        covariance: 2-D array indexed as ``covariance[c, c]``; only these
            diagonal entries are read.
        feature_dim: Number of static dimensions to generate.

    Returns:
        Array of shape ``(frames, feature_dim)`` with the smoothed
        trajectories.
    """
    # (left extent, right extent, coefficients) for each window.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)
    huge_variance = 100000000000

    frames = features.shape[0]
    smoothed_traj = np.zeros((frames, feature_dim))
    win_mats = self.build_win_mats(windows, frames)

    # Workspaces that are overwritten for every dimension.
    mean_frames = np.zeros((frames, num_windows))
    var_frames = np.zeros((frames, num_windows))

    # If feature has multiple dimension, smooth each of it.
    for d in range(feature_dim):
        stream_cols = [w * feature_dim + d for w in range(num_windows)]

        for w, col in enumerate(stream_cols):
            var_frames[:, w] = covariance[col, col]
        # Near-zero precision for the dynamic streams at both boundary
        # frames.
        for row in (0, -1):
            var_frames[row, 1] = huge_variance
            var_frames[row, 2] = huge_variance

        for w, col in enumerate(stream_cols):
            mean_frames[:, w] = features[:, col]

        tau_frames = 1.0 / var_frames
        b_frames = mean_frames / var_frames
        b, prec = self.build_poe(b_frames, tau_frames, win_mats)
        smoothed_traj[:, d] = bla.solveh(prec, b)

    return smoothed_traj
def test_solveh(self, its=50):
    """Check ``bla.solveh`` against a dense solve on random systems.

    Each iteration draws a size (0, 1, or a random value), a random
    right-hand side and a matrix from ``gen_pos_def_BandMat``, then
    verifies that the banded solution reproduces ``b`` when multiplied
    back, matches the dense reference solution, and does not share memory
    with its inputs.

    Args:
        its: Number of random systems to test.
    """
    for _ in range(its):
        size = random.choice([0, 1, randint(0, 10), randint(0, 100)])
        b = randn(size)
        a_bm = gen_pos_def_BandMat(size)
        a_full = a_bm.full()

        x = bla.solveh(a_bm, b)
        assert_allclose(bm.dot_mv(a_bm, x), b)

        if size == 0:
            # The dense reference solver is skipped for the empty system.
            x_good = np.zeros((size,))
        else:
            # ``assume_a='pos'`` is the supported spelling of the former
            # ``sym_pos=True`` keyword, which newer SciPy releases reject.
            # NOTE(review): assumes ``sla`` is ``scipy.linalg`` -- confirm.
            x_good = sla.solve(a_full, b, assume_a='pos')
        assert_allclose(x, x_good)

        # The result must be a fresh array, not a view of the inputs.
        assert not np.may_share_memory(x, a_bm.data)
        assert not np.may_share_memory(x, b)
def generation(self, features, covariance, static_dimension):
    """Generate a static trajectory for every static dimension.

    For dimension ``d`` the static, delta and delta-delta means and
    variances are read from columns ``d``, ``static_dimension + d`` and
    ``2 * static_dimension + d`` of ``features`` and ``covariance``.

    Args:
        features: 2-D array of means whose first axis is time.
        covariance: Array indexed as ``covariance[:, c]``; each selected
            column is assigned to a per-frame variance column.
        static_dimension: Number of static dimensions to generate.

    Returns:
        Array of shape ``(frame_number, static_dimension)`` with the
        generated trajectories.
    """
    # (left extent, right extent, coefficients) for each window.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)
    huge_variance = 100000000000

    frame_number = features.shape[0]
    last_frame = frame_number - 1

    logger = logging.getLogger('param_generation')
    logger.debug('starting MLParameterGeneration.generation')

    gen_parameter = np.zeros((frame_number, static_dimension))
    win_mats = self.build_win_mats(windows, frame_number)

    # Workspaces that are overwritten for every dimension.
    mu_frames = np.zeros((frame_number, num_windows))
    var_frames = np.zeros((frame_number, num_windows))

    for d in range(static_dimension):
        stream_cols = [w * static_dimension + d for w in range(num_windows)]

        for w, col in enumerate(stream_cols):
            var_frames[:, w] = covariance[:, col]
        for w, col in enumerate(stream_cols):
            mu_frames[:, w] = features[:, col]

        # Near-zero precision for the dynamic streams at both boundary
        # frames.
        for row in (0, last_frame):
            var_frames[row, 1] = huge_variance
            var_frames[row, 2] = huge_variance

        b_frames = old_div(mu_frames, var_frames)
        tau_frames = old_div(1.0, var_frames)
        b, prec = self.build_poe(b_frames, tau_frames, win_mats)

        gen_parameter[:, d] = bla.solveh(prec, b)

    return gen_parameter
def generation(self, features, covariance, static_dimension):
    """Generate a static trajectory for every static dimension.

    Args:
        features: 2-D array of means whose first axis is time. Columns
            ``d``, ``static_dimension + d`` and ``2 * static_dimension + d``
            hold the static, delta and delta-delta means of dimension ``d``.
        covariance: Array indexed as ``covariance[:, c]`` with the same
            column layout as ``features``; each selected column is used as
            the per-frame variance of the corresponding stream.
        static_dimension: Number of static dimensions to generate.

    Returns:
        Array of shape ``(frame_number, static_dimension)`` holding one
        generated trajectory per static dimension.
    """
    # (left extent, right extent, coefficients) for static, delta and
    # delta-delta windows.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    # Huge variance == near-zero precision; used to switch the dynamic
    # streams off at the first and last frame.
    boundary_variance = 100000000000

    frame_number = features.shape[0]

    logger = logging.getLogger('param_generation')
    logger.debug('starting MLParameterGeneration.generation')

    gen_parameter = np.zeros((frame_number, static_dimension))
    win_mats = self.build_win_mats(windows, frame_number)

    # Workspaces that are overwritten for every dimension.
    mu_frames = np.zeros((frame_number, 3))
    var_frames = np.zeros((frame_number, 3))

    for d in range(static_dimension):
        var_frames[:, 0] = covariance[:, d]
        var_frames[:, 1] = covariance[:, static_dimension + d]
        var_frames[:, 2] = covariance[:, static_dimension * 2 + d]

        mu_frames[:, 0] = features[:, d]
        mu_frames[:, 1] = features[:, static_dimension + d]
        mu_frames[:, 2] = features[:, static_dimension * 2 + d]

        # Must be re-applied each iteration because the column assignments
        # above overwrite the boundary rows.
        var_frames[0, 1] = boundary_variance
        var_frames[0, 2] = boundary_variance
        var_frames[frame_number - 1, 1] = boundary_variance
        var_frames[frame_number - 1, 2] = boundary_variance

        b_frames = mu_frames / var_frames
        tau_frames = 1.0 / var_frames
        b, prec = self.build_poe(b_frames, tau_frames, win_mats)

        mean_traj = bla.solveh(prec, b)
        gen_parameter[0:frame_number, d] = mean_traj

    return gen_parameter
def generation(self, features, covariance, static_dimension):
    """Generate a static trajectory for every static dimension.

    Args:
        features: 2-D array of means whose first axis is time. Columns
            ``d``, ``static_dimension + d`` and ``2 * static_dimension + d``
            hold the static, delta and delta-delta means of dimension ``d``.
        covariance: Array indexed as ``covariance[:, c]`` with the same
            column layout as ``features``; each selected column is used as
            the per-frame variance of the corresponding stream.
        static_dimension: Number of static dimensions to generate.

    Returns:
        Array of shape ``(frame_number, static_dimension)`` holding one
        generated trajectory per static dimension.
    """
    # (left extent, right extent, coefficients) for static, delta and
    # delta-delta windows.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    # Huge variance == near-zero precision for the dynamic streams at the
    # boundary frames.
    boundary_variance = 100000000000

    frame_number = features.shape[0]
    last_frame = frame_number - 1

    gen_parameter = np.zeros((frame_number, static_dimension))
    win_mats = self.build_win_mats(windows, frame_number)

    # Workspaces that are overwritten for every dimension.
    mu_frames = np.zeros((frame_number, 3))
    var_frames = np.zeros((frame_number, 3))

    # ``range`` instead of ``xrange``: the latter does not exist on
    # Python 3, and the loop is short enough that the Python 2 list is
    # harmless.
    for d in range(static_dimension):
        var_frames[:, 0] = covariance[:, d]
        var_frames[:, 1] = covariance[:, static_dimension + d]
        var_frames[:, 2] = covariance[:, static_dimension * 2 + d]

        mu_frames[:, 0] = features[:, d]
        mu_frames[:, 1] = features[:, static_dimension + d]
        mu_frames[:, 2] = features[:, static_dimension * 2 + d]

        var_frames[0, 1] = boundary_variance
        var_frames[0, 2] = boundary_variance
        var_frames[last_frame, 1] = boundary_variance
        var_frames[last_frame, 2] = boundary_variance

        b_frames = mu_frames / var_frames
        tau_frames = 1.0 / var_frames
        b, prec = self.build_poe(b_frames, tau_frames, win_mats)

        mean_traj = bla.solveh(prec, b)
        gen_parameter[0:frame_number, d] = mean_traj

    return gen_parameter
def mlpg(mean_frames, variance_frames, windows):
    r"""Maximum Likelihood Parameter Generation (MLPG)

    Function ``f: (T, D) -> (T, static_dim)``.

    It performs the Maximum Likelihood Parameter Generation (MLPG)
    algorithm to generate static features from static + dynamic features
    over time frames, dimension by dimension.

    Let :math:`\mu` (``T x 1``) be the input mean sequence of a particular
    dimension and :math:`y` (``T x 1``) the static feature sequence we want
    to compute; the formula of MLPG is written as:

    .. math::

        y = A^{-1} b

    where

    .. math::

        A = \sum_{l} W_{l}^{T}P_{l}W_{l}

    ,

    .. math::

        b = P\mu

    :math:`W_{l}` is the ``l``-th window matrix (``T x T``) and :math:`P`
    (``T x T``) is the precision matrix which is given by the inverse of
    variance matrix.

    The implementation was heavily inspired by [1]_ and uses bandmat_ for
    efficient computation.

    .. _bandmat: https://github.com/MattShannon/bandmat

    .. [1] M. Shannon, supervised by W. Byrne (2014),
      Probabilistic acoustic modelling for parametric speech synthesis
      PhD thesis, University of Cambridge, UK

    Args:
        mean_frames (2darray): The input features (static + delta).
            In statistical speech synthesis, these are means of gaussian
            distributions predicted by neural networks or decision trees.
        variance_frames (2d or 1darray): Variances (static + delta ) of
            gaussian distributions over time frames (2d) or global
            variances (1d). If global variances are given, these will get
            expanded over frames.
        windows (list): A sequence of ``(l, u, win_coeff)`` triples, where
            ``l`` and ``u`` are non-negative integers specifying the left
            and right extents of the window and `win_coeff` is an array
            specifying the window coefficients.

    Returns:
        Generated static features over time

    Raises:
        AssertionError: If the (expanded) variances do not have the same
            shape as ``mean_frames``.

    Examples:
        >>> from nnmnkwii import paramgen as G
        >>> windows = [
        ...         (0, 0, np.array([1.0])),            # static
        ...         (1, 1, np.array([-0.5, 0.0, 0.5])), # delta
        ...         (1, 1, np.array([1.0, -2.0, 1.0])), # delta-delta
        ...     ]
        >>> T, static_dim = 10, 24
        >>> mean_frames = np.random.rand(T, static_dim * len(windows))
        >>> variance_frames = np.random.rand(T, static_dim * len(windows))
        >>> static_features = G.mlpg(mean_frames, variance_frames, windows)
        >>> assert static_features.shape == (T, static_dim)

    See also:
        :func:`nnmnkwii.autograd.mlpg`

    """
    dtype = mean_frames.dtype
    T, D = mean_frames.shape
    # expand variances over frames
    if variance_frames.ndim == 1 and variance_frames.shape[0] == D:
        variance_frames = np.tile(variance_frames, (T, 1))
    assert mean_frames.shape == variance_frames.shape

    num_windows = len(windows)
    static_dim = D // num_windows
    win_mats = build_win_mats(windows, T)

    # Number of frames at each end whose dynamic features are ignored.
    max_win_width = max(max(win_mat.l, win_mat.u) for win_mat in win_mats)
    # Explicit start index for the trailing edge: a ``-max_win_width:``
    # slice would select the WHOLE column when the width is 0.
    tail_start = max(T - max_win_width, 0)

    # workspaces; those will be updated in the following generation loop
    means = np.zeros((T, num_windows))
    precisions = np.zeros((T, num_windows))

    # Perform dimension-wise generation
    y = np.zeros((T, static_dim), dtype=dtype)
    for d in range(static_dim):
        for win_idx in range(num_windows):
            col = win_idx * static_dim + d
            means[:, win_idx] = mean_frames[:, col]
            precisions[:, win_idx] = 1 / variance_frames[:, col]

            # use zero precisions at edge frames for dynamic features
            if win_idx != 0:
                precisions[:max_win_width, win_idx] = 0
                precisions[tail_start:, win_idx] = 0

        bs = precisions * means
        b, P = build_poe(bs, precisions, win_mats)
        y[:, d] = bla.solveh(P, b)

    return y
def MLPG(means, variances, windows=None, padding_size=0, seq_len=None):
    r"""Performs maximum-likelihood parameter generation.

    Parameters
    ----------
    means : np.ndarray, shape (batch_size, seq_len, feat_dim) or (seq_len, feat_dim)
        Array of means for a single, or a batch of sequences.
    variances : np.ndarray, shape (batch_size, seq_len, feat_dim) or (seq_len, feat_dim) or (feat_dim)
        Array of variances for a single, or a batch of sequences.
    windows : list[tuple[int, int, np.ndarray]]
        Windows describing the static/delta features included in the feature dimension of `means` and `variances`.
        Defaults to a static, a delta and a delta-delta window.
    padding_size : int
        Padding on either side of signal, used to handle smoothing at the boundaries.
    seq_len : array_like, shape (batch_size)
        Sequence lengths, necessary when a batch of sequences is given, as out-of-sequence frames will be all zeros.
        Sequences of length zero are skipped and their output stays all zeros.

    Returns
    -------
    most_probable_trajectory : np.ndarray, shape (batch_size, seq_len, feat_dim) or (seq_len, feat_dim)
        The most probable trajectory, calculated by maximum-likelihood parameter generation. Here `feat_dim` is the
        input feature dimension divided by the number of windows. If any input was a `torch.Tensor`, a float tensor
        on that input's device is returned instead.
    """
    # If inputs are torch.Tensor then convert to numpy.ndarray and convert back at the end of this function.
    device = None
    if isinstance(means, torch.Tensor):
        device = means.device
        means = means.detach().cpu().numpy()
    if isinstance(variances, torch.Tensor):
        if device is None:
            device = variances.device
        variances = variances.detach().cpu().numpy()
    if isinstance(seq_len, torch.Tensor):
        if device is None:
            device = seq_len.device
        seq_len = seq_len.detach().cpu().numpy()

    def _pad(sequence_feature, n=3):
        # Repeat the first and last frame `n` times on either side.
        return np.concatenate(
            (np.repeat(sequence_feature[[0], :], n, axis=0),
             sequence_feature,
             np.repeat(sequence_feature[[-1], :], n, axis=0)),
            axis=0)

    if windows is None:
        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]

    if means.ndim == 2:
        # Single sequence.
        means = means[np.newaxis, ...]
        using_batches = False
    else:
        # Batch of sequences.
        using_batches = True

    batch_size = means.shape[0]
    num_frames = means.shape[1]
    num_windows = len(windows)
    feat_dim = means.shape[-1] // num_windows

    if seq_len is None:
        seq_len = [num_frames] * batch_size

    if variances.ndim == 2:
        # Single sequence.
        variances = variances[None, ...]
    elif variances.ndim == 1:
        # Global variance.
        one_batch_variances = np.repeat(variances[None, :], num_frames, axis=0)
        variances = np.repeat(one_batch_variances[None, :, :], batch_size, axis=0)

    # Index array that can be used to select feature dimension and its corresponding deltas.
    idx_base = np.arange(num_windows) * feat_dim

    most_probable_trajectory = np.zeros((batch_size, num_frames, feat_dim))
    for i in range(batch_size):
        length = seq_len[i]
        if length == 0:
            # Nothing to generate; `_pad` cannot repeat the first/last frame of an empty sequence.
            continue

        # Crop using the sequence length, and add padding to act as a burn in.
        means_i = _pad(means[i, :length], n=padding_size)
        variances_i = _pad(variances[i, :length], n=padding_size)

        win_mats = _build_win_mats(windows, length + 2 * padding_size)

        for d in range(feat_dim):
            feat_mean = means_i[:, idx_base + d]
            feat_variance = variances_i[:, idx_base + d]

            feat_b = feat_mean / feat_variance
            feat_tau = 1.0 / feat_variance
            b, prec = _build_poe(feat_b, feat_tau, win_mats)

            feat_trajectory = bla.solveh(prec, b)
            # Drop the burn-in padding again before storing the result.
            most_probable_trajectory[i, :length, d] = \
                feat_trajectory[padding_size:len(feat_trajectory) - padding_size]

    if not using_batches:
        most_probable_trajectory = most_probable_trajectory.squeeze(axis=0)

    # If the input had type torch.Tensor, then convert the output to the same type.
    if device is not None:
        most_probable_trajectory = torch.tensor(most_probable_trajectory).type(torch.float).to(device)

    return most_probable_trajectory