예제 #1
0
    def pre_compute_wuw(self, frame_number, var_base):
        """Pre-compute the inverse precision matrix and the stacked W'U blocks.

        The same per-window variances ``var_base`` are used for every frame,
        except at the first and last frame where the non-static windows get
        a very large variance.

        Args:
            frame_number: Number of frames of the trajectory.
            var_base: One variance per window (three values); it is reshaped
                to a single row and repeated for every frame.

        Returns:
            A tuple ``(inv_prec_full, wu_mat)``. ``inv_prec_full`` is the
            result of ``bla.solveh`` applied to the matrix returned by
            ``self.build_wuw`` and an identity matrix of size
            ``frame_number``. ``wu_mat`` has shape
            ``(frame_number, frame_number * num_windows)`` and holds the
            entries of the list returned by ``self.build_wu`` side by side,
            one block of ``frame_number`` columns per window.
        """
        # A huge variance means a precision (1 / variance) close to zero, so
        # the affected window contributes almost nothing at that frame.
        boundary_variance = 100000000000

        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)

        win_mats = self.build_win_mats(windows, frame_number)

        var_base = np.reshape(np.array(var_base), (1, num_windows))
        var_frames = np.tile(var_base, (frame_number, 1))
        # Columns 1.. are the non-static windows (assuming the columns follow
        # the order of ``windows``); relax them at both sequence boundaries.
        var_frames[0, 1:] = boundary_variance
        var_frames[frame_number - 1, 1:] = boundary_variance

        tau_frames = 1.0 / var_frames

        prec = self.build_wuw(frame_number, tau_frames, win_mats)
        inv_prec_full = bla.solveh(prec, np.eye(frame_number))

        wu_list = self.build_wu(frame_number, tau_frames, win_mats)

        # Lay the per-window matrices next to each other along the columns.
        wu_mat = np.zeros((frame_number, frame_number * num_windows))
        for win_idx in range(num_windows):
            start = win_idx * frame_number
            wu_mat[:, start:start + frame_number] = wu_list[win_idx]

        return inv_prec_full, wu_mat
예제 #2
0
def mlpg(mean_frames, variance_frames, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    Generates one trajectory per static dimension from means and variances
    that hold one column block per window.

    Args:
        mean_frames: 2-D array of shape ``(T, D)``.
        variance_frames: Array with the same shape as ``mean_frames``, or a
            1-D array of length ``D`` that is repeated for every frame.
        windows: Sequence of windows; passed to ``build_win_mats``.

    Returns:
        Array of shape ``(T, D // len(windows))`` with the dtype of
        ``mean_frames``.
    """
    out_dtype = mean_frames.dtype
    n_frames, n_feats = mean_frames.shape

    # A 1-D variance vector of matching length is shared by all frames.
    is_global_variance = (
        variance_frames.ndim == 1 and variance_frames.shape[0] == n_feats)
    if is_global_variance:
        variance_frames = np.tile(variance_frames, (n_frames, 1))
    assert mean_frames.shape == variance_frames.shape

    num_windows = len(windows)
    static_dim = n_feats // num_windows
    win_mats = build_win_mats(windows, n_frames)

    # Scratch buffers, overwritten for each static dimension.
    mu_work = np.zeros((n_frames, num_windows))
    tau_work = np.zeros((n_frames, num_windows))

    trajectory = np.zeros((n_frames, static_dim), dtype=out_dtype)
    for dim in range(static_dim):
        # Gather this dimension's column from every window block.
        columns = [w * static_dim + dim for w in range(num_windows)]
        for w, col in enumerate(columns):
            mu_work[:, w] = mean_frames[:, col]
            tau_work[:, w] = 1 / variance_frames[:, col]

        rhs, precision = build_poe(tau_work * mu_work, tau_work, win_mats)
        trajectory[:, dim] = bla.solveh(precision, rhs)

    return trajectory
예제 #3
0
def simple_example_with_random_parameters():
    """Smooth random per-window means and variances and print the result.

    Draws random means and (absolute-valued) random variances for 10 frames
    and three windows, builds the system with ``build_win_mats`` and
    ``build_poe``, solves it with ``bla.solveh`` and prints the inputs and
    the resulting trajectory.

    The ``print`` calls use the single-argument parenthesised form so the
    function runs, with the same output, on both Python 2 and Python 3.
    """
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)

    frames = 10
    mean_frames = np.random.randn(frames, num_windows)
    var_frames = np.abs(np.random.randn(frames, num_windows))

    # Mean scaled by precision, and the precision itself, per frame/window.
    b_frames = mean_frames / var_frames
    tau_frames = 1.0 / var_frames
    win_mats = build_win_mats(windows, frames)
    b, prec = build_poe(b_frames, tau_frames, win_mats)
    mean_traj = bla.solveh(prec, b)

    print('INPUT')
    print('-----')
    print('mean parameters over time:')
    print(mean_frames)
    print('variance parameters over time:')
    print(var_frames)
    print('')
    print('OUTPUT')
    print('------')
    print('mean trajectory (= maximum probability trajectory):')
    print(mean_traj)
예제 #4
0
파일: st_dnn_cm.py 프로젝트: ronanki/merlin
    def pre_compute_wuw(self, frame_number, var_base):
        """Pre-compute the inverse precision matrix and the stacked W'U blocks.

        Every frame uses the same per-window variances ``var_base``; only the
        first and last frame differ, where the non-static windows are given
        a very large variance.

        Args:
            frame_number: Number of frames of the trajectory.
            var_base: One variance per window (three values); reshaped to a
                single row and repeated for every frame.

        Returns:
            A tuple ``(inv_prec_full, wu_mat)``. ``inv_prec_full`` is what
            ``bla.solveh`` returns for the matrix built by ``self.build_wuw``
            and an identity matrix of size ``frame_number``. ``wu_mat`` has
            shape ``(frame_number, frame_number * num_windows)``; block ``k``
            of ``frame_number`` columns is entry ``k`` of the list returned
            by ``self.build_wu``.
        """
        # Large variance -> precision (1 / variance) near zero, i.e. the
        # window is practically ignored at that frame.
        boundary_variance = 100000000000

        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)

        win_mats = self.build_win_mats(windows, frame_number)

        var_row = np.reshape(np.array(var_base), (1, num_windows))
        var_frames = np.tile(var_row, (frame_number, 1))
        # Relax every window except the first one at both boundaries
        # (assuming the columns follow the order of ``windows``).
        for boundary in (0, frame_number - 1):
            var_frames[boundary, 1:] = boundary_variance

        tau_frames = 1.0 / var_frames

        prec = self.build_wuw(frame_number, tau_frames, win_mats)
        inv_prec_full = bla.solveh(prec, np.eye(frame_number))

        wu_list = self.build_wu(frame_number, tau_frames, win_mats)

        # Place the per-window matrices side by side along the column axis.
        wu_mat = np.zeros((frame_number, frame_number * num_windows))
        for win_idx in range(num_windows):
            first_col = win_idx * frame_number
            wu_mat[:, first_col:first_col + frame_number] = wu_list[win_idx]

        return inv_prec_full, wu_mat
예제 #5
0
def simple_example_with_random_parameters():
    """Demonstrate trajectory generation on randomly drawn parameters.

    Random means and absolute-valued random variances are drawn for 10
    frames and three windows. The system is assembled with
    ``build_win_mats`` / ``build_poe``, solved with ``bla.solveh``, and both
    the inputs and the resulting trajectory are printed.

    Printing uses single-argument ``print(...)`` calls, which behave the
    same on Python 2 and Python 3.
    """
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    num_windows = len(windows)

    frames = 10
    mean_frames = np.random.randn(frames, num_windows)
    var_frames = np.abs(np.random.randn(frames, num_windows))

    # Precision-weighted means and precisions for every frame and window.
    b_frames = mean_frames / var_frames
    tau_frames = 1.0 / var_frames
    win_mats = build_win_mats(windows, frames)
    b, prec = build_poe(b_frames, tau_frames, win_mats)
    mean_traj = bla.solveh(prec, b)

    print('INPUT')
    print('-----')
    print('mean parameters over time:')
    print(mean_frames)
    print('variance parameters over time:')
    print(var_frames)
    print('')
    print('OUTPUT')
    print('------')
    print('mean trajectory (= maximum probability trajectory):')
    print(mean_traj)
예제 #6
0
파일: mlpg.py 프로젝트: sshuster/IdiapTTS
    def generation(self, features, covariance, feature_dim):
        """Smooth each feature dimension using its window-wise statistics.

        Args:
            features: 2-D array with one row per frame. Column
                ``w * feature_dim + d`` is read as the mean of window ``w``
                for dimension ``d``.
            covariance: 2-D array; only the diagonal entries at index
                ``w * feature_dim + d`` are read and used as the variance of
                window ``w`` for dimension ``d`` in every frame.
            feature_dim: Number of dimensions to smooth.

        Returns:
            Array of shape ``(frames, feature_dim)`` with the smoothed
            trajectories.
        """
        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)
        frames = features.shape[0]

        smoothed_traj = np.zeros((frames, feature_dim))
        win_mats = self.build_win_mats(windows, frames)

        # Scratch buffers that are refilled for every dimension.
        mean_frames = np.zeros((frames, num_windows))
        var_frames = np.zeros((frames, num_windows))

        for d in range(feature_dim):
            for w in range(num_windows):
                idx = w * feature_dim + d
                var_frames[:, w] = covariance[idx, idx]
                mean_frames[:, w] = features[:, idx]

            # A huge variance at the first and last frame makes the
            # precision of the two non-static windows practically zero there.
            for edge in (0, -1):
                var_frames[edge, 1] = 100000000000
                var_frames[edge, 2] = 100000000000

            tau_frames = 1.0 / var_frames
            b_frames = mean_frames / var_frames
            b, prec = self.build_poe(b_frames, tau_frames, win_mats)
            smoothed_traj[:, d] = bla.solveh(prec, b)

        return smoothed_traj
예제 #7
0
    def test_solveh(self, its=50):
        """Check ``bla.solveh`` against a dense solve on random systems.

        For ``its`` random sizes (including the empty and 1x1 cases) the
        banded solution is verified to satisfy the system, to match the
        dense reference solution, and not to share memory with its inputs.

        Args:
            its: Number of random trials.
        """
        for _ in range(its):
            size = random.choice([0, 1, randint(0, 10), randint(0, 100)])
            b = randn(size)
            a_bm = gen_pos_def_BandMat(size)
            a_full = a_bm.full()

            x = bla.solveh(a_bm, b)
            assert_allclose(bm.dot_mv(a_bm, x), b)
            if size == 0:
                # The empty system is special-cased instead of being passed
                # to the dense solver.
                x_good = np.zeros((size,))
            else:
                # ``assume_a='pos'`` replaces the ``sym_pos=True`` flag,
                # which newer SciPy releases no longer accept.
                x_good = sla.solve(a_full, b, assume_a='pos')
            assert_allclose(x, x_good)
            # The result must be a fresh array, not a view of the inputs.
            assert not np.may_share_memory(x, a_bm.data)
            assert not np.may_share_memory(x, b)
예제 #8
0
    def test_solveh(self, its=50):
        """Compare ``bla.solveh`` with a dense reference on random inputs.

        Each trial draws a random size (0 and 1 are always candidates),
        solves the banded positive-definite system and checks the residual,
        the agreement with the dense solution, and that the output does not
        alias the inputs.

        Args:
            its: Number of random trials.
        """
        for _ in range(its):
            size = random.choice([0, 1, randint(0, 10), randint(0, 100)])
            b = randn(size)
            a_bm = gen_pos_def_BandMat(size)
            a_full = a_bm.full()

            x = bla.solveh(a_bm, b)
            assert_allclose(bm.dot_mv(a_bm, x), b)
            if size == 0:
                # Empty system: use an empty reference rather than calling
                # the dense solver.
                x_good = np.zeros((size,))
            else:
                # Use ``assume_a='pos'``; the older ``sym_pos=True`` keyword
                # is rejected by newer SciPy releases.
                x_good = sla.solve(a_full, b, assume_a='pos')
            assert_allclose(x, x_good)
            # The solver must return newly allocated memory.
            assert not np.may_share_memory(x, a_bm.data)
            assert not np.may_share_memory(x, b)
예제 #9
0
    def generation(self, features, covariance, static_dimension):
        """Generate a trajectory for every static dimension.

        Args:
            features: 2-D array with one row per frame. Column
                ``w * static_dimension + d`` is read as the mean of window
                ``w`` for dimension ``d``.
            covariance: 2-D array indexed column-wise exactly like
                ``features``; each column provides the variances that are
                written into one window column of the workspace.
            static_dimension: Number of static dimensions to generate.

        Returns:
            Array of shape ``(frame_number, static_dimension)``.
        """
        # A huge variance gives a precision (1 / variance) near zero, so the
        # affected window barely contributes at that frame.
        boundary_variance = 100000000000

        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)

        frame_number = features.shape[0]

        logger = logging.getLogger('param_generation')
        logger.debug('starting MLParameterGeneration.generation')

        gen_parameter = np.zeros((frame_number, static_dimension))

        win_mats = self.build_win_mats(windows, frame_number)
        # Workspaces with one column per window, refilled per dimension.
        mu_frames = np.zeros((frame_number, num_windows))
        var_frames = np.zeros((frame_number, num_windows))

        for d in range(static_dimension):
            for win_idx in range(num_windows):
                col = win_idx * static_dimension + d
                var_frames[:, win_idx] = covariance[:, col]
                mu_frames[:, win_idx] = features[:, col]

            # Relax all windows except the first at the first and last frame.
            var_frames[0, 1:] = boundary_variance
            var_frames[frame_number - 1, 1:] = boundary_variance

            b_frames = old_div(mu_frames, var_frames)
            tau_frames = old_div(1.0, var_frames)

            b, prec = self.build_poe(b_frames, tau_frames, win_mats)
            mean_traj = bla.solveh(prec, b)

            gen_parameter[0:frame_number, d] = mean_traj

        return gen_parameter
예제 #10
0
    def generation(self, features, covariance, static_dimension):
        """Generate a trajectory for every static dimension.

        Args:
            features: 2-D array with one row per frame; column
                ``w * static_dimension + d`` holds the mean of window ``w``
                for dimension ``d``.
            covariance: 2-D array read column-wise with the same indexing as
                ``features``; each column supplies the variances of one
                window for one dimension.
            static_dimension: Number of static dimensions to generate.

        Returns:
            Array of shape ``(frame_number, static_dimension)``.
        """
        # Very large variance -> precision close to zero at that frame.
        boundary_variance = 100000000000

        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)

        frame_number = features.shape[0]

        logger = logging.getLogger('param_generation')
        logger.debug('starting MLParameterGeneration.generation')

        gen_parameter = np.zeros((frame_number, static_dimension))

        win_mats = self.build_win_mats(windows, frame_number)
        # Per-dimension workspaces, one column per window.
        mu_frames = np.zeros((frame_number, num_windows))
        var_frames = np.zeros((frame_number, num_windows))

        for d in range(static_dimension):
            for win_idx in range(num_windows):
                col = win_idx * static_dimension + d
                var_frames[:, win_idx] = covariance[:, col]
                mu_frames[:, win_idx] = features[:, col]

            # The non-first windows are relaxed at both sequence boundaries.
            for boundary in (0, frame_number - 1):
                var_frames[boundary, 1:] = boundary_variance

            b_frames = mu_frames / var_frames
            tau_frames = 1.0 / var_frames

            b, prec = self.build_poe(b_frames, tau_frames, win_mats)
            mean_traj = bla.solveh(prec, b)

            gen_parameter[0:frame_number, d] = mean_traj

        return gen_parameter
예제 #11
0
    def generation(self, features, covariance, static_dimension):
        """Generate a trajectory for every static dimension.

        Args:
            features: 2-D array with one row per frame; column
                ``w * static_dimension + d`` is used as the mean of window
                ``w`` for dimension ``d``.
            covariance: 2-D array read column-wise with the same indexing as
                ``features`` to obtain the corresponding variances.
            static_dimension: Number of static dimensions to generate.

        Returns:
            Array of shape ``(frame_number, static_dimension)``.
        """
        # Very large variance -> precision (1 / variance) close to zero.
        boundary_variance = 100000000000

        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]
        num_windows = len(windows)

        frame_number = features.shape[0]

        gen_parameter = np.zeros((frame_number, static_dimension))

        win_mats = self.build_win_mats(windows, frame_number)
        # Workspaces reused across dimensions, one column per window.
        mu_frames = np.zeros((frame_number, num_windows))
        var_frames = np.zeros((frame_number, num_windows))

        # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
        for d in range(static_dimension):
            for win_idx in range(num_windows):
                col = win_idx * static_dimension + d
                var_frames[:, win_idx] = covariance[:, col]
                mu_frames[:, win_idx] = features[:, col]

            # Relax all windows except the first at the first and last frame.
            var_frames[0, 1:] = boundary_variance
            var_frames[frame_number - 1, 1:] = boundary_variance

            b_frames = mu_frames / var_frames
            tau_frames = 1.0 / var_frames

            b, prec = self.build_poe(b_frames, tau_frames, win_mats)
            mean_traj = bla.solveh(prec, b)

            gen_parameter[0:frame_number, d] = mean_traj

        return gen_parameter
예제 #12
0
def mlpg(mean_frames, variance_frames, windows):
    r"""Maximum Likelihood Parameter Generation (MLPG)

    Function ``f: (T, D) -> (T, static_dim)``.

    It performs the Maximum Likelihood Parameter Generation (MLPG) algorithm
    to generate static features from static + dynamic features over
    time frames dimension-by-dimension.

    Let :math:`\mu` (``T x 1``) be the input mean sequence of a particular
    dimension and :math:`y` (``T x 1``) the static
    feature sequence we want to compute; the formula of MLPG is written as:

    .. math::

        y = A^{-1} b

    where

    .. math::

        A = \sum_{l} W_{l}^{T}P_{l}W_{l}

    ,

    .. math::

        b = P\mu

    :math:`W_{l}` is the ``l``-th window matrix (``T x T``) and :math:`P`
    (``T x T``) is the precision matrix which is given by the inverse of
    variance matrix.

    The precisions of every window except the first are set to zero on the
    first and last ``max_win_width`` frames, where ``max_win_width`` is the
    largest left/right extent of the window matrices.

    The implementation was heavily inspired by [1]_ and
    using bandmat_ for efficient computation.

    .. _bandmat: https://github.com/MattShannon/bandmat

    .. [1] M. Shannon, supervised by W. Byrne (2014),
      Probabilistic acoustic modelling for parametric speech synthesis
      PhD thesis, University of Cambridge, UK

    Args:
        mean_frames (2darray): The input features (static + delta).
            In statistical speech synthesis, these are means of gaussian
            distributions predicted by neural networks or decision trees.
        variance_frames (2d or 1darray): Variances (static + delta) of
            gaussian distributions over time frames (2d) or global
            variances (1d). If global variances are given, these will get
            expanded over frames.
        windows (list): A sequence of ``(l, u, win_coeff)`` triples, where
            ``l`` and ``u`` are non-negative integers specifying the left
            and right extents of the window and `win_coeff` is an array
            specifying the window coefficients.

    Returns:
        Generated static features over time

    Examples:
        >>> from nnmnkwii import paramgen as G
        >>> windows = [
        ...         (0, 0, np.array([1.0])),            # static
        ...         (1, 1, np.array([-0.5, 0.0, 0.5])), # delta
        ...         (1, 1, np.array([1.0, -2.0, 1.0])), # delta-delta
        ...     ]
        >>> T, static_dim = 10, 24
        >>> mean_frames = np.random.rand(T, static_dim * len(windows))
        >>> variance_frames = np.random.rand(T, static_dim * len(windows))
        >>> static_features = G.mlpg(mean_frames, variance_frames, windows)
        >>> assert static_features.shape == (T, static_dim)

    See also:
        :func:`nnmnkwii.autograd.mlpg`

    """
    dtype = mean_frames.dtype
    T, D = mean_frames.shape
    # expand variances over frames
    if variance_frames.ndim == 1 and variance_frames.shape[0] == D:
        variance_frames = np.tile(variance_frames, (T, 1))
    assert mean_frames.shape == variance_frames.shape
    static_dim = D // len(windows)

    num_windows = len(windows)
    win_mats = build_win_mats(windows, T)

    max_win_width = np.max([max(win_mat.l, win_mat.u) for win_mat in win_mats])

    # workspaces; those will be updated in the following generation loop
    means = np.zeros((T, num_windows))
    precisions = np.zeros((T, num_windows))
    # Perform dimension-wise generation
    y = np.zeros((T, static_dim), dtype=dtype)
    for d in range(static_dim):

        for win_idx in range(num_windows):
            means[:, win_idx] = mean_frames[:, win_idx * static_dim + d]
            precisions[:, win_idx] = 1 / \
                variance_frames[:, win_idx * static_dim + d]

            # use zero precisions at edge frames for dynamic features.
            # The width guard matters: with a width of 0 the slice
            # ``[-0:]`` would select (and zero) the entire column.
            if win_idx != 0 and max_win_width > 0:
                precisions[:max_win_width, win_idx] = 0
                precisions[-max_win_width:, win_idx] = 0

        bs = precisions * means
        b, P = build_poe(bs, precisions, win_mats)
        y[:, d] = bla.solveh(P, b)

    return y
예제 #13
0
def MLPG(means, variances, windows=None, padding_size=0, seq_len=None):
    r"""Performs maximum-likelihood parameter generation.

    Parameters
    ----------
    means : np.ndarray, shape (batch_size, seq_len, feat_dim) or (seq_len, feat_dim)
        Array of means for a single, or a batch of sequences.
    variances : np.ndarray, shape (batch_size, seq_len, feat_dim) or (seq_len, feat_dim) or (feat_dim)
        Array of variances for a single, or a batch of sequences.
    windows : list[tuple[int, int, np.ndarray]]
        Windows describing the static/delta features included in the feature dimension of `means` and `variances`.
        Defaults to static, delta and delta-delta windows.
    padding_size : int
        Padding on either side of signal, used to handle smoothing at the boundaries.
    seq_len : array_like, shape (batch_size)
        Sequence lengths, necessary when a batch of sequences is given, as out-of-sequence frames will be all zeros.
        Defaults to the full number of frames for every sequence.

    Returns
    -------
    most_probable_trajectory : np.ndarray, shape (batch_size, seq_len, feat_dim // len(windows)) or (seq_len, feat_dim // len(windows))
        The most probable trajectory, calculated by maximum-likelihood parameter generation. Frames beyond a
        sequence's length (and sequences of length zero) are left as zeros. If any input was a `torch.Tensor`
        the result is returned as a float `torch.Tensor` on that input's device.
    """
    # If inputs are torch.Tensor then convert to numpy.ndarray and convert back at the end of this function.
    device = None
    if isinstance(means, torch.Tensor):
        device = means.device
        means = means.detach().cpu().numpy()
    if isinstance(variances, torch.Tensor):
        if device is None:
            device = variances.device
        variances = variances.detach().cpu().numpy()
    if isinstance(seq_len, torch.Tensor):
        if device is None:
            device = seq_len.device
        seq_len = seq_len.detach().cpu().numpy()

    def _pad(sequence_feature, n=3):
        # Repeat the first and the last frame `n` times on either side.
        return np.concatenate(
            (np.repeat(sequence_feature[[0], :], n, axis=0), sequence_feature,
             np.repeat(sequence_feature[[-1], :], n, axis=0)),
            axis=0)

    if windows is None:
        windows = [
            (0, 0, np.array([1.0])),
            (1, 1, np.array([-0.5, 0.0, 0.5])),
            (1, 1, np.array([1.0, -2.0, 1.0])),
        ]

    if means.ndim == 2:  # Single sequence.
        means = means[np.newaxis, ...]
        using_batches = False
    else:  # Batch of sequences.
        using_batches = True

    batch_size = means.shape[0]
    num_frames = means.shape[1]
    num_windows = len(windows)
    feat_dim = means.shape[-1] // num_windows

    if seq_len is None:
        seq_len = [num_frames] * batch_size

    if variances.ndim == 2:  # Single sequence.
        variances = variances[None, ...]
    elif variances.ndim == 1:  # Global variance.
        one_batch_variances = np.repeat(variances[None, :], num_frames, axis=0)
        variances = np.repeat(one_batch_variances[None, :, :],
                              batch_size,
                              axis=0)

    # Index array that can be used to select feature dimension and its corresponding deltas.
    idx_base = np.arange(num_windows) * feat_dim

    most_probable_trajectory = np.zeros((batch_size, num_frames, feat_dim))

    for i in range(batch_size):
        length = seq_len[i]
        if length == 0:
            # An empty sequence has no first/last frame to replicate in `_pad` and nothing to generate;
            # its output rows simply stay zero.
            continue

        # Crop using the sequence length, and add padding to act as a burn in.
        means_i = _pad(means[i, :length], n=padding_size)
        variances_i = _pad(variances[i, :length], n=padding_size)
        win_mats = _build_win_mats(windows, length + 2 * padding_size)

        for d in range(feat_dim):
            feat_mean = means_i[:, idx_base + d]
            feat_variance = variances_i[:, idx_base + d]

            feat_b = feat_mean / feat_variance
            feat_tau = 1.0 / feat_variance

            b, prec = _build_poe(feat_b, feat_tau, win_mats)
            feat_trajectory = bla.solveh(prec, b)

            # Drop the padded frames again before storing the result.
            most_probable_trajectory[i, :length, d] = \
                feat_trajectory[padding_size:len(feat_trajectory)-padding_size]

    if not using_batches:
        most_probable_trajectory = most_probable_trajectory.squeeze(axis=0)

    # If the input had type torch.Tensor, then convert the output to the same type.
    if device is not None:
        most_probable_trajectory = torch.tensor(most_probable_trajectory).type(
            torch.float).to(device)

    return most_probable_trajectory