Example #1
0
    def convergence_callback(Y, **kwargs):
        """
        Evaluate the current separation estimate and record SDR/SIR.

        The STFT estimate ``Y`` is rescaled by projection back onto the first
        microphone, converted to the time domain, and scored against the
        module-level reference signals ``ref`` with ``bss_eval_sources``.
        The resulting SDR and SIR are appended to the global lists ``SDR`` and
        ``SIR``; the wall-clock time spent in this callback is appended to
        ``eval_time`` so that it can be subtracted from the processing time.

        Any extra keyword arguments are accepted and ignored.
        """
        global SDR, SIR, ref

        started = time.perf_counter()

        # imported lazily, after the timer has started (same as before)
        from mir_eval.separation import bss_eval_sources

        # fix the scale of the estimate (projection back on the first mic)
        scale = projection_back(Y, X_mics[:, :, 0])
        Y_scaled = Y.copy() * np.conj(scale[None, :, :])

        # back to the time domain; a single channel is synthesized as a
        # 2D STFT and then given back its channel axis
        if Y_scaled.shape[2] != 1:
            sig = pra.transform.synthesis(Y_scaled, framesize, hop, win=win_s)
        else:
            mono = pra.transform.synthesis(
                Y_scaled[:, :, 0], framesize, hop, win=win_s
            )
            sig = mono[:, None]
        sig = sig[framesize - hop :, :].astype(np.float64)

        # order the channels by decreasing standard deviation, except for
        # blinkiva whose channel order is kept as is
        if args.algo != "blinkiva":
            by_power = np.argsort(np.std(sig, axis=0))[::-1]
            sig = sig[:, by_power]

        # compare only the overlapping part of the signals
        n_common = np.minimum(sig.shape[0], ref.shape[1])
        # the first estimated channel is used twice
        # NOTE(review): presumably ``ref`` holds two reference rows - confirm
        sdr_val, sir_val, _sar, _perm = bss_eval_sources(
            ref[:, :n_common], sig[:n_common, [0, 0]].T
        )
        SDR.append(sdr_val)
        SIR.append(sir_val)

        finished = time.perf_counter()
        eval_time.append(finished - started)
Example #2
0
def blinkiva(X,
             U,
             n_src=None,
             sparse_reg=0.,
             n_iter=20,
             n_nmf_sub_iter=4,
             proj_back=True,
             W0=None,
             R0=None,
             seed=None,
             print_cost=False,
             return_filters=False,
             callback=None):
    '''
    Implementation of BlinkIVA algorithm for BSS presented by

    Robin Scheibler, Nobutaka Ono at ICASSP 2019, title to be determined

    The demixing matrices ``W`` are updated alternately with a non-negative
    factorization ``R G`` fitted to the blinky signals ``U``. The first
    ``n_src`` columns of the activations are shared between both models.

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    U: ndarray (nframes, nblinkies)
        The matrix containing the blinky signals
    n_src: int, optional
        The number of sources or independent components tied to the
        blinkies (default: the number of channels)
    sparse_reg: float, optional
        A constant added to the denominator of the activations update
        (default 0.)
    n_iter: int, optional
        The number of iterations (default 20)
    n_nmf_sub_iter: int, optional
        The number of activations/gains updates run between two updates of
        the demixing matrices (default 4)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    R0: ndarray, optional
        Initial value of the activations. It is copied and used as the
        activations of all channels, so it is expected to have shape
        (nframes, nchannels).
    seed: int, optional
        When given, the global NumPy random generator is seeded with this
        value for the random initialization and its previous state is
        restored afterwards.
    print_cost: bool, optional
        If true, the cost function is printed every 5 iterations
    return_filters: bool
        If true, the function will return the demixing matrix, the gains
        and the activations too
    callback: func
        A callback function called every 10 iterations, allows to monitor
        convergence. It receives the current (projected back) output. When
        ``proj_back`` is False it also receives ``extra=[W, G, R, X, U]``.

    Returns
    -------
    Returns an (nframes, nfrequencies, nchannels) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nchannels), the gains
    (nsrc, nblinkies) and the activations (nframes, nchannels)
    if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``X`` and ``U`` do not have the same number of frames.
    '''

    n_frames, n_freq, n_chan = X.shape
    n_frames_blink, n_blink = U.shape

    # explicit exception rather than ``assert`` so that the check survives -O
    if n_frames != n_frames_blink:
        raise ValueError(
            'Microphones and blinkies do not have the same number of frames ({} != {})'.format(
                n_frames, n_frames_blink))

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_chan) for f in range(n_freq)],
                     dtype=X.dtype)
    else:
        W = W0.copy()

    # we will threshold entries of R and G that get too small
    machine_epsilon = np.finfo(float).eps

    # U normalized by number of frequencies is used
    U_mean = U / n_freq / 2

    I = np.eye(n_chan, n_chan)
    Y = np.zeros((n_frames, n_freq, n_chan), dtype=X.dtype)
    P = np.zeros((n_frames, n_chan))

    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    try:
        # initialize the parts of NMF
        if R0 is None:
            R_all = np.ones((n_frames, n_chan))
            R = R_all[:, :n_src]  # view on the subset tied to NMF of blinkies

            R[:, :] = 0.1 + 0.9 * np.random.rand(n_frames, n_src)
            R *= np.mean(np.abs(X[:, :, :n_src])**2, axis=(0, 1)) / R.sum(axis=0)
        else:
            R_all = R0.copy()
            R = R_all[:, :n_src]  # view on the subset tied to NMF of blinkies

        G = 0.1 + 0.9 * np.random.rand(n_src, n_blink)
        G *= np.mean(U_mean) / np.mean(G)
    finally:
        # give the global generator back in its previous state, even when
        # the initialization above fails
        if seed is not None:
            np.random.set_state(rng_state)

    # Compute the demixed output
    def demix(Y, X, W, P, R_all):
        for f in range(n_freq):
            Y[:, f, :] = np.dot(X[:, f, :], np.conj(W[f, :, :]))

        # The norm of each output over frequencies
        # shape: (n_frames, n_chan)
        P[:, :] = np.linalg.norm(Y, axis=1)

        # copy activations for sources not tied to NMF
        R_all[:, n_src:] = (P[:, n_src:] / n_freq)**2

    def R_update(P, R, G):
        # Multiplicative update of the activations (in-place)
        U_hat = np.dot(R, G)
        U_hat_I = 1. / U_hat
        R_I = 1. / R
        num = (0.5 * (P[:, :n_src] / n_freq) * R_I**1.5 +
               np.dot(U_mean * U_hat_I**2, G.T))
        denom = (0.5 * R_I + np.dot(U_hat_I, G.T) + sparse_reg)
        R *= np.sqrt(num / denom)
        R[R < machine_epsilon] = machine_epsilon

    def G_update(R, G):
        # Multiplicative update of the gains (in-place)
        U_hat = np.dot(R, G)
        U_hat_I = 1. / U_hat
        num = np.dot(R.T, U_mean * U_hat_I**2)
        denom = np.dot(R.T, U_hat_I)
        G *= np.sqrt(num / denom)
        G[G < machine_epsilon] = machine_epsilon

    def rescale(W, P, R_all, G):
        # Bring the mean of every activation column to one and scale the
        # other variables accordingly (all in-place)
        lmb = 1. / np.mean(R_all, axis=0)
        R_all *= lmb[None, :]
        W *= np.sqrt(lmb[None, None, :])
        P *= np.sqrt(lmb[None, :])
        G /= lmb[:n_src, None]

    def cost_function(Y, U, R, G, W):
        U_hat = np.dot(R[:, :G.shape[0]], G)
        cf = -Y.shape[0] * np.sum(np.linalg.slogdet(W)[1])
        cf += np.sum(0.5 * Y.shape[1] * np.log(R) +
                     np.linalg.norm(Y, axis=1) / R**0.5)
        cf += np.sum(Y.shape[1] * np.log(U_hat) + U / U_hat / 2)
        return cf

    cost_func_list = []

    # initial demixing
    demix(Y, X, W, P, R_all)

    # NMF + IVA
    for epoch in range(n_iter):

        if callback is not None and epoch % 10 == 0:
            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))
            else:
                callback(Y, extra=[W, G, R, X, U])

        if print_cost and epoch % 5 == 0:
            cost_func_list.append(cost_function(Y, U, R_all, G, W))
            print('epoch:', epoch, 'cost function ==', cost_func_list[-1])

        # several subiteration of NMF
        for sub_epoch in range(n_nmf_sub_iter):

            # Update the activations
            R_update(P, R, G)

            # Update the gains
            G_update(R, G)

            # Rescale all variables before continuing
            rescale(W, P, R_all, G)

        # Compute Auxiliary Variable
        # shape: (n_freq, n_chan, n_chan, n_chan)
        denom = 4 * P * R_all**0.5  # / n_freq  # when I add this, separation improves a lot!
        denom[denom < 1.] = 1.

        V = np.mean(
            (X[:, :, None, :, None] / denom[:, None, :, None, None]) *
            np.conj(X[:, :, None, None, :]),
            axis=0,
        )

        # Update the demixing matrices
        for s in range(n_chan):

            W_H = np.conj(np.swapaxes(W, 1, 2))
            WV = np.matmul(W_H, V[:, s, :, :])

            # One right-hand side column per frequency. The explicit trailing
            # axis makes ``solve`` treat it as a stack of (n_chan, 1) matrices
            # with every NumPy version; since NumPy 2.0 a 2D ``b`` is no
            # longer interpreted as a stack of vectors.
            rhs = np.tile(I[:, s, None], (n_freq, 1, 1))
            W[:, :, s] = np.linalg.solve(WV, rhs)[:, :, 0]

            P1 = np.conj(W[:, :, s])
            P2 = np.sum(V[:, s, :, :] * W[:, None, :, s], axis=-1)
            W[:, :, s] /= np.sqrt(np.sum(P1 * P2, axis=1))[:, None]

        demix(Y, X, W, P, R_all)

        # Rescale all variables before continuing
        rescale(W, P, R_all, G)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W, G, R_all
    else:
        return Y
Example #3
0
            X_mics,
            n_iter=n_iter,
            proj_back=False,
            model=args.dist,
            init_eig=(args.init == init_choices[1]),
            callback=convergence_callback,
            callback_checkpoints=callback_checkpoints,
        )
    else:
        raise ValueError("No such algorithm {}".format(args.algo))

    # Last evaluation of SDR/SIR
    convergence_callback(Y)

    # projection back
    z = projection_back(Y, X_mics[:, :, 0])
    Y *= np.conj(z[None, :, :])

    toc = time.perf_counter()

    tot_eval_time = sum(eval_time)

    print("Processing time: {:8.3f} s".format(toc - tic - tot_eval_time))
    print("Evaluation time: {:8.3f} s".format(tot_eval_time))

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    y = y[framesize - hop :, :].astype(np.float64)
Example #4
0
def auxiva_gauss(X,
                 n_src=None,
                 n_iter=20,
                 proj_back=True,
                 W0=None,
                 f_contrast=None,
                 f_contrast_args=[],
                 return_filters=False,
                 callback=None):
    '''
    Implementation of AuxIVA algorithm for BSS presented in

    N. Ono, *Stable and fast update rules for independent vector analysis based
    on auxiliary function technique*, Proc. IEEE, WASPAA, pp. 189-192, September, 2011.

    This version uses time-varying Gauss source model.

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_src: int, optional
        The number of sources or independent components. Only the determined
        case is supported, i.e. it must be equal to the number of channels
        (the default).
    n_iter: int, optional
        The number of iterations (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    f_contrast: optional
        Not used by this function: the Gauss source model is hard-coded.
        The argument is only kept so that existing calls keep working.
    f_contrast_args: optional
        Not used by this function (see ``f_contrast``); it is never modified.
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called every 10 iterations, allows to monitor convergence

    Returns
    -------
    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``n_src`` is different from the number of channels.
    '''

    n_frames, n_freq, n_chan = X.shape

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # for now, only supports determined case
    # (explicit exception rather than ``assert`` so the check survives -O)
    if n_chan != n_src:
        raise ValueError(
            'Only the determined case is supported ({} channels != {} sources)'.format(
                n_chan, n_src))

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_src) for f in range(n_freq)],
                     dtype=X.dtype)
    else:
        W = W0.copy()

    I = np.eye(n_src, n_src)
    Y = np.zeros((n_frames, n_freq, n_src), dtype=X.dtype)
    V = np.zeros((n_freq, n_src, n_chan, n_chan), dtype=X.dtype)
    r = np.zeros((n_frames, n_src))
    G_r = np.zeros((n_frames, n_src))

    # lower bound on the source power, a silent frame would otherwise lead
    # to a division by zero and fill the demixing matrices with NaN
    eps = 1e-15

    # Compute the demixed output
    def demix(Y, X, W):
        for f in range(n_freq):
            Y[:, f, :] = np.dot(X[:, f, :], np.conj(W[f, :, :]))

    for epoch in range(n_iter):

        demix(Y, X, W)

        if callback is not None and epoch % 10 == 0:
            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))
            else:
                callback(Y)

        # power of each source averaged over frequencies
        # shape: (n_frames, n_src)
        r[:, :] = np.mean(np.abs(Y * np.conj(Y)), axis=1)
        r[r < eps] = eps

        # Weights of the frames in the weighted covariance matrices
        G_r[:, :] = 1. / r / 2.  # shape (n_frames, n_src)

        # Compute Auxiliary Variable
        for f in range(n_freq):
            for s in range(n_src):
                V[f, s, :, :] = (np.dot(G_r[None, :, s] * X[:, f, :].T,
                                        np.conj(X[:, f, :]))) / X.shape[0]

        # Update now the demixing matrix
        for f in range(n_freq):
            for s in range(n_src):
                WV = np.dot(np.conj(W[f, :, :].T), V[f, s, :, :])
                W[f, :, s] = np.linalg.solve(WV, I[:, s])
                W[f, :, s] /= np.sqrt(
                    np.inner(np.conj(W[f, :, s]),
                             np.dot(V[f, s, :, :], W[f, :, s])))

    demix(Y, X, W)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W
    else:
        return Y
def ogive(
    X,
    n_iter=4000,
    step_size=0.1,
    tol=1e-3,
    update="demix",
    proj_back=True,
    W0=None,
    model="laplace",
    init_eig=False,
    return_filters=False,
    callback=None,
    callback_checkpoints=[],
):
    """
    Implementation of Orthogonally constrained Independent Vector Extraction
    (OGIVE) described in

    Z. Koldovský and P. Tichavský, “Gradient Algorithms for Complex
    Non-Gaussian Independent Component/Vector Extraction, Question of Convergence,”
    IEEE Trans. Signal Process., pp. 1050–1064, Dec. 2018.

    A single source is extracted.

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_iter: int, optional
        The maximum number of iterations (default 4000)
    step_size: float
        The step size of the gradient ascent
    tol: float
        Stop when the largest norm of the update direction over the
        frequencies is smaller than this number
    update: str
        Selects update of the mixing or demixing matrix, or a switching scheme,
        possible values: "mix", "demix", "switching". Any other value starts
        like "demix".
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, 1), optional
        Initial value for the demixing vectors
    model: str
        The model of source distribution 'gauss' or 'laplace' (default)
    init_eig: bool, optional (default ``False``)
        If ``True``, and if ``W0 is None``, then the weights are initialized
        using the principal eigenvectors of the covariance matrix of the input
        data.
    return_filters: bool
        If true, the function will return the demixing vectors too
    callback: func
        A callback function called at the iterations listed in
        ``callback_checkpoints``, allows to monitor convergence
    callback_checkpoints: list of int
        A list of epoch number when the callback should be called. The list
        is only read, never modified.

    Returns
    -------
    Returns an (nframes, nfrequencies, 1) array. Also returns
    the demixing vectors (nfrequencies, nchannels, 1)
    if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``model`` is neither 'laplace' nor 'gauss'.
    """

    # an unknown model used to run silently with a constant source power
    if model not in ("laplace", "gauss"):
        raise ValueError(
            "The source model should be 'laplace' or 'gauss', got {!r}".format(model)
        )

    n_frames, n_freq, n_chan = X.shape
    n_src = 1

    # covariance matrix of input signal (n_freq, n_chan, n_chan)
    Cx = np.mean(X[:, :, :, None] * np.conj(X[:, :, None, :]), axis=0)
    Cx_inv = np.linalg.inv(Cx)
    Cx_norm = np.linalg.norm(Cx, axis=(1, 2))

    w = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    a = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    delta = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    lambda_a = np.zeros((n_freq, 1, 1), dtype=np.float64)

    def tensor_H(T):
        # conjugate transpose of a stack of matrices
        return np.conj(T).swapaxes(1, 2)

    # initialize A and W
    if W0 is None:
        if init_eig:
            # Initialize the demixing vectors with the principal eigenvector
            # of the input covariance; the decomposition is only computed
            # when it is actually needed
            eigval, eigvec = np.linalg.eig(Cx)
            for f in range(n_freq):
                w[f, :, 0] = eigvec[f, :, np.argmax(eigval[f])]

        else:
            # Or select the first channel
            w[:, 0] = 1.0

    else:
        w[:, :] = W0

    def update_a_from_w(I):
        # orthogonal constraint: mixing vector from the demixing vector,
        # for the frequencies selected by the boolean mask I
        v_new = Cx[I] @ w[I]
        lambda_w = 1.0 / np.real(tensor_H(w[I]) @ v_new)
        a[I, :, :] = lambda_w * v_new

    def update_w_from_a(I):
        # orthogonal constraint: demixing vector from the mixing vector.
        # lambda_a is refreshed for all frequencies, w only for those
        # selected by the boolean mask I
        v_new = Cx_inv @ a
        lambda_a[:] = 1.0 / np.real(tensor_H(a) @ v_new)
        w[I, :, :] = lambda_a[I] * v_new[I]

    def switching_criterion():
        # decides, per frequency, whether the mixing or the demixing
        # vector is updated (the masks are modified in-place)

        a_n = a / a[:, :1, :1]
        b_n = Cx @ a_n
        lmb = b_n[:, :1, :1].copy()  # copy is important here!
        b_n /= lmb

        p1 = np.linalg.norm(a_n - b_n, axis=(1, 2)) / Cx_norm
        Cbb = (lmb * (b_n @ tensor_H(b_n)) /
               np.linalg.norm(b_n, axis=(1, 2), keepdims=True)**2)
        p2 = np.linalg.norm(Cx - Cbb, axis=(1, 2))

        kappa = p1 * p2 / np.sqrt(n_chan)

        thresh = 0.1
        I_do_a[:] = kappa >= thresh
        I_do_w[:] = kappa < thresh

    # Compute the demixed output
    def demix(Y, X, W):
        Y[:, :, :] = X @ np.conj(W)

    # The very first update of a
    # (the builtin ``bool`` is used as dtype, ``np.bool`` does not exist
    # in NumPy 1.24 to 1.26)
    update_a_from_w(np.ones(n_freq, dtype=bool))

    if update == "mix":
        I_do_w = np.zeros(n_freq, dtype=bool)
        I_do_a = np.ones(n_freq, dtype=bool)
    else:  # default is "demix"
        I_do_w = np.ones(n_freq, dtype=bool)
        I_do_a = np.zeros(n_freq, dtype=bool)

    r_inv = np.zeros((n_frames, n_src))
    r = np.zeros((n_frames, n_src))

    # ensure some numerical stability
    eps = 1e-15

    # Things are more efficient when the frequencies are over the first axis
    Y = np.zeros((n_freq, n_frames, n_src), dtype=X.dtype)
    X_ref = X  # keep a reference to input signal
    X = X.swapaxes(0, 1).copy()  # more efficient order for processing

    for epoch in range(n_iter):
        # compute the switching criterion
        if update == "switching" and epoch % 10 == 0:
            switching_criterion()

        # Extract the target signal
        demix(Y, X, w)

        # Now run any necessary callback
        if callback is not None and epoch in callback_checkpoints:
            Y_tmp = Y.swapaxes(0, 1).copy()
            if proj_back:
                z = projection_back(Y_tmp, X_ref[:, :, 0])
                callback(Y_tmp * np.conj(z[None, :, :]))
            else:
                callback(Y_tmp)

        # shape: (n_frames, n_src)
        if model == "laplace":
            r[:, :] = np.linalg.norm(Y, axis=0) / np.sqrt(n_freq)
        else:  # "gauss"
            r[:, :] = (np.linalg.norm(Y, axis=0)**2) / n_freq

        r[r < eps] = eps

        r_inv[:, :] = 1.0 / r

        # Compute the score function
        psi = r_inv[None, :, :] * np.conj(Y)

        # "Nu" in Algo 3 in [1]
        # shape (n_freq, 1, 1)
        zeta = Y.swapaxes(1, 2) @ psi

        x_psi = (X.swapaxes(1, 2) @ psi) / zeta

        # The w-step
        # shape (n_freq, n_chan, 1)
        delta[I_do_w] = a[I_do_w] - x_psi[I_do_w]
        w[I_do_w] += step_size * delta[I_do_w]

        # The a-step
        # shape (n_freq, n_chan, 1)
        delta[I_do_a] = w[I_do_a] - (
            Cx_inv[I_do_a] @ x_psi[I_do_a]) * lambda_a[I_do_a]
        a[I_do_a] += step_size * delta[I_do_a]

        # Apply the orthogonal constraints
        update_a_from_w(I_do_w)
        update_w_from_a(I_do_a)

        max_delta = np.max(np.linalg.norm(delta, axis=(1, 2)))

        if max_delta < tol:
            break

    # Extract target
    demix(Y, X, w)

    # back to the (n_frames, n_freq, n_src) order of the input
    Y = Y.swapaxes(0, 1).copy()

    if proj_back:
        z = projection_back(Y, X_ref[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, w
    else:
        return Y
Example #6
0
def blinkiva_gauss(X, U, n_src=None,
        n_iter=20, n_nmf_sub_iter=20, proj_back=True, W0=None, R0=None,
        seed=None, epsilon=0.5, sparse_reg=0., print_cost=False,
        return_filters=False, callback=None):
    '''
    Implementation of BlinkIVA algorithm for blind source separation using jointly
    microphones and sound power sensors "blinkies". The algorithm was presented in

    R. Scheibler and N. Ono, *Multi-modal Blind Source Separation with Microphones and Blinkies,*
    Proc. IEEE ICASSP, Brighton, UK, May, 2019.  DOI: 10.1109/ICASSP.2019.8682594
    https://arxiv.org/abs/1904.02334

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    U: ndarray (nframes, nblinkies)
        The matrix containing the blinky signals
    n_src: int, optional
        The number of sources or independent components tied to the
        blinkies (default: the number of channels)
    n_iter: int, optional
        The number of iterations (default 20)
    n_nmf_sub_iter: int, optional
        The number of NMF iteration to run between two updates of the demixing
        matrices (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    R0: ndarray (nframes, nsrc), optional
        Initial value of the activations
    seed: int, optional
        A seed to make deterministic the random initialization of NMF parts,
        when None (default), the random number generator is used in its current state.
        When a seed is given, the state of the global generator is restored
        after the initialization.
    epsilon: float, optional
        A regularization value to prevent too large values after the division
    sparse_reg: float
        A regularization term to make the activation matrix sparse
    print_cost: bool, optional
        Print the value of the cost function every 10 iterations
    return_filters: bool, optional
        If true, the function will return the demixing matrix, gains, and activations too
    callback: func
        A callback function called every 10 iterations, allows to monitor
        convergence. It receives the current (projected back) output. When
        ``proj_back`` is False it also receives ``extra=[W, G, R, X, U]``.

    Returns
    -------
    Returns an (nframes, nfrequencies, nchannels) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nchannels), gains (nsrc, nblinkies),
    and activations (nframes, nchannels) if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``X`` and ``U`` do not have the same number of frames.
    '''

    n_frames, n_freq, n_chan = X.shape
    n_frames_blink, n_blink = U.shape

    if n_frames_blink != n_frames:
        raise ValueError('The microphones and blinky signals should have the same number of frames')

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_chan) for f in range(n_freq)], dtype=X.dtype)
    else:
        W = W0.copy()

    # we will threshold entries of R and G that get too small
    machine_epsilon = np.finfo(float).eps

    # we will only work with the blinky signal normalized by frequency
    U_mean = U / n_freq / 2.

    I = np.eye(n_chan, n_chan)
    Y = np.zeros((n_frames, n_freq, n_chan), dtype=X.dtype)
    P = np.zeros((n_frames, n_chan))

    # initialize the parts of NMF
    R_all = np.ones((n_frames, n_chan))
    R = R_all[:, :n_src]  # view on the subset tied to NMF of blinkies

    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    try:
        if R0 is None:
            R[:, :] = 0.1 + 0.9 * np.random.rand(n_frames, n_src)
            R /= np.mean(R, axis=0, keepdims=True)
        else:
            R[:, :] = R0

        G = 0.1 + 0.9 * np.random.rand(n_src, n_blink)
        U_hat = np.dot(R, G)
        G *= np.sum(U_mean, axis=0, keepdims=True) / np.sum(U_hat, axis=0, keepdims=True)
    finally:
        # give the global generator back in its previous state, even when
        # the initialization above fails
        if seed is not None:
            np.random.set_state(rng_state)

    def cost(Y, W, R, G):
        pwr = np.linalg.norm(Y, axis=1) ** 2
        cf1 = -2 * Y.shape[0] * np.sum(np.linalg.slogdet(W)[1])
        cf2 = np.sum(Y.shape[1] * np.log(R) + pwr / R)
        U_hat = np.dot(R[:, :n_src], G)
        cf3 = np.sum(np.log(U_hat) + U / U_hat / 2)
        return {'iva': cf1 + cf2, 'nmf': cf2 + cf3, 'blinkiva': cf1 + cf2 + cf3}

    def rescale(W, P, R, G):
        # Fix the scale of all variables (in-place): the mean of every
        # column of R is brought to one
        lmb = 1. / np.mean(R, axis=0)
        R *= lmb[None, :]
        P *= lmb[None, :]
        W *= np.sqrt(lmb[None, None, :])
        G /= lmb[:G.shape[0], None]

    # Compute the demixed output
    def demix(Y, X, W, P, R_all):
        for f in range(n_freq):
            Y[:, f, :] = np.dot(X[:, f, :], np.conj(W[f, :, :]))

        # The power of each output averaged over frequencies
        # shape: (n_frames, n_chan)
        P[:, :] = np.linalg.norm(Y, axis=1) ** 2 / n_freq

        # copy activations for sources not tied to NMF
        R_all[:, n_src:] = P[:, n_src:]

    # initial demixing
    demix(Y, X, W, P, R_all)

    # NMF + IVA joint updates
    for epoch in range(n_iter):

        if callback is not None and epoch % 10 == 0:

            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))
            else:
                callback(Y, extra=[W, G, R, X, U])

        if print_cost and epoch % 10 == 0:
            print('Cost function: iva={iva:13.0f} nmf={nmf:13.0f} iva+nmf={blinkiva:13.0f}'.format(
                **cost(Y, W, R_all, G)))

        for sub_epoch in range(n_nmf_sub_iter):

            # Update the activations
            U_hat = np.dot(R, G)
            U_hat_I = 1. / U_hat
            R_I = 1. / R
            R *= np.sqrt(
                    (P[:, :n_src] * R_I ** 2 + np.dot(U_mean * U_hat_I ** 2, G.T))
                    / (R_I + np.dot(U_hat_I, G.T) + sparse_reg)
                    )
            R[R < machine_epsilon] = machine_epsilon

            # Update the gains
            U_hat = np.dot(R, G)
            U_hat_I = 1. / U_hat
            G *= np.sqrt(np.dot(R.T, U_mean * U_hat_I ** 2) / np.dot(R.T, U_hat_I))
            G[G < machine_epsilon] = machine_epsilon

        # Compute Auxiliary Variable
        # shape: (n_freq, n_chan, n_chan, n_chan)
        denom = 2 * R_all
        denom[denom < epsilon] = epsilon  # regularize this part

        # 1) promote all arrays to (n_frames, n_freq, n_chan, n_chan, n_chan)
        # 2) take the outer product (complex) of the last two dimensions
        #    to get the covariance matrix over the microphones
        # 3) average over the time frames (index 0)
        V = np.mean(
                (X[:, :, None, :, None] / denom[:, None, :, None, None])
                * np.conj(X[:, :, None, None, :]),
                axis=0,
                )

        # Update now the demixing matrix
        for s in range(n_chan):

            W_H = np.conj(np.swapaxes(W, 1, 2))
            WV = np.matmul(W_H, V[:, s, :, :])

            # One right-hand side column per frequency. The explicit trailing
            # axis makes ``solve`` treat it as a stack of (n_chan, 1) matrices
            # with every NumPy version; since NumPy 2.0 a 2D ``b`` is no
            # longer interpreted as a stack of vectors.
            rhs = np.tile(I[:, s, None], (n_freq, 1, 1))
            W[:, :, s] = np.linalg.solve(WV, rhs)[:, :, 0]

            P1 = np.conj(W[:, :, s])
            P2 = np.sum(V[:, s, :, :] * W[:, None, :, s], axis=-1)
            W[:, :, s] /= np.sqrt(np.sum(P1 * P2, axis=1))[:, None]

        demix(Y, X, W, P, R_all)

        # Rescale all variables before continuing
        rescale(W, P, R_all, G)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W, G, R_all
    else:
        return Y
Example #7
0
def overiva(
    X,
    n_src=None,
    n_iter=20,
    proj_back=True,
    W0=None,
    model="laplace",
    init_eig=False,
    return_filters=False,
    callback=None,
):
    """
    Implementation of overdetermined IVA algorithm for BSS as presented. See
    the following publication for a detailed description of the algorithm.

    R. Scheibler and N. Ono, Independent Vector Analysis with more Microphones than Sources, arXiv, 2019.
    https://arxiv.org/abs/1905.07880

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_src: int, optional
        The number of sources or independent components. When
        ``n_src==nchannels``, the algorithms is identical to AuxIVA. When
        ``n_src==1``, then it is doing independent vector extraction.
    n_iter: int, optional
        The number of iterations (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nsrc), optional
        Initial value for demixing matrix
    model: str
        The model of source distribution 'gauss' or 'laplace' (default)
    init_eig: bool, optional (default ``False``)
        If ``True``, and if ``W0 is None``, then the weights are initialized
        using the principal eigenvectors of the covariance matrix of the input
        data.
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called every 10 iterations, allows to monitor
        convergence

    Returns
    -------
    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``model`` is neither 'laplace' nor 'gauss'.
    """

    # an unknown model used to leave the source power at zero, which then
    # filled the output with NaN
    if model not in ("laplace", "gauss"):
        raise ValueError(
            "The source model should be 'laplace' or 'gauss', got {!r}".format(model)
        )

    n_frames, n_freq, n_chan = X.shape

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # covariance matrix of input signal (n_freq, n_chan, n_chan)
    Cx = np.mean(X[:, :, :, None] * np.conj(X[:, :, None, :]), axis=0)

    # W and J are views on blocks of the full square matrix W_hat, writing
    # to them updates W_hat
    W_hat = np.zeros((n_freq, n_chan, n_chan), dtype=X.dtype)
    W = W_hat[:, :, :n_src]
    J = W_hat[:, :n_src, n_src:]

    def tensor_H(T):
        # conjugate transpose of a stack of matrices
        return np.conj(T).swapaxes(1, 2)

    def update_J_from_orth_const():
        tmp = np.matmul(tensor_H(W), Cx)
        J[:, :, :] = np.linalg.solve(tmp[:, :, :n_src], tmp[:, :, n_src:])

    # initialize A and W
    if W0 is None:

        if init_eig:
            # Initialize the demixing matrices with the principal
            # eigenvectors of the input covariance
            v, w = np.linalg.eig(Cx)
            for f in range(n_freq):
                ind = np.argsort(v[f])[-n_src:]
                W[f, :, :] = np.conj(w[f][:, ind])

        else:
            # Or with identity
            for f in range(n_freq):
                W[f, :n_src, :] = np.eye(n_src)

    else:
        W[:, :, :] = W0

    # We still need to initialize the rest of the matrix
    if n_src < n_chan:
        update_J_from_orth_const()
        for f in range(n_freq):
            W_hat[f, n_src:, n_src:] = -np.eye(n_chan - n_src)

    eyes = np.tile(np.eye(n_chan, n_chan), (n_freq, 1, 1))
    V = np.zeros((n_freq, n_chan, n_chan), dtype=X.dtype)
    r_inv = np.zeros((n_frames, n_src))
    r = np.zeros((n_frames, n_src))

    # ensure some numerical stability
    eps = 1e-15

    # Things are more efficient when the frequencies are over the first axis
    Y = np.zeros((n_freq, n_frames, n_src), dtype=X.dtype)
    X = X.swapaxes(0, 1).copy()

    # Compute the demixed output
    def demix(Y, X, W):
        Y[:, :, :] = X @ np.conj(W)

    for epoch in range(n_iter):

        demix(Y, X, W)

        if callback is not None and epoch % 10 == 0:
            Y_tmp = Y.swapaxes(0, 1)
            if proj_back:
                z = projection_back(Y_tmp, X[:, :, 0].swapaxes(0, 1))
                callback(Y_tmp * np.conj(z[None, :, :]))
            else:
                callback(Y_tmp)

        # shape: (n_frames, n_src)
        if model == "laplace":
            r[:, :] = (2. * np.linalg.norm(Y, axis=0))
        else:  # "gauss"
            r[:, :] = (np.linalg.norm(Y, axis=0)**2) / n_freq

        # set the scale of r
        gamma = r.mean(axis=0)
        r /= gamma[None, :]

        if model == "laplace":
            Y /= gamma[None, None, :]
            W /= gamma[None, None, :]
        else:  # "gauss"
            g_sq = np.sqrt(gamma[None, None, :])
            Y /= g_sq
            W /= g_sq

        r[r < eps] = eps

        r_inv[:, :] = 1. / r

        # Update now the demixing matrix
        for s in range(n_src):
            # Compute Auxiliary Variable
            # shape: (n_freq, n_chan, n_chan)
            V[:, :, :] = (X.swapaxes(1, 2) *
                          r_inv[None, None, :, s]) @ np.conj(X) / n_frames

            WV = np.conj(W_hat).swapaxes(1, 2) @ V

            # The explicit trailing axis makes ``solve`` treat the right-hand
            # side as a stack of (n_chan, 1) matrices with every NumPy
            # version; since NumPy 2.0 a 2D ``b`` is no longer interpreted
            # as a stack of vectors.
            W[:, :, s] = np.linalg.solve(WV, eyes[:, :, s, None])[:, :, 0]

            # normalize
            denom = np.conj(W[:, None, :, s]) @ V[:, :, :] @ W[:, :, None, s]
            W[:, :, s] /= np.sqrt(denom[:, :, 0])

            # Update the mixing matrix according to orthogonal constraints
            if n_src < n_chan:
                update_J_from_orth_const()

    demix(Y, X, W)

    # back to the (n_frames, n_freq, ...) order of the input
    Y = Y.swapaxes(0, 1).copy()
    X = X.swapaxes(0, 1)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W
    else:
        return Y