Ejemplo n.º 1
    def convergence_callback(Y, **kwargs):
        """Evaluate the current separation result and record the time spent.

        The STFT-domain estimate ``Y`` is rescaled by projection back onto the
        first microphone, converted to the time domain, and scored with
        ``mir_eval.separation.bss_eval_sources`` against the global ``ref``.
        The wall-clock time spent in this function is appended to
        ``eval_time`` so it can be subtracted from the processing time.

        Parameters
        ----------
        Y: ndarray
            Current estimate in the STFT domain; its third axis indexes the
            separated channels.
        **kwargs:
            Accepted and ignored, so the separation routines may pass extra
            keyword arguments.
        """
        global SDR, SIR, ref

        t_enter = time.perf_counter()

        from mir_eval.separation import bss_eval_sources

        # projection back (the product allocates a new array, the caller's Y
        # is left untouched)
        z = projection_back(Y, X_mics[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        # A single-channel output has to be synthesized as a 2-D STFT and
        # given its channel axis back; otherwise synthesize all channels.
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
        y = y[framesize - hop :, :].astype(np.float64)

        if args.algo != "blinkiva":
            # order the channels by decreasing standard deviation
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # compare over the common length only
        m = min(y.shape[0], ref.shape[1])
        # NOTE(review): the metrics are computed but not stored anywhere in
        # the visible code even though SDR and SIR are declared global --
        # confirm whether appending them was lost.
        sdr, sir, sar, perm = bss_eval_sources(ref[:, :m], y[:m, [0, 0]].T)

        t_exit = time.perf_counter()
        eval_time.append(t_exit - t_enter)
Ejemplo n.º 2
def blinkiva(X,
    Implementation of BlinkIVA algorithm for BSS presented by

    Robin Scheibler, Nobutaka Ono at ICASSP 2019, title to be determined

    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    U: ndarray (nframes, nblinkies)
        The matrix containing the blinky signals
    n_src: int, optional
        The number of sources or independent components
    n_iter: int, optional
        The number of iterations (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called every 10 iterations, allows to monitor convergence

    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    n_frames, n_freq, n_chan = X.shape
    _, n_blink = U.shape

    assert n_frames == _, 'Microphones and blinkies do not have the same number of frames ({} != {})'.format(
        n_frames, _)

    # default to determined case
    if n_src is None:
        n_src = X.shape[2]

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_chan) for f in range(n_freq)],
        W = W0.copy()

    # we will threshold entries of R and G that get too small
    machine_epsilon = np.finfo(float).eps

    # U normalized by number of frequencies is used
    U_mean = U / n_freq / 2

    I = np.eye(n_chan, n_chan)
    Y = np.zeros((n_frames, n_freq, n_chan), dtype=X.dtype)
    V = np.zeros((n_freq, n_chan, n_chan, n_chan), dtype=X.dtype)
    P = np.zeros((n_frames, n_chan))

    if seed is not None:
        rng_state = np.random.get_state()

    # initialize the parts of NMF
    if R0 is None:
        R_all = np.ones((n_frames, n_chan))
        R = R_all[:, :n_src]  # subset tied to NMF of blinkies

        R[:, :] = 0.1 + 0.9 * np.random.rand(n_frames, n_src)
        R *= np.mean(np.abs(X[:, :, :n_src])**2, axis=(0, 1)) / R.sum(axis=0)
        R_all = R0.copy()
        R = R_all[:, :n_src]  # subset tied to NMF of blinkies

    G = 0.1 + 0.9 * np.random.rand(n_src, n_blink)
    G *= np.mean(U_mean) / np.mean(G)

    if seed is not None:

    # Compute the demixed output
    def demix(Y, X, W, P, R_all):
        """Demix ``X`` with ``W`` and refresh the derived statistics in place.

        ``Y`` receives the demixed signal, ``P`` the norm of ``Y`` taken over
        its axis 1, and the columns of ``R_all`` from index ``n_src`` onward
        are overwritten with ``(P / n_freq) ** 2``. Nothing is returned.
        """
        # one matrix product per frequency bin
        for k in range(n_freq):
            Y[:, k, :] = np.dot(X[:, k, :], W[k].conj())

        # magnitude of every (frame, channel) vector across axis 1 of Y
        P[:, :] = np.linalg.norm(Y, axis=1)

        # channels that are not tied to the NMF take their activation
        # directly from the demixed signal
        untied = P[:, n_src:]
        R_all[:, n_src:] = (untied / n_freq) ** 2

    def R_update(P, U, R, G):
        """Multiplicative update of the activations ``R``, done in place.

        ``U`` is accepted but not read: the update uses ``U_mean`` from the
        enclosing scope. Entries of ``R`` that fall below ``machine_epsilon``
        are raised to ``machine_epsilon`` afterwards.
        """
        inv_model = 1. / np.dot(R, G)
        inv_act = 1. / R
        gains_t = G.T

        numerator = (0.5 * (P[:, :n_src] / n_freq) * inv_act**1.5 +
                     np.dot(U_mean * inv_model**2, gains_t))
        denominator = 0.5 * inv_act + np.dot(inv_model, gains_t) + sparse_reg

        R *= np.sqrt(numerator / denominator)

        # keep the activations away from zero
        too_small = R < machine_epsilon
        R[too_small] = machine_epsilon

    def G_update(U, R, G):
        """Multiplicative update of the gains ``G``, done in place.

        ``U`` is accepted but not read: the update uses ``U_mean`` from the
        enclosing scope. Entries of ``G`` that fall below ``machine_epsilon``
        are raised to ``machine_epsilon`` afterwards.
        """
        inv_model = 1. / np.dot(R, G)
        act_t = R.T

        numerator = np.dot(act_t, U_mean * inv_model**2)
        denominator = np.dot(act_t, inv_model)
        G *= np.sqrt(numerator / denominator)

        # keep the gains away from zero
        too_small = G < machine_epsilon
        G[too_small] = machine_epsilon

    def cost_function(Y, U, R, G, W):
        """Return the scalar cost for the current variables.

        The value is the sum of three parts: a log-determinant term on ``W``,
        a term coupling ``Y`` with the activations ``R``, and a term coupling
        ``U`` with the product of the first ``G.shape[0]`` columns of ``R``
        and ``G``.
        """
        n_rows, n_bins = Y.shape[0], Y.shape[1]
        model = np.dot(R[:, :G.shape[0]], G)

        logdet_part = -n_rows * np.sum(np.linalg.slogdet(W)[1])
        source_part = np.sum(0.5 * n_bins * np.log(R) +
                             np.linalg.norm(Y, axis=1) / R**0.5)
        blinky_part = np.sum(n_bins * np.log(model) + U / model / 2)

        return logdet_part + source_part + blinky_part

    cost_func_list = []

    # initial demixing
    demix(Y, X, W, P, R_all)

    # NMF + IVA
    for epoch in range(n_iter):

        if callback is not None and epoch % 10 == 0:
            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))
                callback(Y, extra=[W, G, R, X, U])

        if print_cost and epoch % 5 == 0:
            cost_func_list.append(cost_function(Y, U, R_all, G, W))
            print('epoch:', epoch, 'cost function ==', cost_func_list[-1])

        # several subiteration of NMF
        for sub_epoch in range(n_nmf_sub_iter):

            # Update the activations
            R_update(P, U, R, G)

            # Update the gains
            G_update(U, R, G)

            # Rescale all variables before continuing
            lmb = 1. / np.mean(R_all, axis=0)
            R_all *= lmb[None, :]
            W *= np.sqrt(lmb[None, None, :])
            P *= np.sqrt(lmb[None, :])
            G /= lmb[:n_src, None]

        # Compute Auxiliary Variable
        # shape: (n_freq, n_src, n_mic, n_mic)
        denom = 4 * P * R_all**0.5  # / n_freq  # when I add this, separation improves a lot!
        denom[denom < 1.] = 1.

        V = np.mean(
            (X[:, :, None, :, None] / denom[:, None, :, None, None]) *
            np.conj(X[:, :, None, None, :]),

        # Update the demixing matrices
        for s in range(n_chan):

            W_H = np.conj(np.swapaxes(W, 1, 2))
            WV = np.matmul(W_H, V[:, s, :, :])

            # Solve WV[f] w = e_s for every frequency at once. The right-hand
            # side is given as an explicit stack of column matrices, one per
            # frequency: a 2-D ``b`` next to a 3-D ``a`` is no longer read as
            # a stack of vectors by NumPy >= 2.0.
            rhs = np.tile(I[:, s, None], (WV.shape[0], 1, 1))
            W[:, :, s] = np.linalg.solve(WV, rhs)[:, :, 0]

            # normalize the new demixing vector by sqrt(w^H V w)
            P1 = np.conj(W[:, :, s])
            P2 = np.sum(V[:, s, :, :] * W[:, None, :, s], axis=-1)
            W[:, :, s] /= np.sqrt(np.sum(P1 * P2, axis=1))[:, None]

        demix(Y, X, W, P, R_all)

        # Rescale all variables before continuing
        lmb = 1. / np.mean(R_all, axis=0)
        R_all *= lmb[None, :]
        W *= np.sqrt(lmb[None, None, :])
        P *= np.sqrt(lmb[None, :])
        G /= lmb[:n_src, None]

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W, G, R_all
        return Y
Ejemplo n.º 3
            init_eig=(args.init == init_choices[1]),
        raise ValueError("No such algorithm {}".format(args.algo))

    # Last evaluation of SDR/SIR

    # projection back
    z = projection_back(Y, X_mics[:, :, 0])
    Y *= np.conj(z[None, :, :])

    toc = time.perf_counter()

    # time spent in the evaluation callback is reported separately
    tot_eval_time = sum(eval_time)

    print("Processing time: {:8.3f} s".format(toc - tic - tot_eval_time))
    print("Evaluation time: {:8.3f} s".format(tot_eval_time))

    # Run iSTFT
    # A single-channel output has to be synthesized as a 2-D STFT and given
    # its channel axis back; otherwise synthesize all channels at once.
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    y = y[framesize - hop :, :].astype(np.float64)
Ejemplo n.º 4
def auxiva_gauss(X,
    Implementation of AuxIVA algorithm for BSS presented in

    N. Ono, *Stable and fast update rules for independent vector analysis based
    on auxiliary function technique*, Proc. IEEE, WASPAA, pp. 189-192, September, 2011.

    This version uses time-varying Gauss source model.

    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_src: int, optional
        The number of sources or independent components
    n_iter: int, optional
        The number of iterations (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    f_contrast: dict of functions
        A dictionary with two elements 'f' and 'df' containing the contrast
        function taking 3 arguments This should be a ufunc acting element-wise
        on any array
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called every 10 iterations, allows to monitor convergence

    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    n_frames, n_freq, n_chan = X.shape

    # default to determined case
    if n_src is None:
        n_src = X.shape[2]

    # for now, only supports determined case
    assert n_chan == n_src

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_src) for f in range(n_freq)],
        W = W0.copy()

    if f_contrast is None:
        f_contrast = f_contrasts['norm']
        f_contrast_args = [1, 1]

    I = np.eye(n_src, n_src)
    Y = np.zeros((n_frames, n_freq, n_src), dtype=X.dtype)
    V = np.zeros((n_freq, n_src, n_chan, n_chan), dtype=X.dtype)
    r = np.zeros((n_frames, n_src))
    G_r = np.zeros((n_frames, n_src))

    # Compute the demixed output
    def demix(Y, X, W):
        """Write the demixed signal into ``Y``, one frequency bin at a time.

        For every bin ``k`` below ``n_freq`` the slice ``X[:, k, :]`` is
        multiplied by the conjugate of ``W[k]``. Nothing is returned.
        """
        for k in range(n_freq):
            Y[:, k, :] = np.dot(X[:, k, :], W[k].conj())

    for epoch in range(n_iter):

        demix(Y, X, W)

        if callback is not None and epoch % 10 == 0:
            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))

        # simple loop as a start
        # shape: (n_frames, n_src)
        r[:, :] = np.mean(np.abs(Y * np.conj(Y)), axis=1)

        # Apply derivative of contrast function
        G_r[:, :] = 1. / r / 2.  # shape (n_frames, n_src)

        # Compute Auxiliary Variable
        for f in range(n_freq):
            for s in range(n_src):
                V[f, s, :, :] = (np.dot(G_r[None, :, s] * X[:, f, :].T,
                                        np.conj(X[:, f, :]))) / X.shape[0]

        # Update now the demixing matrix
        for f in range(n_freq):
            for s in range(n_src):
                WV = np.dot(np.conj(W[f, :, :].T), V[f, s, :, :])
                W[f, :, s] = np.linalg.solve(WV, I[:, s])
                W[f, :, s] /= np.sqrt(
                    np.inner(np.conj(W[f, :, s]),
                             np.dot(V[f, s, :, :], W[f, :, s])))

    demix(Y, X, W)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W
        return Y
Ejemplo n.º 5
def ogive(
    Implementation of Orthogonally constrained Independent Vector Extraction
    (OGIVE) described in

    Z. Koldovský and P. Tichavský, “Gradient Algorithms for Complex
    Non-Gaussian Independent Component/Vector Extraction, Question of Convergence,”
    IEEE Trans. Signal Process., pp. 1050–1064, Dec. 2018.

    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_src: int, optional
        The number of sources or independent components
    n_iter: int, optional
        The number of iterations (default 20)
    step_size: float
        The step size of the gradient ascent
    tol: float
        Stop when the gradient is smaller than this number
    update: str
        Selects update of the mixing or demixing matrix, or a switching scheme,
        possible values: "mix", "demix", "switching"
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nsrc, nchannels), optional
        Initial value for demixing matrix
    model: str
        The model of source distribution 'gauss' or 'laplace' (default)
    init_eig: bool, optional (default ``False``)
        If ``True``, and if ``W0 is None``, then the weights are initialized
        using the principal eigenvectors of the covariance matrix of the input
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called at the epochs listed in ``callback_checkpoints``, used to monitor convergence
    callback_checkpoints: list of int
        A list of epoch number when the callback should be called

    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    n_frames, n_freq, n_chan = X.shape
    n_src = 1

    # covariance matrix of input signal (n_freq, n_chan, n_chan)
    Cx = np.mean(X[:, :, :, None] * np.conj(X[:, :, None, :]), axis=0)
    Cx_inv = np.linalg.inv(Cx)
    Cx_norm = np.linalg.norm(Cx, axis=(1, 2))

    w = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    a = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    delta = np.zeros((n_freq, n_chan, 1), dtype=X.dtype)
    lambda_a = np.zeros((n_freq, 1, 1), dtype=np.float64)

    def tensor_H(T):
        """Return the complex conjugate of ``T`` with axes 1 and 2 exchanged."""
        return np.swapaxes(np.conj(T), 1, 2)

    # eigenvectors of the input covariance
    eigval, eigvec = np.linalg.eig(Cx)
    lead_eigval = np.max(eigval, axis=1)
    lead_eigvec = np.zeros((n_freq, n_chan), dtype=Cx.dtype)
    for f in range(n_freq):
        ind = np.argmax(eigval[f])
        lead_eigvec[f, :] = eigvec[f, :, ind]

    # initialize A and W
    if W0 is None:
        if init_eig:

            # Initialize the demixing matrices with the principal
            # eigenvector
            w[:, :, 0] = lead_eigvec

            # Or with identity
            w[:, 0] = 1.0

        w[:, :] = W0

    def update_a_from_w(I):
        """Recompute the entries of ``a`` selected by ``I`` from ``w``.

        For the selected frequencies, ``a`` is set to ``Cx @ w`` divided by
        the real part of ``w^H @ Cx @ w``. ``a`` is modified in place.
        """
        w_sel = w[I]
        cov_w = np.matmul(Cx[I], w_sel)
        quad_form = np.real(np.matmul(tensor_H(w_sel), cov_w))
        a[I, :, :] = cov_w * (1.0 / quad_form)

    def update_w_from_a(I):
        """Recompute the entries of ``w`` selected by ``I`` from ``a``.

        ``lambda_a`` is refreshed for every frequency (not only the selected
        ones) as the inverse of the real part of ``a^H @ Cx_inv @ a``; then
        ``w`` is set, for the selected frequencies only, to
        ``lambda_a * Cx_inv @ a``. Both arrays are modified in place.
        """
        cinv_a = np.matmul(Cx_inv, a)
        quad_form = np.real(np.matmul(tensor_H(a), cinv_a))
        lambda_a[:] = 1.0 / quad_form
        w[I, :, :] = lambda_a[I] * cinv_a[I]

    def switching_criterion():
        """Refresh the masks ``I_do_a`` and ``I_do_w`` in place.

        A per-frequency value ``kappa`` is computed from ``a`` and ``Cx``.
        Frequencies with ``kappa >= 0.1`` are flagged in ``I_do_a``, the ones
        with ``kappa < 0.1`` in ``I_do_w``.
        """
        threshold = 0.1

        # scale a so that its first entry is one
        a_scaled = a / a[:, :1, :1]

        # same normalization for Cx @ a; the leading entry has to be copied
        # before the in-place division overwrites it
        b_scaled = np.matmul(Cx, a_scaled)
        lead = b_scaled[:, :1, :1].copy()
        b_scaled /= lead

        dist_ab = np.linalg.norm(a_scaled - b_scaled, axis=(1, 2)) / Cx_norm

        b_sq_norm = np.linalg.norm(b_scaled, axis=(1, 2), keepdims=True)**2
        rank_one = lead * np.matmul(b_scaled, tensor_H(b_scaled)) / b_sq_norm
        dist_cov = np.linalg.norm(Cx - rank_one, axis=(1, 2))

        kappa = dist_ab * dist_cov / np.sqrt(n_chan)

        I_do_a[:] = kappa >= threshold
        I_do_w[:] = kappa < threshold

    # Compute the demixed output
    def demix(Y, X, W):
        """Overwrite ``Y`` with the matrix product of ``X`` and the conjugate of ``W``."""
        Y[:, :, :] = np.matmul(X, np.conj(W))

    # The very first update of a
    # The masks use the builtin ``bool`` as dtype: the ``np.bool`` alias was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    update_a_from_w(np.ones(n_freq, dtype=bool))

    # One boolean mask per variable, selecting the frequencies for which that
    # variable gets the gradient step.
    if update == "mix":
        I_do_w = np.zeros(n_freq, dtype=bool)
        I_do_a = np.ones(n_freq, dtype=bool)
    else:  # default is "demix"
        I_do_w = np.ones(n_freq, dtype=bool)
        I_do_a = np.zeros(n_freq, dtype=bool)

    r_inv = np.zeros((n_frames, n_src))
    r = np.zeros((n_frames, n_src))

    # Things are more efficient when the frequencies are over the first axis
    Y = np.zeros((n_freq, n_frames, n_src), dtype=X.dtype)
    X_ref = X  # keep a reference to input signal
    X = X.swapaxes(0, 1).copy()  # more efficient order for processing

    for epoch in range(n_iter):
        # compute the switching criterion
        if update == "switching" and epoch % 10 == 0:

        # Extract the target signal
        demix(Y, X, w)

        # Now run any necessary callback
        if callback is not None and epoch in callback_checkpoints:
            Y_tmp = Y.swapaxes(0, 1).copy()
            if proj_back:
                z = projection_back(Y_tmp, X_ref[:, :, 0])
                callback(Y_tmp * np.conj(z[None, :, :]))

        # simple loop as a start
        # shape: (n_frames, n_src)
        if model == "laplace":
            r[:, :] = np.linalg.norm(Y, axis=0) / np.sqrt(n_freq)

        elif model == "gauss":
            r[:, :] = (np.linalg.norm(Y, axis=0)**2) / n_freq

        eps = 1e-15
        r[r < eps] = eps

        r_inv[:, :] = 1.0 / r

        # Compute the score function
        psi = r_inv[None, :, :] * np.conj(Y)

        # "Nu" in Algo 3 in [1]
        # shape (n_freq, 1, 1)
        zeta = Y.swapaxes(1, 2) @ psi

        x_psi = (X.swapaxes(1, 2) @ psi) / zeta

        # The w-step
        # shape (n_freq, n_chan, 1)
        delta[I_do_w] = a[I_do_w] - x_psi[I_do_w]
        w[I_do_w] += step_size * delta[I_do_w]

        # The a-step
        # shape (n_freq, n_chan, 1)
        delta[I_do_a] = w[I_do_a] - (
            Cx_inv[I_do_a] @ x_psi[I_do_a]) * lambda_a[I_do_a]
        a[I_do_a] += step_size * delta[I_do_a]

        # Apply the orthogonal constraints

        max_delta = np.max(np.linalg.norm(delta, axis=(1, 2)))

        if max_delta < tol:

    # Extract target
    demix(Y, X, w)

    Y = Y.swapaxes(0, 1).copy()
    X = X.swapaxes(0, 1)

    if proj_back:
        z = projection_back(Y, X_ref[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, w
        return Y
Ejemplo n.º 6
def blinkiva_gauss(X, U, n_src=None,
        n_iter=20, n_nmf_sub_iter=20, proj_back=True, W0=None, R0=None,
        seed=None, epsilon=0.5, sparse_reg=0., print_cost=False,
        return_filters=False, callback=None):
    """
    Implementation of BlinkIVA algorithm for blind source separation using jointly
    microphones and sound power sensors "blinkies". The algorithm was presented in

    R. Scheibler and N. Ono, *Multi-modal Blind Source Separation with Microphones and Blinkies,*
    Proc. IEEE ICASSP, Brighton, UK, May, 2019.  DOI: 10.1109/ICASSP.2019.8682594

    Parameters
    ----------
    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    U: ndarray (nframes, nblinkies)
        The matrix containing the blinky signals
    n_src: int, optional
        The number of sources or independent components
    n_iter: int, optional
        The number of iterations (default 20)
    n_nmf_sub_iter: int, optional
        The number of NMF iteration to run between two updates of the demixing
        matrices (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nchannels, nchannels), optional
        Initial value for demixing matrix
    R0: ndarray (nframes, nsrc), optional
        Initial value of the activations
    seed: int, optional
        A seed to make deterministic the random initialization of NMF parts,
        when None (default), the random number generator is used in its current state
    epsilon: float, optional
        A regularization value to prevent too large values after the division
    sparse_reg: float
        A regularization term to make the activation matrix sparse
    print_cost: bool, optional
        Print the value of the cost function every 10 iterations
    return_filters: bool, optional
        If true, the function will return the demixing matrix, gains, and activations too
    callback: func
        A callback function called every 10 iterations, allows to monitor convergence

    Returns
    -------
    Returns an (nframes, nfrequencies, nchannels) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nchannels), gains (nsrc, nblinkies),
    and activations (nframes, nchannels) if ``return_filters`` keyword is True.

    Raises
    ------
    ValueError
        If ``X`` and ``U`` do not have the same number of frames
    """

    n_frames, n_freq, n_chan = X.shape
    n_frames_u, n_blink = U.shape

    if n_frames_u != n_frames:
        raise ValueError('The microphones and blinky signals should have the same number of frames')

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # initialize the demixing matrices
    if W0 is None:
        W = np.array([np.eye(n_chan, n_chan) for f in range(n_freq)], dtype=X.dtype)
    else:
        W = W0.copy()

    # we will threshold entries of R and G that get too small
    machine_epsilon = np.finfo(float).eps

    # we will only work with the blinky signal normalized by frequency
    U_mean = U / n_freq / 2.

    I = np.eye(n_chan, n_chan)
    Y = np.zeros((n_frames, n_freq, n_chan), dtype=X.dtype)
    P = np.zeros((n_frames, n_chan))

    # initialize the parts of NMF
    R_all = np.ones((n_frames, n_chan))
    R = R_all[:, :n_src]  # subset tied to NMF of blinkies (a view on R_all)

    # When a seed is given, the global random state is saved here and put
    # back once the random initialization is done, so that the caller's
    # random stream is left undisturbed.
    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    if R0 is None:
        R[:, :] = 0.1 + 0.9 * np.random.rand(n_frames, n_src)
        R /= np.mean(R, axis=0, keepdims=True)
    else:
        R[:, :] = R0

    G = 0.1 + 0.9 * np.random.rand(n_src, n_blink)
    U_hat = np.dot(R, G)
    G *= np.sum(U_mean, axis=0, keepdims=True) / np.sum(U_hat, axis=0, keepdims=True)

    if seed is not None:
        np.random.set_state(rng_state)

    def cost(Y, W, R, G):
        # The three parts of the cost, returned under the keys used by the
        # print statement of the main loop.
        pwr = np.linalg.norm(Y, axis=1) ** 2
        cf1 = -2 * Y.shape[0] * np.sum(np.linalg.slogdet(W)[1])
        cf2 = np.sum(Y.shape[1] * np.log(R) + pwr / R)
        U_hat = np.dot(R[:, :n_src], G)
        cf3 = np.sum(np.log(U_hat) + U / U_hat / 2)
        return {'iva': cf1 + cf2, 'nmf': cf2 + cf3, 'blinkiva': cf1 + cf2 + cf3}

    def rescale(W, P, R, G):
        # Fix the scale of all variables (every array is modified in place)
        lmb = 1. / np.mean(R, axis=0)
        R *= lmb[None, :]
        P *= lmb[None, :]
        W *= np.sqrt(lmb[None, None, :])
        G /= lmb[:G.shape[0], None]

    # Compute the demixed output
    def demix(Y, X, W, P, R_all):
        for f in range(n_freq):
            Y[:, f, :] = np.dot(X[:, f, :], np.conj(W[f, :, :]))

        # The sources power averaged over frequencies
        # shape: (n_frames, n_chan)
        P[:, :] = np.linalg.norm(Y, axis=1) ** 2 / n_freq

        # copy activations for sources not tied to NMF
        R_all[:, n_src:] = P[:, n_src:]

    # initial demixing
    demix(Y, X, W, P, R_all)

    # NMF + IVA joint updates
    for epoch in range(n_iter):

        if callback is not None and epoch % 10 == 0:

            if proj_back:
                z = projection_back(Y, X[:, :, 0])
                callback(Y * np.conj(z[None, :, :]))
            else:
                callback(Y, extra=[W, G, R, X, U])

        if print_cost and epoch % 10 == 0:
            print('Cost function: iva={iva:13.0f} nmf={nmf:13.0f} iva+nmf={blinkiva:13.0f}'.format(
                **cost(Y, W, R_all, G)))

        for sub_epoch in range(n_nmf_sub_iter):

            # Update the activations
            U_hat = np.dot(R, G)
            U_hat_I = 1. / U_hat
            R_I = 1. / R
            R *= np.sqrt(
                    (P[:, :n_src] * R_I ** 2 + np.dot(U_mean * U_hat_I ** 2, G.T))
                    / (R_I + np.dot(U_hat_I, G.T) + sparse_reg)
                    )
            R[R < machine_epsilon] = machine_epsilon

            # Update the gains
            U_hat = np.dot(R, G)
            U_hat_I = 1. / U_hat
            G *= np.sqrt(np.dot(R.T, U_mean * U_hat_I ** 2) / np.dot(R.T, U_hat_I))
            G[G < machine_epsilon] = machine_epsilon

        # Compute Auxiliary Variable
        # shape: (n_freq, n_chan, n_mic, n_mic)
        denom = 2 * R_all
        denom[denom < epsilon] = epsilon  # regularize this part

        # 1) promote all arrays to (n_frames, n_freq, n_chan, n_chan, n_chan)
        # 2) take the outer product (complex) of the last two dimensions
        #    to get the covariance matrix over the microphones
        # 3) average over the time frames (index 0)
        V = np.mean(
                (X[:, :, None, :, None] / denom[:, None, :, None, None])
                * np.conj(X[:, :, None, None, :]),
                axis=0)

        # Update now the demixing matrix
        for s in range(n_chan):

            W_H = np.conj(np.swapaxes(W, 1, 2))
            WV = np.matmul(W_H, V[:, s, :, :])

            # The right-hand side is an explicit stack of column matrices,
            # one per frequency: a 2-D ``b`` next to a 3-D ``a`` is no longer
            # read as a stack of vectors by NumPy >= 2.0.
            rhs = np.tile(I[:, s, None], (n_freq, 1, 1))
            W[:, :, s] = np.linalg.solve(WV, rhs)[:, :, 0]

            # normalize the new demixing vector by sqrt(w^H V w)
            P1 = np.conj(W[:, :, s])
            P2 = np.sum(V[:, s, :, :] * W[:, None, :, s], axis=-1)
            W[:, :, s] /= np.sqrt(np.sum(P1 * P2, axis=1))[:, None]

        demix(Y, X, W, P, R_all)

        # Rescale all variables before continuing
        rescale(W, P, R_all, G)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W, G, R_all
    else:
        return Y
Ejemplo n.º 7
def overiva(
    Implementation of overdetermined IVA algorithm for BSS as presented. See
    the following publication for a detailed description of the algorithm.

    R. Scheibler and N. Ono, Independent Vector Analysis with more Microphones than Sources, arXiv, 2019.

    X: ndarray (nframes, nfrequencies, nchannels)
        STFT representation of the signal
    n_src: int, optional
        The number of sources or independent components. When
        ``n_src==nchannels``, the algorithms is identical to AuxIVA. When
        ``n_src==1``, then it is doing independent vector extraction.
    n_iter: int, optional
        The number of iterations (default 20)
    proj_back: bool, optional
        Scaling on first mic by back projection (default True)
    W0: ndarray (nfrequencies, nsrc, nchannels), optional
        Initial value for demixing matrix
    model: str
        The model of source distribution 'gauss' or 'laplace' (default)
    init_eig: bool, optional (default ``False``)
        If ``True``, and if ``W0 is None``, then the weights are initialized
        using the principal eigenvectors of the covariance matrix of the input
    return_filters: bool
        If true, the function will return the demixing matrix too
    callback: func
        A callback function called every 10 iterations, allows to monitor convergence

    Returns an (nframes, nfrequencies, nsources) array. Also returns
    the demixing matrix (nfrequencies, nchannels, nsources)
    if ``return_filters`` keyword is True.

    n_frames, n_freq, n_chan = X.shape

    # default to determined case
    if n_src is None:
        n_src = n_chan

    # covariance matrix of input signal (n_freq, n_chan, n_chan)
    Cx = np.mean(X[:, :, :, None] * np.conj(X[:, :, None, :]), axis=0)

    W_hat = np.zeros((n_freq, n_chan, n_chan), dtype=X.dtype)
    W = W_hat[:, :, :n_src]
    J = W_hat[:, :n_src, n_src:]

    def tensor_H(T):
        """Conjugate ``T`` and exchange its axes 1 and 2."""
        conjugated = np.conj(T)
        return conjugated.swapaxes(1, 2)

    def update_J_from_orth_const():
        """Recompute ``J`` in place from the current ``W`` and ``Cx``.

        With ``B = W^H @ Cx``, ``J`` becomes the solution of the linear
        system whose matrix is the first ``n_src`` columns of ``B`` and whose
        right-hand side is the remaining columns.
        """
        w_h_cx = np.matmul(tensor_H(W), Cx)
        lhs = w_h_cx[:, :, :n_src]
        rhs = w_h_cx[:, :, n_src:]
        J[:, :, :] = np.linalg.solve(lhs, rhs)

    # initialize A and W
    if W0 is None:

        if init_eig:
            # Initialize the demixing matrices with the principal
            # eigenvectors of the input covariance
            v, w = np.linalg.eig(Cx)
            for f in range(n_freq):
                ind = np.argsort(v[f])[-n_src:]
                W[f, :, :] = np.conj(w[f][:, ind])

            # Or with identity
            for f in range(n_freq):
                W[f, :n_src, :] = np.eye(n_src)

        W[:, :, :] = W0

    # We still need to initialize the rest of the matrix
    if n_src < n_chan:
        for f in range(n_freq):
            W_hat[f, n_src:, n_src:] = -np.eye(n_chan - n_src)

    eyes = np.tile(np.eye(n_chan, n_chan), (n_freq, 1, 1))
    V = np.zeros((n_freq, n_chan, n_chan), dtype=X.dtype)
    r_inv = np.zeros((n_frames, n_src))
    r = np.zeros((n_frames, n_src))

    # Things are more efficient when the frequencies are over the first axis
    Y = np.zeros((n_freq, n_frames, n_src), dtype=X.dtype)
    X = X.swapaxes(0, 1).copy()

    # Compute the demixed output
    def demix(Y, X, W):
        """Fill ``Y`` in place with ``X`` multiplied by the conjugate of ``W``."""
        conj_filters = np.conj(W)
        Y[:, :, :] = np.matmul(X, conj_filters)

    for epoch in range(n_iter):

        demix(Y, X, W)

        if callback is not None and epoch % 10 == 0:
            Y_tmp = Y.swapaxes(0, 1)
            if proj_back:
                z = projection_back(Y_tmp, X[:, :, 0].swapaxes(0, 1))
                callback(Y_tmp * np.conj(z[None, :, :]))

        # simple loop as a start
        # shape: (n_frames, n_src)
        if model == 'laplace':
            r[:, :] = (2. * np.linalg.norm(Y, axis=0))
        elif model == 'gauss':
            r[:, :] = (np.linalg.norm(Y, axis=0)**2) / n_freq

        # set the scale of r
        gamma = r.mean(axis=0)
        r /= gamma[None, :]

        if model == 'laplace':
            Y /= gamma[None, None, :]
            W /= gamma[None, None, :]
        elif model == 'gauss':
            g_sq = np.sqrt(gamma[None, None, :])
            Y /= g_sq
            W /= g_sq

        # ensure some numerical stability
        eps = 1e-15
        r[r < eps] = eps

        r_inv[:, :] = 1. / r

        # Update now the demixing matrix
        for s in range(n_src):
            # Compute Auxiliary Variable
            # shape: (n_freq, n_chan, n_chan)
            V[:, :, :] = (X.swapaxes(1, 2) *
                          r_inv[None, None, :, s]) @ np.conj(X) / n_frames

            WV = np.conj(W_hat).swapaxes(1, 2) @ V

            # The right-hand side is an explicit stack of column matrices,
            # one per frequency: a 2-D ``b`` next to a 3-D ``a`` is no longer
            # read as a stack of vectors by NumPy >= 2.0.
            W[:, :, s] = np.linalg.solve(WV, eyes[:, :, s, None])[:, :, 0]

            # normalize
            denom = np.conj(W[:, None, :, s]) @ V[:, :, :] @ W[:, :, None, s]
            W[:, :, s] /= np.sqrt(denom[:, :, 0])

            # Update the mixing matrix according to orthogonal constraints
            if n_src < n_chan:
                update_J_from_orth_const()

    demix(Y, X, W)

    Y = Y.swapaxes(0, 1).copy()
    X = X.swapaxes(0, 1)

    if proj_back:
        z = projection_back(Y, X[:, :, 0])
        Y *= np.conj(z[None, :, :])

    if return_filters:
        return Y, W
        return Y