Ejemplo n.º 1
0
    def loss(y, y_pred, t_mean, t_log_var):
        r"""
        Batch-averaged variational lower bound (VLB) loss for a Bernoulli VAE.

        Notes
        -----
        For every example the loss is the sum of two terms:

            1. the binary cross-entropy between `y` and the (clipped)
               reconstruction `y_pred`, summed over axis 1, and
            2. the KL divergence between the diagonal Gaussian
               :math:`q(t \mid x)` described by `t_mean` / `t_log_var` and a
               unit Gaussian prior, summed over axis 1.

        The per-example totals are then averaged over the batch.

        Parameters
        ----------
        y : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
            The original images.
        y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
            The VAE reconstruction of the images.
        t_mean : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
            Mean of the variational distribution :math:`q(t \mid x)`.
        t_log_var : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
            Log of the variance vector of the variational distribution
            :math:`q(t \mid x)`.

        Returns
        -------
        loss : float
            The VLB, averaged across the batch.
        """
        # keep the reconstruction strictly inside (0, 1) so that neither
        # logarithm below is ever evaluated at 0
        tiny = 2.220446049250313e-16
        probs = np.clip(y_pred, tiny, 1 - tiny)

        # per-example reconstruction term: binary cross-entropy
        log_likelihood = y * np.log(probs) + (1 - y) * np.log(1 - probs)
        reconstruction = -np.sum(log_likelihood, axis=1)

        # per-example KL divergence between q and the unit Gaussian prior p
        kl_terms = 1 + t_log_var - t_mean ** 2 - np.exp(t_log_var)
        divergence = -0.5 * np.sum(kl_terms, axis=1)

        return np.mean(divergence + reconstruction)
Ejemplo n.º 2
0
    def fit(self, X):
        """
        Compute and store the per-feature statistics used for later scaling.

        Notes
        -----
        The feature-wise mean is stored in ``self._mean`` when
        ``self.with_mean`` is truthy (otherwise a vector of zeros is stored),
        and the feature-wise population standard deviation (``ddof=0``) is
        stored in ``self._std`` when ``self.with_std`` is truthy (otherwise a
        vector of ones is stored). ``self._is_fit`` is set to True afterwards.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            An array of N samples, each with dimensionality `C`. Inputs that
            are not already ndarrays are converted with ``np.array``.

        Raises
        ------
        ValueError
            If `X` contains fewer than 2 samples.
        """
        X = X if isinstance(X, np.ndarray) else np.array(X)

        if X.shape[0] < 2:
            raise ValueError("`X` must contain at least 2 samples")

        n_features = X.shape[1]

        # fall back to the identity transform (shift 0, scale 1) for whichever
        # statistic has been disabled
        center = np.mean(X, axis=0) if self.with_mean else np.zeros(n_features)
        scale = np.std(X, axis=0, ddof=0) if self.with_std else np.ones(n_features)

        self._mean, self._std = center, scale
        self._is_fit = True
Ejemplo n.º 3
0
    def predict(self, X):
        r"""
        Generate predictions for the targets associated with the rows in `X`.

        Notes
        -----
        For every row of `X` the ``H["k"]`` nearest neighbors are retrieved
        from ``self._ball_tree`` and combined according to the
        ``"classifier"`` and ``"weights"`` entries of ``self.hyperparameters``:

            - classifier, ``"uniform"``: the most common neighbor label.
            - classifier, ``"distance"``: the label whose neighbors have the
              largest summed inverse distance.
            - regressor, ``"uniform"``: the mean of the neighbor targets.
            - regressor, ``"distance"``: the inverse-distance weighted average
              of the neighbor targets.

        Any other value of ``"weights"`` yields a prediction of None for
        that row.

        Parameters
        ----------
        X : numpy array of shape `(N', M')`
            An array of `N'` examples to generate predictions on.

        Returns
        -------
        y : numpy array of shape `(N',\*)`
            Predicted targets for the `N'` rows in `X`.
        """
        H = self.hyperparameters
        predictions = []
        for x in X:
            pred = None
            nearest = self._ball_tree.nearest_neighbors(H["k"], x)
            targets = [n.val.item() for n in nearest]

            # NOTE(review): a neighbor at distance 0 is not special-cased in
            # the "distance" branches below; `1 / n.distance` is evaluated
            # as-is.
            if H["classifier"]:
                if H["weights"] == "uniform":
                    pred = Counter(targets).most_common(1)[0][0]
                elif H["weights"] == "distance":
                    # Sum the inverse-distance votes per label, then pick the
                    # label with the largest total. (Previously the running
                    # best score was never updated, so the last label visited
                    # always won regardless of its score.)
                    votes = {}
                    for n, label in zip(nearest, targets):
                        votes[label] = votes.get(label, 0.0) + 1 / n.distance
                    if votes:
                        pred = max(votes, key=votes.get)
            else:
                if H["weights"] == "uniform":
                    pred = np.mean(targets)
                elif H["weights"] == "distance":
                    weights = [1 / n.distance for n in nearest]
                    pred = np.average(targets, weights=weights)
            predictions.append(pred)
        return np.array(predictions)
Ejemplo n.º 4
0
Archivo: lda.py Proyecto: Tommliu/mx-ml
    def _maximize_alpha(self, max_iters=1000, tol=0.1):
        """
        Optimize alpha using Blei's O(n) Newton-Raphson modification
        for a Hessian with special structure
        """
        D = self.D
        T = self.T

        alpha = self.alpha
        gamma = self.gamma

        for _ in range(max_iters):
            alpha_old = alpha

            #  Calculate gradient
            g = D * (digamma(np.sum(alpha)) - digamma(alpha)) + np.sum(
                digamma(gamma) - np.tile(digamma(np.sum(gamma, axis=1)), (T, 1)).T,
                axis=0,
            )

            #  Calculate Hessian diagonal component
            h = -D * polygamma(1, alpha)

            #  Calculate Hessian constant component
            z = D * polygamma(1, np.sum(alpha))

            #  Calculate constant
            c = np.sum(g / h) / (z ** (-1.0) + np.sum(h ** (-1.0)))

            #  Update alpha
            alpha = alpha - (g - c) / h

            #  Check convergence
            if np.sqrt(np.mean(np.square(alpha - alpha_old))) < tol:
                break

        return alpha
Ejemplo n.º 5
0
Archivo: dsp.py Proyecto: Tommliu/mx-ml
def mfcc(
    x,
    fs=44000,
    n_mfccs=13,
    alpha=0.95,
    center=True,
    n_filters=20,
    window="hann",
    normalize=True,
    lifter_coef=22,
    stride_duration=0.01,
    window_duration=0.025,
    replace_intercept=True,
):
    r"""
    Compute the Mel-frequency cepstral coefficients (MFCC) for a signal.

    Notes
    -----
    The computation proceeds in the following stages:

        1. Frame and window the signal, compute the power spectrum of each
           frame, and project it onto the mel filterbank (all delegated to
           ``mel_spectrogram``, called without mean normalization)
        2. Convert the mel filterbank powers to decibels
           (``10 * log10``)
        3. Take the discrete cosine transform (DCT) of the log filterbank
           energies of each frame and keep only the first `n_mfccs`
           coefficients to reduce the dimensionality
        4. Apply the cepstral lifter, optionally subtract the per-coefficient
           mean, and optionally replace the first coefficient with the log
           of the frame energy

    MFCCs were developed in the context of HMM-GMM automatic speech
    recognition (ASR) systems and can be used to provide a somewhat
    speaker/pitch invariant representation of phonemes.

    Parameters
    ----------
    x : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A 1D signal consisting of N samples
    fs : int
        The sample rate/frequency for the signal. Default is 44000.
    n_mfccs : int
        The number of cepstral coefficients to return (including the
        intercept coefficient). Default is 13.
    alpha : float in [0, 1)
        The preemphasis coefficient. A value of 0 corresponds to no
        filtering. Default is 0.95.
    center : bool
        Whether the kth frame of the signal should *begin* at index
        ``x[k * stride_len]`` (center = False) or be *centered* at
        ``x[k * stride_len]`` (center = True). Default is True.
    n_filters : int
        The number of filters to include in the Mel filterbank. Default is
        20.
    window : {'hamming', 'hann', 'blackman_harris'}
        The windowing function to apply to the signal before taking the DFT.
        Default is 'hann'.
    normalize : bool
        Whether to mean-normalize the MFCC values. Default is True.
    lifter_coef : int in :math:`[0, +\infty]`
        The cepstral filter coefficient. 0 corresponds to no filtering,
        larger values correspond to greater amounts of smoothing. Default is
        22.
    stride_duration : float
        The duration of the hop between consecutive windows (in seconds).
        Default is 0.01.
    window_duration : float
        The duration of each frame / window (in seconds). Default is 0.025.
    replace_intercept : bool
        Replace the first MFCC coefficient (the intercept term) with the
        log of the total frame energy instead. Default is True.

    Returns
    -------
    mfccs : :py:class:`ndarray <numpy.ndarray>` of shape `(G, C)`
        Matrix of Mel-frequency cepstral coefficients. Rows correspond to
        frames, columns to cepstral coefficients
    """
    # mel-scale projection of the power spectrum of the framed + windowed
    # signal, together with the total energy of each frame
    mel_powers, total_energy = mel_spectrogram(
        x=x,
        window_duration=window_duration,
        stride_duration=stride_duration,
        mean_normalize=False,
        window=window,
        n_filters=n_filters,
        center=center,
        alpha=alpha,
        fs=fs,
    )

    # filterbank powers in decibels
    log_mel = 10 * np.log10(mel_powers)

    # DCT each frame; the leading coefficients carry most of the information,
    # so everything past the first `n_mfccs` columns is discarded
    cepstra = np.array([DCT(row) for row in log_mel])
    coefs = cepstral_lifter(cepstra[:, :n_mfccs], D=lifter_coef)

    if normalize:
        coefs -= np.mean(coefs, axis=0)

    if replace_intercept:
        # the 0th coefficient says nothing about the spectral shape, so swap
        # it for the log frame energy, which is more informative
        coefs[:, 0] = np.log(total_energy)

    return coefs
Ejemplo n.º 6
0
Archivo: dsp.py Proyecto: Tommliu/mx-ml
def mel_spectrogram(
    x,
    window_duration=0.025,
    stride_duration=0.01,
    mean_normalize=True,
    window="hamming",
    n_filters=20,
    center=True,
    alpha=0.95,
    fs=44000,
):
    """
    Apply the Mel-filterbank to the power spectrum for a signal `x`.

    Notes
    -----
    The Mel spectrogram is the projection of the power spectrum of the framed
    and windowed signal onto the basis set provided by the Mel filterbank.
    Entries of either returned array that are exactly 0 are replaced with
    machine epsilon.

    Parameters
    ----------
    x : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A 1D signal consisting of N samples
    window_duration : float
        The duration of each frame / window (in seconds). Default is 0.025.
    stride_duration : float
        The duration of the hop between consecutive windows (in seconds).
        Default is 0.01.
    mean_normalize : bool
        Whether to subtract the coefficient means from the final filter
        values to improve the signal-to-noise ratio. Default is True.
    window : {'hamming', 'hann', 'blackman_harris'}
        The windowing function to apply to the signal before FFT. Default is
        'hamming'.
    n_filters : int
        The number of mel filters to include in the filterbank. Default is
        20.
    center : bool
        Whether the `k` th frame of the signal should *begin* at index
        ``x[k * stride_len]`` (center = False) or be *centered* at
        ``x[k * stride_len]`` (center = True). Default is True.
    alpha : float in [0, 1)
        The coefficient for the preemphasis filter. A value of 0 corresponds
        to no filtering. Default is 0.95.
    fs : int
        The sample rate/frequency for the signal. Default is 44000.

    Returns
    -------
    filter_energies : :py:class:`ndarray <numpy.ndarray>` of shape `(G, n_filters)`
        The (possibly mean_normalized) power for each filter in the Mel
        filterbank (i.e., the Mel spectrogram). Rows correspond to frames,
        columns to filters
    energy_per_frame : :py:class:`ndarray <numpy.ndarray>` of shape `(G,)`
        The total energy in each frame of the signal
    """
    machine_eps = np.finfo(float).eps
    make_window = WindowInitializer()(window)

    # frame geometry, converted from seconds to samples
    hop_len = round(stride_duration * fs)
    frame_len = round(window_duration * fs)

    # preemphasis filter on the raw signal
    signal = preemphasis(x, alpha)

    # when centering, reflect-pad both ends by half a frame before framing
    if center:
        signal = np.pad(signal, frame_len // 2, "reflect")
    frames = to_frames(signal, frame_len, hop_len, fs)

    # apply the same window to every frame
    taper = np.tile(make_window(frame_len), (frames.shape[0], 1))
    frames = frames * taper

    # power spectrum and total energy of each frame
    spectra = power_spectrum(frames)
    frame_energy = np.sum(spectra, axis=1)
    silent = frame_energy == 0
    frame_energy[silent] = machine_eps

    # power at each filter in the Mel filterbank
    bank = mel_filterbank(frame_len, n_filters=n_filters, fs=fs)
    mel_powers = spectra @ bank.T
    if mean_normalize:
        mel_powers -= np.mean(mel_powers, axis=0)
    empty = mel_powers == 0
    mel_powers[empty] = machine_eps

    return mel_powers, frame_energy