Example no. 1
    def transform(self, labels, categories=None):
        """
        Convert a list of labels into a one-hot encoding.

        Parameters
        ----------
        labels : list of length `N`
            A list of category labels.
        categories : list of length `C`
            List of the unique category labels for the items to encode. Default
            is None.

        Returns
        -------
        Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            The one-hot encoded labels. Each row corresponds to an example,
            with a single 1 in the column corresponding to the respective
            label.
        """
        if not self._is_fit:
            categories = set(labels) if categories is None else categories
            self.fit(categories)
        unknown = list(set(labels.asnumpy()) - set(self.cat2idx.keys()))
        assert len(unknown) == 0, "Unrecognized label(s): {}".format(unknown)

        N, C = len(labels), len(self.cat2idx)
        cols = np.array([self.cat2idx[c.item()] for c in labels])

        Y = np.zeros((N, C))
        Y[np.arange(N), cols] = 1
        return Y
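
The heart of ``transform`` is the fancy-index assignment ``Y[np.arange(N), cols] = 1``,
which writes exactly one 1 per row. A minimal, self-contained sketch of the same trick on
plain Python lists (the names below are illustrative, not part of the class)::

    import numpy as np

    labels = ["cat", "dog", "cat", "bird"]
    categories = sorted(set(labels))               # ['bird', 'cat', 'dog']
    cat2idx = {c: i for i, c in enumerate(categories)}

    N, C = len(labels), len(categories)
    cols = np.array([cat2idx[c] for c in labels])  # column index for each example

    Y = np.zeros((N, C))
    Y[np.arange(N), cols] = 1                      # Y[0] == [0., 1., 0.] -> "cat"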
Example no. 2
    def _init_params(self):
        init_weights = WeightInitializer(str(self.act_fn), mode=self.init)

        self.X = []
        b = np.zeros((1, self.n_classes))
        W = init_weights((self.n_classes, self.n_in))

        self.parameters = {"W": W, "b": b}

        self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)}

        self.derived_variables = {
            "y_pred": [],
            "target": [],
            "true_w": [],
            "true_b": [],
            "sampled_b": [],
            "sampled_w": [],
            "out_labels": [],
            "target_logits": [],
            "noise_samples": [],
            "noise_logits": [],
        }

        self.is_initialized = True
Example no. 3
    def fit(self, X):
        """
        Store the feature-wise mean and standard deviation across the samples
        in `X` for future scaling.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
            An array of N samples, each with dimensionality `C`
        """
        if not isinstance(X, np.ndarray):
            X = np.array(X)

        if X.shape[0] < 2:
            raise ValueError("`X` must contain at least 2 samples")

        std = np.ones(X.shape[1])
        mean = np.zeros(X.shape[1])

        if self.with_mean:
            mean = np.mean(X, axis=0)

        if self.with_std:
            std = np.std(X, axis=0, ddof=0)
        self._mean = mean
        self._std = std
        self._is_fit = True
Example no. 4
def conv2D_naive(X, W, stride, pad, dilation=0):
    """
    A slow but more straightforward implementation of a 2D "convolution"
    (technically, cross-correlation) of input `X` with a collection of kernels `W`.

    Notes
    -----
    This implementation uses ``for`` loops and direct indexing to perform the
    convolution. As a result, it is slower than the vectorized :func:`conv2D`
    function that relies on the :func:`col2im` and :func:`im2col`
    transformations.

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, in_rows, in_cols, in_ch)`
        Input volume.
    W : :py:class:`ndarray <numpy.ndarray>` of shape `(kernel_rows, kernel_cols, in_ch, out_ch)`
        The volume of convolution weights/kernels.
    stride : int
        The stride of each convolution kernel.
    pad : tuple, int, or 'same'
        The padding amount. If 'same', add padding to ensure that the output of
        a 2D convolution with a kernel of `kernel_shape` and stride `stride`
        produces an output volume of the same dimensions as the input.  If
        2-tuple, specifies the number of padding rows and columns to add *on both
        sides* of the rows/columns in `X`. If 4-tuple, specifies the number of
        rows/columns to add to the top, bottom, left, and right of the input
        volume.
    dilation : int
        Number of pixels inserted between kernel elements. Default is 0.

    Returns
    -------
    Z : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, out_ch)`
        The convolution of `X` with `W`.
    """
    s, d = stride, dilation
    X_pad, p = pad2D(X, pad, W.shape[:2], stride=s, dilation=d)

    pr1, pr2, pc1, pc2 = p
    fr, fc, in_ch, out_ch = W.shape
    n_ex, in_rows, in_cols, in_ch = X.shape

    # update effective filter shape based on dilation factor
    fr, fc = fr * (d + 1) - d, fc * (d + 1) - d

    out_rows = int((in_rows + pr1 + pr2 - fr) / s + 1)
    out_cols = int((in_cols + pc1 + pc2 - fc) / s + 1)

    Z = np.zeros((n_ex, out_rows, out_cols, out_ch))
    for m in range(n_ex):
        for c in range(out_ch):
            for i in range(out_rows):
                for j in range(out_cols):
                    i0, i1 = i * s, (i * s) + fr
                    j0, j1 = j * s, (j * s) + fc

                    window = X_pad[m, i0:i1:(d + 1), j0:j1:(d + 1), :]
                    Z[m, i, j, c] = np.sum(window * W[:, :, :, c])
    return Z
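
The dilation bookkeeping above (effective kernel extent ``fr * (d + 1) - d``) and the
output-size formula are easy to verify by hand. A short, self-contained arithmetic check
with illustrative numbers::

    fr, fc = 3, 3                # kernel rows/cols
    d, s = 1, 1                  # dilation, stride
    in_rows = in_cols = 7
    pr1 = pr2 = pc1 = pc2 = 0    # no padding

    fr_eff = fr * (d + 1) - d    # 5: a 3-tap kernel with one inserted gap per tap
    fc_eff = fc * (d + 1) - d    # 5

    out_rows = (in_rows + pr1 + pr2 - fr_eff) // s + 1   # 3
    out_cols = (in_cols + pc1 + pc2 - fc_eff) // s + 1   # 3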
Example no. 5
File: dsp.py Project: Tommliu/mx-ml
def autocorrelate1D(x):
    """
    Autocorrelate a 1D signal `x` with itself.

    Notes
    -----
    The `k`-th term in the one-dimensional autocorrelation is

    .. math::

        a_k = \sum_n x_{n + k} x_n

    NB. This is a naive :math:`O(N^2)` implementation.  For a faster :math:`O(N
    \log N)` approach using the FFT, see [1].

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Autocorrelation#Efficient_computation

    Parameters
    ----------
    x : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A 1D signal consisting of N samples

    Returns
    -------
    auto : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The autocorrelation of `x` with itself
    """
    N = len(x)
    auto = np.zeros(N)
    for k in range(N):
        for n in range(N - k):
            auto[k] += x[n + k] * x[n]
    return auto
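
The reference in the Notes points to the :math:`O(N \log N)` FFT route. A minimal NumPy
sketch of that approach (not part of this module) which, via the Wiener-Khinchin theorem,
agrees with ``autocorrelate1D`` for lags `0` through `N - 1`::

    import numpy as np

    def autocorrelate1D_fft(x):
        # zero-pad to length 2N so the circular correlation becomes linear
        N = len(x)
        F = np.fft.rfft(x, 2 * N)
        # the power spectrum is the DFT of the autocorrelation
        return np.fft.irfft(F * np.conj(F), 2 * N)[:N]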
Example no. 6
    def _M_step(self):
        C, N, X = self.C, self.N, self.X
        denoms = np.sum(self.Q, axis=0)

        # update cluster priors
        self.pi = denoms / N

        # update cluster means
        nums_mu = [np.dot(self.Q[:, c], X) for c in range(C)]
        for ix, (num, den) in enumerate(zip(nums_mu, denoms)):
            self.mu[ix, :] = num / den if den > 0 else np.zeros_like(num)

        # update cluster covariances
        for c in range(C):
            mu_c = self.mu[c, :]
            n_c = denoms[c]

            outer = np.zeros((self.d, self.d))
            for i in range(N):
                wic = self.Q[i, c]
                xi = self.X[i, :]
                outer += wic * np.outer(xi - mu_c, xi - mu_c)

            outer = outer / n_c if n_c > 0 else outer
            self.sigma[c, :, :] = outer

        assert_allclose(np.sum(self.pi),
                        1,
                        err_msg="{}".format(np.sum(self.pi)))
Example no. 7
File: hmm.py Project: Tommliu/mx-ml
    def _forward(self, Obs):
        """
        Computes the forward probability trellis for an HMM parameterized by
        :math:`(A, B, \pi)`.

        Notes
        -----
        The forward trellis (sometimes referred to as `alpha` in the HMM
        literature), is a 2D array where entry `i`, `j` represents the probability
        under the HMM of being in latent state `i` after seeing the first `j`
        observations:

        .. math:: \mathtt{forward[i,j]} = P(o_1,\ldots,o_j,q_j=i|A,B,\pi)

        Here :math:`q_j = i` indicates that the hidden state at time `j` is of
        type `i`.

        The DP step is::

            forward[i,j] = sum_{s'=1}^N forward[s',j-1] * A[s',i] * B[i,o_j]
                         = sum_{s'=1}^N P(o_1,\ldots,o_{j-1},q_{j-1}=s'|A,B,pi) *
                           P(q_j=i|q_{j-1}=s') * P(o_j|q_j=i)

        In words, ``forward[i,j]`` is the weighted sum of the values computed
        on the previous timestep. The weight on each previous state value is
        the product of the probability of transitioning from that state to
        state `i` and the probability of emitting observation `j` in state `i`.

        Parameters
        ----------
        Obs : :py:class:`ndarray <numpy.ndarray>` of shape `(T,)`
            An observation sequence of length `T`.

        Returns
        -------
        forward : :py:class:`ndarray <numpy.ndarray>` of shape `(N, T)`
            The forward trellis.
        """
        eps = self.eps
        T = Obs.shape[0]

        # initialize the forward trellis; probabilities are accumulated in
        # log space to avoid numerical underflow
        forward = np.zeros((self.N, T))

        ot = Obs[0]
        for s in range(self.N):
            forward[s, 0] = np.log(self.pi[s] + eps) + np.log(self.B[s, ot] + eps)

        for t in range(1, T):
            ot = Obs[t]
            for s in range(self.N):
                forward[s, t] = logsumexp(
                    [
                        forward[s_, t - 1]
                        + np.log(self.A[s_, s] + eps)
                        + np.log(self.B[s, ot] + eps)
                        for s_ in range(self.N)
                    ]
                )
        return forward
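
The trellis is filled entirely in log space, so the per-state sum in the DP step is
computed with ``logsumexp`` (its origin is not shown here; presumably SciPy's
implementation or an in-module helper). A numerically stable stand-in is only a few
lines::

    import numpy as np

    def logsumexp(log_probs):
        """Stable log(sum(exp(a))): shift by the max to avoid overflow/underflow."""
        log_probs = np.asarray(log_probs)
        m = np.max(log_probs)
        if np.isneginf(m):
            return -np.inf
        return m + np.log(np.sum(np.exp(log_probs - m)))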
Example no. 8
File: hmm.py Project: Tommliu/mx-ml
    def _backward(self, Obs):
        """
        Compute the backward probability trellis for an HMM parameterized by
        :math:`(A, B, \pi)`.

        Notes
        -----
        The backward trellis (sometimes referred to as `beta` in the HMM
        literature), is a 2D array where entry `i`,`j` represents the probability
        of seeing the observations from time `j+1` onward given that the HMM is
        in state `i` at time `j`

        .. math:: \mathtt{backward[i,j]} = P(o_{j+1},o_{j+2},...,o_T|q_j=i,A,B,\pi)

        Here :math:`q_j = i` indicates that the hidden state at time `j` is of type `i`.

        The DP step is::

            backward[i,j] = sum_{s'=1}^N backward[s',j+1] * A[i, s'] * B[s',o_{j+1}]
                          = sum_{s'=1}^N P(o_{j+2},o_{j+3},...,o_T|q_{j+1}=s',A,B,pi) *
                            P(q_{j+1}=s'|q_{j}=i) * P(o_{j+1}|q_{j+1}=s')

        In words, ``backward[i,j]`` is the weighted sum of the values computed
        on the following timestep. The weight on each state value from the
        `j+1`'th timestep is the product of the probability of transitioning from
        state i to that state and the probability of emitting observation `j+1`
        from that state.

        Parameters
        ----------
        Obs : :py:class:`ndarray <numpy.ndarray>` of shape `(T,)`
            A single observation sequence of length `T`.

        Returns
        -------
        backward : :py:class:`ndarray <numpy.ndarray>` of shape `(N, T)`
            The backward trellis.
        """
        eps = self.eps
        T = Obs.shape[0]

        # initialize the backward trellis (log space); the final column is
        # log(1) = 0 for every state
        backward = np.zeros((self.N, T))

        for s in range(self.N):
            backward[s, T - 1] = 0

        for t in reversed(range(T - 1)):
            ot1 = Obs[t + 1]
            for s in range(self.N):
                backward[s, t] = logsumexp(
                    [
                        np.log(self.A[s, s_] + eps)
                        + np.log(self.B[s_, ot1] + eps)
                        + backward[s_, t + 1]
                        for s_ in range(self.N)
                    ]
                )
        return backward
Example no. 9
def col2im(X_col, X_shape, W_shape, pad, stride, dilation=0):
    """
    Take columns of a 2D matrix and rearrange them into the blocks/windows of
    a 4D image volume.

    Notes
    -----
    A NumPy reimagining of MATLAB's ``col2im`` 'sliding' function.

    Code extended from Andrej Karpathy's ``im2col.py``.

    Parameters
    ----------
    X_col : :py:class:`ndarray <numpy.ndarray>` of shape `(Q, Z)`
        The columnized version of `X` (assumed to include padding)
    X_shape : 4-tuple containing `(n_ex, in_rows, in_cols, in_ch)`
        The original dimensions of `X` (not including padding)
    W_shape : 4-tuple containing `(kernel_rows, kernel_cols, in_ch, out_ch)`
        The dimensions of the weights in the present convolutional layer
    pad : 4-tuple of `(left, right, up, down)`
        Number of zero-padding rows/cols to add to `X`
    stride : int
        The stride of each convolution kernel
    dilation : int
        Number of pixels inserted between kernel elements. Default is 0.

    Returns
    -------
    img : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, in_rows, in_cols, in_ch)`
        The reshaped `X_col` input matrix
    """
    if not (isinstance(pad, tuple) and len(pad) == 4):
        raise TypeError("pad must be a 4-tuple, but got: {}".format(pad))

    s, d = stride, dilation
    pr1, pr2, pc1, pc2 = pad
    fr, fc, n_in, n_out = W_shape
    n_ex, in_rows, in_cols, n_in = X_shape

    X_pad = np.zeros((n_ex, n_in, in_rows + pr1 + pr2, in_cols + pc1 + pc2))
    k, i, j = _im2col_indices((n_ex, n_in, in_rows, in_cols), fr, fc, pad, s,
                              d)

    X_col_reshaped = X_col.reshape(n_in * fr * fc, -1, n_ex)
    X_col_reshaped = X_col_reshaped.transpose(2, 0, 1)

    np.add.at(X_pad, (slice(None), k, i, j), X_col_reshaped)

    pr2 = None if pr2 == 0 else -pr2
    pc2 = None if pc2 == 0 else -pc2
    return X_pad[:, :, pr1:pr2, pc1:pc2]
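
The ``np.add.at(X_pad, (slice(None), k, i, j), X_col_reshaped)`` call is what makes the
column-to-image scatter correct when receptive fields overlap: plain fancy-index
assignment keeps only one of the duplicate writes, while ``np.add.at`` accumulates all of
them. A tiny self-contained illustration::

    import numpy as np

    idx = np.array([0, 0, 2])

    a = np.zeros(3)
    a[idx] += 1           # buffered: a == [1., 0., 1.] (the repeated index counts once)

    b = np.zeros(3)
    np.add.at(b, idx, 1)  # unbuffered: b == [2., 0., 1.] (both writes to index 0 land)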
Example no. 10
    def _initialize_params(self):
        """
        Randomly initialize the starting GMM parameters.
        """
        C, d = self.C, self.d
        rr = np.random.rand(C)
        self.pi = rr / rr.sum()  # cluster priors
        self.Q = np.zeros((self.N, C))  # variational distribution q(T)
        self.mu = np.random.uniform(-5, 10, C * d).reshape(C, d)  # cluster means
        self.sigma = np.array([np.identity(d) for _ in range(C)])  # cluster covariances

        self.best_pi = None
        self.best_mu = None
        self.best_sigma = None
        self.best_elbo = -np.inf
Example no. 11
    def fit(self, X, y):
        """
        Fit the GP prior to the training data.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            A training dataset of `N` examples, each with dimensionality `M`.
        y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, O)`
            A collection of real-valued training targets for the
            examples in `X`, each with dimension `O`.
        """
        mu = np.zeros(X.shape[0])
        K = self.kernel(X, X)

        self.parameters["X"] = X
        self.parameters["y"] = y
        self.parameters["GP_cov"] = K
        self.parameters["GP_mean"] = mu
Example no. 12
    def _encode_dense(self, examples):
        N = len(examples)
        table = np.zeros([N, self.n_dim])  # dense

        for row, feat_dict in enumerate(examples):
            for f_id, val in feat_dict.items():
                if isinstance(f_id, str):
                    f_id = f_id.encode("utf-8")

                # use json module to convert the feature id into a unique
                # string compatible with the buffer API (required by hashlib)
                if isinstance(f_id, (tuple, dict, list)):
                    f_id = json.dumps(f_id, sort_keys=True).encode("utf-8")

                h = int(self.hash(f_id).hexdigest(), base=16)
                col = h % self.n_dim
                table[row, col] += np.sign(h) * val

        return table
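
The hashing trick here maps an arbitrary feature id to a column by hashing it and taking
the digest modulo ``n_dim``; colliding features simply add their values into the same
column. A self-contained sketch of the indexing step, using ``hashlib.md5`` as a stand-in
for ``self.hash`` (an assumption; the actual hash function is configured elsewhere)::

    import hashlib
    import numpy as np

    n_dim = 8
    examples = [{"color=red": 1.0, "length": 3.0}]

    table = np.zeros((len(examples), n_dim))
    for row, feat_dict in enumerate(examples):
        for f_id, val in feat_dict.items():
            h = int(hashlib.md5(f_id.encode("utf-8")).hexdigest(), base=16)
            table[row, h % n_dim] += val   # collisions accumulate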
Example no. 13
    def sample(self, X, n_samples=1, dist="posterior_predictive"):
        """
        Sample functions from the GP prior or posterior predictive
        distribution.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            The collection of datapoints to generate predictions on. Only used if
            `dist` = 'posterior_predictive'.
        n_samples : int
            The number of samples to generate. Default is 1.
        dist : {"posterior_predictive", "prior"}
            The distribution to draw samples from. Default is
            "posterior_predictive".

        Returns
        -------
        samples : :py:class:`ndarray <numpy.ndarray>` of shape `(n_samples, O, N)`
            The generated samples for the points in `X`.
        """
        mvnorm = np.random.multivariate_normal

        if dist == "prior":
            mu = np.zeros((X.shape[0], 1))
            cov = self.kernel(X, X)
        elif dist == "posterior_predictive":
            mu, _, cov = self.predict(X, return_cov=True)
        else:
            raise ValueError("Unrecognized dist: '{}'".format(dist))

        if mu.ndim == 1:
            mu = mu[:, np.newaxis]

        samples = np.array([mvnorm(_mu, cov, size=n_samples) for _mu in mu.T])
        return samples.swapaxes(0, 1)
Example no. 14
def calc_conv_out_dims(X_shape, W_shape, stride=1, pad=0, dilation=0):
    """
    Compute the dimensions of the output volume for the specified convolution.

    Parameters
    ----------
    X_shape : 3-tuple or 4-tuple
        The dimensions of the input volume to the convolution. If 3-tuple,
        entries are expected to be (`n_ex`, `in_length`, `in_ch`). If 4-tuple,
        entries are expected to be (`n_ex`, `in_rows`, `in_cols`, `in_ch`).
    W_shape : 3-tuple or 4-tuple
        The dimensions of the weight volume for the convolution. If 3-tuple,
        entries are expected to be (`f_len`, `in_ch`, `out_ch`). If 4-tuple,
        entries are expected to be (`fr`, `fc`, `in_ch`, `out_ch`).
    pad : tuple, int, or {'same', 'causal'}
        The padding amount. If 'same', add padding to ensure that the output
        length of a 1D convolution with a kernel of `kernel_shape` and stride
        `stride` is the same as the input length.  If 'causal' compute padding
        such that the output both has the same length as the input AND
        ``output[t]`` does not depend on ``input[t + 1:]``. If 2-tuple, specifies the
        number of padding columns to add on each side of the sequence. Default
        is 0.
    stride : int
        The stride for the convolution kernel. Default is 1.
    dilation : int
        The dilation of the convolution kernel. Default is 0.

    Returns
    -------
    out_dims : 3-tuple or 4-tuple
        The dimensions of the output volume. If 3-tuple, entries are (`n_ex`,
        `out_length`, `out_ch`). If 4-tuple, entries are (`n_ex`, `out_rows`,
        `out_cols`, `out_ch`).
    """
    dummy = np.zeros(X_shape)
    s, p, d = stride, pad, dilation
    if len(X_shape) == 3:
        _, p = pad1D(dummy, p)
        pw1, pw2 = p
        fw, in_ch, out_ch = W_shape
        n_ex, in_length, in_ch = X_shape

        _fw = fw * (d + 1) - d
        out_length = (in_length + pw1 + pw2 - _fw) // s + 1
        out_dims = (n_ex, out_length, out_ch)

    elif len(X_shape) == 4:
        _, p = pad2D(dummy, p)
        pr1, pr2, pc1, pc2 = p
        fr, fc, in_ch, out_ch = W_shape
        n_ex, in_rows, in_cols, in_ch = X_shape

        # adjust effective filter size to account for dilation
        _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d
        out_rows = (in_rows + pr1 + pr2 - _fr) // s + 1
        out_cols = (in_cols + pc1 + pc2 - _fc) // s + 1
        out_dims = (n_ex, out_rows, out_cols, out_ch)
    else:
        raise ValueError("Unrecognized number of input dims: {}".format(
            len(X_shape)))
    return out_dims
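
A usage sketch for the 1D branch, assuming ``calc_conv_out_dims`` (and the ``pad1D`` /
``pad2D`` helpers it calls) is importable from this project::

    # a (4, 10, 3) input, a length-3 kernel with 8 output channels, stride 2, pad 1
    calc_conv_out_dims((4, 10, 3), (3, 3, 8), stride=2, pad=1)   # -> (4, 5, 8)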
Example no. 15
File: dsp.py Project: Tommliu/mx-ml
def mel_filterbank(N,
                   n_filters=20,
                   fs=44000,
                   min_freq=0,
                   max_freq=None,
                   normalize=True):
    """
    Compute the filters in a Mel filterbank and return the corresponding
    transformation matrix

    Notes
    -----
    The Mel scale is a perceptual scale designed to simulate the way the human
    ear works. Pitches judged by listeners to be equal in perceptual /
    psychological distance have equal distance on the Mel scale.  Practically,
    this corresponds to a scale with higher resolution at low frequencies and
    lower resolution at higher (> 500 Hz) frequencies.

    Each filter in the Mel filterbank is triangular with a response of 1 at its
    center and a linear decay on both sides until it reaches the center
    frequency of the next adjacent filter.

    This implementation is based on code in the (superb) LibROSA package [1].

    References
    ----------
    .. [1] McFee et al. (2015). "librosa: Audio and music signal analysis in
       Python", *Proceedings of the 14th Python in Science Conference*
       https://librosa.github.io

    Parameters
    ----------
    N : int
        The number of DFT bins
    n_filters : int
        The number of mel filters to include in the filterbank. Default is 20.
    min_freq : int
        Minimum filter frequency (in Hz). Default is 0.
    max_freq : int
        Maximum filter frequency (in Hz). Default is None, which corresponds
        to `fs / 2`.
    fs : int
        The sample rate/frequency for the signal. Default is 44000.
    normalize : bool
        If True, scale the Mel filter weights by their area in Mel space.
        Default is True.

    Returns
    -------
    fbank : :py:class:`ndarray <numpy.ndarray>` of shape `(n_filters, N // 2 + 1)`
        The mel-filterbank transformation matrix. Rows correspond to filters,
        columns to DFT bins.
    """
    max_freq = fs / 2 if max_freq is None else max_freq
    min_mel, max_mel = hz2mel(min_freq), hz2mel(max_freq)

    fbank = np.zeros((n_filters, N // 2 + 1))

    # uniformly spaced values on the mel scale, translated back into Hz
    mel_bins = mel2hz(np.linspace(min_mel, max_mel, n_filters + 2))

    # the centers of the frequency bins for the DFT
    hz_bins = dft_bins(N, fs)

    mel_spacing = np.diff(mel_bins)

    # ramps[i] = mel_bins[i] - hz_bins
    ramps = mel_bins.reshape(-1, 1) - hz_bins.reshape(1, -1)
    for i in range(n_filters):
        # calc the filter values on the left and right across the bins ...
        left = -ramps[i] / mel_spacing[i]
        right = ramps[i + 2] / mel_spacing[i + 1]

        # ... and clip them to zero wherever they fall below the x-axis
        fbank[i] = np.maximum(0, np.minimum(left, right))

    if normalize:
        energy_norm = 2.0 / (mel_bins[2:n_filters + 2] - mel_bins[:n_filters])
        fbank *= energy_norm[:, np.newaxis]

    return fbank
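
``hz2mel`` and ``mel2hz`` are used above but not shown. A common definition is the
``2595 * log10(1 + f / 700)`` mel mapping and its inverse; the sketch below is an
assumption about, not a copy of, this module's helpers::

    import numpy as np

    def hz2mel(hz):
        # frequency in Hz -> mel scale (O'Shaughnessy / HTK-style formula)
        return 2595.0 * np.log10(1.0 + np.asarray(hz) / 700.0)

    def mel2hz(mel):
        # inverse of hz2mel
        return 700.0 * (10.0 ** (np.asarray(mel) / 2595.0) - 1.0)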
Example no. 16
File: hmm.py Project: Tommliu/mx-ml
    def _Mstep(self, gamma, xi, phi):
        """
        Run a single M-step update for the Baum-Welch/Forward-Backward
        algorithm.

        Parameters
        ----------
        gamma : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, T)`
            The estimated state-occupancy count matrix.
        xi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, N, T)`
            The estimated state-state transition count matrix.
        phi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N)`
            The estimated starting count matrix for each latent state.

        Returns
        -------
        A : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)`
            The estimated transition matrix.
        B : :py:class:`ndarray <numpy.ndarray>` of shape `(N, V)`
            The estimated emission matrix.
        pi : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            The estimated prior probabilities for each latent state.
        """
        eps = self.eps

        # initialize the estimated transition (A) and emission (B) matrices
        A = np.zeros((self.N, self.N))
        B = np.zeros((self.N, self.V))
        pi = np.zeros(self.N)

        count_gamma = np.zeros((self.I, self.N, self.V))
        count_xi = np.zeros((self.I, self.N, self.N))

        for i in range(self.I):
            Obs = self.O[i, :]
            for si in range(self.N):
                for vk in range(self.V):
                    # if not (Obs == vk).any():
                    if not int(Obs[0]) == vk:
                        #  count_gamma[i, si, vk] = -np.inf
                        count_gamma[i, si, vk] = np.log(eps)
                    else:
                        count_gamma[i, si, vk] = logsumexp(gamma[i, si, Obs == vk])

                for sj in range(self.N):
                    count_xi[i, si, sj] = logsumexp(xi[i, si, sj, :])

        pi = logsumexp(phi, axis=0) - np.log(self.I + eps)
        np.testing.assert_almost_equal(np.exp(pi).sum(), 1)

        for si in range(self.N):
            for vk in range(self.V):
                B[si, vk] = logsumexp(count_gamma[:, si, vk]) - logsumexp(
                    count_gamma[:, si, :]
                )

            for sj in range(self.N):
                A[si, sj] = logsumexp(count_xi[:, si, sj]) - logsumexp(
                    count_xi[:, si, :]
                )

            np.testing.assert_almost_equal(np.exp(A[si, :]).sum(), 1)
            np.testing.assert_almost_equal(np.exp(B[si, :]).sum(), 1)
        return np.exp(A), np.exp(B), np.exp(pi)
Example no. 17
File: hmm.py Project: Tommliu/mx-ml
    def _Estep(self):
        """
        Run a single E-step update for the Baum-Welch/Forward-Backward
        algorithm. This step estimates ``xi`` and ``gamma``, the expected
        state-state transition counts and the expected state-occupancy counts,
        respectively.

        ``xi[i,j,k]`` gives the probability of being in state `i` at time `k`
        and state `j` at time `k+1` given the observed sequence `O` and the
        current estimates for transition (`A`) and emission (`B`) matrices::

            xi[i,j,k] = P(q_k=i,q_{k+1}=j|O,A,B,pi)
                      = P(q_k=i,q_{k+1}=j,O|A,B,pi) / P(O|A,B,pi)
                      = [
                            P(o_1,o_2,...,o_k,q_k=i|A,B,pi) *
                            P(q_{k+1}=j|q_k=i) * P(o_{k+1}|q_{k+1}=j) *
                            P(o_{k+2},o_{k+3},...,o_T|q_{k+1}=j,A,B,pi)
                        ] / P(O|A,B,pi)
                      = [
                            fwd[i, k] * self.A[i, j] *
                            self.B[j, o_{k+1}] * bwd[j, k + 1]
                        ] / fwd[:, T].sum()

        The expected number of transitions from state `i` to state `j` across the
        entire sequence is then the sum over all timesteps: ``xi[i,j,:].sum()``.

        ``gamma[i,j]`` gives the probability of being in state `i` at time `j`

        .. math:: \mathtt{gamma[i,j]} = P(q_j = i \mid O, A, B, \pi)

        Returns
        -------
        gamma : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, T)`
            The estimated state-occupancy count matrix.
        xi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, N, T)`
            The estimated state-state transition count matrix.
        phi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N)`
            The estimated prior counts for each latent state.
        """
        eps = self.eps

        gamma = np.zeros((self.I, self.N, self.T))
        xi = np.zeros((self.I, self.N, self.N, self.T))
        phi = np.zeros((self.I, self.N))

        for i in range(self.I):
            Obs = self.O[i, :]
            fwd = self._forward(Obs)
            bwd = self._backward(Obs)
            log_likelihood = logsumexp(fwd[:, self.T - 1])

            t = self.T - 1
            for si in range(self.N):
                gamma[i, si, t] = fwd[si, t] + bwd[si, t] - log_likelihood
                phi[i, si] = fwd[si, 0] + bwd[si, 0] - log_likelihood

            for t in range(self.T - 1):
                ot1 = Obs[t + 1]
                for si in range(self.N):
                    gamma[i, si, t] = fwd[si, t] + bwd[si, t] - log_likelihood
                    for sj in range(self.N):
                        xi[i, si, sj, t] = (
                            fwd[si, t]
                            + np.log(self.A[si, sj] + eps)
                            + np.log(self.B[sj, ot1] + eps)
                            + bwd[sj, t + 1]
                            - log_likelihood
                        )

        return gamma, xi, phi
Example no. 18
File: hmm.py Project: Tommliu/mx-ml
    def decode(self, O):
        """
        Given the HMM parameterized by :math:`(A, B, \pi)` and an observation
        sequence :math:`O = o_1, \ldots, o_T`, compute the most probable
        sequence of latent states, :math:`Q = q_1, \ldots, q_T`.

        Notes
        -----
        HMM decoding is done efficiently via DP using the Viterbi algorithm,
        which produces a 2D trellis, ``viterbi``, where entry `i`, `j` represents the
        probability under the HMM of being in state `i` at time `j` after having
        passed through the *most probable* state sequence :math:`q_1,\ldots,q_{j-1}`:

        .. math::

            \mathtt{viterbi[i,j]} =
                \max_{q_1,\ldots,q_{j-1}} P(o_1,\ldots,o_j,q_1,\ldots,q_{j-1},q_j=i \mid A,B,\pi)

        Here :math:`q_j = i` indicates that the hidden state at time `j` is of
        type `i`, and :math:`\max_{q_1,\ldots,q_{j-1}}` represents the maximum over
        all possible latent state sequences for the first `j-1` observations.

        The DP step is:

        .. math::

            \mathtt{viterbi[i,j]}  &=  \max_{s'=1}^N \mathtt{viterbi[s',j-1]} \cdot
            \mathtt{A[s',i]} \cdot \mathtt{B[i,o_j]} \\

                                   &=  \max_{s'=1}^N
                                   P(o_1,\ldots,o_{j-1},q_1,\ldots,q_{j-2},q_{j-1}=s' \mid A,B,\pi) \;
                                   P(q_j=i \mid q_{j-1}=s') P(o_j \mid q_j=i)

        In words, ``viterbi[i,j]`` is the maximum over the values computed on
        the previous timestep, each weighted by the product of the probability
        of transitioning from that state to state `i` and the probability of
        emitting observation `j` in state `i`.

        To compute the most probable state sequence we maintain a second
        trellis, ``back_pointer``, whose `i`, `j` entry contains the value of the
        latent state at timestep `j-1` that is most likely to lead to latent
        state `i` at timestep `j`.

        When we have completed the ``viterbi`` and ``back_pointer`` trellises for
        all `T` timesteps/observations, we greedily move backwards through the
        ``back_pointer`` trellis to construct the best path for the full
        sequence of observations.

        Parameters
        ----------
        O : :py:class:`ndarray <numpy.ndarray>` of shape `(T,)`
            An observation sequence of length `T`.

        Returns
        -------
        best_path : list of length `T`
            The most probable sequence of latent states for observations `O`.
        best_path_log_prob : float
            The log probability of the latent state sequence in `best_path`
            under the HMM.
        """
        eps = self.eps

        if O.ndim == 1:
            O = O.reshape(1, -1)

        # number of observations in each sequence
        T = O.shape[1]

        # number of training sequences
        I = O.shape[0]
        if I != 1:
            raise ValueError("Can only decode a single sequence (O.shape[0] must be 1)")

        # initialize the viterbi and back_pointer matrices
        viterbi = np.zeros((self.N, T))
        back_pointer = np.zeros((self.N, T)).astype(int)

        ot = O[0, 0]
        for s in range(self.N):
            back_pointer[s, 0] = 0
            viterbi[s, 0] = np.log(self.pi[s] + eps) + np.log(self.B[s, ot] + eps)

        for t in range(1, T):
            ot = O[0, t]
            for s in range(self.N):
                seq_probs = [
                    viterbi[s_, t - 1]
                    + np.log(self.A[s_, s] + eps)
                    + np.log(self.B[s, ot] + eps)
                    for s_ in range(self.N)
                ]

                viterbi[s, t] = np.max(seq_probs)
                back_pointer[s, t] = np.argmax(seq_probs)

        best_path_log_prob = viterbi[:, T - 1].max()

        # backtrack through the trellis to get the most likely sequence of
        # latent states
        pointer = viterbi[:, T - 1].argmax()
        best_path = [pointer]
        for t in reversed(range(1, T)):
            pointer = back_pointer[pointer, t]
            best_path.append(pointer)
        best_path = best_path[::-1]
        return best_path, best_path_log_prob