Example #1
def gaussian_array(X, rows=-2, cols=-1, scale=1):

    # Get mean and second moment
    (x, xx) = X.get_moments()
    ndim = len(X.dims[0])
    shape = X.get_shape(0)
    size = len(shape)

    # Compute standard deviation
    xx = utils.get_diag(xx, ndim=ndim)
    std = np.sqrt(xx - x**2)

    # Force explicit elements when broadcasting
    x = x * np.ones(shape)
    std = std * np.ones(shape)

    # Normalize the row and column axes to the range 0,...,size-1
    if rows < 0:
        rows += size
    if cols < 0:
        cols += size
    if rows < 0 or rows >= size:
        raise ValueError("Row axis invalid")
    if cols < 0 or cols >= size:
        raise ValueError("Column axis invalid")
    if rows == cols:
        raise ValueError("Row and column axes must be different")

    # Put the row and column axes to the end
    axes = [i for i in range(size) if i not in (rows, cols)] + [rows, cols]
    x = np.transpose(x, axes=axes)
    std = np.transpose(std, axes=axes)

    # Remove non-row and non-column axes that have length 1
    squeezed_shape = tuple([sh for sh in np.shape(x)[:-2] if sh != 1])
    x = np.reshape(x, squeezed_shape + np.shape(x)[-2:])
    std = np.reshape(std, squeezed_shape + np.shape(std)[-2:])

    # Pad with leading unit axes so that all four axes are explicit
    x = utils.atleast_nd(x, 4)
    std = utils.atleast_nd(std, 4)

    if np.ndim(x) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    M = np.shape(x)[0]
    N = np.shape(x)[1]
    vmax = np.max(np.abs(x) + scale*std)
    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i*N+j+1)
            if scale == 0:
                hinton(x[i,j], vmax=vmax)
            else:
                hinton(x[i,j], vmax=vmax, error=scale*std[i,j])
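
A minimal usage sketch for this helper (hypothetical and untested: it assumes bayespy is installed, that gaussian_array and hinton are defined in the current plotting module as above, and that utils refers to bayespy's utility module):

import numpy as np
import matplotlib.pyplot as plt
from bayespy.nodes import GaussianARD

# A 3x4 grid of scalar Gaussian variables (plates (3, 4), scalar dimensions)
X = GaussianARD(np.random.randn(3, 4), 1e3, shape=())

gaussian_array(X, scale=2)  # Hinton diagram of the means with 2-std error bars
plt.show()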
Example #2
def timeseries_categorical_mc(Z):

    # Make sure that the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Get expectations (and broadcast explicitly)
    z = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Compute the subplot layout
    z = utils.atleast_nd(z, 4)
    if np.ndim(z) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    M = np.shape(z)[0]
    N = np.shape(z)[1]

    #print("DEBUG IN PLOT", Z.get_shape(0), np.shape(z))

    # Plot Hintons
    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i * N + j + 1)
            hinton(z[i, j].T, vmax=1.0, square=False)
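
A rough usage sketch (assumptions: bayespy is importable, timeseries_categorical_mc is in scope as defined above, and the CategoricalMarkovChain node supports conversion to CategoricalMoments):

import matplotlib.pyplot as plt
from bayespy.nodes import CategoricalMarkovChain

# Two hidden states, a sticky transition matrix, 100 time steps
Z = CategoricalMarkovChain([0.5, 0.5],
                           [[0.9, 0.1], [0.1, 0.9]],
                           states=100)
timeseries_categorical_mc(Z)  # Hinton diagram of state probabilities per step
plt.show()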
Example #3
def infer(y, D, K,
          mask=True, 
          maxiter=100,
          rotate=False, 
          debug=False, 
          precompute=False,
          update_hyper=0,
          start_rotating=0,
          start_rotating_weights=0,
          plot_C=True,
          monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.
    """

    y = utils.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)
        
    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        
        # Initial rotation of the D-dimensional state space (X, A, C);
        # does not update the hyperparameters
        rotA_init = transformations.RotateGaussianARD(Q['A'], 
                                                      axis=0,
                                                      precompute=precompute)
        rotX_init = transformations.RotateVaryingMarkovChain(Q['X'], 
                                                             Q['A'], 
                                                             Q['S']._convert(GaussianMoments)[...,1:,None], 
                                                             rotA_init)
        rotC_init = transformations.RotateGaussianARD(Q['C'],
                                                      axis=0,
                                                      precompute=precompute)
        R_X_init = transformations.RotationOptimizer(rotX_init, rotC_init, D)

        # Rotate the D-dimensional state space (X, A, C)
        rotA = transformations.RotateGaussianARD(Q['A'], 
                                                 Q['alpha'],
                                                 axis=0,
                                                 precompute=precompute)
        rotX = transformations.RotateVaryingMarkovChain(Q['X'], 
                                                        Q['A'], 
                                                        Q['S']._convert(GaussianMoments)[...,1:,None], 
                                                        rotA)
        rotC = transformations.RotateGaussianARD(Q['C'],
                                                 Q['gamma'],
                                                 axis=0,
                                                 precompute=precompute)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C)
        rotB = transformations.RotateGaussianARD(Q['B'],
                                                 Q['beta'], 
                                                 precompute=precompute)
        rotS = transformations.RotateGaussianMarkovChain(Q['S'], rotB)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=-1,
                                                 precompute=precompute)
        R_S = transformations.RotationOptimizer(rotS, rotA, K)
            
        if debug:
            rotate_kwargs = {'maxiter': 10,
                             'check_bound': True,
                             'check_gradient': True}
        else:
            rotate_kwargs = {'maxiter': 10}

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):

        if ind < update_hyper:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
            if rotate and ind >= start_rotating:
                # Use the rotation which does not update alpha or beta
                R_X_init.rotate(**rotate_kwargs)
        else:
            Q.update(plot=monitor)
            if rotate and ind >= start_rotating:
                # It might be a good idea not to rotate immediately, because
                # early rotations could prune out components before they have
                # even been estimated roughly
                R_X.rotate(**rotate_kwargs)
                if ind >= start_rotating_weights:
                    R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
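
A hypothetical driver for this routine (it assumes the surrounding module also defines the model() constructor used above, and that bayespy's VB machinery is available):

import numpy as np

# Synthetic data: M=10 observed signals over N=500 time steps
y = np.random.randn(10, 500)

# Run 50 VB iterations with rotation speed-ups, without live plotting
Q = infer(y, D=4, K=3, maxiter=50, rotate=True, monitor=False)
print(Q['X'].get_moments()[0].shape)  # posterior mean of the latent states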
Example #4
    def compute_message_to_parent(self, parent, index, u, *u_parents):

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
            # Reshape(g):
            # Shape(g)      = [Nn,..,N0,K]
            g = utils.moveaxis(g, self.cluster_plate, -1)

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
            # Move phi axis:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            for ind in range(len(phi)):
                if self.cluster_plate < 0:
                    axis_from = self.cluster_plate - self.ndims[ind]
                else:
                    raise RuntimeError("Cluster plate axis must be negative")
                axis_to = -1 - self.ndims[ind]
                if np.ndim(phi[ind]) >= abs(axis_from):
                    # Cluster plate axis exists, move it to the correct position
                    phi[ind] = utils.moveaxis(phi[ind], axis_from, axis_to)
                else:
                    # No cluster plate axis; insert a new unit axis at the
                    # correct position if phi extends that far
                    if np.ndim(phi[ind]) >= abs(axis_to):
                        phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

            # Reshape u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = list()
            for ind in range(len(u)):
                u_self.append(np.expand_dims(u[ind], axis=(-1 - self.ndims[ind])))

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

            # No need to sum over the non-cluster plate axes here: the
            # message-passing framework performs that summation automatically

            m = [L]

            return m

        elif index >= 1:

            # Parent index for the distribution used for the
            # mixture.
            index = index - 1

            # Reshape u:
            # Shape(u)      = [Nn,..,1,..,N0,Dd,..,D0]
            u_self = list()
            for ind in range(len(u)):
                if self.cluster_plate < 0:
                    cluster_axis = self.cluster_plate - self.ndims[ind]
                else:
                    cluster_axis = self.cluster_plate
                u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

            # Message from the mixed distribution
            m = self.distribution.compute_message_to_parent(parent, index, u_self, *(u_parents[1:]))

            # Weigh the messages with the responsibilities
            for i in range(len(m)):

                # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
                # Shape(p)      = [Nn,..,N0,K]
                # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

                # Number of axes for the variable dimensions for
                # the parent message.
                D = self.ndims_parents[index][i]

                # Responsibilities for clusters are the first
                # parent's first moment:
                # Shape(p)      = [Nn,..,N0,K]
                p = u_parents[0][0]
                # Move the cluster axis to the proper place:
                # Shape(p)      = [Nn,..,K,..,N0]
                p = utils.atleast_nd(p, abs(self.cluster_plate))
                p = utils.moveaxis(p, -1, self.cluster_plate)
                # Add axes for variable dimensions to the contributions
                # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
                p = utils.add_trailing_axes(p, D)

                # No new cluster axis needs to be added to m[i]: the message
                # from the mixed distribution already carries the cluster
                # plate, so the responsibilities p broadcast against it
                # directly.

                # TODO: The summation could be done here already (e.g. with
                # einsum) to avoid forming huge intermediate arrays.

                # Compute the message contributions for each
                # cluster:
                # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
                m[i] = m[i] * p

            return m
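
The axis bookkeeping above is easier to see on a toy array. A self-contained numpy sketch (shapes hypothetical, chosen only for illustration):

import numpy as np

phi = np.zeros((5, 3, 7, 2))      # plates [N1=5, K=3, N0=7], variable dims [D0=2]
cluster_plate = -2                # the K axis, indexed among the plates from the end
ndim = 1                          # number of variable axes (D0)

axis_from = cluster_plate - ndim  # -3: K's position once variable axes are counted
axis_to = -1 - ndim               # -2: target position, just in front of D0

phi2 = np.moveaxis(phi, axis_from, axis_to)
print(phi2.shape)                 # (5, 7, 3, 2), i.e. [N1, N0, K, D0]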
Example #5
    def compute_message_to_parent(self, parent, index, u, *u_parents):

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
            # Reshape(g):
            # Shape(g)      = [Nn,..,N0,K]
            g = utils.moveaxis(g, self.cluster_plate, -1)

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
            # Move phi axis:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            for ind in range(len(phi)):
                if self.cluster_plate < 0:
                    axis_from = self.cluster_plate - self.distribution.ndims[ind]
                else:
                    raise RuntimeError("Cluster plate axis must be negative")
                axis_to = -1 - self.distribution.ndims[ind]
                if np.ndim(phi[ind]) >= abs(axis_from):
                    # Cluster plate axis exists, move it to the correct position
                    phi[ind] = utils.moveaxis(phi[ind], axis_from, axis_to)
                else:
                    # No cluster plate axis; insert a new unit axis at the
                    # correct position if phi extends that far
                    if np.ndim(phi[ind]) >= abs(axis_to):
                        phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

            # Reshape u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = list()
            for ind in range(len(u)):
                u_self.append(np.expand_dims(u[ind],
                                             axis=(-1-self.distribution.ndims[ind])))

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            L = self.distribution.compute_logpdf(u_self, phi, g, 0)

            # No need to sum over the non-cluster plate axes here: the
            # message-passing framework performs that summation automatically

            m = [L]

            return m

        elif index >= 1:

            # Parent index for the distribution used for the
            # mixture.
            index = index - 1

            # Reshape u:
            # Shape(u)      = [Nn,..,1,..,N0,Dd,..,D0]
            u_self = list()
            for ind in range(len(u)):
                if self.cluster_plate < 0:
                    cluster_axis = self.cluster_plate - self.distribution.ndims[ind]
                else:
                    cluster_axis = self.cluster_plate
                u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

            # Message from the mixed distribution
            m = self.distribution.compute_message_to_parent(parent,
                                                            index,
                                                            u_self,
                                                            *(u_parents[1:]))

            # Weigh the messages with the responsibilities
            for i in range(len(m)):

                # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
                # Shape(p)      = [Nn,..,N0,K]
                # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

                # Number of axes for the variable dimensions for
                # the parent message.
                D = self.distribution.ndims_parents[index][i]

                # Responsibilities for clusters are the first
                # parent's first moment:
                # Shape(p)      = [Nn,..,N0,K]
                p = u_parents[0][0]
                # Move the cluster axis to the proper place:
                # Shape(p)      = [Nn,..,K,..,N0]
                p = utils.atleast_nd(p, abs(self.cluster_plate))
                p = utils.moveaxis(p, -1, self.cluster_plate)
                # Add axes for variable dimensions to the contributions
                # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
                p = utils.add_trailing_axes(p, D)

                # No new cluster axis needs to be added to m[i]: the message
                # from the mixed distribution already carries the cluster
                # plate, so the responsibilities p broadcast against it
                # directly.

                # TODO: The summation could be done here already (e.g. with
                # einsum) to avoid forming huge intermediate arrays.

                # Compute the message contributions for each
                # cluster:
                # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
                m[i] = m[i] * p

            return m
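
The responsibility weighting at the end can likewise be checked on a toy example. A numpy-only sketch (shapes hypothetical) of the broadcasting that m[i] * p relies on:

import numpy as np

m = np.ones((5, 3, 7, 2))               # message: [N1, K=3, N0, D0]
p = np.random.rand(5, 7, 3)             # responsibilities: [N1, N0, K]
p = p / p.sum(axis=-1, keepdims=True)   # normalize over clusters

cluster_plate = -2
p = np.moveaxis(p, -1, cluster_plate)   # -> [N1, K, N0]
p = p[..., None]                        # add the variable axis: [N1, K, N0, 1]

print((m * p).shape)                    # (5, 3, 7, 2): a weighted message per cluster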