Example #1
    def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
        """
        """
        if index == 0:
            m0 = 0
            # Compute Child * X, sum over the variable axes and move the gated
            # axis to be the last. Some reshaping is needed to make Child and X
            # broadcast properly.
            for i in range(len(m_child)):
                ndim = len(self.dims[i])
                c = m_child[i][..., None]
                c = misc.moveaxis(c, -1, -ndim - 1)
                gated_axis = self.gated_plate - ndim
                x = u_X[i]
                if np.ndim(x) < abs(gated_axis):
                    x = np.expand_dims(x, -ndim - 1)
                else:
                    x = misc.moveaxis(x, gated_axis, -ndim - 1)
                axes = tuple(range(-ndim, 0))
                m0 = m0 + misc.sum_product(c, x, axes_to_sum=axes)

            # Make sure the variable axis does not use broadcasting
            m0 = m0 * np.ones(self.K)

            # Send the message
            m = [m0]
            return m

        elif index == 1:

            m = []
            for i in range(len(m_child)):
                # Make the moments of Z and the message from children
                # broadcastable. The gated plate is handled as the last axis in
                # the arrays and moved to the correct position at the end.

                # Add variable axes to Z moments
                ndim = len(self.dims[i])
                z = misc.add_trailing_axes(u_Z[0], ndim)
                z = misc.moveaxis(z, -ndim - 1, -1)
                # Axis index of the gated plate
                gated_axis = self.gated_plate - ndim
                # Add the gate axis to the message from the children
                c = misc.add_trailing_axes(m_child[i], 1)
                # Compute the message to parent
                mi = z * c
                # Add extra axes if necessary
                if np.ndim(mi) < abs(gated_axis):
                    mi = misc.add_leading_axes(mi,
                                               abs(gated_axis) - np.ndim(mi))
                # Move the axis to the correct position
                mi = misc.moveaxis(mi, -1, gated_axis)
                m.append(mi)

            return m

        else:
            raise ValueError("Invalid parent index")
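
The index == 0 branch above is essentially a sum-product of the child message and the moments of X over the variable axes, with the gated plate left as the last axis. The following standalone NumPy sketch mimics that alignment for a single moment array; the shapes, K and gated_plate are invented for illustration, and np.moveaxis stands in for misc.moveaxis.

import numpy as np

K, D = 4, 2                              # K gated alternatives, variable dimension D
gated_plate = -1                         # plate axis that the gate selects over
m_child = np.random.randn(3, D)          # child message: plates (3,) + dims (D,)
u_X = np.random.randn(3, K, D)           # moments of X: plates (3, K) + dims (D,)

ndim = 1                                 # number of variable axes
gated_axis = gated_plate - ndim          # gated plate axis within the full u_X array

# Give the child message a broadcasting axis for the K alternatives and move the
# gated plate of X next to it, just before the variable axes.
c = np.expand_dims(m_child, -ndim - 1)               # shape (3, 1, D)
x = np.moveaxis(u_X, gated_axis, -ndim - 1)          # shape (3, K, D)

# Sum the product over the variable axes; the gated plate becomes the last axis.
m0 = np.sum(c * x, axis=tuple(range(-ndim, 0)))      # shape (3, K)
m0 = m0 * np.ones(K)                                 # make sure the K axis is not broadcast
print(m0.shape)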
Example #2
    def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
        """
        """
        if index == 0:
            m0 = 0
            # Compute Child * X, sum over the variable axes and move the gated
            # axis to be the last. Some reshaping is needed to make Child and X
            # broadcast properly.
            for i in range(len(m_child)):
                ndim = len(self.dims[i])
                c = m_child[i][...,None]
                c = misc.moveaxis(c, -1, -ndim-1)
                gated_axis = self.gated_plate - ndim
                x = u_X[i]
                if np.ndim(x) < abs(gated_axis):
                    x = np.expand_dims(x, -ndim-1)
                else:
                    x = misc.moveaxis(x, gated_axis, -ndim-1)
                axes = tuple(range(-ndim, 0))
                m0 = m0 + misc.sum_product(c, x, axes_to_sum=axes)

            # Make sure the variable axis does not use broadcasting
            m0 = m0 * np.ones(self.K)

            # Send the message
            m = [m0]
            return m

        elif index == 1:

            m = []
            for i in range(len(m_child)):
                # Make the moments of Z and the message from children
                # broadcastable. The gated plate is handled as the last axis in
                # the arrays and moved to the correct position at the end.

                # Add variable axes to Z moments
                ndim = len(self.dims[i])
                z = misc.add_trailing_axes(u_Z[0], ndim)
                z = misc.moveaxis(z, -ndim-1, -1)
                # Axis index of the gated plate
                gated_axis = self.gated_plate - ndim
                # Add the gate axis to the message from the children
                c = misc.add_trailing_axes(m_child[i], 1)
                # Compute the message to parent
                mi = z * c
                # Add extra axes if necessary
                if np.ndim(mi) < abs(gated_axis):
                    mi = misc.add_leading_axes(mi,
                                                abs(gated_axis) - np.ndim(mi))
                # Move the axis to the correct position
                mi = misc.moveaxis(mi, -1, gated_axis)
                m.append(mi)

            return m

        else:
            raise ValueError("Invalid parent index")
Example #3
def run(M=10,
        N=100,
        D_y=3,
        D=5,
        seed=42,
        rotate=False,
        maxiter=100,
        debug=False,
        plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True, check_gradient=True)
            else:
                R.rotate()

    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(F, scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
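
The demo calls a model(M, N, D) helper that is not shown here. A rough sketch of the kind of BayesPy PCA model it expects is given below; the prior parameters and node names are assumptions rather than the actual demo source.

from bayespy.nodes import GaussianARD, Gamma, SumMultiply

def model(M, N, D):
    # ARD prior over the D components of the loadings
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, shape=(D,), plates=(M, 1), name='W')
    X = GaussianARD(0, 1, shape=(D,), plates=(1, N), name='X')
    # Dot product over the component axis gives the noiseless reconstruction
    F = SumMultiply('d,d->', W, X, name='F')
    tau = Gamma(1e-5, 1e-5, name='tau')
    Y = GaussianARD(F, tau, name='Y')
    return (Y, F, W, X, tau, alpha)

With this layout W broadcasts over the N axis and X over the M axis, matching how the synthetic w and x arrays are generated above.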
Example #4
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000, debug=False, plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M,N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
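
None of these run() snippets show their imports. The names they use presumably come from something like the following; the exact module paths are assumptions based on the BayesPy package layout.

import numpy as np
import matplotlib.pyplot as plt

from bayespy.inference import VB
from bayespy.inference.vmp import transformations
from bayespy.utils import misc, random
import bayespy.plot as bpplt
from bayespy.demos import pca        # provides the pca.model(M, N, D) used above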
Example #5
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100, debug=False):

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M,N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True,
                         check_gradient=True)
            else:
                R.rotate()
            
    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    plt.figure()
    bpplt.timeseries_normal(F, scale=2)
    bpplt.timeseries(f, color='g', linestyle='-')
    bpplt.timeseries(y, color='r', linestyle='None', marker='+')
    plt.show()
Example #6
def run(M=10,
        N=100,
        D_y=3,
        D=5,
        seed=42,
        rotate=False,
        maxiter=1000,
        debug=False,
        plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.1, size=(M, N))

    # Construct model
    Q = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(Q['W'], Q['alpha'])
        rotX = transformations.RotateGaussianARD(Q['X'])
        R = transformations.RotationOptimizer(rotW, rotX, D)
        if debug:
            Q.callback = lambda: R.rotate(check_bound=True,
                                          check_gradient=True)
        else:
            Q.callback = R.rotate

    # Use standard VB-EM alone
    Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(Q['F'], scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
Example #7
    def _compute_moments(self, u_Z, u_X):
        """
        """

        u = []
        for i in range(len(u_X)):
            # Make the moments of Z and X broadcastable and move the gated plate
            # to be the last axis in the moments, then sum-product over that
            # axis
            ndim = len(self.dims[i])
            z = misc.add_trailing_axes(u_Z[0], ndim)
            z = misc.moveaxis(z, -ndim-1, -1)
            gated_axis = self.gated_plate - ndim
            if np.ndim(u_X[i]) < abs(gated_axis):
                x = misc.add_trailing_axes(u_X[i], 1)
            else:
                x = misc.moveaxis(u_X[i], gated_axis, -1)
            ui = misc.sum_product(z, x, axes_to_sum=-1)
            u.append(ui)
        return u
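
_compute_moments forms a weighted combination of the moments of X over the gated plate, using the selection probabilities in u_Z as weights. A standalone NumPy equivalent for a single moment array (shapes and gated_plate are invented; np.moveaxis replaces misc.moveaxis):

import numpy as np

K, D = 4, 2
gated_plate = -1
u_Z = np.random.dirichlet(np.ones(K), size=3)    # selection probabilities: shape (3, K)
u_X = np.random.randn(3, K, D)                   # moments of X: plates (3, K) + dims (D,)

ndim = 1
gated_axis = gated_plate - ndim                  # gated plate axis within u_X

# Add the variable axes to Z and move the gated plate to the end of both arrays
z = u_Z.reshape(np.shape(u_Z) + (1,) * ndim)     # shape (3, K, 1)
z = np.moveaxis(z, -ndim - 1, -1)                # shape (3, 1, K)
x = np.moveaxis(u_X, gated_axis, -1)             # shape (3, D, K)

# Weighted sum over the K gated alternatives
u = np.sum(z * x, axis=-1)                       # shape (3, D)
print(u.shape)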
Example #8
    def _compute_moments(self, u_Z, u_X):
        """
        """

        u = []
        for i in range(len(u_X)):
            # Make the moments of Z and X broadcastable and move the gated plate
            # to be the last axis in the moments, then sum-product over that
            # axis
            ndim = len(self.dims[i])
            z = misc.add_trailing_axes(u_Z[0], ndim)
            z = misc.moveaxis(z, -ndim - 1, -1)
            gated_axis = self.gated_plate - ndim
            if np.ndim(u_X[i]) < abs(gated_axis):
                x = misc.add_trailing_axes(u_X[i], 1)
            else:
                x = misc.moveaxis(u_X[i], gated_axis, -1)
            ui = misc.sum_product(z, x, axes_to_sum=-1)
            u.append(ui)
        return u
Example #9
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=1000, debug=False, plot=True):

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.1, size=(M,N))

    # Construct model
    Q = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(Q['W'], Q['alpha'])
        rotX = transformations.RotateGaussianARD(Q['X'])
        R = transformations.RotationOptimizer(rotW, rotX, D)
        if debug:
            Q.callback = lambda : R.rotate(check_bound=True,
                                           check_gradient=True)
        else:
            Q.callback = R.rotate

    # Use standard VB-EM alone
    Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(Q['F'], scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
Example #10
    def compute_cgf_from_parents(self, *u_parents):
        """
        Compute :math:`\mathrm{E}_{q(p)}[g(p)]`
        """

        # Compute weighted average of g over the clusters.

        # Shape(g)      = [Nn,..,K,..,N0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0]

        # Compute g for clusters:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))

        # Move cluster axis to last:
        # Shape(g)      = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Cluster assignments/contributions/probabilities/weights:
        # Shape(p)      = [Nn,..,N0,K]
        p = u_parents[0][0]

        # Weighted average of g over the clusters. As p and g are
        # properly aligned, you can just sum p*g over the last
        # axis and utilize broadcasting:
        # Shape(result) = [Nn,..,N0]

        g = misc.sum_product(p, g, axes_to_sum=-1)

        return g
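
Put differently, the method evaluates E_q(p)[g] = sum_k p_k g_k after aligning the cluster axis of g with the last axis of p. A minimal NumPy version of the same computation (plate size, K and cluster_plate are made up):

import numpy as np

N0, K = 5, 3
cluster_plate = -2                               # position of the cluster axis among g's plates
g = np.random.randn(K, N0)                       # g per cluster: shape [.., K, .., N0]
p = np.random.dirichlet(np.ones(K), size=N0)     # cluster probabilities: shape [.., N0, K]

# Move the cluster axis of g last so it lines up with p, then average over clusters
g = np.moveaxis(g, cluster_plate, -1)            # shape (N0, K)
result = np.sum(p * g, axis=-1)                  # shape (N0,)
print(result.shape)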
Example #11
    def compute_cgf_from_parents(self, *u_parents):
        """
        Compute :math:`\mathrm{E}_{q(p)}[g(p)]`
        """

        # Compute weighted average of g over the clusters.

        # Shape(g)      = [Nn,..,K,..,N0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0]

        # Compute g for clusters:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))

        # Move cluster axis to last:
        # Shape(g)      = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Cluster assignments/contributions/probabilities/weights:
        # Shape(p)      = [Nn,..,N0,K]
        p = u_parents[0][0]

        # Weighted average of g over the clusters. As p and g are
        # properly aligned, you can just sum p*g over the last
        # axis and utilize broadcasting:
        # Shape(result) = [Nn,..,N0]

        g = misc.sum_product(p, g, axes_to_sum=-1)

        return g
Example #12
    def compute_phi_from_parents(self, *u_parents, mask=True):
        """
        Compute the natural parameter vector given parent moments.
        """
        # Compute weighted average of the parameters

        # Cluster parameters
        Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Contributions/weights/probabilities
        P = u_parents[0][0]

        phi = list()

        nans = False

        for ind in range(len(Phi)):
            # Compute element-wise product and then sum over K clusters.
            # Note that the dimensions aren't perfectly aligned because
            # the cluster dimension (K) may be arbitrary for phi, and phi
            # also has dimensions (Dd,..,D0) of the parameters.
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(p)      = [Nn,..,N0,K]
            # Shape(result) = [Nn,..,N0,Dd,..,D0]
            # General broadcasting rules apply for Nn,..,N0, that is,
            # preceding dimensions may be missing or dimension may be
            # equal to one. Probably, shape(phi) has lots of missing
            # dimensions and/or dimensions that are one.

            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate should be negative")

            # Move cluster axis to the last:
            # Shape(phi)    = [Nn,..,N0,Dd,..,D0,K]
            if np.ndim(Phi[ind]) >= abs(cluster_axis):
                phi.append(misc.moveaxis(Phi[ind], cluster_axis, -1))
            else:
                phi.append(Phi[ind][...,None])

            # Add axes to p:
            # Shape(p)      = [Nn,..,N0,K,1,..,1]
            p = misc.add_trailing_axes(P, self.ndims[ind])
            # Move cluster axis to the last:
            # Shape(p)      = [Nn,..,N0,1,..,1,K]
            p = misc.moveaxis(p, -(self.ndims[ind]+1), -1)

            # Handle zero probability cases. This avoids nans when p=0 and
            # phi=inf.
            phi[ind] = np.where(p != 0, phi[ind], 0)

            # Now the shapes broadcast perfectly and we can sum
            # p*phi over the last axis:
            # Shape(result) = [Nn,..,N0,Dd,..,D0]
            phi[ind] = misc.sum_product(p, phi[ind], axes_to_sum=-1)
            if np.any(np.isnan(phi[ind])):
                nans = True

        if nans:
            warnings.warn("The natural parameters of mixture distribution "
                          "contain nans. This may happen if you use fixed "
                          "parameters in your model. Technically, one possible "
                          "reason is that the cluster assignment probability "
                          "for some element is zero (p=0) and the natural "
                          "parameter of that cluster is -inf, thus "
                          "0*(-inf)=nan. Solution: Use parameters that assign "
                          "non-zero probabilities for the whole domain.")
            
        return phi
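
The np.where(p != 0, phi, 0) guard exists because a cluster with zero assignment probability may carry an infinite natural parameter, and 0 * (-inf) evaluates to NaN. A tiny illustration of the failure mode and the fix (numbers are invented):

import numpy as np

p = np.array([0.0, 0.3, 0.7])            # one cluster has exactly zero probability
phi = np.array([-np.inf, -1.5, -0.2])    # the natural parameter of that cluster is -inf

naive = np.sum(p * phi)                           # 0 * (-inf) = nan poisons the whole sum
guarded = np.sum(p * np.where(p != 0, phi, 0))    # zero out the impossible cluster first

print(naive, guarded)                             # nan vs approximately -0.59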
Example #13
    def compute_phi_from_parents(self, *u_parents, mask=True):
        """
        Compute the natural parameter vector given parent moments.
        """
        # Compute weighted average of the parameters

        # Cluster parameters
        Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Contributions/weights/probabilities
        P = u_parents[0][0]

        phi = list()

        nans = False

        for ind in range(len(Phi)):
            # Compute element-wise product and then sum over K clusters.
            # Note that the dimensions aren't perfectly aligned because
            # the cluster dimension (K) may be arbitrary for phi, and phi
            # also has dimensions (Dd,..,D0) of the parameters.
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(p)      = [Nn,..,N0,K]
            # Shape(result) = [Nn,..,N0,Dd,..,D0]
            # General broadcasting rules apply for Nn,..,N0, that is,
            # preceding dimensions may be missing or dimension may be
            # equal to one. Probably, shape(phi) has lots of missing
            # dimensions and/or dimensions that are one.

            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate should be negative")

            # Move cluster axis to the last:
            # Shape(phi)    = [Nn,..,N0,Dd,..,D0,K]
            if np.ndim(Phi[ind]) >= abs(cluster_axis):
                phi.append(misc.moveaxis(Phi[ind], cluster_axis, -1))
            else:
                phi.append(Phi[ind][..., None])

            # Add axes to p:
            # Shape(p)      = [Nn,..,N0,K,1,..,1]
            p = misc.add_trailing_axes(P, self.ndims[ind])
            # Move cluster axis to the last:
            # Shape(p)      = [Nn,..,N0,1,..,1,K]
            p = misc.moveaxis(p, -(self.ndims[ind] + 1), -1)

            # Handle zero probability cases. This avoids nans when p=0 and
            # phi=inf.
            phi[ind] = np.where(p != 0, phi[ind], 0)

            # Now the shapes broadcast perfectly and we can sum
            # p*phi over the last axis:
            # Shape(result) = [Nn,..,N0,Dd,..,D0]
            phi[ind] = misc.sum_product(p, phi[ind], axes_to_sum=-1)
            if np.any(np.isnan(phi[ind])):
                nans = True

        if nans:
            warnings.warn(
                "The natural parameters of mixture distribution "
                "contain nans. This may happen if you use fixed "
                "parameters in your model. Technically, one possible "
                "reason is that the cluster assignment probability "
                "for some element is zero (p=0) and the natural "
                "parameter of that cluster is -inf, thus "
                "0*(-inf)=nan. Solution: Use parameters that assign "
                "non-zero probabilities for the whole domain.")

        return phi
Example #14
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print(
            "WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
            "able to save empty arrays, thus you may experience problems if "
            "you for instance try to save before running any iteration steps."
            % str(h5py.version.hdf5_version_tuple))

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0]) * np.ones(
        y.shape)
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
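
As the NOTE in the demo stresses, Q.save() stores only the states of the nodes, so restoring in a fresh session means rebuilding the identical graph before calling Q.load(). A minimal self-contained sketch of that workflow with a toy model (node names, priors and the file path are invented, not taken from the demo):

import numpy as np
from bayespy.nodes import GaussianARD, Gamma
from bayespy.inference import VB

def build():
    # The very same graph has to be reconstructed before loading
    mu = GaussianARD(0, 1e-6, name='mu')
    tau = Gamma(1e-6, 1e-6, name='tau')
    Y = GaussianARD(mu, tau, plates=(50,), name='Y')
    return (Y, mu, tau)

(Y, mu, tau) = build()
Y.observe(np.random.normal(5, 1, size=50))
Q = VB(Y, mu, tau)
Q.update(repeat=10)
Q.save(filename='state.hdf5')

# Later, possibly in another process: rebuild the identical model and restore the states
(Y2, mu2, tau2) = build()
Q2 = VB(Y2, mu2, tau2)
Q2.load(filename='state.hdf5')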
Example #15
def run(M=40,
        N=100,
        D_y=6,
        D=8,
        seed=42,
        rotate=False,
        maxiter=1000,
        debug=False,
        plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
Example #16
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1,8,7): 
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M,N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9) # randomly missing
    mask[:,20:40] = False # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M,1,m+1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2,2,1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2,2,2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2,2,3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2,2,4)
    bpplt.binary_matrix(Y.mask)