def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """
    Compute the message sent to one of the two parents.

    Parameters
    ----------
    index : int
        Parent selector.  With 0 the message is accumulated from the child
        messages and the moments ``u_X``; with 1 one message per child
        message is built from ``u_Z[0]``.
    m_child : sequence
        Messages from the children, indexed in parallel with ``self.dims``.
    u_Z : sequence
        Moments of the first parent; only ``u_Z[0]`` is read.
    u_X : sequence
        Moments of the second parent, indexed like ``m_child``.

    Returns
    -------
    list
        A one-element list when ``index == 0``; otherwise a list with one
        entry per element of ``m_child``.

    Raises
    ------
    ValueError
        If ``index`` is neither 0 nor 1.
    """
    if index == 0:
        total = 0
        # For every moment: multiply the child message by X, sum over the
        # variable axes, and keep the gated axis as the last one.  Both
        # operands are reshaped first so that they broadcast together.
        for i, child_msg in enumerate(m_child):
            n_var_axes = len(self.dims[i])
            # New axis for the gate, placed just before the variable axes
            child = misc.moveaxis(child_msg[..., None], -1, -n_var_axes - 1)
            gate_pos = self.gated_plate - n_var_axes
            x = u_X[i]
            if np.ndim(x) >= abs(gate_pos):
                x = misc.moveaxis(x, gate_pos, -n_var_axes - 1)
            else:
                # X has no gated axis at all: insert a singleton one
                x = np.expand_dims(x, -n_var_axes - 1)
            var_axes = tuple(range(-n_var_axes, 0))
            total = total + misc.sum_product(child, x, axes_to_sum=var_axes)

        # Multiplying by ones of length K materializes the last axis so
        # that it is not left as a broadcast axis
        return [total * np.ones(self.K)]

    if index == 1:
        messages = []
        for i, child_msg in enumerate(m_child):
            # The gated plate is kept as the last axis while computing and
            # is moved to its final position at the end.
            n_var_axes = len(self.dims[i])

            # Z moments: append the variable axes, then put the gate last
            z = misc.add_trailing_axes(u_Z[0], n_var_axes)
            z = misc.moveaxis(z, -n_var_axes - 1, -1)

            # Position of the gated plate in the resulting message
            gate_pos = self.gated_plate - n_var_axes

            # Child message with a trailing gate axis, times Z
            msg = z * misc.add_trailing_axes(child_msg, 1)

            # Pad with leading axes when the target position does not exist
            missing = abs(gate_pos) - np.ndim(msg)
            if missing > 0:
                msg = misc.add_leading_axes(msg, missing)

            messages.append(misc.moveaxis(msg, -1, gate_pos))
        return messages

    raise ValueError("Invalid parent index")
def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """
    Compute the message sent to one of the two parents.

    Parameters
    ----------
    index : int
        Parent selector.  With 0 the message is accumulated from the child
        messages and the moments ``u_X``; with 1 one message per child
        message is built from ``u_Z[0]``.
    m_child : sequence
        Messages from the children, indexed in parallel with ``self.dims``.
    u_Z : sequence
        Moments of the first parent; only ``u_Z[0]`` is read.
    u_X : sequence
        Moments of the second parent, indexed like ``m_child``.

    Returns
    -------
    list
        A one-element list when ``index == 0``; otherwise a list with one
        entry per element of ``m_child``.

    Raises
    ------
    ValueError
        If ``index`` is neither 0 nor 1.
    """
    if index == 0:
        accumulated = 0
        # Child * X summed over the variable axes, with the gated axis kept
        # last.  Shapes are adjusted first so the operands broadcast.
        for i, child_msg in enumerate(m_child):
            nd = len(self.dims[i])
            gate_slot = -nd - 1
            # Append an axis for the gate and move it before variable axes
            c = misc.moveaxis(child_msg[..., None], -1, gate_slot)
            gated_axis = self.gated_plate - nd
            x = u_X[i]
            if np.ndim(x) >= abs(gated_axis):
                x = misc.moveaxis(x, gated_axis, gate_slot)
            else:
                # No gated axis present in X: add a singleton axis instead
                x = np.expand_dims(x, gate_slot)
            accumulated = accumulated + misc.sum_product(
                c, x, axes_to_sum=tuple(range(-nd, 0)))

        # Expand the last axis to length K so it is not a broadcast axis
        accumulated = accumulated * np.ones(self.K)
        return [accumulated]

    if index == 1:
        out = []
        for i, child_msg in enumerate(m_child):
            # Work with the gated plate as the last axis and move it to the
            # correct position once the product has been formed.
            nd = len(self.dims[i])

            # Add variable axes to the Z moments and put the gate last
            z = misc.moveaxis(misc.add_trailing_axes(u_Z[0], nd), -nd - 1, -1)

            # Axis index of the gated plate in the outgoing message
            gated_axis = self.gated_plate - nd

            # Add the gate axis to the child message and multiply
            product = z * misc.add_trailing_axes(child_msg, 1)

            # Add leading axes if the array is too small for gated_axis
            shortfall = abs(gated_axis) - np.ndim(product)
            if shortfall > 0:
                product = misc.add_leading_axes(product, shortfall)

            out.append(misc.moveaxis(product, -1, gated_axis))
        return out

    raise ValueError("Invalid parent index")
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100,
        debug=False, plot=True):
    """
    Fit the model from ``model(M, N, D)`` to synthetic data.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``model`` and to the rotation optimizer.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate : bool
        If true, call the rotation optimizer after every single update.
    maxiter : int
        Number of VB updates.
    debug : bool
        If true (and ``rotate`` is set), rotate with bound and gradient
        checks enabled.
    plot : bool
        If true, plot the result, the noiseless data and the observations.
    """
    if seed is not None:
        np.random.seed(seed)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.2)
    loadings = np.random.normal(0, 1, size=(M, 1, D_y))
    states = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(loadings, states, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Model nodes
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Entries where the mask is false are treated as missing
    observed = random.mask(M, N, p=0.5)
    y[~observed] = np.nan
    Y.observe(y, mask=observed)

    # Inference engine
    Q = VB(Y, W, X, tau, alpha)

    # Random initialization of X and W
    X.initialize_from_random()
    W.initialize_from_random()

    if not rotate:
        # Plain VB-EM
        Q.update(repeat=maxiter)
    else:
        # Interleave every VB-EM update with a rotation step
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        checks = {'check_bound': True, 'check_gradient': True} if debug else {}
        for _ in range(maxiter):
            Q.update()
            R.rotate(**checks)

    if plot:
        plt.figure()
        bpplt.timeseries_normal(F, scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Run pattern search demo for PCA.

    A model built by ``pca.model(M, N, D)`` is fitted twice from the same
    saved state: first with plain VB-EM updates, then with pattern search
    steps interleaved with VB-EM updates.  With ``plot`` enabled, the lower
    bound ``Q.L`` is drawn against cumulative ``Q.cputime`` for both runs.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``pca.model``.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate, debug : bool
        Accepted but not used by this function.
    maxiter : int
        Number of baseline updates and upper limit of pattern search rounds.
    plot : bool
        Whether to draw the comparison curves.
    """
    if seed is not None:
        np.random.seed(seed)

    def plot_bound(fmt):
        # Lower bound as a function of cumulative CPU time
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, fmt)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.2)
    loadings = np.random.normal(0, 1, size=(M, 1, D_y))
    states = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(loadings, states, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Model and inference engine
    Q = VB(*(pca.model(M, N, D)))

    # Entries where the mask is false are treated as missing
    observed = random.mask(M, N, p=0.5)
    y[~observed] = np.nan
    Q['Y'].observe(y, mask=observed)

    # Random initialization of X and W
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Warm up with a few VB-EM updates and remember that state
    Q.update(repeat=10)
    Q.save()

    # Baseline: standard VB-EM
    Q.update(repeat=maxiter)
    if plot:
        plot_bound('k-')

    # Back to the saved state for the second method
    Q.load()

    # Pattern search interleaved with VB-EM, until convergence
    for _ in range(maxiter):
        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)
        if Q.has_converged():
            break

    if plot:
        plot_bound('r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100,
        debug=False):
    """
    Fit the model from ``model(M, N, D)`` to synthetic data and show plots.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``model`` and to the rotation optimizer.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate : bool
        If true, call the rotation optimizer after every single update.
    maxiter : int
        Number of VB updates.
    debug : bool
        If true (and ``rotate`` is set), rotate with bound and gradient
        checks enabled.
    """
    if seed is not None:
        np.random.seed(seed)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.2)
    loadings = np.random.normal(0, 1, size=(M, 1, D_y))
    states = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(loadings, states, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Model nodes
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Entries where the mask is false are treated as missing
    observed = random.mask(M, N, p=0.5)
    y[~observed] = np.nan
    Y.observe(y, mask=observed)

    # Inference engine
    Q = VB(Y, W, X, tau, alpha)

    # Random initialization of X and W
    X.initialize_from_random()
    W.initialize_from_random()

    if not rotate:
        # Plain VB-EM
        Q.update(repeat=maxiter)
    else:
        # Interleave every VB-EM update with a rotation step
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        checks = {'check_bound': True, 'check_gradient': True} if debug else {}
        for _ in range(maxiter):
            Q.update()
            R.rotate(**checks)

    # Result, noiseless data and observations; this variant always plots
    plt.figure()
    bpplt.timeseries_normal(F, scale=2)
    bpplt.timeseries(f, color='g', linestyle='-')
    bpplt.timeseries(y, color='r', linestyle='None', marker='+')
    plt.show()
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Fit the inference engine returned by ``model(M, N, D)`` to synthetic data.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``model`` and to the rotation optimizer.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate : bool
        If true, install a rotation step as ``Q.callback``.
    maxiter : int
        Value passed as ``repeat`` to ``Q.update``.
    debug : bool
        If true (and ``rotate`` is set), the callback rotates with bound and
        gradient checks enabled.
    plot : bool
        If true, plot the result, the noiseless data and the observations.
    """
    if seed is not None:
        np.random.seed(seed)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.1)
    loadings = np.random.normal(0, 1, size=(M, 1, D_y))
    states = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(loadings, states, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.1, size=(M, N))

    # Inference engine holding the model nodes
    Q = model(M, N, D)

    # Entries where the mask is false are treated as missing
    observed = random.mask(M, N, p=0.5)
    y[~observed] = np.nan
    Q['Y'].observe(y, mask=observed)

    if rotate:
        # Rotation is wired in through the engine's callback attribute
        rotW = transformations.RotateGaussianARD(Q['W'], Q['alpha'])
        rotX = transformations.RotateGaussianARD(Q['X'])
        R = transformations.RotationOptimizer(rotW, rotX, D)
        Q.callback = (
            (lambda: R.rotate(check_bound=True, check_gradient=True))
            if debug else R.rotate
        )

    # Run the updates (with the rotation callback, if one was installed)
    Q.update(repeat=maxiter)

    if plot:
        plt.figure()
        bpplt.timeseries_normal(Q['F'], scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
def _compute_moments(self, u_Z, u_X):
    """
    Compute the moments of this node from the parent moments.

    For every entry of ``u_X`` the gated plate is moved to the last axis,
    ``u_Z[0]`` is reshaped to broadcast against it, and the two are combined
    with ``misc.sum_product`` over that last axis.

    Parameters
    ----------
    u_Z : sequence
        Moments of the first parent; only ``u_Z[0]`` is read.
    u_X : sequence
        Moments of the second parent, indexed in parallel with ``self.dims``.

    Returns
    -------
    list
        One array per entry of ``u_X``.
    """
    moments = []
    for i, x in enumerate(u_X):
        n_var_axes = len(self.dims[i])

        # Z: append the variable axes, then put the gate axis last
        z = misc.add_trailing_axes(u_Z[0], n_var_axes)
        z = misc.moveaxis(z, -n_var_axes - 1, -1)

        # X: put the gated plate last, or add a singleton axis if X is too
        # small to contain that plate
        gate_pos = self.gated_plate - n_var_axes
        if np.ndim(x) >= abs(gate_pos):
            x = misc.moveaxis(x, gate_pos, -1)
        else:
            x = misc.add_trailing_axes(x, 1)

        moments.append(misc.sum_product(z, x, axes_to_sum=-1))
    return moments
def _compute_moments(self, u_Z, u_X):
    """
    Compute the moments of this node from the parent moments.

    For every entry of ``u_X`` the gated plate is moved to the last axis,
    ``u_Z[0]`` is reshaped to broadcast against it, and the two are combined
    with ``misc.sum_product`` over that last axis.

    Parameters
    ----------
    u_Z : sequence
        Moments of the first parent; only ``u_Z[0]`` is read.
    u_X : sequence
        Moments of the second parent, indexed in parallel with ``self.dims``.

    Returns
    -------
    list
        One array per entry of ``u_X``.
    """
    result = []
    for i, x_moment in enumerate(u_X):
        nd = len(self.dims[i])
        gated_axis = self.gated_plate - nd

        # Align X so that the gated plate is its last axis
        if np.ndim(x_moment) < abs(gated_axis):
            # X lacks the plate altogether: a trailing singleton axis works
            x_aligned = misc.add_trailing_axes(x_moment, 1)
        else:
            x_aligned = misc.moveaxis(x_moment, gated_axis, -1)

        # Align Z: add the variable axes and move the gate axis to the end
        z_aligned = misc.moveaxis(
            misc.add_trailing_axes(u_Z[0], nd), -nd - 1, -1)

        result.append(misc.sum_product(z_aligned, x_aligned, axes_to_sum=-1))
    return result
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Fit the inference engine returned by ``model(M, N, D)`` to synthetic data.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``model`` and to the rotation optimizer.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate : bool
        If true, install a rotation step as ``Q.callback``.
    maxiter : int
        Value passed as ``repeat`` to ``Q.update``.
    debug : bool
        If true (and ``rotate`` is set), the callback rotates with bound and
        gradient checks enabled.
    plot : bool
        If true, plot the result, the noiseless data and the observations.
    """
    if seed is not None:
        np.random.seed(seed)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.1)
    w_true = np.random.normal(0, 1, size=(M, 1, D_y))
    x_true = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w_true, x_true, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.1, size=(M, N))

    # Inference engine holding the model nodes
    Q = model(M, N, D)

    # Entries where the mask is false are treated as missing
    keep = random.mask(M, N, p=0.5)
    y[~keep] = np.nan
    Q['Y'].observe(y, mask=keep)

    if rotate:
        # Rotation is wired in through the engine's callback attribute
        rotW = transformations.RotateGaussianARD(Q['W'], Q['alpha'])
        rotX = transformations.RotateGaussianARD(Q['X'])
        R = transformations.RotationOptimizer(rotW, rotX, D)
        if not debug:
            Q.callback = R.rotate
        else:
            Q.callback = lambda: R.rotate(check_bound=True,
                                          check_gradient=True)

    # Run the updates (with the rotation callback, if one was installed)
    Q.update(repeat=maxiter)

    if plot:
        plt.figure()
        bpplt.timeseries_normal(Q['F'], scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
def compute_cgf_from_parents(self, *u_parents):
    r"""
    Compute :math:`\mathrm{E}_{q(p)}[g(p)]`

    The per-cluster values of ``g`` come from
    ``self.distribution.compute_cgf_from_parents`` applied to every parent
    moment except the first.  They are then averaged over the clusters
    using the weights ``u_parents[0][0]``.

    Parameters
    ----------
    *u_parents
        Parent moments.  ``u_parents[0][0]`` holds the cluster weights with
        the cluster axis last; the remaining entries are forwarded to the
        underlying distribution.

    Returns
    -------
    The weighted sum of ``g`` over the cluster axis, with shape
    ``[Nn,..,N0]``.
    """
    # g for each cluster:
    # Shape(g) = [Nn,..,K,..,N0]
    g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))

    # Bring the cluster axis to the end:
    # Shape(g) = [Nn,..,N0,K]
    if np.ndim(g) < abs(self.cluster_plate):
        # g has too few axes to contain the cluster plate, so a trailing
        # singleton axis is enough for broadcasting
        g = np.expand_dims(g, -1)
    else:
        g = misc.moveaxis(g, self.cluster_plate, -1)

    # Cluster assignments/contributions/probabilities/weights:
    # Shape(p) = [Nn,..,N0,K]
    p = u_parents[0][0]

    # p and g are now aligned, so summing p*g over the last axis (with
    # broadcasting) gives the weighted average:
    # Shape(result) = [Nn,..,N0]
    return misc.sum_product(p, g, axes_to_sum=-1)
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameter vector given parent moments.

    The natural parameters of the clusters are obtained from
    ``self.distribution.compute_phi_from_parents`` using every parent moment
    except the first, and each of them is averaged over the cluster axis
    with the weights ``u_parents[0][0]``.

    Parameters
    ----------
    *u_parents
        Parent moments; ``u_parents[0][0]`` holds the cluster weights with
        the cluster axis last.
    mask
        Accepted but not used by this method.

    Returns
    -------
    list
        One array per natural parameter, shaped ``[Nn,..,N0,Dd,..,D0]``.

    Raises
    ------
    RuntimeError
        If ``self.cluster_plate`` is not negative.

    Warns
    -----
    A warning is issued through ``warnings.warn`` if any resulting
    parameter contains NaN.
    """
    # Natural parameters of the individual clusters
    cluster_params = self.distribution.compute_phi_from_parents(
        *(u_parents[1:]))
    # Contributions/weights/probabilities
    weights = u_parents[0][0]

    averaged = []
    found_nan = False
    for ind, param in enumerate(cluster_params):
        # The shapes are not aligned to begin with: the cluster axis (K)
        # may sit anywhere among the plates of the parameter, and the
        # parameter also carries its own dimensions (Dd,..,D0).
        #
        # Shape(param)  = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        #
        # The usual broadcasting rules apply to Nn,..,N0: leading axes may
        # be absent and any axis may have length one.
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")
        n_dims = self.ndims[ind]
        cluster_axis = self.cluster_plate - n_dims

        # Cluster axis of the parameter goes last:
        # Shape(param) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(param) < abs(cluster_axis):
            param = param[..., None]
        else:
            param = misc.moveaxis(param, cluster_axis, -1)

        # Weights get the parameter axes appended, then K is moved last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = misc.add_trailing_axes(weights, n_dims)
        p = misc.moveaxis(p, -(n_dims + 1), -1)

        # Zero out entries with zero weight; otherwise p=0 combined with
        # an infinite parameter would produce NaN
        param = np.where(p != 0, param, 0)

        # Shapes now broadcast, so sum p*param over the last axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        param = misc.sum_product(p, param, axes_to_sum=-1)
        averaged.append(param)

        if np.any(np.isnan(param)):
            found_nan = True

    if found_nan:
        warnings.warn("The natural parameters of mixture distribution "
                      "contain nans. This may happen if you use fixed "
                      "parameters in your model. Technically, one possible "
                      "reason is that the cluster assignment probability "
                      "for some element is zero (p=0) and the natural "
                      "parameter of that cluster is -inf, thus "
                      "0*(-inf)=nan. Solution: Use parameters that assign "
                      "non-zero probabilities for the whole domain.")

    return averaged
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameter vector given parent moments.

    The natural parameters of the clusters are obtained from
    ``self.distribution.compute_phi_from_parents`` using every parent moment
    except the first, and each of them is averaged over the cluster axis
    with the weights ``u_parents[0][0]``.

    Parameters
    ----------
    *u_parents
        Parent moments; ``u_parents[0][0]`` holds the cluster weights with
        the cluster axis last.
    mask
        Accepted but not used by this method.

    Returns
    -------
    list
        One array per natural parameter, shaped ``[Nn,..,N0,Dd,..,D0]``.

    Raises
    ------
    RuntimeError
        If ``self.cluster_plate`` is not negative.

    Warns
    -----
    A warning is issued through ``warnings.warn`` if any resulting
    parameter contains NaN.
    """
    # Natural parameters of the individual clusters
    Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = []
    saw_nan = False
    for ind, phi_k in enumerate(Phi):
        # Element-wise product followed by a sum over the K clusters.  The
        # operands need aligning first because the cluster axis may be at
        # any plate position in phi_k, which additionally has the
        # parameter dimensions (Dd,..,D0) at the end.
        #
        # Shape(phi_k)  = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        #
        # Nn,..,N0 follow normal broadcasting: leading axes may be missing
        # and axes may have length one.
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")
        nd = self.ndims[ind]
        cluster_axis = self.cluster_plate - nd

        # Move the cluster axis of phi_k to the end:
        # Shape(phi_k) = [Nn,..,N0,Dd,..,D0,K]
        has_cluster_axis = np.ndim(phi_k) >= abs(cluster_axis)
        if has_cluster_axis:
            phi_k = misc.moveaxis(phi_k, cluster_axis, -1)
        else:
            phi_k = phi_k[..., None]

        # Append parameter axes to the weights and move K to the end:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = misc.moveaxis(misc.add_trailing_axes(P, nd), -(nd + 1), -1)

        # Where the weight is zero, use zero instead of the parameter.
        # This avoids nans when p=0 and the parameter is infinite.
        phi_k = np.where(p != 0, phi_k, 0)

        # Sum p*phi_k over the (now aligned) last axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi_k = misc.sum_product(p, phi_k, axes_to_sum=-1)
        phi.append(phi_k)

        if np.any(np.isnan(phi_k)):
            saw_nan = True

    if saw_nan:
        warnings.warn(
            "The natural parameters of mixture distribution "
            "contain nans. This may happen if you use fixed "
            "parameters in your model. Technically, one possible "
            "reason is that the cluster assignment probability "
            "for some element is zero (p=0) and the natural "
            "parameter of that cluster is -inf, thus "
            "0*(-inf)=nan. Solution: Use parameters that assign "
            "non-zero probabilities for the whole domain.")

    return phi
def run(M=10, N=100, D_y=3, D=5, seed=45):
    """
    Demonstrate saving and loading the state of a VB engine with HDF5.

    A model built by ``pca_model(M, N, D)`` is fitted to synthetic data with
    missing values.  The state is saved after one update, ten more updates
    are run, the saved state is loaded back, and ten updates are run again.
    Finally the results and the node masks are plotted.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``pca_model``.
    seed : int
        Seed passed to ``np.random.seed``; it is also printed.
    """
    # Local import so this function does not rely on a module-level import
    import os

    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print(
            "WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
            "able to save empty arrays, thus you may experience problems if "
            "you for instance try to save before running any iteration steps."
            % str(h5py.version.hdf5_version_tuple))

    # Generate data: noiseless signal plus Gaussian noise (std 0.5)
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values: random entries plus a contiguous gap
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False           # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # The state file lives in a private temporary directory that is removed
    # when the block exits.  Taking only the name of a NamedTemporaryFile
    # (as done previously) deletes that file immediately, leaves the name
    # open to reuse by another process, and never cleans up the saved state.
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, 'state.hdf5')

        # Save the state into a HDF5 file
        Q.update(X, W, alpha, tau, repeat=1)
        Q.save(filename=filename)

        # Inference loop.
        Q.update(X, W, alpha, tau, repeat=10)

        # Reload the state from the HDF5 file
        Q.load(filename=filename)

        # Inference loop again.
        Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of
    # the nodes.

    # One subplot per row: estimate with error bars, noiseless data (green)
    # and observations (red crosses)
    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = (2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0])
              * np.ones(y.shape))
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    # Masks of the nodes; the third panel is created but left empty
    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Run pattern search demo for PCA.

    A model built by ``pca.model(M, N, D)`` is fitted twice from the same
    saved state: first with plain VB-EM updates, then with pattern search
    steps interleaved with VB-EM updates.  With ``plot`` enabled, the lower
    bound ``Q.L`` is drawn against cumulative ``Q.cputime`` for both runs.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``pca.model``.
    seed : int or None
        Seed for ``np.random.seed``; ``None`` leaves the generator as is.
    rotate, debug : bool
        Accepted but not used by this function.
    maxiter : int
        Number of baseline updates and upper limit of pattern search rounds.
    plot : bool
        Whether to draw the comparison curves.
    """
    if seed is not None:
        np.random.seed(seed)

    # Synthetic data: noiseless signal plus Gaussian noise (std 0.2)
    w_true = np.random.normal(0, 1, size=(M, 1, D_y))
    x_true = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w_true, x_true, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Model and inference engine
    Q = VB(*(pca.model(M, N, D)))

    # Entries where the mask is false are treated as missing
    keep = random.mask(M, N, p=0.5)
    y[~keep] = np.nan
    Q['Y'].observe(y, mask=keep)

    # Random initialization of X and W
    for name in ('X', 'W'):
        Q[name].initialize_from_random()

    # Warm up with a few VB-EM updates and remember that state
    Q.update(repeat=10)
    Q.save()

    # Baseline: standard VB-EM
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Back to the saved state for the second method
    Q.load()

    # Pattern search interleaved with VB-EM, until convergence
    rounds = 0
    while rounds < maxiter:
        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)
        if Q.has_converged():
            break
        rounds += 1

    if not plot:
        return

    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
    bpplt.pyplot.xlabel('CPU time (in seconds)')
    bpplt.pyplot.ylabel('VB lower bound')
    bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
def run(M=10, N=100, D_y=3, D=5, seed=45):
    """
    Demonstrate saving and loading the state of a VB engine with HDF5.

    A model built by ``pca_model(M, N, D)`` is fitted to synthetic data with
    missing values.  The state is saved after one update, ten more updates
    are run, the saved state is loaded back, and ten updates are run again.
    Finally the results and the node masks are plotted.

    Parameters
    ----------
    M, N : int
        Shape ``(M, N)`` of the generated data matrix.
    D_y : int
        Size of the axis summed over when generating the noiseless data.
    D : int
        Passed to ``pca_model``.
    seed : int
        Seed passed to ``np.random.seed``; it is also printed.
    """
    # Local import so this function does not rely on a module-level import
    import os

    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))

    # Generate data: noiseless signal plus Gaussian noise (std 0.5)
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values: random entries plus a contiguous gap
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False           # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Keep the state file inside a private temporary directory that is
    # deleted on exit.  The earlier approach of keeping only the name of a
    # NamedTemporaryFile removed the file right away, left the name free
    # for another process to take, and leaked the saved state on disk.
    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, 'state.hdf5')

        # Save the state into a HDF5 file
        Q.update(X, W, alpha, tau, repeat=1)
        Q.save(filename=filename)

        # Inference loop.
        Q.update(X, W, alpha, tau, repeat=10)

        # Reload the state from the HDF5 file
        Q.load(filename=filename)

        # Inference loop again.
        Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of
    # the nodes.

    # One subplot per row: estimate with error bars, noiseless data (green)
    # and observations (red crosses)
    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = (2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0])
              * np.ones(y.shape))
    columns = np.arange(N)
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        bpplt.errorplot(fh[m], x=columns, error=err_fh[m])
        plt.plot(columns, f[m], 'g')
        plt.plot(columns, y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    # Masks of the nodes; the third panel is created but left empty
    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)