def gaussian_array(X, rows=-2, cols=-1, scale=1):
    """
    Plot a Gaussian array node as a grid of Hinton diagrams.

    Parameters
    ----------
    X : node
        Node providing Gaussian moments; the mean is plotted and, when
        ``scale`` is non-zero, ``scale`` times the posterior standard
        deviation is shown as the error.
    rows : int, optional
        Axis used as the subplot-row axis (default: second to last).
    cols : int, optional
        Axis used as the subplot-column axis (default: last).
    scale : float, optional
        Scaling of the standard-deviation error bars; 0 disables the
        error visualization (default: 1).

    Raises
    ------
    ValueError
        If the row/column axes are invalid, equal, or the array has more
        than four effective axes.
    """
    # Get mean and second moment
    (x, xx) = X.get_moments()
    ndim = len(X.dims[0])
    shape = X.get_shape(0)
    size = len(X.get_shape(0))

    # Standard deviation from the diagonal of the second moment:
    # Var[x] = <xx> - <x>^2
    xx = utils.get_diag(xx, ndim=ndim)
    std = np.sqrt(xx - x**2)

    # Force explicit elements when broadcasting
    x = x * np.ones(shape)
    std = std * np.ones(shape)

    # Normalize the axes to 0,...,size-1
    if rows < 0:
        rows += size
    if cols < 0:
        cols += size
    if rows < 0 or rows >= size:
        raise ValueError("Row axis invalid")
    if cols < 0 or cols >= size:
        raise ValueError("Column axis invalid")
    # BUGFIX: using the same axis for both rows and columns would put a
    # repeated axis into the transpose below and fail with an obscure
    # NumPy error; reject it explicitly.
    if rows == cols:
        raise ValueError("Row and column axes must differ")

    # Put the row and column axes to the end
    axes = [i for i in range(size) if i not in (rows, cols)] + [rows, cols]
    x = np.transpose(x, axes=axes)
    std = np.transpose(std, axes=axes)

    # Remove non-row and non-column axes that have length 1
    squeezed_shape = tuple([sh for sh in np.shape(x)[:-2] if sh != 1])
    x = np.reshape(x, squeezed_shape + np.shape(x)[-2:])
    std = np.reshape(std, squeezed_shape + np.shape(x)[-2:])

    # Make explicit four axes
    x = utils.atleast_nd(x, 4)
    std = utils.atleast_nd(std, 4)
    if np.ndim(x) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    M = np.shape(x)[0]
    N = np.shape(x)[1]

    # Common color scale over all subplots
    vmax = np.max(np.abs(x) + scale * std)

    # BUGFIX: removed a dead list comprehension that pre-created every
    # subplot a second time (leftover from commented-out axis sharing).
    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i * N + j + 1)
            if scale == 0:
                hinton(x[i, j], vmax=vmax)
            else:
                hinton(x[i, j], vmax=vmax, error=scale * std[i, j])
def gaussian_array(X, rows=-2, cols=-1, scale=1):
    """
    Plot a Gaussian array node as a grid of Hinton diagrams.

    Parameters
    ----------
    X : node
        Node providing Gaussian moments; the mean is plotted and, when
        ``scale`` is non-zero, ``scale`` times the posterior standard
        deviation is shown as the error.
    rows : int, optional
        Axis used as the subplot-row axis (default: second to last).
    cols : int, optional
        Axis used as the subplot-column axis (default: last).
    scale : float, optional
        Scaling of the standard-deviation error bars; 0 disables the
        error visualization (default: 1).

    Raises
    ------
    ValueError
        If the row/column axes are invalid, equal, or the array has more
        than four effective axes.
    """
    # Get mean and second moment
    (x, xx) = X.get_moments()
    ndim = len(X.dims[0])
    shape = X.get_shape(0)
    size = len(X.get_shape(0))

    # Standard deviation from the diagonal of the second moment:
    # Var[x] = <xx> - <x>^2
    xx = utils.get_diag(xx, ndim=ndim)
    std = np.sqrt(xx - x**2)

    # Force explicit elements when broadcasting
    x = x * np.ones(shape)
    std = std * np.ones(shape)

    # Normalize the axes to 0,...,size-1
    if rows < 0:
        rows += size
    if cols < 0:
        cols += size
    if rows < 0 or rows >= size:
        raise ValueError("Row axis invalid")
    if cols < 0 or cols >= size:
        raise ValueError("Column axis invalid")
    # BUGFIX: identical row and column axes would produce a repeated axis
    # in the transpose below and crash with an unhelpful NumPy error.
    if rows == cols:
        raise ValueError("Row and column axes must differ")

    # Put the row and column axes to the end
    axes = [i for i in range(size) if i not in (rows, cols)] + [rows, cols]
    x = np.transpose(x, axes=axes)
    std = np.transpose(std, axes=axes)

    # Remove non-row and non-column axes that have length 1
    squeezed_shape = tuple([sh for sh in np.shape(x)[:-2] if sh != 1])
    x = np.reshape(x, squeezed_shape + np.shape(x)[-2:])
    std = np.reshape(std, squeezed_shape + np.shape(x)[-2:])

    # Make explicit four axes
    x = utils.atleast_nd(x, 4)
    std = utils.atleast_nd(std, 4)
    if np.ndim(x) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    M = np.shape(x)[0]
    N = np.shape(x)[1]

    # Common color scale over all subplots
    vmax = np.max(np.abs(x) + scale * std)

    # BUGFIX: dropped the dead ``ax`` list comprehension that created each
    # subplot twice (remnant of commented-out sharex/sharey code).
    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i * N + j + 1)
            if scale == 0:
                hinton(x[i, j], vmax=vmax)
            else:
                hinton(x[i, j], vmax=vmax, error=scale * std[i, j])
def timeseries_categorical_mc(Z):
    """
    Plot the categorical state probabilities of a node as Hinton
    diagrams, one subplot per plate.
    """
    # Convert so that the node exposes categorical moments
    Z = Z._convert(CategoricalMoments)

    # Expectations, broadcast explicitly over the full plate shape
    z = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Lay the plates out on a (rows x cols) subplot grid; exactly four
    # axes are supported
    z = utils.atleast_nd(z, 4)
    if np.ndim(z) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (n_rows, n_cols) = np.shape(z)[:2]

    panel = 0
    for row in range(n_rows):
        for col in range(n_cols):
            panel += 1
            plt.subplot(n_rows, n_cols, panel)
            # Transposed before plotting (presumably so that time runs
            # along the horizontal axis -- confirm against hinton()).
            hinton(z[row, col].T, vmax=1.0, square=False)
def timeseries_categorical_mc(Z):
    """
    Draw Hinton-diagram subplots of the categorical state probabilities
    of the given node, one subplot per plate.
    """
    # Ensure the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Expectations with explicit broadcasting to the plate shape
    probs = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Require exactly four axes for the subplot layout
    probs = utils.atleast_nd(probs, 4)
    if np.ndim(probs) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (grid_rows, grid_cols) = np.shape(probs)[:2]

    for r in range(grid_rows):
        for c in range(grid_cols):
            plt.subplot(grid_rows, grid_cols, r * grid_cols + c + 1)
            # NOTE(review): transposed before plotting -- presumed so a
            # column of the diagram is one time step; confirm.
            hinton(probs[r, c].T, vmax=1.0, square=False)
def infer(y, D, K,
          mask=True, maxiter=100, rotate=False, debug=False,
          precompute=False, update_hyper=0, start_rotating=0,
          start_rotating_weights=0, plot_C=True, monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    Parameters
    ----------
    y : array
        Observations, made at least 2-D and interpreted as shape (M, N).
    D : int
        Dimensionality of the latent state space.
    K : int
        Dimensionality of the latent dynamics-weight space.
    mask : bool or array, optional
        Observation mask passed to the observed node (default: True).
    maxiter : int, optional
        Number of VB update iterations (default: 100).
    rotate : bool, optional
        Use rotation transformations to speed up convergence (default: False).
    debug : bool, optional
        If True, check bound and gradient during rotations (default: False).
    precompute : bool, optional
        Forwarded to the rotation constructors (default: False).
    update_hyper : int, optional
        Number of initial iterations in which only the lower-level nodes
        ('X', 'C', 'A', 'tau') are updated (default: 0).
    start_rotating : int, optional
        First iteration at which state-space rotations run (default: 0).
    start_rotating_weights : int, optional
        First iteration at which the weight-space rotation runs (default: 0).
    plot_C : bool, optional
        If False, disable the plotter of node 'C' (default: True).
    monitor : bool, optional
        If True, plot distributions during learning (default: True).
    autosave : optional
        Passed to ``Q.set_autosave`` with ``iterations=10``; presumably a
        filename -- TODO confirm (default: None).

    Returns
    -------
    Q
        The posterior approximation (inference engine) after learning.
    """
    y = utils.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)

    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        # Initial rotate the D-dimensional state space (X, A, C).
        # Does not update hyperparameters.
        rotA_init = transformations.RotateGaussianARD(Q['A'],
                                                      axis=0,
                                                      precompute=precompute)
        rotX_init = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            rotA_init)
        rotC_init = transformations.RotateGaussianARD(Q['C'],
                                                      axis=0,
                                                      precompute=precompute)
        R_X_init = transformations.RotationOptimizer(rotX_init, rotC_init, D)

        # Rotate the D-dimensional state space (X, A, C),
        # this time updating the hyperparameters (alpha, gamma) too
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=0,
                                                 precompute=precompute)
        rotX = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            rotA)
        rotC = transformations.RotateGaussianARD(Q['C'],
                                                 Q['gamma'],
                                                 axis=0,
                                                 precompute=precompute)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C).
        # NOTE: ``rotA`` is rebound here for the K-space rotation
        # (axis=-1); the D-space ``rotA`` above has already been captured
        # by ``rotX``, so this is safe.
        rotB = transformations.RotateGaussianARD(Q['B'],
                                                 Q['beta'],
                                                 precompute=precompute)
        rotS = transformations.RotateGaussianMarkovChain(Q['S'], rotB)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=-1,
                                                 precompute=precompute)
        R_S = transformations.RotationOptimizer(rotS, rotA, K)

        if debug:
            rotate_kwargs = {'maxiter': 10, 'check_bound': True, 'check_gradient': True}
        else:
            rotate_kwargs = {'maxiter': 10}

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):
        if ind < update_hyper:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
            if rotate and ind >= start_rotating:
                # Use the rotation which does not update alpha nor beta
                R_X_init.rotate(**rotate_kwargs)
        else:
            Q.update(plot=monitor)
            if rotate and ind >= start_rotating:
                # It might be a good idea to not rotate immediately because it
                # might lead to pruning out components too efficiently before
                # even estimating them roughly
                R_X.rotate(**rotate_kwargs)
                if ind >= start_rotating_weights:
                    R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message from this mixture node to one of its parents.

    Parent 0 appears to be the cluster-assignment parent: its message is
    the log-pdf of this node's moments under each cluster. Parents with
    ``index >= 1`` belong to the mixed distribution and receive that
    distribution's own message, weighted by the cluster responsibilities
    (the first moment of parent 0).

    Shape-comment legend: [Nn,..,N0] plate axes, K the cluster axis,
    [Dd,..,D0] the variable axes.
    """
    if index == 0:

        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g)      = [Nn,..,N0,K]
        g = utils.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis:
        # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                # The cluster plate is counted from the end of the plate
                # axes, so offset it past the variable axes of phi[ind].
                axis_from = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1 - self.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = utils.moveaxis(phi[ind], axis_from, axis_to)
            else:
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                if np.ndim(phi[ind]) >= abs(axis_to):
                    phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            u_self.append(np.expand_dims(u[ind],
                                         axis=(-1 - self.ndims[ind])))

        # Compute logpdf:
        # Shape(L)      = [Nn,..,N0,K]
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

        # Sum over other than the cluster dimensions? No!
        # Hmm.. I think the message passing method will do
        # that automatically

        m = [L]

        return m

    elif index >= 1:

        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Reshape u:
        # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(parent,
                                                        index,
                                                        u_self,
                                                        *(u_parents[1:]))

        # Weigh the messages with the responsibilities
        for i in range(len(m)):

            # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(p)      = [Nn,..,N0,K]
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

            # Number of axes for the variable dimensions for
            # the parent message.
            D = self.ndims_parents[index][i]

            # Responsibilities for clusters are the first
            # parent's first moment:
            # Shape(p)      = [Nn,..,N0,K]
            p = u_parents[0][0]
            # Move the cluster axis to the proper place:
            # Shape(p)      = [Nn,..,K,..,N0]
            p = utils.atleast_nd(p, abs(self.cluster_plate))
            p = utils.moveaxis(p, -1, self.cluster_plate)
            # Add axes for variable dimensions to the contributions
            # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
            p = utils.add_trailing_axes(p, D)

            if self.cluster_plate < 0:
                # Add the variable dimensions
                cluster_axis = self.cluster_plate - D

            # Add axis for clusters:
            # Shape(m)      = [Nn,..,1,..,N0,Dd,..,D0]
            # m[i] = np.expand_dims(m[i], axis=cluster_axis)
            # NOTE(review): ``cluster_axis`` is currently unused because
            # the expand_dims above is commented out.

            #
            # TODO: You could do summing here already so that
            # you wouldn't compute huge matrices as
            # intermediate result. Use einsum.

            # Compute the message contributions for each
            # cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m
def infer(y, D, K,
          mask=True, maxiter=100, rotate=False, debug=False,
          precompute=False, update_hyper=0, start_rotating=0,
          start_rotating_weights=0, plot_C=True, monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    Parameters
    ----------
    y : array
        Observations, made at least 2-D and interpreted as shape (M, N).
    D : int
        Dimensionality of the latent state space.
    K : int
        Dimensionality of the latent dynamics-weight space.
    mask : bool or array, optional
        Observation mask for the observed node (default: True).
    maxiter : int, optional
        Number of VB update iterations (default: 100).
    rotate : bool, optional
        Use rotation transformations for faster convergence (default: False).
    debug : bool, optional
        If True, check bound and gradient during rotations (default: False).
    precompute : bool, optional
        Forwarded to the rotation constructors (default: False).
    update_hyper : int, optional
        Number of initial iterations in which only the lower-level nodes
        ('X', 'C', 'A', 'tau') are updated (default: 0).
    start_rotating : int, optional
        First iteration at which state-space rotations run (default: 0).
    start_rotating_weights : int, optional
        First iteration at which the weight-space rotation runs (default: 0).
    plot_C : bool, optional
        If False, disable the plotter of node 'C' (default: True).
    monitor : bool, optional
        If True, plot distributions during learning (default: True).
    autosave : optional
        Passed to ``Q.set_autosave`` with ``iterations=10``; presumably a
        filename -- TODO confirm (default: None).

    Returns
    -------
    Q
        The posterior approximation (inference engine) after learning.
    """
    y = utils.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)

    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        # Initial rotate the D-dimensional state space (X, A, C).
        # Does not update hyperparameters.
        rotA_init = transformations.RotateGaussianARD(Q['A'],
                                                      axis=0,
                                                      precompute=precompute)
        rotX_init = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[..., 1:, None],
            rotA_init)
        rotC_init = transformations.RotateGaussianARD(Q['C'],
                                                      axis=0,
                                                      precompute=precompute)
        R_X_init = transformations.RotationOptimizer(rotX_init, rotC_init, D)

        # Rotate the D-dimensional state space (X, A, C),
        # this time also updating hyperparameters (alpha, gamma)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=0,
                                                 precompute=precompute)
        rotX = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[..., 1:, None],
            rotA)
        rotC = transformations.RotateGaussianARD(Q['C'],
                                                 Q['gamma'],
                                                 axis=0,
                                                 precompute=precompute)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C).
        # NOTE: ``rotA`` is rebound here for the K-space rotation
        # (axis=-1); the D-space ``rotA`` above was already handed to
        # ``rotX``, so rebinding is safe.
        rotB = transformations.RotateGaussianARD(Q['B'],
                                                 Q['beta'],
                                                 precompute=precompute)
        rotS = transformations.RotateGaussianMarkovChain(Q['S'], rotB)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=-1,
                                                 precompute=precompute)
        R_S = transformations.RotationOptimizer(rotS, rotA, K)

        if debug:
            rotate_kwargs = {
                'maxiter': 10,
                'check_bound': True,
                'check_gradient': True
            }
        else:
            rotate_kwargs = {'maxiter': 10}

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):
        if ind < update_hyper:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
            if rotate and ind >= start_rotating:
                # Use the rotation which does not update alpha nor beta
                R_X_init.rotate(**rotate_kwargs)
        else:
            Q.update(plot=monitor)
            if rotate and ind >= start_rotating:
                # It might be a good idea to not rotate immediately because it
                # might lead to pruning out components too efficiently before
                # even estimating them roughly
                R_X.rotate(**rotate_kwargs)
                if ind >= start_rotating_weights:
                    R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message from this mixture node to one of its parents.

    Parent 0 appears to be the cluster-assignment parent: its message is
    the log-pdf of this node's moments under each cluster. Parents with
    ``index >= 1`` belong to the mixed distribution and receive that
    distribution's own message, weighted by the cluster responsibilities
    (the first moment of parent 0).

    Shape-comment legend: [Nn,..,N0] plate axes, K the cluster axis,
    [Dd,..,D0] the variable axes.
    """
    if index == 0:

        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g)      = [Nn,..,N0,K]
        g = utils.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis:
        # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                # Cluster plate is counted from the end of the plate axes,
                # so offset it past the variable axes of phi[ind].
                axis_from = self.cluster_plate-self.distribution.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1-self.distribution.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = utils.moveaxis(phi[ind], axis_from, axis_to)
            else:
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                if np.ndim(phi[ind]) >= abs(axis_to):
                    phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            u_self.append(np.expand_dims(u[ind],
                                         axis=(-1-self.distribution.ndims[ind])))

        # Compute logpdf:
        # Shape(L)      = [Nn,..,N0,K]
        L = self.distribution.compute_logpdf(u_self, phi, g, 0)

        # Sum over other than the cluster dimensions? No!
        # Hmm.. I think the message passing method will do
        # that automatically

        m = [L]

        return m

    elif index >= 1:

        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Reshape u:
        # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.distribution.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(parent,
                                                        index,
                                                        u_self,
                                                        *(u_parents[1:]))

        # Weigh the messages with the responsibilities
        for i in range(len(m)):

            # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(p)      = [Nn,..,N0,K]
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

            # Number of axes for the variable dimensions for
            # the parent message.
            D = self.distribution.ndims_parents[index][i]

            # Responsibilities for clusters are the first
            # parent's first moment:
            # Shape(p)      = [Nn,..,N0,K]
            p = u_parents[0][0]
            # Move the cluster axis to the proper place:
            # Shape(p)      = [Nn,..,K,..,N0]
            p = utils.atleast_nd(p, abs(self.cluster_plate))
            p = utils.moveaxis(p, -1, self.cluster_plate)
            # Add axes for variable dimensions to the contributions
            # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
            p = utils.add_trailing_axes(p, D)

            if self.cluster_plate < 0:
                # Add the variable dimensions
                cluster_axis = self.cluster_plate - D

            # Add axis for clusters:
            # Shape(m)      = [Nn,..,1,..,N0,Dd,..,D0]
            #m[i] = np.expand_dims(m[i], axis=cluster_axis)
            # NOTE(review): ``cluster_axis`` is currently unused because
            # the expand_dims above is commented out.

            #
            # TODO: You could do summing here already so that
            # you wouldn't compute huge matrices as
            # intermediate result. Use einsum.

            # Compute the message contributions for each
            # cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m