def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """
    Compute the message to the parent with the given index.

    For ``index == 0`` a one-element list is returned: the products of the
    child messages and the moments in ``u_X``, summed over the variable axes
    and accumulated over all moments.  For ``index == 1`` one array per
    child message is returned, each being the child message multiplied by
    ``u_Z[0]`` with the gate axis moved to the gated plate position.

    Raises ``ValueError`` for any other ``index``.
    """
    if index == 0:
        total = 0
        # Child * X, summed over the variable axes.  Both operands are
        # reshaped so that the gate axis sits right before the variable axes
        # and the two arrays broadcast against each other.
        for (i, child_msg) in enumerate(m_child):
            ndim = len(self.dims[i])
            child = misc.moveaxis(child_msg[..., None], -1, -ndim - 1)
            gated_axis = self.gated_plate - ndim
            x = u_X[i]
            if np.ndim(x) >= abs(gated_axis):
                x = misc.moveaxis(x, gated_axis, -ndim - 1)
            else:
                # X has no gated axis: insert a unit axis in its place
                x = np.expand_dims(x, -ndim - 1)
            total = total + misc.sum_product(
                child,
                x,
                axes_to_sum=tuple(range(-ndim, 0)),
            )
        # Make sure the variable axis does not use broadcasting
        return [total * np.ones(self.K)]

    if index == 1:
        messages = []
        for (i, child_msg) in enumerate(m_child):
            ndim = len(self.dims[i])
            # Z moments: append the variable axes, then put the gate axis
            # last so that it is handled as the trailing axis below
            z = misc.add_trailing_axes(u_Z[0], ndim)
            z = misc.moveaxis(z, -ndim - 1, -1)
            # Axis index of the gated plate
            gated_axis = self.gated_plate - ndim
            # Child message with a trailing gate axis, weighted by Z
            weighted = z * misc.add_trailing_axes(child_msg, 1)
            # Pad with leading axes when the gated axis would not exist
            missing = abs(gated_axis) - np.ndim(weighted)
            if missing > 0:
                weighted = misc.add_leading_axes(weighted, missing)
            # Move the gate axis to its final position
            messages.append(misc.moveaxis(weighted, -1, gated_axis))
        return messages

    raise ValueError("Invalid parent index")
def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """
    Compute the message to the parent with the given index.

    For ``index == 0`` a one-element list is returned: the products of the
    child messages and the moments in ``u_X``, summed over the variable axes
    and accumulated over all moments.  For ``index == 1`` one array per
    child message is returned, each being the child message multiplied by
    ``u_Z[0]`` with the gate axis moved to the gated plate position.

    Raises ``ValueError`` for any other ``index``.
    """
    if index == 0:
        accumulated = 0
        # Child * X, summed over the variable axes.  Both operands are
        # reshaped so that the gate axis sits right before the variable axes
        # and the two arrays broadcast against each other.
        for (k, message) in enumerate(m_child):
            ndim = len(self.dims[k])
            gate_slot = -ndim - 1
            c = misc.moveaxis(message[..., None], -1, gate_slot)
            gated_axis = self.gated_plate - ndim
            x = u_X[k]
            has_gated_axis = np.ndim(x) >= abs(gated_axis)
            if has_gated_axis:
                x = misc.moveaxis(x, gated_axis, gate_slot)
            else:
                # X has no gated axis: insert a unit axis in its place
                x = np.expand_dims(x, gate_slot)
            variable_axes = tuple(range(-ndim, 0))
            accumulated = accumulated + misc.sum_product(
                c, x, axes_to_sum=variable_axes
            )
        # Make sure the variable axis does not use broadcasting
        accumulated = accumulated * np.ones(self.K)
        return [accumulated]

    if index == 1:
        result = []
        for (k, message) in enumerate(m_child):
            ndim = len(self.dims[k])
            # Z moments: append the variable axes, then put the gate axis
            # last so that it is handled as the trailing axis below
            z = misc.moveaxis(
                misc.add_trailing_axes(u_Z[0], ndim), -ndim - 1, -1
            )
            # Axis index of the gated plate
            gated_axis = self.gated_plate - ndim
            # Child message with a trailing gate axis, weighted by Z
            mk = z * misc.add_trailing_axes(message, 1)
            # Pad with leading axes when the gated axis would not exist
            if np.ndim(mk) < abs(gated_axis):
                mk = misc.add_leading_axes(mk, abs(gated_axis) - np.ndim(mk))
            # Move the gate axis to its final position
            result.append(misc.moveaxis(mk, -1, gated_axis))
        return result

    raise ValueError("Invalid parent index")
def integrated_logpdf_from_parents(self, x, index):
    r"""
    Approximate the posterior predictive log-density of ``x``.

    Approximates the posterior predictive pdf
    \int p(x|parents) q(parents) dparents
    in log-scale as
    \int q(parents_i) exp( \int q(parents_\i) \log p(x|parents)
    dparents_\i ) dparents_i.

    Only ``index == 0`` (integrating out the cluster assignments) is
    implemented; any other ``index`` raises ``NotImplementedError``.
    """
    if index != 0:
        raise NotImplementedError()

    # Integrate out the cluster assignments.  First, integrate the cluster
    # parameters in log-scale.
    # Shape(x) = [M1,..,Mm,N1,..,Nn,D1,..,Dd]
    u_parents = self._message_from_parents()
    mixed = self._distribution.distribution

    # Shape(u) = [M1,..,Mm,N1,..,1,..,Nn,D1,..,Dd]
    # Shape(f) = [M1,..,Mm,N1,..,1,..,Nn]
    (u, f) = mixed.compute_fixed_moments_and_f(x)
    f = np.expand_dims(f, axis=self.cluster_plate)
    # Build a new list instead of overwriting the returned moments in place
    u = [
        np.expand_dims(u_i, axis=self.cluster_plate - len(self.dims[i]))
        for (i, u_i) in enumerate(u)
    ]

    # Shape(phi) = [N1,..,K,..,Nn,D1,..,Dd]
    phi = mixed.compute_phi_from_parents(*(u_parents[1:]))
    # Shape(g) = [N1,..,K,..,Nn]
    g = mixed.compute_cgf_from_parents(*(u_parents[1:]))
    # Shape(lpdf) = [M1,..,Mm,N1,..,K,..,Nn]
    lpdf = mixed.compute_logpdf(u, phi, g, f, self.ndims)

    # From logpdf to pdf, but avoid over/underflow by subtracting the
    # maximum over the clusters before exponentiating
    lpdf_max = np.max(lpdf, axis=self.cluster_plate, keepdims=True)
    pdf = np.exp(lpdf - lpdf_max)

    # Move cluster axis to be the last:
    # Shape(pdf) = [M1,..,Mm,N1,..,Nn,K]
    pdf = misc.moveaxis(pdf, self.cluster_plate, -1)

    # Cluster assignments/probabilities/weights
    # Shape(p) = [N1,..,Nn,K]
    p = u_parents[0][0]

    # Weighted average over the clusters.  The cluster axis is now the last
    # axis of both arrays, so the sum must run over axis -1 (summing over
    # self.cluster_plate is only equivalent when cluster_plate == -1).
    # TODO/FIXME: Use einsum!
    # Shape(pdf) = [M1,..,Mm,N1,..,Nn]
    pdf = np.sum(pdf * p, axis=-1)

    # Back to log-scale (add the overflow fix!).  lpdf_max still has its
    # kept unit axis at the original cluster plate position.
    lpdf_max = np.squeeze(lpdf_max, axis=self.cluster_plate)
    return np.log(pdf) + lpdf_max
def integrated_logpdf_from_parents(self, x, index):
    r"""
    Approximate the posterior predictive log-density of ``x``.

    Approximates the posterior predictive pdf
    \int p(x|parents) q(parents) dparents
    in log-scale as
    \int q(parents_i) exp( \int q(parents_\i) \log p(x|parents)
    dparents_\i ) dparents_i.

    Only ``index == 0`` (integrating out the cluster assignments) is
    implemented; any other ``index`` raises ``NotImplementedError``.
    """
    if index != 0:
        raise NotImplementedError()

    # Integrate out the cluster assignments.  First, integrate the cluster
    # parameters in log-scale.
    # Shape(x) = [M1,..,Mm,N1,..,Nn,D1,..,Dd]
    u_parents = self._message_from_parents()
    mixed = self._distribution.distribution

    # Shape(u) = [M1,..,Mm,N1,..,1,..,Nn,D1,..,Dd]
    # Shape(f) = [M1,..,Mm,N1,..,1,..,Nn]
    (u, f) = mixed.compute_fixed_moments_and_f(x)
    f = np.expand_dims(f, axis=self.cluster_plate)
    # Build a new list instead of overwriting the returned moments in place
    u = [
        np.expand_dims(u_i, axis=self.cluster_plate - len(self.dims[i]))
        for (i, u_i) in enumerate(u)
    ]

    # Shape(phi) = [N1,..,K,..,Nn,D1,..,Dd]
    phi = mixed.compute_phi_from_parents(*(u_parents[1:]))
    # Shape(g) = [N1,..,K,..,Nn]
    g = mixed.compute_cgf_from_parents(*(u_parents[1:]))
    # Shape(lpdf) = [M1,..,Mm,N1,..,K,..,Nn]
    lpdf = mixed.compute_logpdf(u, phi, g, f, self.ndims)

    # From logpdf to pdf, but avoid over/underflow by subtracting the
    # maximum over the clusters before exponentiating
    lpdf_max = np.max(lpdf, axis=self.cluster_plate, keepdims=True)
    pdf = np.exp(lpdf - lpdf_max)

    # Move cluster axis to be the last:
    # Shape(pdf) = [M1,..,Mm,N1,..,Nn,K]
    pdf = misc.moveaxis(pdf, self.cluster_plate, -1)

    # Cluster assignments/probabilities/weights
    # Shape(p) = [N1,..,Nn,K]
    p = u_parents[0][0]

    # Weighted average over the clusters.  The cluster axis is now the last
    # axis of both arrays, so the sum must run over axis -1 (summing over
    # self.cluster_plate is only equivalent when cluster_plate == -1).
    # TODO/FIXME: Use einsum!
    # Shape(pdf) = [M1,..,Mm,N1,..,Nn]
    pdf = np.sum(pdf * p, axis=-1)

    # Back to log-scale (add the overflow fix!).  lpdf_max still has its
    # kept unit axis at the original cluster plate position.
    lpdf_max = np.squeeze(lpdf_max, axis=self.cluster_plate)
    return np.log(pdf) + lpdf_max
def _compute_moments(self, u_Z, u_X):
    """
    Compute one moment array per entry of ``u_X``.

    Each entry of ``u_X`` is multiplied by ``u_Z[0]`` and summed over the
    gated plate axis, which is first moved to (or added as) the last axis of
    both operands.
    """
    moments = []
    for (i, x_i) in enumerate(u_X):
        ndim = len(self.dims[i])
        # Z: append the variable axes, then move the gate axis to the end
        z = misc.moveaxis(misc.add_trailing_axes(u_Z[0], ndim), -ndim - 1, -1)
        gated_axis = self.gated_plate - ndim
        if np.ndim(x_i) >= abs(gated_axis):
            # X has the gated axis: bring it to the end as well
            x = misc.moveaxis(x_i, gated_axis, -1)
        else:
            # X lacks the gated axis: add a unit axis so it broadcasts
            x = misc.add_trailing_axes(x_i, 1)
        # Sum-product over the (now last) gated axis
        moments.append(misc.sum_product(z, x, axes_to_sum=-1))
    return moments
def _compute_moments(self, u_Z, u_X):
    """
    Compute one moment array per entry of ``u_X``.

    Each entry of ``u_X`` is multiplied by ``u_Z[0]`` and summed over the
    gated plate axis, which is first moved to (or added as) the last axis of
    both operands.
    """
    u = []
    for (idx, moment) in enumerate(u_X):
        ndim = len(self.dims[idx])
        # Z: append the variable axes, then move the gate axis to the end
        z = misc.add_trailing_axes(u_Z[0], ndim)
        z = misc.moveaxis(z, -ndim - 1, -1)
        gated_axis = self.gated_plate - ndim
        lacks_gated_axis = np.ndim(moment) < abs(gated_axis)
        x = (
            # X lacks the gated axis: add a unit axis so it broadcasts
            misc.add_trailing_axes(moment, 1)
            if lacks_gated_axis
            # X has the gated axis: bring it to the end as well
            else misc.moveaxis(moment, gated_axis, -1)
        )
        # Sum-product over the (now last) gated axis
        u.append(misc.sum_product(z, x, axes_to_sum=-1))
    return u
def compute_cgf_from_parents(self, *u_parents):
    r"""
    Compute :math:`\mathrm{E}_{q(p)}[g(p)]`

    The value of g computed by the mixed distribution for each cluster is
    averaged over the clusters, using the first moment of the first parent
    (``u_parents[0][0]``) as weights.
    """
    # g for the clusters, from the parents of the mixed distribution:
    # Shape(g) = [Nn,..,K,..,N0]
    cluster_g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))

    # Put the cluster axis last:
    # Shape(g) = [Nn,..,N0,K]
    if np.ndim(cluster_g) >= abs(self.cluster_plate):
        cluster_g = misc.moveaxis(cluster_g, self.cluster_plate, -1)
    else:
        # Not enough axes, so just append an axis for the cluster plate
        cluster_g = np.expand_dims(cluster_g, -1)

    # Cluster assignments/contributions/probabilities/weights:
    # Shape(p) = [Nn,..,N0,K]
    weights = u_parents[0][0]

    # With the cluster axes aligned, the weighted average is the sum of p*g
    # over the last axis (broadcasting takes care of the other plates):
    # Shape(result) = [Nn,..,N0]
    return misc.sum_product(weights, cluster_g, axes_to_sum=-1)
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-density array
    with the cluster axis moved to the end.  ``index >= 1`` forwards to the
    mixed distribution (with parent index ``index - 1``) and weighs its
    messages by the cluster assignment probabilities ``u_parents[0][0]``.
    Any other ``index`` falls through and returns ``None``.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]
        cluster_moments = u_parents[1:]

        # Shape(g) = [Nn,..,K,..,N0]
        g = self.raw_distribution.compute_cgf_from_parents(*cluster_moments)
        # Cluster axis of g to the end: Shape(g) = [Nn,..,N0,K]
        # NOTE(review): g gets its cluster axis moved to the end here, while
        # phi and the reshaped u below keep it at self.cluster_plate.  That
        # looks consistent only when cluster_plate == -1 -- confirm.
        if np.ndim(g) >= abs(self.cluster_plate):
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.raw_distribution.compute_phi_from_parents(*cluster_moments)

        # Insert a unit cluster axis into every moment that has enough axes:
        # Shape(u) = [Nn,..,1,..,N0,Dd,..,D0]
        u_reshaped = []
        for (ui, ndimi) in zip(u, self.ndims):
            cluster_axis = self.cluster_plate - ndimi
            if np.ndim(ui) >= abs(cluster_axis):
                ui = np.expand_dims(ui, cluster_axis)
            u_reshaped.append(ui)

        # Shape(L) = [Nn,..,K,..,N0]
        L = self.raw_distribution.compute_logpdf(
            u_reshaped,
            phi,
            g,
            0,
            self.ndims,
        )
        # Shape(L) = [Nn,..,N0,K]
        return [np.moveaxis(L, self.cluster_plate, -1)]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index_for_parent = index - 1

        # Shape(u_self) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")
            u_self.append(
                np.expand_dims(u_i, axis=self.cluster_plate - self.ndims[ind])
            )

        # Message from the mixed distribution
        # Shape(m) = [Nn,..,K,..,N0,Dd,..,D0]
        messages = self.raw_distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # The cluster assignment probabilities act as weights on the plate
        # elements.  They have to go through the plate mapping of the mixed
        # distribution (compute_weights_to_parent); otherwise nested
        # mixtures, or any distribution that does something to the plates,
        # would not work.  See issue #39 for more details.
        # Shape(p) = [Nn,..,K,..,N0]
        weights = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        weights = misc.moveaxis(weights, -1, self.cluster_plate)
        weights = self.raw_distribution.compute_weights_to_parent(
            index_for_parent,
            weights,
        )

        # Weigh the elements in the message array
        #
        # TODO/FIXME: This may result in huge intermediate arrays. Need to
        # use einsum!
        parent_ndims = self.ndims_parents[index_for_parent]
        return [
            message * misc.add_trailing_axes(weights, ndim)
            for (message, ndim) in zip(messages, parent_ndims)
        ]
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-density array
    with the cluster axis last.  ``index >= 1`` forwards to the mixed
    distribution (with parent index ``index - 1``) and weighs its messages
    by the cluster assignment probabilities ``u_parents[0][0]``.  Any other
    ``index`` falls through and returns ``None``.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Shape(g) = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Cluster axis of g to the end: Shape(g) = [Nn,..,N0,K]
        if np.ndim(g) >= abs(self.cluster_plate):
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move the cluster axis of each phi right before the variable axes
        # (the list is updated in place):
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for (ind, phi_i) in enumerate(phi):
            if self.cluster_plate >= 0:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_from = self.cluster_plate - self.ndims[ind]
            axis_to = -1 - self.ndims[ind]
            rank = np.ndim(phi_i)
            if rank >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi_i, axis_from, axis_to)
            elif rank >= abs(axis_to):
                # No cluster plate axis, but phi has something on the target
                # position: add a new axis there
                phi[ind] = np.expand_dims(phi_i, axis=axis_to)

        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
            for (ind, u_i) in enumerate(u)
        ]

        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing machinery to take care of it.
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index_for_parent = index - 1

        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")
            u_self.append(
                np.expand_dims(u_i, axis=self.cluster_plate - self.ndims[ind])
            )

        # Message from the mixed distribution
        messages = self.distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # The cluster assignment probabilities act as weights on the plate
        # elements.  They have to go through the plate mapping of the mixed
        # distribution (compute_weights_to_parent); otherwise nested
        # mixtures, or any distribution that does something to the plates,
        # would not work.  See issue #39 for more details.
        weights = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        weights = misc.moveaxis(weights, -1, self.cluster_plate)
        weights = self.distribution.compute_weights_to_parent(
            index_for_parent,
            weights,
        )

        # Weigh the elements in the message array
        parent_ndims = self.ndims_parents[index_for_parent]
        return [
            message * misc.add_trailing_axes(weights, ndim)
            for (message, ndim) in zip(messages, parent_ndims)
        ]
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameter vector given parent moments.

    The natural parameters of the mixed distribution are averaged over the
    clusters, weighted by ``u_parents[0][0]``.  A warning is issued when any
    resulting array contains NaN.  ``mask`` is accepted but not used here.
    """
    # Cluster parameters
    cluster_phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    weights = u_parents[0][0]

    phi = []
    found_nan = False
    for (ind, phi_k) in enumerate(cluster_phi):
        # Element-wise product, then sum over the K clusters.  The arrays are
        # not aligned as such: the cluster axis may be anywhere among the
        # plates of phi, and phi also carries the parameter dimensions.
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        # Usual broadcasting applies to Nn,..,N0: leading dimensions may be
        # missing or have length one.
        if self.cluster_plate >= 0:
            raise RuntimeError("Cluster plate should be negative")
        ndim = self.ndims[ind]
        cluster_axis = self.cluster_plate - ndim

        # Cluster axis of phi to the end:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(phi_k) >= abs(cluster_axis):
            phi_k = misc.moveaxis(phi_k, cluster_axis, -1)
        else:
            phi_k = phi_k[..., None]

        # Trailing unit axes for the parameter dimensions, then the cluster
        # axis to the end:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = misc.add_trailing_axes(weights, ndim)
        p = misc.moveaxis(p, -(ndim + 1), -1)

        # Handle zero probability cases. This avoids nans when p=0 and
        # phi=inf.
        phi_k = np.where(p != 0, phi_k, 0)

        # Shapes now broadcast, so sum p*phi over the last axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi_k = misc.sum_product(p, phi_k, axes_to_sum=-1)
        if np.any(np.isnan(phi_k)):
            found_nan = True
        phi.append(phi_k)

    if found_nan:
        warnings.warn(
            "The natural parameters of mixture distribution "
            "contain nans. This may happen if you use fixed "
            "parameters in your model. Technically, one possible "
            "reason is that the cluster assignment probability "
            "for some element is zero (p=0) and the natural "
            "parameter of that cluster is -inf, thus "
            "0*(-inf)=nan. Solution: Use parameters that assign "
            "non-zero probabilities for the whole domain."
        )

    return phi
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-density array
    with the cluster axis last.  ``index >= 1`` forwards to the mixed
    distribution (with parent index ``index - 1``) and weighs its messages
    by the responsibilities ``u_parents[0][0]``.  Any other ``index`` falls
    through and returns ``None``.

    Raises ``RuntimeError`` in the ``index == 0`` branch when the cluster
    plate axis is not negative and there is at least one natural parameter.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g) = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g) = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis (the list is updated in place):
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                axis_from = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1 - self.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi[ind], axis_from, axis_to)
            elif np.ndim(phi[ind]) >= abs(axis_to):
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
            for (ind, u_i) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing machinery to take care of it.
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    elif index >= 1:
        # Parent index for the distribution used for the mixture.  Kept in
        # its own name so that the ``index`` argument is not overwritten.
        index_for_parent = index - 1

        # Reshape u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u_i, axis=cluster_axis))

        # Message from the mixed distribution
        # Shape(m) = [Nn,..,K,..,N0,Dd,..,D0]
        m = self.distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # Responsibilities for clusters are the first parent's first moment.
        # Moving the cluster axis to its proper place does not depend on the
        # individual message, so it is done once, outside the loop:
        # Shape(p) = [Nn,..,N0,K]  ->  [Nn,..,K,..,N0]
        p = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        p = misc.moveaxis(p, -1, self.cluster_plate)

        # Number of axes for the variable dimensions of each parent message
        ndims_parent = self.ndims_parents[index_for_parent]

        # Weigh the messages with the responsibilities.  For each message,
        # trailing unit axes are added to p for the variable dimensions:
        # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
        # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
        #
        # TODO: You could do summing here already so that you wouldn't
        # compute huge matrices as intermediate result. Use einsum.
        return [
            m_i * misc.add_trailing_axes(p, ndims_parent[i])
            for (i, m_i) in enumerate(m)
        ]
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-density array
    with the cluster axis last.  ``index >= 1`` forwards to the mixed
    distribution (with parent index ``index - 1``) and weighs its messages
    by the cluster assignment probabilities ``u_parents[0][0]``.  Any other
    ``index`` falls through and returns ``None``.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]
        mixture_parents = u_parents[1:]

        # Shape(g) = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*mixture_parents)
        # Cluster axis of g to the end: Shape(g) = [Nn,..,N0,K]
        if np.ndim(g) >= abs(self.cluster_plate):
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*mixture_parents)
        # Move the cluster axis of each phi right before the variable axes
        # (the list is updated in place):
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for (k, phi_k) in enumerate(phi):
            if self.cluster_plate >= 0:
                raise RuntimeError("Cluster plate axis must be negative")
            source = self.cluster_plate - self.ndims[k]
            target = -1 - self.ndims[k]
            if np.ndim(phi_k) >= abs(source):
                # Cluster plate axis exists, move it to the correct position
                phi[k] = misc.moveaxis(phi_k, source, target)
            elif np.ndim(phi_k) >= abs(target):
                # No cluster plate axis, but phi has something on the target
                # position: add a new axis there
                phi[k] = np.expand_dims(phi_k, axis=target)

        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = []
        for (k, u_k) in enumerate(u):
            u_self.append(np.expand_dims(u_k, axis=(-1 - self.ndims[k])))

        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing machinery to take care of it.
        return [
            self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        ]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index_for_parent = index - 1

        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (k, u_k) in enumerate(u):
            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")
            cluster_axis = self.cluster_plate - self.ndims[k]
            u_self.append(np.expand_dims(u_k, axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # The cluster assignment probabilities act as weights on the plate
        # elements.  They have to go through the plate mapping of the mixed
        # distribution (compute_weights_to_parent); otherwise nested
        # mixtures, or any distribution that does something to the plates,
        # would not work.  See issue #39 for more details.
        p = misc.moveaxis(
            misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate)),
            -1,
            self.cluster_plate,
        )
        p = self.distribution.compute_weights_to_parent(index_for_parent, p)

        # Weigh the elements in the message array
        weighted = []
        for (mi, ndim) in zip(m, self.ndims_parents[index_for_parent]):
            weighted.append(mi * misc.add_trailing_axes(p, ndim))
        return weighted
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameter vector given parent moments.

    The natural parameters of the mixed distribution are averaged over the
    clusters, weighted by ``u_parents[0][0]``.  A warning is issued when any
    resulting array contains NaN.  ``mask`` is accepted but not used here.
    """
    # Cluster parameters
    Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    averaged = []
    nan_flags = []
    for (ind, Phi_i) in enumerate(Phi):
        # Element-wise product, then sum over the K clusters.  The arrays are
        # not aligned as such: the cluster axis may be anywhere among the
        # plates of phi, and phi also carries the parameter dimensions.
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(p)      = [Nn,..,N0,K]
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        # Usual broadcasting applies to Nn,..,N0: leading dimensions may be
        # missing or have length one.
        if self.cluster_plate >= 0:
            raise RuntimeError("Cluster plate should be negative")
        cluster_axis = self.cluster_plate - self.ndims[ind]

        # Cluster axis of phi to the end:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        has_cluster_axis = np.ndim(Phi_i) >= abs(cluster_axis)
        if has_cluster_axis:
            aligned = misc.moveaxis(Phi_i, cluster_axis, -1)
        else:
            aligned = Phi_i[..., None]

        # Trailing unit axes for the parameter dimensions, then the cluster
        # axis to the end:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = misc.moveaxis(
            misc.add_trailing_axes(P, self.ndims[ind]),
            -(self.ndims[ind] + 1),
            -1,
        )

        # Handle zero probability cases. This avoids nans when p=0 and
        # phi=inf.
        aligned = np.where(p != 0, aligned, 0)

        # Shapes now broadcast, so sum p*phi over the last axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        summed = misc.sum_product(p, aligned, axes_to_sum=-1)
        averaged.append(summed)
        nan_flags.append(bool(np.any(np.isnan(summed))))

    if any(nan_flags):
        warnings.warn(
            "The natural parameters of mixture distribution "
            "contain nans. This may happen if you use fixed "
            "parameters in your model. Technically, one possible "
            "reason is that the cluster assignment probability "
            "for some element is zero (p=0) and the natural "
            "parameter of that cluster is -inf, thus "
            "0*(-inf)=nan. Solution: Use parameters that assign "
            "non-zero probabilities for the whole domain."
        )

    return averaged
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-density array
    with the cluster axis last.  ``index >= 1`` forwards to the mixed
    distribution (with parent index ``index - 1``) and weighs its messages
    by the responsibilities ``u_parents[0][0]``.  Any other ``index`` falls
    through and returns ``None``.

    Raises ``RuntimeError`` in the ``index == 0`` branch when the cluster
    plate axis is not negative and there is at least one natural parameter.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g) = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g) = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis (the list is updated in place):
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                axis_from = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1 - self.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi[ind], axis_from, axis_to)
            elif np.ndim(phi[ind]) >= abs(axis_to):
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
            for (ind, u_i) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing machinery to take care of it.
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    elif index >= 1:
        # Parent index for the distribution used for the mixture.  Kept in
        # its own name so that the ``index`` argument is not overwritten.
        index_for_parent = index - 1

        # Reshape u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u_i, axis=cluster_axis))

        # Message from the mixed distribution
        # Shape(m) = [Nn,..,K,..,N0,Dd,..,D0]
        m = self.distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # Responsibilities for clusters are the first parent's first moment.
        # Moving the cluster axis to its proper place does not depend on the
        # individual message, so it is done once, outside the loop:
        # Shape(p) = [Nn,..,N0,K]  ->  [Nn,..,K,..,N0]
        p = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        p = misc.moveaxis(p, -1, self.cluster_plate)

        # Number of axes for the variable dimensions of each parent message
        ndims_parent = self.ndims_parents[index_for_parent]

        # Weigh the messages with the responsibilities.  For each message,
        # trailing unit axes are added to p for the variable dimensions:
        # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
        # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
        #
        # TODO: You could do summing here already so that you wouldn't
        # compute huge matrices as intermediate result. Use einsum.
        return [
            m_i * misc.add_trailing_axes(p, ndims_parent[i])
            for (i, m_i) in enumerate(m)
        ]
def setup(self, plate_axis=None):
    """
    Prepare the statistics used by the rotation optimization.

    This method should be called just before optimization.

    For efficiency, sum over axes that are not in mu, alpha nor rotation.

    If using Q, set rotate_plates to True.

    The moments of ``self.node_parent`` and ``self.node_X`` are read, their
    axes are rearranged so that the rotated axis (``self.axis``) is last,
    and the results are stored as attributes on ``self``.  Which attributes
    are set depends on ``plate_axis``, ``self.precompute``,
    ``self.update_alpha`` and ``self.subset``.

    Parameters
    ----------
    plate_axis : int or None
        Plate axis of ``self.node_X`` to be handled as a rotated plate.
        Non-negative values are converted to negative indexing.

    Raises
    ------
    ValueError
        If ``plate_axis`` is not an integer or is out of bounds.
    NotImplementedError
        If both ``self.subset`` and ``self.precompute`` are used.
    """

    # Store the original plate_axis parameter for later use in other methods
    self.plate_axis = plate_axis

    # Manipulate the plate_axis parameter to suit the needs of this method:
    # convert it to a negative index and shift it past the variable axes
    if plate_axis is not None:
        if not isinstance(plate_axis, int):
            raise ValueError("Plate axis must be integer")
        if plate_axis >= 0:
            plate_axis -= len(self.node_X.plates)
        if plate_axis < -len(self.node_X.plates) or plate_axis >= 0:
            raise ValueError("Axis out of bounds")
        plate_axis -= self.ndim - 1  # Why -1? Because one axis is preserved!

    # Get the mean parameter. It will not be rotated. This assumes that mu
    # and alpha are really independent.
    (alpha_mu, alpha_mu2, alpha, _) = self.node_parent.get_moments()
    (X, XX) = self.node_X.get_moments()
    # Recover mu and mu2 by dividing the alpha-weighted moments by alpha
    mu = alpha_mu / alpha
    mu2 = alpha_mu2 / alpha
    # For simplicity, force mu to have the same shape as X
    mu = mu * np.ones(self.node_X.dims[0])
    mu2 = mu2 * np.ones(self.node_X.dims[0])

    ## (mu, mumu) = gaussian.reshape_gaussian_array(self.node_mu.dims[0],
    ##                                              self.node_X.dims[0],
    ##                                              mu,
    ##                                              mumu)

    # Take diagonal of covariances to variances for axes that are not in R
    # (and move those axes to be the last)
    XX = covariance_to_variance(XX, ndim=self.ndim, covariance_axis=self.axis)

    ## mumu = covariance_to_variance(mumu,
    ##                               ndim=self.ndim,
    ##                               covariance_axis=self.axis)

    # Move axes of X and mu and compute their outer product
    X = misc.moveaxis(X, self.axis, -1)
    mu = misc.moveaxis(mu, self.axis, -1)
    mu2 = misc.moveaxis(mu2, self.axis, -1)
    Xmu = linalg.outer(X, mu, ndim=1)
    # NOTE(review): D is not used anywhere else in this method
    D = np.shape(X)[-1]

    # Move axes of alpha related variables
    def safe_move_axis(x):
        # Move the rotated axis last, or append a unit axis if x does not
        # have enough axes for self.axis to exist
        if np.ndim(x) >= -self.axis:
            return misc.moveaxis(x, self.axis, -1)
        else:
            return x[..., np.newaxis]

    if self.update_alpha:
        a = safe_move_axis(self.node_alpha.phi[1])
        a0 = safe_move_axis(self.node_alpha.parents[0].get_moments()[0])
        b0 = safe_move_axis(self.node_alpha.parents[1].get_moments()[0])
        plates_alpha = list(self.node_alpha.plates)
    else:
        alpha = safe_move_axis(self.node_parent.get_moments()[2])
        plates_alpha = list(self.node_parent.get_shape(2))

    # Move plates of alpha for R: mirror the axis move above in the plate
    # list (or append a unit plate when the axis does not exist)
    if len(plates_alpha) >= -self.axis:
        plate = plates_alpha.pop(self.axis)
        plates_alpha.append(plate)
    else:
        plates_alpha.append(1)

    # Plates of X without the rotated axis
    plates_X = list(self.node_X.get_shape(0))
    plates_X.pop(self.axis)

    def sum_to_alpha(V, ndim=2):
        # Sum V from the plates of X down to the plates of alpha (the last
        # entry of plates_alpha excluded).  plates_alpha and plates_X are
        # looked up at call time, so later changes to them are visible here.
        #
        # TODO/FIXME: This could be improved so that it is not required to
        # explicitly repeat to alpha plates. Multiplying by ones was just a
        # simple bug fix.
        return sum_to_plates(
            V * np.ones(plates_alpha[:-1] + ndim * [1]),
            plates_alpha[:-1],
            ndim=ndim,
            plates_from=plates_X
        )

    if plate_axis is not None:

        # Move plate axis just before the rotated dimensions (which are
        # last)
        def safe_move_plate_axis(x, ndim):
            # ndim is the number of trailing (rotated) axes of x.  If x
            # lacks the plate axis, a unit axis is inserted in front of
            # those trailing axes instead.
            if np.ndim(x) - ndim >= -plate_axis:
                return misc.moveaxis(x, plate_axis - ndim, -ndim - 1)
            else:
                inds = (Ellipsis, None) + ndim * (slice(None),)
                return x[inds]

        X = safe_move_plate_axis(X, 1)
        mu = safe_move_plate_axis(mu, 1)
        XX = safe_move_plate_axis(XX, 2)
        mu2 = safe_move_plate_axis(mu2, 1)
        if self.update_alpha:
            a = safe_move_plate_axis(a, 1)
            a0 = safe_move_plate_axis(a0, 1)
            b0 = safe_move_plate_axis(b0, 1)
        else:
            alpha = safe_move_plate_axis(alpha, 1)

        # Move plates of X and alpha to match the axis moves above
        plate = plates_X.pop(plate_axis)
        plates_X.append(plate)
        if len(plates_alpha) >= -plate_axis + 1:
            plate = plates_alpha.pop(plate_axis - 1)
        else:
            plate = 1
        # Insert the plate just before the last entry of plates_alpha
        plates_alpha = plates_alpha[:-1] + [plate] + plates_alpha[-1:]

        CovX = XX - linalg.outer(X, X)
        self.CovX = sum_to_plates(CovX, plates_alpha[:-2], ndim=3, plates_from=plates_X[:-1])

        # Broadcast mumu to ensure shape
        # mumu = np.ones(np.shape(XX)[-3:]) * mumu
        mu2 = mu2 * np.ones(np.shape(X)[-2:])
        self.mu2 = sum_to_alpha(mu2, ndim=1)

        if self.precompute:
            # Precompute some stuff for the gradient of plate rotation
            #
            # NOTE: These terms may require a lot of memory if alpha has the
            # same or almost the same plates as X.
            self.X_X = sum_to_plates(
                X[..., :, :, None, None] * X[..., None, None, :, :],
                plates_alpha[:-2],
                ndim=4,
                plates_from=plates_X[:-1],
            )
            self.X_mu = sum_to_plates(
                X[..., :, :, None, None] * mu[..., None, None, :, :],
                plates_alpha[:-2],
                ndim=4,
                plates_from=plates_X[:-1],
            )
        else:
            self.X = X
            self.mu = mu

    else:
        # Sum axes that are not in the plates of alpha
        self.XX = sum_to_alpha(XX)
        self.mu2 = sum_to_alpha(mu2, ndim=1)
        self.Xmu = sum_to_alpha(Xmu)

    if self.update_alpha:
        self.a = a
        self.a0 = a0
        self.b0 = b0
    else:
        self.alpha = alpha

    self.plates_X = plates_X
    self.plates_alpha = plates_alpha

    # Take only a subset of the matrix for rotation
    if self.subset is not None:
        if self.precompute:
            raise NotImplementedError("Precomputation not implemented when "
                                      "using a subset")
        # from X
        # NOTE(review): within this method self.X is assigned only when
        # plate_axis is not None; with plate_axis=None this line presumably
        # relies on self.X having been set elsewhere -- confirm.
        self.X = self.X[..., self.subset]
        self.mu2 = self.mu2[..., self.subset]
        if plate_axis is not None:
            # from CovX: keep all leading indices, take the subset on the
            # last two axes
            inds = []
            for i in range(np.ndim(self.CovX) - 2):
                inds.append(range(np.shape(self.CovX)[i]))
            inds.append(self.subset)
            inds.append(self.subset)
            indices = np.ix_(*inds)
            self.CovX = self.CovX[indices]
            # from mu
            self.mu = self.mu[..., self.subset]
        else:
            # from XX: keep all leading indices, take the subset on the
            # last two axes
            inds = []
            for i in range(np.ndim(self.XX) - 2):
                inds.append(range(np.shape(self.XX)[i]))
            inds.append(self.subset)
            inds.append(self.subset)
            indices = np.ix_(*inds)
            self.XX = self.XX[indices]
            # from Xmu
            self.Xmu = self.Xmu[..., self.subset]
        # from alpha: a last axis of length one is left as it is
        if self.update_alpha:
            if np.shape(self.a)[-1] > 1:
                self.a = self.a[..., self.subset]
            if np.shape(self.a0)[-1] > 1:
                self.a0 = self.a0[..., self.subset]
            if np.shape(self.b0)[-1] > 1:
                self.b0 = self.b0[..., self.subset]
        else:
            if np.shape(self.alpha)[-1] > 1:
                self.alpha = self.alpha[..., self.subset]
        self.plates_alpha[-1] = min(self.plates_alpha[-1], len(self.subset))
def safe_move_axis(x):
    """
    Return ``x`` with the axis ``self.axis`` moved to the last position.

    If ``x`` has fewer than ``-self.axis`` axes, a new last axis of length
    one is appended instead.
    """
    if np.ndim(x) < -self.axis:
        # The axis does not exist in x: append a unit axis in its place
        return x[..., np.newaxis]
    return misc.moveaxis(x, self.axis, -1)
def safe_move_plate_axis(x, ndim):
    """
    Return ``x`` with the plate axis placed right before its last ``ndim``
    axes.

    If ``x`` has too few axes in front of the last ``ndim`` ones for the
    plate axis to exist, a new axis of length one is inserted at that
    position instead.
    """
    if np.ndim(x) - ndim < -plate_axis:
        # Index with (..., None, :, ..., :) to insert the unit axis in front
        # of the trailing ndim axes
        trailing = ndim * (slice(None),)
        return x[(Ellipsis, None) + trailing]
    return misc.moveaxis(x, plate_axis - ndim, -ndim - 1)