def lower_bound_contribution(self, gradient=False):
    """
    Return this node's term of the variational lower bound.

    The term is E[ log p(X|parents) - log q(X) ], with the expectation
    taken over q(X)q(parents).  The result is the sum of the per-plate
    terms selected by ``self.mask``, scaled by ``self._plate_multiplier``.

    ``gradient`` is accepted but is not used by this implementation.
    """
    # Moments sent by the parents and the natural parameters they imply.
    parent_moments = self._message_from_parents()
    prior_phi = self._distribution.compute_phi_from_parents(*parent_moments)

    # Start from the G term contributed by the parents.
    bound = self._distribution.compute_cgf_from_parents(*parent_moments)

    # True exactly where the variable is not observed (ignored variables
    # are handled by the final masking with self.mask).
    unobserved = np.logical_not(self.observed)

    # F for observed plates, -G for latent plates.
    bound = bound + np.where(self.observed, self.f, -self.g)

    n_plates = len(self.plates)
    for (phi_prior, phi_post, moment, dims) in zip(prior_phi,
                                                   self.phi,
                                                   self.u,
                                                   self.dims):
        n_var_axes = len(dims)

        # Give the latent mask one axis per plate and one trailing axis
        # per variable dimension so that it broadcasts against phi.
        padded = utils.add_leading_axes(unobserved,
                                        n_plates - np.ndim(unobserved))
        keep = utils.add_trailing_axes(padded, n_var_axes)

        # The variable axes are the trailing ones; they get summed out.
        var_axes = tuple(range(-n_var_axes, 0))

        # Observed entries contribute no q-term: zero their phi.
        phi_post = np.where(keep, phi_post, 0)

        # TODO/FIXME: Use einsum here?
        bound = bound + np.sum((phi_prior - phi_post) * moment,
                               axis=var_axes)

    masked_total = np.sum(np.where(self.mask, bound, 0))
    multiplier = self._plate_multiplier(self.plates,
                                        np.shape(bound),
                                        np.shape(self.mask))
    return masked_total * multiplier
def update_u(self, u, mask=True):
    """
    Store the computed moments ``u`` in ``self.u`` where ``mask`` is true.

    For every moment ``u[ind]`` the mask is extended with
    ``self.ndims[ind]`` trailing axes, ``self.u[ind]`` is enlarged to the
    broadcast shape of the stored moment, the new moment and the mask, and
    the new values are copied in only where the mask is true.  Finally,
    leading axes are added so that the stored moment has as many axes as
    ``self.get_shape(ind)``.

    Parameters
    ----------
    u : sequence of array-like
        New moments, one entry per moment of the node.
    mask : bool or array-like of bool
        Selects the plates to overwrite (it contains only plate axes).

    Raises
    ------
    RuntimeError
        If a stored moment ends up with more axes than
        ``self.get_shape(ind)`` has.
    """
    for ind, moment in enumerate(u):
        # Add axes to the mask for the variable dimensions (mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], moment, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Use mask to update only the selected plates and keep the
        # others as before.
        np.copyto(self.u[ind], moment, where=u_mask)

        # Make sure the stored moment has the correct number of dimensions.
        ndim = len(self.get_shape(ind))
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the merged array, not the raw input: padding u[ind]
            # would throw away the entries the mask just preserved and
            # could still leave too few axes when u[ind] has a lower
            # rank than the stored array.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise RuntimeError("Weird, this shouldn't happen.. :)")
def _update_phi_from_parents(self, *u_parents):
    """
    Recompute ``self.phi`` from the parents' moments and normalize its rank.

    ``self.phi`` is set to the list returned by
    ``self._distribution.compute_phi_from_parents(*u_parents)``.  Each
    element is then given exactly ``len(self.plates) + self.ndims[i]``
    axes: missing leading axes are added and extra leading axes are
    removed with a reshape (which fails if they are not of unit length).

    Raises
    ------
    ValueError
        If the shape of some ``phi[i]`` is not accepted by
        ``utils.is_shape_subset`` against ``self.get_shape(i)``.
    """
    # TODO/FIXME: Could this be combined to the function
    # _update_distribution_and_lowerbound ?
    # No, because some initialization methods may want to use this.

    # This makes correct broadcasting
    self.phi = list(self._distribution.compute_phi_from_parents(*u_parents))

    # Make sure phi has the correct number of axes. It makes life
    # a bit easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            self.phi[i] = utils.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice by the number of extra
            # axes from the front: slicing with a negative target rank
            # breaks when the target rank is 0, because shape[-0:] is
            # the whole shape and nothing would be removed.
            extra = -missing
            self.phi[i] = np.reshape(phi_i, np.shape(phi_i)[extra:])

        # Check that the shape is correct
        if not utils.is_shape_subset(np.shape(self.phi[i]),
                                     self.get_shape(i)):
            raise ValueError("Incorrect shape of phi[%d] in node class %s. "
                             "Shape is %s but it should be broadcastable "
                             "to shape %s."
                             % (i,
                                self.__class__.__name__,
                                np.shape(self.phi[i]),
                                self.get_shape(i)))
def _update_phi_from_parents(self, *u_parents):
    """
    Recompute ``self.phi`` from the parents' moments and normalize its rank.

    ``self.phi`` is set to the list returned by
    ``self._distribution.compute_phi_from_parents(*u_parents)``.  Each
    element is then given exactly
    ``len(self.plates) + self._distribution.ndims[i]`` axes: missing
    leading axes are added and extra leading axes are removed with a
    reshape (which fails if they are not of unit length).

    Raises
    ------
    ValueError
        If the shape of some ``phi[i]`` is not accepted by
        ``utils.is_shape_subset`` against ``self.get_shape(i)``.
    """
    # TODO/FIXME: Could this be combined to the function
    # _update_distribution_and_lowerbound ?
    # No, because some initialization methods may want to use this.

    # This makes correct broadcasting
    self.phi = list(self._distribution.compute_phi_from_parents(*u_parents))

    # Make sure phi has the correct number of axes. It makes life
    # a bit easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self._distribution.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            self.phi[i] = utils.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice by the number of extra
            # axes from the front: slicing with a negative target rank
            # breaks when the target rank is 0, because shape[-0:] is
            # the whole shape and nothing would be removed.
            extra = -missing
            self.phi[i] = np.reshape(phi_i, np.shape(phi_i)[extra:])

        # Check that the shape is correct
        if not utils.is_shape_subset(np.shape(self.phi[i]),
                                     self.get_shape(i)):
            raise ValueError("Incorrect shape in phi[%d]. Shape is %s but "
                             "it should be broadcastable to shape %s."
                             % (i,
                                np.shape(self.phi[i]),
                                self.get_shape(i)))
def update_phi_from_parents(self, u_parents):
    """
    Recompute ``self.phi`` from the parents' moments and normalize its rank.

    ``self.phi`` is set to the list returned by
    ``self.compute_phi_from_parents(u_parents)``.  Each element is then
    given exactly ``len(self.plates) + self.ndims[i]`` axes: missing
    leading axes are added and extra leading axes are removed with a
    reshape (which fails if they are not of unit length).

    Parameters
    ----------
    u_parents
        Passed through unchanged to ``self.compute_phi_from_parents``.
    """
    # This makes correct broadcasting
    self.phi = list(self.compute_phi_from_parents(u_parents))

    # Make sure phi has the correct number of axes. It makes life
    # a bit easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            self.phi[i] = utils.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice by the number of extra
            # axes from the front: slicing with a negative target rank
            # breaks when the target rank is 0, because shape[-0:] is
            # the whole shape and nothing would be removed.
            extra = -missing
            self.phi[i] = np.reshape(phi_i, np.shape(phi_i)[extra:])
def _set_moments(self, u, mask=True):
    """
    Store the moments ``u`` in ``self.u`` where ``mask`` is true.

    For every moment ``u[ind]`` the mask is extended with
    ``self._distribution.ndims[ind]`` trailing axes, ``self.u[ind]`` is
    enlarged to the broadcast shape of the stored moment, the new moment
    and the mask, and the new values are copied in only where the mask is
    true.  Finally, leading axes are added so that the stored moment has
    as many axes as ``self.get_shape(ind)``.

    This method does not consult the observation mask: it is also used to
    set the observations themselves, so the caller is responsible for
    passing a mask that selects only the entries it wants overwritten
    (e.g. only the latent ones).

    Parameters
    ----------
    u : sequence of array-like
        New moments, one entry per moment of the node.
    mask : bool or array-like of bool
        Selects the plates to overwrite (it contains only plate axes).

    Raises
    ------
    RuntimeError
        If a stored moment ends up with more axes than
        ``self.get_shape(ind)`` has.
    """
    for ind, moment in enumerate(u):
        # Add axes to the mask for the variable dimensions (mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self._distribution.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], moment, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Overwrite only the entries selected by the mask and keep the
        # others as before.
        np.copyto(self.u[ind], moment, where=u_mask)

        # Make sure the stored moment has the correct number of dimensions.
        # TODO/FIXME: Maybe it would be good to also check that u has a
        # shape that is a sub-shape of get_shape.
        shape = self.get_shape(ind)
        ndim = len(shape)
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the merged array, not the raw input: padding u[ind]
            # would throw away the entries the mask just preserved and
            # could still leave too few axes when u[ind] has a lower
            # rank than the stored array.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise RuntimeError(
                "The size of the variable %s's %s-th moment "
                "array is %s which is larger than it should "
                "be, that is, %s, based on the plates %s and "
                "dimension %s. Check that you have provided "
                "plates properly."
                % (self.name,
                   ind,
                   np.shape(self.u[ind]),
                   shape,
                   self.plates,
                   self.dims[ind]))
def _set_moments(self, u, mask=True):
    """
    Store the moments ``u`` in ``self.u`` where ``mask`` is true.

    For every moment ``u[ind]`` the mask is extended with
    ``self._distribution.ndims[ind]`` trailing axes, ``self.u[ind]`` is
    enlarged to the broadcast shape of the stored moment, the new moment
    and the mask, and the new values are copied in only where the mask is
    true.  Finally, leading axes are added so that the stored moment has
    as many axes as ``self.get_shape(ind)``.

    This method does not consult the observation mask: it is also used to
    set the observations themselves, so the caller is responsible for
    passing a mask that selects only the entries it wants overwritten
    (e.g. only the latent ones).

    Parameters
    ----------
    u : sequence of array-like
        New moments, one entry per moment of the node.
    mask : bool or array-like of bool
        Selects the plates to overwrite (it contains only plate axes).

    Raises
    ------
    RuntimeError
        If a stored moment ends up with more axes than
        ``self.get_shape(ind)`` has.
    """
    for ind, moment in enumerate(u):
        # Add axes to the mask for the variable dimensions (mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self._distribution.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], moment, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Overwrite only the entries selected by the mask and keep the
        # others as before.
        np.copyto(self.u[ind], moment, where=u_mask)

        # Make sure the stored moment has the correct number of dimensions.
        # TODO/FIXME: Maybe it would be good to also check that u has a
        # shape that is a sub-shape of get_shape.
        shape = self.get_shape(ind)
        ndim = len(shape)
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the merged array, not the raw input: padding u[ind]
            # would throw away the entries the mask just preserved and
            # could still leave too few axes when u[ind] has a lower
            # rank than the stored array.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise RuntimeError(
                "The size of the variable %s's %s-th moment "
                "array is %s which is larger than it should "
                "be, that is, %s, based on the plates %s and "
                "dimension %s. Check that you have provided "
                "plates properly."
                % (self.name,
                   ind,
                   np.shape(self.u[ind]),
                   shape,
                   self.plates,
                   self.dims[ind]))
def _update_phi_from_parents(self, *u_parents):
    """
    Recompute ``self.phi`` from the parents' moments and normalize its rank.

    ``self.phi`` is set to the list returned by
    ``self._compute_phi_from_parents(*u_parents)``.  Each element is then
    given exactly ``len(self.plates) + self.ndims[i]`` axes: missing
    leading axes are added and extra leading axes are removed with a
    reshape (which fails if they are not of unit length).
    """
    # TODO/FIXME: Could this be combined to the function
    # _update_distribution_and_lowerbound ?
    # No, because some initialization methods may want to use this.

    # This makes correct broadcasting
    self.phi = list(self._compute_phi_from_parents(*u_parents))

    # Make sure phi has the correct number of axes. It makes life
    # a bit easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            self.phi[i] = utils.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice by the number of extra
            # axes from the front: slicing with a negative target rank
            # breaks when the target rank is 0, because shape[-0:] is
            # the whole shape and nothing would be removed.
            extra = -missing
            self.phi[i] = np.reshape(phi_i, np.shape(phi_i)[extra:])
def _update_phi_from_parents(self, *u_parents):
    """
    Recompute ``self.phi`` from the parents' moments and normalize its rank.

    ``self.phi`` is set to the list returned by
    ``self._compute_phi_from_parents(*u_parents)``.  Each element is then
    given exactly ``len(self.plates) + self.ndims[i]`` axes: missing
    leading axes are added and extra leading axes are removed with a
    reshape (which fails if they are not of unit length).
    """
    # TODO/FIXME: Could this be combined to the function
    # _update_distribution_and_lowerbound ?
    # No, because some initialization methods may want to use this.

    # This makes correct broadcasting
    self.phi = list(self._compute_phi_from_parents(*u_parents))

    # Make sure phi has the correct number of axes. It makes life
    # a bit easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            self.phi[i] = utils.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice by the number of extra
            # axes from the front: slicing with a negative target rank
            # breaks when the target rank is 0, because shape[-0:] is
            # the whole shape and nothing would be removed.
            extra = -missing
            self.phi[i] = np.reshape(phi_i, np.shape(phi_i)[extra:])
def lower_bound_contribution(self, gradient=False):
    """
    Return this node's term of the variational lower bound.

    The term is E[ log p(X|parents) - log q(X) ], with the expectation
    taken over q(X)q(parents).  The result is the sum of the per-plate
    terms selected by ``self.mask``, scaled by ``self._plate_multiplier``.

    ``gradient`` is accepted but is not used by this implementation.
    """
    # Moments sent by the parents and the natural parameters they imply.
    parent_moments = self._message_from_parents()
    prior_phi = self._compute_phi_from_parents(*parent_moments)

    # Start from the G term contributed by the parents.
    bound = self._compute_cgf_from_parents(*parent_moments)

    # True exactly where the variable is not observed (ignored variables
    # are handled by the final masking with self.mask).
    unobserved = np.logical_not(self.observed)

    # F for observed plates, -G for latent plates.
    bound = bound + np.where(self.observed, self.f, -self.g)

    n_plates = len(self.plates)
    for (phi_prior, phi_post, moment, dims) in zip(prior_phi,
                                                   self.phi,
                                                   self.u,
                                                   self.dims):
        n_var_axes = len(dims)

        # Give the latent mask one axis per plate and one trailing axis
        # per variable dimension so that it broadcasts against phi.
        padded = utils.add_leading_axes(unobserved,
                                        n_plates - np.ndim(unobserved))
        keep = utils.add_trailing_axes(padded, n_var_axes)

        # The variable axes are the trailing ones; they get summed out.
        var_axes = tuple(range(-n_var_axes, 0))

        # Observed entries contribute no q-term: zero their phi.
        phi_post = np.where(keep, phi_post, 0)

        # TODO/FIXME: Use einsum here?
        bound = bound + np.sum((phi_prior - phi_post) * moment,
                               axis=var_axes)

    masked_total = np.sum(np.where(self.mask, bound, 0))
    multiplier = self._plate_multiplier(self.plates,
                                        np.shape(bound),
                                        np.shape(self.mask))
    return masked_total * multiplier