def compute_cgf_from_parents(self, *u_parents):
    """
    Return the cluster-weighted average of the per-cluster term g.

    The first entry of `u_parents` carries the moments of the cluster
    assignment parent; its first moment is used as the weights p.  All
    remaining entries are forwarded to the wrapped distribution.

    Shapes, as stated by the original implementation:
        g (from the distribution): [Nn,..,K,..,N0]
        p (cluster weights):       [Nn,..,N0,K]
        result:                    [Nn,..,N0]
    """
    # Per-cluster g; the cluster axis is located at self.cluster_plate.
    per_cluster_g = self.distribution.compute_cgf_from_parents(*u_parents[1:])

    # Put the cluster axis last so that it lines up with the last axis
    # of the weights: Shape(g) = [Nn,..,N0,K]
    per_cluster_g = utils.moveaxis(per_cluster_g, self.cluster_plate, -1)

    # Cluster assignments/contributions/probabilities/weights:
    # Shape(p) = [Nn,..,N0,K]
    weights = u_parents[0][0]

    # With both arrays aligned, the weighted average is the sum of p*g
    # over the last axis (broadcasting takes care of the rest).
    return utils.sum_product(weights, per_cluster_g, axes_to_sum=-1)
def _compute_cgf_from_parents(*u_parents):
    """
    Return the cluster-weighted average of the per-cluster term g.

    `distribution` and `cluster_plate` are taken from the enclosing
    scope.  The first entry of `u_parents` carries the moments of the
    cluster assignment parent; its first moment is used as the weights
    p.  The remaining entries are forwarded to the wrapped distribution.

    Shapes, as stated by the original implementation:
        g (from the distribution): [Nn,..,K,..,N0]
        p (cluster weights):       [Nn,..,N0,K]
        result:                    [Nn,..,N0]
    """
    # Per-cluster g; the cluster axis is located at cluster_plate.
    per_cluster_g = distribution._compute_cgf_from_parents(*u_parents[1:])

    # Put the cluster axis last so that it lines up with the last axis
    # of the weights: Shape(g) = [Nn,..,N0,K]
    per_cluster_g = utils.moveaxis(per_cluster_g, cluster_plate, -1)

    # Cluster assignments/contributions/probabilities/weights:
    # Shape(p) = [Nn,..,N0,K]
    weights = u_parents[0][0]

    # With both arrays aligned, the weighted average is the sum of p*g
    # over the last axis (broadcasting takes care of the rest).
    return utils.sum_product(weights, per_cluster_g, axes_to_sum=-1)
def get_message(self, index, u_parents):
    """
    Compute the message (and its mask) sent to the parent at `index`.

    The two messages obtained from `self.message_from_children()` are
    each multiplied with the corresponding moments of every *other*
    parent and summed over the axes that the target parent does not
    have.  The list returned by `message_from_children` is updated in
    place and returned together with the reduced mask.

    Returns:
        A tuple `(m, mask)` where `m` holds the two messages and `mask`
        is the children's mask collapsed to the target parent's plates.
    """
    (messages, mask) = self.message_from_children()
    target = self.parents[index]

    # Compute both messages
    for i in range(2):
        # Put masked elements to zero
        np.copyto(messages[i], 0, where=np.logical_not(mask))

        # Add extra trailing axes to the message from children
        messages[i] = utils.add_trailing_axes(messages[i], i + 1)

        # Elements to multiply together: the child message and the i-th
        # moment of every parent except the one being messaged.
        factors = [messages[i]]
        factors.extend(
            moments[i]
            for (k, moments) in enumerate(u_parents)
            if k != index
        )

        # Find out which axes are summed over.
        full_shape = utils.broadcasted_shape_from_arrays(*factors)
        axes = utils.axes_to_collapse(full_shape, target.get_shape(i))

        # Because the summation over these axes already happens here
        # (for efficiency), the plate-multiplier applied later in
        # message_to_parent must be cancelled.
        plate_multiplier = 1
        for axis in axes:
            plate_multiplier *= full_shape[axis]

        # Compute dot product (and cancel plate-multiplier)
        summed = utils.sum_product(*factors, axes_to_sum=axes, keepdims=True)
        messages[i] = summed / plate_multiplier

    # Collapse the mask to the plates of the target parent
    mask_axes = utils.axes_to_collapse(np.shape(mask), target.plates)
    mask = np.any(mask, axis=mask_axes, keepdims=True)
    mask = utils.squeeze_to_dim(mask, len(target.plates))

    return (messages, mask)
def get_message(self, index, u_parents):
    """
    Compute the message (and its mask) sent to the parent at `index`.

    Each of the two messages from `self.message_from_children()` is
    multiplied with the matching moment of all parents other than
    `index`, then summed over the axes missing from the target parent's
    shape.  The message list is modified in place.

    Returns:
        A tuple `(m, mask)`: the two messages and the children's mask
        reduced to the target parent's plates.
    """
    (msgs, mask) = self.message_from_children()
    receiver = self.parents[index]

    # Compute both messages
    for order in range(2):
        # Put masked elements to zero
        np.copyto(msgs[order], 0, where=np.logical_not(mask))

        # Add extra trailing axes to the message from children
        msgs[order] = utils.add_trailing_axes(msgs[order], order + 1)

        # List of elements to multiply together
        terms = [msgs[order]] + [
            u_parents[k][order]
            for k in range(len(u_parents))
            if k != index
        ]

        # Find out which axes are summed over.
        full_shape = utils.broadcasted_shape_from_arrays(*terms)
        axes = utils.axes_to_collapse(full_shape, receiver.get_shape(order))

        # The sum over these axes is taken here (for efficiency), so the
        # plate-multiplier applied in message_to_parent is cancelled by
        # dividing with the number of summed elements.
        divisor = 1
        for ax in axes:
            divisor *= full_shape[ax]

        # Compute dot product (and cancel plate-multiplier)
        msgs[order] = (
            utils.sum_product(*terms, axes_to_sum=axes, keepdims=True)
            / divisor
        )

    # Compute the mask for the receiving parent
    collapsed = utils.axes_to_collapse(np.shape(mask), receiver.plates)
    mask = np.any(mask, axis=collapsed, keepdims=True)
    mask = utils.squeeze_to_dim(mask, len(receiver.plates))

    return (msgs, mask)
def _compute_phi_from_parents(*u_parents):
    """
    Compute the natural parameters as a cluster-weighted average.

    `distribution` and `cluster_plate` are taken from the enclosing
    scope.  The first entry of `u_parents` carries the moments of the
    cluster assignment parent (its first moment is used as the weights
    p); the remaining entries are forwarded to the wrapped distribution.

    Shapes, as stated by the original implementation:
        phi (per cluster): [Nn,..,K,..,N0,Dd,..,D0]
        p (weights):       [Nn,..,N0,K]
        result:            [Nn,..,N0,Dd,..,D0]

    General broadcasting rules apply for Nn,..,N0: preceding dimensions
    may be missing or equal to one.

    Returns:
        A list with one weighted-average array per natural parameter.

    Raises:
        RuntimeError: If `cluster_plate` is not negative.  Previously
            this case left `cluster_axis` unassigned.
    """
    # Cluster parameters
    cluster_phi = distribution._compute_phi_from_parents(*u_parents[1:])
    # Contributions/weights/probabilities
    weights = u_parents[0][0]

    phi = []
    for (ind, phi_k) in enumerate(cluster_phi):
        ndim = distribution.ndims[ind]

        # The cluster axis is counted from the end of the plates, so the
        # parameter dimensions (Dd,..,D0) must be skipped as well.  Only
        # negative plate indices are supported.
        if cluster_plate >= 0:
            raise RuntimeError("Cluster plate should be negative")
        cluster_axis = cluster_plate - ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        aligned_phi = utils.moveaxis(phi_k, cluster_axis, -1)

        # Add axes to p: Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(weights, ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(ndim + 1), -1)

        # Now the shapes broadcast and p*phi is summed over the last
        # (cluster) axis: Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, aligned_phi, axes_to_sum=-1))

    return phi
def _compute_phi_from_parents(*u_parents):
    """
    Compute the natural parameters as a cluster-weighted average.

    `distribution` and `cluster_plate` come from the enclosing scope.
    The first moment of the first parent is used as the cluster weights
    p; the moments of the remaining parents are passed on to the wrapped
    distribution to obtain the per-cluster parameters.

    Shapes, as stated by the original implementation:
        phi (per cluster): [Nn,..,K,..,N0,Dd,..,D0]
        p (weights):       [Nn,..,N0,K]
        result:            [Nn,..,N0,Dd,..,D0]

    General broadcasting rules apply for Nn,..,N0: preceding dimensions
    may be missing or equal to one.

    Returns:
        A list with one weighted-average array per natural parameter.

    Raises:
        RuntimeError: If `cluster_plate` is not negative.  Previously
            this case left `cluster_axis` unassigned.
    """
    # Cluster parameters
    Phi = distribution._compute_phi_from_parents(*u_parents[1:])
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = []
    for (ind, cluster_params) in enumerate(Phi):
        param_ndim = distribution.ndims[ind]

        # Only negative plate indices are supported: the cluster axis is
        # found by additionally skipping the parameter dimensions.
        if cluster_plate >= 0:
            raise RuntimeError("Cluster plate should be negative")
        cluster_axis = cluster_plate - param_ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        cluster_params = utils.moveaxis(cluster_params, cluster_axis, -1)

        # Add axes to p: Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(P, param_ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(param_ndim + 1), -1)

        # Sum p*phi over the last (cluster) axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, cluster_params, axes_to_sum=-1))

    return phi
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameters as a cluster-weighted average.

    The first moment of the first parent is used as the cluster weights
    p; the moments of the remaining parents are passed to
    `self.distribution` to obtain the per-cluster parameters.  The
    `mask` argument is accepted but not used by this implementation.

    Shapes, as stated by the original implementation:
        phi (per cluster): [Nn,..,K,..,N0,Dd,..,D0]
        p (weights):       [Nn,..,N0,K]
        result:            [Nn,..,N0,Dd,..,D0]

    General broadcasting rules apply for Nn,..,N0: preceding dimensions
    may be missing or equal to one.

    Returns:
        A list with one weighted-average array per natural parameter.

    Raises:
        RuntimeError: If `self.cluster_plate` is not negative.
    """
    # Cluster parameters
    cluster_phi = self.distribution.compute_phi_from_parents(*u_parents[1:])
    # Contributions/weights/probabilities
    weights = u_parents[0][0]

    phi = []
    for (ind, phi_k) in enumerate(cluster_phi):
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")
        # Skip the parameter dimensions (Dd,..,D0) to locate the
        # cluster axis.
        cluster_axis = self.cluster_plate - self.distribution.ndims[ind]

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(phi_k) < abs(cluster_axis):
            # The array does not reach the cluster axis; append a new
            # trailing axis in its place so it broadcasts over clusters.
            aligned_phi = phi_k[..., None]
        else:
            aligned_phi = utils.moveaxis(phi_k, cluster_axis, -1)

        # Add axes to p: Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(weights, self.distribution.ndims[ind])
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(self.distribution.ndims[ind] + 1), -1)

        # Sum p*phi over the last (cluster) axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, aligned_phi, axes_to_sum=-1))

    return phi
def OLD_get_message(self, index, u_parents):
    """
    Older variant of `get_message`: message (and mask) for one parent.

    Instead of zeroing masked elements in the child message, this
    variant multiplies the mask into the product.  The list returned by
    `self.message_from_children()` is updated in place.

    Returns:
        A tuple `(m, mask)`: the two messages and the children's mask
        reduced to the target parent's plates.
    """
    (m, mask) = self.message_from_children()
    parent = self.parents[index]

    # Compute both messages
    for i in range(2):
        # Add i+1 trailing axes to both the message and the mask.  (The
        # former reshape of the mask was overwritten before use and has
        # been removed.)
        mask_i = mask
        for _ in range(i + 1):
            m[i] = np.expand_dims(m[i], axis=-1)
            mask_i = np.expand_dims(mask_i, axis=-1)

        # Elements to multiply together: message, mask and the i-th
        # moment of every parent except the one being messaged.
        A = [m[i], mask_i]
        A.extend(
            moments[i]
            for (k, moments) in enumerate(u_parents)
            if k != index
        )

        # Find out which axes are summed over.  Because the summation
        # already happens here (for efficiency), the plate-multiplier
        # applied in message_to_parent has to be cancelled.
        full_shape = utils.broadcasted_shape_from_arrays(*A)
        axes = utils.axes_to_collapse(full_shape, parent.get_shape(i))
        r = 1
        for j in axes:
            r *= full_shape[j]

        # Compute dot product
        m[i] = utils.sum_product(*A, axes_to_sum=axes, keepdims=True) / r

    # Compute the mask
    s = utils.axes_to_collapse(np.shape(mask), parent.plates)
    mask = np.any(mask, axis=s, keepdims=True)
    mask = utils.squeeze_to_dim(mask, len(parent.plates))

    return (m, mask)
def OLD_get_message(self, index, u_parents):
    """
    Older variant of `get_message`: message (and mask) for one parent.

    The children's mask is multiplied into the product rather than used
    to zero the child message beforehand.  The message list obtained
    from `self.message_from_children()` is modified in place.

    Returns:
        A tuple `(m, mask)`: the two messages and the children's mask
        reduced to the target parent's plates.
    """
    (m, mask) = self.message_from_children()
    parent = self.parents[index]

    # Compute both messages
    for i in range(2):
        # Give the message and the mask i+1 extra trailing axes.  The
        # reshaped mask that used to be computed here was a dead store
        # (immediately replaced by the plain mask) and is dropped.
        mask_i = mask
        for _ in range(i + 1):
            m[i] = np.expand_dims(m[i], axis=-1)
            mask_i = np.expand_dims(mask_i, axis=-1)

        # List of elements to multiply together
        A = [m[i], mask_i] + [
            u_parents[k][i]
            for k in range(len(u_parents))
            if k != index
        ]

        # Find out which axes are summed over.  The sum is taken here
        # (for efficiency), so the effect of the plate-multiplier
        # applied in message_to_parent is cancelled by dividing with r.
        full_shape = utils.broadcasted_shape_from_arrays(*A)
        axes = utils.axes_to_collapse(full_shape, parent.get_shape(i))
        r = 1
        for j in axes:
            r *= full_shape[j]

        # Compute dot product
        m[i] = utils.sum_product(*A, axes_to_sum=axes, keepdims=True) / r

    # Compute the mask
    s = utils.axes_to_collapse(np.shape(mask), parent.plates)
    mask = np.any(mask, axis=s, keepdims=True)
    mask = utils.squeeze_to_dim(mask, len(parent.plates))

    return (m, mask)
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameters as a cluster-weighted average.

    The first moment of the first parent is used as the cluster weights
    p; the moments of the remaining parents are passed to
    `self.distribution` to obtain the per-cluster parameters.  The
    `mask` argument is accepted but not used by this implementation.

    Shapes, as stated by the original implementation:
        phi (per cluster): [Nn,..,K,..,N0,Dd,..,D0]
        p (weights):       [Nn,..,N0,K]
        result:            [Nn,..,N0,Dd,..,D0]

    General broadcasting rules apply for Nn,..,N0: preceding dimensions
    may be missing or equal to one.

    A warning is issued when any resulting parameter contains NaN.

    Returns:
        A list with one weighted-average array per natural parameter.

    Raises:
        RuntimeError: If `self.cluster_plate` is not negative.
    """
    # Cluster parameters
    cluster_phi = self.distribution.compute_phi_from_parents(*u_parents[1:])
    # Contributions/weights/probabilities
    weights = u_parents[0][0]

    phi = []
    nans = False

    for (ind, phi_k) in enumerate(cluster_phi):
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")
        # Skip the parameter dimensions (Dd,..,D0) to locate the
        # cluster axis.
        cluster_axis = self.cluster_plate - self.ndims[ind]

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(phi_k) >= abs(cluster_axis):
            aligned_phi = utils.moveaxis(phi_k, cluster_axis, -1)
        else:
            # The array does not reach the cluster axis; append a new
            # trailing axis in its place so it broadcasts over clusters.
            aligned_phi = phi_k[..., None]

        # Add axes to p: Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(weights, self.ndims[ind])
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(self.ndims[ind] + 1), -1)

        # Sum p*phi over the last (cluster) axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        averaged = utils.sum_product(p, aligned_phi, axes_to_sum=-1)
        phi.append(averaged)

        if np.any(np.isnan(averaged)):
            nans = True

    if nans:
        warnings.warn(
            "The natural parameters of mixture distribution "
            "contain nans. This may happen if you use fixed "
            "parameters in your model. Technically, one possible "
            "reason is that the cluster assignment probability "
            "for some element is zero (p=0) and the natural "
            "parameter of that cluster is -inf, thus "
            "0*(-inf)=nan. Solution: Use parameters that assign "
            "non-zero probabilities for the whole domain."
        )

    return phi