def anneal_clusters(vbParam):
    """Merge mixture components until every merged group is stable.

    Starting from the K components in ``vbParam.rhat`` (an ``N x K`` array of
    soft assignments), the number of groups is reduced one at a time using
    ``get_k_cc`` on a symmetrised pairwise Mahalanobis matrix between the
    component means.  The search stops at the first grouping whose stability
    values (as returned by ``calculate_stability``) all exceed 0.8, or when
    only a single group is left.

    Parameters
    ----------
    vbParam : object
        Must expose ``rhat`` (2-D array, ``N x K``) and ``muhat`` (3-D array,
        transposed here with ``(1, 0, 2)`` before being passed to
        ``mfm.calc_mahalonobis``).

    Returns
    -------
    labels : ndarray
        Index of the winning group for each of the N rows (``argmax`` over
        the group axis).
    stability : ndarray
        Output of ``calculate_stability`` for the returned grouping.
    cc : list of lists
        For each group, the original component indices merged into it.
        NOTE(review): the exact container type in the merged case is whatever
        ``get_k_cc`` returns -- confirm against that helper.
    """
    N, K = vbParam.rhat.shape

    stability = calculate_stability(vbParam.rhat)
    # K == 1 leaves nothing to merge.  Without this guard the loop below is
    # empty for K == 1 and the function would fall through, returning None
    # instead of the documented 3-tuple.
    if K == 1 or np.all(stability > 0.8):
        cc = [[k] for k in range(K)]
        return vbParam.rhat.argmax(1), stability, cc

    # Pairwise distance between component means, made symmetric by taking
    # the larger of the two directed distances.
    maha = mfm.calc_mahalonobis(vbParam, vbParam.muhat.transpose((1, 0, 2)))
    maha = np.maximum(maha, maha.T)

    maha_thresh_min = 0
    for k_target in range(K - 1, 0, -1):
        # get connected components with k_target number of them
        cc, maha_thresh_min = get_k_cc(maha, maha_thresh_min, k_target)

        # calculate soft assignment for each cc
        rhat_cc = np.zeros([N, len(cc)])
        for i, units in enumerate(cc):
            rhat_cc[:, i] = np.sum(vbParam.rhat[:, units], axis=1)
        # drop negligible responsibilities, then renormalise each row
        rhat_cc[rhat_cc < 0.001] = 0.0
        rhat_cc = rhat_cc / np.sum(rhat_cc, axis=1, keepdims=True)

        # calculate stability for each component
        # and make decision
        stability = calculate_stability(rhat_cc)
        if np.all(stability > 0.8) or k_target == 1:
            return rhat_cc.argmax(1), stability, cc
def recover_spikes(self, vbParam, pca, maha_dist=1):
    """Re-assign all points in ``pca`` and keep those close to a component.

    The local parameters of ``vbParam`` are refreshed on the full data set,
    the Mahalanobis distance of every point to every component is computed,
    and a point is kept unless all of its distances are at or above the
    threshold.  ``vbParam.rhat`` is then restricted to the kept rows, small
    entries are zeroed, and each row is renormalised.

    Parameters
    ----------
    vbParam : object
        Mixture parameters; ``update_local`` is called on it and its
        ``rhat`` attribute is overwritten.
    pca : ndarray
        2-D array of shape ``(N, D)``.
    maha_dist : int, optional
        Not used by this method.

    Returns
    -------
    idx_recovered : ndarray
        Row indices of ``pca`` that were kept.
    vbParam : object
        The same object that was passed in, with ``rhat`` updated.
    """
    n_points, n_dims = pca.shape

    # Cat: TODO: check if this maha thresholding recovering distance is good
    # presumably chi2 is scipy.stats.chi2, making this the square root of
    # the 0.99 quantile with n_dims degrees of freedom -- verify the import
    cutoff = np.sqrt(chi2.ppf(0.99, n_dims))

    # update rhat on full data
    features = pca[:, :, np.newaxis]
    masked = mfm.maskData(features, np.ones([n_points, 1]),
                          np.arange(n_points))
    vbParam.update_local(masked)

    # calculate mahalanobis distance
    distances = mfm.calc_mahalonobis(vbParam, features)
    beyond_every_component = np.all(distances >= cutoff, axis=1)
    idx_recovered = np.where(~beyond_every_component)[0]

    kept = vbParam.rhat[idx_recovered]
    vbParam.rhat = kept

    # zero out low assignment vals
    kept[kept < self.assignment_delete_threshold] = 0
    vbParam.rhat = kept / np.sum(kept, 1, keepdims=True)

    return idx_recovered, vbParam
def cluster_annealing(self, vbParam):
    """Merge mixture components until all groups are stable or two remain.

    Starting from the K components in ``vbParam.rhat`` (an ``N x K`` array of
    soft assignments), the number of groups is reduced one at a time using
    ``self.get_k_cc`` on a symmetrised pairwise Mahalanobis matrix between
    the component means.  The search stops at the first grouping whose
    stability values (from ``self.calculate_stability``) all exceed 0.8, or
    when only two groups are left.

    Parameters
    ----------
    vbParam : object
        Must expose ``rhat`` (2-D array, ``N x K``) and ``muhat`` (3-D array,
        transposed here with ``(1, 0, 2)`` before being passed to
        ``mfm.calc_mahalonobis``).

    Returns
    -------
    labels : ndarray
        Index of the winning group for each of the N rows (``argmax`` over
        the group axis).
    stability : ndarray
        Output of ``self.calculate_stability`` for the returned grouping.
    cc : list of lists
        For each group, the original component indices merged into it.
        NOTE(review): the exact container type in the merged case is whatever
        ``self.get_k_cc`` returns -- confirm against that helper.
    """
    N, K = vbParam.rhat.shape

    stability = self.calculate_stability(vbParam.rhat)
    # With two or fewer components there is nothing to merge.  ``K <= 2``
    # (rather than ``K == 2``) also covers K == 1, for which the loop below
    # would be empty and the method would fall through returning None.
    if K <= 2 or np.all(stability > 0.8):
        cc = [[k] for k in range(K)]
        return vbParam.rhat.argmax(1), stability, cc

    # Pairwise distance between component means, made symmetric by taking
    # the larger of the two directed distances.
    maha = mfm.calc_mahalonobis(vbParam, vbParam.muhat.transpose((1, 0, 2)))
    maha = np.maximum(maha, maha.T)

    # decrease number of connected components one at a time.
    # in any step if all components are stable, stop and return
    # otherwise, go until there are only two connected components and return it
    maha_thresh_min = 0
    for k_target in range(K - 1, 1, -1):
        # get connected components with k_target number of them
        cc, maha_thresh_min = self.get_k_cc(maha, maha_thresh_min, k_target)

        # calculate soft assignment for each cc
        rhat_cc = np.zeros([N, len(cc)])
        for i, units in enumerate(cc):
            rhat_cc[:, i] = np.sum(vbParam.rhat[:, units], axis=1)
        # drop negligible responsibilities, then renormalise each row
        rhat_cc[rhat_cc < 0.001] = 0.0
        rhat_cc = rhat_cc / np.sum(rhat_cc, axis=1, keepdims=True)

        # calculate stability for each component
        # and make decision
        stability = self.calculate_stability(rhat_cc)
        if np.all(stability > 0.8) or k_target == 2:
            return rhat_cc.argmax(1), stability, cc