def draw_m(self, it, x, j, Kmax, verbose):
    """
    Helper function which does the draws from the m_jt full conditional,
    which implicitly determines the overall clustering structure for each
    document.  Resets and refills the `m_` count matrix in place using the
    beta sample stored for iteration `it`.  Called by gibbs_direct()

    For every (j, k) pair with q_[j, k] > 0, a value m in 1..q_[j, k] is
    drawn with probability proportional to
        exp(stir_.stirlog(q_[j, k], m)) * (a0_ * beta_samples[it, k]) ** m
    and stored in m_[j, k].

    The weights are normalised in log space (the largest log weight is
    subtracted before exponentiating), so a large count or a constant
    offset in the values returned by stirlog cannot underflow every term
    to zero and silently turn the draw into a uniform one.

    Parameters
    ----------
    it : index of the current iteration (row of `beta_samples`)
    x, j, Kmax, verbose : not used by this helper; kept so the call
        signature stays unchanged for callers

    Returns
    -------
    None; `self.m_` is updated in place.
    """
    self.m_ *= 0  # reset the m counts

    # Cycle through the k values of each restaurant
    j_idx, k_idx = np.where(self.q_ > 0)  # find the occupied clusters
    for i in np.random.permutation(len(j_idx)):
        jj, kk = j_idx[i], k_idx[i]
        max_m = self.q_[jj, kk]

        abk = self.a0_ * self.beta_samples[it, kk]
        m_range = np.arange(max_m) + 1
        log_s = np.array([self.stir_.stirlog(max_m, m) for m in m_range])

        # Unnormalised log weights.  The factor
        # logg(abk) - logg(abk + max_m) does not depend on m, so it cancels
        # in the normalisation below and is left out.
        log_w = log_s + m_range * np.log(abk)

        # Non-finite entries (e.g. abk == 0) carry no weight
        finite = np.isfinite(log_w)
        m_dist = np.zeros(len(m_range))
        if finite.any():
            m_dist[finite] = np.exp(log_w[finite] - np.max(log_w[finite]))

        # Small floor as before: keeps every m selectable and makes the
        # all-non-finite case fall back to a uniform draw
        m_dist += 1e-10
        m_dist /= np.sum(m_dist)

        self.m_[jj, kk] = np.random.choice(m_range, p=m_dist)
def draw_m(self, it, x, j, Kmax, verbose):
    """
    Helper function which does the draws from the m_jt full conditional.
    Resets and refills the `m_` count matrix in place using the beta sample
    stored for iteration `it`.  Called by gibbs_direct()

    For every (j, k) pair with q_[j, k] > 0, a value m in 1..q_[j, k] is
    drawn with probability proportional to
        exp(stir_.stirlog(q_[j, k], m)) * (a0_ * beta_samples[it, k]) ** m
    and stored in m_[j, k].

    The weights are normalised in log space (the largest log weight is
    subtracted before exponentiating) so that m_dist is always a proper
    distribution: a large count or a constant offset in the values returned
    by stirlog cannot underflow every term to zero and silently turn the
    draw into a uniform one.

    Parameters
    ----------
    it : index of the current iteration (row of `beta_samples`)
    x, j, Kmax, verbose : not used by this helper; kept so the call
        signature stays unchanged for callers

    Returns
    -------
    None; `self.m_` is updated in place.
    """
    self.m_ *= 0  # reset the m counts

    # Cycle through the k values of each restaurant
    j_idx, k_idx = np.where(self.q_ > 0)  # find the consumed dishes
    for i in np.random.permutation(len(j_idx)):
        jj, kk = j_idx[i], k_idx[i]
        max_m = self.q_[jj, kk]

        abk = self.a0_ * self.beta_samples[it, kk]
        m_range = np.arange(max_m) + 1
        log_s = np.array([self.stir_.stirlog(max_m, m) for m in m_range])

        # Unnormalised log weights.  The factor
        # logg(abk) - logg(abk + max_m) does not depend on m, so it cancels
        # in the normalisation below and is left out.
        log_w = log_s + m_range * np.log(abk)

        # Non-finite entries (e.g. abk == 0) carry no weight
        finite = np.isfinite(log_w)
        m_dist = np.zeros(len(m_range))
        if finite.any():
            m_dist[finite] = np.exp(log_w[finite] - np.max(log_w[finite]))

        # Small floor as before: keeps every m selectable and makes the
        # all-non-finite case fall back to a uniform draw
        m_dist += 1e-10

        mm1 = np.random.choice(m_range, p=m_dist / np.sum(m_dist))
        self.m_[jj, kk] = mm1
def mnom_fk_cust(i, x, k, Kmax, L, ha, new=False):
    """
    Mixture components of customer `i` under every dish 0..Kmax-1.

    MODEL: base measure H ~ Dirichlet(L, ha_1,...,ha_L),
           F(x|phi) ~ Multinomial(n_ji, phi_1,...,phi_L)

    Everything is accumulated in log space (via `logg`, presumably a
    log-gamma function defined elsewhere in this file) and exponentiated
    once at the end.

    i:    row index of the customer in x
    x:    count matrix indexed as x[row, :], one row per customer
    k:    array giving the dish index of every row of x
    Kmax: number of dishes to evaluate
    L:    number of columns of x
    ha:   (L,) Dirichlet parameters
    new:  when True, only the component of an empty dish is computed

    returns: (Kmax,) vector; if new=True, returns a scalar
    """
    counts_i = x[i, :]
    total_i = np.sum(x[i, :])

    # Multinomial coefficient of this customer's counts; the same for all k
    log_coef = logg(total_i + 1) - np.sum(logg(counts_i + np.ones(L)))

    if new == True:
        # A dish without members: only the prior parameters ha contribute
        lg_post = np.sum(logg(counts_i + ha))
        lg_post_total = logg(np.sum(counts_i + ha))
        lg_prior_total = logg(np.sum(ha))
        lg_prior = np.sum(logg(ha))
        return np.exp(
            log_coef + lg_post - lg_post_total + lg_prior_total - lg_prior
        )

    # Column totals of the customers currently eating each dish
    dish_totals = []
    for dish in range(Kmax):
        members = x[k == dish, :]
        dish_totals.append(np.sum(members, axis=0))

    # Dirichlet parameters without customer i ...
    a_without = np.vstack(dish_totals) + ha[None, :]  # (Kmax, L)
    a_without[k[i], :] -= counts_i  # i must not count towards its own dish

    # ... and with customer i added to every dish in turn
    def _with_customer(params):
        return params + counts_i

    a_with = np.apply_along_axis(_with_customer, 1, a_without)

    components = np.exp(
        log_coef
        + np.sum(logg(a_with), axis=1)
        - logg(np.sum(a_with, axis=1))
        + logg(np.sum(a_without, axis=1))
        - np.sum(logg(a_without), axis=1)
    )

    # Flatten to a plain 1-D array (the original notes this matters when x
    # is not a dense array)
    return np.asarray(components).ravel()
def mnom_fk_tabl(jj, tt, x, j, t, k, Kmax, L, ha, new=False):
    """
    Mixture components of table (jj, tt) under every dish 0..Kmax-1.

    MODEL: base measure H ~ Dirichlet(L, ha_1,...,ha_L),
           F(x|phi) ~ Multinomial(n_ji, phi_1,...,phi_L)

    Everything is accumulated in log space (via `logg`, presumably a
    log-gamma function defined elsewhere in this file) and exponentiated
    once at the end.

    jj, tt: restaurant and table identifying the table
    x:      count matrix, one row per customer
    j, t:   arrays giving the restaurant and table of every row of x
    k:      array giving the dish index of every row of x
    Kmax:   number of dishes to evaluate
    L:      not used in this function
    ha:     (L,) Dirichlet parameters
    new:    when True, only the component of an empty dish is computed;
            this is also what happens when no row of x sits at (jj, tt)

    returns: (Kmax,) vector; in the "new" case, returns a scalar
    """
    seated = np.logical_and(j == jj, t == tt)
    table_rows = x[seated, :]  # (|T|, L)
    table_dishes = k[seated]

    per_customer = np.sum(table_rows, axis=1)  # (|T|,)
    per_column = np.sum(table_rows, axis=0)  # (L,)

    # Multinomial coefficients of the table's customers; the same for all k
    log_coef = np.sum(logg(per_customer + 1)) - np.sum(logg(table_rows + 1))

    # A table that does not exist is treated like a request for a new dish
    if table_rows.shape[0] == 0 or new == True:
        return np.exp(
            log_coef
            + np.sum(logg(per_column + ha))
            - logg(np.sum(per_column + ha))
            + logg(np.sum(ha))
            - np.sum(logg(ha))
        )

    # Column totals of the customers currently eating each dish
    dish_totals = np.vstack(
        [np.sum(x[k == dish, :], axis=0) for dish in range(Kmax)]
    )

    # Dirichlet parameters without this table ...
    a_without = dish_totals + ha[None, :]  # (Kmax, L)
    # the table's own counts must not count towards the dish it serves
    a_without[table_dishes[0], :] -= per_column
    # ... and with the table added to every dish in turn
    a_with = a_without + per_column[None, :]

    return np.exp(
        log_coef
        + np.sum(logg(a_with), axis=1)
        - logg(np.sum(a_with, axis=1))
        + logg(np.sum(a_without, axis=1))
        - np.sum(logg(a_without), axis=1)
    )
def pois_fk_cust(i, x, k, Kmax, ha, hb, new=False):
    """
    Mixture components of customer `i` under every dish 0..Kmax-1.

    MODEL: base measure H ~ Gamma(ha, hb), F(x|phi) ~ Poisson(phi)

    Everything is accumulated in log space (via `logg`, presumably a
    log-gamma function defined elsewhere in this file) and exponentiated
    once at the end.

    i:      index of the customer in the flattened x
    x:      array of observations; flattened to 1D on entry
    k:      array giving the dish index of every observation
    Kmax:   number of dishes to evaluate
    ha, hb: parameters of the Gamma base measure
    new:    when True, only the component of an empty dish is computed

    returns: (Kmax,) vector; if new=True, returns a scalar
    """
    obs = x.flatten()  # the gibbs routine passes in a 2D array
    x_i = obs[i]

    if new == True:
        # A dish without members: only the prior (ha, hb) contributes
        return np.exp(
            -logg(x_i + 1)
            + logg(x_i + ha)
            - logg(ha)
            - (x_i + ha) * np.log(1 + hb)
            + ha * np.log(hb)
        )

    # Indicator of the dish customer i currently eats, used to take the
    # customer's own observation out of that dish's statistics
    own_dish = np.zeros(Kmax)
    own_dish[k[i]] = 1

    # Sum and number of observations for each dish
    dish_sum = np.array([np.sum(obs[k == dish]) for dish in range(Kmax)])
    dish_cnt = np.array([len(obs[k == dish]) for dish in range(Kmax)])

    # Updated (a, b) parameters of each dish, excluding customer i
    a_k = dish_sum - own_dish * x_i + ha
    b_k = dish_cnt - own_dish + hb

    return np.exp(
        -logg(x_i + 1)
        + logg(x_i + a_k)
        - logg(a_k)
        - (x_i + a_k) * np.log(1 + b_k)
        + a_k * np.log(b_k)
    )
def pois_fk_tabl(jj, tt, x, j, t, k, Kmax, ha, hb, new=False):
    """
    Mixture components of table (jj, tt) under every dish 0..Kmax-1.

    MODEL: base measure H ~ Gamma(ha, hb), F(x|phi) ~ Poisson(phi)

    Everything is accumulated in log space (via `logg`, presumably a
    log-gamma function defined elsewhere in this file) and exponentiated
    once at the end.

    jj, tt: restaurant and table identifying the table
    x:      array of observations; flattened to 1D on entry
    j, t:   arrays giving the restaurant and table of every observation
    k:      array giving the dish index of every observation
    Kmax:   number of dishes to evaluate
    ha, hb: parameters of the Gamma base measure
    new:    when True, only the component of an empty dish is computed;
            this is also what happens when nobody sits at (jj, tt)

    returns: (Kmax,) vector; in the "new" case every entry holds the same
             empty-dish component
    """
    obs = x.flatten()  # the gibbs routine passes in a 2D array
    seated = np.logical_and(j == jj, t == tt)
    table_obs = obs[seated]
    table_dishes = k[seated]

    table_sum = np.sum(table_obs)
    table_cnt = len(table_obs)
    log_fact = np.sum(logg(table_obs + 1))

    # A table that does not exist is treated like a request for a new dish
    if table_cnt == 0 or new == True:
        empty_dish = np.exp(
            -log_fact
            + logg(table_sum + ha)
            - logg(ha)
            - (table_sum + ha) * np.log(table_cnt + hb)
            + ha * np.log(hb)
        )
        return np.full(Kmax, empty_dish)

    # Indicator of the dish this table serves, used to take the table's own
    # observations out of that dish's statistics
    own_dish = np.zeros(Kmax)
    own_dish[table_dishes[0]] = 1

    # Sum and number of observations at tables serving each dish
    dish_sum = np.array([np.sum(obs[k == dish]) for dish in range(Kmax)])
    dish_cnt = np.array([len(obs[k == dish]) for dish in range(Kmax)])

    # Updated (a, b) parameters of each dish, excluding this table
    a_k = dish_sum - own_dish * table_sum + ha
    b_k = dish_cnt - own_dish * table_cnt + hb

    return np.exp(
        -log_fact
        + logg(table_sum + a_k)
        - logg(a_k)
        - (table_sum + a_k) * np.log(table_cnt + b_k)
        + a_k * np.log(b_k)
    )