def write_to_disk(self, path, cols):
    # create the target directory if needed, and start from a fresh database file
    folder = os.path.split(path)[0]
    if not os.path.exists(folder):
        os.mkdir(folder)
    if os.path.exists(path):
        os.remove(path)
    conn = sql.connect(path)
    V_df = pd.DataFrame(
        data.euclidean_to_hypercube(self.G.T[:cols].T),
        columns=['V_{}'.format(i) for i in range(cols)],
        )
    a_df = pd.DataFrame(
        self.alpha.T[:cols].T,
        columns=['alpha_{}'.format(i) for i in range(cols)],
        )
    b_df = pd.DataFrame(
        self.beta.T[:cols].T,
        columns=['beta_{}'.format(i) for i in range(cols)],
        )
    d_df = pd.DataFrame(
        self.delta.reshape(1, -1),
        columns=['delta_{}'.format(i) for i in range(self.nDat)],
        )
    p_df = pd.DataFrame(
        self.p.reshape(1, -1),
        columns=['p_{}'.format(i) for i in range(self.nMix)],
        )
    V_df.to_sql('data', conn, index=False)
    a_df.to_sql('alphas', conn, index=False)
    b_df.to_sql('betas', conn, index=False)
    d_df.to_sql('deltas', conn, index=False)
    p_df.to_sql('ps', conn, index=False)
    conn.commit()
    conn.close()
    return
def populate_cones(self, epsilon):
    postpred = euclidean_to_hypercube(
        self.generate_posterior_predictive_gammas())
    # binary pattern of which coordinates exceed epsilon, per posterior predictive draw
    C_damex = (postpred > epsilon)
    # empirical probability mass per cone, with a small floor for unvisited cones
    cones = defaultdict(lambda: 1e-10)
    for row in C_damex:
        cones[tuple(row)] += 1 / postpred.shape[0]
    return cones
def generate_posterior_predictive_hypercube(self, n_per_sample=1):
    postpred = np.empty((self.nSamp, n_per_sample, self.nCol))
    for n in range(self.nSamp):
        delta_new = choice(self.nMix, n_per_sample, p=self.samples.pi[n])
        zeta_new = self.samples.zeta[n, delta_new]
        postpred[n] = euclidean_to_simplex(
            gamma(shape=zeta_new, size=(n_per_sample, self.nCol)),
            )
    simplex = postpred.reshape(self.nSamp * n_per_sample, self.nCol)
    return euclidean_to_hypercube(simplex)
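# NOTE (illustrative sketch, not part of the original module): the methods in
# this section call euclidean_to_hypercube and euclidean_to_simplex, which are
# defined elsewhere in the project. Judging from how they are used here, they
# appear to project nonnegative rows onto the L-infinity unit sphere and the
# unit simplex respectively; minimal stand-ins under that assumption are below.

def _euclidean_to_hypercube_sketch(Y):
    # scale each row so its largest coordinate equals 1
    Y = np.asarray(Y, dtype=float)
    return Y / Y.max(axis=-1, keepdims=True)

def _euclidean_to_simplex_sketch(Y):
    # scale each row so its coordinates sum to 1
    Y = np.asarray(Y, dtype=float)
    return Y / Y.sum(axis=-1, keepdims=True)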
def hypercube_distance(self):
    mr = self.r.mean(axis=0)
    mrho = self.rho.mean(axis=0)
    Y = np.hstack((mr[:, None] * self.data.Yp, mrho))
    V = euclidean_to_hypercube(Y)
    return hypercube_distance_matrix(
        self.generate_posterior_predictive_gammas(), V, self.pool,
        )
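# Similarly, hypercube_distance_matrix(A, B, pool) is defined elsewhere in the
# project; from its use here it appears to return pairwise distances between
# two sets of hypercube-projected points, parallelized over a multiprocessing
# pool. A serial stand-in under that assumption (the actual metric used by the
# project may differ) could be:

def _hypercube_distance_matrix_sketch(A, B):
    # pairwise Euclidean distances between rows of A (m, d) and rows of B (n, d)
    diff = A[:, None, :] - B[None, :, :]
    return np.sqrt((diff ** 2).sum(axis=-1))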
def generate_posterior_predictive_hypercube(self, n_per_sample=1):
    postpred = np.empty((self.nSamp, n_per_sample, self.nCol))
    for n in range(self.nSamp):
        delta_new = choice(self.nMix, n_per_sample, p=self.samples.p[n])
        alpha_new = self.samples.alpha[n][delta_new]
        beta_new = self.samples.beta[n][delta_new]
        postpred[n] = euclidean_to_hypercube(
            gamma(shape=alpha_new, scale=1 / beta_new,
                  size=(n_per_sample, self.nCol)),
            )
    return postpred.reshape(-1, self.nCol)
def cone_density(self, epsilon=0.5, **kwargs):
    cone_prob = self.populate_cones(epsilon)
    scores = np.empty(self.data.nDat)
    try:
        Y = euclidean_to_hypercube(
            np.hstack((
                self.samples.r.mean(axis=0)[:, None] * self.data.V,
                self.samples.rho.mean(axis=0),
                )))
    except AttributeError:
        # no latent radii / rho samples available; use the observed hypercube directly
        Y = self.data.V
    for i in range(self.nDat):
        scores[i] = cone_prob[tuple(Y[i] > epsilon)]
    return scores
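# Illustrative usage of the cone-based score above (object and variable names
# here are hypothetical): an observation whose epsilon-exceedance pattern falls
# in a cone rarely visited by the posterior predictive receives a score near
# the defaultdict floor of 1e-10, marking it as a candidate anomaly.
#
#     scores = model.cone_density(epsilon=0.5)
#     suspects = np.argsort(scores)[:10]   # ten lowest-probability observations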
def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
    gammas = self.generate_posterior_predictive_gammas(n_per_sample, m)
    hypcube = euclidean_to_hypercube(gammas[:, :self.nCol])
    simplex = []
    cat_idx = np.where(self.sigma_unity)[0][1:]
    for i in range(cat_idx.shape[0]):
        cat_start = cat_idx[i]
        try:
            cat_end = cat_idx[i + 1]
        except IndexError:
            cat_end = self.sigma_unity.shape[0]
        simplex.append(euclidean_to_simplex(gammas[:, cat_start:cat_end]))
    return np.hstack([hypcube] + simplex)
def hypercube_distance_latent(self):
    R = self.generate_conditional_posterior_predictive_radii()   # (s, n)
    Y1 = R[:, :, None] * self.data.V[None, :, :]                 # (s, n, d1)
    Y2 = self.generate_conditional_posterior_predictive_gammas(
        )[:, :, self.nCol:]                                      # (s, n, d2)
    Y_con = np.swapaxes(np.concatenate((Y1, Y2), axis=2), 0, 1)  # (n, s, d)
    V_con = np.array(list(map(euclidean_to_hypercube, Y_con)))
    V_new = euclidean_to_hypercube(
        self.generate_posterior_predictive_gammas())
    # s1 = choice(np.arange(R.shape[0]), size=R.shape[0] // 2, replace=False)
    # s2 = choice(np.arange(R.shape[0]), size=R.shape[0] // 2, replace=False)
    # res = self.pool.map(hypercube_dmat, zip(repeat(V_new[s1]), V_con[:, s2]))
    s = np.random.choice(V_new.shape[0], V_new.shape[0] // 2, False)
    res = self.pool.map(hypercube_dmat, zip(repeat(V_new), V_con[:, s]))
    return np.array(list(res))
def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
    gammas = self.generate_posterior_predictive_gammas(n_per_sample, m)
    # hypercube transformation for real variates
    hypcube = euclidean_to_hypercube(gammas[:, :self.nCol])
    # simplex transformation for categorical variates
    simplex_reverse = []
    indices = list(np.arange(self.nCol + self.nCat))
    # for each category, last first
    for i in list(range(self.cats.shape[0]))[::-1]:
        # identify the ending index (+1 to include boundary)
        cat_length = self.cats[i]
        cat_end = indices.pop() + 1
        # identify the starting index
        for _ in range(cat_length - 1):
            cat_start = indices.pop()
        # transform gamma variates to simplex
        simplex_reverse.append(
            euclidean_to_simplex(gammas[:, cat_start:cat_end]))
    # stack hypercube and categorical variables side by side
    return np.hstack([hypcube] + simplex_reverse[::-1])
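# Worked example of the index bookkeeping above (values are hypothetical):
# with nCol = 3 real columns and cats = [3, 2] (so nCat = 5), indices runs 0..7.
# The reversed loop first pops 7 (cat_end = 8) and then 6 (cat_start = 6) for
# the last category, giving gammas[:, 6:8]; the next pass pops 5, 4, 3, yielding
# gammas[:, 3:6] for the first category. Reversing simplex_reverse restores the
# original column order before stacking it next to the hypercube block.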
def write_to_disk(self, path, nCol):
    if not os.path.exists(path):
        os.mkdir(path)
    V = data.euclidean_to_hypercube(self.G.T[:nCol].T)
    # Pareto radii, scaled up by a factor of (1 + 0.3 * y)
    R = pareto(1, size=self.nSamp) * (np.ones(self.nSamp) + 0.3 * self.y)
    Z = (V.T * R).T
    Z_df = pd.DataFrame(Z, columns=['Z_{}'.format(i) for i in range(nCol)])
    y_df = pd.DataFrame({'y': self.y})
    z_path = os.path.join(path, 'ad_sim_m{}_c{}_x.csv'.format(self.nMix, nCol))
    y_path = os.path.join(path, 'ad_sim_m{}_c{}_y.csv'.format(self.nMix, nCol))
    Z_df.to_csv(z_path, index=False)
    y_df.to_csv(y_path, index=False)
    return
def generate_posterior_predictive_hypercube(self, n_per_sample=1, m=10):
    gammas = self.generate_posterior_predictive_gammas(n_per_sample, m)
    return euclidean_to_hypercube(gammas)
def generate_posterior_predictive_hypercube(self, n_per_sample=1):
    euc = self.generate_posterior_predictive_gammas(n_per_sample)
    return euclidean_to_hypercube(euc)
def hypercube_distance_real(self):
    Vnew = euclidean_to_hypercube(
        self.generate_posterior_predictive_gammas()[:, :self.nCol],
        )
    return hypercube_distance_matrix(Vnew, self.data.V, self.pool)