from itertools import permutations
import math

import numpy as np
from scipy.special import comb
from scipy.stats import bernoulli

import util  # project-local module providing NumpySeedContext and dist_matrix


def sample(self, n_samples, seed=5):
    """Draw Gaussian features and Bernoulli labels via a logistic link.

    Only the first self.n_true columns influence the label; the
    remaining dimensions are noise.
    """
    with util.NumpySeedContext(seed=seed):
        x = np.random.randn(n_samples, self.n_dim)
        x_true = x[:, :self.n_true]
        # p = sigmoid(sum of informative features), shape (n_samples, 1)
        p = np.expand_dims(
            np.exp(np.sum(x_true, axis=1))
            / (1 + np.exp(np.sum(x_true, axis=1))), 1)
    with util.NumpySeedContext(seed=seed + 1):
        y_bern = bernoulli.rvs(p=p, size=(p.shape[0], 1))
    return x, y_bern

def sample(self, n_samples, seed):
    """Draw paired Gaussians: p ~ N(0, I), r ~ N(0, diag(v_change))."""
    n_dim = self.n_dim
    with util.NumpySeedContext(seed=seed + 2):
        p = np.random.multivariate_normal(
            np.zeros(n_dim), np.eye(n_dim), size=n_samples)
    with util.NumpySeedContext(seed=seed + 5):
        r = np.random.multivariate_normal(
            np.zeros(n_dim), np.diag(self.v_change), size=n_samples)
    return p, r

def bs_prob(bs_proc, n_samples, n_bootstrap, region, seed=5):
    """Calculate the bootstrap probability of X in region."""
    bs_samples = bs_proc(n_samples, n_bootstrap, seed)
    with util.NumpySeedContext(seed=seed + 40):
        c = region(bs_samples, n_samples)
    return c

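# A minimal usage sketch for bs_prob, assuming any callable with a
# (n_samples, n_bs, seed) signature works as bs_proc and that region maps
# the bootstrap draws to a probability. Both callables below are
# hypothetical illustrations, not part of the original code.
def _demo_bs_prob():
    def bs_proc(n_samples, n_bs, seed):
        # toy bootstrap procedure: n_bs standard-normal draws
        # (n_samples is ignored here, purely for illustration)
        with util.NumpySeedContext(seed=seed):
            return np.random.randn(n_bs, 1)

    # toy region: fraction of bootstrap draws falling above zero
    region = lambda bs_samples, n_samples: np.mean(bs_samples > 0)
    return bs_prob(bs_proc, n_samples=100, n_bootstrap=500, region=region)
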
def sample(self, n_samples, seed=5):
    """Per class, draw real rows from the dataframe and append fake
    Gaussian noise features."""
    l_samples = []
    for i, c in enumerate(self.classes):
        df_samples = self.df[self.df[self.target] == c].drop(
            self.target, axis=1).sample(
                n_samples, random_state=seed + i).values
        with util.NumpySeedContext(seed=seed * (i + 1)):
            fakes = np.random.randn(n_samples, self.n_fakes)
        l_samples.append(np.hstack((df_samples, fakes)))
    return l_samples

def estimates(self, x, y, seed=2):
    """Compute unbiased HSIC estimates over random 4-tuples.

    Code from:
    https://www.cc.gatech.edu/~lsong/papers/SonSmoGreBedetal12.pdf
    """
    m = int(x.shape[0] * self.ratio)
    n_samples = x.shape[0]
    n_comb = comb(n_samples, 4, exact=True)
    with util.NumpySeedContext(seed=seed):
        S = np.random.randint(0, n_comb, size=m)

    def mapping(S, n_samples, l):
        # Decode each flat index in S into a 4-tuple (i, j, q, r) of
        # distinct sample indices. (The parameter l is unused.)
        for index in S:
            res = index
            coord = []
            for power in range(1, 5):
                norm = math.factorial(n_samples - power) / (
                    math.factorial(n_samples - 4) * math.factorial(4))
                i = int(np.floor(res / norm))
                res = res - i * norm
                coord.append(i)
            i, j, q, r = coord
            # Shift duplicates so all four indices are off-diagonal
            j = j if i != j else j + 1
            q = q if q != i else q + 1
            q = q if q != j else q + 1
            r = r if r != i else r + 1
            r = r if r != j else r + 1
            r = r if r != q else r + 1
            yield i, j, q, r

    nx = x.shape
    ny = y.shape
    assert nx[0] == ny[0], \
        "Argument 1 and 2 have different number of data points"
    K = self.k.eval(x, x)
    L = self.l.eval(y, y)
    # Gram matrices with zeroed diagonals
    kMat, lMat = K - np.diag(K.diagonal()), \
        L - np.diag(L.diagonal())
    estimates = np.zeros(m)
    for i, indices in enumerate(mapping(S, nx[0], m)):
        acc = 0
        for s, t, u, v in permutations(indices):
            acc += kMat[s, t] * (lMat[s, t] + lMat[u, v]
                                 - 2 * lMat[s, u]) / 24
        estimates[i] = acc
    return estimates.flatten()

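# For reference, a minimal sketch of the full O(m^2) unbiased HSIC
# statistic (Song et al., 2012) that estimates() above approximates by
# averaging kernel terms over randomly drawn 4-tuples. It assumes kMat
# and lMat are Gram matrices with zeroed diagonals, as computed in
# estimates(), and m >= 4. This is an editorial illustration, not part
# of the original code.
def unbiased_hsic(kMat, lMat):
    m = kMat.shape[0]
    ones = np.ones(m)
    term1 = np.trace(kMat @ lMat)
    term2 = (ones @ kMat @ ones) * (ones @ lMat @ ones) / ((m - 1) * (m - 2))
    term3 = 2.0 * (ones @ kMat @ lMat @ ones) / (m - 2)
    return (term1 + term2 - term3) / (m * (m - 3))
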
def sample(self, n_samples, seed=5):
    # Split each CelebA class into disjoint reference and model pools,
    # then mix class samples per candidate model.
    model_features = {}
    ref_samples = []
    with util.NumpySeedContext(seed=seed):
        # for each CelebA class
        for key, features in self.celeba_features.items():
            # how much of this class goes into the reference pool
            n_ref_samples = int(
                np.round(self.ref_classes_mix[key] * n_samples))
            random_features = np.random.permutation(features)
            # remainder is reserved for the candidate models
            model_features[key] = random_features[n_ref_samples:]
            # head goes to the reference
            ref_samples.append(random_features[:n_ref_samples])

    # samples for models
    model_samples = []
    for j, class_ratios in enumerate(self.model_classes_mix):
        model_class_samples = []
        for i, data_class in enumerate(class_ratios.keys()):
            n_class_samples = int(
                np.round(class_ratios[data_class] * n_samples))
            seed_class = i * n_samples + seed * j
            with util.NumpySeedContext(seed=seed_class):
                indices = np.random.choice(
                    model_features[data_class].shape[0], n_class_samples)
            model_class_samples.append(model_features[data_class][indices])
        class_samples = dict(zip(class_ratios.keys(), model_class_samples))
        model_class_stack = np.vstack(list(class_samples.values()))
        model_samples.append(model_class_stack)
        # assert model_class_stack.shape[0] == n_samples, \
        #     "Sample size mismatch: {0} instead of {1}".format(
        #         model_class_stack.shape[0], n_samples)

    with util.NumpySeedContext(seed=seed + 5):
        ref_samples = np.random.permutation(np.vstack(ref_samples))
        model_samples = [
            np.random.permutation(samples) for samples in model_samples
        ]
    assert ref_samples.shape[0] == n_samples, \
        "Sample size mismatch: {0} instead of {1}".format(
            ref_samples.shape[0], n_samples)
    return np.stack(model_samples, axis=1), \
        np.repeat(ref_samples[:, np.newaxis], axis=1,
                  repeats=len(model_samples))

def np_bs_n_samples(l_samples, n_samples, n_bs, seed):
    """Nonparametric bootstrap samples from l_samples.

    n_samples: number of samples to bootstrap from l_samples.
    n_bs: number of bootstrap iterations.
    """
    b_samples = []
    for i, samples in enumerate(l_samples):
        with util.NumpySeedContext(seed=(i + 1) * seed + i):
            b_samples.append(
                samples[np.random.choice(samples.shape[0],
                                         size=(n_bs, n_samples))])
    return b_samples

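# Usage sketch: 200 nonparametric bootstrap replicates of size 50 from
# two toy sample sets (the data here is purely illustrative).
def _demo_np_bs():
    l_samples = [np.random.randn(500, 3), np.random.randn(500, 3) + 1.0]
    b_samples = np_bs_n_samples(l_samples, n_samples=50, n_bs=200, seed=7)
    # each element has shape (n_bs, n_samples, dim) = (200, 50, 3)
    return [b.shape for b in b_samples]
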
def sample(self, n_samples, seed=5):
    """Draw rows of the target classes, append fake Gaussian features,
    and return the features with one-hot-encoded labels."""
    target_indices = np.isin(self.df[self.target].values, self.classes)
    with util.NumpySeedContext(seed=seed):
        df_samples = self.df[target_indices].sample(n_samples,
                                                    random_state=seed)
        fakes = np.random.randn(n_samples, self.n_fakes)
    y = np.expand_dims(df_samples[self.target].values, axis=1)
    y_enc = self.enc.transform(y).toarray()
    x = df_samples.drop(self.target, axis=1).values
    x = np.hstack((x, fakes)).astype(float)
    return x, y_enc

def p_bs_n_samples(params, n_samples, n_bs, seed):
    """Parametric bootstrap samples (scalar normal).

    params: a tuple (mu, sigma) describing the parameters of a normal.
    n_samples: number of samples to sample from the normal distribution.
    seed: seed number for reproducibility.
    Note that sigma does NOT carry the (1/n) factor.
    """
    mu, sigma = params[0], params[1]
    with util.NumpySeedContext(seed=seed):
        norm_rvs = np.sqrt(sigma / n_samples) * np.random.randn(n_bs, 1) + mu
    return norm_rvs

def p_bs_n_samples(params, n_samples, n_bs, seed):
    """Parametric bootstrap samples (multivariate variant; note it reuses
    the name of the scalar version above and shadows it if both are
    defined in one module).

    params: a tuple (mu, sigma, f, n) describing the parameters of a
        normal; f is a scaling function applied to the mean as
        sqrt(f(n_samples)), and n is unused here.
    n_samples: number of samples to sample from the normal distribution.
    seed: seed number for reproducibility.
    Note that sigma carries the (1/n) factor.
    """
    mu, sigma, f, n = params
    with util.NumpySeedContext(seed=seed):
        norm_rvs = np.random.multivariate_normal(
            np.sqrt(f(n_samples)) * mu, sigma,
            check_valid='raise', size=n_bs)
    return norm_rvs

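# Usage sketch for the multivariate variant; mu, sigma, and the scaling
# function f below are illustrative assumptions, not values from the
# original code.
def _demo_p_bs():
    mu = np.array([0.5, -0.5])
    sigma = 0.01 * np.eye(2)   # already carries the (1/n) factor
    f = lambda n: float(n)     # assumed scaling applied to the mean
    draws = p_bs_n_samples((mu, sigma, f, 100), n_samples=100,
                           n_bs=1000, seed=3)
    return draws.shape         # (1000, 2)
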
def mmd_med_heuristic(models, ref, subsample=1000, seed=100):
    """Median heuristic for the Gaussian-kernel bandwidth, averaged over
    model-vs-reference sample pairs."""
    # subsample first
    n = ref.shape[0]
    assert subsample > 0
    sub_models = []
    with util.NumpySeedContext(seed=seed):
        ind = np.random.choice(n, min(subsample, n), replace=False)
    for i in range(len(models)):
        sub_models.append(models[i][ind, :])
    sub_ref = ref[ind, :]

    med_mz = np.zeros(len(sub_models))
    for i, model in enumerate(sub_models):
        sq_pdist_mz = util.dist_matrix(model, sub_ref)**2
        med_mz[i] = np.median(sq_pdist_mz)**0.5
    sigma2 = 0.5 * np.mean(med_mz)**2
    return sigma2

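# Usage sketch: a shared Gaussian-kernel bandwidth for two candidate
# models against one reference sample (toy data, purely illustrative).
def _demo_med_heuristic():
    ref = np.random.randn(2000, 5)
    models = [np.random.randn(2000, 5), np.random.randn(2000, 5) + 0.5]
    sigma2 = mmd_med_heuristic(models, ref, subsample=500, seed=8)
    return sigma2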