Code Example #1
    def sample(self, n_samples, seed=5):
        # Draw Gaussian features; only the first n_true columns drive the label.
        with util.NumpySeedContext(seed=seed):
            x = np.random.randn(n_samples, self.n_dim)
        x_true = x[:, :self.n_true]
        # Logistic link: P(y = 1 | x) = sigmoid(sum of the informative features).
        p = np.expand_dims(
            np.exp(np.sum(x_true, axis=1)) /
            (1 + np.exp(np.sum(x_true, axis=1))), 1)
        # Bernoulli labels drawn under a separate seed.
        with util.NumpySeedContext(seed=seed + 1):
            y_bern = bernoulli.rvs(p=p, size=(p.shape[0], 1))
        return x, y_bern
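
Every example on this page wraps its random draws in util.NumpySeedContext. The project's own implementation is not reproduced here; the following is a minimal sketch of what such a context manager typically does (seed the global NumPy RNG on entry, restore the previous state on exit), offered under the assumption that the project's version behaves the same way.

import numpy as np

class NumpySeedContext(object):
    """Seed NumPy's global RNG inside a with-block, then restore the old state."""

    def __init__(self, seed):
        self.seed = seed

    def __enter__(self):
        # Remember the current global RNG state before reseeding.
        self.saved_state = np.random.get_state()
        np.random.seed(self.seed)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Put the global RNG back exactly where it was.
        np.random.set_state(self.saved_state)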
Code Example #2
    def sample(self, n_samples, seed):
        n_dim = self.n_dim
        # First sample: standard multivariate normal with identity covariance.
        with util.NumpySeedContext(seed=seed + 2):
            p = np.random.multivariate_normal(np.zeros(n_dim),
                                              np.eye(n_dim),
                                              size=n_samples)

        # Second sample: zero mean with per-dimension variances self.v_change.
        with util.NumpySeedContext(seed=seed + 5):
            r = np.random.multivariate_normal(np.zeros(n_dim),
                                              np.diag(self.v_change),
                                              size=n_samples)
        return p, r
Code Example #3
def bs_prob(bs_proc, n_samples, n_bootstrap, region, seed=5):
    """
        Calculate the bootstrap probability that X falls in the given region.
        bs_proc: callable producing bootstrap samples, called as bs_proc(n_samples, n_bootstrap, seed).
        region : callable evaluated on the bootstrap samples, called as region(bs_samples, n_samples).
    """
    bs_samples = bs_proc(n_samples, n_bootstrap, seed)
    with util.NumpySeedContext(seed=seed + 40):
        c = region(bs_samples, n_samples)
    return c
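
As a hypothetical illustration of how the two callables might be wired together (the names my_bs_proc and in_positive_region are invented for this sketch; p_bs_n_samples is the parametric bootstrap shown in Code Example #9 below):

def my_bs_proc(n_samples, n_bootstrap, seed):
    # Parametric bootstrap of a sample mean for N(0, 1) data (illustrative parameters).
    return p_bs_n_samples((0.0, 1.0), n_samples, n_bootstrap, seed)

def in_positive_region(bs_samples, n_samples):
    # Fraction of bootstrap replicates that fall in the region {x > 0}.
    return np.mean(bs_samples > 0)

prob = bs_prob(my_bs_proc, n_samples=200, n_bootstrap=1000,
               region=in_positive_region)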
Code Example #4
    def sample(self, n_samples, seed=5):
        l_samples = []
        for i, c in enumerate(self.classes):
            # Subsample n_samples rows of class c, dropping the target column.
            df_samples = self.df[self.df[self.target] == c].drop(
                self.target, axis=1).sample(n_samples,
                                            random_state=seed + i).values
            # Append n_fakes uninformative Gaussian columns, seeded per class.
            with util.NumpySeedContext(seed=seed * (i + 1)):
                fakes = np.random.randn(n_samples, self.n_fakes)
            l_samples.append(np.hstack((df_samples, fakes)))
        return l_samples
Code Example #5
    def estimates(self, x, y, seed=2):
        """
        Compute Unbiased HSIC
        Code from: https://www.cc.gatech.edu/~lsong/papers/SonSmoGreBedetal12.pdf
        """
        m = int(x.shape[0] * self.ratio)
        n_samples = x.shape[0]
        n_comb = comb(n_samples, 4)

        with util.NumpySeedContext(seed=seed):
            S = np.random.randint(0, n_comb, size=m)

        def mapping(S, n_samples, l):
            for index in S:
                res = index
                coord = []
                for power in range(1, 5):
                    norm = np.math.factorial(n_samples - power) / (
                        np.math.factorial(n_samples - 4) *
                        np.math.factorial(4))
                    i = int(np.floor(res / norm))
                    res = res - i * norm
                    coord.append(i)

                i, j, q, r = coord

                # Keep the four indices distinct (off-diagonal elements only)
                j = j if i != j else j + 1

                q = q if q != i else q + 1
                q = q if q != j else q + 1

                r = r if r != i else r + 1
                r = r if r != j else r + 1
                r = r if r != q else r + 1
                yield i, j, q, r

        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
               "Arguments 1 and 2 have different numbers of data points"

        K = self.k.eval(x, x)
        L = self.l.eval(y, y)
        kMat, lMat = K - np.diag(K.diagonal()), \
                    L - np.diag(L.diagonal())

        estimates = np.zeros(m)
        for i, indices in enumerate(mapping(S, nx[0], m)):
            acc = 0
            for s, t, u, v in permutations(indices):
                acc += kMat[s, t] * (lMat[s, t] + lMat[u, v] -
                                     2 * lMat[s, u]) / 24
            estimates[i] = acc
        return estimates.flatten()
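
For reference, the unbiased HSIC estimator from the cited paper also has a closed form over the zero-diagonal Gram matrices, without the random subsampling of four-tuples used above. The sketch below assumes that form; the helper rbf_gram is an invented stand-in for the project's self.k.eval and self.l.eval kernel objects.

import numpy as np

def rbf_gram(a, b, sigma2=1.0):
    # Gaussian kernel Gram matrix for 2-D arrays a (m x d) and b (n x d).
    sq = np.sum(a**2, 1)[:, None] + np.sum(b**2, 1)[None, :] - 2 * a @ b.T
    return np.exp(-sq / (2.0 * sigma2))

def hsic_unbiased(x, y, sigma2_x=1.0, sigma2_y=1.0):
    # Closed-form unbiased HSIC over m paired samples (requires m > 3).
    m = x.shape[0]
    K = rbf_gram(x, x, sigma2_x)
    L = rbf_gram(y, y, sigma2_y)
    np.fill_diagonal(K, 0.0)   # zero-diagonal kernel matrices, as in kMat/lMat above
    np.fill_diagonal(L, 0.0)
    term1 = np.trace(K @ L)
    term2 = K.sum() * L.sum() / ((m - 1) * (m - 2))
    term3 = 2.0 * np.ones(m) @ K @ L @ np.ones(m) / (m - 2)
    return (term1 + term2 - term3) / (m * (m - 3))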
Code Example #6
    def sample(self, n_samples, seed=5):
        ## DISJOINT SET
        model_features = {}
        ref_samples = []
        with util.NumpySeedContext(seed=seed):
            ## FOR EACH CELEBA CLASS
            for key, features in self.celeba_features.items():
                # CALCULATE HOW MUCH SHOULD BE IN THE REFERENCE POOL
                n_ref_samples = int(
                    np.round(self.ref_classes_mix[key] * n_samples))
                random_features = np.random.permutation(features)

                ## FOR THE CANDIDATE MODELS
                model_features[key] = random_features[n_ref_samples:]
                ## FOR THE REFERENCE
                ref_samples.append(random_features[:n_ref_samples])

        ## samples for models
        model_samples = []
        for j, class_ratios in enumerate(self.model_classes_mix):
            model_class_samples = []
            for i, data_class in enumerate(class_ratios.keys()):
                n_class_samples = int(
                    np.round(class_ratios[data_class] * n_samples))
                seed_class = i * n_samples + seed * j
                with util.NumpySeedContext(seed=seed_class):
                    indices = np.random.choice(
                        model_features[data_class].shape[0], n_class_samples)
                model_class_samples.append(model_features[data_class][indices])
            class_samples = dict(zip(class_ratios.keys(), model_class_samples))
            model_class_stack = np.vstack(list(class_samples.values()))
            model_samples.append(model_class_stack)
            #assert model_class_stack.shape[0] == n_samples, "Sample size mismatch: {0} instead of {1}".format(samples.shape[0],n)
        with util.NumpySeedContext(seed=seed + 5):
            ref_samples = np.random.permutation(np.vstack(ref_samples))
            model_samples = [
                np.random.permutation(samples) for samples in model_samples
            ]
        assert ref_samples.shape[0] == n_samples, \
                "Sample size mismatch: {0} instead of {1}".format(
                    ref_samples.shape[0], n_samples)
        return np.stack(model_samples, axis=1), \
                np.repeat(ref_samples[:, np.newaxis], axis=1, repeats=len(model_samples))
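
As a rough orientation for callers, a hypothetical call on an instance of this class (the name sampler and the shapes below are inferred from the return statement; the per-model counts match n_samples only up to the rounding of the class mixes):

# M candidate models, d-dimensional CelebA feature vectors (illustrative names).
model_samples, ref_samples = sampler.sample(n_samples=500, seed=5)
# model_samples has shape (500, M, d); ref_samples has the same shape and
# repeats one shuffled reference pool once per candidate model.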
Code Example #7
def np_bs_n_samples(l_samples, n_samples, n_bs, seed):
    """
        Nonparametric bootstrap samples from l_samples.
        n_samples: Number of samples to bootstrap from l_samples.
        n_bs     : Number of bootstrap iterations.
    """
    b_samples = []
    for i, samples in enumerate(l_samples):
        with util.NumpySeedContext(seed=(i + 1) * seed + i):
            # Resample rows with replacement; the leading axes of the result are (n_bs, n_samples).
            idx = np.random.choice(samples.shape[0], size=(n_bs, n_samples))
            b_samples.append(samples[idx])
    return b_samples
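
A quick usage sketch (the input arrays are made-up illustrative data); each entry of the returned list stacks n_bs resampled data sets of n_samples rows each:

# Two sample sets of different dimensionality, resampled with a shared base seed.
l_samples = [np.random.randn(200, 3), np.random.randn(200, 5)]
b_samples = np_bs_n_samples(l_samples, n_samples=200, n_bs=1000, seed=7)
# b_samples[0].shape == (1000, 200, 3); b_samples[1].shape == (1000, 200, 5)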
Code Example #8
    def sample(self, n_samples, seed=5):
        # Keep only the rows whose target label is one of self.classes.
        target_indices = np.isin(self.df[self.target].values, self.classes)
        with util.NumpySeedContext(seed=seed):
            df_samples = self.df[target_indices].sample(n_samples,
                                                        random_state=seed)
            # Uninformative Gaussian columns appended to the real features.
            fakes = np.random.randn(n_samples, self.n_fakes)
            # One-hot encode the labels with the fitted encoder.
            y = np.expand_dims(df_samples[self.target].values, axis=1)
            y_enc = self.enc.transform(y).toarray()
            x = df_samples.drop(self.target, axis=1).values
            x = np.hstack((x, fakes)).astype(float)
        return x, y_enc
Code Example #9
File: msboot.py  Project: sohel10/multiscale-features
def p_bs_n_samples(params, n_samples, n_bs, seed):
    """
        Parametric bootstrap samples
        params: a tuple (mu, sigma) describing the parameters of a normal.
        n_samples: number of samples to sample from the normal distribution.
        seed:seed number for reproducability. 
        
        Note that Sigma does NOT carries (1/n)
    """
    mu, sigma = params[0], params[1]
    with util.NumpySeedContext(seed=seed):
        norm_rvs = np.sqrt(sigma/n_samples)* np.random.randn(n_bs,1) + mu
    return norm_rvs
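
A usage sketch with illustrative parameters: for mu = 0, sigma = 1 and n_samples = 100, the call draws 1000 values from N(0, 1/100), the parametric bootstrap distribution of a sample mean in this setting.

reps = p_bs_n_samples(params=(0.0, 1.0), n_samples=100, n_bs=1000, seed=3)
# reps has shape (1000, 1); its standard deviation should be close to 0.1.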
Code Example #10
def p_bs_n_samples(params, n_samples, n_bs, seed):
    """
        Parametric bootstrap samples
        params: a tuple (mu, sigma) describing the parameters of a normal.
        n_samples: number of samples to sample from the normal distribution.
        seed:seed number for reproducability. 
        
        Note that Sigma carries (1/n)
    """
    mu, sigma, f, n = params
    with util.NumpySeedContext(seed=seed):
        norm_rvs = np.random.multivariate_normal(np.sqrt(f(n_samples))*mu,
                sigma,
                check_valid='raise',
                size=(n_bs))
    return norm_rvs
Code Example #11
File: mmd.py  Project: sohel10/multiscale-features
def mmd_med_heuristic(models, ref, subsample=1000, seed=100):
    """
        Median heuristic bandwidth (sigma^2) for the MMD kernel, computed on a
        common random subsample of each model and the reference sample.
    """
    # subsample first
    n = ref.shape[0]
    assert subsample > 0
    sub_models = []
    with util.NumpySeedContext(seed=seed):
        ind = np.random.choice(n, min(subsample, n), replace=False)
        for i in range(len(models)):
            sub_models.append(models[i][ind, :])
        sub_ref = ref[ind, :]

    med_mz = np.zeros(len(sub_models))
    for i, model in enumerate(sub_models):
        sq_pdist_mz = util.dist_matrix(model, sub_ref)**2
        med_mz[i] = np.median(sq_pdist_mz)**0.5

    sigma2 = 0.5 * np.mean(med_mz)**2
    return sigma2
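
The returned sigma2 is a squared bandwidth; it is typically plugged into a Gaussian kernel when computing the MMD. A minimal sketch of such a kernel follows (the helper name gauss_kernel is an assumption for illustration, not part of the project's API):

import numpy as np

def gauss_kernel(x, y, sigma2):
    # k(x, y) = exp(-||x - y||^2 / (2 * sigma2)), with sigma2 from mmd_med_heuristic.
    sq = np.sum(x**2, 1)[:, None] + np.sum(y**2, 1)[None, :] - 2 * x @ y.T
    return np.exp(-sq / (2.0 * sigma2))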