コード例 #1
0
def get_kernel(request, features):
    """Build the Shogun kernel selected by the ``kernel`` POST field.

    Parameters
    ----------
    request : object exposing a ``POST`` mapping
        ``POST["kernel"]`` selects the kernel. ``POST["sigma"]`` is read for
        ``"GaussianKernel"`` and ``POST["degree"]`` for ``"PolynomialKernel"``.
    features :
        Passed as both the left- and right-hand features of the kernel.

    Returns
    -------
    The constructed kernel object.

    Raises
    ------
    ValueError
        If the kernel name is missing or not recognised, or if the parameter
        required by the selected kernel is missing or cannot be converted.
    """
    # Only the errors a failed lookup / conversion can produce are translated
    # into ValueError; KeyboardInterrupt and SystemExit now propagate.
    try:
        kernel_name = request.POST["kernel"]
    except (AttributeError, KeyError, TypeError):
        raise ValueError("Unknown kernel")

    if kernel_name == "GaussianKernel":
        try:
            sigma = float(request.POST["sigma"])
        except (AttributeError, KeyError, TypeError, ValueError):
            raise ValueError("Sigma is not correct")
        kernel = sg.GaussianKernel(features, features, sigma)
    elif kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(features, features)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    elif kernel_name == "PolynomialKernel":
        try:
            degree = int(request.POST["degree"])
        except (AttributeError, KeyError, TypeError, ValueError):
            raise ValueError("degree is not correct")
        kernel = sg.PolyKernel(features, features, degree, True)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    else:
        raise ValueError("Unknown kernel")

    return kernel
コード例 #2
0
    def get_shogun_statistics(self):
        """Compute the biased and unbiased quadratic-time MMD statistics.

        Wraps ``self._x`` and ``self._y`` as single-row Shogun feature
        matrices, stores a freshly configured ``QuadraticTimeMMD`` in
        ``self._mmd``, stores the unbiased statistic in ``self._statistic``
        and prints both statistics.

        Returns
        -------
        self
        """
        n = len(self._x)

        # Shogun stores one sample per column, hence the (1, n) reshape.
        samples_p = sg.RealFeatures(self._x.reshape(1, n))
        samples_q = sg.RealFeatures(self._y.reshape(1, len(self._y)))

        # Gaussian kernel used by the test.
        width = 1
        gaussian = sg.GaussianKernel(10, width)

        # The test object is kept on the instance so later calls can reuse it.
        mmd = self._mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(gaussian)
        mmd.set_p(samples_p)
        mmd.set_q(samples_q)

        # Biased estimate first, then the unbiased one; the estimator is left
        # in unbiased mode afterwards.
        mmd.set_statistic_type(sg.ST_BIASED_FULL)
        biased_statistic = mmd.compute_statistic()

        mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
        unbiased_statistic = mmd.compute_statistic()

        self._statistic = unbiased_statistic

        print("\nShogun tests statistics:")
        print(f"biased test statistic {n} x MMD_b[X,Y]^2={biased_statistic:.2f}")
        print(f"unbiased test statistic {n} x MMD_u[X,Y]^2={unbiased_statistic:.2f}")
        return self
コード例 #3
0
def mix_rbf_kernel(X, Y):
    """Return the quadratic-time MMD statistic between ``X`` and ``Y``.

    A single Gaussian kernel is registered as the only candidate, kernel
    selection is run with the ``KSM_MAXIMIZE_MMD`` strategy in train/test
    mode (ratio 1), and the statistic is then computed.

    Parameters
    ----------
    X, Y :
        Data handed to ``sg.Features`` for the two samples.

    Returns
    -------
    The value of ``QuadraticTimeMMD.compute_statistic()``.
    """
    import shogun as sg

    test = sg.QuadraticTimeMMD()
    test.set_p(sg.Features(X))
    test.set_q(sg.Features(Y))

    # Candidate kernels for selection (only one here).
    test.add_kernel(sg.GaussianKernel(10, 1.0))

    test.set_kernel_selection_strategy(sg.KSM_MAXIMIZE_MMD)
    test.set_train_test_mode(True)
    test.set_train_test_ratio(1)
    test.select_kernel()

    return test.compute_statistic()
コード例 #4
0
    def visualise_distribution_test_statistic(self, alpha=0.05):
        """Plot histograms of null and alternative MMD statistic samples.

        The null samples come from ``self._mmd.sample_null()``. The
        alternative samples are obtained by drawing 500 fresh normal/Laplace
        data sets and computing a quadratic-time MMD statistic on each. A red
        vertical line marks the ``(1 - alpha)`` quantile of the null samples
        in the combined plot.

        Parameters
        ----------
        alpha : float, optional
            Test level; the marked quantile is ``1 - alpha``.

        Returns
        -------
        self
        """
        num_samples = 500
        kernel_width = 1

        # we first sample null distribution
        null_samples = self._mmd.sample_null()

        # we then sample alternative distribution, generate new data for that
        alt_samples = np.zeros(num_samples)
        for i in range(num_samples):
            # NOTE(review): scipy's ``scale`` is a standard deviation, while
            # the attribute is named ``_sigma_squared`` -- confirm which one
            # is intended.
            x = norm.rvs(size=self._n, loc=self._mu, scale=self._sigma_squared)
            y = laplace.rvs(size=self._n, loc=self._mu, scale=self._b)
            feat_p = sg.RealFeatures(np.reshape(x, (1, len(x))))
            feat_q = sg.RealFeatures(np.reshape(y, (1, len(y))))

            # A fresh kernel and test object per draw.
            kernel = sg.GaussianKernel(10, kernel_width)

            mmd = sg.QuadraticTimeMMD()
            mmd.set_kernel(kernel)

            mmd.set_p(feat_p)
            mmd.set_q(feat_q)
            alt_samples[i] = mmd.compute_statistic()

        plt.figure(figsize=(18, 5))

        plt.subplot(131)
        plt.hist(null_samples, 50, color='blue')
        plt.title('Null distribution')
        plt.subplot(132)
        plt.title('Alternative distribution')
        plt.hist(alt_samples, 50, color='green')

        plt.subplot(133)
        plt.hist(null_samples, 50, color='blue')
        plt.hist(alt_samples, 50, color='green', alpha=0.5)
        plt.title('Null and alternative distribution')

        # find (1-alpha) element of null distribution
        null_samples_sorted = np.sort(null_samples)
        # Clamp the index: alpha == 0 would otherwise index one past the end,
        # and alpha > 1 would silently wrap around to the other end.
        last_idx = len(null_samples_sorted) - 1
        quantile_idx = int(len(null_samples) * (1 - alpha))
        quantile_idx = max(0, min(quantile_idx, last_idx))
        quantile = null_samples_sorted[quantile_idx]
        # ymin/ymax are axes fractions in [0, 1]; 0..1 spans the full height.
        plt.axvline(x=quantile,
                    ymin=0,
                    ymax=1,
                    color='red',
                    label=str(int(round(
                        (1 - alpha) * 100))) + '% quantile of null')
        # Without a legend the quantile label above is never displayed.
        plt.legend()
        plt.show()
        return self
コード例 #5
0
ファイル: drug_classifier.py プロジェクト: zqfang/snakeflow
    def kernel_prepare(self):
        """Assemble a ``CombinedKernel`` from ``self.kernel_dict``.

        For every entry stored under a recognised kernel type
        (``'GaussianKernel'``, ``'PolyKernel'``, ``'LinearKernel'``) one
        sub-kernel of that type is appended; other keys are ignored.

        Returns
        -------
        The populated ``shogun.CombinedKernel``.
        """
        # Factories are lazy so a sub-kernel is only built when it is appended.
        factories = {
            'GaussianKernel': lambda: shogun.GaussianKernel(self.gaussian_width),
            'PolyKernel': lambda: shogun.PolyKernel(10, self.poly_degree),
            'LinearKernel': lambda: shogun.LinearKernel(),
        }

        combined = shogun.CombinedKernel()
        for kernel_type, entries in self.kernel_dict.items():
            make_kernel = factories.get(kernel_type)
            if make_kernel is None:
                continue
            # Only the number of entries matters; their values are not used.
            for _ in entries.values():
                combined.append_kernel(make_kernel())

        return combined
コード例 #6
0
    def get_kernel_function(self, kernel, name):
        """Map a kernel ``name`` to a Shogun kernel object.

        Parameters
        ----------
        kernel : mapping
            Per-kernel settings; ``kernel[name]['lengthscale']`` is read for
            ``'RBF'`` and ``kernel[name]['constant']`` for ``'Const'``.
        name : str
            Kernel identifier.

        Returns
        -------
        A ``shogun.GaussianKernel`` for ``'RBF'``, a ``shogun.ConstKernel``
        for ``'Const'``, an explanatory string for ``'Matern'``, ``'White'``
        and ``'RatQd'``, and ``None`` for any other name.
        """
        if name in ('Matern', 'White', 'RatQd'):
            return 'Not sure whether this library supports the specified kernel type'

        if name == 'RBF':
            settings = kernel[name]
            return shogun.GaussianKernel(settings['lengthscale'])

        if name == 'Const':
            settings = kernel[name]
            return shogun.ConstKernel(settings['constant'])

        return None
コード例 #7
0
def shogun_mmd(X,
               Y,
               kernel_width,
               null_samples=1000,
               median_samples=1000,
               cache_size=32):
    '''
    Perform a quadratic-time MMD two-sample test with a Gaussian kernel.

    Parameters
    ----------
    X : row-instance feature array
        First sample; transposed and cast to float64 before use.

    Y : row-instance feature array
        Second sample; transposed and cast to float64 before use.

    kernel_width : float
        The bandwidth of the RBF kernel (sigma).

    null_samples : int
        How many times to sample from the null distribution.

    median_samples : int
        Accepted but not used by this function.

    cache_size : int
        First argument handed to the Gaussian kernel constructor.

    Returns
    -------
    p_val : float
        Fraction of null samples that are at least as large as the
        test statistic.

    stat : float
        The test statistic.

    null_samples : array of length null_samples
        The samples from the null distribution.
    '''
    import shogun as sg

    test = sg.QuadraticTimeMMD()

    # Shogun expects one instance per column, hence the transposes.
    feats_p = sg.RealFeatures(X.T.astype(np.float64))
    test.set_p(feats_p)
    feats_q = sg.RealFeatures(Y.T.astype(np.float64))
    test.set_q(feats_q)

    gaussian = sg.GaussianKernel(cache_size, float(kernel_width))
    test.set_kernel(gaussian)

    test.set_num_null_samples(null_samples)
    null_draws = test.sample_null()
    observed = test.compute_statistic()

    exceed = null_draws >= observed
    return np.mean(exceed), observed, null_draws
コード例 #8
0
def _process(x1_set, x2_set, kernel_width, kernel_name, degree):
    num = len(x1_set)
    if num == 0:
        raise Http404
    examples = np.zeros((2, num))
    for i in xrange(num):
        examples[0, i] = x1_set[i]
        examples[1, i] = x2_set[i]
    feat_train = sg.RealFeatures(examples)

    # construct covariance function
    if kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(feat_train, feat_train)
    elif kernel_name == "PolynomialKernel":
        kernel = sg.PolyKernel(feat_train, feat_train, degree, True)
    elif kernel_name == "GaussianKernel":
        kernel = sg.GaussianKernel(feat_train, feat_train, kernel_width)
    kernel_matrix = kernel.get_kernel_matrix()
    return kernel_matrix.tolist()
コード例 #9
0
def mmd_test(Sample1, Sample2):
    """Compute unbiased quadratic-time MMD statistics column by column.

    For every column index of ``Sample1`` the matching columns of both
    arrays are compared with a Gaussian-kernel ``QuadraticTimeMMD``.

    NOTE(review): only the statistic of the LAST column is returned; the
    values computed for earlier columns are overwritten. Confirm whether a
    per-column result was intended before relying on this.

    Parameters
    ----------
    Sample1, Sample2 : 2-D arrays
        Indexed as ``[:, i]``; ``Sample1.shape[1]`` sets the column count.

    Returns
    -------
    The statistic computed for the last column.

    Raises
    ------
    ValueError
        If ``Sample1`` has no columns (previously an UnboundLocalError).
    """
    num_columns = Sample1.shape[1]
    if num_columns == 0:
        raise ValueError("Sample1 must have at least one column")

    # choose kernel for testing. Here: Gaussian
    kernel_width = 1

    for i in range(num_columns):
        x = Sample1[:, i]
        y = Sample2[:, i]

        # Each column becomes a single-row feature matrix.
        feat_p = sg.RealFeatures(x.reshape(1, len(x)))
        feat_q = sg.RealFeatures(y.reshape(1, len(y)))

        kernel = sg.GaussianKernel(10, kernel_width)

        # create mmd instance of test-statistic
        mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feat_p)
        mmd.set_q(feat_q)

        # compute the unbiased test statistic
        mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
        statistic = mmd.compute_statistic()

    return statistic
コード例 #10
0
def _read_data(request):
    labels = []
    features = []
    data = json.loads(request.POST['point_set'])
    tau = float(request.POST['Tau'])
    for pt in data:
        labels.append(float(pt["y"]))
        features.append(float(pt["x"]))
    labels = np.array(labels, dtype=np.float64)
    num = len(features)
    if num == 0:
        raise TypeError
    examples = np.zeros((1, num))

    for i in xrange(num):
        examples[0, i] = features[i]

    lab = sg.RegressionLabels(labels)
    train = sg.RealFeatures(examples)

    sigma = float(request.POST["sigma"])
    kernel = sg.GaussianKernel(train, train, sigma)

    return (tau, lab, kernel, train)
コード例 #11
0
ファイル: significance_tests.py プロジェクト: stjordanis/MCSG
# Three groups (f, g, w) of five arrays each; the *_gauss, *_sif, *_pca,
# *_mwv and *_wmd arrays are defined outside this excerpt.
f = [f_gauss, f_sif, f_pca, f_mwv, f_wmd]
g = [g_gauss, g_sif, g_pca, g_mwv, g_wmd]
w = [w_gauss, w_sif, w_pca, w_mwv, w_wmd]

# One cell per (group, i, j) combination.
# NOTE(review): presumably filled with the test decision further down; the
# assignment is not visible in this excerpt.
rejected = np.zeros((3, 5, 5))
for k, emb in enumerate([f, g, w]):
    # Every ordered pair (X, Y) within a group is tested, including X with itself.
    for i, X in enumerate(emb):
        for j, Y in enumerate(emb):
            # Re-seed Shogun's random generator so every pair starts from the same seed.
            sg.Math.init_random(0)
            # turn data into Shogun representation (columns vectors)
            feat_p = sg.RealFeatures(X.reshape(1, len(X)))
            feat_q = sg.RealFeatures(Y.reshape(1, len(Y)))
            # choose kernel for testing. Here: Gaussian
            kernel_width = 1
            kernel = sg.GaussianKernel(10, kernel_width)
            # create mmd instance of test-statistic
            mmd = sg.QuadraticTimeMMD()
            mmd.set_kernel(kernel)
            mmd.set_p(feat_p)
            mmd.set_q(feat_q)
            # compute unbiased test statistic
            mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
            statistic = unbiased_statistic = mmd.compute_statistic()

            # Null distribution approximated by permutation with 1000 samples.
            mmd.set_null_approximation_method(sg.NAM_PERMUTATION)
            mmd.set_num_null_samples(1000)
            # compute p-value for computed test statistic
            p_value = mmd.compute_p_value(statistic)

            # compute threshold for rejecting H_0 for a given test power
            # NOTE(review): the excerpt ends here; the rest of the loop body is not visible.
コード例 #12
0
import numpy as np
import shogun as sg

# Two random samples with one row per point; Y is X's distribution shifted by 0.5.
num_points, num_dims = 100, 3
X = np.random.randn(num_points, num_dims)
Y = 0.5 + np.random.randn(num_points, num_dims)

# Quadratic-time MMD test; the arrays are transposed before being wrapped.
mmd = sg.QuadraticTimeMMD()
features_p = sg.RealFeatures(X.T)
mmd.set_p(features_p)
features_q = sg.RealFeatures(Y.T)
mmd.set_q(features_q)

gaussian_kernel = sg.GaussianKernel(32, 1)
mmd.set_kernel(gaussian_kernel)

# Draw 200 null samples first, then compute the observed statistic.
mmd.set_num_null_samples(200)
samps = mmd.sample_null()
stat = mmd.compute_statistic()