예제 #1
0
def get_kernel(request, features):
    try:
        kernel_name = request.POST["kernel"]
    except:
        raise ValueError("Unknown kernel")

    if kernel_name == "GaussianKernel":
        try:
            sigma = float(request.POST["sigma"])
        except:
            raise ValueError("Sigma is not correct")
        kernel = sg.GaussianKernel(features, features, sigma)
    elif kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(features, features)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    elif kernel_name == "PolynomialKernel":
        try:
            degree = int(request.POST["degree"])
        except:
            raise ValueError("degree is not correct")
        kernel = sg.PolyKernel(features, features, degree, True)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    else:
        raise ValueError("Unknown kernel")

    return kernel
예제 #2
0
def _process(x1_set, x2_set, kernel_width, kernel_name, degree):
    num = len(x1_set)
    if num == 0:
        raise Http404
    examples = np.zeros((2, num))
    for i in xrange(num):
        examples[0,i] = x1_set[i]
        examples[1,i] = x2_set[i]
    feat_train = sg.RealFeatures(examples)

    # construct covariance function
    if kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(feat_train, feat_train)
    elif kernel_name == "PolynomialKernel":
        kernel = sg.PolyKernel(feat_train, feat_train, degree, True)
    elif kernel_name == "GaussianKernel":
        kernel = sg.GaussianKernel(feat_train, feat_train, kernel_width)
    kernel_matrix=kernel.get_kernel_matrix()
    return kernel_matrix.tolist()
예제 #3
0
def shogun_mmd(X, Y, kernel_width, null_samples=1000, median_samples=1000,
               cache_size=32):
    '''
    Run an MMD test using a Gaussian kernel.

    Parameters
    ----------
    X : row-instance feature array

    Y : row-instance feature array

    kernel_width : float
        The bandwidth of the RBF kernel (sigma).

    null_samples : int
        How many times to sample from the null distribution.

    Returns
    -------
    p_val : float
        The obtained p value of the test.

    stat : float
        The test statistic.

    null_samples : array of length null_samples
        The samples from the null distribution.
    '''
    import modshogun as sg
    mmd = sg.QuadraticTimeMMD()
    mmd.set_p(sg.RealFeatures(X.T.astype(np.float64)))
    mmd.set_q(sg.RealFeatures(Y.T.astype(np.float64)))
    mmd.set_kernel(sg.GaussianKernel(cache_size, float(kernel_width)))

    mmd.set_num_null_samples(null_samples)
    samps = mmd.sample_null()
    stat = mmd.compute_statistic()

    p_val = np.mean(stat <= samps)
    return p_val, stat, samps
예제 #4
0
def _read_data(request):
    labels = []
    features = []
    data = json.loads(request.POST['point_set'])
    tau = float(request.POST['Tau'])
    for pt in data:
        labels.append(float(pt["y"]))
        features.append(float(pt["x"]))
    labels = np.array(labels, dtype=np.float64)
    num = len(features)
    if num == 0:
        raise TypeError
    examples = np.zeros((1, num))

    for i in xrange(num):
        examples[0, i] = features[i]

    lab = sg.RegressionLabels(labels)
    train = sg.RealFeatures(examples)

    sigma = float(request.POST["sigma"])
    kernel = sg.GaussianKernel(train, train, sigma)

    return (tau, lab, kernel, train)
예제 #5
0
import numpy as np
import modshogun as sg

X = np.random.randn(100, 3)
Y = np.random.randn(100, 3) + .5

mmd = sg.QuadraticTimeMMD()
mmd.set_p(sg.RealFeatures(X.T))
mmd.set_q(sg.RealFeatures(Y.T))
mmd.set_kernel(sg.GaussianKernel(32, 1))
mmd.set_num_null_samples(200)
samps = mmd.sample_null()
stat = mmd.compute_statistic()
예제 #6
0
def rbf_mmd_test(X,
                 Y,
                 bandwidth='median',
                 null_samples=1000,
                 median_samples=1000,
                 cache_size=32):
    '''
    Run an MMD test using a Gaussian kernel.

    Parameters
    ----------
    X : row-instance feature array

    Y : row-instance feature array

    bandwidth : float or 'median'
        The bandwidth of the RBF kernel (sigma).
        If 'median', estimates the median pairwise distance in the
        aggregate sample and uses that.

    null_samples : int
        How many times to sample from the null distribution.

    median_samples : int
        How many points to use for estimating the bandwidth.

    Returns
    -------
    p_val : float
        The obtained p value of the test.

    stat : float
        The test statistic.

    null_samples : array of length null_samples
        The samples from the null distribution.

    bandwidth : float
        The used kernel bandwidth
    '''

    if bandwidth == 'median':
        from sklearn.metrics.pairwise import euclidean_distances
        sub = lambda feats, n: feats[np.random.choice(
            feats.shape[0], min(feats.shape[0], n), replace=False)]
        Z = np.r_[sub(X, median_samples // 2), sub(Y, median_samples // 2)]
        D2 = euclidean_distances(Z, squared=True)
        upper = D2[np.triu_indices_from(D2, k=1)]
        kernel_width = np.median(upper, overwrite_input=True)
        bandwidth = np.sqrt(kernel_width / 2)
        # sigma = median / sqrt(2); works better, sometimes at least
        del Z, D2, upper
    else:
        kernel_width = 2 * bandwidth**2

    mmd = sg.QuadraticTimeMMD()
    mmd.set_p(sg.RealFeatures(X.T.astype(np.float64)))
    mmd.set_q(sg.RealFeatures(Y.T.astype(np.float64)))
    mmd.set_kernel(sg.GaussianKernel(cache_size, kernel_width))

    mmd.set_num_null_samples(null_samples)
    samps = mmd.sample_null()
    stat = mmd.compute_statistic()

    p_val = np.mean(stat <= samps)
    return p_val, stat, samps, bandwidth
예제 #7
0
def get_estimates(gen, sigmas=None, n_reps=100, n_null_samps=1000,
                  cache_size=64, rep_states=False, name=None,
                  save_samps=False, thresh_levels=(.2, .1, .05, .01)):
    if sigmas is None:
        sigmas = np.logspace(-1.7, 1.7, num=30)
    sigmas = np.asarray(sigmas)

    mmd = sg.QuadraticTimeMMD()
    mmd.set_num_null_samples(n_null_samps)
    mmd_mk = mmd.multikernel()
    for s in sigmas:
        mmd_mk.add_kernel(sg.GaussianKernel(cache_size, 2 * s**2))

    info = OrderedDict()
    for k in 'sigma rep mmd_est var_est p'.split():
        info[k] = []
    thresh_names = []
    for l in thresh_levels:
        s = 'thresh_{}'.format(l)
        thresh_names.append(s)
        info[s] = []
    if save_samps:
        info['samps'] = []

    thresh_prob = 1 - np.asarray(thresh_levels)

    bar = pb.ProgressBar()
    if name is not None:
        bar.start()
        bar.widgets.insert(0, '{} '.format(name))
    for rep in bar(xrange(n_reps)):
        if rep_states:
            rep = np.random.randint(0, 2**32)
            X, Y = gen(rs=rep)
        else:
            X, Y = gen()
        n = X.shape[0]
        assert Y.shape[0] == n
        mmd.set_p(sg.RealFeatures(X.T))
        mmd.set_q(sg.RealFeatures(Y.T))

        info['sigma'].extend(sigmas)
        info['rep'].extend([rep] * len(sigmas))

        stat = mmd_mk.compute_statistic()
        info['mmd_est'].extend(stat / (n / 2))

        samps = mmd_mk.sample_null()
        info['p'].extend(np.mean(samps >= stat, axis=0))
        if save_samps:
            info['samps'].extend(samps.T)

        info['var_est'].extend(mmd_mk.compute_variance_h1())

        threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
        for s, t in zip(thresh_names, threshes):
            info[s].extend(t)

    info = pd.DataFrame(info)
    info.set_index(['sigma', 'rep'], inplace=True)
    return info
예제 #8
0
import data
import numpy as np

# load data
feature_matrix = data.swissroll()
# create features instance
features = sg.RealFeatures(feature_matrix)

# create Diffusion Maps converter instance
converter = sg.DiffusionMaps()

# set target dimensionality
converter.set_target_dim(2)
# set number of time-steps
converter.set_t(2)

# create Gaussian kernel instance
kernel = sg.GaussianKernel(100, 10.0)
# enable converter instance to use created kernel instance
converter.set_kernel(kernel)

# compute embedding with Diffusion Maps method
embedding = converter.embed(features)

# compute linear kernel matrix
kernel_matrix = np.dot(feature_matrix.T, feature_matrix)
# create Custom Kernel instance
custom_kernel = sg.CustomKernel(kernel_matrix)
# construct embedding based on created kernel
kernel_embedding = converter.embed_kernel(custom_kernel)