def kernel_combined_custom_poly_modular(train_fname=traindat,
                                        test_fname=testdat,
                                        train_label_fname=label_traindat):
    from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
    from modshogun import CombinedKernel, PolyKernel, CustomKernel
    from modshogun import LibSVM, CSVFile

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    # precompute the train x train poly kernel matrix and wrap it in a
    # CustomKernel subkernel
    tfeats = RealFeatures(CSVFile(train_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_train = RealFeatures(CSVFile(train_fname))
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(CSVFile(train_label_fname))
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    # rebuild the combined kernel for prediction; the custom subkernel now
    # wraps the train x test poly matrix
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(CSVFile(test_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(CSVFile(test_fname))
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()
    # at this point the combined kernel holds the train x test matrix
    km_test = kernel.get_kernel_matrix()
    return km_test, kernel
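def kernel_combined_demo():
    # Hypothetical usage sketch: relies on the module-level traindat/testdat/
    # label_traindat defaults pointing at existing CSV files.
    km_test, kernel = kernel_combined_custom_poly_modular()
    return km_test.shape  # (n_train, n_test) combined test kernel matrix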
def word_kernel(words):
    import difflib
    import numpy as np
    from modshogun import CustomKernel

    # pairwise string similarities via difflib's SequenceMatcher.ratio(),
    # which is a similarity in [0, 1], not a distance
    N = len(words)
    sim_matrix = np.zeros([N, N])
    for i in range(N):
        for j in range(i, N):
            s = difflib.SequenceMatcher(None, words[i], words[j])
            sim_matrix[i, j] = s.ratio()
            # mirror the upper triangle instead of averaging with the
            # zero-filled lower triangle, which would halve off-diagonals
            sim_matrix[j, i] = sim_matrix[i, j]
    return CustomKernel(sim_matrix)
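def word_kernel_demo():
    # Minimal usage sketch for word_kernel (hypothetical word list). The
    # returned CustomKernel exposes the precomputed matrix via
    # get_kernel_matrix().
    words = ["kernel", "kernels", "colonel", "custom"]
    k = word_kernel(words)
    return k.get_kernel_matrix()  # 4x4 symmetric similarity matrix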
def classifier_custom_kernel_modular(C=1, dim=7):
    from modshogun import BinaryLabels, CustomKernel, LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    seed((C, dim))

    # random +/-1 labels and a symmetric matrix: the elementwise product
    # data*data.T is symmetric, and adding the identity strengthens the
    # diagonal
    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    symdata = data * data.T + diag(ones(dim))

    # use the symmetric matrix as the precomputed kernel (symdata was
    # previously computed but never used)
    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(symdata)
    labels = BinaryLabels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()
    predictions = svm.apply()
    out = predictions.get_labels()
    return svm, out
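def classifier_custom_kernel_demo():
    # Minimal invocation sketch with the default (hypothetical) parameters:
    # a 7x7 random precomputed kernel and random +/-1 labels.
    svm, out = classifier_custom_kernel_modular(C=1, dim=7)
    return out  # predicted labels, one per training example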
def kernel_custom_modular(dim=7):
    from numpy.random import rand, seed
    from numpy import array, float32, int32
    from modshogun import CustomKernel
    from modshogun import IndexFeatures

    seed(17)
    data = rand(dim, dim)
    symdata = data + data.T
    lowertriangle = array([
        symdata[(x, y)] for x in range(symdata.shape[1])
        for y in range(symdata.shape[0]) if y <= x
    ])

    kernel = CustomKernel()

    # once with float64's
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kernel.get_kernel_matrix()

    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kernel.get_kernel_matrix()

    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull = kernel.get_kernel_matrix()

    # get subset of kernel
    row_idx = array(range(3), dtype=int32)
    col_idx = array(range(2), dtype=int32)
    row_idx_feat = IndexFeatures(row_idx)
    col_idx_feat = IndexFeatures(col_idx)
    kernel.init(row_idx_feat, col_idx_feat)
    km_sub_kernel = kernel.get_kernel_matrix()
    # print('Subkernel(3x2):\n%s'%km_sub_kernel)
    kernel.remove_all_col_subsets()
    kernel.remove_all_row_subsets()

    # now once with float32's (convert the actual matrices, otherwise the
    # calls below would still operate on float64 data)
    lowertriangle = array(lowertriangle, dtype=float32)
    symdata = array(symdata, dtype=float32)

    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kernel.get_kernel_matrix()

    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kernel.get_kernel_matrix()

    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull = kernel.get_kernel_matrix()
    return km_fullfull, kernel, km_sub_kernel
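def kernel_custom_demo():
    # Minimal usage sketch: the third return value is the 3x2 submatrix
    # selected through the IndexFeatures row/column subsets above.
    km_fullfull, kernel, km_sub_kernel = kernel_custom_modular(dim=7)
    return km_fullfull.shape, km_sub_kernel.shape  # (7, 7) and (3, 2)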
def statistics_quadratic_time_mmd(m, dim, difference):
    from numpy import array, int32, random
    from modshogun import RealFeatures
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
    from modshogun import Statistics, IntVector, RealVector, Math

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generator for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    #gen_p.parallel.set_num_threads(1)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from generator
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a-priori. usually one would do some kernel selection. See
    # other examples for this.
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # perform test: compute p-value and test if null-hypothesis is rejected for
    # a test level of 0.05
    alpha = 0.05

    # using bootstrapping (slow, and not the most reliable way; consider
    # precomputing the kernel when using it, see below).
    # In practice, use at least 250 iterations
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(3)
    p_value_boot = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "bootstrap: p!=q: ", p_value_boot<alpha

    # using spectrum method. Use at least 250 samples from null.
    # This is consistent but sometimes breaks, always monitor type I error.
    # See tutorial for the number of eigenvalues to use.
    # Only works with BIASED statistic
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    mmd.set_num_samples_spectrum(250)
    p_value_spectrum = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "spectrum: p!=q: ", p_value_spectrum<alpha

    # using gamma method. This is a quick hack, which works most of the time
    # but is NOT guaranteed to. See tutorial for details.
    # Only works with BIASED statistic
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "gamma: p!=q: ", p_value_gamma<alpha

    # compute type I and II errors (use many more trials in practice).
    # Type I error is not necessary if one uses bootstrapping. We do it here
    # anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on different data than kernel
    # selection, but the linear time mmd does this implicitly and we used a
    # fixed kernel here.
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array([x for x in range(2 * m)]))  # numpy
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)
    for i in range(num_trials):
        # permuting the joint sample effectively enforces p=q, so a
        # rejection here is a type I error
        inds = random.permutation(inds)  # numpy permutation
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() < alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on the unpermuted data, failing to reject is a type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return type_I_errors.get(), type_II_errors.get(), p_value_boot, p_value_spectrum, p_value_gamma
def statistics_quadratic_time_mmd(m, dim, difference):
    # variant of the example above against the newer API: PERMUTATION
    # replaces BOOTSTRAP and set_num_null_samples replaces
    # set_bootstrap_iterations
    from numpy import array, int32, random
    from modshogun import RealFeatures
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, BIASED_DEPRECATED
    from modshogun import Statistics, IntVector, RealVector, Math

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generator for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    #gen_p.parallel.set_num_threads(1)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from generator
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a-priori. usually one would do some kernel selection. See
    # other examples for this.
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # perform test: compute p-value and test if null-hypothesis is rejected for
    # a test level of 0.05
    alpha = 0.05

    # using permutation (slow, and not the most reliable way; consider
    # precomputing the kernel when using it, see below).
    # In practice, use at least 250 iterations
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(3)
    p_value_null = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "permutation: p!=q: ", p_value_null<alpha

    # using spectrum method. Use at least 250 samples from null.
    # This is consistent but sometimes breaks, always monitor type I error.
    # See tutorial for the number of eigenvalues to use.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    mmd.set_num_samples_spectrum(250)
    p_value_spectrum = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "spectrum: p!=q: ", p_value_spectrum<alpha

    # using gamma method. This is a quick hack, which works most of the time
    # but is NOT guaranteed to. See tutorial for details.
    # Only works with BIASED_DEPRECATED statistic
    mmd.set_statistic_type(BIASED_DEPRECATED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "gamma: p!=q: ", p_value_gamma<alpha

    # compute type I and II errors (use many more trials in practice).
    # Type I error is not necessary if one uses permutation. We do it here
    # anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on different data than kernel
    # selection, but the linear time mmd does this implicitly and we used a
    # fixed kernel here.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array([x for x in range(2 * m)]))  # numpy
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)
    for i in range(num_trials):
        # permuting the joint sample effectively enforces p=q, so a
        # rejection here is a type I error
        inds = random.permutation(inds)  # numpy permutation
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() < alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on the unpermuted data, failing to reject is a type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return type_I_errors.get(), type_II_errors.get(), p_value_null, p_value_spectrum, p_value_gamma
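def statistics_quadratic_time_mmd_demo():
    # Minimal invocation sketch (hypothetical, deliberately tiny sizes so it
    # runs fast): m samples per distribution, dimension dim, and a mean shift
    # of 0.5 between p and q.
    return statistics_quadratic_time_mmd(m=30, dim=2, difference=0.5)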
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import CombinedFeatures, CombinedKernel
    from modshogun import PolyKernel, CustomKernel
    from modshogun import MKLClassification

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10,2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1) #2,3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)


    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
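def mkl_weights_demo(kernel):
    # Sketch of what the commented-out lines above hint at: after MKL
    # training, the CombinedKernel carries one learned weight per subkernel.
    w = kernel.get_subkernel_weights()
    return w  # e.g. a length-2 array for the CustomKernel/PolyKernel pair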