Ejemplo n.º 1
0
def features_sparse_modular(A):
    """Round-trip the dense matrix ``A`` through shogun sparse features.

    Sparse features are built from ``A`` directly and from its scipy
    ``csc_matrix`` form; after each step the recovered matrix is asserted
    to equal ``A`` element-wise. Nothing is returned.
    """
    from scipy.sparse import csc_matrix
    from modshogun import SparseRealFeatures
    from numpy import all

    def equals_input(matrix):
        # Element-wise comparison against A, reduced with numpy's all().
        return all(matrix == A)

    # Sparse (CSC) view of the dense input.
    sparse_input = csc_matrix(A)

    # Sparse shogun features built straight from the dense matrix.
    feats = SparseRealFeatures(A)
    assert equals_input(feats.get_full_feature_matrix())

    # Replace the feature matrix of the same object with the sparse input.
    feats.set_sparse_feature_matrix(sparse_input)
    assert equals_input(feats.get_full_feature_matrix())

    # Fresh sparse shogun features built from the sparse input.
    feats = SparseRealFeatures(sparse_input)
    assert equals_input(feats.get_full_feature_matrix())

    # Convert the sparse feature matrix back to scipy and densify it.
    recovered = csc_matrix(feats.get_sparse_feature_matrix())
    assert equals_input(recovered.todense())
def features_read_svmlight_format_modular(fname):
    """Load sparse features with labels from ``fname`` and write them back out.

    The data is read through ``LibSVMFile(fname)`` and saved, together
    with the labels returned by the load, to ``testwrite.light``.
    Nothing is returned.
    """
    from modshogun import SparseRealFeatures, LibSVMFile

    sparse_feats = SparseRealFeatures()
    loaded_labels = sparse_feats.load_with_labels(LibSVMFile(fname))

    sparse_feats.save_with_labels(
        LibSVMFile("testwrite.light", "w"),
        loaded_labels,
    )
def features_read_svmlight_format_modular(fname):
    """Read a LibSVM-format file into sparse features and re-save it.

    Loads features and labels via ``LibSVMFile(fname)``, then writes both
    to ``testwrite.light``. Returns None.
    """
    from modshogun import LibSVMFile
    from modshogun import SparseRealFeatures

    features = SparseRealFeatures()
    labels = features.load_with_labels(LibSVMFile(fname))
    features.save_with_labels(LibSVMFile('testwrite.light', 'w'), labels)
def load_sparse_data(filename, dimension=None):
    """Load a LibSVM-format file as sparse features plus binary labels.

    Args:
        filename: path passed to ``LibSVMFile``.
        dimension: when not None, forwarded to ``set_num_features`` on the
            loaded features.

    Returns:
        dict with key ``'data'`` (the ``SparseRealFeatures``) and key
        ``'labels'`` (``BinaryLabels`` built from the loaded label array).
    """
    # Open the file the caller asked for. The previous code ignored
    # ``filename`` and read a module-level ``args.dataset`` instead.
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def load_sparse_data(filename, dimension=None):
    """Load a LibSVM-format file as sparse features plus binary labels.

    Args:
        filename: path passed to ``LibSVMFile``.
        dimension: when not None, forwarded to ``set_num_features`` on the
            loaded features.

    Returns:
        dict with key ``'data'`` (the ``SparseRealFeatures``) and key
        ``'labels'`` (``BinaryLabels`` built from the loaded label array).
    """
    # Use the ``filename`` argument; the earlier version silently read a
    # module-level ``args.dataset`` and ignored what the caller passed.
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def kernel_sparse_gaussian_modular(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   width=1.1):
    """Compute Gaussian kernel matrices on sparse features.

    Both inputs are wrapped in ``SparseRealFeatures``. The kernel is first
    built on (train, train), then re-initialised on (train, test).

    Returns:
        Tuple ``(km_train, km_test, kernel)``: the two kernel matrices and
        the kernel object, left initialised on (train, test).
    """
    from modshogun import SparseRealFeatures, GaussianKernel

    train_feats = SparseRealFeatures(fm_train_real)
    test_feats = SparseRealFeatures(fm_test_real)

    gaussian = GaussianKernel(train_feats, train_feats, width)
    train_matrix = gaussian.get_kernel_matrix()

    # Re-target the same kernel object at the train/test pair.
    gaussian.init(train_feats, test_feats)
    test_matrix = gaussian.get_kernel_matrix()

    return train_matrix, test_matrix, gaussian
def kernel_sparse_linear_modular(fm_train_real=traindat,
                                 fm_test_real=testdat,
                                 scale=1.1):
    """Compute linear kernel matrices on sparse features.

    A ``LinearKernel`` is given an ``AvgDiagKernelNormalizer(scale)``,
    initialised on (train, train), and then re-initialised on
    (train, test).

    Returns:
        Tuple ``(km_train, km_test, kernel)``: the two kernel matrices and
        the kernel object, left initialised on (train, test).
    """
    from modshogun import SparseRealFeatures
    from modshogun import LinearKernel, AvgDiagKernelNormalizer

    train_feats = SparseRealFeatures(fm_train_real)
    test_feats = SparseRealFeatures(fm_test_real)

    linear = LinearKernel()
    normalizer = AvgDiagKernelNormalizer(scale)
    linear.set_normalizer(normalizer)

    linear.init(train_feats, train_feats)
    train_matrix = linear.get_kernel_matrix()

    # Same kernel object, now evaluated between train and test features.
    linear.init(train_feats, test_feats)
    test_matrix = linear.get_kernel_matrix()

    return train_matrix, test_matrix, linear
Ejemplo n.º 8
0
def kernel_sparse_poly_modular(fm_train_real=traindat, fm_test_real=testdat,
                               size_cache=10, degree=3, inhomogene=True):
    """Compute polynomial kernel matrices on sparse features.

    Both inputs are wrapped in ``SparseRealFeatures``. The kernel is built
    on (train, train), then re-initialised on (train, test).

    Returns:
        Tuple ``(km_train, km_test, kernel)``: the two kernel matrices and
        the kernel object, left initialised on (train, test).
    """
    from modshogun import SparseRealFeatures, PolyKernel

    train_feats = SparseRealFeatures(fm_train_real)
    test_feats = SparseRealFeatures(fm_test_real)

    # NOTE(review): positional order here is (size_cache, inhomogene, degree);
    # confirm against the PolyKernel constructor of the shogun version in use
    # that size_cache and degree land in the intended slots.
    poly = PolyKernel(train_feats, train_feats, size_cache, inhomogene, degree)
    train_matrix = poly.get_kernel_matrix()

    poly.init(train_feats, test_feats)
    test_matrix = poly.get_kernel_matrix()

    return train_matrix, test_matrix, poly
Ejemplo n.º 9
0
def compute_output_plot_isolines(classifier,
                                 kernel=None,
                                 train=None,
                                 sparse=False,
                                 pos=None,
                                 neg=None,
                                 regression=False):
    """Evaluate ``classifier`` on a 100 x 100 grid for isoline plotting.

    Grid bounds are taken from 1.2 times rows 0 and 1 of ``pos`` (upper
    bounds) and ``neg`` (lower bounds) when both are given; otherwise each
    axis spans [-5, 5].
    NOTE(review): upper bounds use only ``pos`` and lower bounds only
    ``neg`` -- presumably the caller's data is laid out that way; confirm.

    Args:
        classifier: object exposing ``set_features`` and ``apply``.
        kernel, train: when both are truthy, ``kernel.init(train, grid)``
            is called instead of ``classifier.set_features(grid)``.
        sparse: convert the grid features to ``SparseRealFeatures``.
        pos, neg: optional 2-D arrays used only for the grid bounds.
        regression: read outputs with ``get_labels()`` instead of
            ``get_values()``.

    Returns:
        Tuple ``(x, y, z)``: the meshgrid coordinates and the outputs
        reshaped to ``(100, 100)``.
    """
    resolution = 100

    if pos is None or neg is None:
        axis_1 = linspace(-5, 5, resolution)
        axis_2 = linspace(-5, 5, resolution)
    else:
        upper_1 = max(1.2 * pos[0, :])
        lower_1 = min(1.2 * neg[0, :])
        lower_2 = min(1.2 * neg[1, :])
        upper_2 = max(1.2 * pos[1, :])
        axis_1 = linspace(lower_1, upper_1, resolution)
        axis_2 = linspace(lower_2, upper_2, resolution)

    x, y = meshgrid(axis_1, axis_2)

    # One feature vector per grid point: rows are the flattened x and y.
    grid_feats = RealFeatures(array((ravel(x), ravel(y))))
    if sparse:
        dense_grid = grid_feats
        grid_feats = SparseRealFeatures()
        grid_feats.obtain_from_simple(dense_grid)

    if not (kernel and train):
        classifier.set_features(grid_feats)
    else:
        kernel.init(train, grid_feats)

    prediction = classifier.apply()
    outputs = prediction.get_labels() if regression else prediction.get_values()

    return x, y, outputs.reshape((resolution, resolution))
Ejemplo n.º 10
0
def compute_output_plot_isolines(
    classifier, kernel=None, train=None, sparse=False, pos=None, neg=None, regression=False
):
    """Compute classifier outputs over a 100 x 100 grid for isoline plots.

    When both ``pos`` and ``neg`` are given, the grid's upper bounds come
    from 1.2 * ``pos`` rows 0/1 and the lower bounds from 1.2 * ``neg``
    rows 0/1; otherwise both axes run from -5 to 5. The grid points become
    ``RealFeatures`` (converted to ``SparseRealFeatures`` if ``sparse``).
    If ``kernel`` and ``train`` are both truthy the kernel is initialised
    on (train, grid); otherwise the grid is set as the classifier's
    features.

    Returns:
        Tuple ``(x, y, z)`` where ``x, y`` are the meshgrid arrays and
        ``z`` is ``classifier.apply()`` output (``get_labels()`` when
        ``regression`` else ``get_values()``) reshaped to ``(100, 100)``.
    """
    n_steps = 100
    have_bounds = pos is not None and neg is not None

    if have_bounds:
        x1_hi = max(1.2 * pos[0, :])
        x1_lo = min(1.2 * neg[0, :])
        x2_lo = min(1.2 * neg[1, :])
        x2_hi = max(1.2 * pos[1, :])
    else:
        x1_lo, x1_hi = -5, 5
        x2_lo, x2_hi = -5, 5

    ticks_1 = linspace(x1_lo, x1_hi, n_steps)
    ticks_2 = linspace(x2_lo, x2_hi, n_steps)
    x, y = meshgrid(ticks_1, ticks_2)

    # Stack flattened coordinates so each column is one grid point.
    dense_points = RealFeatures(array((ravel(x), ravel(y))))
    if sparse:
        points = SparseRealFeatures()
        points.obtain_from_simple(dense_points)
    else:
        points = dense_points

    if kernel and train:
        kernel.init(train, points)
    else:
        classifier.set_features(points)

    result = classifier.apply()
    if regression:
        flat_outputs = result.get_labels()
    else:
        flat_outputs = result.get_values()

    z = flat_outputs.reshape((n_steps, n_steps))
    return x, y, z
Ejemplo n.º 11
0
def distance_sparseeuclidean_modular(train_fname=traindat, test_fname=testdat):
    """Compute sparse Euclidean distance matrices from two CSV files.

    Each file is read into ``RealFeatures`` via ``CSVFile`` and converted
    to ``SparseRealFeatures``. The distance is built on (train, train),
    then re-initialised on (train, test).

    Returns:
        Tuple ``(distance, dm_train, dm_test)``.
    """
    from modshogun import RealFeatures, SparseRealFeatures, SparseEuclideanDistance, CSVFile

    def sparse_from_csv(path):
        # Read dense features from CSV, then convert them to sparse form.
        dense = RealFeatures(CSVFile(path))
        converted = SparseRealFeatures()
        converted.obtain_from_simple(dense)
        return converted

    train_feats = sparse_from_csv(train_fname)
    test_feats = sparse_from_csv(test_fname)

    metric = SparseEuclideanDistance(train_feats, train_feats)
    train_distances = metric.get_distance_matrix()

    metric.init(train_feats, test_feats)
    test_distances = metric.get_distance_matrix()

    return metric, train_distances, test_distances
def distance_sparseeuclidean_modular(train_fname=traindat, test_fname=testdat):
    """Build sparse features from two CSV files and compute distance matrices.

    The train and test files are loaded as ``RealFeatures`` and converted
    with ``obtain_from_simple``. ``SparseEuclideanDistance`` is evaluated
    on (train, train) and then, after ``init``, on (train, test).

    Returns:
        Tuple ``(distance, dm_train, dm_test)``.
    """
    from modshogun import CSVFile, RealFeatures
    from modshogun import SparseEuclideanDistance, SparseRealFeatures

    # Training set: dense load followed by conversion to sparse.
    dense_train = RealFeatures(CSVFile(train_fname))
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    # Test set: same procedure.
    dense_test = RealFeatures(CSVFile(test_fname))
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    euclid = SparseEuclideanDistance(sparse_train, sparse_train)
    within_train = euclid.get_distance_matrix()

    euclid.init(sparse_train, sparse_test)
    train_to_test = euclid.get_distance_matrix()

    return euclid, within_train, train_to_test
Ejemplo n.º 13
0
def features_io_modular(fm_train_real, label_train_twoclass):
    """Save and reload features and labels through several file formats.

    Sparse features are written and read as binary and LibSVM files; dense
    features as binary, HDF5 and CSV; multiclass labels ``[0, 1, 2, 3]``
    as CSV, binary and HDF5. All files live under ``tmp/`` and are
    unlinked at the end. ``label_train_twoclass`` is not used.

    Returns:
        Tuple ``(feats, feats2, lab, lab2)``: the dense features that were
        saved, the dense features that were loaded, the labels that were
        saved, and the labels that were loaded.
    """
    import os
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # File handles are passed inline so each one is released right after
    # its save/load call, before the same path is opened again.

    # Sparse features: write both formats, then read both back.
    sparse_out = SparseRealFeatures(fm_train_real)
    sparse_in = SparseRealFeatures()
    sparse_out.save(BinaryFile("tmp/fm_train_sparsereal.bin", "w"))
    sparse_out.save(LibSVMFile("tmp/fm_train_sparsereal.ascii", "w"))
    sparse_in.load(BinaryFile("tmp/fm_train_sparsereal.bin"))
    sparse_in.load(LibSVMFile("tmp/fm_train_sparsereal.ascii"))

    # Dense features: binary, HDF5 and CSV writes; binary and CSV reads.
    dense_out = RealFeatures(fm_train_real)
    dense_in = RealFeatures()
    dense_out.save(BinaryFile("tmp/fm_train_real.bin", "w"))
    dense_out.save(HDF5File("tmp/fm_train_real.h5", "w", "/data/doubles"))
    dense_out.save(CSVFile("tmp/fm_train_real.ascii", "w"))
    dense_in.load(BinaryFile("tmp/fm_train_real.bin"))
    dense_in.load(CSVFile("tmp/fm_train_real.ascii"))

    # Labels: CSV, binary and HDF5 writes; CSV and binary reads.
    labels_out = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    labels_in = MulticlassLabels()
    labels_out.save(CSVFile("tmp/label_train_twoclass.ascii", "w"))
    labels_out.save(BinaryFile("tmp/label_train_twoclass.bin", "w"))
    labels_out.save(HDF5File("tmp/label_train_real.h5", "w", "/data/labels"))
    labels_in.load(CSVFile("tmp/label_train_twoclass.ascii"))
    labels_in.load(BinaryFile("tmp/label_train_twoclass.bin"))

    # HDF5 reads come last, for features and then labels.
    dense_in.load(HDF5File("tmp/fm_train_real.h5", "r", "/data/doubles"))
    labels_in.load(HDF5File("tmp/label_train_real.h5", "r", "/data/labels"))

    # Remove every file written above.
    written_paths = (
        'tmp/fm_train_sparsereal.bin',
        'tmp/fm_train_sparsereal.ascii',
        'tmp/fm_train_real.bin',
        'tmp/fm_train_real.h5',
        'tmp/fm_train_real.ascii',
        'tmp/label_train_real.h5',
        'tmp/label_train_twoclass.ascii',
        'tmp/label_train_twoclass.bin',
    )
    for path in written_paths:
        os.unlink(path)

    return dense_out, dense_in, labels_out, labels_in
Ejemplo n.º 14
0
def get_features_and_labels(input_file):
    """Load sparse features and multiclass labels from ``input_file``.

    Returns:
        Tuple ``(feats, labels)``: the ``SparseRealFeatures`` that were
        loaded and ``MulticlassLabels`` built from the loaded label array.
    """
    sparse_feats = SparseRealFeatures()
    multiclass = MulticlassLabels(sparse_feats.load_with_labels(input_file))
    return sparse_feats, multiclass
Ejemplo n.º 15
0
def get_features_and_labels(input_file):
    """Read ``input_file`` into sparse features with multiclass labels.

    ``load_with_labels`` fills a new ``SparseRealFeatures`` and returns the
    label array, which is wrapped in ``MulticlassLabels``.

    Returns:
        Tuple ``(feats, labels)``.
    """
    features = SparseRealFeatures()
    raw_labels = features.load_with_labels(input_file)
    return features, MulticlassLabels(raw_labels)
Ejemplo n.º 16
0
def features_io_modular(fm_train_real, label_train_twoclass):
    """Save and reload features and labels through several file formats.

    Sparse features are written and read as binary and LibSVM files; dense
    features as binary, HDF5 and CSV; multiclass labels ``[0, 1, 2, 3]``
    as CSV, binary and HDF5. Files are created in the current working
    directory and unlinked at the end. ``label_train_twoclass`` is not
    used.

    Returns:
        Tuple ``(feats, feats2, lab, lab2)``: the dense features that were
        saved, the dense features that were loaded, the labels that were
        saved, and the labels that were loaded.
    """
    import os
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # File handles are passed inline so each one is released right after
    # its save/load call, before the same path is opened again.

    # Sparse features: write both formats, then read both back.
    sparse_src = SparseRealFeatures(fm_train_real)
    sparse_dst = SparseRealFeatures()
    sparse_src.save(BinaryFile("fm_train_sparsereal.bin", "w"))
    sparse_src.save(LibSVMFile("fm_train_sparsereal.ascii", "w"))
    sparse_dst.load(BinaryFile("fm_train_sparsereal.bin"))
    sparse_dst.load(LibSVMFile("fm_train_sparsereal.ascii"))

    # Dense features: binary, HDF5 and CSV writes; binary and CSV reads.
    dense_src = RealFeatures(fm_train_real)
    dense_dst = RealFeatures()
    dense_src.save(BinaryFile("fm_train_real.bin", "w"))
    dense_src.save(HDF5File("fm_train_real.h5", "w", "/data/doubles"))
    dense_src.save(CSVFile("fm_train_real.ascii", "w"))
    dense_dst.load(BinaryFile("fm_train_real.bin"))
    dense_dst.load(CSVFile("fm_train_real.ascii"))

    # Labels: CSV, binary and HDF5 writes; CSV and binary reads.
    labels_src = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    labels_dst = MulticlassLabels()
    labels_src.save(CSVFile("label_train_twoclass.ascii", "w"))
    labels_src.save(BinaryFile("label_train_twoclass.bin", "w"))
    labels_src.save(HDF5File("label_train_real.h5", "w", "/data/labels"))
    labels_dst.load(CSVFile("label_train_twoclass.ascii"))
    labels_dst.load(BinaryFile("label_train_twoclass.bin"))

    # HDF5 reads come last, for features and then labels.
    dense_dst.load(HDF5File("fm_train_real.h5", "r", "/data/doubles"))
    labels_dst.load(HDF5File("label_train_real.h5", "r", "/data/labels"))

    # Remove every file written above.
    written_paths = (
        'fm_train_sparsereal.bin',
        'fm_train_sparsereal.ascii',
        'fm_train_real.bin',
        'fm_train_real.h5',
        'fm_train_real.ascii',
        'label_train_real.h5',
        'label_train_twoclass.ascii',
        'label_train_twoclass.bin',
    )
    for path in written_paths:
        os.unlink(path)

    return dense_src, dense_dst, labels_src, labels_dst