def features_sparse_modular(A):
    """Round-trip a dense matrix through shogun sparse features and scipy csc.

    Asserts after every conversion that densifying again reproduces A.
    """
    from scipy.sparse import csc_matrix
    from shogun.Features import SparseRealFeatures
    from numpy import array, float64, all

    # sparse representation of the dense input; recent scipy.sparse also
    # supports dtypes other than float64
    X = csc_matrix(A)

    # dense matrix -> shogun sparse features -> dense again
    feats = SparseRealFeatures(A)
    restored = feats.get_full_feature_matrix()
    assert all(restored == A)

    # load a scipy sparse matrix into the existing feature object
    feats.set_sparse_feature_matrix(X)
    restored = feats.get_full_feature_matrix()
    assert all(restored == A)

    # construct shogun sparse features directly from the scipy matrix
    feats = SparseRealFeatures(X)
    restored = feats.get_full_feature_matrix()
    assert all(restored == A)

    # pull the (data, row, indptr) csc arrays back out and densify
    Z = csc_matrix(feats.get_sparse_feature_matrix())
    assert all(Z.todense() == A)
def compute_output_plot_isolines(classifier, kernel=None, train=None, sparse=False, pos=None, neg=None, regression=False):
    """Evaluate a trained classifier over a 2-D grid for isoline plotting.

    Returns (x, y, z): the meshgrid coordinates and the classifier output
    reshaped to the grid.
    """
    size = 100

    # grid bounds: derived from the positive/negative clouds when supplied,
    # otherwise a fixed [-5, 5] window
    if pos is not None and neg is not None:
        x1_max = max(1.2 * pos[0, :])
        x1_min = min(1.2 * neg[0, :])
        x2_min = min(1.2 * neg[1, :])
        x2_max = max(1.2 * pos[1, :])
        x1 = linspace(x1_min, x1_max, size)
        x2 = linspace(x2_min, x2_max, size)
    else:
        x1 = linspace(-5, 5, size)
        x2 = linspace(-5, 5, size)

    x, y = meshgrid(x1, x2)

    dense = RealFeatures(array((ravel(x), ravel(y))))
    if sparse:
        test = SparseRealFeatures()
        test.obtain_from_simple(dense)
    else:
        test = dense

    # kernel machines need the kernel re-initialised on (train, test);
    # linear machines take the test features directly
    if kernel and train:
        kernel.init(train, test)
    else:
        classifier.set_features(test)

    if regression:
        labels = classifier.apply().get_labels()
    else:
        labels = classifier.apply().get_values()

    z = labels.reshape((size, size))
    return x, y, z
def compute_output_plot_isolines(classifier, kernel=None, train=None, sparse=False, pos=None, neg=None, regression=False):
    """Evaluate a trained classifier over a 2-D grid for isoline plotting.

    Returns (x, y, z): the meshgrid coordinates and the classifier output
    reshaped to the grid.
    """
    size = 100

    # grid bounds: derived from the positive/negative clouds when supplied,
    # otherwise a fixed [-5, 5] window
    if pos is not None and neg is not None:
        x1_max = max(1.2 * pos[0, :])
        x1_min = min(1.2 * neg[0, :])
        x2_min = min(1.2 * neg[1, :])
        x2_max = max(1.2 * pos[1, :])
        x1 = linspace(x1_min, x1_max, size)
        x2 = linspace(x2_min, x2_max, size)
    else:
        x1 = linspace(-5, 5, size)
        x2 = linspace(-5, 5, size)

    x, y = meshgrid(x1, x2)

    dense = RealFeatures(array((ravel(x), ravel(y))))
    if sparse:
        test = SparseRealFeatures()
        test.obtain_from_simple(dense)
    else:
        test = dense

    # kernel machines need the kernel re-initialised on (train, test);
    # linear machines take the test features directly
    if kernel and train:
        kernel.init(train, test)
    else:
        classifier.set_features(test)

    if regression:
        labels = classifier.apply().get_labels()
    else:
        # fixed: get_confidences() is the deprecated name for get_values();
        # use the current API consistently with the sibling implementation
        labels = classifier.apply().get_values()

    z = labels.reshape((size, size))
    return x, y, z
def features_read_svmlight_format_modular(fname):
    """Load an svmlight file, write it back out, and delete the copy."""
    import os
    from shogun.Features import SparseRealFeatures

    feats = SparseRealFeatures()
    labels = feats.load_svmlight_file(fname)
    feats.write_svmlight_file('testwrite.light', labels)
    os.unlink('testwrite.light')
def features_read_svmlight_format_modular(fname):
    """Read sparse features in svmlight format and verify they can be re-written."""
    import os
    from shogun.Features import SparseRealFeatures

    sparse = SparseRealFeatures()
    lab = sparse.load_svmlight_file(fname)
    # dump to a scratch file, then discard it
    sparse.write_svmlight_file('testwrite.light', lab)
    os.unlink('testwrite.light')
def convSparseToShog(data, delFeature=False):
    """Convert a list of sparse-feature records into shogun SparseRealFeatures.

    Each record's "feature" dict holds 1-based indices ("ind") and values
    ("val"); indices are shifted to 0-based for shogun. When delFeature is
    set, each record's feature entry is cleared after conversion to free
    memory.
    """
    shog = SparseRealFeatures()
    shog.create_sparse_feature_matrix(len(data))
    for i in xrange(len(data)):
        feat = data[i]["feature"]
        # shogun wants 0-based int32 indices and float64 values
        shog.set_sparse_feature_vector(i,
                                       feat["ind"].astype('i4') - 1,
                                       feat["val"].astype('f8'))
        if delFeature:
            data[i]["feature"] = None
    return shog
def classify(true_labels):
    """Train SVMOcas on random gaussian blobs and classify a fresh draw."""
    num_feats = 2
    num_vec = true_labels.get_num_labels()

    # training data: two blobs centred at -1 and +1
    train_data = numpy.concatenate(
        (numpy.random.randn(num_feats, num_vec / 2) - 1,
         numpy.random.randn(num_feats, num_vec / 2) + 1), axis=1)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(RealFeatures(train_data))

    svm = SVMOcas(3., train_feats, true_labels)
    svm.train()

    # independent test sample from the same distribution
    test_data = numpy.concatenate(
        (numpy.random.randn(num_feats, num_vec / 2) - 1,
         numpy.random.randn(num_feats, num_vec / 2) + 1), axis=1)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(RealFeatures(test_data))

    svm.set_features(test_feats)
    return numpy.array(svm.classify().get_labels())
def classifier_svmocas_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, C=0.9, epsilon=1e-5, num_threads=1):
    """Train SVMOcas on sparse real features and classify the test set.

    Returns (predictions, svm, prediction labels).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SVMOcas

    # sparse views of the dense train/test matrices
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = BinaryLabels(label_train_twoclass)

    svm = SVMOcas(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(False)
    svm.train()

    svm.set_features(feats_test)
    # apply once and reuse the result; the original called apply() twice
    # and discarded the first run's output
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def classifier_subgradientsvm_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon, max_train_time):
    """Train a SubGradientSVM on sparse features; return (test labels, svm)."""
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SubGradientSVM

    # wrap the dense train/test matrices as sparse shogun features
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(RealFeatures(fm_train_real))
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(RealFeatures(fm_test_real))

    labels = Labels(label_train_twoclass)

    svm = SubGradientSVM(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.set_max_train_time(max_train_time)
    svm.train()

    svm.set_features(feats_test)
    labels = svm.apply().get_labels()
    return labels, svm
def kernel_sparse_gaussian_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.1):
    """Gaussian kernel on sparse real features.

    Returns (km_train, km_test, kernel): the train/train and train/test
    kernel matrices plus the kernel object itself.
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import GaussianKernel

    train = SparseRealFeatures(fm_train_real)
    test = SparseRealFeatures(fm_test_real)

    gauss = GaussianKernel(train, train, width)
    km_train = gauss.get_kernel_matrix()

    # re-initialise on (train, test) for the cross kernel matrix
    gauss.init(train, test)
    km_test = gauss.get_kernel_matrix()

    return km_train, km_test, gauss
def classifier_svmsgd_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, C=0.9, num_threads=1, num_iter=5):
    """Train SVMSGD on sparse real features and classify the test set.

    Returns (predictions, svm, prediction labels). Note: num_threads is
    accepted for signature compatibility but not used by this variant.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SVMSGD

    # sparse views of the dense train/test matrices
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = BinaryLabels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    svm.set_epochs(num_iter)
    #svm.io.set_loglevel(0)
    svm.train()

    svm.set_features(feats_test)
    # apply once and reuse the result; the original called apply() twice
    # and discarded the first run's output
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def features_sparse_modular(A):
    """Exercise conversions between dense, scipy-sparse and shogun sparse."""
    from scipy.sparse import csc_matrix
    from shogun.Features import SparseRealFeatures
    from numpy import array, float64, all

    def _check(full):
        # a round-tripped matrix must match the dense original exactly
        assert all(full == A)

    # sparse view of A (recent scipy.sparse handles other dtypes too)
    X = csc_matrix(A)

    sf = SparseRealFeatures(A)            # built from the dense matrix
    _check(sf.get_full_feature_matrix())

    sf.set_sparse_feature_matrix(X)       # loaded from scipy sparse, in place
    _check(sf.get_full_feature_matrix())

    sf = SparseRealFeatures(X)            # built from scipy sparse directly
    _check(sf.get_full_feature_matrix())

    # back out through the (data, row, indptr) csc arrays
    _check(csc_matrix(sf.get_sparse_feature_matrix()).todense())
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Linear kernel with average-diagonal normalisation on sparse features.

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    train = SparseRealFeatures(fm_train_real)
    test = SparseRealFeatures(fm_test_real)

    kern = LinearKernel()
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    kern.init(train, train)
    km_train = kern.get_kernel_matrix()

    # cross kernel between train and test features
    kern.init(train, test)
    km_test = kern.get_kernel_matrix()

    return km_train, km_test, kern
def kernel_sparse_poly_modular(fm_train_real=traindat, fm_test_real=testdat, size_cache=10, degree=3, inhomogene=True):
    """Polynomial kernel on sparse real features.

    Returns (km_train, km_test, kernel).
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import PolyKernel

    train = SparseRealFeatures(fm_train_real)
    test = SparseRealFeatures(fm_test_real)

    poly = PolyKernel(train, train, size_cache, degree, inhomogene)
    km_train = poly.get_kernel_matrix()

    # cross kernel between train and test features
    poly.init(train, test)
    km_test = poly.get_kernel_matrix()

    return km_train, km_test, poly
def getSparseRealFeatures(self, sequences, method="frequences"):
    """Build shogun SparseRealFeatures from per-sequence k-mer statistics.

    method names one of this object's accumulator methods (presumably
    filling (values, indices) and returning (size, total) — confirm against
    the class). Frequencies use float values, other methods integer counts.
    """
    max_len = max((len(seq) for seq in sequences))
    kmer_ind = numpy.zeros(max_len, dtype='i8')
    if method == 'frequences':
        kmer_val = numpy.zeros(max_len, dtype='f4')
    else:
        kmer_val = numpy.zeros(max_len, dtype='i4')
    accumulate = getattr(self, method)

    feats = SparseRealFeatures()
    feats.create_sparse_feature_matrix(len(sequences))
    for i in xrange(len(sequences)):
        seq = sequences[i]
        if isinstance(seq, str):
            seq = numpy.fromstring(seq, 'S1')
        self.process(seq)
        (size, total) = accumulate(kmer_val, kmer_ind)
        # shogun expects 0-based int32 indices and float64 values; only the
        # first `size` entries of the scratch buffers are meaningful
        feats.set_sparse_feature_vector(i,
                                        kmer_ind[:size].astype('i4') - 1,
                                        kmer_val[:size].astype('f8'))
    return feats
def classifier_svmlin_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, C=0.9, epsilon=1e-5, num_threads=1):
    """Train SVMLin on sparse real features and classify the test set.

    Returns (predictions, svm, prediction labels).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SVMLin

    # sparse views of the dense train/test matrices
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = BinaryLabels(label_train_twoclass)

    svm = SVMLin(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    # exercise the model accessors (results intentionally unused in the demo)
    svm.get_bias()
    svm.get_w()
    # apply once and reuse the result; the original called apply() twice
    # and discarded the first run's output
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def classify(true_labels):
    """Fit SVMOcas on synthetic blobs and return labels for a fresh sample."""
    num_feats = 2
    num_vec = true_labels.get_num_labels()

    def _sparse_sample():
        # draw two gaussian clusters (centres -1 and +1) and sparsify them
        dense = numpy.concatenate(
            (numpy.random.randn(num_feats, num_vec/2)-1,
             numpy.random.randn(num_feats, num_vec/2)+1), axis=1)
        sparse = SparseRealFeatures()
        sparse.obtain_from_simple(RealFeatures(dense))
        return sparse

    svm = SVMOcas(3., _sparse_sample(), true_labels)
    svm.train()

    # classify an independent draw from the same distribution
    svm.set_features(_sparse_sample())
    return numpy.array(svm.classify().get_labels())
def svmlin (): print 'SVMLin' from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMLin realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) C=0.9 epsilon=1e-5 num_threads=1 labels=Labels(label_train_twoclass) svm=SVMLin(C, feats_train, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.set_bias_enabled(True) svm.train() svm.set_features(feats_test) svm.get_bias() svm.get_w() svm.classify().get_labels()
def subgradient_svm (): print 'SubGradientSVM' from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SubGradientSVM realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) C=0.9 epsilon=1e-3 num_threads=1 max_train_time=1. labels=Labels(label_train_twoclass) svm=SubGradientSVM(C, feats_train, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.set_bias_enabled(False) svm.set_max_train_time(max_train_time) svm.train() svm.set_features(feats_test) svm.classify().get_labels()
def svmsgd (): print 'SVMSGD' from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMSGD realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) C=0.9 num_threads=1 num_iter=5 labels=Labels(label_train_twoclass) svm=SVMSGD(C, feats_train, labels) svm.set_epochs(num_iter) #svm.io.set_loglevel(0) svm.train() svm.set_features(feats_test) labelPrediction = svm.classify().get_labels() print labelPrediction>0
def distance_sparseeuclidean_modular(fm_train_real=traindat, fm_test_real=testdat):
    """Sparse Euclidian distances; returns (distance, dm_train, dm_test)."""
    from shogun.Features import RealFeatures, SparseRealFeatures
    from shogun.Distance import SparseEuclidianDistance

    # sparse views of the dense train/test matrices
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(RealFeatures(fm_train_real))
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(RealFeatures(fm_test_real))

    dist = SparseEuclidianDistance(feats_train, feats_train)
    dm_train = dist.get_distance_matrix()

    # re-initialise on (train, test) for the cross distance matrix
    dist.init(feats_train, feats_test)
    dm_test = dist.get_distance_matrix()

    return dist, dm_train, dm_test
def sparse_euclidian_distance (): print 'SparseEuclidianDistance' from shogun.Features import RealFeatures, SparseRealFeatures from shogun.Distance import SparseEuclidianDistance realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) distance=SparseEuclidianDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix()
def classifier_svmsgd_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, C=0.9, num_threads=1, num_iter=5):
    """Train SVMSGD on sparse real features and classify the test set.

    Returns (predictions, svm, prediction labels). Note: num_threads is
    accepted for signature compatibility but not used by this variant.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMSGD

    # sparse views of the dense train/test matrices
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    svm.set_epochs(num_iter)
    #svm.io.set_loglevel(0)
    svm.train()

    svm.set_features(feats_test)
    # apply once and reuse the result; the original called apply() twice
    # and discarded the first run's output
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def svmsgd (): print 'SVMSGD' from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMSGD realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) C=0.9 epsilon=1e-5 num_threads=1 labels=Labels(label_train_twoclass) svm=SVMSGD(C, feats_train, labels) #svm.io.set_loglevel(0) svm.train() svm.set_features(feats_test) svm.classify().get_labels()
from scipy.sparse import csc_matrix
from shogun.Features import SparseRealFeatures
from numpy import array, float64, all

# Demo script: round-trip a dense matrix through shogun sparse features.
# create dense matrix A and its sparse representation X
# note, will work with types other than float64 too,
# but requires recent scipy.sparse
A=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64)
X=csc_matrix(A)
print A

# create sparse shogun features from dense matrix A
a=SparseRealFeatures(A)
a_out=a.get_full_feature_matrix()
print a_out
# densifying again must reproduce A exactly
assert(all(a_out==A))
print a_out

# create sparse shogun features from sparse matrix X
a.set_sparse_feature_matrix(X)
a_out=a.get_full_feature_matrix()
print a_out
assert(all(a_out==A))

# create sparse shogun features from sparse matrix X
a=SparseRealFeatures(X)
a_out=a.get_full_feature_matrix()
print a_out
assert(all(a_out==A))

# obtain (data,row,indptr) csc arrays of sparse shogun features
import os
from shogun.Features import SparseRealFeatures

# Demo script: load a sparse real-valued data set in svmlight format,
# write it back out to a scratch file, then remove the scratch file.
f=SparseRealFeatures()
lab=f.load_svmlight_file('../data/train_sparsereal.light')
f.write_svmlight_file('testwrite.light', lab)
os.unlink('testwrite.light')
def features_io_modular(fm_train_real, label_train_twoclass):
    """Round-trip shogun features and labels through Ascii/Binary/HDF5 files.

    Saves sparse features, dense features and multiclass labels to disk in
    several formats, loads them back, removes the temporary files, and
    returns (feats, feats2, lab, lab2) for inspection.
    """
    import numpy
    from shogun.Features import SparseRealFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.IO import AsciiFile, BinaryFile, HDF5File

    # sparse features: save to binary and ascii, then load both back
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)
    f = AsciiFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)
    f = AsciiFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # dense features: save to binary, HDF5 and ascii, then load back
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)
    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)
    f = AsciiFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    f = AsciiFile("fm_train_real.ascii")
    feats2.load(f)
    #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    # labels: save to ascii, binary and HDF5, then load back
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()

    f = AsciiFile("label_train_twoclass.ascii", "w")
    lab.save(f)
    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)
    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = AsciiFile("label_train_twoclass.ascii")
    lab2.load(f)
    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    # the HDF5 reads overwrite feats2/lab2 with the datasets written above
    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print(feats2.get_feature_matrix())
    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print(lab2.get_labels())

    # clean up all temporary files written above
    import os
    for f in [ 'fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
               'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
               'label_train_real.h5', 'label_train_twoclass.ascii',
               'label_train_twoclass.bin' ]:
        os.unlink(f)
    return feats, feats2, lab, lab2
def features_io_modular(fm_train_real, label_train_twoclass):
    """Round-trip shogun features and labels through Ascii/Binary/HDF5 files.

    Older variant using the Labels API: saves sparse features, dense
    features and labels in several formats, loads them back, removes the
    temporary files, and returns (feats, feats2, lab, lab2).
    """
    import numpy
    from shogun.Features import SparseRealFeatures, RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.IO import AsciiFile, BinaryFile, HDF5File

    # sparse features: save to binary and ascii, then load both back
    feats=SparseRealFeatures(fm_train_real)
    feats2=SparseRealFeatures()

    f=BinaryFile("fm_train_sparsereal.bin","w")
    feats.save(f)
    f=AsciiFile("fm_train_sparsereal.ascii","w")
    feats.save(f)

    f=BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)
    f=AsciiFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # dense features: save to binary, HDF5 and ascii, then load back
    feats=RealFeatures(fm_train_real)
    feats2=RealFeatures()

    f=BinaryFile("fm_train_real.bin","w")
    feats.save(f)
    f=HDF5File("fm_train_real.h5","w", "/data/doubles")
    feats.save(f)
    f=AsciiFile("fm_train_real.ascii","w")
    feats.save(f)

    f=BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print "diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

    f=AsciiFile("fm_train_real.ascii")
    feats2.load(f)
    #print "diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

    # labels: save to ascii, binary and HDF5, then load back
    lab=Labels(numpy.array([1.0,2.0,3.0]))
    lab2=Labels()

    f=AsciiFile("label_train_twoclass.ascii","w")
    lab.save(f)
    f=BinaryFile("label_train_twoclass.bin","w")
    lab.save(f)
    f=HDF5File("label_train_real.h5","w", "/data/labels")
    lab.save(f)

    f=AsciiFile("label_train_twoclass.ascii")
    lab2.load(f)
    f=BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    # the HDF5 reads overwrite feats2/lab2 with the datasets written above
    f=HDF5File("fm_train_real.h5","r", "/data/doubles")
    feats2.load(f)
    #print feats2.get_feature_matrix()
    f=HDF5File("label_train_real.h5","r", "/data/labels")
    lab2.load(f)
    #print lab2.get_labels()

    #clean up
    import os
    for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii',
              'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii',
              'label_train_real.h5',
              'label_train_twoclass.ascii','label_train_twoclass.bin']:
        os.unlink(f)
    return feats, feats2, lab, lab2