def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width=2.1,C=1,epsilon=1e-5,seed=1):
	from modshogun import RealFeatures, MulticlassLabels
	from modshogun import MulticlassOCAS
	from modshogun import Math_init_random
	from numpy import array, mod, random

	# reproducible results
	random.seed(seed)
	Math_init_random(seed)

	# generate some training data where each class pair is linearly separable
	label_train=array([mod(x,num_class) for x in range(num_vec)],dtype="float64")
	label_test=array([mod(x,num_class) for x in range(num_vec)],dtype="float64")
	fm_train=array(random.randn(num_class,num_vec))
	fm_test=array(random.randn(num_class,num_vec))
	for i in range(len(label_train)):
		fm_train[int(label_train[i]),i]+=distance
		fm_test[int(label_test[i]),i]+=distance

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)

	labels=MulticlassLabels(label_train)

	classifier = MulticlassOCAS(C,feats_train,labels)
	classifier.train()

	out = classifier.apply(feats_test).get_labels()
	#print label_test
	#print out
	return out,classifier
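
# A minimal usage sketch (not part of the original example), assuming the
# Shogun modular Python bindings (modshogun) and NumPy are installed:
if __name__=='__main__':
	print('MulticlassOCAS')
	predictions, machine = classifier_multiclassocas_modular(num_vec=10, num_class=3, seed=1)
	print(predictions)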
Example #2
        def RunKMeansShogun(q):
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the centroids
            # file.
            Log.Info("Loading dataset", self.verbose)
            if len(self.dataset) == 2:
                data = np.genfromtxt(self.dataset[0], delimiter=',')
                centroids = np.genfromtxt(self.dataset[1], delimiter=',')
            else:
                data = np.genfromtxt(self.dataset[0], delimiter=',')

            # Gather parameters.
            if "clusters" in options:
                clusters = int(options.pop("clusters"))
            elif len(self.dataset) != 2:
                Log.Fatal(
                    "Required option: Number of clusters or cluster locations."
                )
                q.put(-1)
                return -1
            else:
                # No explicit cluster count given; infer it from the centroids file.
                clusters = centroids.shape[0]
            maxIterations = 1000
            if "max_iterations" in options:
                maxIterations = int(options.pop("max_iterations"))
            seed = None
            if "seed" in options:
                seed = int(options.pop("seed"))

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            if seed:
                Math_init_random(seed)
            try:
                dataFeat = RealFeatures(data.T)
                distance = EuclideanDistance(dataFeat, dataFeat)

                # Create the K-Means object and perform K-Means clustering.
                with totalTimer:
                    if len(self.dataset) == 2:
                        model = KMeans(clusters, distance, centroids.T)
                    else:
                        model = KMeans(clusters, distance)

                    model.set_max_iter(maxIterations)
                    model.train()

                    labels = model.apply().get_labels()
                    centers = model.get_cluster_centers()
            except Exception as e:
                print(e)
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example #3
    def RunKMeansShogun(q):
      totalTimer = Timer()

      # Load input dataset.
      # If the dataset contains two files then the second file is the centroids
      # file.
      Log.Info("Loading dataset", self.verbose)
      if len(self.dataset) == 2:
        data = np.genfromtxt(self.dataset[0], delimiter=',')
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
      else:
        data = np.genfromtxt(self.dataset[0], delimiter=',')

      # Gather parameters.
      clusters = re.search(r"-c (\d+)", options)
      maxIterations = re.search(r"-m (\d+)", options)
      seed = re.search(r"-s (\d+)", options)

      # Now do validation of options.
      if not clusters and len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
      elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
        Log.Fatal("Invalid number of clusters requested! Must be greater than"
            + " or equal to 1.")
        q.put(-1)
        return -1

      m = 1000 if not maxIterations else int(maxIterations.group(1))


      if seed:
        Math_init_random(int(seed.group(1)))
      try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
          if len(self.dataset) == 2:
            # Cluster count falls back to the number of rows in the centroids file.
            k = int(clusters.group(1)) if clusters else centroids.shape[0]
            model = KMeans(k, distance, RealFeatures(centroids.T))
          else:
            model = KMeans(int(clusters.group(1)), distance)

          model.set_max_iter(m)
          model.train()

          labels = model.apply().get_labels()
          centers = model.get_cluster_centers()
      except Exception as e:
        print(e)
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def classifier_multiclass_ecoc_random(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder
    from modshogun import Math_init_random
    Math_init_random(12345)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(),
                                      ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(),
                                       ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train,
                                               classifier, labels)
    dense_classifier.train()
    label_dense = dense_classifier.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy,
                                                feats_train, classifier,
                                                labels)
    sparse_classifier.train()
    label_sparse = sparse_classifier.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy  = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
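
# A minimal usage sketch (not part of the original example). The file paths
# and the LoadMatrix helper follow the pattern of Example #10 below and are
# illustrative only; passing None skips the accuracy report.
if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()
    traindat = lm.load_numbers('../data/fm_train_real.dat')
    testdat = lm.load_numbers('../data/fm_test_real.dat')
    label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
    print('MulticlassECOCRandom')
    classifier_multiclass_ecoc_random(traindat, testdat, label_traindat, None)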
Example #5
def clustering_kmeans_modular (fm_train=traindat,k=3):
	from modshogun import EuclideanDistance, RealFeatures, KMeans, Math_init_random, CSVFile
	Math_init_random(17)

	feats_train=RealFeatures(CSVFile(fm_train))
	distance=EuclideanDistance(feats_train, feats_train)

	kmeans=KMeans(k, distance)
	kmeans.train()

	out_centers = kmeans.get_cluster_centers()
	kmeans.get_radiuses()

	return out_centers, kmeans
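
# A minimal usage sketch (not part of the original example); the CSV path is
# illustrative and follows the data layout of the other examples on this page:
if __name__=='__main__':
	print('KMeans')
	centers, machine = clustering_kmeans_modular('../data/fm_train_real.dat', 3)
	print(centers)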
Example #6
def _test_mfile(fnam):
    try:
        mfile = open(fnam, mode='r')
    except IOError as e:
        print(e)
        return False

    indata = {}

    name_fun = _get_name_fun(fnam)
    if name_fun is None:
        return False

    for line in mfile:
        line = line.strip(" \t\n;")
        param = line.split('=')[0].strip()

        if param == 'name':
            name = line.split('=')[1].strip().split("'")[1]
            indata[param] = name
        elif param == 'kernel_symdata' or param == 'kernel_data':
            indata[param] = _read_matrix(line)
        elif param.startswith('kernel_matrix') or \
                param.startswith('distance_matrix'):
            indata[param] = _read_matrix(line)
        elif param.find('data_train') > -1 or param.find('data_test') > -1:
            # data_{train,test} might be prepended by 'subkernelX_'
            indata[param] = _read_matrix(line)
        elif param == 'classifier_alphas' or param == 'classifier_support_vectors':
            try:
                indata[param] = eval(line.split('=')[1])
            except SyntaxError:  # might be MultiClass SVM and hence matrix
                indata[param] = _read_matrix(line)
        elif param == 'clustering_centers' or param == 'clustering_pairs':
            indata[param] = _read_matrix(line)
        else:
            if (line.find("'") == -1):
                indata[param] = eval(line.split('=')[1])
            else:
                indata[param] = line.split('=')[1].strip().split("'")[1]

    mfile.close()
    fun = eval(name_fun)

    # seed random to constant value used at data file's creation
    Math_init_random(indata['init_random'])
    random.seed(indata['init_random'])

    return fun(indata)
Example #7
def converter_tdistributedstochasticneighborembedding_modular(
        data_fname, seed=1):
    try:
        from modshogun import RealFeatures, TDistributedStochasticNeighborEmbedding
        from modshogun import Math_init_random, CSVFile

        # reproducible results
        Math_init_random(seed)
        features = RealFeatures(CSVFile(data_fname))

        converter = TDistributedStochasticNeighborEmbedding()
        converter.set_target_dim(2)

        embedding = converter.apply(features)

        return embedding
    except ImportError:
        print('No Eigen3 available')
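
# A minimal usage sketch (not part of the original example); the CSV path is
# illustrative only:
if __name__ == '__main__':
    print('TDistributedStochasticNeighborEmbedding')
    embedding = converter_tdistributedstochasticneighborembedding_modular(
        '../data/fm_train_real.dat', seed=1)
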
def classifier_liblinear_modular (train_fname, test_fname,
		label_fname, C, epsilon):

	from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels
	from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL
	from modshogun import Math_init_random, CSVFile
	Math_init_random(17)

	feats_train=RealFeatures(CSVFile(train_fname))
	feats_test=RealFeatures(CSVFile(test_fname))
	labels=BinaryLabels(CSVFile(label_fname))

	svm=LibLinear(C, feats_train, labels)
	svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
	svm.set_epsilon(epsilon)
	svm.set_bias_enabled(True)
	svm.train()

	predictions = svm.apply(feats_test)
	return predictions, svm, predictions.get_labels()
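
# A minimal usage sketch (not part of the original example); the file names
# and the C/epsilon values are illustrative only:
if __name__=='__main__':
	print('LibLinear')
	predictions, svm, labels = classifier_liblinear_modular('../data/fm_train_real.dat',
		'../data/fm_test_real.dat', '../data/label_train_twoclass.dat', 0.9, 1e-5)
	print(labels)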
Example #9
def neuralnets_simple_modular(train_fname, test_fname, label_fname, C,
                              epsilon):

    from modshogun import NeuralLayers, NeuralNetwork, RealFeatures, BinaryLabels
    from modshogun import Math_init_random, CSVFile
    Math_init_random(17)

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))

    layers = NeuralLayers()
    network = NeuralNetwork(
        layers.input(
            feats_train.get_num_features()).linear(50).softmax(2).done())
    network.quick_connect()
    network.initialize_neural_network()

    network.set_labels(labels)
    network.train(feats_train)
    return network, network.apply_multiclass(feats_test)
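
# A minimal usage sketch (not part of the original example); the file names
# are illustrative, and C/epsilon are accepted but unused by the network above:
if __name__ == '__main__':
    print('NeuralNets')
    network, predictions = neuralnets_simple_modular(
        '../data/fm_train_real.dat', '../data/fm_test_real.dat',
        '../data/label_train_twoclass.dat', 1, 1e-5)
    print(predictions.get_labels())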
Example #10
#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]

from modshogun import Math_init_random
Math_init_random(12345)


def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat,
                                                   fm_test_real=testdat,
                                                   width=1.4,
                                                   size_cache=10):
    from modshogun import Chi2Kernel
    from modshogun import RealFeatures
    from modshogun import RandomFourierGaussPreproc

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    preproc = RandomFourierGaussPreproc()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()
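
# A minimal usage sketch (not part of the original example), reusing the
# parameter_list defined at the top of this example:
if __name__ == '__main__':
    print('RandomFourierGaussPreproc')
    preprocessor_randomfouriergausspreproc_modular(*parameter_list[0])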