def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width=2.1,C=1,epsilon=1e-5,seed=1):
    """Train and apply a MulticlassOCAS classifier on synthetic data.

    Generates num_vec points in num_class dimensions where each class pair is
    linearly separable (the coordinate matching the class index is shifted by
    `distance`), trains MulticlassOCAS with regularization C, and applies the
    model to a test set drawn the same way.

    Returns a tuple (predicted labels as a numpy array, trained classifier).

    NOTE(review): `width` and `epsilon` are accepted for interface
    compatibility with sibling examples but are not used here.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassOCAS
    from modshogun import Math_init_random

    # reproducible results
    random.seed(seed)
    Math_init_random(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        # BUG FIX: labels are float64; indexing a numpy array with a float
        # raises IndexError/TypeError on modern numpy — cast to int first.
        fm_train[int(label_train[i]), i] += distance
        fm_test[int(label_test[i]), i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    labels = MulticlassLabels(label_train)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    return out, classifier
def RunKMeansShogun(q):
    """Run shogun K-Means once and post the elapsed time (or -1) to q.

    Reads `self.dataset`, `self.verbose`, and `options` (a dict) from the
    enclosing scope.  If the dataset contains two files, the second file
    provides the initial centroids.  Returns the elapsed time on success,
    -1 on failure.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) == 2:
        data = np.genfromtxt(self.dataset[0], delimiter=',')
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
    else:
        data = np.genfromtxt(self.dataset[0], delimiter=',')

    # Gather parameters.
    if "clusters" in options:
        clusters = int(options.pop("clusters"))
    elif len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
    else:
        # BUG FIX: `clusters` was left unbound in the two-file case when no
        # "clusters" option was given; take the count from the centroids file.
        clusters = centroids.shape[0]

    # BUG FIX: the parsed value was stored in `maxIterations` but the call
    # below referenced an undefined name `m`; default to 1000 iterations.
    maxIterations = 1000
    if "max_iterations" in options:
        maxIterations = int(options.pop("max_iterations"))

    seed = None
    if "seed" in options:
        seed = int(options.pop("seed"))

    # Any options left over are unrecognized — fail loudly.
    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    if seed:
        Math_init_random(seed)

    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            if len(self.dataset) == 2:
                model = KMeans(clusters, distance, centroids.T)
            else:
                model = KMeans(clusters, distance)

            model.set_max_iter(maxIterations)
            model.train()

            labels = model.apply().get_labels()
            centers = model.get_cluster_centers()
    except Exception as e:
        print(e)
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def RunKMeansShogun(q):
    """Run shogun K-Means once and post the elapsed time (or -1) to q.

    Parses parameters from the command-line style `options` string
    ("-c <clusters> -m <max_iterations> -s <seed>").  If the dataset
    contains two files, the second file provides the initial centroids.
    Returns the elapsed time on success, -1 on failure.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) == 2:
        data = np.genfromtxt(self.dataset[0], delimiter=',')
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
    else:
        data = np.genfromtxt(self.dataset[0], delimiter=',')

    # Gather parameters.
    # BUG FIX: use raw strings — "\d" is an invalid escape sequence in
    # Python 3 (SyntaxWarning, an error from 3.12).
    clusters = re.search(r"-c (\d+)", options)
    maxIterations = re.search(r"-m (\d+)", options)
    seed = re.search(r"-s (\d+)", options)

    # Now do validation of options.
    if not clusters and len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
    elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
        Log.Fatal("Invalid number of clusters requested! Must be greater than" +
                  " or equal to 1.")
        q.put(-1)
        return -1

    m = 1000 if not maxIterations else int(maxIterations.group(1))

    if seed:
        # BUG FIX: Math_init_random expects an integer seed; the regex
        # match group is a string.
        Math_init_random(int(seed.group(1)))

    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            if len(self.dataset) == 2:
                model = KMeans(int(clusters.group(1)), distance,
                               RealFeatures(centroids))
            else:
                model = KMeans(int(clusters.group(1)), distance)

            # NOTE(review): the sibling variant calls set_max_iter here;
            # set_mbKMeans_iter targets mini-batch K-Means — confirm which
            # shogun API version this file is written against.
            model.set_mbKMeans_iter(m)
            model.train()

            labels = model.apply().get_labels()
            centers = model.get_cluster_centers()
    except Exception as e:
        print(e)
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def classifier_multiclass_ecoc_random(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1, C=1, epsilon=1e-5):
    """Multiclass classification with random dense and sparse ECOC codes.

    Trains two LinearMulticlassMachine models — one using a random dense
    ECOC encoder, one using a random sparse encoder, both decoded with
    ECOCHDDecoder over a LibLinear L2R_L2LOSS_SVC base learner — and
    applies each to the test features.  When test labels are supplied,
    the accuracy of both strategies is printed.

    Returns (sparse-strategy predictions, dense-strategy predictions).
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder
    from modshogun import Math_init_random

    # Fixed seed: the random encoders draw from shogun's global RNG.
    Math_init_random(12345)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # Both strategies are built before any training so the seeded RNG
    # stream is consumed in the same order as always.
    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_machine = LinearMulticlassMachine(dense_strategy, feats_train, base_learner, labels)
    dense_machine.train()
    label_dense = dense_machine.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_machine = LinearMulticlassMachine(sparse_strategy, feats_train, base_learner, labels)
    sparse_machine.train()
    label_sparse = sparse_machine.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy  = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def clustering_kmeans_modular (fm_train=traindat,k=3):
    """Cluster CSV-loaded features into k groups using shogun K-Means.

    Seeds shogun's global RNG for reproducible results, then clusters the
    features read from `fm_train` under Euclidean distance.

    Returns (cluster centers, trained KMeans object).
    """
    from modshogun import EuclideanDistance, RealFeatures, KMeans, Math_init_random, CSVFile

    # Fixed seed so the example output is reproducible.
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(fm_train))
    metric = EuclideanDistance(train_features, train_features)

    clusterer = KMeans(k, metric)
    clusterer.train()

    centers = clusterer.get_cluster_centers()
    # Computed for its side effect on the model, as in the original example.
    clusterer.get_radiuses()

    return centers, clusterer
def _test_mfile(fnam):
    """Parse a matlab-style test-data file and run the matching test function.

    Each `param=value` line is folded into the `indata` dict: matrix-valued
    parameters go through _read_matrix, quoted strings are extracted
    verbatim, and everything else is eval'd.  After seeding both shogun's
    and Python's RNGs from indata['init_random'], the test function looked
    up from the filename is invoked with the parsed data.

    Returns the test function's result, or False when the file cannot be
    opened or no test function matches the filename.
    """
    try:
        mfile = open(fnam, mode='r')
    except IOError as e:
        print(e)
        return False

    # BUG FIX: the original leaked the open file handle on the early
    # `name_fun is None` return; the context manager closes it on every path.
    with mfile:
        indata = {}

        name_fun = _get_name_fun(fnam)
        if name_fun is None:
            return False

        for line in mfile:
            line = line.strip(" \t\n;")
            param = line.split('=')[0].strip()

            if param == 'name':
                name = line.split('=')[1].strip().split("'")[1]
                indata[param] = name
            elif param == 'kernel_symdata' or param == 'kernel_data':
                indata[param] = _read_matrix(line)
            elif param.startswith('kernel_matrix') or \
                    param.startswith('distance_matrix'):
                indata[param] = _read_matrix(line)
            elif param.find('data_train') > -1 or param.find('data_test') > -1:
                # data_{train,test} might be prepended by 'subkernelX_'
                indata[param] = _read_matrix(line)
            elif param == 'classifier_alphas' or param == 'classifier_support_vectors':
                try:
                    indata[param] = eval(line.split('=')[1])
                except SyntaxError:
                    # might be MultiClass SVM and hence matrix
                    indata[param] = _read_matrix(line)
            elif param == 'clustering_centers' or param == 'clustering_pairs':
                indata[param] = _read_matrix(line)
            else:
                if (line.find("'") == -1):
                    # NOTE(review): eval on file contents — acceptable only
                    # because these are the project's own test fixtures.
                    indata[param] = eval(line.split('=')[1])
                else:
                    indata[param] = line.split('=')[1].strip().split("'")[1]

    fun = eval(name_fun)

    # seed random to constant value used at data file's creation
    Math_init_random(indata['init_random'])
    random.seed(indata['init_random'])

    return fun(indata)
def converter_tdistributedstochasticneighborembedding_modular(data_fname, seed=1):
    """Embed CSV-loaded features into two dimensions with t-SNE.

    Seeds shogun's RNG for reproducible results and returns the embedded
    features.  Prints a notice and returns None when shogun was built
    without Eigen3 (the converter class is then unavailable).
    """
    try:
        from modshogun import RealFeatures, TDistributedStochasticNeighborEmbedding
        from modshogun import Math_init_random, CSVFile

        # reproducible results
        Math_init_random(seed)

        source = RealFeatures(CSVFile(data_fname))

        tsne = TDistributedStochasticNeighborEmbedding()
        tsne.set_target_dim(2)

        return tsne.apply(source)
    except ImportError:
        print('No Eigen3 available')
def classifier_liblinear_modular (train_fname, test_fname, label_fname, C, epsilon):
    """Train a LibLinear binary SVM (L2R_L2LOSS_SVC_DUAL) and classify.

    Loads training/test features and binary labels from CSV files, trains
    with regularization C and stopping tolerance epsilon (bias enabled),
    then applies the model to the test features.

    Returns (prediction labels object, trained svm, label array).
    """
    from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL
    from modshogun import Math_init_random, CSVFile

    # Fixed seed so the example output is reproducible.
    Math_init_random(17)

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    svm = LibLinear(C, train_feats, train_labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    predictions = svm.apply(test_feats)
    return predictions, svm, predictions.get_labels()
def neuralnets_simple_modular(train_fname, test_fname, label_fname, C, epsilon):
    """Train a minimal softmax neural network and classify the test set.

    Builds an input -> linear(50) -> softmax(2) network over CSV-loaded
    training features and binary labels, trains it, and returns the
    network together with its multiclass predictions on the test features.

    NOTE(review): `C` and `epsilon` are accepted for interface
    compatibility with sibling examples but are not used here.
    """
    from modshogun import NeuralLayers, NeuralNetwork, RealFeatures, BinaryLabels
    from modshogun import Math_init_random, CSVFile

    # Fixed seed so the example output is reproducible.
    Math_init_random(17)

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # Describe the topology, then instantiate and wire up the network.
    builder = NeuralLayers()
    topology = builder.input(train_feats.get_num_features()).linear(50).softmax(2).done()
    network = NeuralNetwork(topology)
    network.quick_connect()
    network.initialize_neural_network()

    network.set_labels(train_labels)
    network.train(train_feats)

    return network, network.apply_multiclass(test_feats)
#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]

from modshogun import Math_init_random
Math_init_random(12345)


def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply the random Fourier Gaussian preprocessor to train/test features.

    Initializes RandomFourierGaussPreproc on the training features and
    applies it in place to both the training and the test feature objects.

    Returns the preprocessed (train, test) features and the preprocessor.
    BUG FIX: the original computed everything and returned None, discarding
    its results; the unused Chi2Kernel import was dropped as well.

    NOTE(review): `width` and `size_cache` are accepted for interface
    compatibility with sibling preprocessor examples but are not used here.
    """
    from modshogun import RealFeatures
    from modshogun import RandomFourierGaussPreproc

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    preproc = RandomFourierGaussPreproc()
    preproc.init(feats_train)

    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    return feats_train, feats_test, preproc