def RunAllKnnShogun(q):
    """Run Shogun k-nearest-neighbors once and report the elapsed time.

    ``self`` and ``options`` are not parameters of this function; they are
    read from the surrounding scope, which lies outside this excerpt.

    Args:
        q: Object with a ``put`` method. It receives the elapsed time on
            success, or -1 on failure.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or -1 when the
        ``-k`` option is missing or out of range, or when any exception is
        raised while loading data or running the model.
    """
    totalTimer = Timer()

    try:
        # Load input dataset.
        # If the dataset contains two files then the second file is the
        # query file.
        Log.Info("Loading dataset", self.verbose)
        hasQuery = len(self.dataset) == 2
        if hasQuery:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            # Fixed: the features must be built from queryData; the previous
            # code read queryFeat before it was ever assigned.
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # Labels are the last column of the dataset; the remaining columns
        # are kept as the reference points.
        labels = MulticlassLabels(
            referenceData[:, (referenceData.shape[1] - 1)])
        referenceData = referenceData[:, :-1]

        with totalTimer:
            # Get all the parameters.
            match = re.search(r"-k (\d+)", options)
            if not match:
                # NOTE(review): the message says "furthest" although this
                # routine runs nearest neighbors; text left unchanged.
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = int(match.group(1))
            if k < 1 or k > referenceData.shape[0]:
                # Fixed: k is already an int here, so format it with str()
                # instead of calling .group() on it.
                Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0"
                          + " and less or equal than "
                          + str(referenceData.shape[0]))
                q.put(-1)
                return -1

            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)

            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()

            # The predicted labels are not used; only the elapsed time is
            # reported by this function.
            if hasQuery:
                model.apply(queryFeat).get_labels()
            else:
                model.apply(referenceFeat).get_labels()
    except Exception as e:
        # Report the failure instead of swallowing it silently, then signal
        # the error to the caller the same way as before.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunAllKnnShogun(q):
    """Time one Shogun k-nearest-neighbors run and publish the result.

    ``self`` and ``options`` are not parameters of this function; they are
    read from the surrounding scope, which lies outside this excerpt.

    Args:
        q: Object with a ``put`` method. It receives the elapsed time on
            success, or -1 on failure.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or -1 when the
        ``-k`` option is missing or out of range, or when any exception is
        raised while loading data or running the model.
    """
    totalTimer = Timer()

    try:
        # Load input dataset.
        # If the dataset contains two files then the second file is the
        # query file.
        Log.Info("Loading dataset", self.verbose)
        hasQuery = len(self.dataset) == 2
        if hasQuery:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            # Fixed: build the query features from queryData. The earlier
            # code referenced queryFeat before assignment.
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # Labels are the last column of the dataset; strip that column from
        # the reference points afterwards.
        labelColumn = referenceData.shape[1] - 1
        labels = MulticlassLabels(referenceData[:, labelColumn])
        referenceData = referenceData[:, :-1]

        with totalTimer:
            # Get all the parameters.
            match = re.search(r"-k (\d+)", options)
            if not match:
                # NOTE(review): the message says "furthest" although this
                # routine runs nearest neighbors; text left unchanged.
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = int(match.group(1))
            numPoints = referenceData.shape[0]
            if k < 1 or k > numPoints:
                # Fixed: k is an int at this point, so it has no .group().
                Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0"
                          + " and less or equal than " + str(numPoints))
                q.put(-1)
                return -1

            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)

            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()

            # The predictions themselves are discarded; this function only
            # reports the elapsed time.
            target = queryFeat if hasQuery else referenceFeat
            model.apply(target).get_labels()
    except Exception as e:
        # Log the failure rather than hiding it, then report -1 as before.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def assign_labels(data, centroids, ncenters=10):
    """Classify ``data`` with a 1-nearest-neighbor model built on centroids.

    A KNN classifier with k=1 and Euclidean distance is trained on the
    centroid features, whose labels are 1.0, 2.0, ..., ``ncenters``, and is
    then applied to ``data``.

    Args:
        data: Input passed to ``RealFeatures`` for the points to label.
        centroids: Input passed to ``RealFeatures`` for the centroids.
        ncenters: Number of centroid labels to generate. Defaults to 10,
            which reproduces the previously hard-coded ``arange(1., 11.)``.
            Presumably this must equal the number of centroids -- confirm
            against the caller.

    Returns:
        The object returned by ``KNN.apply`` for ``data``.
    """
    # Import everything the function uses locally so it does not rely on
    # module-level names that may not be present.
    from shogun.Features import RealFeatures, Labels
    from shogun.Distance import EuclidianDistance
    from shogun.Classifier import KNN
    from numpy import arange

    labels = Labels(arange(1., ncenters + 1.))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    distance = EuclidianDistance(fea_centroids, fea_centroids)

    knn = KNN(1, distance, labels)
    knn.train()
    return knn.apply(fea)
def assign_labels(data, centroids, ncenters=10):
    """Label ``data`` using a k=1 KNN classifier trained on ``centroids``.

    The centroid features are given the labels 1.0 through ``ncenters``; a
    KNN classifier with k=1 and Euclidean distance is trained on them and
    then applied to ``data``.

    Args:
        data: Input passed to ``RealFeatures`` for the points to label.
        centroids: Input passed to ``RealFeatures`` for the centroids.
        ncenters: How many centroid labels to generate. The default of 10
            matches the previously hard-coded ``arange(1., 11.)``.
            Presumably this must equal the number of centroids -- confirm
            against the caller.

    Returns:
        The object returned by ``KNN.apply`` for ``data``.
    """
    # Local imports keep the function self-contained instead of depending
    # on names that might only exist at module level.
    from shogun.Features import RealFeatures, Labels
    from shogun.Distance import EuclidianDistance
    from shogun.Classifier import KNN
    from numpy import arange

    centroid_labels = Labels(arange(1., ncenters + 1.))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    distance = EuclidianDistance(fea_centroids, fea_centroids)

    knn = KNN(1, distance, centroid_labels)
    knn.train()
    return knn.apply(fea)
def assign_labels(data, centroids, ncenters):
    """Classify ``data`` with a 1-nearest-neighbor model built on centroids.

    The centroid features receive the labels 0.0 up to (but excluding)
    ``ncenters``. A KNN classifier with k=1 and Euclidean distance is
    trained on them and applied to ``data``.

    Args:
        data: Input passed to ``RealFeatures`` for the points to label.
        centroids: Input passed to ``RealFeatures`` for the centroids.
        ncenters: Upper bound of the generated label range.

    Returns:
        The object returned by ``KNN.apply`` for ``data``.
    """
    from shogun.Distance import EuclideanDistance
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import KNN
    from numpy import arange

    centroid_labels = MulticlassLabels(arange(0., ncenters))
    data_features = RealFeatures(data)
    centroid_features = RealFeatures(centroids)

    # The distance is defined over the centroid features on both sides.
    centroid_distance = EuclideanDistance(centroid_features,
                                          centroid_features)

    classifier = KNN(1, centroid_distance, centroid_labels)
    classifier.train()
    return classifier.apply(data_features)
def assign_labels(data, centroids, ncenters):
    """Label ``data`` using a k=1 KNN classifier trained on ``centroids``.

    Labels ``arange(0., ncenters)`` are attached to the centroid features,
    a KNN classifier (k=1, Euclidean distance) is trained on them, and the
    trained classifier is applied to ``data``.

    Args:
        data: Input passed to ``RealFeatures`` for the points to label.
        centroids: Input passed to ``RealFeatures`` for the centroids.
        ncenters: Upper bound of the generated label range.

    Returns:
        The object returned by ``KNN.apply`` for ``data``.
    """
    from shogun.Distance import EuclideanDistance
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import KNN
    from numpy import arange

    label_values = arange(0., ncenters)
    labels = MulticlassLabels(label_values)

    points = RealFeatures(data)
    centers = RealFeatures(centroids)

    model = KNN(1, EuclideanDistance(centers, centers), labels)
    model.train()

    assigned = model.apply(points)
    return assigned
def classifier_knn_modular(fm_train_real=traindat, fm_test_real=testdat,
                           label_train_multiclass=label_traindat, k=3):
    """Train a Shogun KNN classifier and label the test data.

    Args:
        fm_train_real: Training input passed to ``RealFeatures``.
        fm_test_real: Test input passed to ``RealFeatures``.
        label_train_multiclass: Training labels passed to ``Labels``.
        k: Neighborhood size handed to ``KNN``.

    Returns:
        A tuple ``(knn, knn_train, output)``: the classifier, the value
        returned by ``knn.train()``, and the labels obtained from
        ``knn.apply(...).get_labels()`` on the test features.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    # The distance is defined over the training features on both sides.
    train_distance = EuclidianDistance(train_features, train_features)
    train_labels = Labels(label_train_multiclass)

    classifier = KNN(k, train_distance, train_labels)
    train_result = classifier.train()

    predicted = classifier.apply(test_features).get_labels()
    return classifier, train_result, predicted
def classifier_knn_modular(fm_train_real=traindat, fm_test_real=testdat,
                           label_train_multiclass=label_traindat, k=3):
    """Train a Shogun KNN classifier, label the test data, and query
    ``classify_for_multiple_k``.

    Args:
        fm_train_real: Training input passed to ``RealFeatures``.
        fm_test_real: Test input passed to ``RealFeatures``.
        label_train_multiclass: Training labels passed to ``Labels``.
        k: Neighborhood size handed to ``KNN``.

    Returns:
        A tuple ``(knn, knn_train, output, multiple_k)``: the classifier,
        the value returned by ``knn.train()``, the labels obtained from
        ``knn.apply(...).get_labels()`` on the test features, and the value
        returned by ``knn.classify_for_multiple_k()``.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    # The distance is defined over the training features on both sides.
    train_distance = EuclidianDistance(train_features, train_features)
    train_labels = Labels(label_train_multiclass)

    classifier = KNN(k, train_distance, train_labels)
    train_result = classifier.train()

    predicted = classifier.apply(test_features).get_labels()
    per_k_result = classifier.classify_for_multiple_k()
    return classifier, train_result, predicted, per_k_result
# Arrange the flat training values into rows of 10000 values each.
trainData = trainData.reshape(-1, 10000)
# Closes the file handle that was opened before this excerpt.
f.close()

# Build the data directory with os.path.join: plain string concatenation of
# dirname(__file__) and '../data/...' drops the path separator whenever the
# directory name is non-empty.
dataDir = os.path.join(os.path.dirname(__file__), '..', 'data')

# The with-blocks close each file even if reading raises.
with open(os.path.join(dataDir, 'arcene_train.label')) as f:
    trainLabel = np.fromfile(f, dtype=np.int32, sep=' ')

# Load test data.
with open(os.path.join(dataDir, 'arcene_test.data')) as f:
    testData = np.fromfile(f, dtype=np.float64, sep=' ')
testData = testData.reshape(-1, 10000)

with open(os.path.join(dataDir, 'arcene_test.label')) as f:
    testLabel = np.fromfile(f, dtype=np.float64, sep=' ')

# Construct a KNN classifier with a neighborhood size of 9.
feat = RealFeatures(trainData.T)
distance = EuclideanDistance(feat, feat)
labels = BinaryLabels(trainLabel.astype(np.float64))
testFeat = RealFeatures(testData.T)
knn = KNN(9, distance, labels)
knn.train()

# Predict the classification.
output = knn.apply(testFeat).get_labels()

# Validate the classification by printing the comparison of the predicted
# labels with the expected ones. The single-argument call form prints the
# same under Python 2 and Python 3.
print(output == testLabel)