def RunKMeansShogun(q):
    """Time a Shogun K-Means run and report the outcome through ``q``.

    ``self`` and ``options`` are read from the enclosing scope. When
    ``self.dataset`` holds two files, the second one is loaded as the
    centroids file and handed to ``KMeans``.

    Recognised ``options`` keys (each is popped once consumed):
    ``clusters``, ``max_iterations`` (1000 when absent) and ``seed``.

    Args:
        q: Queue-like object; the elapsed time, or -1 on failure, is handed
            to it with ``q.put``.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, -1 on failure.

    Raises:
        Exception: If ``options`` still has entries after the known keys
            were consumed.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    haveCentroids = len(self.dataset) == 2
    data = np.genfromtxt(self.dataset[0], delimiter=',')
    if haveCentroids:
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')

    # Gather parameters.
    if "clusters" in options:
        clusters = int(options.pop("clusters"))
    elif not haveCentroids:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
    # NOTE(review): with a centroids file but no "clusters" option, `clusters`
    # is left unbound; the NameError raised at the KMeans() call is caught by
    # the handler below and reported as -1. Confirm whether the cluster count
    # should instead be taken from the centroids file.

    # Always bind the iteration limit so set_max_iter() receives a value even
    # when the caller did not pass "max_iterations".
    maxIterations = 1000
    if "max_iterations" in options:
        maxIterations = int(options.pop("max_iterations"))

    seed = None
    if "seed" in options:
        seed = int(options.pop("seed"))

    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    # Compare against None so an explicit seed of 0 is honoured as well.
    if seed is not None:
        Math_init_random(seed)

    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            if haveCentroids:
                model = KMeans(clusters, distance, centroids.T)
            else:
                model = KMeans(clusters, distance)
            model.set_max_iter(maxIterations)
            model.train()

            # The results are not used further; the calls are kept so the
            # benchmark performs the same work as before.
            labels = model.apply().get_labels()
            centers = model.get_cluster_centers()
    except Exception as e:
        # Any failure inside Shogun is reported as -1 rather than propagated.
        print(e)
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def RunKMeansShogun(q):
    """Time a Shogun K-Means run and report the outcome through ``q``.

    ``self`` and ``options`` are read from the enclosing scope. ``options``
    is searched as a string for ``-c N`` (clusters), ``-m N`` (maximum
    iterations, 1000 when absent) and ``-s N`` (random seed). When
    ``self.dataset`` holds two files, the second one is loaded as the
    centroids file and handed to ``KMeans``.

    Args:
        q: Queue-like object; the elapsed time, or -1 on failure, is handed
            to it with ``q.put``.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, -1 on failure.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    haveCentroids = len(self.dataset) == 2
    data = np.genfromtxt(self.dataset[0], delimiter=',')
    if haveCentroids:
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')

    # Gather parameters. Raw strings keep "\d" a regex escape instead of an
    # invalid Python string escape.
    clusters = re.search(r"-c (\d+)", options)
    maxIterations = re.search(r"-m (\d+)", options)
    seed = re.search(r"-s (\d+)", options)

    # Now do validation of options. The cluster count is only mandatory when
    # no centroids file was supplied.
    if not haveCentroids:
        if not clusters:
            Log.Fatal(
                "Required option: Number of clusters or cluster locations.")
            q.put(-1)
            return -1
        if int(clusters.group(1)) < 1:
            Log.Fatal("Invalid number of clusters requested! Must be greater "
                      "than or equal to 1.")
            q.put(-1)
            return -1
    # NOTE(review): with a centroids file but no "-c" option, `clusters` is
    # None; the AttributeError raised inside the try block below is caught
    # and reported as -1. Confirm whether the cluster count should instead be
    # taken from the centroids file.

    m = int(maxIterations.group(1)) if maxIterations else 1000

    if seed:
        # The regex group is a string; convert it before seeding.
        Math_init_random(int(seed.group(1)))

    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            k = int(clusters.group(1))
            if haveCentroids:
                model = KMeans(k, distance, RealFeatures(centroids))
            else:
                model = KMeans(k, distance)
            # "-m" is the overall iteration cap, so it goes to set_max_iter()
            # rather than the mini-batch iteration setting.
            model.set_max_iter(m)
            model.train()

            # The results are not used further; the calls are kept so the
            # benchmark performs the same work as before.
            labels = model.apply().get_labels()
            centers = model.get_cluster_centers()
    except Exception as e:
        # Any failure inside Shogun is reported as -1 rather than propagated.
        print(e)
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def run_clustering(data, k):
    """Train Shogun K-Means on ``data`` and return the cluster centers.

    Args:
        data: Input handed unchanged to ``RealFeatures``.
        k: Number of clusters passed to ``KMeans``.

    Returns:
        The result of ``get_cluster_centers()`` on the trained model.
    """
    # Function-scope import: modshogun is loaded when the function is called.
    from modshogun import EuclideanDistance, KMeans, RealFeatures

    features = RealFeatures(data)
    # The distance is computed between the feature set and itself.
    model = KMeans(k, EuclideanDistance(features, features))
    model.train()
    return model.get_cluster_centers()
def run_clustering(data, k):
    """Cluster ``data`` into ``k`` groups with Shogun's K-Means.

    Args:
        data: Input wrapped in ``RealFeatures`` before clustering.
        k: Cluster count given to the ``KMeans`` constructor.

    Returns:
        Whatever ``KMeans.get_cluster_centers()`` returns after training.
    """
    # Shogun is imported inside the function, as in the original code.
    from modshogun import (
        EuclideanDistance,
        KMeans,
        RealFeatures,
    )

    wrapped = RealFeatures(data)
    euclidean = EuclideanDistance(wrapped, wrapped)

    clusterer = KMeans(k, euclidean)
    clusterer.train()

    centers = clusterer.get_cluster_centers()
    return centers
def clustering_kmeans_modular(fm_train=traindat, k=3):
    """Run Shogun K-Means on features read from a CSV file.

    Shogun's random number generator is initialised with 17 before training.

    Args:
        fm_train: Argument passed to ``CSVFile`` to load the training
            features; defaults to the module-level ``traindat``.
        k: Number of clusters passed to ``KMeans``.

    Returns:
        A tuple ``(centers, kmeans)``: the result of
        ``get_cluster_centers()`` and the trained ``KMeans`` object.
    """
    from modshogun import (
        CSVFile,
        EuclideanDistance,
        KMeans,
        Math_init_random,
        RealFeatures,
    )

    Math_init_random(17)

    features = RealFeatures(CSVFile(fm_train))
    model = KMeans(k, EuclideanDistance(features, features))
    model.train()

    centers = model.get_cluster_centers()
    # The radii are requested but the return value is discarded, exactly as in
    # the original code.
    model.get_radiuses()

    return centers, model