def RunKMeansShogun(q):
    """Run Shogun K-Means on the configured dataset and report the runtime.

    ``self`` and ``options`` are not parameters; they are free variables
    resolved from the enclosing scope. ``self.dataset`` is indexed as a
    sequence of CSV paths (data file first, optional centroids file second)
    and ``options`` is searched for ``-c``, ``-m`` and ``-s`` flags.

    Args:
        q: Object exposing ``put`` (presumably a multiprocessing queue used
            to hand the result back to a parent process -- confirm against
            the caller). It receives the same value that is returned.

    Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or ``-1`` when
        option validation fails or the clustering raises an exception.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    haveCentroids = len(self.dataset) == 2
    data = np.genfromtxt(self.dataset[0], delimiter=',')
    centroids = None
    if haveCentroids:
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')

    # Gather parameters. Raw strings keep "\d" from being treated as an
    # (invalid) string escape sequence.
    clusters = re.search(r"-c (\d+)", options)
    maxIterations = re.search(r"-m (\d+)", options)
    seed = re.search(r"-s (\d+)", options)

    # Now do validation of options.
    if not clusters:
        if haveCentroids:
            # The KMeans constructor below is always given an explicit
            # cluster count, so a centroids file alone is not enough. Fail
            # here with a clear message instead of dereferencing None later.
            # NOTE(review): k could be inferred from the centroids file, but
            # its orientation is not visible here -- confirm before adding.
            Log.Fatal("Required option: Number of clusters (-c) must be given"
                      + " together with the centroids file.")
        else:
            Log.Fatal("Required option: Number of clusters or cluster"
                      + " locations.")
        q.put(-1)
        return -1

    numClusters = int(clusters.group(1))
    if numClusters < 1:
        Log.Fatal("Invalid number of clusters requested! Must be greater than"
                  + " or equal to 1.")
        q.put(-1)
        return -1

    m = 1000 if not maxIterations else int(maxIterations.group(1))

    if seed:
        # The regex capture is a string; the seed has to be an integer.
        Math_init_random(int(seed.group(1)))

    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            if haveCentroids:
                model = KMeans(numClusters, distance, RealFeatures(centroids))
            else:
                model = KMeans(numClusters, distance)

            model.set_mbKMeans_iter(m)
            model.train()

            # The results are not used, but retrieving them stays inside the
            # timed region so the measured work matches the original.
            model.apply().get_labels()
            model.get_cluster_centers()
    except Exception as e:
        # Boundary handler: any failure inside the library is reported as a
        # failed run (-1) rather than propagated to the caller.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed