def run_clustering(data, k):
    """Train a Shogun KMeans model on ``data`` and return its cluster centers.

    Args:
        data: Input handed directly to ``RealFeatures``.
            NOTE(review): presumably a 2-D float matrix in the layout
            Shogun expects -- confirm against the caller.
        k: Number of clusters passed to ``KMeans``.

    Returns:
        Whatever ``KMeans.get_cluster_centers()`` yields after training.
    """
    from modshogun import EuclideanDistance, KMeans, RealFeatures

    features = RealFeatures(data)
    # The distance is computed between the feature set and itself.
    model = KMeans(k, EuclideanDistance(features, features))
    model.train()
    return model.get_cluster_centers()
Example #2
0
def clustering_kmeans_modular(fm_train=traindat, k=3):
    """Run Shogun KMeans on features loaded from a CSV file.

    Args:
        fm_train: Argument given to ``CSVFile`` to load the training
            features; defaults to the module-level ``traindat``.
        k: Number of clusters passed to ``KMeans``.

    Returns:
        A ``(centers, model)`` tuple: the result of
        ``get_cluster_centers()`` and the trained ``KMeans`` object.
    """
    from modshogun import (
        CSVFile,
        EuclideanDistance,
        KMeans,
        Math_init_random,
        RealFeatures,
    )

    # Initialise Shogun's random state with the constant 17 before training.
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(fm_train))
    model = KMeans(k, EuclideanDistance(train_features, train_features))
    model.train()

    centers = model.get_cluster_centers()
    # The radiuses are queried but their value is not part of the result.
    model.get_radiuses()

    return centers, model
Example #3
0
        def RunKMeansShogun(q):
            """Run Shogun K-Means on ``self.dataset`` and report the elapsed time.

            ``self`` and ``options`` are read from the enclosing scope.
            ``options`` is searched for ``-c <clusters>``, ``-m <max
            iterations>`` (default 1000) and ``-s <seed>``.

            Args:
                q: Queue-like object; the result is also delivered through
                    ``q.put`` so a parent process can collect it.

            Returns:
                The value of ``totalTimer.ElapsedTime()`` on success, or
                ``-1`` when the options are invalid or the run raises.
            """
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the
            # centroids file.
            Log.Info("Loading dataset", self.verbose)
            has_centroids = len(self.dataset) == 2
            if has_centroids:
                data = np.genfromtxt(self.dataset[0], delimiter=',')
                centroids = np.genfromtxt(self.dataset[1], delimiter=',')
            else:
                data = np.genfromtxt(self.dataset, delimiter=',')

            # Gather parameters. Raw strings keep ``\d`` a regex escape
            # instead of an invalid string escape sequence.
            clusters = re.search(r"-c (\d+)", options)
            maxIterations = re.search(r"-m (\d+)", options)
            seed = re.search(r"-s (\d+)", options)

            # Now do validation of options. A cluster count is only
            # mandatory when no centroids file was supplied.
            if not has_centroids:
                if not clusters:
                    Log.Fatal(
                        "Required option: Number of clusters or cluster locations."
                    )
                    q.put(-1)
                    return -1
                if int(clusters.group(1)) < 1:
                    Log.Fatal(
                        "Invalid number of clusters requested! Must be greater than"
                        + " or equal to 1.")
                    q.put(-1)
                    return -1

            m = 1000 if not maxIterations else int(maxIterations.group(1))

            try:
                # Seeding happens inside the try block so that a failure is
                # still reported through the queue. The regex group is a
                # string, so convert it before handing it to Shogun.
                if seed:
                    Math_init_random(int(seed.group(1)))

                if clusters:
                    num_clusters = int(clusters.group(1))
                else:
                    # Only reachable with a centroids file and no "-c"
                    # option: take the count from the initial centroids.
                    # Assumes one centroid per row, matching the transpose
                    # applied to both data and centroids below.
                    num_clusters = centroids.shape[0]

                dataFeat = RealFeatures(data.T)
                distance = EuclideanDistance(dataFeat, dataFeat)

                # Create the K-Means object and perform K-Means clustering.
                with totalTimer:
                    if has_centroids:
                        model = KMeans(num_clusters, distance, centroids.T)
                    else:
                        model = KMeans(num_clusters, distance)

                    model.set_max_iter(m)
                    model.train()

                    # The results are discarded; the calls stay so that
                    # they are included in the timed section.
                    model.apply().get_labels()
                    model.get_cluster_centers()
            except Exception as e:
                print(e)
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed