コード例 #1
0
ファイル: kmeans.py プロジェクト: cnzhoufang/benchmarks
        def RunKMeansShogun(q):
            """Run one timed Shogun K-Means fit and report the outcome on ``q``.

            Reads ``self.dataset``, ``self.verbose`` and the ``options`` dict
            from the enclosing scope. The keys ``clusters``,
            ``max_iterations`` and ``seed`` are popped from ``options``; any
            key left over afterwards is treated as an error.

            Args:
                q: Object with a ``put`` method. It receives the value of
                    ``totalTimer.ElapsedTime()`` on success and ``-1`` on
                    failure.

            Returns:
                The value of ``totalTimer.ElapsedTime()`` on success, or
                ``-1`` when neither a cluster count nor a centroids file is
                available or when the Shogun calls raise.

            Raises:
                Exception: If ``options`` still contains keys after the known
                    ones have been consumed.
            """
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the
            # centroids file.
            Log.Info("Loading dataset", self.verbose)
            hasCentroids = len(self.dataset) == 2
            data = np.genfromtxt(self.dataset[0], delimiter=',')
            if hasCentroids:
                centroids = np.genfromtxt(self.dataset[1], delimiter=',')

            # Gather parameters.
            if "clusters" in options:
                clusters = int(options.pop("clusters"))
            elif hasCentroids:
                # Cluster locations were supplied without an explicit count, so
                # take the count from the centroids file instead of leaving
                # `clusters` unbound.
                # NOTE(review): assumes one centroid per row of the file,
                # mirroring how the data file is transposed below -- confirm.
                clusters = np.atleast_2d(centroids).shape[0]
            else:
                Log.Fatal(
                    "Required option: Number of clusters or cluster locations."
                )
                q.put(-1)
                return -1

            # Fall back to 1000 iterations when the option is absent so the
            # value handed to set_max_iter() is always defined.
            maxIterations = int(options.pop("max_iterations", 1000))

            seed = int(options.pop("seed")) if "seed" in options else None

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            # Compare against None so that an explicit seed of 0 is honoured.
            if seed is not None:
                Math_init_random(seed)
            try:
                dataFeat = RealFeatures(data.T)
                distance = EuclideanDistance(dataFeat, dataFeat)

                # Create the K-Means object and perform K-Means clustering.
                with totalTimer:
                    if hasCentroids:
                        model = KMeans(clusters, distance, centroids.T)
                    else:
                        model = KMeans(clusters, distance)

                    model.set_max_iter(maxIterations)
                    model.train()

                    # The results are not used, but the calls stay inside the
                    # timed region so the measured work is unchanged.
                    model.apply().get_labels()
                    model.get_cluster_centers()
            except Exception as e:
                print(e)
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
コード例 #2
0
ファイル: kmeans.py プロジェクト: youssef-emad/benchmarks
    def RunKMeansShogun(q):
      """Run one timed Shogun K-Means fit and report the outcome on ``q``.

      Reads ``self.dataset``, ``self.verbose`` and the ``options`` string from
      the enclosing scope. ``options`` is searched for ``-c <int>`` (number of
      clusters), ``-m <int>`` (iteration limit, 1000 when absent) and
      ``-s <int>`` (random seed).

      Args:
        q: Object with a ``put`` method. It receives the value of
          ``totalTimer.ElapsedTime()`` on success and ``-1`` on failure.

      Returns:
        The value of ``totalTimer.ElapsedTime()`` on success, or ``-1`` when
        the cluster count is missing or invalid or when the Shogun calls raise.
      """
      totalTimer = Timer()

      # Load input dataset.
      # If the dataset contains two files then the second file is the centroids
      # file.
      Log.Info("Loading dataset", self.verbose)
      hasCentroids = len(self.dataset) == 2
      data = np.genfromtxt(self.dataset[0], delimiter=',')
      if hasCentroids:
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')

      # Gather parameters. Raw strings keep "\d" from being read as a string
      # escape; the compiled patterns are unchanged.
      clusterMatch = re.search(r"-c (\d+)", options)
      iterMatch = re.search(r"-m (\d+)", options)
      seedMatch = re.search(r"-s (\d+)", options)

      # Now do validation of options.
      if clusterMatch:
        clusters = int(clusterMatch.group(1))
        if clusters < 1:
          Log.Fatal("Invalid number of clusters requested! Must be greater than"
              + " or equal to 1.")
          q.put(-1)
          return -1
      elif hasCentroids:
        # Cluster locations were supplied without -c, so take the count from
        # the centroids file instead of dereferencing a missing match.
        # NOTE(review): assumes one centroid per row of the file -- confirm.
        clusters = np.atleast_2d(centroids).shape[0]
      else:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1

      m = int(iterMatch.group(1)) if iterMatch else 1000

      if seedMatch:
        # The regex group is a string; convert it before seeding.
        Math_init_random(int(seedMatch.group(1)))
      try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
          if hasCentroids:
            model = KMeans(clusters, distance, RealFeatures(centroids))
          else:
            model = KMeans(clusters, distance)

          # NOTE(review): the original called set_mbKMeans_iter(m), which
          # appears to configure only the mini-batch trainer; set_max_iter is
          # used so that -m bounds the default training loop. Confirm against
          # the installed Shogun version.
          model.set_max_iter(m)
          model.train()

          # The results are not used, but the calls stay inside the timed
          # region so the measured work is unchanged.
          model.apply().get_labels()
          model.get_cluster_centers()
      except Exception as e:
        print(e)
        q.put(-1)
        return -1

      elapsed = totalTimer.ElapsedTime()
      q.put(elapsed)
      return elapsed
コード例 #3
0
def run_clustering(data, k):
	"""Fit Shogun's KMeans on ``data`` with ``k`` clusters and return the centers.

	``data`` is wrapped in ``RealFeatures`` and a ``EuclideanDistance`` built
	on those features is handed to ``KMeans``. The return value is whatever
	``KMeans.get_cluster_centers()`` yields after training.
	"""
	from modshogun import EuclideanDistance, KMeans, RealFeatures

	features = RealFeatures(data)
	model = KMeans(k, EuclideanDistance(features, features))
	model.train()
	return model.get_cluster_centers()
コード例 #4
0
def run_clustering(data, k):
    """Train a Shogun KMeans model on ``data`` and return its cluster centers.

    Args:
        data: Input passed straight to ``RealFeatures``.
        k: Number of clusters passed to ``KMeans``.

    Returns:
        The result of ``get_cluster_centers()`` on the trained model.
    """
    from modshogun import (
        EuclideanDistance,
        KMeans,
        RealFeatures,
    )

    samples = RealFeatures(data)
    metric = EuclideanDistance(samples, samples)

    clusterer = KMeans(k, metric)
    clusterer.train()
    cluster_centers = clusterer.get_cluster_centers()

    return cluster_centers
コード例 #5
0
def clustering_kmeans_modular(fm_train=traindat, k=3):
	"""Train Shogun KMeans on features loaded from a CSV file.

	The random generator is seeded with 17 before training.

	Args:
		fm_train: Value passed to ``CSVFile`` to load the training features.
		k: Number of clusters passed to ``KMeans``.

	Returns:
		A ``(centers, model)`` tuple: the result of
		``get_cluster_centers()`` and the trained ``KMeans`` object.
	"""
	from modshogun import (
		CSVFile,
		EuclideanDistance,
		KMeans,
		Math_init_random,
		RealFeatures,
	)

	Math_init_random(17)

	train_features = RealFeatures(CSVFile(fm_train))
	model = KMeans(k, EuclideanDistance(train_features, train_features))
	model.train()

	centers = model.get_cluster_centers()
	# The radii are requested but the result is deliberately not kept.
	model.get_radiuses()

	return centers, model