Example #1
0
    def RunKMeansShogun(q):
      totalTimer = Timer()

      # Load input dataset.
      # If the dataset contains two files then the second file is the centroids
      # file.
      Log.Info("Loading dataset", self.verbose)
      if len(self.dataset) == 2:
        data = np.genfromtxt(self.dataset[0], delimiter=',')
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
      else:
        data = np.genfromtxt(self.dataset[0], delimiter=',')

      # Gather parameters.
      clusters = re.search("-c (\d+)", options)
      maxIterations = re.search("-m (\d+)", options)
      seed = re.search("-s (\d+)", options)

      # Now do validation of options.
      if not clusters and len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
      elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
        Log.Fatal("Invalid number of clusters requested! Must be greater than"
            + " or equal to 1.")
        q.put(-1)
        return -1

      m = 1000 if not maxIterations else int(maxIterations.group(1))


      if seed:
        Math_init_random(seed.group(1))
      try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
          if len(self.dataset) == 2:
            model = KMeans(int(clusters.group(1)), distance, RealFeatures(centroids))
          else:
            model = KMeans(int(clusters.group(1)), distance)

          model.set_mbKMeans_iter(m)
          model.train()

          labels = model.apply().get_labels()
          centers = model.get_cluster_centers()
      except Exception as e:
        print(e)
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time