def get_km(xs, n, n_test=100):
    """Select ``n`` medoid indices from ``xs`` and a random sample of the rest.

    A ``KMedoids`` model (``n_cluster=n``, ``max_iter=1000``, ``tol=1e-5``)
    is fitted on ``xs``. Its ``medoids`` attribute is treated as a collection
    of row indices into ``xs``; every other index is a candidate for the
    returned random sample.

    Args:
        xs: Sized collection of samples, passed unchanged to ``KMedoids.fit``.
        n: Number of clusters (and therefore medoids) to request.
        n_test: Maximum number of non-medoid indices to return. Defaults to
            100, the value that was previously hard-coded.

    Returns:
        A tuple ``(kmidx, testidx)`` where ``kmidx`` is the list of medoid
        indices and ``testidx`` is a list of at most ``n_test`` shuffled
        indices of ``xs`` that are not medoids.
    """
    km = KMedoids(n_cluster=n, max_iter=1000, tol=1e-5)
    km.fit(xs)
    kmidx = list(km.medoids)

    # Membership tests against a set keep the filter linear in len(xs)
    # instead of scanning the medoid list once per candidate index.
    medoid_set = set(kmidx)
    testidx = [i for i in range(len(xs)) if i not in medoid_set]

    # Shuffles in place using NumPy's global RNG; callers that need
    # reproducible samples must seed np.random beforehand.
    np.random.shuffle(testidx)
    return kmidx, testidx[:n_test]
def fit(self, X, y, **kwargs):
    """Fit the wrapped classifier on per-class k-medoids prototypes.

    For each distinct label in ``y`` (in sorted order), the rows of ``X``
    with that label are clustered with ``KMedoids`` using ``self.mk``
    medoids, initialised from ``kmeanspp(..., seed=self.seed)``. The
    selected rows from all classes are stored and ``self.clf`` is fitted on
    them alone.

    Sets:
        self.idxs: Indices into ``X`` of the selected medoids, concatenated
            class by class.
        self.vecs: ``X[self.idxs]``.

    Args:
        X: Samples; must support indexing with an integer index array.
        y: Labels aligned with the rows of ``X``. Coerced with
            ``np.asarray`` so that lists are accepted as well as arrays.
        **kwargs: Accepted but not used by this method.

    Returns:
        ``self``, to allow call chaining.

    Raises:
        AssertionError: If any class did not contribute exactly ``self.mk``
            medoids.
    """
    if self.verbose:
        print("fitting", self)

    # The element-wise comparison and fancy indexing below need an ndarray:
    # with a plain list, ``y == label`` collapses to a single False and no
    # rows would be selected.
    y = np.asarray(y)

    meds = []
    for label in sorted(set(y)):
        idxs_c = np.where(y == label)[0]
        class_rows = X[idxs_c]
        kmpp_idxs = kmeanspp(class_rows, self.mk, seed=self.seed)
        kmed = KMedoids(self.mk, init=kmpp_idxs)
        # NOTE(review): dist=False presumably means class_rows holds raw
        # feature vectors rather than a precomputed distance matrix; confirm
        # against the KMedoids implementation.
        kmed.fit(class_rows, dist=False)
        # kmed.medoids index into the class subset; map them back to
        # positions in the full X.
        meds.append(idxs_c[kmed.medoids].tolist())

    self.idxs = np.concatenate(meds, axis=0)
    self.vecs = X[self.idxs]

    # Sanity check: every class must be represented by exactly mk medoids.
    cc = Counter(y[self.idxs])
    assert set(cc.values()) == {self.mk}, (
        "expected {} medoids per class, got {}".format(self.mk, dict(cc))
    )
    if self.verbose:
        print("fitted KMedoids seed={},".format(self.seed), cc)

    self.clf.fit(self.vecs, y[self.idxs])
    return self