def test_err():
    """Negative test: build ``knor.Kmeans`` with the non-integer 7.2 and fit.

    The call is expected to raise. When it does, the exception message is
    printed and the function falls through (returning ``None``). If nothing
    is raised, the result of ``fit`` is returned unchanged.
    """
    report = "Successful test: CORRECTLY fails with message: {}"
    try:
        model = knor.Kmeans(7.2)
        outcome = model.fit(FN, nrow=50, ncol=5)
    except Exception as msg:
        # Any failure counts as the expected outcome of this test.
        print(report.format(msg))
    else:
        return outcome
def cluster(data, init0='random', clf='kmeans'):
    """Split ``data`` into two clusters and score them against a half/half labelling.

    Parameters
    ----------
    data
        Samples to cluster. It is indexed with boolean masks below, so it is
        presumably a 2-D numpy array -- TODO confirm against callers.
    init0
        Initialisation scheme, forwarded as ``init`` to ``KMeans`` or as
        ``init_params`` to ``GaussianMixture``. Ignored for ``clf='knor'``.
    clf
        Which backend to run:

        * ``'kmeans'``      -- ``KMeans`` limited to a single iteration
        * ``'kmeans-conv'`` -- ``KMeans`` with its default iteration budget
        * ``'gmm'``         -- ``GaussianMixture`` with diagonal covariance
        * ``'knor'``        -- ``knor.Kmeans``

    Returns
    -------
    tuple
        ``(klabels, klabels_0, klabels_1, err, dur)``: the (possibly flipped)
        cluster labels, the samples assigned to label 0, the samples assigned
        to label 1, the fraction of samples whose label disagrees with the
        reference labelling, and the wall-clock seconds spent fitting.

    Raises
    ------
    ValueError
        If ``clf`` is not one of the supported backends.
    """
    if clf in ('kmeans', 'kmeans-conv', 'gmm'):
        # 'kmeans-conv' used to be unreachable because the outer guard only
        # admitted 'kmeans' and 'gmm'.
        if clf == 'kmeans':
            classifier = KMeans(n_clusters=2, init=init0, max_iter=1)
        elif clf == 'kmeans-conv':
            classifier = KMeans(n_clusters=2, init=init0)
        else:
            classifier = GaussianMixture(n_components=2, init_params=init0,
                                         covariance_type='diag')
        # Only the fit is timed; construction and prediction are excluded.
        t0 = time.time()
        classifier.fit(data)
        t1 = time.time()
        dur = t1 - t0
        klabels = classifier.predict(data)
    elif clf == 'knor':
        # The knor call itself is what gets timed.
        t0 = time.time()
        ret = knor.Kmeans(data, 2)
        t1 = time.time()
        dur = t1 - t0
        klabels = ret.get_clusters()
    else:
        # Previously this printed a hint and then crashed with an
        # UnboundLocalError on ``klabels``; fail explicitly instead.
        raise ValueError(
            "specify classifier: expected 'kmeans', 'kmeans-conv', 'gmm' "
            "or 'knor', got {!r}".format(clf))

    # Cluster ids are arbitrary, so flip them when most of the first 50
    # samples were given label 1.
    # NOTE(review): 50/25 is hard-coded while the reference labels below use
    # len(data)//2 -- the two agree only for 100 samples; confirm intent.
    if np.sum(klabels[:50]) > 25:
        klabels = 1 - klabels

    klabels_0 = data[klabels == 0]
    klabels_1 = data[klabels == 1]

    # Reference labelling: first half of the samples is 0, the rest is 1.
    half = len(data) // 2
    dlabels = np.concatenate(([0] * half, [1] * (len(data) - half)), axis=0)
    err = np.sum(dlabels != klabels) / len(data)
    return klabels, klabels_0, klabels_1, err, dur
def heatmap(self, mode=None):
    """
    Plot a heatmap of the observations.

    With more than 1000 samples, ``knor.Kmeans`` is run on the normalised
    data with ``init='kmeanspp'`` and ``max_iters=0`` to obtain 1000
    centroids, and those centroids are plotted instead of the raw samples.

    ``mode`` falls back to ``self._mode`` when it is not given (or falsy).
    """
    mode = mode or self._mode

    if self._ds.n > 1000:
        # Too many rows to draw individually: summarise them by the 1000
        # centroids produced by the kmeans++ initialisation.
        seeded = knor.Kmeans(self._ds_normed.D.values, 1000,
                             max_iters=0, init='kmeanspp')
        centroid_frame = pd.DataFrame(seeded.get_centroids(),
                                      columns=self._ds.D.columns)
        source = lds.DataSet(centroid_frame, name=self._ds.name)
    else:
        source = self._ds_normed

    return lpl.Heatmap(source, mode=mode).plot(showticklabels=True)
def histogram_heatmap(self, mode=None):
    """
    Plot a 1d (histogram) heatmap.

    With more than 1000 samples, ``knor.Kmeans`` is run on the normalised
    data with ``init='kmeanspp'`` and ``max_iters=0`` to obtain 1000
    centroids, and those centroids are plotted instead of the raw samples.

    ``mode`` falls back to ``self._mode`` when it is not given (or falsy).
    """
    mode = mode or self._mode

    if self._ds.n > 1000:
        # Too many rows to draw individually: summarise them by the 1000
        # centroids produced by the kmeans++ initialisation.
        seeded = knor.Kmeans(self._ds_normed.D.values, 1000,
                             max_iters=0, init='kmeanspp')
        centroid_frame = pd.DataFrame(seeded.get_centroids(),
                                      columns=self._ds.D.columns)
        source = lds.DataSet(centroid_frame, name=self._ds.name)
    else:
        source = self._ds_normed

    chart = lpl.HistogramHeatmap(source, mode=mode)
    return chart.plot(showticklabels=self._showticklabels)
def dexm_test_c_im():
    """Fit ``knor.Kmeans`` on ``FN`` using caller-supplied random centers.

    Two centers of width 5 are drawn with ``np.random.random`` and handed to
    the model; ``fit`` is then called as ``fit(FN, 50, 5)`` (presumably the
    row and column counts of ``FN`` -- confirm against the knor API).

    Returns the value produced by ``fit``.
    """
    n_clusters = 2
    initial_centers = np.random.random((n_clusters, 5))
    return knor.Kmeans(n_clusters, centers=initial_centers).fit(FN, 50, 5)
def dexm_test_c_comp():
    """Fit an 8-cluster ``knor.Kmeans`` on ``FN`` without supplying centers.

    ``fit`` is called as ``fit(FN, 50, 5)`` (presumably the row and column
    counts of ``FN`` -- confirm against the knor API).

    Returns the value produced by ``fit``.
    """
    model = knor.Kmeans(8)
    fitted = model.fit(FN, 50, 5)
    return fitted
def dim_test_c_im():
    """Fit ``knor.Kmeans`` on a random 10x3 array with 3 supplied centers.

    Both the samples and the 3x3 initial centers come from
    ``np.random.random``. Returns the value produced by ``fit``.
    """
    samples = np.random.random((10, 3))
    initial_centers = np.random.random((3, 3))
    # TODO: Infer k
    model = knor.Kmeans(3, centers=initial_centers)
    return model.fit(samples)
def dim_test_c_comp():
    """Fit a 4-cluster ``knor.Kmeans`` on a random 10x3 array.

    No initial centers are supplied. Returns the value produced by ``fit``.
    """
    samples = np.random.random((10, 3))
    return knor.Kmeans(4).fit(samples)
def dexm_test_c_im():
    """Run ``knor.Kmeans`` on ``FN`` with a random 2x5 array of centers.

    ``nrow=50`` and ``ncol=5`` are passed through as keyword arguments.
    Returns whatever ``knor.Kmeans`` returns.
    """
    initial_centers = np.random.random((2, 5))
    result = knor.Kmeans(FN, initial_centers, nrow=50, ncol=5)
    return result
def dexm_test_c_comp():
    """Run ``knor.Kmeans`` on ``FN`` with the integer 8 as its second argument.

    The 8 sits where the other variant passes an array of centers, so it is
    presumably a cluster count -- confirm against the knor API. ``nrow=50``
    and ``ncol=5`` are passed through as keyword arguments.

    Returns whatever ``knor.Kmeans`` returns.
    """
    n_clusters = 8
    result = knor.Kmeans(FN, n_clusters, nrow=50, ncol=5)
    return result
def dim_test_c_im():
    """Run ``knor.Kmeans`` on a random 10x3 array with a random 3x3 array of centers.

    Returns whatever ``knor.Kmeans`` returns.
    """
    samples = np.random.random((10, 3))
    initial_centers = np.random.random((3, 3))
    result = knor.Kmeans(samples, initial_centers)
    return result
def dim_test_c_comp():
    """Run ``knor.Kmeans`` on a random 10x3 array with 4 as its second argument.

    Returns whatever ``knor.Kmeans`` returns.
    """
    samples = np.random.random((10, 3))
    result = knor.Kmeans(samples, 4)
    return result