def test_mb_k_means(self) -> None:
    """Run MBKMeans on the ``cluster:s3`` dataset and check the label count.

    A ``CSVLoader`` reads the dataset and is handed to
    ``MBKMeans.create_dependent_modules``; the ``result`` and ``labels``
    outputs of the clustering module are connected to ``Print`` and
    ``Every`` modules.  After the scheduler has run, the labels must have
    the same length as the loaded table.

    The test is reported as skipped when fetching the dataset times out.
    """
    s = self.scheduler()
    n_clusters = 3
    try:
        dataset = (get_dataset("cluster:s3"),)
    except TimeoutError:
        # Report a skip rather than returning silently: a bare ``return``
        # would make an un-run test show up as a pass.
        self.skipTest("Cannot download cluster:s3")
    with s:
        csv = CSVLoader(
            dataset,
            sep=" ",
            skipinitialspace=True,
            header=None,
            index_col=False,
            scheduler=s,
        )
        km = MBKMeans(
            n_clusters=n_clusters,
            random_state=42,
            is_input=False,
            is_greedy=False,
            scheduler=s,
        )
        # NOTE(review): presumably this connects the loader to km's input,
        # replacing the former manual ``km.input.table = csv.output.result``
        # wiring -- confirm against MBKMeans.
        km.create_dependent_modules(csv)
        pr = Print(proc=self.terse, scheduler=s)
        pr.input[0] = km.output.result
        e = Every(proc=self.terse, scheduler=s)
        e.input[0] = km.output.labels
    aio.run(s.start())
    labels = km.labels()
    # Plain assert kept so static type checkers narrow away ``None``.
    assert labels is not None
    self.assertEqual(len(csv.table), len(labels))
def test_mb_k_means(self):
    """Run MBKMeans on the ``cluster:s3`` dataset and check the label count.

    A ``CSVLoader`` feeds ``MBKMeans`` through the ``df`` slot; the ``df``
    and ``labels`` outputs of the clustering module are connected to
    ``Print`` and ``Every`` modules.  After the scheduler has been started,
    the labels must have the same length as the loaded data frame.
    """
    s = Scheduler()
    n_clusters = 3
    csv = CSVLoader(
        get_dataset('cluster:s3'),
        sep=' ',
        skipinitialspace=True,
        header=None,
        index_col=False,
        scheduler=s,
    )
    km = MBKMeans(
        n_clusters=n_clusters,
        random_state=42,
        is_input=False,
        scheduler=s,
    )
    km.input.df = csv.output.df
    pr = Print(scheduler=s)
    pr.input.df = km.output.df
    e = Every(scheduler=s)
    e.input.df = km.output.labels
    s.start()
    # ``assertEquals`` is a deprecated alias that was removed in Python 3.12;
    # ``assertEqual`` is the supported spelling.
    self.assertEqual(len(csv.df()), len(km.labels()))
def test_mb_k_means(self):
    """Cluster the ``cluster:s3`` dataset and compare label and row counts.

    Builds a CSVLoader -> MBKMeans pipeline on the test scheduler, attaches
    ``Print`` and ``Every`` modules to the ``table`` and ``labels`` outputs,
    runs the scheduler to completion, and asserts that the labels have the
    same length as the loaded table.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("cluster:s3"),
        sep=" ",
        skipinitialspace=True,
        header=None,
        index_col=False,
        scheduler=sched,
    )
    kmeans = MBKMeans(
        n_clusters=3,
        random_state=42,
        is_input=False,
        scheduler=sched,
    )
    kmeans.input.table = loader.output.table

    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = kmeans.output.table

    every = Every(proc=self.terse, scheduler=sched)
    every.input.df = kmeans.output.labels

    # Start the scheduler and wait for it before inspecting results.
    sched.start()
    sched.join()

    row_count = len(loader.table())
    label_count = len(kmeans.labels())
    self.assertEqual(row_count, label_count)