def test_mb_k_means(self) -> None:
    """Run MBKMeans over the ``cluster:s3`` dataset and check its labels.

    A CSV loader feeds a three-cluster ``MBKMeans`` module; once the
    scheduler has run, the labels must have as many entries as the
    loaded table.  The test is reported as skipped when fetching the
    dataset raises ``TimeoutError``.
    """
    s = self.scheduler()
    n_clusters = 3
    try:
        dataset = (get_dataset("cluster:s3"),)
    except TimeoutError:
        # Report a skip rather than returning silently: a bare return
        # would be counted as a pass even though nothing was checked.
        self.skipTest("Cannot download cluster:s3")
    with s:
        csv = CSVLoader(
            dataset,
            sep=" ",
            skipinitialspace=True,
            header=None,
            index_col=False,
            scheduler=s,
        )
        km = MBKMeans(
            n_clusters=n_clusters,
            random_state=42,
            is_input=False,
            is_greedy=False,
            scheduler=s,
        )
        # Presumably replaces the manual wiring
        # ``km.input.table = csv.output.result`` -- confirm against MBKMeans.
        km.create_dependent_modules(csv)
        # Both sinks get self.terse as their proc; one is fed the result
        # slot of km, the other its labels slot.
        pr = Print(proc=self.terse, scheduler=s)
        pr.input[0] = km.output.result
        e = Every(proc=self.terse, scheduler=s)
        e.input[0] = km.output.labels
    aio.run(s.start())
    labels = km.labels()
    assert labels is not None
    self.assertEqual(len(csv.table), len(labels))
'name': cname, 'x_column': '_0', 'y_column': '_1', 'sample': mbkmeans if i == 0 else None, 'input_module': filt, 'input_slot': 'table' }) sp = MCScatterPlot(scheduler=s, classes=classes) sp.create_dependent_modules() for i in range(n_clusters): cname = f"k{i}" sp[cname].min_value._table = PsDict({'_0': -np.inf, '_1': -np.inf}) sp[cname].max_value._table = PsDict({'_0': np.inf, '_1': np.inf}) mbkmeans.input.table = data.output.table mbkmeans.create_dependent_modules() sp.move_point = mbkmeans.moved_center # for input management def myprint(d): if d['convergence'] != 'unknown': print(d) else: print('.', end='') prn = Every(scheduler=s, proc=print) prn.input.df = mbkmeans.output.conv if __name__ == '__main__': #data.start()