Ejemplo n.º 1
0
    def test_mb_k_means(self) -> None:
        """Run MBKMeans on the "cluster:s3" dataset and check its labels.

        A CSVLoader is connected to an MBKMeans module configured for three
        clusters, the scheduler is run, and the test then checks that the
        number of labels equals the length of the loader's table.

        The test is reported as skipped when fetching the dataset raises
        TimeoutError.
        """
        s = self.scheduler()
        n_clusters = 3
        try:
            dataset = (get_dataset("cluster:s3"),)
        except TimeoutError:
            # Skip explicitly instead of returning: a bare return would be
            # reported as a passing test even though nothing was verified.
            self.skipTest("Cannot download cluster:s3")

        with s:
            csv = CSVLoader(
                dataset,
                sep=" ",
                skipinitialspace=True,
                header=None,
                index_col=False,
                scheduler=s,
            )
            km = MBKMeans(
                n_clusters=n_clusters,
                random_state=42,
                is_input=False,
                is_greedy=False,
                scheduler=s,
            )
            # NOTE(review): presumably stands in for the explicit wiring
            # `km.input.table = csv.output.result` -- confirm.
            km.create_dependent_modules(csv)
            # Both outputs of km are given a consumer using self.terse.
            pr = Print(proc=self.terse, scheduler=s)
            pr.input[0] = km.output.result
            e = Every(proc=self.terse, scheduler=s)
            e.input[0] = km.output.labels
        aio.run(s.start())
        labels = km.labels()
        # Kept as a plain assert so static type checkers narrow `labels`
        # to non-None before len() is applied.
        assert labels is not None
        self.assertEqual(len(csv.table), len(labels))
Ejemplo n.º 2
0
        'name': cname,
        'x_column': '_0',
        'y_column': '_1',
        'sample': mbkmeans if i == 0 else None,
        'input_module': filt,
        'input_slot': 'table'
    })

# Build the multi-class scatter plot from the `classes` descriptions and let
# it create its dependent modules.
sp = MCScatterPlot(scheduler=s, classes=classes)
sp.create_dependent_modules()
# For every cluster class "k0", "k1", ... overwrite the min/max tables with
# infinite bounds on both plotted columns ('_0' and '_1').
# NOTE(review): presumably this gives an unbounded initial range so that no
# point is excluded -- confirm against MCScatterPlot.
for i in range(n_clusters):
    cname = f"k{i}"
    sp[cname].min_value._table = PsDict({'_0': -np.inf, '_1': -np.inf})
    sp[cname].max_value._table = PsDict({'_0': np.inf, '_1': np.inf})
# Feed the k-means module from the data source, then let it create its own
# dependent modules.
mbkmeans.input.table = data.output.table
mbkmeans.create_dependent_modules()
sp.move_point = mbkmeans.moved_center  # for input management


def myprint(d):
    """Print *d* once its convergence is known, otherwise print a dot.

    *d* must support ``d['convergence']``.  While that entry equals
    ``'unknown'`` a single ``'.'`` is written without a trailing newline;
    for any other value the whole of *d* is printed.
    """
    status = d['convergence']
    if status != 'unknown':
        print(d)
        return
    # Still undecided: emit a compact progress marker on the same line.
    print('.', end='')


# Attach an Every module, with the builtin `print` as its proc, to the `conv`
# output of mbkmeans.
# NOTE(review): `myprint` is defined just above but never used; `proc=print`
# may have been intended to be `proc=myprint` -- confirm before changing.
prn = Every(scheduler=s, proc=print)
prn.input.df = mbkmeans.output.conv

if __name__ == '__main__':
    #data.start()