Exemple #1
0
    def test_mb_k_means(self) -> None:
        """Check that MBKMeans yields one label per row of ``cluster:s3``.

        Builds a ``CSVLoader`` feeding an ``MBKMeans`` module, attaches a
        ``Print`` sink to ``km.output.result`` and an ``Every`` sink to
        ``km.output.labels``, runs the scheduler through ``aio.run`` and then
        asserts that the number of labels equals the length of the loader's
        table.

        The test is reported as skipped, instead of silently passing, when
        fetching the dataset raises ``TimeoutError``.
        """
        s = self.scheduler()
        n_clusters = 3
        try:
            # The loader receives the dataset wrapped in a one-element tuple.
            dataset = (get_dataset("cluster:s3"),)
        except TimeoutError:
            # skipTest raises, so nothing below runs and the runner records a
            # skip with this reason rather than a misleading pass.
            self.skipTest("Cannot download cluster:s3")

        with s:
            csv = CSVLoader(
                dataset,
                sep=" ",
                skipinitialspace=True,
                header=None,
                index_col=False,
                scheduler=s,
            )
            km = MBKMeans(
                n_clusters=n_clusters,
                random_state=42,
                is_input=False,
                is_greedy=False,
                scheduler=s,
            )
            # Used in place of the manual wiring
            # ``km.input.table = csv.output.result``; presumably this connects
            # the loader to km's input -- confirm against MBKMeans.
            km.create_dependent_modules(csv)
            pr = Print(proc=self.terse, scheduler=s)
            pr.input[0] = km.output.result
            e = Every(proc=self.terse, scheduler=s)
            e.input[0] = km.output.labels
        aio.run(s.start())
        labels = km.labels()
        assert labels is not None
        self.assertEqual(len(csv.table), len(labels))
 def test_mb_k_means(self):
     """Check that MBKMeans yields one label per row of ``cluster:s3``.

     Loads the dataset with ``CSVLoader``, feeds it to ``MBKMeans``, attaches
     ``Print`` and ``Every`` sinks to the clustering outputs, starts the
     scheduler and asserts that the loader's data frame and the label
     collection have the same length.
     """
     # Debug aid (disabled): log_level()
     s = Scheduler()
     n_clusters = 3
     csv = CSVLoader(
         get_dataset('cluster:s3'),
         sep=' ',
         skipinitialspace=True,
         header=None,
         index_col=False,
         scheduler=s,
     )
     km = MBKMeans(
         n_clusters=n_clusters,
         random_state=42,
         is_input=False,
         scheduler=s,
     )
     km.input.df = csv.output.df
     pr = Print(scheduler=s)
     pr.input.df = km.output.df
     e = Every(scheduler=s)
     e.input.df = km.output.labels
     s.start()
     # assertEqual replaces the deprecated alias assertEquals, which no longer
     # exists on unittest.TestCase in Python 3.12+.
     self.assertEqual(len(csv.df()), len(km.labels()))
 def test_mb_k_means(self):
     """Check that MBKMeans yields one label per row of ``cluster:s3``.

     Wires ``CSVLoader`` -> ``MBKMeans``, attaches ``Print`` and ``Every``
     sinks to the clustering outputs, starts the scheduler and joins it, then
     asserts that the loader's table and the label collection have the same
     length.
     """
     # Debug aid (disabled): log_level()
     sched = self.scheduler()
     n_clusters = 3

     # Parsing options handed to the loader, grouped for readability.
     read_options = {
         "sep": " ",
         "skipinitialspace": True,
         "header": None,
         "index_col": False,
     }
     source = get_dataset("cluster:s3")
     loader = CSVLoader(source, **read_options, scheduler=sched)

     clusterer = MBKMeans(
         n_clusters=n_clusters, random_state=42, is_input=False, scheduler=sched
     )
     clusterer.input.table = loader.output.table

     # Sinks attached to the clustering outputs.
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = clusterer.output.table
     watcher = Every(proc=self.terse, scheduler=sched)
     watcher.input.df = clusterer.output.labels

     sched.start()
     sched.join()

     row_count = len(loader.table())
     label_count = len(clusterer.labels())
     self.assertEqual(row_count, label_count)