예제 #1
0
    def test_normalize_data(self):
        """Compare the widget's cluster column with DBSCAN run directly.

        The comparison is done twice on the "heart_disease" table: once with
        the widget's normalize control unchecked, and once with it checked.
        """
        widget = self.widget

        def reference_labels(table):
            # Run DBSCAN with the widget's current settings and a fixed
            # euclidean metric.
            params = dict(
                eps=widget.eps,
                min_samples=widget.min_samples,
                metric="euclidean",
            )
            return DBSCAN(**params)(table)

        def widget_labels():
            # First meta column of the annotated output; NaN entries are
            # replaced by -1 before the arrays are compared.
            annotated = self.get_output(widget.Outputs.annotated_data)
            labels = annotated.metas[:, 0].copy()
            labels[np.isnan(labels)] = -1
            return labels

        # --- normalization off ---
        widget.controls.normalize.setChecked(False)

        table = Table("heart_disease")
        self.send_signal(widget.Inputs.data, table)

        expected = reference_labels(table)
        np.testing.assert_array_equal(widget_labels(), expected)

        # --- normalization on ---
        widget.controls.normalize.setChecked(True)

        # The reference data goes through these preprocessors in order;
        # presumably this mirrors what the widget applies when normalize is
        # checked -- confirm against the widget implementation.
        for preprocessor in (Continuize(), Normalize(), SklImpute()):
            table = preprocessor(table)

        expected = reference_labels(table)
        np.testing.assert_array_equal(widget_labels(), expected)
예제 #2
0
 def cluster(self):
     """Fit DBSCAN on ``self.data``, store the result and send the data.

     Nothing happens when ``check_data_size()`` returns a falsy value.
     The metric is the second item of the ``METRICS`` entry selected by
     ``self.metric_idx``.
     """
     if self.check_data_size():
         metric = self.METRICS[self.metric_idx][1]
         learner = DBSCAN(
             eps=self.eps,
             min_samples=self.min_samples,
             metric=metric,
         )
         self.model = learner(self.data)
         self.send_data()