def test_normalize_data(self):
    """Compare the widget's cluster column with a direct DBSCAN run.

    The comparison is made twice on the "heart_disease" table: first with
    the normalize checkbox off, against DBSCAN on the raw table, then with
    it on, against DBSCAN on the table passed through Continuize,
    Normalize and SklImpute.
    """
    widget = self.widget

    def reference_clusters(table):
        # Parameters are read from the widget at call time, i.e. after the
        # normalize checkbox has been (re)set for the current scenario.
        params = {
            "eps": widget.eps,
            "min_samples": widget.min_samples,
            "metric": "euclidean",
        }
        return DBSCAN(**params)(table)

    def widget_clusters():
        annotated = self.get_output(widget.Outputs.annotated_data)
        labels = annotated.metas[:, 0].copy()
        # NaN entries are mapped to -1 so they can be compared with the
        # reference labels (presumably DBSCAN's noise marker - confirm).
        labels[np.isnan(labels)] = -1
        return labels

    # not normalized
    widget.controls.normalize.setChecked(False)
    table = Table("heart_disease")
    self.send_signal(widget.Inputs.data, table)
    expected = reference_clusters(table)
    np.testing.assert_array_equal(widget_clusters(), expected)

    # normalized
    widget.controls.normalize.setChecked(True)
    preprocessed = table
    for preprocessor in (Continuize(), Normalize(), SklImpute()):
        preprocessed = preprocessor(preprocessed)
    expected = reference_clusters(preprocessed)
    np.testing.assert_array_equal(widget_clusters(), expected)
def cluster(self):
    """Run DBSCAN on ``self.data`` and publish the result.

    Does nothing when ``self.check_data_size()`` is falsy. Otherwise a
    DBSCAN learner is built from ``self.eps``, ``self.min_samples`` and the
    metric selected by ``self.metric_idx``; the result of applying it to
    ``self.data`` is stored in ``self.model`` and ``self.send_data()`` is
    called.
    """
    if self.check_data_size():
        # Index 1 of each METRICS entry is the value handed to DBSCAN as
        # ``metric``.
        metric = self.METRICS[self.metric_idx][1]
        learner = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=metric,
        )
        self.model = learner(self.data)
        self.send_data()