Code example #1
 def test_retain_vars_attributes(self):
     data = Table("iris")
     attributes = {"foo": "foo", "baz": 1}
     data.domain.attributes[0].attributes = attributes
     self.assertDictEqual(
         Normalize(norm_type=Normalize.NormalizeBySD)(
             data).domain.attributes[0].attributes, attributes)
     self.assertDictEqual(
         Normalize(norm_type=Normalize.NormalizeBySpan)(
             data).domain.attributes[0].attributes, attributes)
Code example #2
File: owpreprocess.py Project: coro-binal/orange3
 def createinstance(params):
     method = params.get("method", Scale.NormalizeBySD)
     if method == Scale.CenterByMean:
         return _Scale(_Scale.CenteringType.Mean,
                       _Scale.ScalingType.NoScaling)
     elif method == Scale.ScaleBySD:
         return _Scale(_Scale.CenteringType.NoCentering,
                       _Scale.ScalingType.Std)
     elif method == Scale.NormalizeBySD:
         return Normalize(norm_type=Normalize.NormalizeBySD)
     elif method == Scale.NormalizeBySpan_ZeroBased:
         return Normalize(norm_type=Normalize.NormalizeBySpan)
     else:  # method == Scale.NormalizeSpan_NonZeroBased
         return Normalize(norm_type=Normalize.NormalizeBySpan,
                          zero_based=False)
Code example #3
    def _get_pca(self):
        data = self.data
        MAX_COMPONENTS = 2
        ncomponents = 2
        DECOMPOSITIONS = [PCA]  # TruncatedSVD
        cls = DECOMPOSITIONS[0]
        pca_projector = cls(n_components=MAX_COMPONENTS)
        pca_projector.component = ncomponents
        pca_projector.preprocessors = cls.preprocessors + [Normalize()]

        pca = pca_projector(data)
        variance_ratio = pca.explained_variance_ratio_
        cumulative = np.cumsum(variance_ratio)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        max_radius = np.min(
            [np.abs(np.min(coords, axis=0)),
             np.max(coords, axis=0)])
        axes = pca.components_.T.copy()
        axes *= max_radius / np.max(np.linalg.norm(axes, axis=1))
        return valid_mask, coords, axes
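Examples #3, #18 and #26 rely on the same idiom: extend a projector's class-level preprocessors with Normalize() before fitting, so that PCA runs on standardized data. A minimal sketch of the idiom, assuming Orange's PCA projector:

from Orange.data import Table
from Orange.preprocess import Normalize
from Orange.projection import PCA

data = Table("iris")
pca_projector = PCA(n_components=2)
# Appending Normalize() standardizes the attributes before the fit, so the
# components reflect correlation structure rather than raw covariance.
pca_projector.preprocessors = PCA.preprocessors + [Normalize()]
model = pca_projector(data)
print(model.explained_variance_ratio_)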
Code example #4
class SGDRegressionLearner(LinearRegressionLearner):
    __wraps__ = skl_linear_model.SGDRegressor
    preprocessors = SklLearner.preprocessors + [Normalize()]

    def __init__(
        self,
        loss="squared_loss",
        penalty="l2",
        alpha=0.0001,
        l1_ratio=0.15,
        fit_intercept=True,
        max_iter=5,
        tol=None,
        shuffle=True,
        epsilon=0.1,
        n_jobs=1,
        random_state=None,
        learning_rate="invscaling",
        eta0=0.01,
        power_t=0.25,
        class_weight=None,
        warm_start=False,
        average=False,
        preprocessors=None,
    ):
        super().__init__(preprocessors=preprocessors)
        self.params = vars()
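Stochastic gradient descent updates are sensitive to feature scale, which is why the SGD learners in examples #4, #6 and #8 all prepend Normalize() to SklLearner.preprocessors. A usage sketch (the import path is an assumption):

from Orange.data import Table
from Orange.regression import SGDRegressionLearner  # assumed import path

housing = Table("housing")
model = SGDRegressionLearner()(housing)  # Normalize() runs automatically as a preprocessor
print(model(housing[:5]))                # predictions for the first five instances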
Code example #5
class NNBase:
    """Base class for neural network (classification and regression) learners
    """
    preprocessors = SklLearner.preprocessors + [Normalize()]

    def __init__(self,
                 hidden_layer_sizes=(100, ),
                 activation='relu',
                 solver='adam',
                 alpha=0.0001,
                 batch_size='auto',
                 learning_rate='constant',
                 learning_rate_init=0.001,
                 power_t=0.5,
                 max_iter=200,
                 shuffle=True,
                 random_state=None,
                 tol=0.0001,
                 verbose=False,
                 warm_start=False,
                 momentum=0.9,
                 nesterovs_momentum=True,
                 early_stopping=False,
                 validation_fraction=0.1,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=1e-08,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.params = vars()
Code example #6
class SGDRegressionLearner(LinearRegressionLearner):
    __wraps__ = skl_linear_model.SGDRegressor
    preprocessors = SklLearner.preprocessors + [Normalize()]

    # Arguments are needed for signatures, pylint: disable=unused-argument
    def __init__(self,
                 loss='squared_loss',
                 penalty='l2',
                 alpha=0.0001,
                 l1_ratio=0.15,
                 fit_intercept=True,
                 max_iter=5,
                 tol=1e-3,
                 shuffle=True,
                 epsilon=0.1,
                 n_jobs=1,
                 random_state=None,
                 learning_rate='invscaling',
                 eta0=0.01,
                 power_t=0.25,
                 class_weight=None,
                 warm_start=False,
                 average=False,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.params = vars()
Code example #7
    def test_normalize_sparse(self):
        domain = Domain([ContinuousVariable(str(i)) for i in range(3)])
        # pylint: disable=bad-whitespace
        X = np.array([
            [0, -1, -2],
            [0, 1, 2],
        ])
        data = Table.from_numpy(domain, X).to_sparse()

        # pylint: disable=bad-whitespace
        solution = sp.csr_matrix(np.array([
            [0, -1, -1],
            [0, 1, 1],
        ]))

        normalizer = Normalize()
        normalized = normalizer(data)
        self.assertEqual((normalized.X != solution).nnz, 0)

        # raise error for non-zero offsets
        data.X = sp.csr_matrix(np.array([
            [0, 0, 0],
            [0, 1, 3],
            [0, 2, 4],
        ]))
        with self.assertRaises(ValueError):
            normalizer(data)
Code example #8
class SGDClassificationLearner(SklLearner):
    name = 'sgd'
    __wraps__ = SGDClassifier
    __returns__ = LinearModel
    preprocessors = SklLearner.preprocessors + [Normalize()]

    def __init__(self,
                 loss='hinge',
                 penalty='l2',
                 alpha=0.0001,
                 l1_ratio=0.15,
                 fit_intercept=True,
                 max_iter=5,
                 tol=None,
                 shuffle=True,
                 epsilon=0.1,
                 random_state=None,
                 learning_rate='invscaling',
                 eta0=0.01,
                 power_t=0.25,
                 warm_start=False,
                 average=False,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.params = vars()
Code example #9
    def test_optimized(self):
        """
        Test if optimized works well
        """
        lr = self.linear_regression

        lr.set_data(self.housing)
        op_theta = lr.optimized()
        self.assertEqual(len(op_theta), len(self.housing.domain.attributes))

        # check that this is really a minimum: the cost function is convex,
        # so j should be higher at every point around op_theta
        attr_x = self.housing.domain['CRIM']
        attr_y = self.housing.domain['ZN']
        cols = []
        for attr in (attr_x, attr_y) if attr_y is not None else (attr_x, ):
            subset = self.housing[:, attr]
            cols.append(subset.X)
        x = np.column_stack(cols)

        domain = Domain([attr_x, attr_y], self.housing.domain.class_var)
        data = Normalize(transform_class=True)(Table(domain, x,
                                                     self.housing.Y))

        lr.set_data(data)
        op_theta = lr.optimized()

        self.assertLessEqual(lr.j(op_theta), lr.j(op_theta + np.array([1, 0])))
        self.assertLessEqual(lr.j(op_theta), lr.j(op_theta + np.array([0, 1])))
        self.assertLessEqual(lr.j(op_theta),
                             lr.j(op_theta + np.array([-1, 0])))
        self.assertLessEqual(lr.j(op_theta),
                             lr.j(op_theta + np.array([0, -1])))
Code example #10
 def test_normalize_default(self):
     normalizer = Normalize()
     data_norm = normalizer(self.data)
     solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
                 [0., -1.225, 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
                 [0., 0., 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
     self.compare_tables(data_norm, solution)
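The ±1.225 entries in the expected output are ordinary z-scores computed with the population standard deviation: for three equally spaced values the extremes land at ±1/√(2/3) ≈ ±1.225. A quick check of that arithmetic:

import numpy as np

col = np.array([2.0, 0.0, 1.0])     # any three equally spaced values
z = (col - col.mean()) / col.std()  # np.std defaults to the population sd (ddof=0)
print(z)                            # [ 1.2247 -1.2247  0.    ]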
Code example #11
    def test_normalize_data(self):
        # not normalized
        self.widget.controls.normalize.setChecked(False)

        data = Table("heart_disease")
        self.send_signal(self.widget.Inputs.data, data)

        kwargs = {
            "eps": self.widget.eps,
            "min_samples": self.widget.min_samples,
            "metric": "euclidean"
        }
        clusters = DBSCAN(**kwargs)(data)

        output = self.get_output(self.widget.Outputs.annotated_data)
        output_clusters = output.metas[:, 0].copy()
        output_clusters[np.isnan(output_clusters)] = -1
        np.testing.assert_array_equal(output_clusters, clusters)

        # normalized
        self.widget.controls.normalize.setChecked(True)

        kwargs = {
            "eps": self.widget.eps,
            "min_samples": self.widget.min_samples,
            "metric": "euclidean"
        }
        for pp in (Continuize(), Normalize(), SklImpute()):
            data = pp(data)
        clusters = DBSCAN(**kwargs)(data)

        output = self.get_output(self.widget.Outputs.annotated_data)
        output_clusters = output.metas[:, 0].copy()
        output_clusters[np.isnan(output_clusters)] = -1
        np.testing.assert_array_equal(output_clusters, clusters)
Code example #12
 def __init__(self,
              preprocessors=None,
              penalty=1,
              opt_penalty=False,
              rule_learner=None,
              basic_attributes=True,
              fit_intercept=True,
              intercept_scaling=2,
              penalize_rules=True):
     """
     Parameters
     ----------
     preprocessors :
         A sequence of data preprocessors to apply on data prior to
         fitting the model.
     penalty : L2-penalty in loss function.
     rule_learner: Rule learner used to construct new attributes.
     fit_intercept: Should we add a constant column to data?
     intercept_scaling: Value of constant in the intercept column. Note that
         intercept column is appended after normalization, therefore higher
         values will be less affected by penalization.
     """
     super().__init__(preprocessors)
     self.penalty = penalty
     self.opt_penalty = opt_penalty
     self.rule_learner = rule_learner
     self.fit_intercept = fit_intercept
     self.intercept_scaling = intercept_scaling
     self.basic_attributes = basic_attributes
     self.penalize_rules = penalize_rules
     # Post rule learning preprocessing should not decrease the
     # number of examples.
     self.post_rule_preprocess = [Normalize(), Continuize()]
Code example #13
File: owpca.py Project: josh200501/orange3
    def fit(self):
        self.clear()
        self.start_button.setEnabled(False)
        if self.data is None:
            return
        data = self.data
        self._transformed = None
        if isinstance(data, SqlTable):  # data is big and remote computation is available
            self.sampling_box.setVisible(True)
            self.start_button.setText("Start remote computation")
            self.start_button.setEnabled(True)
        else:
            # TODO move the following normalization outside
            # so it is applied for SqlTables as well (when it works on them)
            if self.normalize:
                data = Normalize()(data)
            self.sampling_box.setVisible(False)
            pca = Orange.projection.PCA()
            pca = pca(data)
            variance_ratio = pca.explained_variance_ratio_
            cumulative = numpy.cumsum(variance_ratio)
            self.components_spin.setRange(0, len(cumulative))

            self._pca = pca
            self._variance_ratio = variance_ratio
            self._cumulative = cumulative
            self._setup_plot()

            self.unconditional_commit()
Code example #14
    def test_skip_normalization(self):
        data = self.data.copy()
        for attr in data.domain.attributes:
            attr.attributes = {'skip-normalization': True}

        normalizer = Normalize()
        normalized = normalizer(data)
        np.testing.assert_array_equal(data.X, normalized.X)
Code example #15
 def preprocess(self, data):
     if self.normalize:
         if sp.issparse(data.X):
             self.Warning.no_sparse_normalization()
         else:
             data = Normalize()(data)
     for preprocessor in KMeans.preprocessors:  # use the same preprocessors as KMeans
         data = preprocessor(data)
     return data
Code example #16
 def test_normalize_transform_by_span_zero_class(self):
     normalizer = Normalize(zero_based=True,
                            norm_type=Normalize.NormalizeBySpan,
                            transform_class=True)
     data_norm = normalizer(self.data)
     solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', '?', 'a', 1.],
                 [0., 0., 'a', 'b', 0., '?', 0.5, 'b', '?', 'b', 0.],
                 [0., 0.5, 'a', 'b', 1., 'b', 0., 'c', '?', 'c', 0.5]]
     self.compare_tables(data_norm, solution)
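Examples #16, #21 and #25 differ mainly in zero_based: with zero_based=True the attribute's span is mapped onto [0, 1], with zero_based=False onto [-1, 1]. A sketch contrasting the two, assuming the same Normalize API:

import numpy as np
from Orange.data import ContinuousVariable, Domain, Table
from Orange.preprocess import Normalize

domain = Domain([ContinuousVariable("x")])
data = Table.from_numpy(domain, np.array([[-1.0], [0.0], [1.0]]))
# Span normalization onto [0, 1]:
print(Normalize(zero_based=True, norm_type=Normalize.NormalizeBySpan)(data).X.ravel())   # [0.  0.5 1. ]
# Span normalization onto [-1, 1]:
print(Normalize(zero_based=False, norm_type=Normalize.NormalizeBySpan)(data).X.ravel())  # [-1.  0.  1.]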
Code example #17
 def test_normalize_default(self):
     normalizer = Normalize()
     data_norm = normalizer(self.data)
     solution = [
         [0.0, 1.225, "a", "a", "?", "a", 1.225, "a", "a", 2],
         [0.0, -1.225, "a", "b", -1.0, "?", 0.0, "b", "b", 0],
         [0.0, 0.0, "a", "b", 1.0, "b", -1.225, "c", "c", 1],
     ]
     self.compare_tables(data_norm, solution)
Code example #18
 def _update_normalize(self):
     if self.normalize:
         pp = self._pca_preprocessors + [Normalize()]
     else:
         pp = self._pca_preprocessors
     self._pca_projector.preprocessors = pp
     self.fit()
     if self.data is None:
         self._invalidate_selection()
Code example #19
 def test_normalize_transform_by_sd(self):
     normalizer = Normalize(zero_based=False,
                            norm_type=Normalize.NormalizeBySD,
                            transform_class=False)
     data_norm = normalizer(self.data)
     solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
                 [0., -1.225, 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
                 [0., 0., 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
     self.compare_tables(data_norm, solution)
Code example #20
File: tests.py Project: am93/fri-ozip-naloge
 def setUp(self):
     self.data = Table("iris")
     self.data = Normalize()(self.data)
     self.X, self.y = self.data.X, self.data.Y
     self.m, self.n = self.X.shape
     self.k = len(self.data.domain.class_var.values)
     self.X1 = np.hstack((np.ones((self.m, 1)), self.X))
     self.theta = np.ones((self.k, self.n + 1)).flatten()
     self.sm = SoftmaxLearner()
     self.sm_reg = SoftmaxLearner_reg()
Code example #21
File: test_normalize.py Project: wibrt/orange3
 def test_normalize_transform_by_span_class(self):
     data = Table("test5.tab")
     normalizer = Normalize(zero_based=False,
                            norm_type=Normalize.NormalizeBySpan,
                            transform_class=True)
     data_norm = normalizer(data)
     solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', 'a', 1.],
                 [0., -1., 'a', 'b', -1., '?', 0., 'b', 'b', -1.],
                 [0., 0., 'a', 'b', 1., 'b', -1., 'c', 'c', 0.]]
     self.compare_tables(data_norm, solution)
Code example #22
File: test_normalize.py Project: ylyking/orange3
 def test_datetime_normalization(self):
     data = Table(test_filename("datasets/test10.tab"))
     normalizer = Normalize(zero_based=False,
                            norm_type=Normalize.NormalizeBySD,
                            transform_class=False)
     data_norm = normalizer(data)
     solution = [[0., '1995-01-21', 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
                 [0., '2003-07-23', 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
                 [0., '1967-03-12', 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
     self.compare_tables(data_norm, solution)
Code example #23
    def test_abs_error_normalized(self):
        tab = Table('housing')
        normalizer = Normalize(zero_based=True,
                               norm_type=Normalize.NormalizeBySpan)
        tab = normalizer(tab)

        icr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        icr_knn = InductiveRegressor(AbsError(KNNRegressionLearner(4)))
        icr_norm = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               exp=False))
        icr_norm_exp = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               exp=True))
        icr_norm_rf = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               rf=RandomForestRegressor()))

        r, r_knn, r_norm, r_norm_exp, r_norm_rf = (
            ResultsRegr(), ResultsRegr(), ResultsRegr(), ResultsRegr(), ResultsRegr())
        eps = 0.05
        for rep in range(10):
            for train, test in CrossSampler(tab, 10):
                train, calibrate = next(
                    RandomSampler(train,
                                  len(train) - 100, 100))
                r.concatenate(run_train_test(icr, eps, train, test, calibrate))
                r_knn.concatenate(
                    run_train_test(icr_knn, eps, train, test, calibrate))
                r_norm.concatenate(
                    run_train_test(icr_norm, eps, train, test, calibrate))
                r_norm_exp.concatenate(
                    run_train_test(icr_norm_exp, eps, train, test, calibrate))
                r_norm_rf.concatenate(
                    run_train_test(icr_norm_rf, eps, train, test, calibrate))

        print(r.median_range(), r.interdecile_mean(), 1 - r.accuracy())
        print(r_knn.median_range(), r_knn.interdecile_mean(),
              1 - r_knn.accuracy())
        print(r_norm.median_range(), r_norm.interdecile_mean(),
              1 - r_norm.accuracy())
        print(r_norm_exp.median_range(), r_norm_exp.interdecile_mean(),
              1 - r_norm_exp.accuracy())
        print(r_norm_rf.median_range(), r_norm_rf.interdecile_mean(),
              1 - r_norm_rf.accuracy())
        self.assertGreater(r.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_knn.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_exp.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_rf.accuracy(), 1 - eps - 0.03)
        """
Code example #24
 def test_PCA_scorer(self):
     data = self.iris
     pca = PCA(preprocessors=[Normalize()])
     pca.component = 1
     scores = pca.score_data(data)
     self.assertEqual(scores.shape[1], len(data.domain.attributes))
     self.assertEqual(['petal length', 'petal width'],
                      sorted([data.domain.attributes[i].name
                              for i in np.argsort(scores[0])[-2:]]))
     self.assertEqual([round(s, 4) for s in scores[0]],
                      [0.5224, 0.2634, 0.5813, 0.5656])
Code example #25
 def test_normalize_transform_by_span_class(self):
     normalizer = Normalize(zero_based=False,
                            norm_type=Normalize.NormalizeBySpan,
                            transform_class=True)
     data_norm = normalizer(self.data)
     solution = [
         [0.0, 1.0, "a", "a", "?", "a", 1.0, "a", "a", 1.0],
         [0.0, -1.0, "a", "b", -1.0, "?", 0.0, "b", "b", -1.0],
         [0.0, 0.0, "a", "b", 1.0, "b", -1.0, "c", "c", 0.0],
     ]
     self.compare_tables(data_norm, solution)
Code example #26
    def init_projection(self):
        if self.placement == Placement.Circular:
            self.projector = CircularPlacement()
        elif self.placement == Placement.LDA:
            self.projector = LDA(solver="eigen", n_components=2)
        elif self.placement == Placement.PCA:
            self.projector = PCA(n_components=2)
            self.projector.component = 2
            self.projector.preprocessors = PCA.preprocessors + [Normalize()]

        super().init_projection()
Code example #27
    def test_number_of_decimals(self):
        foo = ContinuousVariable("Foo", number_of_decimals=0)
        data = Table.from_list(Domain((foo, )), [[1], [2], [3]])

        normalized = Normalize()(data)
        norm_foo: ContinuousVariable = normalized.domain.attributes[0]

        self.assertGreater(norm_foo.number_of_decimals, 0)

        for val1, val2 in zip(normalized[:, "Foo"],
                              ["-1.225", "0.0", "1.225"]):
            self.assertEqual(str(val1[0]), val2)
Code example #28
File: test_pca.py Project: wibrt/orange3
 def test_PCA_scorer(self):
     data = Orange.data.Table('iris')
     pca = PCA(preprocessors=[Normalize()])
     scores = pca.score_data(data)
     self.assertEqual(len(scores), len(data.domain.attributes))
     self.assertEqual(['petal length', 'petal width'],
                      sorted([
                          data.domain.attributes[i].name
                          for i in np.argsort(scores)[-2:]
                      ]))
     self.assertEqual([round(s, 4) for s in scores],
                      [0.5224, 0.2634, 0.5813, 0.5656])
Code example #29
File: owcorrelations.py Project: larazupan/orange3
    def get_clusters_of_attributes(self):
        """
        Generates groupes of attribute IDs, grouped by cluster. Clusters are
        obtained by KMeans algorithm.

        :return: generator of attributes grouped by cluster
        """
        data = Normalize()(self.data).X.T
        kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(data)
        labels_attrs = sorted([(l, i) for i, l in enumerate(kmeans.labels_)])
        return [Cluster(instances=list(pair[1] for pair in group),
                        centroid=kmeans.cluster_centers_[l])
                for l, group in groupby(labels_attrs, key=lambda x: x[0])]
Code example #30
File: owcorrelations.py Project: haojia632/orange3
    def get_clusters_of_attributes(self):
        """
        Generates groupes of attribute IDs, grouped by cluster. Clusters are
        obtained by KMeans algorithm.

        :return: generator of attributes grouped by cluster
        """
        data = Normalize()(self.data).X.T
        kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(data)
        labels_attrs = sorted([(l, i) for i, l in enumerate(kmeans.labels_)])
        for _, group in groupby(labels_attrs, key=lambda x: x[0]):
            group = list(group)
            if len(group) > 1:
                yield list(pair[1] for pair in group)
Code example #31
File: scoring.py Project: astaric/orange-astaric
def test(datasets=(),
         normalization="stdev", reorder='none', score='probability',
         print_latex=True, k=10, eps=1e-15):
    global results
    #print("%% normalization=%s,reorder=%s,score=%s, %%" % (normalization, reorder, score))

    if print_latex:
        print(r"\begin{tabular}{ l r r r }")
        print(r"dataset & S(k-means) & S(gmm)& S(lac) \\")
        print(r"\hline")
    results = []
    #for ds in [Table('vehicle', name='vehicle')]:
    #for ds in GDS_datasets():
    #for ds in temporal_datasets():
    for ds2 in datasets:
        np.random.seed(42)
        ds = impute(ds2)
        ds.name = ds2.name
        n_steps = 99

        if normalization == 'none' or normalization is None:
            pass
        elif normalization == '01':
            ds = Normalize(norm_type=Normalize.NormalizeBySpan)(ds)
        elif normalization == 'stdev':
            ds = Normalize(norm_type=Normalize.NormalizeBySD)(ds)
        else:
            raise AttributeError('Unknown normalization type "%s"' % (normalization,))

        if reorder == 'none' or reorder is None:
            pass
        elif reorder == 'shuffle':
            np.random.shuffle(ds.X.T)  # shuffle the feature (column) order in place
        elif reorder == 'covariance':
            ds = reorder_attributes_covariance(ds)
        elif reorder == 'probability':
            ds = reorder_attributes_probability_score(ds, k)
        else:
            raise AttributeError('Unknown feature reordering type "%s"' % (reorder,))

        if score == 'covariance':
            scorer = covariance_score
        elif score == 'probability':
            scorer = probability_score
        elif score == 'global_probability':
            scorer = global_probability_score
        elif score == 'best_triplet':
            scorer = best_triplet_variance_score
        elif score == 'best_triplet_probability':
            scorer = best_triplet_probability_score
        elif score == 'silhouette':
            scorer = silhouette_score
        elif score == 'silhouette_d':
            scorer = silhouette_d_score
        else:
            raise AttributeError('Unknown scorer type "%s"' % (score,))


        all_lac_scores = []
        for n in range(10,11):
            #print('.', end='')
            lac = LAC(ds, k)
            all_lac_scores.append((lac.k, scorer(lac, ds.X)))

#        print()
#        for i in range(10, 11):
#            lac_scores = [s for k, s in all_lac_scores if k == i]
#            print("lac, %i, %f, %f, %f, %f" % (i, min(lac_scores or [0]), min([l for l in lac_scores if l] or [0]), max(lac_scores or [0]), sum([l for l in lac_scores if l] or [0]) / len([l for l in lac_scores if l] or [0])))

        realk = max(k for k, s in all_lac_scores)
        #if lac.k < 2:
        #    continue

        try:
            lac = LAC(ds, realk)
            km = KM(ds.X, realk)
            gmm = GMM(ds.X, realk)
        except Exception:
            print("Error")
            continue

        lac_score = scorer(lac, ds.X)
        opts = dict(defined_=lac_score.defined) if hasattr(lac_score, 'defined') else {}
        km_score = scorer(km, ds.X)
        km_score_d = scorer(km, ds.X, **opts)
        gmm_score = scorer(gmm, ds.X)
        gmm_score_d = scorer(gmm, ds.X, **opts)

        knn_score = LouAUC(ds)

        results.append((km_score, gmm_score, lac_score))
        if not print_latex:
            print("dataset: %s (%s rows, %s features)" % (ds.name, len(ds), len(ds.domain)))
            print("normalization: ", normalization)
            print("reorder: ", reorder)
            print("scoring function: ", score)
            print("----------------")
            print("k-means:           %.5f" % km_score)
            print("gmm:               %.5f" % gmm_score)
            print("k-means (dropout): %.5f" % km_score_d)
            print("gmm (dropout):     %.5f" % gmm_score_d)
            print("lac:               %.5f" % lac_score)
            print("----------------")
            print("knn AUC (lou)      %.5f" % knn_score)
            print("----------------")
            print("k=%s, dropout %s (%.1f%%)" % (realk, sum(~lac_score.defined), (sum(~lac_score.defined) / len(
                ds.X)) *
                  100))
            print()
            print()

            # print("%s,%s,%s,%s,%f,%f,%f,%i,%f,%f" % (normalization, reorder, score, ds.name, km_score, gmm_score,
            #                                       lac_score, realk, sum(~lac_score.defined), sum(~lac_score.defined) /
            #                                       len(ds.X)))

        w1, _ = get_cluster_weights(lac.priors, lac.means, lac.covars, ds.X, crisp=False)
        w2, _ = get_cluster_weights(lac.priors, lac.means, lac.covars, ds.X, crisp=True)
        w2 = np.argmax(w2, axis=1)[:, None]

        domain = Domain([ContinuousVariable("p%d" % i) for i in range(w1.shape[1])])
        probs = Table(domain, w1)
        labels = Table(Domain([DiscreteVariable("label", values=list(range(k)))]), w2)

        ds2.name = ds2.name.replace("/", "_")
        ds.name = ds2.name.replace("/", "_")
        tbl = Table.concatenate((ds, probs, labels))
        tbl.save(os.path.join('output', ds2.name + ".lac.tab"))

        def annotate(minis):
            def _annotate(ax):
                for m in minis:
                    ax.plot([m-1, m, m+1], [.5, .5, .5])
            return _annotate

        parallel_coordinates_plot(ds.name + ".kmeans.pdf", ds.X,
                                  means=km.means, stdevs=np.sqrt(km.covars), annotate=annotate(km.minis))
        parallel_coordinates_plot(ds.name + ".lac.pdf", ds.X,
                                  means=lac.means, stdevs=np.sqrt(lac.covars), annotate=annotate(lac.minis))
        parallel_coordinates_plot(ds.name + ".gmm.pdf", ds.X,
                                  means=gmm.means, stdevs=np.sqrt(gmm.covars), annotate=annotate(gmm.minis))

        import matplotlib.pyplot as plt
        import matplotlib.mlab as mlab
        import math

        #iris = Table("wine")
        #for m in range(lac.means.shape[1]):
        #    plt.clf()
        #    for p, mean, variance, c in zip(lac.priors, lac.means[:, m], lac.covars[:, m], "grb"):
        #        sigma = math.sqrt(variance)
        #        x = np.linspace(0,1,100)
        #         plt.plot(x,p * mlab.normpdf(x, mean,sigma), color=c)
        #     plt.plot(ds.X[:, m].ravel() + np.random.random(len(ds)) * 0.02, [0.05]*len(ds.X) + iris.Y.ravel() * 0.1,
        #              "k|")
        #     plt.ylabel("pdf")
        #     plt.xlabel(iris.domain[m].name)
        #     plt.savefig("axis-%d.pdf" % m)

    if print_latex:
        print(r"\end{tabular}")
    results = np.array(results)