예제 #1
0
    def test_attr_label_from_data(self):
        """Variables and metas of the data are a proper subset of label choices."""
        widget = self.widget
        # Replace the MDS optimization with a mock: it saves time and keeps the
        # widget from being in a blocking state when the signal is sent.
        widget.start = Mock()

        zoo = Table("zoo")
        self.send_signal(widget.Inputs.distances, Euclidean(zoo))

        domain = zoo.domain
        expected = set(chain(domain.variables, domain.metas))
        offered = set(widget.controls.attr_label.model())
        self.assertTrue(expected < offered)
예제 #2
0
    def setUpClass(cls):
        """Set up the shared distance signal and locate the datasets folder."""
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        cls.signal_name = "Distances"
        cls.signal_data = Euclidean(cls.data)
        cls.same_input_output_domain = False

        # The datasets folder sits three levels above this test module.
        relative_parts = ('..', '..', '..', 'datasets')
        here = os.path.dirname(__file__)
        cls.datasets_dir = os.path.realpath(
            os.path.join(here, *relative_parts))
예제 #3
0
 def test_num_meta_labels(self):
     """A numeric meta chosen as annotation gives the labels "1" and "?"."""
     x = ContinuousVariable("x")
     y = ContinuousVariable("y")
     s = StringVariable("s")
     domain = Domain([x], [], [y, s])
     # The second row has a missing value in the numeric meta `y`.
     rows = [[0, 1, "a"], [1, np.nan, "b"]]
     data = Table.from_list(domain, rows)

     widget = self.widget
     widget.set_distances(Euclidean(data))

     combo = widget.annot_combo
     position = combo.model().indexOf(y)
     combo.setCurrentIndex(position)
     combo.activated.emit(position)
     self.assertEqual(widget.tablemodel.labels, ["1", "?"])
예제 #4
0
파일: kmeans.py 프로젝트: karoema/orange3
 def fit(self, X, Y=None):
     """Cluster ``X`` with scikit-learn's KMeans and attach quality scores.

     The fitted estimator gets three extra attributes:

     - ``silhouette``: the silhouette score, or 0 when the number of
       clusters is below 2 or not smaller than the number of samples;
     - ``inertia``: ``inertia_`` divided by the number of samples;
     - ``inter_cluster``: mean of the Euclidean distances between cluster
       centers (entries above the diagonal of the distance matrix).

     Returns a ``KMeansModel`` wrapping the estimator and the learner's
     preprocessors.
     """
     proj = skl_cluster.KMeans(**self.params)
     proj = proj.fit(X, Y)
     # One label is assigned per sample, so this equals the number of rows
     # of X. Unlike len(X), it also works when X is a scipy sparse matrix,
     # whose __len__ raises TypeError.
     n_samples = len(proj.labels_)
     if 2 <= proj.n_clusters < n_samples:
         proj.silhouette = silhouette_score(X, proj.labels_)
     else:
         proj.silhouette = 0
     proj.inertia = proj.inertia_ / n_samples
     cluster_dist = Euclidean(proj.cluster_centers_)
     # Offset 1 skips the zero diagonal so only distinct pairs are averaged.
     proj.inter_cluster = np.mean(cluster_dist[np.triu_indices_from(
         cluster_dist, 1)])
     return KMeansModel(proj, self.preprocessors)
예제 #5
0
 def fit(self, X, Y=None):
     """Fit KMeans on ``X`` and decorate the estimator with quality scores.

     ``silhouette`` stays NaN unless silhouette computation is enabled and
     the number of clusters is at least 2 and smaller than the number of
     rows; ``inertia`` is ``inertia_`` per row; ``inter_cluster`` is the
     mean Euclidean distance between distinct cluster centers.
     """
     proj = skl_cluster.KMeans(**self.params).fit(X, Y)
     n_rows = X.shape[0]

     proj.silhouette = np.nan
     if self._compute_silhouette and 2 <= proj.n_clusters < n_rows:
         try:
             proj.silhouette = silhouette_score(
                 X, proj.labels_, sample_size=5000)
         except MemoryError:
             # Pairwise distances in silhouette fail for large data; the
             # score is then deliberately left as NaN.
             pass

     proj.inertia = proj.inertia_ / n_rows
     center_distances = Euclidean(proj.cluster_centers_)
     upper = np.triu_indices_from(center_distances, 1)
     proj.inter_cluster = np.mean(center_distances[upper])
     return KMeansModel(proj, self.preprocessors)
예제 #6
0
    def test_torgerson(self):
        """All valid eigen solvers agree up to sign; unknown ones raise."""
        dis = Euclidean(self.ionosphere[::5])

        solvers = ("auto", "lapack", "arpack")
        magnitudes = [np.abs(torgerson(dis, eigen_solver=solver))
                      for solver in solvers]

        # Compare neighbouring results: auto vs. lapack, lapack vs. arpack.
        for first, second in zip(magnitudes, magnitudes[1:]):
            np.testing.assert_almost_equal(first, second)

        with self.assertRaises(ValueError):
            torgerson(dis, eigen_solver="madness")
예제 #7
0
    def __tsne_test_helper(self, data, n_com):
        """Check the embedding shape for three ways of specifying the metric."""
        by_name = TSNE(n_components=n_com, metric="euclidean")(data)
        by_class = TSNE(n_components=n_com, metric=Euclidean)(data)
        precomputed = TSNE(n_components=n_com, metric="precomputed")(
            Euclidean(data))

        expected_shape = (data.X.shape[0], n_com)
        for model in (by_name, by_class, precomputed):
            self.assertEqual(expected_shape, model.embedding_.shape)
예제 #8
0
    def _initialize(self):
        """
        Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

        Data is taken from ``signal_data`` or, failing that, from
        ``matrix_data``. The effective matrix is the input ``matrix`` or,
        when there is none, Euclidean distances over MDS-preprocessed data.
        On mismatching input lengths or data without attributes an error is
        shown and the widget is cleared. ``self.clear()`` is skipped at the
        end only when an effective matrix existed before and equals the new
        one.
        """
        # Remember the previous matrix so it can be compared with the new one.
        matrix_existed = self.effective_matrix is not None
        effective_matrix = self.effective_matrix
        self._invalidated = True
        self.data = None
        self.effective_matrix = None
        self.closeContext()
        self.clear_messages()

        # if no data nor matrix is present reset plot
        if self.signal_data is None and self.matrix is None:
            self.clear()
            self.init_attr_values()
            return

        # Both inputs are present but disagree in length.
        if self.signal_data is not None and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.Error.mismatching_dimensions()
            self.clear()
            self.init_attr_values()
            return

        # The data signal takes precedence over data attached to the matrix.
        if self.signal_data is not None:
            self.data = self.signal_data
        elif self.matrix_data is not None:
            self.data = self.matrix_data

        if self.matrix is not None:
            self.effective_matrix = self.matrix
            # For a matrix with axis 0 whose data came from the matrix itself,
            # replace the data with a table holding one string meta "labels"
            # that contains the attribute names.
            if self.matrix.axis == 0 and self.data is not None \
                    and self.data is self.matrix_data:
                names = [[attr.name] for attr in self.data.domain.attributes]
                domain = Domain([], metas=[StringVariable("labels")])
                self.data = Table.from_list(domain, names)
        elif self.data.domain.attributes:
            # No matrix given: compute distances from the data.
            preprocessed_data = MDS().preprocess(self.data)
            self.effective_matrix = Euclidean(preprocessed_data)
        else:
            self.Error.no_attributes()
            self.clear()
            self.init_attr_values()
            return

        self.init_attr_values()
        self.openContext(self.data)
        # Valid only if there was a matrix before and it equals the new one.
        self._invalidated = not (
            matrix_existed and self.effective_matrix is not None
            and array_equal(effective_matrix, self.effective_matrix))
        if self._invalidated:
            self.clear()
        self.graph.set_effective_matrix(self.effective_matrix)
예제 #9
0
    def cluster_data(self, matrix):
        """
        Return row and column orderings obtained by hierarchical clustering.

        Each axis with more than one item is clustered on Euclidean distances
        and arranged with optimal leaf ordering; an axis with at most one
        item gets the ordering ``[0]``.
        """
        def leaf_order(distances):
            # Cluster, optimally order the leaves, and read off the indices.
            tree = hierarchical.dist_matrix_clustering(distances)
            ordered = hierarchical.optimal_leaf_ordering(
                tree, distances, progress_callback=self.progressBarSet)
            return np.array([leaf.value.index for leaf in leaves(ordered)])

        single = np.array([0])
        with self.progressBar():
            # cluster rows
            row_order = single
            if len(matrix) > 1:
                row_order = leaf_order(Euclidean(matrix))

            # cluster columns
            columns_order = np.array([0])
            if matrix.X.shape[1] > 1:
                columns_order = leaf_order(Euclidean(matrix, axis=0))
        return row_order, columns_order
예제 #10
0
 def __init__(self, classifier, distance=Euclidean(), k=10,
              relative=True, include=False, neighbourhood='fixed'):
     """Store the options; ``distance`` and ``k`` go to the parent class.

     ``neighbourhood`` must be either 'fixed' or 'variable'. Note that the
     default ``distance`` object is created once, when the function is
     defined, and is shared by all calls that rely on the default.
     """
     super().__init__(distance, k)
     self.classifier, self.relative, self.include = \
         classifier, relative, include
     assert neighbourhood in ('fixed', 'variable')
     self.neighbourhood = neighbourhood
예제 #11
0
    def _initialize(self):
        """
        Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

        Data is taken from ``signal_data`` or, failing that, from
        ``matrix_data``. The effective matrix is the input ``matrix`` or,
        when there is none, Euclidean distances over MDS-preprocessed data.
        On mismatching input lengths or data without attributes an error is
        shown and the widget is cleared. ``self.clear()`` is skipped at the
        end only when an effective matrix existed before and equals the new
        one.
        """
        # Remember the previous matrix so it can be compared with the new one.
        matrix_existed = self.effective_matrix is not None
        effective_matrix = self.effective_matrix
        self.__invalidated = True
        self.data = None
        self.effective_matrix = None
        self.closeContext()
        self.clear_messages()

        # if no data nor matrix is present reset plot
        if self.signal_data is None and self.matrix is None:
            self.clear()
            self.init_attr_values()
            return

        # Both inputs are present but disagree in length.
        if self.signal_data is not None and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.Error.mismatching_dimensions()
            self.clear()
            self.init_attr_values()
            return

        # The data signal takes precedence over data attached to the matrix.
        if self.signal_data is not None:
            self.data = self.signal_data
        elif self.matrix_data is not None:
            self.data = self.matrix_data

        if self.matrix is not None:
            self.effective_matrix = self.matrix
            # For a matrix with axis 0, data that came from the matrix itself
            # is dropped.
            if self.matrix.axis == 0 and self.data is self.matrix_data:
                self.data = None
        elif self.data.domain.attributes:
            # No matrix given: compute distances from the data.
            preprocessed_data = MDS().preprocess(self.data)
            self.effective_matrix = Euclidean(preprocessed_data)
        else:
            self.Error.no_attributes()
            self.clear()
            self.init_attr_values()
            return

        self.init_attr_values()
        self.openContext(self.data)
        # Valid only if there was a matrix before and it equals the new one.
        self.__invalidated = not (matrix_existed and
                                  self.effective_matrix is not None and
                                  np.array_equal(effective_matrix,
                                                 self.effective_matrix))
        if self.__invalidated:
            self.clear()
        self.graph.set_effective_matrix(self.effective_matrix)
예제 #12
0
    def test_labels(self):
        """Annotating with a string meta leaves the label colors unset."""
        x = ContinuousVariable("x")
        y = ContinuousVariable("y")
        s = StringVariable("s")
        rows = [
            [91.0, 89.0, "Bill"],
            [51.0, 100.0, "Cynthia"],
            [9.0, 61.0, "Demi"],
            [49.0, 92.0, "Fred"],
            [91.0, 49.0, "George"],
        ]
        grades = Table.from_list(Domain([x, y], [], [s]), rows)

        widget = self.widget
        widget.set_distances(Euclidean(grades))

        combo = widget.annot_combo
        position = combo.model().indexOf(grades.domain.metas[0])
        combo.setCurrentIndex(position)
        combo.activated.emit(position)
        self.assertIsNone(widget.tablemodel.label_colors)
예제 #13
0
 def test_infinite_distances(self):
     """
     Non-finite distances raise the widget's error; finite ones clear it.

     Scipy rejects infinite distances, and so does this widget (GH-2380).
     """
     def error_shown():
         return self.widget.Error.not_finite_distances.is_shown()

     domain = Domain([ContinuousVariable("a")],
                     [DiscreteVariable("b", values=["y"])])
     rows = list(zip([1.79e308, -1e120], "yy"))
     distances = Euclidean(Table(domain, rows))

     self.assertFalse(error_shown())
     self.send_signal(self.widget.Inputs.distances, distances)
     self.assertTrue(error_shown())
     # Sending the regular fixture distances must clear the error again.
     self.send_signal(self.widget.Inputs.distances, self.distances)
     self.assertFalse(error_shown())
예제 #14
0
 def __init__(self,
              classifier,
              distance=Euclidean(),
              k=10,
              gamma=0.5,
              rho=0.5,
              exp=True,
              rf=None):
     """Initialize the parameters.

     ``classifier`` is passed to ``RegrModelNC``; ``distance`` and ``k`` to
     ``NearestNeighbours``. ``gamma``, ``rho``, ``exp`` and ``rf`` are
     stored on the instance. When given, ``rf`` must be an instance of
     scikit-learn's ``RandomForestRegressor``.

     Raises:
         AssertionError: if ``rf`` is given but is not a
             ``RandomForestRegressor``.
     """
     RegrModelNC.__init__(self, classifier)
     NearestNeighbours.__init__(self, distance, k)
     self._gamma = gamma  # distance sensitivity
     self._rho = rho  # variance sensitivity
     self.exp = exp  # type of normalization
     self.rf = rf  # random forest for normalization
     # Compare with None instead of truth-testing: scikit-learn ensembles
     # define __len__ through `estimators_`, so bool() of an unfitted forest
     # raises AttributeError.
     if rf is not None:
         assert isinstance(rf, RandomForestRegressor), \
             "Rf must be an instance of sklearn's RandomForestRegressor."
 def test_infinite_distances(self):
     """
     Non-finite distances raise the widget's error; finite ones clear it.

     Scipy rejects infinite distances, and so does this widget (GH-2380).
     """
     def error_shown():
         return self.widget.Error.not_finite_distances.is_shown()

     domain = Domain([ContinuousVariable("a")],
                     [DiscreteVariable("b", values=("y", ))])
     rows = list(zip([1.79e308, -1e120], "yy"))
     table = Table.from_list(domain, rows)
     # Runtime warnings raised while computing these distances are expected.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", ".*", RuntimeWarning)
         distances = Euclidean(table)

     self.assertFalse(error_shown())
     self.send_signal(self.widget.Inputs.distances, distances)
     self.assertTrue(error_shown())
     # Sending the regular fixture distances must clear the error again.
     self.send_signal(self.widget.Inputs.distances, self.distances)
     self.assertFalse(error_shown())
예제 #16
0
class TestDistMatrix(TestCase):
    """Submatrix extraction and pickling of a Euclidean distance matrix."""

    def setUp(self):
        iris = Table('iris')
        self.iris = iris
        self.dist = Euclidean(iris)

    def test_submatrix(self):
        """``submatrix`` agrees with slicing of the matrix and its row items."""
        full = self.dist
        sub = full.submatrix([2, 3, 4])
        np.testing.assert_equal(sub, full[2:5, 2:5])
        self.assertTrue(tables_equal(sub.row_items, full.row_items[2:5]))

    def test_pickling(self):
        """A pickle round trip keeps the values, the items and the axis."""
        original = self.dist
        restored = pickle.loads(pickle.dumps(original))
        np.testing.assert_equal(restored, original)
        for items in ("row_items", "col_items"):
            self.assertTrue(tables_equal(getattr(restored, items),
                                         getattr(original, items)))
        self.assertEqual(restored.axis, original.axis)
예제 #17
0
    def __mds_test_helper(self, data, n_com):
        """Three ways of specifying Euclidean dissimilarity give one embedding."""
        def make_mds(dissimilarity):
            return MDS(n_components=n_com, dissimilarity=dissimilarity,
                       random_state=0)

        mds_fit = make_mds(Euclidean)(data)
        mds_odist = make_mds('precomputed')(Euclidean(data))
        mds_sdist = make_mds('euclidean')(data)

        reference = mds_fit.embedding_
        for other in (mds_odist, mds_sdist):
            self.assertTrue(np.allclose(reference, other.embedding_))

        expected_shape = (data.X.shape[0], n_com)
        for model in (mds_fit, mds_odist, mds_sdist):
            self.assertEqual(expected_shape, model.embedding_.shape)
    def setUp(self):
        """Create the widget, iris distances and a small hand-made matrix."""
        self.widget = self.create_widget(
            OWNxFromDistances)  # type: OWNxFromDistances
        self.data = Table("iris")
        self.distances = Euclidean(self.data)

        # When converted to a graph, this has the following components:
        # At threshold 0.5:  {1, 6} and disconnected {0}, {2}, {3}, {4}, {5}
        # At threshold 1 {0, 1, 2, 6}, {3, 5}, {4}
        # At threshold 2 {0, 1, 2, 3, 5, 6}, {4}
        pairs_by_distance = (
            (0.5, [(1, 6)]),
            (1, [(0, 1), (1, 2), (2, 6), (0, 6), (3, 5)]),
            (2, [(2, 3)]),
        )
        m = np.full((7, 7), 10.0)
        for distance, pairs in pairs_by_distance:
            for i, j in pairs:
                # Keep the matrix symmetric.
                m[i, j] = m[j, i] = distance
        self.distances1 = DistMatrix(m)
예제 #19
0
 def __call__(self, data):
     """Fit the projection on ``data`` and return the fitted object.

     Three cases are distinguished by ``self._metric``:

     - a distance object (``SklDistance``, ``SpearmanDistance`` or
       ``PearsonDistance``): the data is preprocessed, the metric is
       applied to its ``X`` and the result is fitted as a precomputed
       dissimilarity;
     - the string ``'precomputed'``: ``data`` itself is used as the
       distance matrix, and the result has no domain;
     - anything else: the preprocessed ``X`` is fitted directly.

     With ``init_type == "PCA"`` and no ``init_data`` yet, the initial
     configuration is computed by ``torgerson`` from the distance matrix.
     """
     distances = SklDistance, SpearmanDistance, PearsonDistance
     if isinstance(self._metric, distances):
         data = self.preprocess(data)
         _X, Y, domain = data.X, data.Y, data.domain
         X = dist_matrix = self._metric(_X)
         self.params['dissimilarity'] = 'precomputed'
     # Compare by value: `is` tests identity, which is not guaranteed for
     # equal strings and triggers a SyntaxWarning with a literal.
     elif self._metric == 'precomputed':
         dist_matrix, Y, domain = data, None, None
         X = dist_matrix
     else:
         data = self.preprocess(data)
         X, Y, domain = data.X, data.Y, data.domain
         # The distance matrix is only needed for the PCA initialization.
         if self.init_type == "PCA":
             dist_matrix = Euclidean(X)
     if self.init_type == "PCA" and self.init_data is None:
         self.init_data = torgerson(dist_matrix,
                                    self.params['n_components'])
     clf = self.fit(X, Y=Y)
     clf.domain = domain
     return clf
예제 #20
0
    def _initialize(self):
        """
        Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

        Data is taken from ``signal_data`` or, failing that, from
        ``matrix_data``. The effective matrix is the input ``matrix`` or,
        when there is none, Euclidean distances over MDS-preprocessed data.
        On mismatching input lengths or data without attributes an error is
        shown and the method returns early.
        """
        # clear everything
        self.closeContext()
        self._clear()
        self.Error.clear()
        self.effective_matrix = None
        self.embedding = None

        # if no data nor matrix is present reset plot
        # NOTE(review): this guard tests `matrix_data`, while the checks below
        # test `matrix`; if a matrix can arrive without `matrix_data`, it
        # would be ignored here. Confirm which one is intended.
        if self.signal_data is None and self.matrix_data is None:
            self.data = None
            self.init_attr_values()
            return

        # Both inputs are present but disagree in length.
        if self.signal_data is not None and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.Error.mismatching_dimensions()
            self._update_plot()
            return

        # The data signal takes precedence over data attached to the matrix.
        if self.signal_data is not None:
            self.data = self.signal_data
        elif self.matrix_data is not None:
            self.data = self.matrix_data

        if self.matrix is not None:
            self.effective_matrix = self.matrix
            # For a matrix with axis 0, data that came from the matrix itself
            # is dropped.
            if self.matrix.axis == 0 and self.data is self.matrix_data:
                self.data = None
        elif self.data.domain.attributes:
            # No matrix given: compute distances from the data.
            preprocessed_data = MDS().preprocess(self.data)
            self.effective_matrix = Euclidean(preprocessed_data)
        else:
            self.Error.no_attributes()
            return

        self.init_attr_values()
        self.openContext(self.data)
        self.graph.set_effective_matrix(self.effective_matrix)
예제 #21
0
 def setUpClass(cls):
     """Prepare iris distances, a Torgerson start and the shared arguments."""
     cls.data = iris = Table("iris")
     cls.distances = dist = Euclidean(iris)
     cls.init = start = torgerson(dist)
     cls.args = (dist, 300, 25, 0, start)
예제 #22
0
 def test_matrix_columns_labels(self):
     """Choosing a label works for distances computed with ``axis=0``."""
     column_distances = Euclidean(self.data, axis=0)
     widget = self.widget
     self.send_signal(widget.Inputs.distances, column_distances)
     simulate.combobox_activate_index(widget.controls.attr_label, 2)
예제 #23
0
 def test_matrix_columns_default_label(self):
     """With ``axis=0`` distances the label combo shows "labels"."""
     column_distances = Euclidean(self.data, axis=0)
     widget = self.widget
     self.send_signal(widget.Inputs.distances, column_distances)
     self.assertEqual(widget.controls.attr_label.currentText(), "labels")
예제 #24
0
    def setRegion(self, low, high):
        """Clip ``low`` and ``high`` to ``self.boundary()`` and set the region."""
        limits = self.boundary()
        clipped_low, clipped_high = np.clip([low, high], *limits)
        self.region.setRegion((clipped_low, clipped_high))

    def getRegion(self):
        """Return whatever ``self.region.getRegion()`` reports."""
        return self.region.getRegion()

    def setValues(self, values):
        """
        Plot a histogram of ``values`` and set the boundary to its range.

        The number of bins is the square root of the number of values, capped
        at 100. For empty ``values`` the curve is reset and the boundary is
        set to ``(0, 0)``.
        """
        self.fillCurve.setData([0,1], [0])
        count = len(values)
        if count == 0:
            self.curve.setData([0, 1], [0])
            self.setBoundary(0, 0)
            return

        bin_count = int(min(np.sqrt(count), 100))
        frequencies, bin_edges = np.histogram(values, bins=bin_count)
        self.curve.setData(bin_edges, frequencies)
        self.setBoundary(bin_edges[0], bin_edges[-1])
        self.autoRange()

    @property
    def xData(self):
        """The ``xData`` attribute of ``self.curve``."""
        return self.curve.xData

    @property
    def yData(self):
        """The ``yData`` attribute of ``self.curve``."""
        return self.curve.yData


if __name__ == "__main__":
    # Preview the widget with Euclidean distances computed on iris.
    preview = WidgetPreview(OWNxFromDistances)
    iris_distances = Euclidean(Table("iris"))
    preview.run(set_matrix=iris_distances)
예제 #25
0
 def test_matrix_columns_tooltip(self):
     """With ``axis=0`` distances the first tooltip mentions "sepal length"."""
     column_distances = Euclidean(self.data, axis=0)
     widget = self.widget
     self.send_signal(widget.Inputs.distances, column_distances)
     tooltip = widget.get_tooltip([0])
     self.assertIn("sepal length", tooltip)
예제 #26
0
 def __init__(self, distance=Euclidean(), k=1, weighted=False):
     """Pass ``distance`` and ``k`` to the parent class; store ``weighted``.

     The default ``distance`` object is created once, when the function is
     defined, and is shared by all calls that rely on the default.
     """
     super().__init__(distance, k)
     self.weighted = weighted
예제 #27
0
        self.fillCurve.setData([0, 1], [0])
        if not len(values):
            self.curve.setData([0, 1], [0])
            self.setBoundary(0, 0)
            self.autoRange()
            return
        nbins = min(len(values), 100)
        freq, edges = np.histogram(values, bins=nbins)
        self.curve.setData(edges, freq)
        self.setBoundary(edges[0], edges[-1])
        self.autoRange()

    @property
    def xData(self):
        """The ``xData`` attribute of ``self.curve``."""
        return self.curve.xData

    @property
    def yData(self):
        """The ``yData`` attribute of ``self.curve``."""
        return self.curve.yData


if __name__ == "__main__":
    from Orange.distance import Euclidean

    # Show the widget on Euclidean distances computed on iris.
    application = QApplication([])
    iris_distances = Euclidean(Table('iris'))
    widget = OWDuplicates()
    widget.set_distances(iris_distances)
    widget.show()
    application.exec_()
예제 #28
0
        if not filename:
            return
        self.filename = filename
        self.unconditional_save_file()
        self.last_dir = os.path.split(self.filename)[0]
        self.adjust_label()

    def save_file(self):
        """
        Save the distances to ``self.filename`` and warn about unsaved data.

        Does nothing without distances; without a file name it delegates to
        ``save_file_as``. After saving, a warning is shown for each axis that
        has associated items but no labels; otherwise the warning is cleared.
        """
        matrix = self.distances
        if matrix is None:
            return
        if not self.filename:
            self.save_file_as()
            return

        matrix.save(self.filename)
        rows_dropped = \
            not matrix.has_row_labels() and matrix.row_items is not None
        cols_dropped = \
            not matrix.has_col_labels() and matrix.col_items is not None
        if not (rows_dropped or cols_dropped):
            self.warning()
        elif rows_dropped and cols_dropped:
            self.warning("Associated data table was not saved")
        else:
            axis_name = "columns" if cols_dropped else "rows"
            self.warning(
                "Data associated with {} was not saved".format(axis_name))


if __name__ == "__main__":
    from Orange.data import Table
    from Orange.distance import Euclidean

    # Preview the widget with Euclidean distances computed on iris.
    preview = WidgetPreview(OWSaveDistances)
    iris_distances = Euclidean(Table("iris"))
    preview.run(iris_distances)
예제 #29
0
 def setUp(self):
     """Create a kNN learner that uses the Euclidean distance."""
     learner_class = knn.kNNLearner
     self.learner = learner_class(distance_constructor=Euclidean())
예제 #30
0
    def setUpClass(cls):
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        cls.signal_name = "Distances"
        cls.signal_data = Euclidean(cls.data)