Esempio n. 1
0
    def select_data(self):
        """
        Build the normalized table that is used for fitting from the
        selected columns of ``self.data``.

        The column ``self.attr_x`` (and ``self.attr_y`` when
        ``self.is_logistic`` is set) is taken together with the class
        values; rows with a NaN in any selected column or in the class are
        dropped. In the logistic case with a class of more than two values
        the class is replaced by a binary variable named
        ``<class name>-bin`` with values ``[self.target_class, 'Others']``
        and the original class values are kept as a meta column.

        Returns
        -------
        Table or None
            Normalized table with the selected columns. ``None`` when
            there is no data, or when no row is free of NaN (in which case
            ``Error.all_none`` is raised on the widget first).
        """
        if self.data is None:
            return None

        self.Error.clear()

        def dense(array):
            # np.column_stack / np.isnan below need dense arrays.
            return array.toarray() if sp.issparse(array) else array

        data_domain = self.data.domain
        attr_x = data_domain[self.attr_x]
        attr_y = data_domain[self.attr_y] if self.is_logistic else None
        selected = [attr_x] if attr_y is None else [attr_x, attr_y]

        x = np.column_stack([dense(self.data[:, attr].X) for attr in selected])
        y_c = dense(self.data.Y)
        if y_c.ndim == 2 and y_c.shape[1] == 1:
            y_c = y_c.flatten()

        # Keep only the rows without NaN in the attributes and in the class.
        valid = ~np.isnan(x).any(axis=1) & ~np.isnan(y_c)
        x = x[valid]
        y_c = y_c[valid]

        if len(x) == 0:
            self.Error.all_none()
            return None

        class_var = data_domain.class_var

        if not self.is_logistic:
            domain = Domain([attr_x], class_var)
            return Normalize(transform_class=True)(Table(domain, x, y_c))

        if len(class_var.values) == 2:
            domain = Domain([attr_x, attr_y], [class_var])
            return Normalize()(Table(domain, x, y_c))

        # More than two class values: one-vs-rest against the target class,
        # with the original class values kept as a meta column.
        domain = Domain([attr_x, attr_y], [
            DiscreteVariable(
                name=class_var.name + "-bin",
                values=[self.target_class, 'Others'])
        ], [class_var])
        y = [0 if class_var.values[int(d)] == self.target_class else 1
             for d in y_c]
        return Normalize()(Table(domain, x, y, y_c[:, None]))
    def test_apply(self):
        """
        Check which plot items exist on the widget after ``apply``.

        With data set, ``plot_item`` and ``scatterplot_item`` must both be
        present, also after a learner or a preprocessor is set; with no
        data both must be ``None``.
        """
        def assert_plot_items_present():
            self.assertIsNotNone(self.widget.plot_item)
            self.assertIsNotNone(self.widget.scatterplot_item)

        def assert_plot_items_absent():
            self.assertIsNone(self.widget.plot_item)
            self.assertIsNone(self.widget.scatterplot_item)

        self.widget.set_data(self.data)
        self.widget.apply()
        assert_plot_items_present()

        self.widget.set_data(None)
        self.widget.apply()
        # TODO: output will be checked when it is available in GuiTest

        # apply() must not change plots that are None (as established by
        # test_set_data) when there is no data
        assert_plot_items_absent()

        self.widget.set_data(self.data)
        self.widget.set_learner(LinearRegressionLearner())
        self.widget.apply()
        assert_plot_items_present()

        self.widget.set_learner(RandomForestRegressionLearner())
        self.widget.apply()
        assert_plot_items_present()

        # NOTE(review): apply() is not called explicitly after setting the
        # preprocessor; presumably set_preprocessor triggers it - confirm.
        self.widget.set_preprocessor((Normalize(), ))
        assert_plot_items_present()
Esempio n. 3
0
class RadViz(LinearProjector):
    """
    RadViz projection of the attributes onto a plane.

    Each attribute gets an anchor on the unit circle, the anchors being
    evenly spaced by angle. A row is placed at the average of the anchors
    weighted by the row's attribute values.
    """
    name = "RadViz"
    supports_sparse = False
    preprocessors = [RemoveNaNRows(),
                     Continuize(multinomial_treatment=Continuize.FirstAsBase),
                     Normalize(norm_type=Normalize.NormalizeBySpan)]
    projection = RadVizModel

    def __call__(self, data):
        """
        Fit the projection on `data`.

        Raises
        ------
        ValueError
            If `data` has a discrete attribute with more than two values.
        """
        if data is not None and any(
                attr.is_discrete and len(attr.values) > 2
                for attr in data.domain.attributes):
            raise ValueError("Can not handle categorical variables"
                             " with more than two values")
        return super().__call__(data)

    def get_components(self, X, Y):
        """
        Return the anchor coordinates as an array of shape
        ``(2, X.shape[1])``: row 0 holds the cosines and row 1 the sines
        of the angles ``2 * pi * i / X.shape[1]``. `Y` is not used.
        """
        n_attrs = X.shape[1]
        angles = 2.0 * np.pi * (np.arange(n_attrs) / n_attrs)
        return np.array([np.cos(angles), np.sin(angles)])

    def transform(self, X):
        """
        Project the rows of `X` onto the plane.

        Every row is mapped to the sum of the anchors weighted by the
        row's values, divided by the sum of the row's values. Rows whose
        values sum to zero are placed at the origin.

        Returns
        -------
        np.ndarray
            Array of shape ``(X.shape[0], 2)``.
        """
        table = np.zeros((X.shape[0], 2))
        row_sums = X.sum(axis=1, keepdims=True)
        with np.errstate(divide='ignore', invalid='ignore'):
            # Divide into the zero-initialised `table`: without `out`, the
            # entries masked out by `where` would be uninitialised memory.
            np.divide(np.dot(X, self.components_.T), row_sums,
                      out=table, where=row_sums != 0)
        return table