Example No. 1
 def test_as_values(self):
     x = ContinuousVariable("x")
     values = Value._as_values(x, [0., 1., 2.])  # pylint: disable=protected-access
     self.assertIsInstance(values[0], Value)
     self.assertEqual(values[0], 0)
     s = StringVariable("s")
     values = Value._as_values(s, ["a", "b", ""])  # pylint: disable=protected-access
     self.assertIsInstance(values[0], Value)
     self.assertEqual(values[0], "a")
Example No. 2
 def test_hash(self):
     v = 1234.5
     val = Value(ContinuousVariable("var"), v)
     self.assertTrue(val == v and hash(val) == hash(v))
     v = "test"
     val = Value(StringVariable("var"), v)
     self.assertTrue(val == v and hash(val) == hash(v))
     v = 1234.5
     val = Value(TimeVariable("var"), v)
     self.assertTrue(val == v and hash(val) == hash(v))
     val = Value(DiscreteVariable("var", ["red", "green", "blue"]), 1)
     self.assertRaises(TypeError, hash, val)
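The test above relies on Value comparing and hashing like the raw Python value it wraps. A minimal usage sketch built only on the Orange.data classes already shown (not taken from the examples themselves): because equality and hash agree, continuous and string Values can be looked up in a dict by their plain counterparts, while discrete Values stay unhashable.

 from Orange.data import ContinuousVariable, StringVariable, Value

 labels = {
     Value(ContinuousVariable("x"), 1234.5): "continuous",
     Value(StringVariable("s"), "test"): "string",
 }
 assert labels[1234.5] == "continuous"   # raw float matches the Value key
 assert labels["test"] == "string"       # raw string matches the Value key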
Example No. 3
 def extract_column(self, table: Table, var: Variable):
     data, _ = table.get_column_view(var)
     if var.is_string:
         return list(map(var.str_val, data))
     elif var.is_discrete and not self.use_values:
         values = np.array([*var.values, None], dtype=object)
         idx = data.astype(int)
         idx[~np.isfinite(data)] = len(values) - 1
         return values[idx].tolist()
     elif var.is_time:  # time always needs Values due to str(val) formatting
         return Value._as_values(var, data.tolist())  # pylint: disable=protected-access
     elif not self.use_values:
         return data.tolist()
     else:
         return Value._as_values(var, data.tolist())  # pylint: disable=protected-access
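The discrete branch above avoids per-row Value construction by indexing into an object array that carries a trailing None sentinel for missing codes. A minimal numpy-only sketch of that trick, with illustrative data rather than an Orange column (a masked cast is used here to sidestep the undefined NaN-to-int conversion that the original overwrites immediately):

 import numpy as np

 values = np.array(["red", "green", "blue", None], dtype=object)  # categories + sentinel
 data = np.array([0.0, 2.0, np.nan, 1.0])                         # float codes, one missing
 idx = np.full(len(data), len(values) - 1)                        # default every row to the sentinel
 finite = np.isfinite(data)
 idx[finite] = data[finite].astype(int)                           # keep the real codes
 print(values[idx].tolist())                                      # ['red', 'blue', None, 'green']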
Example No. 4
    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()
Example No. 5
    def _update_predictions_model(self):
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                prob = p.results.unmapped_probabilities
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()
Example No. 6
 def get_class(self):
     """
     Return the class value as an instance of :obj:`Orange.data.Value`.
     Raises an exception if there are multiple classes.
     """
     self._check_single_class()
     return Value(self._domain.class_var, self._y[0])
Example No. 7
 def get_classes(self):
     """
     Return the class values as a generator of instances of
     :obj:`Orange.data.Value`.
     """
     return (Value(var, value)
             for var, value in zip(self._domain.class_vars, self._y))
Example No. 8
 def __getitem__(self, key):
     if not isinstance(key, Integral):
         key = self._domain.index(key)
     if 0 <= key < len(self._domain.attributes):
         value = self._x[key]  # ordinary attribute
     elif key >= len(self._domain.attributes):
         value = self._y[key - len(self.domain.attributes)]  # class variable
     else:
         value = self._metas[-1 - key]  # metas are addressed with negative indices
     return Value(self._domain[key], value)
Example No. 9
 def __getitem__(self, key):
     idx = key if isinstance(key, Integral) else self._domain.index(key)
     if 0 <= idx < len(self._domain.attributes):
         value = self._x[idx]
     elif idx >= len(self._domain.attributes):
         value = self._y[idx - len(self.domain.attributes)]
     else:
         value = self._metas[-1 - idx]
     var = self._domain[idx]
     if isinstance(key, DiscreteVariable) and var is not key:
         value = key.get_mapper_from(var)(value)
         var = key
     return Value(var, value)
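Both __getitem__ variants wrap whatever they read from _x, _y or _metas into a Value tied to the matching variable. A minimal usage sketch, assuming Orange's bundled "iris" dataset is available: indexing an instance by position, by name, or by Variable all return Value objects.

 from Orange.data import Table, Value

 iris = Table("iris")
 inst = iris[0]
 v1 = inst[0]                       # first attribute, by position
 v2 = inst["petal length"]          # same lookup, by attribute name
 v3 = inst[iris.domain.class_var]   # class value, read from the _y part
 assert all(isinstance(v, Value) for v in (v1, v2, v3))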
Example No. 10
    def __getitem__(self, key):
        """ Indexing of SqlTable is performed in the following way:

        If a single row is requested, it is fetched from the database and
        returned as a SqlRowInstance.

        A new SqlTable with appropriate filters is constructed and returned
        otherwise.
        """
        if isinstance(key, int):
            # one row
            return self._fetch_row(key)

        if not isinstance(key, tuple):
            # row filter
            key = (key, Ellipsis)

        if len(key) != 2:
            raise IndexError("Table indices must be one- or two-dimensional")

        row_idx, col_idx = key
        if isinstance(row_idx, int):
            try:
                col_idx = self.domain.index(col_idx)
                var = self.domain[col_idx]
                return Value(
                    var,
                    next(self._query([var], rows=[row_idx]))[0]
                )
            except TypeError:
                pass

        elif not (row_idx is Ellipsis or row_idx == slice(None)):
            # TODO if row_idx specifies multiple rows, one of the following must
            # happen
            #  - the new table remembers which rows are selected (implement
            #     table.limit_rows and whatever else is necessary)
            #  - return an ordinary (non-SQL) Table
            #  - raise an exception
            raise NotImplementedError("Row indices must be integers.")

        # multiple rows OR single row but multiple columns:
        # construct a new table
        table = self.copy()
        table.domain = self.domain.select_columns(col_idx)
        # table.limit_rows(row_idx)
        return table
Example No. 11
    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                if isinstance(p.results, str):
                    continue
                values = p.results.predicted[0]
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = p.results.probabilities[0]
                    prob = numpy.c_[prob,
                                    numpy.zeros(
                                        (prob.shape[0], len(class_var.values) -
                                         prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                else:
                    prob = numpy.zeros((len(values), 0))
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()
Example No. 12
 def test_repr_value(self):
     # https://github.com/biolab/orange3/pull/1760
     var = TimeVariable('time')
     self.assertEqual(var.repr_val(Value(var, 416.3)), '416.3')
Example No. 13
 def test_val(self):
     a = StringVariable("a")
     self.assertEqual(a.to_val(None), "")
     self.assertEqual(a.str_val(""), "?")
     self.assertEqual(a.str_val(Value(a, "")), "?")
     self.assertEqual(a.repr_val(Value(a, "foo")), '"foo"')
Example No. 14
    def __call__(self, data, ret=Value):
        if not 0 <= ret <= 2:
            raise ValueError("invalid value of argument 'ret'")
        if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
            raise ValueError("cannot predict continuous distributions")

        # Call the predictor
        if isinstance(data, np.ndarray):
            prediction = self.predict(np.atleast_2d(data))
        elif isinstance(data, scipy.sparse.csr.csr_matrix):
            prediction = self.predict(data)
        elif isinstance(data, (Table, Instance)):
            if isinstance(data, Instance):
                data = Table(data.domain, [data])
            if data.domain != self.domain:
                data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        elif isinstance(data, (list, tuple)):
            if not isinstance(data[0], (list, tuple)):
                data = [data]
            data = Table(self.original_domain, data)
            data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')".format(
                type(data).__name__))

        # Parse the result into value and probs
        multitarget = len(self.domain.class_vars) > 1
        if isinstance(prediction, tuple):
            value, probs = prediction
        elif prediction.ndim == 1 + multitarget:
            value, probs = prediction, None
        elif prediction.ndim == 2 + multitarget:
            value, probs = None, prediction
        else:
            raise TypeError("model returned a %i-dimensional array",
                            prediction.ndim)

        # Ensure that we have what we need to return
        if ret != Model.Probs and value is None:
            value = np.argmax(probs, axis=-1)
        if ret != Model.Value and probs is None:
            if multitarget:
                max_card = max(len(c.values) for c in self.domain.class_vars)
                probs = np.zeros(value.shape + (max_card, ), float)
                for i, cvar in enumerate(self.domain.class_vars):
                    probs[:, i, :] = one_hot(value[:, i])
            else:
                probs = one_hot(value)
            if ret == Model.ValueProbs:
                return value, probs
            else:
                return probs

        # Return what we need to
        if ret == Model.Probs:
            return probs
        if isinstance(data, Instance) and not multitarget:
            value = Value(self.domain.class_var, value[0])
        if ret == Model.Value:
            return value
        else:  # ret == Model.ValueProbs
            return value, probs
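A minimal usage sketch of the ret argument handled above, assuming Orange's LogisticRegressionLearner and the bundled "iris" dataset: Model.Value returns predicted classes, Model.Probs returns class probabilities, and Model.ValueProbs returns both.

 from Orange.base import Model
 from Orange.classification import LogisticRegressionLearner
 from Orange.data import Table

 iris = Table("iris")
 model = LogisticRegressionLearner()(iris)

 value = model(iris[0])                                  # default ret=Value
 probs = model(iris[:5], ret=Model.Probs)                # probabilities only
 values, probs = model(iris[:5], ret=Model.ValueProbs)   # both at once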
Example No. 15
 def values(self):
     return (Value(var, val)
             for var, val in zip(self.domain.variables, self))
Example No. 16
    def __call__(self, data, ret=Value):
        multitarget = len(self.domain.class_vars) > 1

        def one_hot_probs(value):
            if not multitarget:
                return one_hot(value,
                               dim=len(self.domain.class_var.values)
                               if self.domain is not None else None)

            max_card = max(len(c.values) for c in self.domain.class_vars)
            probs = np.zeros(value.shape + (max_card, ), float)
            for i in range(len(self.domain.class_vars)):
                probs[:, i, :] = one_hot(value[:, i])
            return probs

        def extend_probabilities(probs):
            """
            Since SklModels and models implementing `fit` and not `fit_storage`
            do not guarantee correct prediction dimensionality, extend
            dimensionality of probabilities when it does not match the number
            of values in the domain.
            """
            class_vars = self.domain.class_vars
            max_values = max(len(cv.values) for cv in class_vars)
            if max_values == probs.shape[-1]:
                return probs

            if not self.supports_multiclass:
                probs = probs[:, np.newaxis, :]

            probs_ext = np.zeros((len(probs), len(class_vars), max_values))
            for c, used_vals in enumerate(self.used_vals):
                for i, cv in enumerate(used_vals):
                    probs_ext[:, c, cv] = probs[:, c, i]

            if not self.supports_multiclass:
                probs_ext = probs_ext[:, 0, :]
            return probs_ext

        def fix_dim(x):
            return x[0] if one_d else x

        if not 0 <= ret <= 2:
            raise ValueError("invalid value of argument 'ret'")
        if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
            raise ValueError("cannot predict continuous distributions")

        # Convert 1d structures to 2d and remember doing it
        one_d = True
        if isinstance(data, Instance):
            data = Table.from_list(data.domain, [data])
        elif isinstance(data, (list, tuple)) \
                and not isinstance(data[0], (list, tuple)):
            data = [data]
        elif isinstance(data, np.ndarray) and data.ndim == 1:
            data = np.atleast_2d(data)
        else:
            one_d = False

        # if sparse convert to csr_matrix
        if scipy.sparse.issparse(data):
            data = data.tocsr()

        # Call the predictor
        backmappers = None
        n_values = []
        if isinstance(data, (np.ndarray, scipy.sparse.csr.csr_matrix)):
            prediction = self.predict(data)
        elif isinstance(data, Table):
            backmappers, n_values = self.get_backmappers(data)
            data = self.data_to_model_domain(data)
            prediction = self.predict_storage(data)
        elif isinstance(data, (list, tuple)):
            data = Table.from_list(self.original_domain, data)
            data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')".format(
                type(data).__name__))

        # Parse the result into value and probs
        if isinstance(prediction, tuple):
            value, probs = prediction
        elif prediction.ndim == 1 + multitarget:
            value, probs = prediction, None
        elif prediction.ndim == 2 + multitarget:
            value, probs = None, prediction
        else:
            raise TypeError("model returned a %i-dimensional array",
                            prediction.ndim)

        # Ensure that we have what we need to return; backmap everything
        if probs is None and (ret != Model.Value or backmappers is not None):
            probs = one_hot_probs(value)
        if probs is not None:
            probs = extend_probabilities(probs)
            probs = self.backmap_probs(probs, n_values, backmappers)
        if ret != Model.Probs:
            if value is None:
                value = np.argmax(probs, axis=-1)
                # probs are already backmapped
            else:
                value = self.backmap_value(value, probs, n_values, backmappers)

        # Return what we need to
        if ret == Model.Probs:
            return fix_dim(probs)
        if isinstance(data, Instance) and not multitarget:
            value = [Value(self.domain.class_var, value[0])]
        if ret == Model.Value:
            return fix_dim(value)
        else:  # ret == Model.ValueProbs
            return fix_dim(value), fix_dim(probs)
Example No. 17
    def __call__(self, data, ret=Value):
        multitarget = len(self.domain.class_vars) > 1

        def one_hot_probs(value):
            if not multitarget:
                return one_hot(value)

            max_card = max(len(c.values) for c in self.domain.class_vars)
            probs = np.zeros(value.shape + (max_card,), float)
            for i in range(len(self.domain.class_vars)):
                probs[:, i, :] = one_hot(value[:, i])
            return probs

        def fix_dim(x):
            return x[0] if one_d else x

        def data_to_model_domain():
            if data.domain == self.domain:
                return data

            if self.original_domain.attributes != data.domain.attributes \
                    and data.X.size \
                    and not np.isnan(data.X).all():
                new_data = data.transform(self.original_domain)
                if np.isnan(new_data.X).all():
                    raise DomainTransformationError(
                        "domain transformation produced no defined values")
                return new_data.transform(self.domain)

            return data.transform(self.domain)

        if not 0 <= ret <= 2:
            raise ValueError("invalid value of argument 'ret'")
        if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
            raise ValueError("cannot predict continuous distributions")

        # Convert 1d structures to 2d and remember doing it
        one_d = True
        if isinstance(data, Instance):
            data = Table(data.domain, [data])
        elif isinstance(data, (list, tuple)) \
                and not isinstance(data[0], (list, tuple)):
            data = [data]
        elif isinstance(data, np.ndarray) and data.ndim == 1:
            data = np.atleast_2d(data)
        else:
            one_d = False

        # Call the predictor
        backmappers = None
        n_values = []
        if isinstance(data, (np.ndarray, scipy.sparse.csr.csr_matrix)):
            prediction = self.predict(data)
        elif isinstance(data, Table):
            backmappers, n_values = self.get_backmappers(data)
            data = data_to_model_domain()
            prediction = self.predict_storage(data)
        elif isinstance(data, (list, tuple)):
            data = Table.from_list(self.original_domain, data)
            data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')"
                            .format(type(data).__name__))

        # Parse the result into value and probs
        if isinstance(prediction, tuple):
            value, probs = prediction
        elif prediction.ndim == 1 + multitarget:
            value, probs = prediction, None
        elif prediction.ndim == 2 + multitarget:
            value, probs = None, prediction
        else:
            raise TypeError("model returned a %i-dimensional array",
                            prediction.ndim)

        # Ensure that we have what we need to return; backmap everything
        if probs is None and (ret != Model.Value or backmappers is not None):
            probs = one_hot_probs(value)
        if probs is not None:
            probs = self.backmap_probs(probs, n_values, backmappers)
        if ret != Model.Probs:
            if value is None:
                value = np.argmax(probs, axis=-1)
                # probs are already backmapped
            else:
                value = self.backmap_value(value, probs, n_values, backmappers)

        # Return what we need to
        if ret == Model.Probs:
            return fix_dim(probs)
        if isinstance(data, Instance) and not multitarget:
            value = [Value(self.domain.class_var, value[0])]
        if ret == Model.Value:
            return fix_dim(value)
        else:  # ret == Model.ValueProbs
            return fix_dim(value), fix_dim(probs)