Example #1
0
    def test_normalize_data(self):
        """Output cluster labels must match DBSCAN run directly on the data,
        both with normalization disabled and enabled."""
        def expected_labels(table):
            # Cluster directly with the widget's current parameters.
            return DBSCAN(
                eps=self.widget.eps,
                min_samples=self.widget.min_samples,
                metric="euclidean",
            )(table)

        def labels_from_output():
            # Read labels from the annotated output; unclustered rows are
            # NaN in the meta column and map to the DBSCAN noise label -1.
            annotated = self.get_output(self.widget.Outputs.annotated_data)
            labels = annotated.metas[:, 0].copy()
            labels[np.isnan(labels)] = -1
            return labels

        # not normalized
        self.widget.controls.normalize.setChecked(False)
        data = Table("heart_disease")
        self.send_signal(self.widget.Inputs.data, data)
        np.testing.assert_array_equal(
            labels_from_output(), expected_labels(data))

        # normalized
        self.widget.controls.normalize.setChecked(True)
        for pp in (Continuize(), Normalize(), SklImpute()):
            data = pp(data)
        np.testing.assert_array_equal(
            labels_from_output(), expected_labels(data))
Example #2
0
 def set_data(self, data):
     """Input handler: store the data, derive its continuous-only view,
     reset messages/selection, then rerun the widget.

     Warnings are raised when there are fewer than 2 instances or fewer
     than 2 usable continuous attributes; in those cases ``cont_data``
     stays ``None``.
     """
     self.closeContext()
     self.clear_messages()
     self.data = data
     # Derived continuous-only table; None until (re)computed below.
     self.cont_data = None
     self.selection = ()
     if data is not None:
         if len(data) < 2:
             self.Warning.not_enough_inst()
         else:
             # Keep only continuous attributes; class/meta variables pass
             # through unchanged.
             domain = data.domain
             cont_attrs = [a for a in domain.attributes if a.is_continuous]
             cont_dom = Domain(cont_attrs, domain.class_vars, domain.metas)
             cont_data = Table.from_table(cont_dom, data)
             # Constant columns carry no information for this analysis.
             remover = Remove(Remove.RemoveConstant)
             cont_data = remover(cont_data)
             if remover.attr_results["removed"]:
                 self.Information.removed_cons_feat()
             if len(cont_data.domain.attributes) < 2:
                 self.Warning.not_enough_vars()
             else:
                 # Impute remaining missing values before use.
                 self.cont_data = SklImpute()(cont_data)
     self.set_feature_model()
     # Context is keyed on the continuous view, not the raw input.
     self.openContext(self.cont_data)
     self.apply()
     self.vizrank.button.setEnabled(self.cont_data is not None)
Example #3
0
def _preprocess(table):
    """Remove categorical attributes and impute missing values.

    Parameters
    ----------
    table : data.Table
        Input data; may contain discrete attributes and missing values.

    Returns
    -------
    data.Table
        The table restricted to its continuous attributes (class and meta
        variables are kept) with missing values imputed. An empty table is
        returned unchanged.
    """
    if not len(table):
        return table
    new_domain = data.Domain(
        [a for a in table.domain.attributes if a.is_continuous],
        table.domain.class_vars, table.domain.metas)
    new_data = data.Table(new_domain, table)
    # Instantiate the preprocessor, then apply it: `SklImpute(new_data)`
    # only works through deprecated `Preprocess.__new__` magic, while
    # `SklImpute()(new_data)` is the canonical form used elsewhere in
    # this codebase.
    new_data = SklImpute()(new_data)
    return new_data
Example #4
0
def _preprocess(table, impute=True):
    """Remove categorical attributes and impute missing values."""
    if not len(table):
        return table
    continuous = [attr for attr in table.domain.attributes
                  if attr.is_continuous]
    reduced = table.transform(
        Domain(continuous, table.domain.class_vars, table.domain.metas))
    # Imputation is optional so callers can skip it when the data
    # is known to be complete.
    return SklImpute()(reduced) if impute else reduced
class EllipticEnvelopeLearner(SklLearner):
    # Wrapper around scikit-learn's EllipticEnvelope outlier detector.
    __wraps__ = skl_covariance.EllipticEnvelope
    __returns__ = EllipticEnvelopeClassifier
    # Applied to data before fitting (order matters).
    preprocessors = [Continuize(), RemoveNaNColumns(), SklImpute()]

    def __init__(self,
                 store_precision=True,
                 assume_centered=False,
                 support_fraction=None,
                 contamination=0.1,
                 random_state=None,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() captures all of __init__'s locals (the keyword arguments
        # above, plus `self` and `preprocessors`); presumably the inherited
        # `params` setter filters them down to what the wrapped estimator's
        # constructor accepts — see SklLearner._get_sklparams.
        self.params = vars()
Example #6
0
class PolynomialLearner(Learner):
    """Learner that expands the features into polynomial terms of a given
    degree and delegates the actual fitting to a wrapped learner."""
    name = 'poly learner'
    preprocessors = [Continuize(), RemoveNaNColumns(), SklImpute()]

    def __init__(self, learner, degree=1, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.degree = degree
        self.learner = learner

    def fit(self, X, Y, W):
        """Fit the wrapped learner on the polynomial feature expansion."""
        expansion = skl_preprocessing.PolynomialFeatures(self.degree)
        features = expansion.fit_transform(X)
        # Forward instance weights only when they exist and are supported.
        if W is not None and self.supports_weights:
            fitted = self.learner.fit(features, Y, sample_weight=W.reshape(-1))
        else:
            fitted = self.learner.fit(features, Y, None)
        return PolynomialModel(fitted, expansion)
Example #7
0
class TreeRegressionLearner(SklLearner):
    # Wrapper around scikit-learn's DecisionTreeRegressor.
    __wraps__ = skl_tree.DecisionTreeRegressor
    __returns__ = TreeRegressor
    name = 'regression tree'
    # Applied to data before fitting (order matters).
    preprocessors = [RemoveNaNColumns(), SklImpute(), Continuize()]

    def __init__(self,
                 criterion="mse",  # NOTE(review): "mse" was renamed to
                                   # "squared_error" in newer scikit-learn;
                                   # verify against the pinned sklearn version.
                 splitter="best",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 max_features=None,
                 random_state=None,
                 max_leaf_nodes=None,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() captures all of __init__'s locals (the keyword arguments
        # above, plus `self` and `preprocessors`); presumably the inherited
        # `params` setter filters them down to what DecisionTreeRegressor's
        # constructor accepts — see SklLearner._get_sklparams.
        self.params = vars()
Example #8
0
class Clustering(metaclass=WrapperMeta):
    """
    ${skldoc}
    Additional Orange parameters

    preprocessors : list, optional (default = [Continuize(), SklImpute()])
        An ordered list of preprocessors applied to data before
        training or testing.
    """
    __wraps__ = None
    __returns__ = ClusteringModel
    preprocessors = [Continuize(), SklImpute()]

    def __init__(self, preprocessors, parameters):
        # Fall back to the class-level preprocessor list when none given.
        if preprocessors is None:
            preprocessors = self.preprocessors
        self.preprocessors = preprocessors
        # Keep only genuine keyword parameters of the wrapped algorithm.
        excluded = ("self", "preprocessors", "__class__")
        self.params = {name: value
                       for name, value in parameters.items()
                       if name not in excluded}

    def __call__(self, data):
        # Convenience shortcut: fit and immediately read the labels.
        return self.get_model(data).labels

    def get_model(self, data):
        """Preprocess ``data``, fit the wrapped algorithm and return the
        resulting model with both domains attached."""
        original = data.domain
        preprocessed = self.preprocess(data)
        model = self.fit_storage(preprocessed)
        model.domain = preprocessed.domain
        model.original_domain = original
        return model

    def fit_storage(self, data):
        # only data Table
        return self.fit(data.X)

    def fit(self, X: np.ndarray, y: np.ndarray = None):
        return self.__returns__(self.__wraps__(**self.params).fit(X))

    def preprocess(self, data):
        """Apply each configured preprocessor in order."""
        transformed = data
        for preprocessor in self.preprocessors:
            transformed = preprocessor(transformed)
        return transformed
 def set_data(self, data):
     """Input handler: store the data, build its continuous-only imputed
     view, reset messages/selection, then rerun the widget.

     Informational messages are shown when there are fewer than 2
     continuous attributes or fewer than 2 instances; in those cases
     ``cont_data`` stays ``None``.
     """
     self.closeContext()
     self.clear_messages()
     self.data = data
     # Derived continuous-only table; None until (re)computed below.
     self.cont_data = None
     self.selection = ()
     if data is not None:
         cont_attrs = [a for a in data.domain.attributes if a.is_continuous]
         if len(cont_attrs) < 2:
             self.Information.not_enough_vars()
         elif len(data) < 2:
             self.Information.not_enough_inst()
         else:
             # Keep only the continuous attributes (class/meta variables
             # pass through) and impute missing values.
             domain = data.domain
             cont_dom = Domain(cont_attrs, domain.class_vars, domain.metas)
             self.cont_data = SklImpute()(Table.from_table(cont_dom, data))
     self.apply()
     # Context is keyed on the raw input data here.
     self.openContext(self.data)
     self._vizrank_select()
Example #10
0
from sklearn.metrics import pairwise_distances

from Orange.preprocess import Normalize, Continuize, SklImpute
from Orange.widgets import widget, gui
from Orange.widgets.utils.slidergraph import SliderGraph
from Orange.widgets.settings import Setting
from Orange.data import Table, DiscreteVariable
from Orange.data.util import get_unique_names
from Orange.clustering import DBSCAN
from Orange.widgets.utils.annotated_data import ANNOTATED_DATA_SIGNAL_NAME
from Orange.widgets.utils.signals import Input, Output
from Orange.widgets.widget import Msg


# Default relative position of the cut point — presumably on the widget's
# slider graph; verify against the widget code below.
DEFAULT_CUT_POINT = 0.1
# Preprocessors applied to input data (order matters) before clustering.
PREPROCESSORS = [Continuize(), Normalize(), SklImpute()]
# Lower bound for the eps parameter — NOTE(review): likely the smallest
# value the UI allows; confirm where it is used.
EPS_BOTTOM_LIMIT = 0.01


def get_kth_distances(data, metric, k=5):
    """
    The function computes the epsilon parameter for DBSCAN through method
    proposed in the paper.
    Parameters
    ----------
    data : Orange.data.Table
        Visualisation coordinates - embeddings
    metric : callable or str
        The metric to compute the distance.
    k : int
        Number kth observed neighbour
Example #11
0
class SklLearner(Learner, metaclass=WrapperMeta):
    """
    ${skldoc}
    Additional Orange parameters

    preprocessors : list, optional
        An ordered list of preprocessors applied to data before
        training or testing.
        Defaults to
        `[RemoveNaNClasses(), Continuize(), SklImpute(), RemoveNaNColumns()]`
    """
    # Subclasses set __wraps__ to the wrapped scikit-learn estimator class
    # and __returns__ to the Orange model wrapper produced by fit().
    __wraps__ = None
    __returns__ = SklModel
    # Backing store for the `params` property.
    _params = {}

    preprocessors = default_preprocessors = [
        HasClass(), Continuize(),
        RemoveNaNColumns(),
        SklImpute()
    ]

    @property
    def params(self):
        # Keyword arguments forwarded to the wrapped estimator's constructor.
        return self._params

    @params.setter
    def params(self, value):
        # Assigning filters the mapping down to arguments the wrapped
        # estimator's __init__ actually accepts.
        self._params = self._get_sklparams(value)

    def _get_sklparams(self, values):
        """Return the subset of ``values`` whose keys are parameters of
        ``__wraps__.__init__``; raise TypeError if ``__wraps__`` is unset."""
        skllearner = self.__wraps__
        if skllearner is not None:
            spec = inspect.getargs(skllearner.__init__.__code__)
            # first argument is 'self'
            assert spec.args[0] == "self"
            params = {
                name: values[name]
                for name in spec.args[1:] if name in values
            }
        else:
            raise TypeError("Wrapper does not define '__wraps__'")
        return params

    def preprocess(self, data):
        """Apply inherited preprocessing, then reject data that still has
        discrete attributes with more than two values."""
        data = super().preprocess(data)

        if any(v.is_discrete and len(v.values) > 2
               for v in data.domain.attributes):
            raise ValueError("Wrapped scikit-learn methods do not support " +
                             "multinomial variables.")

        return data

    def __call__(self, data):
        # Train via Learner.__call__ and attach the estimator parameters
        # to the resulting model for later inspection.
        m = super().__call__(data)
        m.params = self.params
        return m

    def fit(self, X, Y, W=None):
        """Fit the wrapped estimator on plain arrays and wrap the result.

        ``W`` (instance weights) is forwarded as ``sample_weight`` only
        when the estimator supports it.
        """
        clf = self.__wraps__(**self.params)
        Y = Y.reshape(-1)
        if W is None or not self.supports_weights:
            return self.__returns__(clf.fit(X, Y))
        return self.__returns__(clf.fit(X, Y, sample_weight=W.reshape(-1)))

    @property
    def supports_weights(self):
        """Indicates whether this learner supports weighted instances.
        """
        # Heuristic: inspect the wrapped fit()'s local variable names.
        return 'sample_weight' in self.__wraps__.fit.__code__.co_varnames

    def __getattr__(self, item):
        # Expose estimator parameters as attributes of the learner itself.
        try:
            return self.params[item]
        except (KeyError, AttributeError):
            raise AttributeError(item) from None

    # TODO: Disallow (or mirror) __setattr__ for keys in params?

    def __dir__(self):
        # Include estimator parameter names in dir() for discoverability.
        dd = super().__dir__()
        return list(sorted(set(dd) | set(self.params.keys())))
Example #12
0
class CatBoostLearnerRegression(CatBoostLearner, LearnerRegression):
    """Regression variant of the CatBoost wrapper.

    NOTE(review): this duplicates most of the generic scikit-learn wrapper
    machinery (params property, fit, supports_weights); consider sharing it
    with the common base class.
    """
    # Subclasses/instances set __wraps__ to the wrapped estimator class;
    # __returns__ is the Orange model wrapper produced by fit().
    __wraps__ = None
    __returns__ = CatBoostModel
    supports_multiclass = True
    # Backing store for the `params` property.
    _params = {}

    learner_adequacy_err_msg = "Continuous class variable expected."

    preprocessors = default_preprocessors = [
        HasClass(), Continuize(),
        RemoveNaNColumns(),
        SklImpute()
    ]

    def check_learner_adequacy(self, domain):
        # Regression requires a continuous target.
        return domain.has_continuous_class

    @property
    def params(self):
        # Keyword arguments forwarded to the wrapped estimator's constructor.
        return self._params

    @params.setter
    def params(self, value):
        # Assigning filters the mapping down to arguments the wrapped
        # estimator's __init__ actually accepts.
        self._params = self._get_sklparams(value)

    def _get_sklparams(self, values):
        """Return the subset of ``values`` whose keys are parameters of
        ``__wraps__.__init__``; raise TypeError if ``__wraps__`` is unset."""
        skllearner = self.__wraps__
        if skllearner is not None:
            spec = inspect.getargs(skllearner.__init__.__code__)
            # first argument is 'self'
            assert spec.args[0] == "self"
            params = {
                name: values[name]
                for name in spec.args[1:] if name in values
            }
        else:
            raise TypeError("Wrapper does not define '__wraps__'")
        return params

    def preprocess(self, data):
        """Apply inherited preprocessing, then reject data that still has
        discrete attributes with more than two values."""
        data = super().preprocess(data)

        if any(v.is_discrete and len(v.values) > 2
               for v in data.domain.attributes):
            raise ValueError("Wrapped scikit-learn methods do not support " +
                             "multinomial variables.")

        return data

    def __call__(self, data):
        # Train via the inherited __call__ and attach the estimator
        # parameters to the resulting model for later inspection.
        m = super().__call__(data)
        m.params = self.params
        return m

    def fit(self, X, Y, W=None):
        """Fit the wrapped estimator on plain arrays and wrap the result.

        ``W`` (instance weights) is forwarded as ``sample_weight`` only
        when the estimator supports it.
        """
        clf = self.__wraps__(**self.params)
        Y = Y.reshape(-1)
        if W is None or not self.supports_weights:
            return self.__returns__(clf.fit(X, Y))
        return self.__returns__(clf.fit(X, Y, sample_weight=W.reshape(-1)))

    @property
    def supports_weights(self):
        """Indicates whether this learner supports weighted instances.
        """
        # Heuristic: inspect the wrapped fit()'s local variable names.
        return 'sample_weight' in self.__wraps__.fit.__code__.co_varnames

    def __getattr__(self, item):
        # Expose estimator parameters as attributes of the learner itself.
        try:
            return self.params[item]
        except (KeyError, AttributeError):
            raise AttributeError(item) from None

    # TODO: Disallow (or mirror) __setattr__ for keys in params?

    def __dir__(self):
        # Include estimator parameter names in dir() for discoverability.
        dd = super().__dir__()
        return list(sorted(set(dd) | set(self.params.keys())))
Example #13
0
class SklLearner(Learner, metaclass=WrapperMeta):
    """
    ${skldoc}
    Additional Orange parameters

    preprocessors : list, optional (default=[Continuize(), SklImpute(), RemoveNaNColumns()])
        An ordered list of preprocessors applied to data before
        training or testing.
    """
    # Subclasses set __wraps__ to the wrapped scikit-learn estimator class
    # and __returns__ to the Orange model wrapper produced by fit().
    __wraps__ = None
    __returns__ = SklModel
    # Backing store for the `params` property.
    _params = None

    name = 'skl learner'
    preprocessors = [Continuize(),
                     RemoveNaNColumns(),
                     SklImpute(force=False)]

    @property
    def params(self):
        # Keyword arguments forwarded to the wrapped estimator's constructor.
        return self._params

    @params.setter
    def params(self, value):
        # Assigning filters the mapping down to arguments the wrapped
        # estimator's __init__ actually accepts.
        self._params = self._get_sklparams(value)

    def _get_sklparams(self, values):
        """Return the subset of ``values`` whose keys are parameters of
        ``__wraps__.__init__``; raise TypeError if ``__wraps__`` is unset."""
        skllearner = self.__wraps__
        if skllearner is not None:
            spec = inspect.getargs(skllearner.__init__.__code__)
            # first argument is 'self'
            assert spec.args[0] == "self"
            params = {name: values[name] for name in spec.args[1:]
                      if name in values}
        else:
            raise TypeError("Wrapper does not define '__wraps__'")
        return params

    def preprocess(self, data):
        """Apply inherited preprocessing, then reject data that still has
        discrete attributes with more than two values."""
        data = super().preprocess(data)

        if any(v.is_discrete and len(v.values) > 2
               for v in data.domain.attributes):
            raise ValueError("Wrapped scikit-learn methods do not support " +
                             "multinomial variables.")

        return data

    def __call__(self, data):
        # Train via Learner.__call__; remember the distinct target values
        # seen during training and the estimator parameters on the model.
        m = super().__call__(data)
        m.used_vals = [np.unique(y) for y in data.Y[:, None].T]
        m.params = self.params
        return m

    def fit(self, X, Y, W):
        """Fit the wrapped estimator on plain arrays and wrap the result.

        ``W`` (instance weights) is forwarded as ``sample_weight`` only
        when the estimator supports it.
        """
        clf = self.__wraps__(**self.params)
        Y = Y.reshape(-1)
        if W is None or not self.supports_weights:
            return self.__returns__(clf.fit(X, Y))
        return self.__returns__(clf.fit(X, Y, sample_weight=W.reshape(-1)))

    def __repr__(self):
        return '{} {}'.format(self.name, self.params)
Example #14
0
def impute(data):
    """Return *data* with its missing values imputed by SklImpute."""
    imputer = SklImpute()
    return imputer(data)
Example #15
-1
    def _calculate_table_values(self):
        """Recompute the matrix shown in the table: aggregate, impute,
        optionally log/normalize, rescale to [0, 1], reorder by clustering,
        then refresh the view and selection."""
        # Cap the number of displayed genes at GENE_MAXIMUM.
        genes = self.data.domain.attributes[:self.GENE_MAXIMUM]
        matrix = self.aggregated_data
        clusters = self.clusters_unordered
        if self.transpose:
            matrix, clusters, genes = self._transpose(matrix, clusters, genes)

        # create data table since imputation of nan values is required
        matrix = Table(Domain(genes), matrix)
        matrix_before_norm = matrix.copy()  # for tooltip
        matrix = SklImpute()(matrix)

        # Optional value transforms controlled by widget settings.
        if self.log_scale:
            matrix.X = np.log(matrix.X + 1)
        if self.normalize:
            matrix.X = self._normalize(matrix.X)

        # values must be in range [0, 1] for visualisation
        matrix.X = self._norm_min_max(matrix.X)

        # Biclustering reorders both axes; otherwise keep original order.
        if self.biclustering:
            cluster_order, gene_order = self.cluster_data(matrix)
        else:
            cluster_order, gene_order = np.arange(matrix.X.shape[0]), np.arange(matrix.X.shape[1])

        # reorder
        self.matrix = matrix[cluster_order][:, gene_order]
        self.matrix_before_norm = matrix_before_norm[cluster_order][:, gene_order]
        self.clusters = clusters[cluster_order]

        self._refresh_table()
        self._update_selection()
        self._invalidate()