Esempi in Python per Continuize, esempi in Python per Orange.preprocess.Continuize

Esempio n. 1

0

Mostra file

File: test_pca.py Progetto: rmcatee/orange3

 def test_chain(self):
     """Check that three ways of chaining Continuize and PCA agree.

     The reference projection comes from a PCA fitted on the continuized
     table and applied to the raw table. It is compared with the same kind
     of model applied to the continuized table, and with a PCA that gets
     ``Continuize`` through its ``preprocessors`` argument.
     """
     raw = Orange.data.Table('zoo')
     # The preprocessor class is called directly with the data here.
     continuized = Continuize(raw)
     reference = PCA()(continuized)(raw)
     on_continuized = PCA()(continuized)(continuized)
     with_preprocessor = PCA(preprocessors=[Continuize()])(raw)(raw)
     for candidate in (on_continuized, with_preprocessor):
         np.testing.assert_almost_equal(reference.X, candidate.X)

Esempio n. 2

0

Mostra file

File: test_pca.py Progetto: zhoubo3666/orange3

 def test_chain(self):
     """Check that three ways of chaining Continuize and 3-component PCA agree.

     The reference projection comes from a PCA fitted on the continuized
     ``self.zoo`` and applied to the raw ``self.zoo``. It is compared with
     the same kind of model applied to the continuized table, and with a
     PCA that gets ``Continuize`` through its ``preprocessors`` argument.
     """
     continuized = Continuize()(self.zoo)
     reference = PCA(n_components=3)(continuized)(self.zoo)
     on_continuized = PCA(n_components=3)(continuized)(continuized)
     with_preprocessor = PCA(
         n_components=3, preprocessors=[Continuize()])(self.zoo)(self.zoo)
     for candidate in (on_continuized, with_preprocessor):
         np.testing.assert_almost_equal(reference.X, candidate.X)

Esempio n. 3

0

Mostra file

 def __init__(self,
              preprocessors=None,
              penalty=1,
              opt_penalty=False,
              rule_learner=None,
              basic_attributes=True,
              fit_intercept=True,
              intercept_scaling=2,
              penalize_rules=True):
     """
     Store the learner's configuration on the instance.

     Parameters
     ----------
     preprocessors :
         A sequence of data preprocessors to apply on data prior to
         fitting the model.
     penalty : L2-penalty in loss function.
     opt_penalty : Stored as ``self.opt_penalty``. Its effect is not visible
         in this method (presumably enables tuning of ``penalty`` --
         TODO confirm).
     rule_learner: Rule learner used to construct new attributes.
     basic_attributes : Stored as ``self.basic_attributes``. Its effect is
         not visible in this method -- TODO confirm.
     fit_intercept: Should we add a constant column to data?
     intercept_scaling: Value of constant in the intercept column. Note that
         intercept column is appended after normalization, therefore higher
         values will be less affected by penalization.
     penalize_rules : Stored as ``self.penalize_rules``. Its effect is not
         visible in this method -- TODO confirm.
     """
     super().__init__(preprocessors)
     self.penalty = penalty
     self.opt_penalty = opt_penalty
     self.rule_learner = rule_learner
     self.fit_intercept = fit_intercept
     self.intercept_scaling = intercept_scaling
     self.basic_attributes = basic_attributes
     self.penalize_rules = penalize_rules
     # Post rule learning preprocessing should not decrease the
     # number of examples.
     self.post_rule_preprocess = [Normalize(), Continuize()]

Esempio n. 4

0

Mostra file

    def __new__(cls, data, address='localhost:9465', batch=100, max_iter=100):
        """Fit an incremental PCA model on samples of ``data`` and return it.

        ``data`` is continuized first (multinomial variables removed, no
        normalization of continuous ones). The initial model is fitted on one
        sample; when a batch covers less than the whole data, up to
        ``max_iter`` further samples are fed through ``partial_fit``. The
        model is passed to ``save_state`` after every fit, and the loop stops
        early once ``aborted()`` is true.

        ``address`` is accepted but not used in this body.
        """
        from orangecontrib.remote import aborted, save_state
        import Orange.data.sql.table

        def localize(sample):
            # Download the sample, then rebuild it as a table that carries
            # only the attribute columns.
            sample.download_data(1000000)
            attributes_only = Orange.data.Domain(sample.domain.attributes)
            return Orange.data.Table.from_numpy(attributes_only, sample.X)

        continuizer = Continuize(multinomial_treatment=Continuize.Remove,
                                 normalize_continuous=None)
        data = continuizer(data)
        incremental_pca = Orange.projection.IncrementalPCA()
        percent = batch / data.approx_len() * 100
        subsample = percent < 100

        if subsample:
            first = data.sample_percentage(percent, no_cache=True)
        else:
            first = data
        model = incremental_pca(localize(first))
        save_state(model)

        if subsample:
            for iteration in range(max_iter):
                drawn = data.sample_percentage(percent, no_cache=True)
                model.partial_fit(localize(drawn))
                model.iteration = iteration
                save_state(model)
                if aborted():
                    break
        return model

Esempio n. 5

0

Mostra file

    def test_normalize_data(self):
        """Compare the widget's cluster column with DBSCAN run directly.

        The comparison is made twice: with the widget's normalize control
        unchecked (DBSCAN on the raw table) and checked (DBSCAN on the table
        passed through Continuize, Normalize and SklImpute).
        """
        def dbscan_params():
            # Read the widget's settings at the moment they are needed.
            return {
                "eps": self.widget.eps,
                "min_samples": self.widget.min_samples,
                "metric": "euclidean"
            }

        def widget_clusters():
            # First meta column of the annotated output, with NaN mapped to -1.
            annotated = self.get_output(self.widget.Outputs.annotated_data)
            labels = annotated.metas[:, 0].copy()
            labels[np.isnan(labels)] = -1
            return labels

        # not normalized
        self.widget.controls.normalize.setChecked(False)

        table = Table("heart_disease")
        self.send_signal(self.widget.Inputs.data, table)

        expected = DBSCAN(**dbscan_params())(table)
        np.testing.assert_array_equal(widget_clusters(), expected)

        # normalized
        self.widget.controls.normalize.setChecked(True)

        params = dbscan_params()
        for preprocessor in (Continuize(), Normalize(), SklImpute()):
            table = preprocessor(table)
        expected = DBSCAN(**params)(table)
        np.testing.assert_array_equal(widget_clusters(), expected)

Esempio n. 6

0

Mostra file

File: test_lda.py Progetto: qeryq/SFECOMLA

    def test_transform_changed_domain(self):
        """
        Projecting held-out rows must not depend on where preprocessing ran.

        Route 1: preprocess the whole table, fit LDA on the first 75 rows,
        project the remaining preprocessed rows.

        Route 2: preprocess only the first 75 rows, fit LDA on them, and
        project the remaining rows straight from the unprocessed table.

        Both routes must yield the same projected values.
        """
        split = 75
        table = Randomize()(Table("iris"))
        continuize = Continuize()
        learner = LDA()

        # route 1: preprocess everything up front
        processed = continuize(table)
        fitted = learner(processed[:split])
        projected_from_processed = fitted(processed[split:])

        # route 2: preprocess the "training" part only
        processed_head = continuize(table[:split])
        fitted = learner(processed_head)
        projected_from_raw = fitted(table[split:])

        np.testing.assert_almost_equal(projected_from_processed.X,
                                       projected_from_raw.X)

Esempio n. 7

0

Mostra file

File: test_freeviz.py Progetto: qeryq/SFECOMLA

    def test_transform_changed_domain(self):
        """
        Projecting held-out rows must not depend on where preprocessing ran.

        Route 1: preprocess the whole table, fit FreeViz on the first 100
        rows, project the remaining preprocessed rows.

        Route 2: preprocess only the first 100 rows, fit FreeViz on them, and
        project the remaining rows straight from the unprocessed table.

        Both routes must yield the same projected values.
        """
        split = 100
        table = Table("titanic")[::10]
        continuize = Continuize()
        projector = FreeViz(maxiter=40)

        # route 1: preprocess everything up front
        processed = continuize(table)
        fitted = projector(processed[:split])
        projected_from_processed = fitted(processed[split:])

        # route 2: preprocess the "training" part only
        processed_head = continuize(table[:split])
        fitted = projector(processed_head)
        projected_from_raw = fitted(table[split:])

        np.testing.assert_almost_equal(projected_from_processed.X,
                                       projected_from_raw.X)

Esempio n. 8

0

Mostra file

File: test_owheatmap.py Progetto: synergy-robotics-a-b/orange3

 def test_information_message(self):
     """Information is active for continuized titanic, inactive for self.data.

     Ordered row clustering is switched on before either table is sent.
     """
     self.widget.set_row_clustering(Clustering.OrderedClustering)
     continuized_titanic = Continuize()(self.titanic)

     self.send_signal(self.widget.Inputs.data, continuized_titanic)
     self.assertTrue(self.widget.Information.active)

     self.send_signal(self.widget.Inputs.data, self.data)
     self.assertFalse(self.widget.Information.active)

Esempio n. 9

0

Mostra file

 def test_preprocessor_chaining(self):
     """Transforming raw data into a chained domain must match the chain.

     A two-row table with one missing value is passed through ``Impute``
     and then ``Continuize``. Transforming the original table into the
     resulting domain must give the same ``X`` as the chained output.
     """
     domain = Domain([DiscreteVariable("a", values="01"),
                      DiscreteVariable("b", values="01")],
                     DiscreteVariable("y", values="01"))
     # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0, while
     # the lowercase spelling is available in every NumPy release.
     table = Table.from_list(domain, [[0, 1], [1, np.nan]], [0, 1])
     pre1 = Continuize()(Impute()(table))
     pre2 = table.transform(pre1.domain)
     np.testing.assert_almost_equal(pre1.X, pre2.X)

Esempio n. 10

0

Mostra file

 def test_attr_label_metas(self, timeout=DEFAULT_TIMEOUT):
     """Select, in the 'Label' combo box, the item named after domain[-1].

     The zoo table is continuized with multinomial variables treated as
     ordinal, sent to the widget, and the widget is waited on (up to
     ``timeout``) before the combo box item is activated.
     """
     ordinal = Continuize(multinomial_treatment=Continuize.AsOrdinal)
     table = ordinal(Table("zoo"))
     self.send_signal(self.widget.Inputs.data, table)
     self.wait_until_finished(timeout=timeout)
     simulate.combobox_activate_item(
         self.widget.controls.attr_label, table.domain[-1].name)

Esempio n. 11

0

Mostra file

File: test_owheatmap.py Progetto: chrinide/orange3

 def test_information_message(self):
     """Information is active for continuized titanic, inactive for self.data.

     The row clustering control is checked before either table is sent.
     """
     self.widget.controls.row_clustering.setChecked(True)
     continuized_titanic = Continuize()(self.titanic)

     self.send_signal("Data", continuized_titanic)
     self.assertTrue(self.widget.Information.active)

     self.send_signal("Data", self.data)
     self.assertFalse(self.widget.Information.active)

Esempio n. 12

0

Mostra file

 def test_callback(self):
     """Values passed to the callback span 0 to 1 and never decrease.

     The first positional argument of every recorded call is collected; the
     smallest must be 0, the largest 1, and the sequence already sorted.
     """
     progress_spy = unittest.mock.Mock()
     learner = DummySklLearner(preprocessors=[Continuize(), Randomize()])
     learner(Table("iris"), progress_spy)
     reported = [call[0][0] for call in progress_spy.call_args_list]
     self.assertEqual(min(reported), 0)
     self.assertEqual(max(reported), 1)
     self.assertListEqual(reported, sorted(reported))

Esempio n. 13

0

Mostra file

 def test_information_message(self):
     """Information is active for continuized titanic, inactive for self.iris.

     ``sort_rows`` is set to the widget's ``OrderedClustering`` value before
     either table is sent.
     """
     self.widget.sort_rows = self.widget.OrderedClustering
     continuized_titanic = Continuize()(self.titanic)

     self.send_signal("Data", continuized_titanic)
     self.assertTrue(self.widget.Information.active)

     self.send_signal("Data", self.iris)
     self.assertFalse(self.widget.Information.active)

Esempio n. 14

0

Mostra file

File: test_owcurvefit.py Progetto: sckevmit/orange3

 def test_discrete_expression(self):
     """With a Continuize preprocessor the widget fits "p1 + gender_female".

     The heart_disease table is reduced to attributes 1..3 with attribute 4
     as the class, a ``Continuize`` preprocessor is sent to the widget, and
     the widget is initialised with the reduced table. The expression must
     read "p1 + gender_female" and a model must be output.
     """
     heart = Table("heart_disease")
     attributes = heart.domain.attributes
     reduced = heart.transform(Domain(attributes[1:4], attributes[4]))
     self.send_signal(self.widget.Inputs.preprocessor, Continuize())
     self.__init_widget(reduced)
     self.assertEqual(self.widget.expression, "p1 + gender_female")
     self.assertIsNotNone(self.get_output(self.widget.Outputs.model))

Esempio n. 15

0

Mostra file

 def test_retain_all_data(self):
     """Check the transformed output's shape after clicking 'retain all data'.

     The zoo table is the data input and its continuized version is the
     template. The output must be a ``Table`` with 16 attribute columns and
     38 meta columns, one row per input row.
     """
     zoo = Table("zoo")
     template = Continuize()(zoo)
     self.send_signal(self.widget.Inputs.data, zoo)
     self.send_signal(self.widget.Inputs.template_data, template)
     self.widget.controls.retain_all_data.click()
     transformed = self.get_output(self.widget.Outputs.transformed_data)
     self.assertIsInstance(transformed, Table)
     n_rows = len(zoo)
     self.assertEqual(transformed.X.shape, (n_rows, 16))
     self.assertEqual(transformed.metas.shape, (n_rows, 38))

Esempio n. 16

0

Mostra file

File: base.py Progetto: newinker/orange3

 def test_attr_label_metas(self, timeout=DEFAULT_TIMEOUT):
     """Select, in the 'Label' combo box, the item named after domain[-1].

     The zoo table is continuized with multinomial variables treated as
     ordinal and sent to the widget. If the widget reports that it is
     blocking, the test waits (up to ``timeout``) for its
     ``blockingStateChanged`` signal before activating the combo box item.
     """
     ordinal = Continuize(multinomial_treatment=Continuize.AsOrdinal)
     table = ordinal(Table("zoo"))
     self.send_signal(self.widget.Inputs.data, table)
     if self.widget.isBlocking():
         state_spy = QSignalSpy(self.widget.blockingStateChanged)
         self.assertTrue(state_spy.wait(timeout))
     simulate.combobox_activate_item(
         self.widget.controls.attr_label, table.domain[-1].name)

Esempio n. 17

0

Mostra file

File: elliptic_envelope.py Progetto: davan690/Orange3_bayes_biolab_ARD

class EllipticEnvelopeLearner(SklLearner):
    # Learner that wraps ``skl_covariance.EllipticEnvelope`` and returns
    # ``EllipticEnvelopeClassifier`` models (see __wraps__ / __returns__).
    __wraps__ = skl_covariance.EllipticEnvelope
    __returns__ = EllipticEnvelopeClassifier
    # Class-level default preprocessor list.
    preprocessors = [Continuize(), RemoveNaNColumns(), SklImpute()]

    def __init__(self,
                 store_precision=True,
                 assume_centered=False,
                 support_fraction=None,
                 contamination=0.1,
                 random_state=None,
                 preprocessors=None):
        """Pass ``preprocessors`` to the base class and record the arguments.

        The other keyword arguments are not used here directly; they are
        captured through ``vars()`` and assigned to ``self.params``
        (presumably forwarded to the wrapped estimator -- TODO confirm in
        the base class).
        """
        super().__init__(preprocessors=preprocessors)
        # vars() returns the local namespace of this call, so it must stay
        # free of extra local variables: anything defined above this line
        # would end up in the captured mapping.
        self.params = vars()

Esempio n. 18

0

Mostra file

File: base.py Progetto: ycpengpeng/orange3

class XGBBase(SklLearner):
    """Base class for xgboost (classification and regression) learners """
    # The same list object is bound to both names.
    preprocessors = default_preprocessors = [
        HasClass(),
        Continuize(),
        RemoveNaNColumns(),
    ]

    def __init__(self, preprocessors=None, **kwargs):
        """Pass ``preprocessors`` to the base class; keep ``kwargs`` as params."""
        super().__init__(preprocessors=preprocessors)
        # Assignment goes through the ``params`` setter defined below.
        self.params = kwargs

    # Replaces the setter of the ``params`` property inherited from
    # SklLearner; the getter is reused from the base property.
    @SklLearner.params.setter
    def params(self, values: Dict):
        """Store ``values`` unchanged in ``self._params``."""
        self._params = values

Esempio n. 19

0

Mostra file

class TreeRegressionLearner(SklLearner):
    # Learner that wraps ``skl_tree.DecisionTreeRegressor`` and returns
    # ``TreeRegressor`` models (see __wraps__ / __returns__).
    __wraps__ = skl_tree.DecisionTreeRegressor
    __returns__ = TreeRegressor
    name = 'regression tree'
    # Class-level default preprocessor list.
    preprocessors = [RemoveNaNColumns(), SklImpute(), Continuize()]

    # NOTE(review): newer scikit-learn releases reportedly name the "mse"
    # criterion "squared_error" -- confirm against the supported version.
    def __init__(self,
                 criterion="mse",
                 splitter="best",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 max_features=None,
                 random_state=None,
                 max_leaf_nodes=None,
                 preprocessors=None):
        """Pass ``preprocessors`` to the base class and record the arguments.

        The other keyword arguments are not used here directly; they are
        captured through ``vars()`` and assigned to ``self.params``
        (presumably forwarded to the wrapped estimator -- TODO confirm in
        the base class).
        """
        super().__init__(preprocessors=preprocessors)
        # vars() returns the local namespace of this call, so it must stay
        # free of extra local variables: anything defined above this line
        # would end up in the captured mapping.
        self.params = vars()

Esempio n. 20

0

Mostra file

File: linear.py Progetto: wibrt/orange3

class PolynomialLearner(Learner):
    name = 'poly learner'
    preprocessors = [Continuize(), RemoveNaNColumns(), SklImpute()]

    def __init__(self, learner, degree=1, preprocessors=None):
        """Remember the wrapped ``learner`` and the polynomial ``degree``."""
        super().__init__(preprocessors=preprocessors)
        self.degree = degree
        self.learner = learner

    def fit(self, X, Y, W):
        """Expand ``X`` with polynomial features and fit the wrapped learner.

        Weights are passed on (flattened, as ``sample_weight``) only when
        ``W`` is given and ``self.supports_weights`` is true. Otherwise the
        wrapped learner is fitted with ``None`` in the weight position.
        Returns a ``PolynomialModel`` built from the fitted model and the
        feature expander.
        """
        expander = skl_preprocessing.PolynomialFeatures(self.degree)
        expanded = expander.fit_transform(X)
        inner_learner = self.learner
        if W is not None and self.supports_weights:
            fitted = inner_learner.fit(
                expanded, Y, sample_weight=W.reshape(-1))
        else:
            fitted = inner_learner.fit(expanded, Y, None)
        return PolynomialModel(fitted, expander)

Esempio n. 21

0

Mostra file

File: pca.py Progetto: neo-nie/orange3

 def __new__(cls, data, batch=100, max_iter=100):
     """Fit an incremental PCA on up to ``max_iter`` samples of ``data``.

     ``data`` is continuized with multinomial variables removed. Each
     iteration draws a sample with ``sample_percentage`` and skips it when
     it is falsy. Otherwise the sample is downloaded, rebuilt as an
     attributes-only table and fed to ``partial_fit``. The model is passed
     to ``save_state`` after every fit, and the loop ends early when
     ``aborted()`` is true or the sample is the data object itself.
     """
     continuizer = Continuize(multinomial_treatment=Continuize.Remove)
     data = continuizer(data)
     model = Orange.projection.IncrementalPCA()
     percent = batch / data.approx_len() * 100
     for iteration in range(max_iter):
         drawn = data.sample_percentage(percent, no_cache=True)
         if not drawn:
             continue
         drawn.download_data(1000000)
         attributes_only = Orange.data.Domain(drawn.domain.attributes)
         drawn = Orange.data.Table.from_numpy(attributes_only, drawn.X)
         model = model.partial_fit(drawn)
         model.iteration = iteration
         save_state(model)
         if aborted() or drawn is data:
             break
     return model

Esempio n. 22

0

Mostra file

File: clustering.py Progetto: szzyiit/orange3

class Clustering(metaclass=WrapperMeta):
    """
    ${skldoc}
    Additional Orange parameters

    preprocessors : list, optional (default = [Continuize(), SklImpute()])
        An ordered list of preprocessors applied to data before
        training or testing.
    """
    __wraps__ = None
    __returns__ = ClusteringModel
    preprocessors = [Continuize(), SklImpute()]

    def __init__(self, preprocessors, parameters):
        """Store the preprocessors and the parameters for the wrapped class.

        ``preprocessors`` falls back to the class-level list when ``None``.
        ``parameters`` is copied into ``self.params`` without the entries
        named "self", "preprocessors" and "__class__".
        """
        if preprocessors is None:
            preprocessors = self.preprocessors
        self.preprocessors = preprocessors
        excluded = ("self", "preprocessors", "__class__")
        self.params = {
            name: value
            for name, value in parameters.items()
            if name not in excluded
        }

    def __call__(self, data):
        """Fit a model on ``data`` and return its ``labels``."""
        fitted = self.get_model(data)
        return fitted.labels

    def get_model(self, data):
        """Preprocess ``data``, fit, and tag the model with both domains."""
        original_domain = data.domain
        preprocessed = self.preprocess(data)
        fitted = self.fit_storage(preprocessed)
        fitted.domain = preprocessed.domain
        fitted.original_domain = original_domain
        return fitted

    def fit_storage(self, data):
        """Fit on the ``X`` array of a data table."""
        # only data Table
        features = data.X
        return self.fit(features)

    def fit(self, X: np.ndarray, y: np.ndarray = None):
        """Fit the wrapped class on ``X`` and wrap it in ``__returns__``.

        ``y`` is accepted but not used.
        """
        wrapped = self.__wraps__(**self.params)
        return self.__returns__(wrapped.fit(X))

    def preprocess(self, data):
        """Apply every preprocessor in order and return the final data."""
        result = data
        for preprocessor in self.preprocessors:
            result = preprocessor(result)
        return result

Esempio n. 23

0

Mostra file

File: test_owcurvefit.py Progetto: sckevmit/orange3

    def test_discrete_features(self):
        """Track the feature model and data error across four input changes.

        Discretized housing data gives one model row and shows the error.
        Adding a Continuize preprocessor gives more than one row and hides
        it. Removing the preprocessor restores one row and the error.
        Removing the data leaves one row and hides the error.
        """
        feature_model = self.widget.controls._feature.model()

        def send_and_check(signal, value, compare_rows, error_expected):
            # Send one input, then check the row count against 1 and the
            # visibility of the data error.
            self.send_signal(signal, value)
            compare_rows(feature_model.rowCount(), 1)
            shown = self.widget.Error.data_error.is_shown()
            if error_expected:
                self.assertTrue(shown)
            else:
                self.assertFalse(shown)

        discretized = Discretize()(self.housing)
        send_and_check(self.widget.Inputs.data, discretized,
                       self.assertEqual, True)

        send_and_check(self.widget.Inputs.preprocessor, Continuize(),
                       self.assertGreater, False)

        send_and_check(self.widget.Inputs.preprocessor, None,
                       self.assertEqual, True)

        send_and_check(self.widget.Inputs.data, None,
                       self.assertEqual, False)

Esempio n. 24

0

Mostra file

    def test_reconstruct_domain(self):
        """Reconstructed domains must keep the shape and leave no NaN in row 0.

        The check runs on the raw heart_disease table, on its scaled version
        and on its continuized version. Each time a logistic regression
        model is trained, its domain is reconstructed, and the model's
        original data is transformed into that domain.
        """
        def check(table):
            model = LogisticRegressionLearner()(table)
            domain = OWNomogram.reconstruct_domain(model, model.domain)
            transformed = model.original_data.transform(domain)
            self.assertEqual(transformed.X.shape, table.X.shape)
            self.assertFalse(np.isnan(transformed.X[0]).any())

        heart = Table("heart_disease")
        check(heart)
        check(Scale()(heart))
        check(Continuize()(heart))

Esempio n. 25

0

Mostra file

File: owlda.py Progetto: www3838438/orange3-prototypes

    def apply(self):
        """Project ``self.data`` with LDA and send both outputs.

        When data is present, ``self.data`` is replaced by its imputed and
        continuized version. A two-component LDA is fitted on it, and two
        tables are built: the projected rows (with the original class and
        meta columns) and the LDA scalings, one row per component. Both
        outputs are sent as ``None`` when there is no data.
        """
        transformed = components = None
        if self.data is not None:
            # NOTE: this overwrites the widget's stored data.
            self.data = Continuize(Impute(self.data))
            projector = LinearDiscriminantAnalysis(solver='eigen',
                                                   n_components=2)
            projected = projector.fit_transform(self.data.X, self.data.Y)

            component_vars = [
                ContinuousVariable('Component_1'),
                ContinuousVariable('Component_2')
            ]
            projected_domain = Domain(component_vars,
                                      self.data.domain.class_vars,
                                      self.data.domain.metas)
            transformed = Table(projected_domain, projected,
                                self.data.Y, self.data.metas)
            transformed.name = self.data.name + ' (LDA)'

            components_domain = Domain(
                self.data.domain.attributes,
                metas=[StringVariable(name='component')])
            labels = [
                'Component_{}'.format(i + 1)
                for i in range(projector.scalings_.shape[1])
            ]
            # Wrap the labels in one row, then transpose to a single column.
            label_column = np.array([labels], dtype=object).T
            components = Table(components_domain, projector.scalings_.T,
                               metas=label_column)
            components.name = 'components'

        self.send("Transformed data", transformed)
        self.send("Components", components)

Esempio n. 26

0

Mostra file

File: owpreprocess.py Progetto: coro-binal/orange3

 def createinstance(params):
     """Build a ``Continuize`` preprocessor from ``params``.

     ``params`` is converted to a dict. Its "multinomial_treatment" entry is
     used when present, otherwise ``Continuize.Indicators``.
     """
     settings = dict(params)
     return Continuize(multinomial_treatment=settings.get(
         "multinomial_treatment", Continuize.Indicators))

Esempio n. 27

0

Mostra file

File: owdbscan.py Progetto: wangcj05/orange3

from sklearn.metrics import pairwise_distances

from Orange.preprocess import Normalize, Continuize, SklImpute
from Orange.widgets import widget, gui
from Orange.widgets.utils.slidergraph import SliderGraph
from Orange.widgets.settings import Setting
from Orange.data import Table, DiscreteVariable
from Orange.data.util import get_unique_names
from Orange.clustering import DBSCAN
from Orange.widgets.utils.annotated_data import ANNOTATED_DATA_SIGNAL_NAME
from Orange.widgets.utils.signals import Input, Output
from Orange.widgets.widget import Msg


# NOTE(review): presumably the default cut position used by the widget --
# confirm where this constant is read.
DEFAULT_CUT_POINT = 0.1
# Preprocessor instances created once at import time, listed in the order
# Continuize, Normalize, SklImpute.
PREPROCESSORS = [Continuize(), Normalize(), SklImpute()]
# NOTE(review): presumably the smallest eps value the widget allows --
# confirm where this constant is read.
EPS_BOTTOM_LIMIT = 0.01


def get_kth_distances(data, metric, k=5):
    """
    The function computes the epsilon parameter for DBSCAN through method
    proposed in the paper.
    Parameters
    ----------
    data : Orange.data.Table
        Visualisation coordinates - embeddings
    metric : callable or str
        The metric to compute the distance.
    k : int
        Number kth observed neighbour

Esempio n. 28

0

Mostra file

File: softmax.py Progetto: am93/fri-ozip-naloge

class SoftmaxLearner(Learner):
    """
    Implementation of softmax regression with k*(n+1) parameters
    trained using L-BFGS optimization.
    """
    name = 'softmax'
    preprocessors = [
        RemoveNaNClasses(),
        Normalize(),
        Continuize(),
        Impute(),
        RemoveNaNColumns()
    ]

    def __init__(self, preprocessors=None):
        super().__init__(preprocessors=preprocessors)

    def mysigma(self, x):
        """
        Row-wise softmax of a 2-D score matrix.

        The row maximum is subtracted before exponentiation so ``np.exp``
        cannot overflow. Strongly negative shifted scores may underflow to
        exactly 0, so do not take the logarithm of this result; use
        ``_log_sigma`` for that.

        Args:
            x (np.ndarray): scores, one row per example

        Returns:
            np.ndarray: softmax probabilities with the same shape as ``x``
        """
        tmpx = np.exp(x - np.max(x, axis=1)[:, None])
        return tmpx / np.sum(tmpx, axis=1)[:, None]

    def _log_sigma(self, x):
        """Row-wise log-softmax that stays finite for finite scores."""
        shifted = x - np.max(x, axis=1)[:, None]
        # Every row of ``shifted`` contains a 0, so the sum below is >= 1
        # and its logarithm is finite.
        return shifted - np.log(np.sum(np.exp(shifted), axis=1))[:, None]

    def cost(self, theta, X, y):
        """
        Args:
            theta (np.ndarray): model parameters of shape [n_classes * n_features]
            X (np.ndarray): data of shape [n_examples, n_features]
            y (np.ndarray): target variable of shape [n_examples]

        Returns:
            float: The value of cost function evaluated with given parameters.
        """
        # Reshape theta from a long vector into matrix form, then build the
        # indicator (one-hot) matrix of the true classes.
        theta = theta.reshape((-1, X.shape[1]))
        indicator = np.identity(theta.shape[0])[y.astype(int)]
        # Use log-softmax directly: log(mysigma(...)) gives -inf for
        # underflowed probabilities, and 0 * -inf in the product below would
        # turn the whole cost into NaN.
        return -(np.sum(indicator * self._log_sigma(X.dot(theta.T))))

    def grad(self, theta, X, y):
        """
        Args:
            theta (np.ndarray): model parameters of shape [n_classes * n_features]
            X (np.ndarray): data of shape [n_examples, n_features]
            y (np.ndarray): target variable of shape [n_examples]

        Returns:
            np.ndarray: Gradients wrt. all model's parameters of shape
                [n_classes * n_features]
        """
        theta = theta.reshape((-1, X.shape[1]))
        indicator = np.identity(theta.shape[0])[y.astype(int)]
        return -(X.T.dot(
            (indicator - self.mysigma(X.dot(theta.T))))).T.flatten()

    def approx_grad(self, theta, X, y, eps=1e-5):
        """
        Central-difference estimate of the gradient, one parameter at a time.

        Args:
            theta (np.ndarray): model parameters of shape [n_classes * n_features]
            X (np.ndarray): data of shape [n_examples, n_features]
            y (np.ndarray): target variable of shape [n_examples]
            eps (float): value offset for gradient estimation

        Returns:
            np.ndarray: Gradients wrt. all model's parameters of shape
                [n_classes * n_features]
        """
        n_params = len(theta)
        # One reusable offset vector; only the probed coordinate is non-zero.
        offset = np.zeros(n_params)
        result = []
        for i in range(n_params):
            offset[i] = eps
            result.append((self.cost(theta + offset, X, y) -
                           self.cost(theta - offset, X, y)) / (2 * eps))
            offset[i] = 0.0

        return np.array(result)

    def fit(self, X, y, W=None):
        """
        Args:
            X (np.ndarray): data of shape [n_examples, n_features]
            y (np.ndarray): target variable of shape [n_examples]
            W (np.ndarray): Orange weights - ignore for this exercise

        Returns:
            SoftmaxModel: Orange's classification model
        """
        # We assume that every class is present in y.
        num_classes = len(np.unique(y))
        # Prepend a column of ones for the intercept term.
        X = np.column_stack((np.ones(X.shape[0]), X))
        theta = np.ones(num_classes * X.shape[1]) * 1e-9
        result = fmin_l_bfgs_b(self.cost, theta, self.grad, args=(X, y))[0]
        return SoftmaxModel(result.reshape((-1, X.shape[1])))

Esempio n. 29

0

Mostra file

class CatBoostLearnerRegression(CatBoostLearner, LearnerRegression):
    # Regression learner built on CatBoostLearner that returns CatBoostModel
    # instances.
    # NOTE(review): ``__wraps__`` is None here, so ``_get_sklparams`` raises
    # TypeError and ``fit`` cannot instantiate anything unless a subclass
    # sets it -- confirm this is intended.
    __wraps__ = None
    __returns__ = CatBoostModel
    supports_multiclass = True
    # Class-level fallback; the ``params`` setter rebinds ``_params`` on the
    # instance.
    _params = {}

    learner_adequacy_err_msg = "Continuous class variable expected."

    # The same list object is bound to both names.
    preprocessors = default_preprocessors = [
        HasClass(), Continuize(),
        RemoveNaNColumns(),
        SklImpute()
    ]

    def check_learner_adequacy(self, domain):
        """Return whether ``domain`` has a continuous class."""
        return domain.has_continuous_class

    @property
    def params(self):
        """Parameters kept for the wrapped class (``self._params``)."""
        return self._params

    @params.setter
    def params(self, value):
        # Only the entries accepted by the wrapped class are kept.
        self._params = self._get_sklparams(value)

    def _get_sklparams(self, values):
        """Filter ``values`` down to the wrapped class's ``__init__`` arguments.

        Raises:
            TypeError: if ``__wraps__`` is None.
        """
        skllearner = self.__wraps__
        if skllearner is not None:
            spec = inspect.getargs(skllearner.__init__.__code__)
            # first argument is 'self'
            assert spec.args[0] == "self"
            params = {
                name: values[name]
                for name in spec.args[1:] if name in values
            }
        else:
            raise TypeError("Wrapper does not define '__wraps__'")
        return params

    def preprocess(self, data):
        """Run the base preprocessing, then reject multinomial attributes.

        Raises:
            ValueError: if any attribute is discrete with more than two values.
        """
        data = super().preprocess(data)

        if any(v.is_discrete and len(v.values) > 2
               for v in data.domain.attributes):
            raise ValueError("Wrapped scikit-learn methods do not support " +
                             "multinomial variables.")

        return data

    def __call__(self, data):
        """Train through the base class and copy ``params`` onto the model."""
        m = super().__call__(data)
        m.params = self.params
        return m

    def fit(self, X, Y, W=None):
        """Instantiate the wrapped class with ``params`` and fit it.

        ``Y`` is flattened. ``W`` is passed as ``sample_weight`` (flattened)
        only when given and ``supports_weights`` is true.
        """
        clf = self.__wraps__(**self.params)
        Y = Y.reshape(-1)
        if W is None or not self.supports_weights:
            return self.__returns__(clf.fit(X, Y))
        return self.__returns__(clf.fit(X, Y, sample_weight=W.reshape(-1)))

    @property
    def supports_weights(self):
        """Indicates whether this learner supports weighted instances.
        """
        # Looks for 'sample_weight' among the variable names of the wrapped
        # class's ``fit`` code object.
        return 'sample_weight' in self.__wraps__.fit.__code__.co_varnames

    def __getattr__(self, item):
        # Fallback lookup: expose entries of ``params`` as attributes.
        try:
            return self.params[item]
        except (KeyError, AttributeError):
            raise AttributeError(item) from None

    # TODO: Disallow (or mirror) __setattr__ for keys in params?

    def __dir__(self):
        """List the regular attributes together with the ``params`` keys."""
        dd = super().__dir__()
        return list(sorted(set(dd) | set(self.params.keys())))

Esempio n. 30

0

Mostra file

class SklLearner(Learner, metaclass=WrapperMeta):
    """
    ${skldoc}
    Additional Orange parameters

    preprocessors : list, optional
        An ordered list of preprocessors applied to data before
        training or testing.
        Defaults to
        `[HasClass(), Continuize(), RemoveNaNColumns(), SklImpute()]`
    """
    # Subclasses set ``__wraps__`` to the class they wrap; while it is None,
    # ``_get_sklparams`` raises TypeError.
    __wraps__ = None
    __returns__ = SklModel
    # Class-level fallback; the ``params`` setter rebinds ``_params`` on the
    # instance.
    _params = {}

    # The same list object is bound to both names.
    preprocessors = default_preprocessors = [
        HasClass(), Continuize(),
        RemoveNaNColumns(),
        SklImpute()
    ]

    @property
    def params(self):
        """Parameters kept for the wrapped class (``self._params``)."""
        return self._params

    @params.setter
    def params(self, value):
        # Only the entries accepted by the wrapped class are kept.
        self._params = self._get_sklparams(value)

    def _get_sklparams(self, values):
        """Filter ``values`` down to the wrapped class's ``__init__`` arguments.

        Raises:
            TypeError: if ``__wraps__`` is None.
        """
        skllearner = self.__wraps__
        if skllearner is not None:
            spec = inspect.getargs(skllearner.__init__.__code__)
            # first argument is 'self'
            assert spec.args[0] == "self"
            params = {
                name: values[name]
                for name in spec.args[1:] if name in values
            }
        else:
            raise TypeError("Wrapper does not define '__wraps__'")
        return params

    def preprocess(self, data):
        """Run the base preprocessing, then reject multinomial attributes.

        Raises:
            ValueError: if any attribute is discrete with more than two values.
        """
        data = super().preprocess(data)

        if any(v.is_discrete and len(v.values) > 2
               for v in data.domain.attributes):
            raise ValueError("Wrapped scikit-learn methods do not support " +
                             "multinomial variables.")

        return data

    def __call__(self, data):
        """Train through the base class and copy ``params`` onto the model."""
        m = super().__call__(data)
        m.params = self.params
        return m

    def fit(self, X, Y, W=None):
        """Instantiate the wrapped class with ``params`` and fit it.

        ``Y`` is flattened. ``W`` is passed as ``sample_weight`` (flattened)
        only when given and ``supports_weights`` is true.
        """
        clf = self.__wraps__(**self.params)
        Y = Y.reshape(-1)
        if W is None or not self.supports_weights:
            return self.__returns__(clf.fit(X, Y))
        return self.__returns__(clf.fit(X, Y, sample_weight=W.reshape(-1)))

    @property
    def supports_weights(self):
        """Indicates whether this learner supports weighted instances.
        """
        # Looks for 'sample_weight' among the variable names of the wrapped
        # class's ``fit`` code object.
        return 'sample_weight' in self.__wraps__.fit.__code__.co_varnames

    def __getattr__(self, item):
        # Fallback lookup: expose entries of ``params`` as attributes.
        try:
            return self.params[item]
        except (KeyError, AttributeError):
            raise AttributeError(item) from None

    # TODO: Disallow (or mirror) __setattr__ for keys in params?

    def __dir__(self):
        """List the regular attributes together with the ``params`` keys."""
        dd = super().__dir__()
        return list(sorted(set(dd) | set(self.params.keys())))