예제 #1
0
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """Multi-class SVM learner (Crammer and Singer) backed by `LIBLINEAR`_.

    The solver type is always forced to ``MCSVM_CS``; before training the
    data is continuized (and optionally normalized) when needed.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, bias=1.0,
                 normalization=True,
                 multinomial_treatment=DomainContinuizer.NValues,
                 **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        :param bias: If non negative then each instance is appended a constant
            bias term (default 1.0).

        :type bias: float

        :param normalization: Normalize the input data prior to learning
            (default True)
        :type normalization: bool

        :param multinomial_treatment: Defines how to handle multinomial
            features for learning. It can be one of the
            :class:`~.DomainContinuizer` `multinomial_treatment`
            constants (default: `DomainContinuizer.NValues`).

        :type multinomial_treatment: int

        Any additional keyword arguments are set as attributes on the
        learner.

        .. versionadded:: 2.6.1
            Added `multinomial_treatment`

        """
        # Named parameters first, then any extra keyword settings.
        settings = [
            ("C", C),
            ("eps", eps),
            ("bias", bias),
            ("normalization", normalization),
            ("multinomial_treatment", multinomial_treatment),
        ]
        settings.extend(kwargs.items())
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)

        # Assigned last so that keyword arguments cannot override it.
        self.solver_type = self.MCSVM_CS

    def __call__(self, data, weight_id=None):
        """Train on `data` and return the result of the base learner.

        :param data: training data; its class variable must be discrete
        :param weight_id: passed through unchanged to the base learner
        :raises TypeError: if the class variable is not discrete
        """
        if not isinstance(data.domain.class_var, variable.Discrete):
            raise TypeError("Can only learn a discrete class.")

        # Continuization is required for discrete features and is also the
        # step that performs the normalization.
        if data.domain.has_discrete_attributes(False) or self.normalization:
            continuizer = DomainContinuizer()
            continuizer.multinomial_treatment = self.multinomial_treatment
            continuizer.class_treatment = continuizer.Ignore
            if self.normalization:
                continuizer.continuous_treatment = continuizer.NormalizeBySpan
            else:
                continuizer.continuous_treatment = continuizer.Leave
            data = data.translate(continuizer(data))

        return super(MultiClassSVMLearner, self).__call__(data, weight_id)
예제 #2
0
class FeatureSelection(Preprocessor):
    """A preprocessor that selects features using a feature scoring function.

    :param measure: a scoring function, called as ``measure(attr, data)``
        (default: an ``Orange.feature.scoring.Relief`` instance)
    :param filter: a filter function, called as ``filter(scores, limit)``
        on the sorted ``(score, attribute)`` list (default: ``bestN``)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    bestN = staticmethod(_bestN)
    bestP = staticmethod(_bestP)

    def __init__(self,
                 measure=Orange.feature.scoring.Relief(),
                 filter=None,
                 limit=10):
        self.measure = measure
        if filter is None:
            filter = self.bestN
        self.filter = filter
        self.limit = limit

    def attrScores(self, data):
        """Return the ``(score, attribute)`` pairs for every attribute in
        `data`, sorted in ascending order.
        """
        scored = [(self.measure(attr, data), attr)
                  for attr in data.domain.attributes]
        scored.sort()
        return scored

    def __call__(self, data, weightId=None):
        """Return a new table restricted to the attributes kept by the
        filter; the class variable and meta attributes are carried over.
        """
        kept_pairs = self.filter(self.attrScores(data), self.limit)
        selected = [feature for _score, feature in kept_pairs]
        reduced_domain = Orange.data.Domain(selected, data.domain.classVar)
        reduced_domain.addmetas(data.domain.getmetas())
        return Orange.data.Table(reduced_domain, data)
예제 #3
0
class PreprocessorList(Preprocessor):
    """A preprocessor that applies a sequence of other preprocessors in order.

    :param preprocessors: a list of :obj:`Preprocessor` instances

    """

    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, preprocessors=()):
        self.preprocessors = preprocessors

    def __call__(self, data, weightId=None):
        """Run every preprocessor on `data`, feeding each result to the next.

        Returns ``(data, weightId)`` when a weight id was supplied by the
        caller, otherwise just the data.
        """
        weighted_on_entry = weightId is not None
        # Becomes true as soon as any step hands back a weight id.
        pass_weight = weighted_on_entry
        for step in self.preprocessors:
            if pass_weight:
                outcome = step(data, weightId)
            else:
                outcome = step(data)

            if isinstance(outcome, tuple):
                data, weightId = outcome
                pass_weight = True
            else:
                data = outcome

        if weighted_on_entry:
            return data, weightId
        return data
예제 #4
0
class Continuize(Preprocessor):
    """A preprocessor that continuizes a discrete domain (and optionally
    normalizes it).

    See :obj:`Orange.data.continuization.DomainContinuizer` for list of
    accepted arguments.

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self,
                 zeroBased=True,
                 multinomialTreatment=DomainContinuizer.NValues,
                 continuousTreatment=DomainContinuizer.Leave,
                 classTreatment=DomainContinuizer.Ignore,
                 **kwargs):
        # NOTE: extra keyword arguments are accepted but not stored here.
        self.zeroBased = zeroBased
        self.multinomialTreatment = multinomialTreatment
        self.continuousTreatment = continuousTreatment
        self.classTreatment = classTreatment

    def __call__(self, data, weightId=0):
        """Return `data` translated into the continuized domain."""
        options = dict(
            zeroBased=self.zeroBased,
            multinomialTreatment=self.multinomialTreatment,
            continuousTreatment=self.continuousTreatment,
            classTreatment=self.classTreatment,
        )
        continuizer = DomainContinuizer(**options)
        return data.translate(continuizer(data, weightId))
예제 #5
0
class DiscretizeEntropy(Discretize):
    """A discretizer that uses the entropy discretization method and also
    drops continuous attributes for which no cut-off points were found.

    :param method: an ``Orange.feature.discretization.Entropy`` instance
    """

    __new__ = _orange__new__(Discretize)
    __reduce__ = _orange__reduce__

    def __init__(self, method=Orange.feature.discretization.Entropy()):
        self.method = method
        assert (isinstance(method, Orange.feature.discretization.Entropy))

    def __call__(self, data, weightId=0):
        """Return a new table in which continuous attributes are replaced by
        their discretized versions; non-continuous attributes are kept as-is.
        """
        kept = []
        for attr in data.domain.attributes:
            is_continuous = attr.varType == Orange.feature.Type.Continuous
            if not is_continuous:
                kept.append(attr)
                continue
            discretized = self.method(attr, data)
            # No cut-off points means the attribute is dropped entirely.
            if discretized.getValueFrom.transformer.points:
                kept.append(discretized)

        discretized_domain = Orange.data.Domain(kept, data.domain.classVar)
        discretized_domain.addmetas(data.domain.getmetas())
        return Orange.data.Table(discretized_domain, data)
예제 #6
0
class RemoveDiscrete(Continuize):
    """A preprocessor that keeps only the continuous attributes of the
    domain (the class variable and meta attributes are preserved).
    """
    __new__ = _orange__new__(Continuize)

    def __call__(self, data, weightId=None):
        """Return a new table containing only continuous attributes."""
        wanted_type = Orange.feature.Type.Continuous
        continuous = []
        for attr in data.domain.attributes:
            if attr.varType == wanted_type:
                continuous.append(attr)

        reduced_domain = Orange.data.Domain(continuous, data.domain.classVar)
        reduced_domain.addmetas(data.domain.getmetas())
        return Orange.data.Table(reduced_domain, data)
예제 #7
0
class RemoveContinuous(Discretize):
    """A preprocessor that keeps only the discrete attributes of the
    domain (the class variable and meta attributes are preserved).
    """
    __new__ = _orange__new__(Discretize)
    __reduce__ = _orange__reduce__

    def __call__(self, data, weightId=None):
        """Return a new table containing only discrete attributes."""
        wanted_type = Orange.feature.Type.Discrete
        discrete = []
        for attr in data.domain.attributes:
            if attr.varType == wanted_type:
                discrete.append(attr)

        reduced_domain = Orange.data.Domain(discrete, data.domain.classVar)
        reduced_domain.addmetas(data.domain.getmetas())
        return Orange.data.Table(reduced_domain, data)
예제 #8
0
class Impute(Preprocessor):
    """A preprocessor that imputes unknown values using a learner.

    :param model: the learner handed to ``ImputeByLearner``; when omitted a
        ``MajorityLearner`` instance is used.

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, model=None, **kwargs):
        # NOTE: extra keyword arguments are accepted but not stored here.
        if model is None:
            model = Orange.classification.majority.MajorityLearner()
        self.model = model

    def __call__(self, data, weightId=0):
        """Return the result of ``ImputeByLearner`` applied to `data`."""
        imputing_learner = self.model
        return ImputeByLearner(data, learner=imputing_learner)
예제 #9
0
class Sample(Preprocessor):
    """A preprocessor that samples a subset of the data.

    :param filter: a filter function, called as ``filter(data, limit)``
                   (default: ``selectNRandom``)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor)
    __reduce__ = _orange__reduce__

    selectNRandom = staticmethod(_selectNRandom)
    selectPRandom = staticmethod(_selectPRandom)

    def __init__(self, filter=None, limit=10):
        if filter is None:
            filter = self.selectNRandom
        self.filter = filter
        self.limit = limit

    def __call__(self, data, weightId=None):
        """Return a new table, over the same domain, built from the
        instances chosen by the filter.
        """
        chosen = self.filter(data, self.limit)
        return Orange.data.Table(data.domain, chosen)
예제 #10
0
class RFE(FeatureSelection):
    """ A preprocessor that runs RFE (Recursive Feature Elimination) using
    linear SVM derived attribute weights.

    :param filter: a filter function used to decide how many features to
                   keep (default: ``FeatureSelection.bestN``)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(FeatureSelection)
    __reduce__ = _orange__reduce__

    def __init__(self, filter=None, limit=10):
        super(RFE, self).__init__(filter=filter, limit=limit)

    def __call__(self, data, weightId=None):
        """Return `data` reduced by recursive feature elimination.

        The filter is applied to the list of feature indices only to work
        out *how many* features should survive; the actual choice of
        features is made by ``Orange.classification.svm.RFE``.
        """
        # Aliased so the import does not shadow this class's own name.
        from Orange.classification.svm import RFE as SVMFeatureElimination

        # The count must be derived from the number of features, not the
        # number of instances: otherwise percentage-based filters (and tables
        # with few rows) yield a wrong number of features to keep.
        feature_indices = list(range(len(data.domain.attributes)))
        kept = self.filter(feature_indices, self.limit)

        eliminator = SVMFeatureElimination()
        return eliminator(data, len(kept))
예제 #11
0
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model."""

    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01,
                 bias=1.0, normalization=True,
                 multinomial_treatment=DomainContinuizer.NValues, **kwargs):
        """
        :param solver_type: One of the following class constants:
            ``L2R_L2LOSS_DUAL``, ``L2R_L2LOSS``,
            ``L2R_L1LOSS_DUAL``, ``L1R_L2LOSS``

            The first part (``L2R`` or ``L1R``) is the regularization term
            on the weight vector (squared or absolute norm respectively),
            the ``L1LOSS`` or ``L2LOSS`` indicate absolute or squared
            loss function; ``DUAL`` means the optimization problem is
            solved in the dual space (for more information see the
            documentation on `LIBLINEAR`_).

            Any other value still works but triggers a
            :class:`DeprecationWarning`.

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        :param bias: If non negative then each instance is appended a constant
            bias term (default 1.0).

        :type bias: float

        :param normalization: Normalize the input data into range [0..1] prior
            to learning (default ``True``)
        :type normalization: bool

        :param multinomial_treatment: Defines how to handle multinomial
            features for learning. It can be one of the
            :class:`~.DomainContinuizer` `multinomial_treatment`
            constants (default: `DomainContinuizer.NValues`).

        :type multinomial_treatment: int

        Any additional keyword arguments are set as attributes on the
        learner.

        .. versionadded:: 2.6.1
            Added `multinomial_treatment`

        .. note:: By default if the training data contains discrete features
            they are replaced by indicator columns one for each value of the
            feature regardless of the value of `normalization`. This is
            different than in :class:`SVMLearner` where this is done only if
            `normalization` is ``True``.

        Example

            >>> linear_svm = LinearSVMLearner(
            ...     solver_type=LinearSVMLearner.L1R_L2LOSS,
            ...     C=2.0)
            ...

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        self.bias = bias
        self.normalization = normalization
        self.multinomial_treatment = multinomial_treatment

        for name, val in kwargs.items():
            setattr(self, name, val)

        # Checked after the keyword arguments are applied, since they may
        # have overridden `solver_type`.
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # The adjacent literals are concatenated, so each one must carry
            # its own separating space.
            warnings.warn(
                "Deprecated 'solver_type', use "
                "'Orange.classification.logreg.LibLinearLogRegLearner' "
                "to build a logistic regression model using LIBLINEAR.",
                DeprecationWarning
            )

    def __call__(self, data, weight_id=None):
        """Train on `data` and return the result of the base learner.

        :param data: training data; its class variable must be discrete
        :param weight_id: passed through unchanged to the base learner
        :raises TypeError: if the class variable is not discrete
        """
        if not isinstance(data.domain.class_var, variable.Discrete):
            raise TypeError("Can only learn a discrete class.")

        # Continuization is needed for discrete features and is also the
        # step that performs the normalization.
        if data.domain.has_discrete_attributes(False) or self.normalization:
            dc = DomainContinuizer()
            dc.multinomial_treatment = self.multinomial_treatment
            dc.class_treatment = dc.Ignore
            dc.continuous_treatment = \
                    dc.NormalizeBySpan if self.normalization else dc.Leave
            c_domain = dc(data)
            data = data.translate(c_domain)

        return super(LinearSVMLearner, self).__call__(data, weight_id)
예제 #12
0
class SVMLearner(_SVMLearner):
    """
    SVM learner that configures and delegates training to an
    ``Orange.core.SVMLearner`` instance.

    :param svm_type: the SVM type
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: the kernel type
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (only for ``Polynomial``)
    :type degree: int
    :param gamma: kernel parameter; if 0, it is set to 1.0/#features
        (for ``Polynomial``, ``RBF`` and ``Sigmoid``)
    :type gamma: float
    :param coef0: kernel parameter (for ``Polynomial`` and ``Sigmoid``)
    :type coef0: int
    :param kernel_func: kernel function if ``kernel_type`` is
        ``kernels.Custom``
    :type kernel_func: callable object
    :param C: C parameter (for ``C_SVC``, ``Epsilon_SVR`` and ``Nu_SVR``)
    :type C: float
    :param nu: Nu parameter (for ``Nu_SVC``, ``Nu_SVR`` and ``OneClass``)
    :type nu: float
    :param p: epsilon parameter (for ``Epsilon_SVR``)
    :type p: float
    :param cache_size: cache memory size in MB
    :type cache_size: int
    :param eps: tolerance of termination criterion
    :type eps: float
    :param probability: build a probability model
    :type probability: bool
    :param shrinking: use shrinking heuristics
    :type shrinking: bool
    :param normalization: normalize the input data prior to learning into
        range [0..1] and replace discrete features with indicator columns
        one for each value of the feature using
        :class:`~Orange.data.continuization.DomainContinuizer` class
        (default ``True``)
    :type normalization: bool
    :param weight: a list of class weights (default: an empty list)
    :type weight: list
    :param verbose: If `True` show training progress (default is `False`).
    :type verbose: bool

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> data = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], data, folds=5)
        >>> print "CA:  %.4f" % scoring.CA(results)[0]
        CA:  0.7908
        >>> print "AUC: %.4f" % scoring.AUC(results)[0]
        AUC: 0.9565

    """
    __new__ = _orange__new__(_SVMLearner)

    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.utils.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # A fresh list per instance: a mutable default argument would be
        # shared (and mutable) across every learner created without `weight`.
        self.weight = [] if weight is None else weight

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param data: data with continuous features
        :type data: Orange.data.Table

        :param weight: ignored (required due to base class signature);

        :raises ValueError: if no instance has a defined class, or if a
            custom kernel is selected without a kernel function
        """

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type mismatch
        # NOTE(review): the literals 0/1 and 3/4 are presumably the numeric
        # codes of the classification and regression SVM types, with the
        # +/-3 shift mapping one onto the other -- confirm against the
        # constants in `_SVMLearner`.  This updates `self.svm_type` in place.
        if self.svm_type in [0, 1] and \
            isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3

        if self.svm_type in [3, 4] and \
            isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3

        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:
            # Check if nu is feasible
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn(
                        "Specified nu %.3f is infeasible. "
                        "Setting nu to %.3f" % (self.nu, max_nu))
                # Stay strictly below the bound, but never below zero.
                nu = max(max_nu - 1e-7, 0.0)

        # Mirror the current settings onto the wrapped learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))

        # Override with the (possibly clamped) nu; `self.nu` is left as-is.
        self.learner.nu = nu
        self.learner.set_weights(self.weight)

        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        """Normalize `data` if requested, train the wrapped learner on it
        and return the result wrapped in an :class:`SVMClassifier`.
        """
        if self.normalization:
            data = self._normalize(data)
        svm = self.learner(data)
        return SVMClassifier(svm)

    @Orange.utils.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                       progress_callback=None):
        """Tune the ``parameters`` on the given ``data`` using
        internal cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: names of parameters to tune
            (default: ["nu", "C", "gamma"])
        :type parameters: list of strings
        :param folds: number of folds for internal cross validation
        :type folds: int
        :param verbose: set verbose output
        :type verbose: bool
        :param progress_callback: callback function for reporting progress
        :type progress_callback: callback function

        Here is example of tuning the `gamma` parameter using
        3-fold cross validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """

        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        normalization = self.normalization
        if normalization:
            # Normalize once up front and switch normalization off while
            # tuning, so the data is not re-normalized on every fit.
            data = self._normalize(data)
            self.normalization = False
        try:
            if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                        and "nu" in parameters:
                if isinstance(data.domain.class_var, variable.Discrete):
                    max_nu = max(self.max_nu(data) - 1e-7, 0.0)
                else:
                    max_nu = 1.0
                searchParams.append(("nu", [i / 10.0 for i in range(1, 9) if \
                                            i / 10.0 < max_nu] + [max_nu]))
            elif "C" in parameters:
                searchParams.append(("C", [2 ** a for a in  range(-5, 15, 2)]))

            # NOTE(review): 2 is presumably the numeric code of the RBF
            # kernel -- confirm against `kernels`.
            if self.kernel_type == 2 and "gamma" in parameters:
                searchParams.append(("gamma",
                                     [2 ** a for a in range(-5, 5, 2)] + [0])
                                    )
            tunedLearner = orngWrap.TuneMParameters(object=self,
                                parameters=searchParams,
                                folds=folds,
                                returnWhat=orngWrap.TuneMParameters.returnLearner,
                                progressCallback=progress_callback
                                if progress_callback else lambda i: None)
            tunedLearner(data, verbose=verbose)
        finally:
            # Restore the caller's setting even when tuning fails; otherwise
            # the learner would silently stay un-normalized afterwards.
            if normalization:
                self.normalization = normalization

    def _normalize(self, data):
        """Return `data` translated into a continuized domain: continuous
        features normalized by span, multinomial features expanded with the
        ``NValues`` treatment, and the class left untouched.
        """
        dc = preprocess.DomainContinuizer()
        dc.class_treatment = preprocess.DomainContinuizer.Ignore
        dc.continuous_treatment = preprocess.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = preprocess.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)
예제 #13
0
        class L(Orange.core.Learner):
            """Stub learner whose call echoes its arguments together with
            ``self.msg``.
            """
            __new__ = utils._orange__new__(Orange.core.Learner)

            def __call__(self, data, weight=0):
                echoed = (data, weight, self.msg)
                return echoed
예제 #14
0
        class A(Orange.core.OrangeBase):
            """Stub object whose call echoes its argument together with
            ``self.name`` and ``self.msg``.
            """
            __new__ = utils._orange__new__(Orange.core.OrangeBase)

            def __call__(self, data):
                echoed = (data, self.name, self.msg)
                return echoed