Beispiel #1
0
class Preprocessor_featureSelection(orange.Preprocessor):
    """ Keep only the features picked by a filter from a scored feature list.

    :param measure: scoring function, called as ``measure(attr, data)``
        (default: an ``orange.MeasureAttribute_relief`` instance)
    :param filter: function called as ``filter(scored_attrs, limit)`` that
        returns the ``(score, attr)`` pairs to keep
        (default ``Preprocessor_featureSelection.bestN``)
    :param limit: the limit passed to the filter function (default 10)

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    bestN = staticmethod(bestN)
    bestP = staticmethod(bestP)

    def __init__(self, measure=orange.MeasureAttribute_relief(), filter=None, limit=10):
        # Fall back to the class-level bestN when no filter is supplied.
        chosen_filter = self.bestN if filter is None else filter
        self.measure = measure
        self.filter = chosen_filter
        self.limit = limit

    def attrScores(self, data):
        """ Return ``(score, attr)`` pairs for every attribute of `data`,
        sorted in ascending order.
        """
        scored = [(self.measure(feature, data), feature)
                  for feature in data.domain.attributes]
        scored.sort()
        return scored

    def __call__(self, data, weightId=None):
        """ Return a new table restricted to the selected attributes.

        The class variable and the meta attributes of `data` are carried
        over; `weightId` is accepted but not used.
        """
        ranked = self.attrScores(data)
        kept = self.filter(ranked, self.limit)
        selected = [feature for _score, feature in kept]
        reduced = orange.Domain(selected, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
Beispiel #2
0
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.

    Input data is run through ``default_preprocessor()`` before training.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        Any extra keyword arguments are set as attributes on the learner.

        """
        self.C = C
        self.eps = eps
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

        # Assigned after the keyword attributes, so these two always win
        # over same-named entries in ``kwargs``.
        self.solver_type = self.MCSVM_CS
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """ Preprocess `instances` and train the base linear learner on them.
        """
        prepared = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(prepared, weight_id)
Beispiel #3
0
class Preprocessor_preprocessorList(orange.Preprocessor):
    """ A preprocessor wrapping a sequence of other preprocessors.

    :param preprocessors: a list of :obj:`Preprocessor` instances that are
        applied in order (default: a new empty list)

    """

    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, preprocessors=None):
        # A literal ``[]`` default would be created once and then shared
        # (and mutated) by every instance constructed without arguments.
        self.preprocessors = [] if preprocessors is None else preprocessors

    def __call__(self, data, weightId=None):
        """ Apply the wrapped preprocessors to `data` one after another.

        A preprocessor is called with ``(data, weightId)`` once a weight id
        is known, otherwise with ``data`` only. A preprocessor may return
        either the new data or a ``(data, weightId)`` tuple; in the latter
        case the returned weight id is passed on to the following
        preprocessors.

        :returns: ``(data, weightId)`` if the caller supplied a `weightId`,
            otherwise only the processed data.
        """
        hadWeight = hasWeight = weightId is not None
        for preprocessor in self.preprocessors:
            t = preprocessor(data, weightId) if hasWeight else preprocessor(data)
            if isinstance(t, tuple):
                data, weightId = t
                hasWeight = True
            else:
                data = t
        if hadWeight:
            return data, weightId
        return data
Beispiel #4
0
class Preprocessor_continuize(orange.Preprocessor):
    """ A preprocessor that continuizes a discrete domain (and optionally
    normalizes it). See :obj:`Orange.data.continuization.DomainContinuizer`
    for the list of accepted arguments.

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, zeroBased=True, multinomialTreatment=orange.DomainContinuizer.NValues,
                 continuousTreatment=orange.DomainContinuizer.Leave,
                 classTreatment=orange.DomainContinuizer.Ignore,
                 **kwargs):
        # Extra keyword arguments are accepted but not stored by this method.
        self.zeroBased = zeroBased
        self.multinomialTreatment = multinomialTreatment
        self.continuousTreatment = continuousTreatment
        self.classTreatment = classTreatment

    def __call__(self, data, weightId=0):
        """ Return `data` translated into the continuized domain. """
        # Forward the stored settings to a fresh DomainContinuizer.
        option_names = ("zeroBased", "multinomialTreatment",
                        "continuousTreatment", "classTreatment")
        options = dict((name, getattr(self, name)) for name in option_names)
        continuizer = orange.DomainContinuizer(**options)
        return data.translate(continuizer(data, weightId))
Beispiel #5
0
class Preprocessor_discretizeEntropy(Preprocessor_discretize):
    """ A discretizer that uses the orange.EntropyDiscretization method but,
    unlike the Preprocessor_discretize class, also removes unused attributes
    from the domain.

    """

    __new__ = _orange__new__(Preprocessor_discretize)
    __reduce__ = _orange__reduce__

    def __init__(self, method=orange.EntropyDiscretization()):
        self.method = method
        assert isinstance(method, orange.EntropyDiscretization)

    # NOTE(review): the second parameter is spelled ``wightId`` here while
    # sibling preprocessors use ``weightId``; it is not used in the body.
    def __call__(self, data, wightId=0):
        """ Return `data` with continuous attributes discretized.

        A continuous attribute is dropped when its discretized replacement
        has no cut-off points; all other attributes are kept unchanged.
        """
        kept = []
        for attr in data.domain.attributes:
            is_continuous = attr.varType == orange.VarTypes.Continuous
            if not is_continuous:
                kept.append(attr)
                continue
            discretized = self.method(attr, data)
            # An empty list of points means the attribute was not split.
            if discretized.getValueFrom.transformer.points:
                kept.append(discretized)
        result_domain = orange.Domain(kept, data.domain.classVar)
        result_domain.addmetas(data.domain.getmetas())
        return orange.ExampleTable(result_domain, data)
Beispiel #6
0
class Preprocessor_removeDiscrete(Preprocessor_continuize):
    """ A preprocessor that keeps only the continuous attributes of the
    domain (the class variable and meta attributes are retained).
    """
    __new__ = _orange__new__(Preprocessor_continuize)

    def __call__(self, data, weightId=None):
        """ Return a copy of `data` restricted to continuous attributes. """
        continuous = [feature for feature in data.domain.attributes
                      if feature.varType == orange.VarTypes.Continuous]
        reduced = orange.Domain(continuous, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
Beispiel #7
0
class Preprocessor_removeContinuous(Preprocessor_discretize):
    """ A preprocessor that keeps only the discrete attributes of the
    domain (the class variable and meta attributes are retained).
    """
    __new__ = _orange__new__(Preprocessor_discretize)
    __reduce__ = _orange__reduce__

    def __call__(self, data, weightId=None):
        """ Return a copy of `data` restricted to discrete attributes. """
        discrete = [feature for feature in data.domain.attributes
                    if feature.varType == orange.VarTypes.Discrete]
        reduced = orange.Domain(discrete, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
Beispiel #8
0
class Preprocessor_impute(orange.Preprocessor):
    """ A preprocessor that imputes unknown values using a learner.

    :param model: the learner used for imputation (default: a new
        ``orange.MajorityLearner`` instance)

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, model=None, **kwargs):
        # Extra keyword arguments are accepted but not stored by this method.
        if model is None:
            model = orange.MajorityLearner()
        self.model = model

    def __call__(self, data, weightId=0):
        """ Return the result of ``orange.Preprocessor_imputeByLearner`` on
        `data` with the stored learner; `weightId` is not used.
        """
        learner = self.model
        return orange.Preprocessor_imputeByLearner(data, learner=learner)
Beispiel #9
0
class Preprocessor_sample(orange.Preprocessor):
    """ A preprocessor that samples a subset of the data.

    :param filter: function called as ``filter(data, limit)`` that returns
        the selected examples (default ``Preprocessor_sample.selectNRandom``)
    :param limit: the limit passed to the filter function (default 10)

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    selectNRandom = staticmethod(selectNRandom)
    selectPRandom = staticmethod(selectPRandom)

    def __init__(self, filter=None, limit=10):
        # Fall back to the class-level selectNRandom when no filter is given.
        chosen_filter = self.selectNRandom if filter is None else filter
        self.filter = chosen_filter
        self.limit = limit

    def __call__(self, data, weightId=None):
        """ Return a new table, in the domain of `data`, built from the
        examples chosen by the filter; `weightId` is not used.
        """
        domain = data.domain
        sampled = self.filter(data, self.limit)
        return orange.ExampleTable(domain, sampled)
Beispiel #10
0
class Preprocessor_RFE(Preprocessor_featureSelection):
    """ A preprocessor that runs RFE(Recursive Feature Elimination) using
    linear SVM derived attribute weights.

    :param filter: a filter function to use for selection (default
                   Preprocessor_featureSelection.bestN)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor_featureSelection)
    __reduce__ = _orange__reduce__

    def __init__(self, filter=None, limit=10):
        self.limit = limit
        self.filter = filter if filter is not None else self.bestN

    def __call__(self, data, weightId=None):
        """ Return `data` reduced by RFE to the number of features that the
        filter function would keep; `weightId` is not used.
        """
        from Orange.classification.svm import RFE
        rfe = RFE()
        # The filter is only used to work out HOW MANY features to keep, so
        # it has to be applied to a sequence as long as the attribute list.
        # Sizing it by len(data) (the number of examples) would make the
        # result depend on the table length rather than on its features.
        feature_indices = range(len(data.domain.attributes))
        filtered = self.filter(feature_indices, self.limit)
        return rfe(data, len(filtered))
Beispiel #11
0
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model.

    The class constants are short aliases for the SVC solver types defined
    on ``Orange.core.LinearLearner``. Input data is run through
    ``default_preprocessor()`` before training.
    """

    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    # Must map to the L1-loss dual solver. Binding this name a second time
    # to L2R_L2Loss_SVC_Dual would make the L1-loss solver unreachable.
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: One of the following class constants:
            ``L2R_L2LOSS_DUAL``, ``L2R_L2LOSS``, ``L2R_L1LOSS_DUAL`` or
            ``L1R_L2LOSS``

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        Any extra keyword arguments are set as attributes on the learner.

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)

        # NOTE: solver_type is deliberately not validated against the class
        # constants; other solver types are accepted without raising.

        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """ Preprocess `instances` and train the base linear learner on them.
        """
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances, weight_id)
        return classifier
Beispiel #12
0
class SVMLearner(_SVMLearner):
    """
    :param svm_type: the SVM type
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: the kernel type
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (only for ``Polynomial``)
    :type degree: int
    :param gamma: kernel parameter; if 0, it is set to 1.0/#features (for ``Polynomial``, ``RBF`` and ``Sigmoid``)
    :type gamma: float
    :param coef0: kernel parameter (for ``Polynomial`` and ``Sigmoid``)
    :type coef0: int
    :param kernel_func: kernel function if ``kernel_type`` is
        ``kernels.Custom``
    :type kernel_func: callable object
    :param C: C parameter (for ``C_SVC``, ``Epsilon_SVR`` and ``Nu_SVR``)
    :type C: float
    :param nu: Nu parameter (for ``Nu_SVC``, ``Nu_SVR`` and ``OneClass``)
    :type nu: float
    :param p: epsilon parameter (for ``Epsilon_SVR``)
    :type p: float
    :param cache_size: cache memory size in MB
    :type cache_size: int
    :param eps: tolerance of termination criterion
    :type eps: float
    :param probability: build a probability model
    :type probability: bool
    :param shrinking: use shrinking heuristics
    :type shrinking: bool
    :param verbose: passed on to the wrapped learner; also enables the
        warning issued when the requested ``nu`` is infeasible
    :type verbose: bool
    :param normalization: normalize the data (see ``_normalize``) before
        learning
    :type normalization: bool
    :param weight: a list of class weights (default: a new empty list)
    :type weight: list

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> data = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], data, folds=5)
        >>> print scoring.CA(results)[0]
        0.789613644274

    """
    __new__ = _orange__new__(_SVMLearner)

    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # A literal ``[]`` default would be one list shared by every learner
        # created without explicit weights.
        self.weight = [] if weight is None else weight

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param data: data with continuous features
        :type data: Orange.data.Table

        :param weight: ignored (required due to base class signature);

        :raises ValueError: if no example has a defined class, or if the
            kernel type is ``kernels.Custom`` and no ``kernel_func`` is set

        Note that ``self.svm_type`` is changed in place when it does not
        match the type of the class variable.
        """

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type mismatch.
        # Codes 0/1 are treated as the classification types and 3/4 as their
        # regression counterparts, hence the offset of 3.
        if self.svm_type in [0, 1] and \
            isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3
        if self.svm_type in [3, 4] and \
            isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        import warnings

        # ``nu`` may have to be lowered for this data set only, so the
        # adjusted value goes to the wrapped learner and ``self.nu`` is kept.
        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC: #is nu feasible
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. "
                                  "Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Push the current settings down to the wrapped core learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        self.learner.nu = nu
        self.learner.set_weights(self.weight)

        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        """Train the wrapped learner on ``data``.

        When ``self.normalization`` is set, the data is normalized first and
        the result is wrapped in ``SVMClassifierWrapper``; otherwise the
        wrapped learner's result is returned directly.
        """
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                       progress_callback=None):
        """Tune the ``parameters`` on the given ``data`` using
        internal cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: names of parameters to tune
            (default: ["nu", "C", "gamma"])
        :type parameters: list of strings
        :param folds: number of folds for internal cross validation
        :type folds: int
        :param verbose: set verbose output
        :type verbose: bool
        :param progress_callback: callback function for reporting progress
        :type progress_callback: callback function

        Here is example of tuning the `gamma` parameter using
        3-fold cross validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """

        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        normalization = self.normalization
        if normalization:
            # Normalize once up front and switch per-call normalization off
            # while tuning; it is restored in the ``finally`` clause below.
            data = self._normalize(data)
            self.normalization = False
        try:
            if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                        and "nu" in parameters:
                if isinstance(data.domain.class_var, variable.Discrete):
                    max_nu = max(self.max_nu(data) - 1e-7, 0.0)
                else:
                    max_nu = 1.0
                searchParams.append(("nu", [i / 10.0 for i in range(1, 9) if \
                                            i / 10.0 < max_nu] + [max_nu]))
            elif "C" in parameters:
                searchParams.append(("C", [2 ** a for a in  range(-5, 15, 2)]))
            # NOTE(review): 2 is presumably the RBF kernel code -- confirm
            # against ``kernels.RBF``.
            if self.kernel_type == 2 and "gamma" in parameters:
                searchParams.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
            tunedLearner = orngWrap.TuneMParameters(object=self,
                                parameters=searchParams,
                                folds=folds,
                                returnWhat=orngWrap.TuneMParameters.returnLearner,
                                progressCallback=progress_callback
                                if progress_callback else lambda i:None)
            tunedLearner(data, verbose=verbose)
        finally:
            # Restore the caller's setting even if tuning raised.
            if normalization:
                self.normalization = normalization

    def _normalize(self, data):
        """Return ``data`` translated into a continuized domain.

        The ``DomainContinuizer`` is configured to ignore the class,
        normalize continuous features by span and use the ``NValues``
        treatment for multinomial features.
        """
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)