class Preprocessor_featureSelection(orange.Preprocessor):
    """ Preprocessor that keeps only the features chosen by a filter applied
    to per-feature scores.

    :param measure: a scoring function called as ``measure(attr, data)``
        (default: orange.MeasureAttribute_relief)
    :param filter: a function called as ``filter(scored, limit)`` that
        returns the ``(score, attribute)`` pairs to keep
        (default Preprocessor_featureSelection.bestN)
    :param limit: the limit passed to the filter function (default 10)

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    bestN = staticmethod(bestN)
    bestP = staticmethod(bestP)

    def __init__(self, measure=orange.MeasureAttribute_relief(), filter=None,
                 limit=10):
        self.measure = measure
        if filter is None:
            filter = self.bestN
        self.filter = filter
        self.limit = limit

    def attrScores(self, data):
        """ Score every attribute in `data.domain.attributes` and return the
        ``(score, attribute)`` pairs sorted in ascending order.
        """
        scored = [(self.measure(attr, data), attr)
                  for attr in data.domain.attributes]
        scored.sort()
        return scored

    def __call__(self, data, weightId=None):
        """ Return a new table restricted to the selected attributes; the
        class variable and meta attributes of `data` are carried over.
        `weightId` is accepted but not used.
        """
        scored = self.attrScores(data)
        kept = []
        for _score, attr in self.filter(scored, self.limit):
            kept.append(attr)
        reduced = orange.Domain(kept, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        Any additional keyword arguments are set as attributes on the
        learner.

        """
        self.C = C
        self.eps = eps
        for option in kwargs:
            setattr(self, option, kwargs[option])

        # Assigned after the keyword arguments, so these two always win.
        self.solver_type = self.MCSVM_CS
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """ Run the stored preprocessor on `instances`, then train through
        the base class and return its classifier.
        """
        prepared = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(prepared, weight_id)
class Preprocessor_preprocessorList(orange.Preprocessor):
    """ A preprocessor wrapping a sequence of other preprocessors.

    :param preprocessors: a list of :obj:`Preprocessor` instances; when
        omitted, each instance gets its own new empty list

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, preprocessors=None):
        # A literal ``[]`` default would be created once and shared by every
        # instance constructed without arguments.
        self.preprocessors = [] if preprocessors is None else preprocessors

    def __call__(self, data, weightId=None):
        """ Apply the wrapped preprocessors to `data` in order.

        A preprocessor may return either the new data or a
        ``(data, weightId)`` tuple. Once a weight id is known (passed in or
        returned by an earlier step) it is forwarded to the following
        preprocessors.

        :return: ``(data, weightId)`` if `weightId` was supplied by the
            caller, otherwise only the data
        """
        hadWeight = hasWeight = weightId is not None
        for preprocessor in self.preprocessors:
            if hasWeight:
                t = preprocessor(data, weightId)
            else:
                t = preprocessor(data)
            if isinstance(t, tuple):
                data, weightId = t
                hasWeight = True
            else:
                data = t
        # The return shape depends only on how the caller invoked us, not on
        # whether an intermediate step produced a weight id.
        if hadWeight:
            return data, weightId
        else:
            return data
class Preprocessor_continuize(orange.Preprocessor):
    """ Preprocessor that translates the data into a domain produced by
    ``orange.DomainContinuizer`` configured with the stored treatments.
    See :obj:`Orange.data.continuization.DomainContinuizer` for the list of
    accepted arguments.

    Extra keyword arguments are accepted by the constructor but not stored.

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, zeroBased=True,
                 multinomialTreatment=orange.DomainContinuizer.NValues,
                 continuousTreatment=orange.DomainContinuizer.Leave,
                 classTreatment=orange.DomainContinuizer.Ignore,
                 **kwargs):
        self.zeroBased = zeroBased
        self.multinomialTreatment = multinomialTreatment
        self.continuousTreatment = continuousTreatment
        self.classTreatment = classTreatment

    def __call__(self, data, weightId=0):
        """ Build the continuized domain for `data` and return `data`
        translated into it.
        """
        settings = dict(zeroBased=self.zeroBased,
                        multinomialTreatment=self.multinomialTreatment,
                        continuousTreatment=self.continuousTreatment,
                        classTreatment=self.classTreatment)
        make_domain = orange.DomainContinuizer(**settings)
        continuized = make_domain(data, weightId)
        return data.translate(continuized)
class Preprocessor_discretizeEntropy(Preprocessor_discretize):
    """ A discretizer based on orange.EntropyDiscretization that, unlike
    Preprocessor_discretize, also drops continuous attributes for which the
    discretization found no cut-off points.

    :param method: an ``orange.EntropyDiscretization`` instance

    """
    __new__ = _orange__new__(Preprocessor_discretize)
    __reduce__ = _orange__reduce__

    def __init__(self, method=orange.EntropyDiscretization()):
        self.method = method
        assert isinstance(method, orange.EntropyDiscretization)

    def __call__(self, data, wightId=0):
        """ Return a new table in which continuous attributes are replaced
        by their discretized versions (or removed when no cut-off points
        were found); other attributes are kept unchanged. `wightId` is
        accepted but not used.
        """
        kept = []
        for attr in data.domain.attributes:
            is_continuous = attr.varType == orange.VarTypes.Continuous
            if not is_continuous:
                kept.append(attr)
                continue
            discretized = self.method(attr, data)
            # An empty list of points means the attribute was not split.
            if discretized.getValueFrom.transformer.points:
                kept.append(discretized)
        reduced = orange.Domain(kept, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
class Preprocessor_removeDiscrete(Preprocessor_continuize):
    """ A preprocessor that keeps only the continuous attributes of the
    domain (the class variable and meta attributes are preserved).
    """
    __new__ = _orange__new__(Preprocessor_continuize)

    def __call__(self, data, weightId=None):
        """ Return a copy of `data` without its non-continuous attributes.
        `weightId` is accepted but not used.
        """
        continuous = []
        for attr in data.domain.attributes:
            if attr.varType == orange.VarTypes.Continuous:
                continuous.append(attr)
        reduced = orange.Domain(continuous, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
class Preprocessor_removeContinuous(Preprocessor_discretize):
    """ A preprocessor that keeps only the discrete attributes of the domain
    (the class variable and meta attributes are preserved).
    """
    __new__ = _orange__new__(Preprocessor_discretize)
    __reduce__ = _orange__reduce__

    def __call__(self, data, weightId=None):
        """ Return a copy of `data` without its non-discrete attributes.
        `weightId` is accepted but not used.
        """
        discrete = []
        for attr in data.domain.attributes:
            if attr.varType == orange.VarTypes.Discrete:
                discrete.append(attr)
        reduced = orange.Domain(discrete, data.domain.classVar)
        reduced.addmetas(data.domain.getmetas())
        return orange.ExampleTable(reduced, data)
class Preprocessor_impute(orange.Preprocessor):
    """ A preprocessor that imputes unknown values using a learner.

    :param model: the learner handed to
        ``orange.Preprocessor_imputeByLearner``; when None, a new
        ``orange.MajorityLearner()`` is used

    Extra keyword arguments are accepted by the constructor but not stored.

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    def __init__(self, model=None, **kwargs):
        if model is None:
            model = orange.MajorityLearner()
        self.model = model

    def __call__(self, data, weightId=0):
        """ Return the result of imputing `data` with the stored learner.
        `weightId` is accepted but not used.
        """
        imputed = orange.Preprocessor_imputeByLearner(data, learner=self.model)
        return imputed
class Preprocessor_sample(orange.Preprocessor):
    """ A preprocessor that samples a subset of the data.

    :param filter: a function called as ``filter(data, limit)`` returning
        the selected examples (default Preprocessor_sample.selectNRandom)
    :param limit: the limit passed to the filter function (default 10)

    """
    __new__ = _orange__new__(orange.Preprocessor)
    __reduce__ = _orange__reduce__

    selectNRandom = staticmethod(selectNRandom)
    selectPRandom = staticmethod(selectPRandom)

    def __init__(self, filter=None, limit=10):
        if filter is None:
            filter = self.selectNRandom
        self.filter = filter
        self.limit = limit

    def __call__(self, data, weightId=None):
        """ Return a new table over the same domain holding whatever the
        filter selected from `data`. `weightId` is accepted but not used.
        """
        domain = data.domain
        chosen = self.filter(data, self.limit)
        return orange.ExampleTable(domain, chosen)
class Preprocessor_RFE(Preprocessor_featureSelection):
    """ A preprocessor that runs RFE (Recursive Feature Elimination) using
    linear SVM derived attribute weights.

    :param filter: a filter function used to decide how many features to
        keep (default Preprocessor_featureSelection.bestN)
    :param limit: the limit for the filter function (default 10)

    """
    __new__ = _orange__new__(Preprocessor_featureSelection)
    __reduce__ = _orange__reduce__

    def __init__(self, filter=None, limit=10):
        self.limit = limit
        self.filter = filter if filter is not None else self.bestN

    def __call__(self, data, weightId=None):
        """ Return the result of running RFE on `data`, asking it to keep as
        many features as the filter selects out of all features in the
        domain. `weightId` is accepted but not used.
        """
        from Orange.classification.svm import RFE
        rfe = RFE()
        # The filter is only used to derive a count, so it is fed one
        # placeholder per *feature*. Using the number of examples here would
        # make the number of retained features depend on the table length.
        candidates = range(len(data.domain.attributes))
        filtered = self.filter(candidates, self.limit)
        return rfe(data, len(filtered))
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model."""

    # Solver type aliases. Each name must map to its own LIBLINEAR solver;
    # L2R_L1LOSS_DUAL in particular selects the L1-loss dual solver.
    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: One of the following class constants:
            ``L2R_L2LOSS_DUAL``, ``L2R_L2LOSS``, ``L2R_L1LOSS_DUAL`` or
            ``L1R_L2LOSS``

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        Any additional keyword arguments are set as attributes on the
        learner (and may override the values above).

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                                    self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # Other solver types are tolerated: raising
            # ValueError("Invalid solver_type parameter.") was disabled in
            # this code, and that permissive behavior is kept.
            pass

        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """ Run the stored preprocessor on `instances`, then train through
        the base class and return its classifier.
        """
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances,
                                                            weight_id)
        return classifier
class SVMLearner(_SVMLearner):
    """
    :param svm_type: the SVM type
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: the kernel type
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (only for ``Polynomial``)
    :type degree: int
    :param gamma: kernel parameter; if 0, it is set to 1.0/#features
        (for ``Polynomial``, ``RBF`` and ``Sigmoid``)
    :type gamma: float
    :param coef0: kernel parameter (for ``Polynomial`` and ``Sigmoid``)
    :type coef0: int
    :param kernel_func: kernel function if ``kernel_type`` is
        ``kernels.Custom``
    :type kernel_func: callable object
    :param C: C parameter (for ``C_SVC``, ``Epsilon_SVR`` and ``Nu_SVR``)
    :type C: float
    :param nu: Nu parameter (for ``Nu_SVC``, ``Nu_SVR`` and ``OneClass``)
    :type nu: float
    :param p: epsilon parameter (for ``Epsilon_SVR``)
    :type p: float
    :param cache_size: cache memory size in MB
    :type cache_size: int
    :param eps: tolerance of termination criterion
    :type eps: float
    :param probability: build a probability model
    :type probability: bool
    :param shrinking: use shrinking heuristics
    :type shrinking: bool
    :param verbose: if true, warn when the requested ``nu`` is infeasible
        and has to be lowered
    :type verbose: bool
    :param normalization: if true, the data is passed through
        :obj:`_normalize` before learning and the classifier is wrapped in
        ``SVMClassifierWrapper``
    :type normalization: bool
    :param weight: a list of class weights (a new empty list when omitted)
    :type weight: list

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> data = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], data, folds=5)
        >>> print scoring.CA(results)[0]
        0.789613644274

    """
    __new__ = _orange__new__(_SVMLearner)

    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        # Extra keyword arguments are set on this object and also forwarded
        # to the wrapped core learner.
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # A literal ``[]`` default would be one list shared by all instances.
        self.weight = [] if weight is None else weight

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param data: data with continuous features
        :type data: Orange.data.Table

        :param weight: ignored (required due to base class signature);

        :raises ValueError: if no example has a defined class, or if a
            custom kernel is selected without a kernel function
        """
        import warnings

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type mismatch.
        # NOTE(review): the literals below presumably are the numeric codes
        # of the classification (0, 1) and regression (3, 4) SVM types, with
        # +/- 3 mapping one onto its counterpart -- confirm against the
        # _SVMLearner constants. Note that this updates self.svm_type.
        if self.svm_type in [0, 1] and \
                isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3
        if self.svm_type in [3, 4] and \
                isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:
            # Is nu feasible? If not, train with a value just below the
            # maximum instead of the requested one (self.nu is left as is).
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. "
                                  "Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Copy the current settings onto the wrapped core learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        # Override the copied value with the (possibly lowered) feasible nu.
        self.learner.nu = nu
        self.learner.set_weights(self.weight)

        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        """Train the wrapped learner on `data`.

        With normalization enabled the data is normalized first and the
        resulting classifier is wrapped in ``SVMClassifierWrapper``;
        otherwise the wrapped learner's classifier is returned directly.
        """
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                        progress_callback=None):
        """Tune the ``parameters`` on the given ``data`` using
        internal cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: names of parameters to tune
            (default: ["nu", "C", "gamma"])
        :type parameters: list of strings
        :param folds: number of folds for internal cross validation
        :type folds: int
        :param verbose: set verbose output
        :type verbose: bool
        :param progress_callback: callback function for reporting progress
        :type progress_callback: callback function

        ``"C"`` is only searched when ``"nu"`` is not (i.e. when the SVM
        type is not a nu type or ``"nu"`` was not requested). The search
        itself is delegated to ``orngWrap.TuneMParameters``.

        Here is example of tuning the `gamma` parameter using
        3-fold cross validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """
        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        normalization = self.normalization
        if normalization:
            # Normalize once up front and switch normalization off so that
            # it is not repeated for every fit during the search.
            data = self._normalize(data)
            self.normalization = False
        try:
            if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
                if isinstance(data.domain.class_var, Orange.feature.Discrete):
                    max_nu = max(self.max_nu(data) - 1e-7, 0.0)
                else:
                    max_nu = 1.0
                searchParams.append(("nu", [i / 10.0 for i in range(1, 9)
                                            if i / 10.0 < max_nu] + [max_nu]))
            elif "C" in parameters:
                searchParams.append(("C", [2 ** a for a in range(-5, 15, 2)]))
            # NOTE(review): 2 is presumably the numeric code of the RBF
            # kernel -- confirm against the kernels module.
            if self.kernel_type == 2 and "gamma" in parameters:
                searchParams.append(("gamma",
                                     [2 ** a for a in range(-5, 5, 2)] + [0]))
            tunedLearner = orngWrap.TuneMParameters(
                object=self,
                parameters=searchParams,
                folds=folds,
                returnWhat=orngWrap.TuneMParameters.returnLearner,
                progressCallback=progress_callback
                if progress_callback else lambda i: None)
            tunedLearner(data, verbose=verbose)
        finally:
            # Restore the flag even when tuning fails; otherwise the learner
            # would silently stay in non-normalizing mode.
            if normalization:
                self.normalization = normalization

    def _normalize(self, data):
        """Return `data` translated into a domain built by a
        ``DomainContinuizer`` that ignores the class, normalizes continuous
        features by span and uses the ``NValues`` multinomial treatment.
        """
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)