def __init__(self, sample, **kwargs):
    r"""
    See ``SVMClassificationAlgorithm`` for full documentation.

    INPUT:

    - ``sample`` -- sample to be learnt; it is validated through
      ``check_svm_classification_sample``.

    - ``c`` -- (named argument, optional) trade-off constant. When it is
      omitted or explicitly set to ``None`` the value ``float('inf')`` is
      stored, which is the value used to denote the hard-margin version of
      the algorithm.

    - ``kernel`` -- (named argument, optional) kernel to be used; a fresh
      ``LinearKernel()`` is created when it is omitted.

    - ``solver`` -- (named argument, optional) solver to be used; when it is
      omitted ``SVMClassificationAlgorithm.default_solver`` is used.

    Any other named argument is ignored.

    """

    LearningAlgorithm.__init__(self, sample)
    check_svm_classification_sample(sample)
    self.sample = sample
    self.model = None

    # ``None`` is the documented default for ``c``: treat an explicit
    # ``c=None`` exactly as an omitted argument instead of storing a value
    # that cannot be compared against the multipliers later on.
    c = kwargs.get('c')
    self.c = float('inf') if c is None else c

    # Defaults are built lazily, only when the argument is really missing.
    self.kernel = kwargs['kernel'] if 'kernel' in kwargs else LinearKernel()

    # NOTE(review): ``default_solver`` is read from the class at call time,
    # so it is presumably assigned elsewhere in the module -- confirm.
    self.solver = kwargs['solver'] if 'solver' in kwargs \
        else SVMClassificationAlgorithm.default_solver
class SVMClassificationAlgorithm(LearningAlgorithm):
    r"""
    SVM Classification Algorithm.

    INPUT:

    - ``sample`` -- list or tuple of ``LabeledExample`` instances whose
      labels are all set either to `1` or `-1`.

    - ``c`` -- float (default: None, amounting to the hard-margin version of
      the algorithm) value for the trade-off constant `C` between steepness
      and accuracy in the soft-margin version of the algorithm. ``None`` and
      ``float('inf')`` are equivalent: in both cases ``float('inf')`` is the
      value stored in the ``c`` field.

    - ``kernel`` -- ``Kernel`` (default: ``LinearKernel()``) instance defining
      the kernel to be used.

    - ``solver`` -- ``SVMClassificationSolver`` (default:
      ``CVXOPTClassificationSolver()``, unless differently specified through
      the ``SVMClassificationAlgorithm.default_solver`` class field) solver to
      be used in order to find the solution of the SV classification
      optimization problem.

    OUTPUT:

    ``LearningAlgorithm`` instance.

    EXAMPLES:

    SV classification algorithm can be directly applied to any problem whose
    labels are encodable in terms of two classes. For instance, consider the
    binary XOR problem:

    ::

        >>> from yaplf.data import LabeledExample
        >>> xor_sample = [LabeledExample((1, 1), -1),
        ... LabeledExample((0, 0), -1), LabeledExample((0, 1), 1),
        ... LabeledExample((1, 0), 1)]

    As this sample is not linearly separable, a nonlinear kernel is needed in
    order to learn it, for instance a polynomial kernel:

    ::

        >>> from yaplf.algorithms.svm.classification \
        ... import SVMClassificationAlgorithm
        >>> from yaplf.models.kernel import PolynomialKernel
        >>> alg = SVMClassificationAlgorithm(xor_sample,
        ... kernel = PolynomialKernel(2))

    Running the algorithm and subsequently accessing its ``model`` field
    gives the learnt SV classifier:

    ::

        >>> alg.run()
        >>> alg.model
        SVMClassifier([2.000000000043493, 3.3333333334006983, 2.6666666667220955, 2.6666666667220955], -1.00000000001, [LabeledExample((1, 1), -1.0), LabeledExample((0, 0), -1.0), LabeledExample((0, 1), 1.0), LabeledExample((1, 0), 1.0)], kernel = PolynomialKernel(2))

    The latter can be tested on the original sample in order to verify that
    learning succeeded:

    ::

        >>> alg.model.test(xor_sample)
        0.0

    As an aside, it should be highlighted that these examples are shown for
    illustrative purpose. The suitable way of assessing a learnt model
    performance involves more complex techniques involving the use of a test
    set possibly coupled with a cross validation procedure (see function
    ``cross_validation`` in package ``yaplf.utility.validation``.)

    Finally, it is worth noting that this class invokes under the hood a
    solver specialized in finding the solution of a quadratic constrained
    optimization problem. This solver is available in various flavours,
    corresponding to specific subclasses of ``SVMClassificationSolver``, all
    defined in package ``yaplf.algorithms.svm.solvers``. Currently, the
    following solvers are available:

    - ``CVXOPTClassificationSolver``, the default choice, solves generic
      quadratic problems;

    - ``PyMLClassificationSolver`` is tailored on the specific optimization
      problem linked to the SV classification task.

    A specific solver can be selected using the ``solver`` named argument
    when creating an instance of ``SVMClassificationAlgorithm``:

    ::

        >>> from yaplf.algorithms.svm.classification.solvers \
        ... import PyMLClassificationSolver
        >>> alg = SVMClassificationAlgorithm(xor_sample,
        ... kernel = PolynomialKernel(2), solver = PyMLClassificationSolver())
        >>> alg.run() # doctest:+ELLIPSIS
        Cpos, Cneg...
        >>> print alg.model
        SVMClassifier([2.000000000030325, 3.3333333333791955, 2.6666666667061474, 2.666666666703373], -1.00000000001, [LabeledExample((1, 1), -1.0), LabeledExample((0, 0), -1.0), LabeledExample((0, 1), 1.0), LabeledExample((1, 0), 1.0)], kernel = PolynomialKernel(2))

    Note how the results slightly change when using different solvers. The
    default solver can be modified through the ``default_solver`` class
    variable:

    ::

        >>> SVMClassificationAlgorithm.default_solver = \
        ... PyMLClassificationSolver()
        >>> alg = SVMClassificationAlgorithm(xor_sample,
        ... kernel = PolynomialKernel(2))
        >>> alg.run() # doctest: +ELLIPSIS
        Cpos, Cneg...
        >>> print alg.model
        SVMClassifier([2.0000000000434928, 3.333333333400698, 2.6666666667220946, 2.6666666667220946], -1.00000000001, [LabeledExample((1, 1), -1.0), LabeledExample((0, 0), -1.0), LabeledExample((0, 1), 1.0), LabeledExample((1, 0), 1.0)], kernel = PolynomialKernel(2))

    AUTHORS:

    - Dario Malchiodi (2010-02-22)

    - Dario Malchiodi (2010-04-06): added customizable default solver.

    """

    def __init__(self, sample, **kwargs):
        r"""
        See ``SVMClassificationAlgorithm`` for full documentation.

        """

        LearningAlgorithm.__init__(self, sample)
        check_svm_classification_sample(sample)
        self.sample = sample
        self.model = None

        # ``None`` is the documented default for ``c``: treat an explicit
        # ``c=None`` exactly as an omitted argument, so that ``run`` always
        # finds a value it can compare against the multipliers.
        c = kwargs.get('c')
        self.c = float('inf') if c is None else c

        # Defaults are built lazily, only when the argument is missing.
        self.kernel = kwargs['kernel'] if 'kernel' in kwargs \
            else LinearKernel()

        # NOTE(review): ``default_solver`` is read from the class at call
        # time and is not assigned inside this class body, so it is
        # presumably set elsewhere in the module -- confirm.
        self.solver = kwargs['solver'] if 'solver' in kwargs \
            else SVMClassificationAlgorithm.default_solver

    def _kernel_expansion(self, alpha, pattern):
        r"""
        Return the sum over the whole sample of ``alpha[j]`` times the label
        of the `j`-th example times the kernel value between the pattern of
        the `j`-th example and ``pattern``.

        """

        return sum([alpha[j] * self.sample[j].label *
            self.kernel.compute(self.sample[j].pattern, pattern)
            for j in range(len(self.sample))])

    def run(self):
        r"""
        Run the SVM classification learning algorithm.

        INPUT:

        No input.

        OUTPUT:

        No output. After the invocation the inferred model is available
        through the ``model`` field, in form of a ``SVMClassifier`` instance.

        EXAMPLES:

        Consider the following sample describing the binary XOR function, and
        a ``SVMClassificationAlgorithm`` instance dealing with the
        corresponding learning problem:

        ::

            >>> from yaplf.data import LabeledExample
            >>> xor_sample = [LabeledExample((1, 1), -1),
            ... LabeledExample((0, 0), -1), LabeledExample((0, 1), 1),
            ... LabeledExample((1, 0), 1)]
            >>> from yaplf.algorithms.svm.classification \
            ... import SVMClassificationAlgorithm
            >>> from yaplf.models.kernel import PolynomialKernel
            >>> alg = SVMClassificationAlgorithm(xor_sample,
            ... kernel = PolynomialKernel(2))

        Running the algorithm and subsequently accessing its ``model`` field
        gives the learnt SV classifier:

        ::

            >>> alg.run() # doctest: +ELLIPSIS
            Cpos, Cneg...
            >>> alg.model
            SVMClassifier([2.0000000000434928, 3.333333333400698, 2.6666666667220946, 2.6666666667220946], -1.00000000001, [LabeledExample((1, 1), -1.0), LabeledExample((0, 0), -1.0), LabeledExample((0, 1), 1.0), LabeledExample((1, 0), 1.0)], kernel = PolynomialKernel(2))

        The latter can be tested on the original sample in order to verify
        that learning succeeded:

        ::

            >>> alg.model.test(xor_sample)
            0.0

        As a final remark, it should be highlighted that these examples are
        shown for illustrative purpose. The suitable way of assessing a
        learnt model performance involves more complex techniques involving
        the use of a test set possibly coupled with a cross validation
        procedure (see function ``cross_validation`` in package
        ``yaplf.utility``.)

        AUTHORS:

        - Dario Malchiodi (2010-02-22)

        """

        alpha = self.solver.solve(self.sample, self.c, self.kernel)
        num_examples = len(self.sample)
        hard_margin = self.c == float('inf')

        # The threshold is averaged over the examples whose multiplier is
        # strictly positive and, in the soft-margin case, strictly below c.
        margin_indices = [i for i in range(num_examples)
            if alpha[i] > 0 and (hard_margin or alpha[i] < self.c)]

        # NOTE(review): if no multiplier satisfies the condition above the
        # list handed to ``mean`` is empty; the outcome then depends on the
        # ``mean`` implementation imported by this module -- confirm.
        threshold = mean([self.sample[i].label -
            self._kernel_expansion(alpha, self.sample[i].pattern)
            for i in margin_indices])

        self.model = SVMClassifier(alpha, threshold, self.sample,
            kernel=self.kernel)