Exemple #1
0
    def cross_validate(self, dataset, nr_fold):
        """
        Estimate classification accuracy by stratified cross-validation.

        ``dataset`` is split into ``nr_fold`` folds; every fold is
        predicted by a model trained on the other ``nr_fold - 1``
        folds, with ``self.param`` as the model parameters.  The work
        is delegated to ``libsvm.svm_cross_validation``.

        Returns the percentage of entries in ``dataset.data`` whose
        first element equals the integer-truncated prediction for
        that entry, taken over all folds.
        """
        svm_problem = dataset._create_svm_problem()
        n_samples = len(dataset.data)

        # Buffer handed to libsvm; read back below as one prediction
        # per entry of dataset.data.
        predicted = N.empty((n_samples,), dtype=N.float64)
        predicted_ptr = predicted.ctypes.data_as(POINTER(c_double))
        libsvm.svm_cross_validation(
            svm_problem, self.param, nr_fold, predicted_ptr)

        hits = sum(
            1
            for sample, label in zip(dataset.data, predicted)
            if sample[0] == int(label)
        )
        # XXX also return results from folds in a list
        return 100.0 * hits / n_samples
    def cross_validate(self, dataset, nr_fold):
        """
        Run stratified cross-validation to judge how suitable the
        chosen model parameters are.

        The data are divided into nr_fold folds.  Each fold is
        validated against a model trained on the data of the
        remaining (nr_fold-1) folds; the fold handling itself is done
        by libsvm.svm_cross_validation using self.param.

        The return value is the percentage of data that was
        classified correctly over all the experiments.
        """
        problem = dataset._create_svm_problem()
        total = len(dataset.data)

        # libsvm writes its results into this array through the raw
        # double pointer created from it.
        predictions = N.empty((total,), dtype=N.float64)
        raw_out = predictions.ctypes.data_as(POINTER(c_double))
        libsvm.svm_cross_validation(problem, self.param, nr_fold, raw_out)

        n_correct = 0
        for entry, guess in zip(dataset.data, predictions):
            # entry[0] is compared against the prediction truncated
            # to an integer.
            n_correct += 1 if entry[0] == int(guess) else 0

        # XXX also return results from folds in a list
        return 100.0 * n_correct / total
Exemple #3
0
    def cross_validate(self, dataset, nr_fold):
        """
        Cross-validate the chosen model parameters and report the
        quality of the resulting predictions.

        The data are separated into nr_fold folds.  Each fold is
        validated against a model trained on the remaining
        (nr_fold-1) folds; this is delegated to
        libsvm.svm_cross_validation with self.param.

        Returns a 2-tuple ``(mse, scc)``: the mean squared error and
        the squared correlation coefficient between the predictions
        and the first element of each ``dataset.data`` entry.
        """
        svm_problem = dataset._create_svm_problem()
        predicted = N.empty((len(dataset.data),), dtype=N.float64)
        out_ptr = predicted.ctypes.data_as(POINTER(c_double))
        libsvm.svm_cross_validation(svm_problem, self.param, nr_fold, out_ptr)

        # Running sums: p = predicted value, a = value stored in the data.
        sq_err = sum_p = sum_a = sum_pp = sum_aa = sum_pa = 0.0
        for p, sample in zip(predicted, dataset.data):
            a = sample[0]
            diff = p - a
            sum_p += p
            sum_a += a
            sum_pp += p * p
            sum_aa += a * a
            sum_pa += p * a
            sq_err += diff * diff

        # mean squared error
        mse = sq_err / len(dataset.data)

        # squared correlation coefficient
        n = len(dataset.data)
        covariance_term = n * sum_pa - sum_p * sum_a
        spread_p = n * sum_pp - sum_p * sum_p
        spread_a = n * sum_aa - sum_a * sum_a
        scc = (covariance_term * covariance_term) / (spread_p * spread_a)

        return mse, scc
Exemple #4
0
    def cross_validate(self, dataset, nr_fold):
        """
        Perform cross-validation to determine the suitability of the
        chosen model parameters.

        The data are split into nr_fold folds, and each fold is
        validated against a model trained using the data from the
        other (nr_fold-1) folds.  The splitting and training are
        carried out by libsvm.svm_cross_validation with self.param.

        The result is the 2-tuple (mean squared error, squared
        correlation coefficient), both computed from the predictions
        and the first element of every dataset.data entry.
        """
        problem = dataset._create_svm_problem()
        fitted_values = N.empty((len(dataset.data),), dtype=N.float64)
        fitted_ptr = fitted_values.ctypes.data_as(POINTER(c_double))
        libsvm.svm_cross_validation(problem, self.param, nr_fold, fitted_ptr)

        residual_sq = 0.
        fit_sum = obs_sum = 0.
        fit_sq_sum = obs_sq_sum = cross_sum = 0.
        for idx, fitted in enumerate(fitted_values):
            observed = dataset.data[idx][0]
            residual = fitted - observed
            fit_sum = fit_sum + fitted
            obs_sum = obs_sum + observed
            fit_sq_sum = fit_sq_sum + fitted * fitted
            obs_sq_sum = obs_sq_sum + observed * observed
            cross_sum = cross_sum + fitted * observed
            residual_sq = residual_sq + residual * residual

        # mean squared error
        mse = residual_sq / len(dataset.data)

        # squared correlation coefficient
        count = len(dataset.data)
        numerator_root = count * cross_sum - fit_sum * obs_sum
        denominator = ((count * fit_sq_sum - fit_sum * fit_sum) *
                       (count * obs_sq_sum - obs_sum * obs_sum))
        scc = (numerator_root * numerator_root) / denominator

        return mse, scc