def _fit(self, x, y):
        """Fit one binary classifier for every unordered pair of classes.

        The number of distinct labels found in `y` determines how many
        binary classifiers are prepared, and the pairs of class indices
        they are associated with are stored in `_clf_pair_idx`.

        Parameters
        ----------
        x : CArray
            Training samples, shape (n_samples, n_features).
        y : CArray
            Class labels, shape (n_samples,).

        Returns
        -------
        trained_cls : CClassifierMulticlassOVO
            This same instance, after the binary classifiers are trained.

        """
        num_labels = y.unique().size

        # One binary problem per unordered pair of classes: n * (n - 1) / 2
        pair_product = num_labels * (num_labels - 1)
        num_pairs = int(pair_product / 2)
        self.prepare(num_pairs)

        # Pairs (i, j) of class indices with i < j, in lexicographic order
        pair_indices = combinations(range(num_labels), 2)
        self._clf_pair_idx = list(pair_indices)

        # Train the pairwise classifiers, using `self.n_jobs` workers
        training_set = CDataset(x, y)
        self._binary_classifiers = parfor2(
            _fit_one_ovo, self.num_classifiers, self.n_jobs,
            self, training_set, self.verbose)

        return self
    def _fit(self, dataset, n_jobs=1):
        """Fit a One-Vs-All binary classifier for each class of the dataset.

        Parameters
        ----------
        dataset : CDataset
            Training set, holding both the patterns and their labels.
            Its `num_classes` attribute sets how many binary classifiers
            are prepared.
        n_jobs : int
            Number of parallel workers forwarded to `parfor2`. Default 1.

        Returns
        -------
        trained_cls : CClassifierMulticlassOVA
            This same instance, after the binary classifiers are trained.

        """
        # One binary classifier per class
        self.prepare(dataset.num_classes)

        # `self.classes` is read only after `prepare` has run
        num_binary = self.classes.size
        trained = parfor2(
            _fit_one_ova, num_binary, n_jobs, self, dataset, self.verbose)
        self._binary_classifiers = trained

        return self
Exemple #3
0
    def _fit(self, x, y):
        """Fit a One-Vs-All binary classifier for each distinct label in `y`.

        Parameters
        ----------
        x : CArray
            Training samples, shape (n_samples, n_features).
        y : CArray
            Class labels, shape (n_samples,).

        Returns
        -------
        trained_cls : CClassifierMulticlassOVA
            This same instance, after the binary classifiers are trained.

        """
        # One binary classifier per distinct label
        num_labels = y.unique().size
        self.prepare(num_labels)

        # Train every one-vs-all classifier, using `self.n_jobs` workers.
        # `self.classes` is read only after `prepare` has run.
        training_set = CDataset(x, y)
        self._binary_classifiers = parfor2(
            _fit_one_ova, self.classes.size, self.n_jobs,
            self, training_set, self.verbose)

        return self
    def _forward(self, x):
        """Compute the One-Vs-All decision function for each pattern in x.

        Column `i` of the output holds the result returned by
        `_forward_one_ova` for the `i`-th class.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).

        Returns
        -------
        score : CArray
            Array of shape (n_patterns, n_classes) with the value of the
            decision function of each class for each test pattern.

        """
        # Output buffer; every column is overwritten in the loop below
        output_shape = (x.shape[0], self.n_classes)
        scores = CArray.empty(shape=output_shape)

        # Evaluate each per-class classifier, using `self.n_jobs` workers
        per_class = parfor2(
            _forward_one_ova, self.n_classes, self.n_jobs,
            self, x, self.verbose)

        # Copy each per-class result into its own column
        for class_idx in range(self.n_classes):
            class_scores = CArray(per_class[class_idx])
            scores[:, class_idx] = class_scores

        return scores
Exemple #5
0
    def _fit_one_vs_all(self, x, y, svc_kernel):
        """Train one one-vs-all problem per class and store the results.

        `_fit_one_ova` is run once per class through `parfor2`. For class
        `i`, the returned sequence is consumed as follows: when
        `self.kernel` is None, element 0 is written to row `i` of `_w`;
        otherwise element 2 is written to `_alpha[i, <element 1>]`.
        Element 3 is always written to `_b[i]`.

        Parameters
        ----------
        x, y
            Training data and labels, forwarded untouched to `_fit_one_ova`.
        svc_kernel
            Forwarded untouched to `_fit_one_ova`.

        """
        # One-vs-all only; a one-vs-one scheme would be a separate function
        fitted = parfor2(
            _fit_one_ova, self.n_classes, self.n_jobs,
            self, x, y, svc_kernel, self.verbose)

        # Scatter the per-class results into the model attributes
        for class_idx in range(self.n_classes):
            class_fit = fitted[class_idx]
            if self.kernel is not None:
                self._alpha[class_idx, class_fit[1]] = class_fit[2]
            else:
                self._w[class_idx, :] = class_fit[0]
            self._b[class_idx] = class_fit[3]
    def _forward(self, x):
        """Compute the One-Vs-One decision function for each pattern in x.

        Every binary classifier contributes to both classes of its pair:
        column 1 of its output is added to the score of the first class of
        the pair and column 0 to the score of the second one. The
        accumulated scores are finally divided by `n_classes - 1`.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).

        Returns
        -------
        score : CArray
            Array of shape (n_patterns, n_classes) with the aggregated
            score of each class for each test pattern.

        """
        # Accumulator, one column per class
        output_shape = (x.shape[0], self.n_classes)
        scores = CArray.zeros(shape=output_shape)

        # Evaluate each pairwise classifier, using `self.n_jobs` workers
        pairwise = parfor2(
            _forward_one_ovo, self.num_classifiers, self.n_jobs,
            self, x, self.verbose)

        # Add the two output columns of each classifier to its pair of classes
        for clf_idx in range(self.num_classifiers):
            class_pair = self._clf_pair_idx[clf_idx]
            clf_out = pairwise[clf_idx]
            scores[:, class_pair[0]] += clf_out[:, 1]
            scores[:, class_pair[1]] += clf_out[:, 0]

        normalizer = self.n_classes - 1
        return scores / normalizer
    def evaluate_params(
            self, estimator, dataset, parameters, pick='first', n_jobs=1):
        """Evaluate parameters for input estimator on input dataset.

        A deep copy of the estimator is taken before the evaluation and its
        attributes are written back to `estimator` afterwards. The restore
        step also runs when the evaluation raises, so the caller never gets
        back an estimator left with the attributes of a partial evaluation.
        Attributes added to `estimator` during the evaluation are not removed.

        Parameters
        ----------
        estimator : CClassifier
            The classifier for which we want to choose the best parameters.
        dataset : CDataset
            Dataset to be used for evaluating parameters.
        parameters : dict
            Dictionary with each entry as {parameter: list of values to test}.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameters set to pick.
            Usually, 'first' (default) corresponds to the smallest
            parameters while 'last' corresponds to the biggest.
            The order is consistent with the parameters dict passed as input.
        n_jobs : int, optional
            Number of parallel workers to use. Default 1.

        Returns
        -------
        best_params_dict : dict
            A dictionary with the best value for each evaluated parameter.
        best_value : any
            Metric value obtained on validation set by the estimator.

        Raises
        ------
        TypeError
            If the values of any parameter are not specified as a list.

        """
        self.logger.info("Parameters to evaluate: {:}".format(parameters))

        # FIRST OF ALL: save current classifier to restore later
        original_estimator = deepcopy(estimator)

        try:
            # Compute dataset splits
            self.splitter.compute_indices(dataset)

            # OrderedDict returns keys always in the same order,
            # so we are safe when iterating on params_matrix.shape[1]
            parameters = OrderedDict(
                sorted(parameters.items(), key=lambda t: t[0]))

            # List of lists with the indices of each parameter's values
            params_idx = []
            for param_name, param_values in parameters.items():
                if not isinstance(param_values, list):
                    raise TypeError("values for parameter `{:}` must be "
                                    "specified as a list.".format(param_name))
                params_idx.append(list(range(len(param_values))))

            # Matrix of indices, e.g. [[1,1] [1,2], ..]: each row holds
            # the indices of the parameters' values to be set
            params_matrix = CArray.comblist(params_idx).astype(int)

            # Parallelize (if requested) over the rows of params_matrix
            res_vect = parfor2(_evaluate_one, params_matrix.shape[0],
                               n_jobs, self, parameters, params_matrix,
                               estimator, dataset, self.verbose)
            # Transforming the list to array
            res_vect = CArray(res_vect)

            # Retrieve the best parameters
            best_params_dict, best_value = self._get_best_params(
                res_vect, parameters, params_matrix, pick=pick)

            self.logger.info("Best params: {:} - Value: {:}".format(
                best_params_dict, best_value))
        finally:
            # Restore original attributes of the estimator, on both the
            # success and the failure path
            for param in original_estimator.__dict__:
                estimator.__dict__[param] = original_estimator.__dict__[param]

        return best_params_dict, best_value