def _fit(self, x, y):
    """Train the multiclass classifier with a One-Vs-One scheme.

    One binary classifier is prepared for every unordered pair of
    class indices, then all of them are fitted through `parfor2`.

    Parameters
    ----------
    x : CArray
        Training samples, shape (n_samples, n_features).
    y : CArray
        Class labels, shape (n_samples,).

    Returns
    -------
    trained_cls : CClassifierMulticlassOVO
        This same instance, returned after training.

    """
    num_labels = y.unique().size

    # A one-vs-one scheme needs n * (n - 1) / 2 binary classifiers
    num_pairs = int((num_labels * (num_labels - 1)) / 2)
    self.prepare(num_pairs)

    # Pair of class indices handled by each binary classifier,
    # in the order produced by `combinations`
    self._clf_pair_idx = list(combinations(range(num_labels), 2))

    # Fit every binary classifier using the configured number of workers
    training_set = CDataset(x, y)
    self._binary_classifiers = parfor2(
        _fit_one_ovo, self.num_classifiers, self.n_jobs,
        self, training_set, self.verbose)

    return self
def _fit(self, dataset, n_jobs=1):
    """Train the multiclass classifier with a One-Vs-All scheme.

    One binary classifier is prepared per dataset class, then all of
    them are fitted through `parfor2`.

    Parameters
    ----------
    dataset : CDataset
        Training set, holding both the patterns and their labels.
    n_jobs : int
        Number of workers forwarded to `parfor2`. Default 1.

    Returns
    -------
    trained_cls : CClassifierMulticlassOVA
        This same instance, returned after training.

    """
    # One binary classifier per class of the training set
    self.prepare(dataset.num_classes)

    # Fit each one-vs-all classifier with the requested workers
    fitted = parfor2(
        _fit_one_ova, self.classes.size, n_jobs,
        self, dataset, self.verbose)
    self._binary_classifiers = fitted

    return self
def _fit(self, x, y):
    """Train the multiclass classifier with a One-Vs-All scheme.

    One binary classifier is prepared per distinct label in `y`, then
    all of them are fitted through `parfor2`.

    Parameters
    ----------
    x : CArray
        Training samples, shape (n_samples, n_features).
    y : CArray
        Class labels, shape (n_samples,).

    Returns
    -------
    trained_cls : CClassifierMulticlassOVA
        This same instance, returned after training.

    """
    # One binary classifier per distinct label
    num_labels = y.unique().size
    self.prepare(num_labels)

    # Fit each one-vs-all classifier using the configured workers
    fitted = parfor2(
        _fit_one_ova, self.classes.size, self.n_jobs,
        self, CDataset(x, y), self.verbose)
    self._binary_classifiers = fitted

    return self
def _forward(self, x):
    """Compute the One-Vs-All decision function for each pattern in x.

    Column `i` of the output holds the result returned by `parfor2`
    for the `i`-th one-vs-all task.

    Parameters
    ----------
    x : CArray
        Patterns to classify, 2-Dimensional array of shape
        (n_patterns, n_features).

    Returns
    -------
    score : CArray
        Array of shape (n_patterns, n_classes) with the value of the
        decision function of every class for every pattern.

    """
    # Output buffer; every column is overwritten below
    out = CArray.empty(shape=(x.shape[0], self.n_classes))

    # Evaluate the decision function of every class
    per_class = parfor2(
        _forward_one_ova, self.n_classes, self.n_jobs,
        self, x, self.verbose)

    # Store the scores of each class in its own column
    for cls_idx in range(self.n_classes):
        out[:, cls_idx] = CArray(per_class[cls_idx])

    return out
def _fit_one_vs_all(self, x, y, svc_kernel):
    """Fit one binary problem per class and store the learned values.

    Every task is run through `parfor2`; the 4-item result of task `i`
    is then copied into row `i` of the internal arrays.

    Parameters
    ----------
    x, y
        Training data and labels, forwarded untouched to the
        per-class fit function.
    svc_kernel
        Kernel specification, forwarded untouched to the per-class
        fit function.

    """
    # One-vs-all only; a one-vs-one variant would be a separate method
    results = parfor2(
        _fit_one_ova, self.n_classes, self.n_jobs,
        self, x, y, svc_kernel, self.verbose)

    # Copy the result of each task into the row of its class
    for cls_idx in range(self.n_classes):
        fitted = results[cls_idx]
        if self.kernel is not None:
            # presumably fitted[1] are support-vector indices and
            # fitted[2] the matching dual coefficients -- TODO confirm
            self._alpha[cls_idx, fitted[1]] = fitted[2]
        else:
            # No kernel: item 0 fills the whole row of `_w`
            self._w[cls_idx, :] = fitted[0]
        # Item 3 is stored in `_b` in both cases
        self._b[cls_idx] = fitted[3]
def _forward(self, x):
    """Compute the One-Vs-One decision function for each pattern in x.

    Each binary classifier contributes to both classes of its pair:
    column 1 of its output is added to the first class of the pair
    and column 0 to the second one. The accumulated scores are then
    divided by `n_classes - 1`.

    Parameters
    ----------
    x : CArray
        Patterns to classify, 2-Dimensional array of shape
        (n_patterns, n_features).

    Returns
    -------
    score : CArray
        Array of shape (n_patterns, n_classes) with the value of the
        decision function of every class for every pattern.

    """
    totals = CArray.zeros(shape=(x.shape[0], self.n_classes))

    # Evaluate the decision function of every binary classifier
    pair_scores = parfor2(
        _forward_one_ovo, self.num_classifiers, self.n_jobs,
        self, x, self.verbose)

    # Accumulate the contribution of each pair into its two classes
    for clf_idx in range(self.num_classifiers):
        first_cls, second_cls = self._clf_pair_idx[clf_idx]
        totals[:, first_cls] += pair_scores[clf_idx][:, 1]
        totals[:, second_cls] += pair_scores[clf_idx][:, 0]

    # presumably an average: with all pairs of classes, each class
    # takes part in n_classes - 1 binary problems
    return totals / (self.n_classes - 1)
def evaluate_params(
        self, estimator, dataset, parameters, pick='first', n_jobs=1):
    """Evaluate parameters for input estimator on input dataset.

    The state of `estimator` is saved before the evaluation and put
    back afterwards, also when the evaluation raises an error.

    Parameters
    ----------
    estimator : CClassifier
        The classifier for which we want to choose the best parameters.
    dataset : CDataset
        Dataset to be used for evaluating parameters.
    parameters : dict
        Dictionary with each entry as {parameter: list of values to test}.
    pick : {'first', 'last', 'random'}, optional
        Defines which of the best parameters set pick.
        Usually, 'first' (default) correspond to the smallest
        parameters while 'last' correspond to the biggest.
        The order is consistent to the parameters dict passed as input.
    n_jobs : int, optional
        Number of parallel workers to use. Default 1.
        Cannot be higher than processor's number of cores.

    Returns
    -------
    best_param_dict : dict
        A dictionary with the best value for each evaluated parameter.
    best_value : any
        Metric value obtained on validation set by the estimator.

    Raises
    ------
    TypeError
        If the values of any parameter are not specified as a list.

    """
    self.logger.info("Parameters to evaluate: {:}".format(parameters))

    # FIRST OF ALL: save current classifier to restore later
    original_estimator = deepcopy(estimator)

    # Compute dataset splits
    self.splitter.compute_indices(dataset)

    # Sort parameters by name so the column order of params_matrix
    # is deterministic and matches the iteration order used below
    parameters = OrderedDict(
        sorted(parameters.items(), key=lambda t: t[0]))

    # For each parameter, the list of indices of its candidate values
    params_idx = []
    for param_name, values in parameters.items():
        if not isinstance(values, list):
            raise TypeError("values for parameter `{:}` must be "
                            "specified as a list.".format(param_name))
        params_idx.append(list(range(len(values))))

    # Matrix of indices, e.g. [[1,1] [1,2], ..]: each row corresponds
    # to one combination of parameters' values to be set
    params_matrix = CArray.comblist(params_idx).astype(int)

    try:
        # Parallelize (if requested) over the rows of params_matrix
        res_vect = parfor2(_evaluate_one, params_matrix.shape[0], n_jobs,
                           self, parameters, params_matrix,
                           estimator, dataset, self.verbose)

        # Transforming the list to array
        res_vect = CArray(res_vect)

        # Retrieve the best parameters
        best_params_dict, best_value = self._get_best_params(
            res_vect, parameters, params_matrix, pick=pick)

        self.logger.info("Best params: {:} - Value: {:}".format(
            best_params_dict, best_value))
    finally:
        # Restore the original attributes of the classifier even if the
        # evaluation failed, so the caller never gets back an estimator
        # left in the state of a partially evaluated configuration
        estimator.__dict__.update(original_estimator.__dict__)

    return best_params_dict, best_value