Example #1
0
    def test_comblist(self):
        """Test for comblist() classmethod."""
        self.logger.info("Test for comblist() classmethod.")

        # Two non-empty value lists: result is their cartesian product,
        # one combination per row.
        lists = [[1, 2], [4]]
        self.logger.info("list of lists: \n{:}".format(lists))
        comb_array = CArray.comblist(lists)
        self.logger.info("comblist(l): \n{:}".format(comb_array))
        self.assertTrue((comb_array == CArray([[1., 4.], [2., 4.]])).all())

        # An empty sublist contributes no column: result keeps only the
        # values of the non-empty list.
        lists = [[1, 2], []]
        self.logger.info("list of lists: \n{:}".format(lists))
        comb_array = CArray.comblist(lists)
        self.logger.info("comblist(l): \n{:}".format(comb_array))
        self.assertTrue((comb_array == CArray([[1.], [2.]])).all())

        # All-empty input. NOTE(review): `.all()` on an empty comparison
        # is presumably vacuously True (numpy semantics), so this mainly
        # checks that comblist does not raise on empty input.
        lists = [[], []]
        comb_array = CArray.comblist(lists)
        self.logger.info("comblist(l): \n{:}".format(comb_array))
        self.assertTrue((comb_array == CArray([])).all())
    def evaluate_params(
            self, estimator, dataset, parameters, pick='first', n_jobs=1):
        """Evaluate parameters for input estimator on input dataset.

        Parameters
        ----------
        estimator : CClassifier
            The classifier for which we want to choose the best parameters.
        dataset : CDataset
            Dataset to be used for evaluating parameters.
        parameters : dict
            Dictionary with each entry as {parameter: list of values to test}.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameters set pick.
            Usually, 'first' (default) correspond to the smallest
            parameters while 'last' correspond to the biggest.
            The order is consistent to the parameters dict passed as input.
        n_jobs : int, optional
            Number of parallel workers to use. Default 1.
            Cannot be higher than processor's number of cores.

        Returns
        -------
        best_param_dict : dict
            A dictionary with the best value for each evaluated parameter.
        best_value : any
            Metric value obtained on validation set by the estimator.

        Raises
        ------
        TypeError
            If the values of any parameter are not given as a list.

        """
        self.logger.info("Parameters to evaluate: {:}".format(parameters))

        # Snapshot the classifier state so it can be restored at the end
        pristine = deepcopy(estimator)

        # Precompute the dataset splits used for validation
        self.splitter.compute_indices(dataset)

        # Sort the dict by key: OrderedDict then guarantees a stable
        # iteration order matching the columns of params_matrix
        parameters = OrderedDict(
            sorted(parameters.items(), key=lambda t: t[0]))

        # For each parameter (in key order) collect the indices of its
        # candidate values, validating the input format along the way
        params_idx = []
        for name in parameters:
            values = parameters[name]
            if not isinstance(values, list):
                raise TypeError("values for parameter `{:}` must be "
                                "specified as a list.".format(name))
            params_idx.append(list(range(len(values))))

        # Matrix of indices: each row is one combination of parameter
        # values to be set and evaluated, e.g. [[1,1], [1,2], ...]
        params_matrix = CArray.comblist(params_idx).astype(int)

        # Evaluate one combination per row, parallelized over n_jobs
        # workers if requested; collect results into an array
        res_vect = CArray(parfor2(_evaluate_one, params_matrix.shape[0],
                                  n_jobs, self, parameters, params_matrix,
                                  estimator, dataset, self.verbose))

        # Pick the best-scoring parameter set according to `pick`
        best_params_dict, best_value = self._get_best_params(
            res_vect, parameters, params_matrix, pick=pick)

        self.logger.info("Best params: {:} - Value: {:}".format(
            best_params_dict, best_value))

        # Restore the classifier attributes from the untouched snapshot
        for attr_name, attr_value in pristine.__dict__.items():
            estimator.__dict__[attr_name] = attr_value

        return best_params_dict, best_value