Example #1
 def __init__(self, node, **kwargs):
     super(CVBestSearchRefit, self).__init__(estimator=None)
     # default score key: 'y/test/score_recall_mean'
     score = kwargs.pop("score") if "score" in kwargs else 'y/test/score_recall_mean'
     arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
     from epac.workflow.splitters import CV
     #methods = Methods(*tasks)
     self.cv = CV(node=node, reducer=ClassificationReport(keep=False), **kwargs)
     self.score = score
     self.arg_max = arg_max
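
The two "kwargs.pop(...) if ... in kwargs else ..." lines above can be written more compactly with dict.pop's default argument. A minimal sketch of the equivalent idiom; the "n_folds" entry is a hypothetical example of a kwarg left over for CV:

kwargs = {"n_folds": 5}  # hypothetical kwargs received by __init__

# dict.pop(key, default) removes the key when present and returns the
# default otherwise, so no explicit "in kwargs" check is needed.
score = kwargs.pop("score", 'y/test/score_recall_mean')
arg_max = kwargs.pop("arg_max", True)

assert score == 'y/test/score_recall_mean' and arg_max is True
assert kwargs == {"n_folds": 5}  # the rest remains available for CV(**kwargs)
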
Example #2
 def __init__(self, node, **kwargs):
     super(CVBestSearchRefit, self).__init__(wrapped_node=None)
     #### 'y/test/score_recall_mean'
     default_score = "y" + conf.SEP + \
                     conf.TEST + conf.SEP + \
                     conf.SCORE_RECALL_MEAN
     score = kwargs.pop("score") if "score" in kwargs else default_score
     arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
     from epac.workflow.splitters import CV
     #methods = Methods(*tasks)
     self.cv = CV(node=node,
                  reducer=ClassificationReport(keep=False),
                  **kwargs)
     self.score = score
     self.arg_max = arg_max
Example #3
 def __init__(self, node, **kwargs):
     super(CVBestSearchRefit, self).__init__(wrapped_node=None)
     #### 'y/test/score_recall_mean'
     default_score = "y" + conf.SEP + \
                     conf.TEST + conf.SEP + \
                     conf.SCORE_RECALL_MEAN
     score = kwargs.pop("score") if "score" in kwargs else default_score
     arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
     from epac.workflow.splitters import CV
     #methods = Methods(*tasks)
     self.cv = CV(node=node, reducer=ClassificationReport(keep=False),
                  **kwargs)
     self.score = score
     self.arg_max = arg_max
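
As the "#### 'y/test/score_recall_mean'" comment indicates, the conf-based construction in Examples #2 and #3 rebuilds the same key literal that Example #1 hard-codes. A minimal sketch with stand-in constants, assuming epac's conf module holds the fragments visible in that literal:

# Stand-in values inferred from the literal in Example #1; the real
# constants live in epac's conf module.
SEP = "/"
TEST = "test"
SCORE_RECALL_MEAN = "score_recall_mean"

default_score = "y" + SEP + TEST + SEP + SCORE_RECALL_MEAN
assert default_score == "y/test/score_recall_mean"
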
Example #4
class CVBestSearchRefit(Estimator):
    """Cross-validation + grid-search then refit with optimals parameters.

    Average results over first axis, then find the arguments that maximize or
    minimise a "score" over other axis.

    Parameters
    ----------

    See CV parameters, plus other parameters:

    score: string
        the score name to be optimized (default "y/test/score_recall_mean").

    arg_max: boolean
        If True, take the parameters that maximize the score; if False, take
        those that minimize it. Default is True.
    """

    def __init__(self, node, **kwargs):
        super(CVBestSearchRefit, self).__init__(estimator=None)
        # default score key: 'y/test/score_recall_mean'
        score = kwargs.pop("score") if "score" in kwargs else 'y/test/score_recall_mean'
        arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
        from epac.workflow.splitters import CV
        #methods = Methods(*tasks)
        self.cv = CV(node=node, reducer=ClassificationReport(keep=False), **kwargs)
        self.score = score
        self.arg_max = arg_max

    def get_signature(self):
        return self.__class__.__name__

    def transform(self, **Xy):
        Xy_train, Xy_test = train_test_split(Xy)
        if Xy_train is Xy_test:
            to_refit, best_params = self._search_best(**Xy)
        else:
            to_refit, best_params = self._search_best(**Xy_train)
        out = to_refit.top_down(**Xy)
        out[conf.BEST_PARAMS] = best_params
        self.refited = to_refit
        self.best_params = best_params
        return out

    def _search_best(self, **Xy):
        # Fit/predict CV grid search
        self.cv.store = StoreMem()  # local store erased at each fit
        from epac.workflow.pipeline import Pipe
        self.cv.top_down(**Xy)
        #  Pump-up results
        cv_result_set = self.cv.reduce(store_results=False)
        key_val = [(result.key(), result[self.score]) \
                for result in cv_result_set]
        scores = np.asarray(zip(*key_val)[1])
        scores_opt = np.max(scores) if self.arg_max else np.min(scores)
        idx_best = np.where(scores == scores_opt)[0][0]
        best_key = key_val[idx_best][0]
        # Find nodes that match the best
        nodes_dict = {n.get_signature(): n for n in self.cv.walk_true_nodes() \
            if n.get_signature() in key_split(best_key)}
        to_refit = Pipe(*[nodes_dict[k].estimator for k in key_split(best_key)])
        best_params = [dict(sig) for sig in key_split(best_key, eval=True)]
        return to_refit, best_params

    def reduce(self, store_results=True):
        # Termination (leaf) node: return the result_set
        return self.load_state(name="result_set")
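
Note that zip(*key_val)[1] in _search_best only works on Python 2, where zip returns a list. The selection step itself is a plain argmax/argmin over the reduced scores; a self-contained sketch that also runs on Python 3, with placeholder keys and scores standing in for the real CV result set:

import numpy as np

# (key, score) pairs as pumped up from the CV result set; the keys and
# values here are placeholders, not real EPAC node keys.
key_val = [("SVC(C=0.1)", 0.75), ("SVC(C=1)", 0.92), ("SVC(C=5)", 0.83)]
arg_max = True  # maximize the score, as in the default

# On Python 3, build the score array explicitly instead of indexing zip().
scores = np.asarray([val for _, val in key_val])
idx_best = int(np.argmax(scores)) if arg_max else int(np.argmin(scores))
best_key = key_val[idx_best][0]
print(best_key)  # -> SVC(C=1)

np.argmax returns the first index of the maximum, which matches the original np.where(scores == scores_opt)[0][0] behavior when several keys tie on the best score.
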
Example #5
class CVBestSearchRefit(Wrapper):
    """Cross-validation + grid-search then refit with optimals parameters.

    Average results over first axis, then find the arguments that maximize or
    minimise a "score" over other axis.

    Parameters
    ----------

    See CV parameters, plus other parameters:

    score: string
        the score name to be optimized (default "y/test/score_recall_mean").

    arg_max: boolean
        If True, take the parameters that maximize the score; if False, take
        those that minimize it. Default is True.

    Example
    -------
    >>> from sklearn import datasets
    >>> from sklearn.svm import SVC
    >>> from epac import Methods
    >>> from epac.workflow.splitters import CVBestSearchRefit
    >>> X, y = datasets.make_classification(n_samples=12,
    ... n_features=10,
    ... n_informative=2,
    ... random_state=1)
    >>> n_folds_nested = 2
    >>> C_values = [.1, 0.5, 1, 2, 5]
    >>> kernels = ["linear", "rbf"]
    >>> methods = Methods(*[SVC(C=C, kernel=kernel)
    ...     for C in C_values for kernel in kernels])
    >>> wf = CVBestSearchRefit(methods, n_folds=n_folds_nested)
    >>> wf.transform(X=X, y=y)
    {'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}
    >>> wf.reduce()
    >>> wf.run(X=X, y=y)
    {'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}
    >>> wf.reduce()
    ResultSet(
    [{'key': CVBestSearchRefit, 'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': [1 0 0 1 0 0 1 0 1 1 0 1], 'y/pred': [1 0 0 1 0 0 1 0 1 1 0 1]}])

    """
    def __init__(self, node, **kwargs):
        super(CVBestSearchRefit, self).__init__(wrapped_node=None)
        #### 'y/test/score_recall_mean'
        default_score = "y" + conf.SEP + \
                        conf.TEST + conf.SEP + \
                        conf.SCORE_RECALL_MEAN
        score = kwargs.pop("score") if "score" in kwargs else default_score
        arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
        from epac.workflow.splitters import CV
        #methods = Methods(*tasks)
        self.cv = CV(node=node,
                     reducer=ClassificationReport(keep=False),
                     **kwargs)
        self.score = score
        self.arg_max = arg_max


#        warnings.warn("%s is deprecated. Please use %s instead." % \
#                        (self.__class__.__name__,\
#                         CVBestSearchRefitParallel.__name__),
#                        category=DeprecationWarning)

    def get_signature(self):
        return self.__class__.__name__

    def transform(self, **Xy):
        Xy_train, Xy_test = train_test_split(Xy)
        if Xy_train is Xy_test:
            to_refit, best_params = self._search_best(**Xy)
        else:
            to_refit, best_params = self._search_best(**Xy_train)
        out = to_refit.top_down(**Xy)
        out[conf.BEST_PARAMS] = best_params
        self.refited = to_refit
        self.best_params = best_params
        return out

    def _search_best(self, **Xy):
        # Fit/predict CV grid search
        self.cv.store = StoreMem()  # local store erased at each fit
        from epac.workflow.pipeline import Pipe
        self.cv.top_down(**Xy)
        #  Pump-up results
        cv_result_set = self.cv.reduce(store_results=False)
        key_val = [(result.key(), result[self.score])
                   for result in cv_result_set]
        scores = np.asarray(zip(*key_val)[1])
        scores_opt = np.max(scores) if self.arg_max else np.min(scores)
        idx_best = np.where(scores == scores_opt)[0][0]
        best_key = key_val[idx_best][0]
        # Find nodes that match the best
        nodes_dict = {
            n.get_signature(): n
            for n in self.cv.walk_true_nodes()
            if n.get_signature() in key_split(best_key)
        }
        to_refit = Pipe(
            *[nodes_dict[k].wrapped_node for k in key_split(best_key)])
        best_params = [dict(sig) for sig in key_split(best_key, eval=True)]
        return to_refit, best_params

    def reduce(self, store_results=True):
        # Termination (leaf) node: return the result_set
        return self.load_results()
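
In practice CVBestSearchRefit is usually nested inside an outer cross-validation so that the grid search only ever sees the training folds. A hedged sketch building on the doctest above, assuming the CV splitter accepts node/n_folds as in __init__ and exposes the same run/reduce interface used in the doctest:

from sklearn import datasets
from sklearn.svm import SVC
from epac import Methods
from epac.workflow.splitters import CV, CVBestSearchRefit

X, y = datasets.make_classification(n_samples=12, n_features=10,
                                     n_informative=2, random_state=1)
methods = Methods(*[SVC(C=C, kernel=kernel)
                    for C in [.1, 1, 5] for kernel in ["linear", "rbf"]])
# Inner loop: grid search + refit on the training folds;
# outer loop: evaluation of the refitted model on the left-out fold.
nested_cv = CV(node=CVBestSearchRefit(methods, n_folds=2), n_folds=3)
nested_cv.run(X=X, y=y)
print(nested_cv.reduce())
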
Example #6
class CVBestSearchRefit(Wrapper):
    """Cross-validation + grid-search then refit with optimals parameters.

    Average results over first axis, then find the arguments that maximize or
    minimise a "score" over other axis.

    Parameters
    ----------

    See CV parameters, plus other parameters:

    score: string
        the score name to be optimized (default "y/test/score_recall_mean").

    arg_max: boolean
        If True, take the parameters that maximize the score; if False, take
        those that minimize it. Default is True.

    Example
    -------
    >>> from sklearn import datasets
    >>> from sklearn.svm import SVC
    >>> from epac import Methods
    >>> from epac.workflow.splitters import CVBestSearchRefit
    >>> X, y = datasets.make_classification(n_samples=12,
    ... n_features=10,
    ... n_informative=2,
    ... random_state=1)
    >>> n_folds_nested = 2
    >>> C_values = [.1, 0.5, 1, 2, 5]
    >>> kernels = ["linear", "rbf"]
    >>> methods = Methods(*[SVC(C=C, kernel=kernel)
    ...     for C in C_values for kernel in kernels])
    >>> wf = CVBestSearchRefit(methods, n_folds=n_folds_nested)
    >>> wf.transform(X=X, y=y)
    {'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}
    >>> wf.reduce()
    >>> wf.run(X=X, y=y)
    {'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}
    >>> wf.reduce()
    ResultSet(
    [{'key': CVBestSearchRefit, 'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': [1 0 0 1 0 0 1 0 1 1 0 1], 'y/pred': [1 0 0 1 0 0 1 0 1 1 0 1]}])

    """

    def __init__(self, node, **kwargs):
        super(CVBestSearchRefit, self).__init__(wrapped_node=None)
        #### 'y/test/score_recall_mean'
        default_score = "y" + conf.SEP + \
                        conf.TEST + conf.SEP + \
                        conf.SCORE_RECALL_MEAN
        score = kwargs.pop("score") if "score" in kwargs else default_score
        arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
        from epac.workflow.splitters import CV
        #methods = Methods(*tasks)
        self.cv = CV(node=node, reducer=ClassificationReport(keep=False),
                     **kwargs)
        self.score = score
        self.arg_max = arg_max
#        warnings.warn("%s is deprecated. Please use %s instead." % \
#                        (self.__class__.__name__,\
#                         CVBestSearchRefitParallel.__name__),
#                        category=DeprecationWarning)

    def get_signature(self):
        return self.__class__.__name__

    def transform(self, **Xy):
        Xy_train, Xy_test = train_test_split(Xy)
        if Xy_train is Xy_test:
            to_refit, best_params = self._search_best(**Xy)
        else:
            to_refit, best_params = self._search_best(**Xy_train)
        out = to_refit.top_down(**Xy)
        out[conf.BEST_PARAMS] = best_params
        self.refited = to_refit
        self.best_params = best_params
        return out

    def _search_best(self, **Xy):
        # Fit/predict CV grid search
        self.cv.store = StoreMem()  # local store erased at each fit
        from epac.workflow.pipeline import Pipe
        self.cv.top_down(**Xy)
        #  Pump-up results
        cv_result_set = self.cv.reduce(store_results=False)
        key_val = [(result.key(), result[self.score])
                   for result in cv_result_set]
        scores = np.asarray(zip(*key_val)[1])
        scores_opt = np.max(scores) if self.arg_max else np.min(scores)
        idx_best = np.where(scores == scores_opt)[0][0]
        best_key = key_val[idx_best][0]
        # Find nodes that match the best
        nodes_dict = {n.get_signature(): n for n in self.cv.walk_true_nodes()
                      if n.get_signature() in key_split(best_key)}
        to_refit = Pipe(*[nodes_dict[k].wrapped_node
                          for k in key_split(best_key)])
        best_params = [dict(sig) for sig in key_split(best_key, eval=True)]
        return to_refit, best_params

    def reduce(self, store_results=True):
        # Termination (leaf) node: return the result_set
        return self.load_results()
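
Because __init__ pops "score" and "arg_max" before forwarding the remaining kwargs to CV, both can be overridden at construction time. A small usage sketch based on the doctest above; the explicit values simply restate the defaults:

from sklearn.svm import SVC
from epac import Methods
from epac.workflow.splitters import CVBestSearchRefit

methods = Methods(*[SVC(C=C) for C in [.1, 1, 5]])
# "score" and "arg_max" are consumed by __init__; "n_folds" is passed
# through to the internal CV node.
wf = CVBestSearchRefit(methods,
                       n_folds=2,
                       score="y/test/score_recall_mean",
                       arg_max=True)
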