Example #1
from functools import partial

def partial_call_signature(func):
    """Return the fully qualified call signature for a (partial) function."""
    # `jfi` is the original project's function-inspection helper module
    # (it must provide getfullargspec and format_signature).
    func = partial(func)
    fa = jfi.getfullargspec(func)
    default_kw = {}
    if fa.args and fa.defaults:
        # defaults pair with the *trailing* positional args, not the leading ones
        default_kw = dict(zip(fa.args[-len(fa.defaults):], fa.defaults))
    # kwonlydefaults is None when no keyword-only argument has a default
    fq_keywords = {**default_kw, **(fa.kwonlydefaults or {})}
    return jfi.format_signature(func.func, *func.args, **fq_keywords)
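Example #1 relies on getfullargspec pairing defaults with the trailing positional parameters. A minimal stdlib-only sketch of that alignment (the function below is illustrative):

from inspect import getfullargspec

def connect(host, port=80, *, retries=3):
    pass

spec = getfullargspec(connect)
# spec.args           -> ['host', 'port']
# spec.defaults       -> (80,)   pairs with the last len(defaults) entries of args
# spec.kwonlydefaults -> {'retries': 3}
trailing_defaults = dict(zip(spec.args[-len(spec.defaults):], spec.defaults))
# trailing_defaults   -> {'port': 80}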
Example #2
from collections.abc import Sequence
from inspect import getfullargspec

import toolz as t           # assumed: `t`/`tc` stand for toolz and toolz.curried,
import toolz.curried as tc  # which provide the merge/pipe/valfilter used below


def args_extractor(f, merge_defaults=False):
    """
    Take a function, inspect its parameter list, and return a function that
    maps the positional and keyword arguments of a call back to a dictionary
    keyed by parameter name. Surplus positional arguments (varargs), which
    have no names, are returned separately.
    """
    spec = getfullargspec(f)
    if spec.defaults:
        param_defaults = dict(
            zip(spec.args[-len(spec.defaults):], spec.defaults))
    else:
        param_defaults = {}
    named_param_defaults = spec.kwonlydefaults or {}
    default_dicts = {}
    num_named_args = len(spec.args)

    if merge_defaults is True and hasattr(f, '__merge_defaults__'):
        merge_defaults = f.__merge_defaults__

    if merge_defaults:
        # keep only the defaults that are themselves dicts, so they can later be
        # merged with the corresponding dict values supplied by the caller
        default_dicts = t.pipe(t.merge(named_param_defaults, param_defaults),
                               tc.valfilter(lambda v: isinstance(v, dict)))

        if isinstance(merge_defaults, Sequence):
            # an explicit sequence restricts the merge to the listed parameters
            default_dicts = {k: default_dicts[k] for k in merge_defaults}

        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            for k, d in default_dicts.items():
                kargs[k] = t.merge(d, kargs.get(k) or {})
            return varargs, kargs
    else:

        def _args_dict(args, kargs):
            unnamed_args = dict(zip(spec.args, args[0:num_named_args]))
            varargs = args[num_named_args:]
            kargs = t.merge(kargs, unnamed_args)
            return varargs, kargs

    return _args_dict
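A hedged usage sketch of the merge_defaults=False path (the function and arguments below are illustrative, assuming the toolz-based imports above):

def report(host, port=80, *extra, timeout=None, **options):
    pass

extract = args_extractor(report)
varargs, named = extract(('example.org', 8080, 'a', 'b'), {'timeout': 5})
# varargs -> ('a', 'b')   surplus positionals with no parameter name
# named   -> {'timeout': 5, 'host': 'example.org', 'port': 8080}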
Example #3
    def _fit(self, X, y, parameter_iterable=None):
        if parameter_iterable is not None:
            raise NotImplementedError('The parameter_iterable argument is not supported.')

        # Actual fitting, performing the search over parameters.
        estimator = self.estimator
        cv = self.cv
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))

        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        base_estimator = clone(self.estimator)
        pre_dispatch = self.pre_dispatch

        # setup SigOpt experiment and run optimization
        n_folds = len(cv)
        self._create_sigopt_exp(self.sigopt_connection, n_folds)

        # start tracking time to optimize estimator
        opt_start_time = time.time()
        for jk in range(0, self.n_iter, self.n_sug):
            # check for opt timeout, ensuring at least 1 observation
            # TODO : handling failure observations
            if (
                self.opt_timeout is not None and
                time.time() - opt_start_time > self.opt_timeout and
                jk >= 1
            ):
                # break out of loop and refit model with best params so far
                break

            suggestions = []
            jobs = []
            for _ in range(self.n_sug):
                for train, test in cv:
                    suggestion = self.sigopt_connection.experiments(self.experiment.id).suggestions().create()
                    parameters = self._convert_sigopt_api_to_sklearn_assignments(suggestion.assignments.to_json())
                    suggestions.append(suggestion)
                    jobs.append([parameters, train, test])

            if self.verbose > 0:
                print('Evaluating params : ', [job[0] for job in jobs])


            # do CV folds in parallel using joblib
            # returns scores on test set
            obs_timed_out = False
            try:
                par_kwargs = {'n_jobs': self.n_jobs, 'verbose': self.verbose,
                              'pre_dispatch': pre_dispatch}
                # add timeout kwarg if version of joblib supports it
                if 'timeout' in getfullargspec(Parallel.__init__).args:
                    par_kwargs['timeout'] = self.cv_timeout
                out = Parallel(
                    **par_kwargs
                )(
                    delayed(_fit_and_score)(clone(base_estimator), X, y,
                                            self.scorer_, train, test,
                                            self.verbose, parameters,
                                            self.fit_params,
                                            return_parameters=True,
                                            error_score=self.error_score)
                        for parameters, train, test in jobs)
            except TimeoutError:
                obs_timed_out = True

            if not obs_timed_out:
                # grab scores from results
                for sidx, suggestion in enumerate(suggestions):
                    score = out[sidx][0]
                    self.sigopt_connection.experiments(self.experiment.id).observations().create(
                        suggestion=suggestion.id,
                        value=score)
            else:
                # observation timed out so report a failure
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    failed=True)

        # return best SigOpt assignments so far
        best_assignments = self.sigopt_connection.experiments(self.experiment.id).best_assignments().fetch().data

        if not best_assignments:
            raise RuntimeError(
                'No valid observations found. '
                'Make sure opt_timeout and cv_timeout provide sufficient time for observations to be reported.')

        self.best_params_ = self._convert_sigopt_api_to_sklearn_assignments(best_assignments[0].assignments.to_json())
        self.best_score_ = best_assignments[0].value

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(**self.best_params_)
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator
        return self
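The joblib timeout handling in Examples #3 and #4 hinges on a single check: inspecting Parallel.__init__ to see whether the installed joblib version accepts a timeout keyword. A stand-alone sketch of that feature-detection pattern:

from inspect import getfullargspec

from joblib import Parallel, delayed

par_kwargs = {'n_jobs': 2}
# only pass `timeout` if this joblib release's Parallel.__init__ accepts it
if 'timeout' in getfullargspec(Parallel.__init__).args:
    par_kwargs['timeout'] = 30.0

results = Parallel(**par_kwargs)(delayed(pow)(i, 2) for i in range(4))
# results -> [0, 1, 4, 9]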
Example #4
    def _fit(self, X, y, groups=None, parameter_iterable=None, **fit_params):
        if groups is not None:
            raise NotImplementedError('The groups argument is not supported.')
        if parameter_iterable is not None:
            raise NotImplementedError('The parameter_iterable argument is not supported.')
        if self.fit_params is not None:
            fit_params = self.fit_params

        # Actual fitting, performing the search over parameters.
        estimator = self.estimator
        cv = self.cv
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))

        n_folds, cv_iter = our_check_cv(cv, X, y, classifier=is_classifier(estimator))

        base_estimator = clone(self.estimator)
        pre_dispatch = self.pre_dispatch

        # setup SigOpt experiment and run optimization
        self._create_sigopt_exp(self.sigopt_connection)

        # start tracking time to optimize estimator
        opt_start_time = time.time()
        for jk in range(0, self.n_iter, self.n_sug):
            # check for opt timeout, ensuring at least 1 observation
            # TODO : handling failure observations
            if (
                self.opt_timeout is not None and
                time.time() - opt_start_time > self.opt_timeout and
                jk >= 1
            ):
                # break out of loop and refit model with best params so far
                break

            suggestions = []
            parameter_configs = []
            for _ in range(self.n_sug):
                suggestion = self.sigopt_connection.experiments(self.experiment.id).suggestions().create()
                parameters = self._convert_sigopt_api_to_sklearn_assignments(suggestion.assignments.to_json())
                suggestions.append(suggestion)
                parameter_configs.append(parameters)

            if self.verbose > 0:
                print('Evaluating params : ', parameter_configs)


            # do CV folds in parallel using joblib
            # returns scores on test set
            obs_timed_out = False
            try:
                par_kwargs = {'n_jobs': self.n_jobs, 'verbose': self.verbose,
                              'pre_dispatch': pre_dispatch}
                # add timeout kwarg if version of joblib supports it
                if 'timeout' in getfullargspec(Parallel.__init__).args:
                    par_kwargs['timeout'] = self.cv_timeout
                out = Parallel(
                    **par_kwargs
                )(
                    delayed(_fit_and_score)(clone(base_estimator), X, y,
                                            self.scorer_, train, test,
                                            self.verbose, parameters,
                                            fit_params,
                                            return_parameters=True,
                                            error_score=self.error_score)
                        for parameters in parameter_configs
                        for train, test in cv_iter)
            except TimeoutError:
                obs_timed_out = True

            if not obs_timed_out:
                # grab scores from results
                for sidx, suggestion in enumerate(suggestions):
                    out_idx = sidx * n_folds
                    scores = [o[0] for o in out[out_idx:out_idx+n_folds]]
                    self.sigopt_connection.experiments(self.experiment.id).observations().create(
                        suggestion=suggestion.id,
                        value=numpy.mean(scores),
                        value_stddev=numpy.std(scores)
                    )
            else:
                # observation timed out so report a failure
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    failed=True)

        # return best SigOpt assignments so far
        best_assignments = self.sigopt_connection.experiments(self.experiment.id).best_assignments().fetch().data

        if not best_assignments:
            raise RuntimeError(
                'No valid observations found. '
                'Make sure opt_timeout and cv_timeout provide sufficient time for observations to be reported.')

        self.our_best_params_ = self._convert_sigopt_api_to_sklearn_assignments(
            best_assignments[0].assignments.to_json())
        self.our_best_score_ = best_assignments[0].value

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(**self.best_params_)
            if y is not None:
                best_estimator.fit(X, y, **fit_params)
            else:
                best_estimator.fit(X, **fit_params)
            self.our_best_estimator_ = best_estimator
        return self
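In Example #4 every suggestion is scored on all n_folds CV splits, so the flat list joblib returns is sliced back into per-suggestion chunks before the mean and standard deviation are reported. A small numpy sketch of that regrouping (the scores are made-up numbers):

import numpy

n_folds = 3
# flat per-fold results in suggestion order, as joblib returns them
out = [(0.81,), (0.79,), (0.83,), (0.65,), (0.70,), (0.66,)]

for sidx in range(len(out) // n_folds):
    out_idx = sidx * n_folds
    scores = [o[0] for o in out[out_idx:out_idx + n_folds]]
    print(sidx, numpy.mean(scores), numpy.std(scores))
# suggestion 0 -> mean 0.81, suggestion 1 -> mean 0.67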