def partial_call_signature(func):
    """Return the fully qualified call signature for a (partial) function.

    ``func`` is wrapped in ``functools.partial`` so plain callables and
    existing partials are handled uniformly; the formatted signature is
    built from the underlying function plus any positional args already
    bound by the partial and the defaulted keyword arguments.
    """
    func = partial(func)
    fa = jfi.getfullargspec(func)
    default_kw = {}
    # Defaults align with the *last* len(defaults) positional args
    # (fa.defaults is None when no positional arg has a default).
    if fa.args and fa.defaults:
        default_kw = dict(zip(fa.args[-len(fa.defaults):], fa.defaults))
    # fa.kwonlydefaults is None (not {}) when there are no kw-only defaults.
    fq_keywords = {**default_kw, **(fa.kwonlydefaults or {})}
    return jfi.format_signature(func.func, *func.args, **fq_keywords)
def args_extractor(f, merge_defaults=False):
    """Build an argument-normalising helper for ``f``.

    Inspects ``f``'s signature and returns a function
    ``(args, kargs) -> (varargs, kwargs_dict)`` that maps positional
    arguments back onto their parameter names; leftover positionals are
    returned separately as ``varargs``.

    When ``merge_defaults`` is truthy, dict-valued parameter defaults are
    merged underneath the caller-supplied value for the same parameter.
    Passing a Sequence restricts that treatment to the named parameters,
    and ``merge_defaults is True`` defers to ``f.__merge_defaults__``
    when the attribute exists.
    """
    argspec = getfullargspec(f)
    positional_names = argspec.args
    n_positional = len(positional_names)

    # Positional defaults align with the trailing positional parameters.
    if argspec.defaults:
        positional_defaults = dict(
            zip(positional_names[-len(argspec.defaults):], argspec.defaults))
    else:
        positional_defaults = {}
    kwonly_defaults = argspec.kwonlydefaults or {}

    if merge_defaults is True and hasattr(f, '__merge_defaults__'):
        merge_defaults = f.__merge_defaults__

    if merge_defaults:
        # Only dict-valued defaults participate in merging; positional
        # defaults shadow kw-only defaults of the same name.
        combined = {**kwonly_defaults, **positional_defaults}
        mergeable = {k: v for k, v in combined.items() if isinstance(v, dict)}
        if isinstance(merge_defaults, Sequence):
            mergeable = {k: mergeable[k] for k in merge_defaults}

        def _args_dict(args, kargs):
            named = dict(zip(positional_names, args[:n_positional]))
            extra = args[n_positional:]
            merged = {**kargs, **named}
            for key, base in mergeable.items():
                # Caller-supplied entries win over the default dict.
                merged[key] = {**base, **(merged.get(key) or {})}
            return extra, merged
    else:
        def _args_dict(args, kargs):
            named = dict(zip(positional_names, args[:n_positional]))
            extra = args[n_positional:]
            return extra, {**kargs, **named}

    return _args_dict
def _fit(self, X, y, parameter_iterable=None):
    """Run the SigOpt-driven hyperparameter search and optionally refit.

    For each optimization round, pulls ``n_sug`` suggestions (one per CV
    fold per suggestion batch) from the SigOpt API, evaluates them with
    joblib in parallel, and reports the scores back as observations.
    Finishes by storing the best assignments found and, when
    ``self.refit`` is set, refitting a clone of the estimator on the
    full dataset with those parameters.

    Raises NotImplementedError if ``parameter_iterable`` is given, and
    RuntimeError when no valid observation was ever reported (e.g. all
    evaluations timed out).
    """
    if parameter_iterable is not None:
        raise NotImplementedError('The parameter_iterable argument is not supported.')

    # Actual fitting, performing the search over parameters.
    estimator = self.estimator
    cv = self.cv
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    # setup SigOpt experiment and run optimization
    n_folds = len(cv)
    self._create_sigopt_exp(self.sigopt_connection, n_folds)

    # start tracking time to optimize estimator
    opt_start_time = time.time()
    for jk in range(0, self.n_iter, self.n_sug):
        # check for opt timeout, ensuring at least 1 observation
        # TODO : handling failure observations
        if (
            self.opt_timeout is not None and
            time.time() - opt_start_time > self.opt_timeout and
            jk >= 1
        ):
            # break out of loop and refit model with best params so far
            break

        # one suggestion per (batch member, CV fold) pair; jobs holds the
        # matching (parameters, train, test) triples in the same order
        suggestions = []
        jobs = []
        for _ in range(self.n_sug):
            for train, test in cv:
                suggestion = self.sigopt_connection.experiments(self.experiment.id).suggestions().create()
                parameters = self._convert_sigopt_api_to_sklearn_assignments(suggestion.assignments.to_json())
                suggestions.append(suggestion)
                jobs.append([parameters, train, test])

        if self.verbose > 0:
            print('Evaluating params : ', [job[0] for job in jobs])

        # do CV folds in parallel using joblib
        # returns scores on test set
        obs_timed_out = False
        try:
            par_kwargs = {'n_jobs': self.n_jobs, 'verbose': self.verbose,
                          'pre_dispatch': pre_dispatch}
            # add timeout kwarg if version of joblib supports it
            if 'timeout' in getfullargspec(Parallel.__init__).args:
                par_kwargs['timeout'] = self.cv_timeout
            out = Parallel(**par_kwargs)(
                delayed(_fit_and_score)(clone(base_estimator), X, y,
                                        self.scorer_, train, test,
                                        self.verbose, parameters,
                                        self.fit_params,
                                        return_parameters=True,
                                        error_score=self.error_score)
                for parameters, train, test in jobs)
        except TimeoutError:
            obs_timed_out = True

        if not obs_timed_out:
            # grab scores from results; out[sidx] pairs with suggestions[sidx]
            for sidx, suggestion in enumerate(suggestions):
                score = out[sidx][0]
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    value=score)
        else:
            # observations timed out, so report a failure for *every*
            # suggestion in this batch (previously only the stale loop
            # variable — the last suggestion created — was reported,
            # leaving the rest dangling on the SigOpt side)
            for suggestion in suggestions:
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    failed=True)

    # return best SigOpt assignments so far
    best_assignments = self.sigopt_connection.experiments(self.experiment.id).best_assignments().fetch().data

    if not best_assignments:
        raise RuntimeError(
            'No valid observations found. '
            'Make sure opt_timeout and cv_timeout provide sufficient time for observations to be reported.')

    self.best_params_ = self._convert_sigopt_api_to_sklearn_assignments(best_assignments[0].assignments.to_json())
    self.best_score_ = best_assignments[0].value

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**self.best_params_)
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
def _fit(self, X, y, groups=None, parameter_iterable=None, **fit_params):
    """Run the SigOpt-driven hyperparameter search and optionally refit.

    For each optimization round, pulls ``n_sug`` suggestions from the
    SigOpt API, evaluates every suggestion across all CV folds with
    joblib in parallel, and reports the per-suggestion mean/stddev of
    the fold scores back as observations. Finishes by storing the best
    assignments in ``self.our_best_params_`` / ``self.our_best_score_``
    and, when ``self.refit`` is set, refitting a clone of the estimator
    on the full dataset with those parameters.

    Raises NotImplementedError if ``groups`` or ``parameter_iterable``
    is given, and RuntimeError when no valid observation was ever
    reported (e.g. all evaluations timed out).
    """
    if groups is not None:
        raise NotImplementedError('The groups argument is not supported.')
    if parameter_iterable is not None:
        raise NotImplementedError('The parameter_iterable argument is not supported.')

    # constructor-supplied fit params take precedence over call-time ones
    if self.fit_params is not None:
        fit_params = self.fit_params

    # Actual fitting, performing the search over parameters.
    estimator = self.estimator
    cv = self.cv
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    n_folds, cv_iter = our_check_cv(cv, X, y, classifier=is_classifier(estimator))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    # setup SigOpt experiment and run optimization
    self._create_sigopt_exp(self.sigopt_connection)

    # start tracking time to optimize estimator
    opt_start_time = time.time()
    for jk in range(0, self.n_iter, self.n_sug):
        # check for opt timeout, ensuring at least 1 observation
        # TODO : handling failure observations
        if (
            self.opt_timeout is not None and
            time.time() - opt_start_time > self.opt_timeout and
            jk >= 1
        ):
            # break out of loop and refit model with best params so far
            break

        suggestions = []
        parameter_configs = []
        for _ in range(self.n_sug):
            suggestion = self.sigopt_connection.experiments(self.experiment.id).suggestions().create()
            parameters = self._convert_sigopt_api_to_sklearn_assignments(suggestion.assignments.to_json())
            suggestions.append(suggestion)
            parameter_configs.append(parameters)

        if self.verbose > 0:
            print('Evaluating params : ', parameter_configs)

        # do CV folds in parallel using joblib
        # returns scores on test set
        obs_timed_out = False
        try:
            par_kwargs = {'n_jobs': self.n_jobs, 'verbose': self.verbose,
                          'pre_dispatch': pre_dispatch}
            # add timeout kwarg if version of joblib supports it
            if 'timeout' in getfullargspec(Parallel.__init__).args:
                par_kwargs['timeout'] = self.cv_timeout
            out = Parallel(**par_kwargs)(
                delayed(_fit_and_score)(clone(base_estimator), X, y,
                                        self.scorer_, train, test,
                                        self.verbose, parameters,
                                        fit_params,
                                        return_parameters=True,
                                        error_score=self.error_score)
                for parameters in parameter_configs
                for train, test in cv_iter)
        except TimeoutError:
            obs_timed_out = True

        if not obs_timed_out:
            # grab scores from results: out holds n_folds consecutive
            # results per suggestion, in suggestion order
            for sidx, suggestion in enumerate(suggestions):
                out_idx = sidx * n_folds
                scores = [o[0] for o in out[out_idx:out_idx + n_folds]]
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    value=numpy.mean(scores),
                    value_stddev=numpy.std(scores)
                )
        else:
            # observations timed out, so report a failure for *every*
            # suggestion in this batch (previously only the stale loop
            # variable — the last suggestion created — was reported,
            # leaving the rest dangling on the SigOpt side)
            for suggestion in suggestions:
                self.sigopt_connection.experiments(self.experiment.id).observations().create(
                    suggestion=suggestion.id,
                    failed=True)

    # return best SigOpt assignments so far
    best_assignments = self.sigopt_connection.experiments(self.experiment.id).best_assignments().fetch().data

    if not best_assignments:
        raise RuntimeError(
            'No valid observations found. '
            'Make sure opt_timeout and cv_timeout provide sufficient time for observations to be reported.')

    self.our_best_params_ = self._convert_sigopt_api_to_sklearn_assignments(
        best_assignments[0].assignments.to_json())
    self.our_best_score_ = best_assignments[0].value

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        # BUG FIX: refit with the params computed above (our_best_params_);
        # the old code read self.best_params_, which this method never sets
        best_estimator = clone(base_estimator).set_params(**self.our_best_params_)
        if y is not None:
            best_estimator.fit(X, y, **fit_params)
        else:
            best_estimator.fit(X, **fit_params)
        self.our_best_estimator_ = best_estimator
    return self